diff options
Diffstat (limited to 'vendor/golang.org/x/net/publicsuffix/gen.go')
-rw-r--r-- | vendor/golang.org/x/net/publicsuffix/gen.go | 713 |
1 files changed, 0 insertions, 713 deletions
diff --git a/vendor/golang.org/x/net/publicsuffix/gen.go b/vendor/golang.org/x/net/publicsuffix/gen.go deleted file mode 100644 index f85a3c32b..000000000 --- a/vendor/golang.org/x/net/publicsuffix/gen.go +++ /dev/null @@ -1,713 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -// This program generates table.go and table_test.go based on the authoritative -// public suffix list at https://publicsuffix.org/list/effective_tld_names.dat -// -// The version is derived from -// https://api.github.com/repos/publicsuffix/list/commits?path=public_suffix_list.dat -// and a human-readable form is at -// https://github.com/publicsuffix/list/commits/master/public_suffix_list.dat -// -// To fetch a particular git revision, such as 5c70ccd250, pass -// -url "https://raw.githubusercontent.com/publicsuffix/list/5c70ccd250/public_suffix_list.dat" -// and -version "an explicit version string". - -import ( - "bufio" - "bytes" - "flag" - "fmt" - "go/format" - "io" - "io/ioutil" - "net/http" - "os" - "regexp" - "sort" - "strings" - - "golang.org/x/net/idna" -) - -const ( - // These sum of these four values must be no greater than 32. - nodesBitsChildren = 10 - nodesBitsICANN = 1 - nodesBitsTextOffset = 15 - nodesBitsTextLength = 6 - - // These sum of these four values must be no greater than 32. - childrenBitsWildcard = 1 - childrenBitsNodeType = 2 - childrenBitsHi = 14 - childrenBitsLo = 14 -) - -var ( - maxChildren int - maxTextOffset int - maxTextLength int - maxHi uint32 - maxLo uint32 -) - -func max(a, b int) int { - if a < b { - return b - } - return a -} - -func u32max(a, b uint32) uint32 { - if a < b { - return b - } - return a -} - -const ( - nodeTypeNormal = 0 - nodeTypeException = 1 - nodeTypeParentOnly = 2 - numNodeType = 3 -) - -func nodeTypeStr(n int) string { - switch n { - case nodeTypeNormal: - return "+" - case nodeTypeException: - return "!" - case nodeTypeParentOnly: - return "o" - } - panic("unreachable") -} - -const ( - defaultURL = "https://publicsuffix.org/list/effective_tld_names.dat" - gitCommitURL = "https://api.github.com/repos/publicsuffix/list/commits?path=public_suffix_list.dat" -) - -var ( - labelEncoding = map[string]uint32{} - labelsList = []string{} - labelsMap = map[string]bool{} - rules = []string{} - - // validSuffixRE is used to check that the entries in the public suffix - // list are in canonical form (after Punycode encoding). Specifically, - // capital letters are not allowed. - validSuffixRE = regexp.MustCompile(`^[a-z0-9_\!\*\-\.]+$`) - - shaRE = regexp.MustCompile(`"sha":"([^"]+)"`) - dateRE = regexp.MustCompile(`"committer":{[^{]+"date":"([^"]+)"`) - - comments = flag.Bool("comments", false, "generate table.go comments, for debugging") - subset = flag.Bool("subset", false, "generate only a subset of the full table, for debugging") - url = flag.String("url", defaultURL, "URL of the publicsuffix.org list. If empty, stdin is read instead") - v = flag.Bool("v", false, "verbose output (to stderr)") - version = flag.String("version", "", "the effective_tld_names.dat version") -) - -func main() { - if err := main1(); err != nil { - fmt.Fprintln(os.Stderr, err) - os.Exit(1) - } -} - -func main1() error { - flag.Parse() - if nodesBitsTextLength+nodesBitsTextOffset+nodesBitsICANN+nodesBitsChildren > 32 { - return fmt.Errorf("not enough bits to encode the nodes table") - } - if childrenBitsLo+childrenBitsHi+childrenBitsNodeType+childrenBitsWildcard > 32 { - return fmt.Errorf("not enough bits to encode the children table") - } - if *version == "" { - if *url != defaultURL { - return fmt.Errorf("-version was not specified, and the -url is not the default one") - } - sha, date, err := gitCommit() - if err != nil { - return err - } - *version = fmt.Sprintf("publicsuffix.org's public_suffix_list.dat, git revision %s (%s)", sha, date) - } - var r io.Reader = os.Stdin - if *url != "" { - res, err := http.Get(*url) - if err != nil { - return err - } - if res.StatusCode != http.StatusOK { - return fmt.Errorf("bad GET status for %s: %d", *url, res.Status) - } - r = res.Body - defer res.Body.Close() - } - - var root node - icann := false - br := bufio.NewReader(r) - for { - s, err := br.ReadString('\n') - if err != nil { - if err == io.EOF { - break - } - return err - } - s = strings.TrimSpace(s) - if strings.Contains(s, "BEGIN ICANN DOMAINS") { - icann = true - continue - } - if strings.Contains(s, "END ICANN DOMAINS") { - icann = false - continue - } - if s == "" || strings.HasPrefix(s, "//") { - continue - } - s, err = idna.ToASCII(s) - if err != nil { - return err - } - if !validSuffixRE.MatchString(s) { - return fmt.Errorf("bad publicsuffix.org list data: %q", s) - } - - if *subset { - switch { - case s == "ac.jp" || strings.HasSuffix(s, ".ac.jp"): - case s == "ak.us" || strings.HasSuffix(s, ".ak.us"): - case s == "ao" || strings.HasSuffix(s, ".ao"): - case s == "ar" || strings.HasSuffix(s, ".ar"): - case s == "arpa" || strings.HasSuffix(s, ".arpa"): - case s == "cy" || strings.HasSuffix(s, ".cy"): - case s == "dyndns.org" || strings.HasSuffix(s, ".dyndns.org"): - case s == "jp": - case s == "kobe.jp" || strings.HasSuffix(s, ".kobe.jp"): - case s == "kyoto.jp" || strings.HasSuffix(s, ".kyoto.jp"): - case s == "om" || strings.HasSuffix(s, ".om"): - case s == "uk" || strings.HasSuffix(s, ".uk"): - case s == "uk.com" || strings.HasSuffix(s, ".uk.com"): - case s == "tw" || strings.HasSuffix(s, ".tw"): - case s == "zw" || strings.HasSuffix(s, ".zw"): - case s == "xn--p1ai" || strings.HasSuffix(s, ".xn--p1ai"): - // xn--p1ai is Russian-Cyrillic "рф". - default: - continue - } - } - - rules = append(rules, s) - - nt, wildcard := nodeTypeNormal, false - switch { - case strings.HasPrefix(s, "*."): - s, nt = s[2:], nodeTypeParentOnly - wildcard = true - case strings.HasPrefix(s, "!"): - s, nt = s[1:], nodeTypeException - } - labels := strings.Split(s, ".") - for n, i := &root, len(labels)-1; i >= 0; i-- { - label := labels[i] - n = n.child(label) - if i == 0 { - if nt != nodeTypeParentOnly && n.nodeType == nodeTypeParentOnly { - n.nodeType = nt - } - n.icann = n.icann && icann - n.wildcard = n.wildcard || wildcard - } - labelsMap[label] = true - } - } - labelsList = make([]string, 0, len(labelsMap)) - for label := range labelsMap { - labelsList = append(labelsList, label) - } - sort.Strings(labelsList) - - if err := generate(printReal, &root, "table.go"); err != nil { - return err - } - if err := generate(printTest, &root, "table_test.go"); err != nil { - return err - } - return nil -} - -func generate(p func(io.Writer, *node) error, root *node, filename string) error { - buf := new(bytes.Buffer) - if err := p(buf, root); err != nil { - return err - } - b, err := format.Source(buf.Bytes()) - if err != nil { - return err - } - return ioutil.WriteFile(filename, b, 0644) -} - -func gitCommit() (sha, date string, retErr error) { - res, err := http.Get(gitCommitURL) - if err != nil { - return "", "", err - } - if res.StatusCode != http.StatusOK { - return "", "", fmt.Errorf("bad GET status for %s: %d", gitCommitURL, res.Status) - } - defer res.Body.Close() - b, err := ioutil.ReadAll(res.Body) - if err != nil { - return "", "", err - } - if m := shaRE.FindSubmatch(b); m != nil { - sha = string(m[1]) - } - if m := dateRE.FindSubmatch(b); m != nil { - date = string(m[1]) - } - if sha == "" || date == "" { - retErr = fmt.Errorf("could not find commit SHA and date in %s", gitCommitURL) - } - return sha, date, retErr -} - -func printTest(w io.Writer, n *node) error { - fmt.Fprintf(w, "// generated by go run gen.go; DO NOT EDIT\n\n") - fmt.Fprintf(w, "package publicsuffix\n\nvar rules = [...]string{\n") - for _, rule := range rules { - fmt.Fprintf(w, "%q,\n", rule) - } - fmt.Fprintf(w, "}\n\nvar nodeLabels = [...]string{\n") - if err := n.walk(w, printNodeLabel); err != nil { - return err - } - fmt.Fprintf(w, "}\n") - return nil -} - -func printReal(w io.Writer, n *node) error { - const header = `// generated by go run gen.go; DO NOT EDIT - -package publicsuffix - -const version = %q - -const ( - nodesBitsChildren = %d - nodesBitsICANN = %d - nodesBitsTextOffset = %d - nodesBitsTextLength = %d - - childrenBitsWildcard = %d - childrenBitsNodeType = %d - childrenBitsHi = %d - childrenBitsLo = %d -) - -const ( - nodeTypeNormal = %d - nodeTypeException = %d - nodeTypeParentOnly = %d -) - -// numTLD is the number of top level domains. -const numTLD = %d - -` - fmt.Fprintf(w, header, *version, - nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength, - childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo, - nodeTypeNormal, nodeTypeException, nodeTypeParentOnly, len(n.children)) - - text := combineText(labelsList) - if text == "" { - return fmt.Errorf("internal error: makeText returned no text") - } - for _, label := range labelsList { - offset, length := strings.Index(text, label), len(label) - if offset < 0 { - return fmt.Errorf("internal error: could not find %q in text %q", label, text) - } - maxTextOffset, maxTextLength = max(maxTextOffset, offset), max(maxTextLength, length) - if offset >= 1<<nodesBitsTextOffset { - return fmt.Errorf("text offset %d is too large, or nodeBitsTextOffset is too small", offset) - } - if length >= 1<<nodesBitsTextLength { - return fmt.Errorf("text length %d is too large, or nodeBitsTextLength is too small", length) - } - labelEncoding[label] = uint32(offset)<<nodesBitsTextLength | uint32(length) - } - fmt.Fprintf(w, "// Text is the combined text of all labels.\nconst text = ") - for len(text) > 0 { - n, plus := len(text), "" - if n > 64 { - n, plus = 64, " +" - } - fmt.Fprintf(w, "%q%s\n", text[:n], plus) - text = text[n:] - } - - if err := n.walk(w, assignIndexes); err != nil { - return err - } - - fmt.Fprintf(w, ` - -// nodes is the list of nodes. Each node is represented as a uint32, which -// encodes the node's children, wildcard bit and node type (as an index into -// the children array), ICANN bit and text. -// -// If the table was generated with the -comments flag, there is a //-comment -// after each node's data. In it is the nodes-array indexes of the children, -// formatted as (n0x1234-n0x1256), with * denoting the wildcard bit. The -// nodeType is printed as + for normal, ! for exception, and o for parent-only -// nodes that have children but don't match a domain label in their own right. -// An I denotes an ICANN domain. -// -// The layout within the uint32, from MSB to LSB, is: -// [%2d bits] unused -// [%2d bits] children index -// [%2d bits] ICANN bit -// [%2d bits] text index -// [%2d bits] text length -var nodes = [...]uint32{ -`, - 32-nodesBitsChildren-nodesBitsICANN-nodesBitsTextOffset-nodesBitsTextLength, - nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength) - if err := n.walk(w, printNode); err != nil { - return err - } - fmt.Fprintf(w, `} - -// children is the list of nodes' children, the parent's wildcard bit and the -// parent's node type. If a node has no children then their children index -// will be in the range [0, 6), depending on the wildcard bit and node type. -// -// The layout within the uint32, from MSB to LSB, is: -// [%2d bits] unused -// [%2d bits] wildcard bit -// [%2d bits] node type -// [%2d bits] high nodes index (exclusive) of children -// [%2d bits] low nodes index (inclusive) of children -var children=[...]uint32{ -`, - 32-childrenBitsWildcard-childrenBitsNodeType-childrenBitsHi-childrenBitsLo, - childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo) - for i, c := range childrenEncoding { - s := "---------------" - lo := c & (1<<childrenBitsLo - 1) - hi := (c >> childrenBitsLo) & (1<<childrenBitsHi - 1) - if lo != hi { - s = fmt.Sprintf("n0x%04x-n0x%04x", lo, hi) - } - nodeType := int(c>>(childrenBitsLo+childrenBitsHi)) & (1<<childrenBitsNodeType - 1) - wildcard := c>>(childrenBitsLo+childrenBitsHi+childrenBitsNodeType) != 0 - if *comments { - fmt.Fprintf(w, "0x%08x, // c0x%04x (%s)%s %s\n", - c, i, s, wildcardStr(wildcard), nodeTypeStr(nodeType)) - } else { - fmt.Fprintf(w, "0x%x,\n", c) - } - } - fmt.Fprintf(w, "}\n\n") - fmt.Fprintf(w, "// max children %d (capacity %d)\n", maxChildren, 1<<nodesBitsChildren-1) - fmt.Fprintf(w, "// max text offset %d (capacity %d)\n", maxTextOffset, 1<<nodesBitsTextOffset-1) - fmt.Fprintf(w, "// max text length %d (capacity %d)\n", maxTextLength, 1<<nodesBitsTextLength-1) - fmt.Fprintf(w, "// max hi %d (capacity %d)\n", maxHi, 1<<childrenBitsHi-1) - fmt.Fprintf(w, "// max lo %d (capacity %d)\n", maxLo, 1<<childrenBitsLo-1) - return nil -} - -type node struct { - label string - nodeType int - icann bool - wildcard bool - // nodesIndex and childrenIndex are the index of this node in the nodes - // and the index of its children offset/length in the children arrays. - nodesIndex, childrenIndex int - // firstChild is the index of this node's first child, or zero if this - // node has no children. - firstChild int - // children are the node's children, in strictly increasing node label order. - children []*node -} - -func (n *node) walk(w io.Writer, f func(w1 io.Writer, n1 *node) error) error { - if err := f(w, n); err != nil { - return err - } - for _, c := range n.children { - if err := c.walk(w, f); err != nil { - return err - } - } - return nil -} - -// child returns the child of n with the given label. The child is created if -// it did not exist beforehand. -func (n *node) child(label string) *node { - for _, c := range n.children { - if c.label == label { - return c - } - } - c := &node{ - label: label, - nodeType: nodeTypeParentOnly, - icann: true, - } - n.children = append(n.children, c) - sort.Sort(byLabel(n.children)) - return c -} - -type byLabel []*node - -func (b byLabel) Len() int { return len(b) } -func (b byLabel) Swap(i, j int) { b[i], b[j] = b[j], b[i] } -func (b byLabel) Less(i, j int) bool { return b[i].label < b[j].label } - -var nextNodesIndex int - -// childrenEncoding are the encoded entries in the generated children array. -// All these pre-defined entries have no children. -var childrenEncoding = []uint32{ - 0 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeNormal. - 1 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeException. - 2 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeParentOnly. - 4 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeNormal. - 5 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeException. - 6 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeParentOnly. -} - -var firstCallToAssignIndexes = true - -func assignIndexes(w io.Writer, n *node) error { - if len(n.children) != 0 { - // Assign nodesIndex. - n.firstChild = nextNodesIndex - for _, c := range n.children { - c.nodesIndex = nextNodesIndex - nextNodesIndex++ - } - - // The root node's children is implicit. - if firstCallToAssignIndexes { - firstCallToAssignIndexes = false - return nil - } - - // Assign childrenIndex. - maxChildren = max(maxChildren, len(childrenEncoding)) - if len(childrenEncoding) >= 1<<nodesBitsChildren { - return fmt.Errorf("children table size %d is too large, or nodeBitsChildren is too small", len(childrenEncoding)) - } - n.childrenIndex = len(childrenEncoding) - lo := uint32(n.firstChild) - hi := lo + uint32(len(n.children)) - maxLo, maxHi = u32max(maxLo, lo), u32max(maxHi, hi) - if lo >= 1<<childrenBitsLo { - return fmt.Errorf("children lo %d is too large, or childrenBitsLo is too small", lo) - } - if hi >= 1<<childrenBitsHi { - return fmt.Errorf("children hi %d is too large, or childrenBitsHi is too small", hi) - } - enc := hi<<childrenBitsLo | lo - enc |= uint32(n.nodeType) << (childrenBitsLo + childrenBitsHi) - if n.wildcard { - enc |= 1 << (childrenBitsLo + childrenBitsHi + childrenBitsNodeType) - } - childrenEncoding = append(childrenEncoding, enc) - } else { - n.childrenIndex = n.nodeType - if n.wildcard { - n.childrenIndex += numNodeType - } - } - return nil -} - -func printNode(w io.Writer, n *node) error { - for _, c := range n.children { - s := "---------------" - if len(c.children) != 0 { - s = fmt.Sprintf("n0x%04x-n0x%04x", c.firstChild, c.firstChild+len(c.children)) - } - encoding := labelEncoding[c.label] - if c.icann { - encoding |= 1 << (nodesBitsTextLength + nodesBitsTextOffset) - } - encoding |= uint32(c.childrenIndex) << (nodesBitsTextLength + nodesBitsTextOffset + nodesBitsICANN) - if *comments { - fmt.Fprintf(w, "0x%08x, // n0x%04x c0x%04x (%s)%s %s %s %s\n", - encoding, c.nodesIndex, c.childrenIndex, s, wildcardStr(c.wildcard), - nodeTypeStr(c.nodeType), icannStr(c.icann), c.label, - ) - } else { - fmt.Fprintf(w, "0x%x,\n", encoding) - } - } - return nil -} - -func printNodeLabel(w io.Writer, n *node) error { - for _, c := range n.children { - fmt.Fprintf(w, "%q,\n", c.label) - } - return nil -} - -func icannStr(icann bool) string { - if icann { - return "I" - } - return " " -} - -func wildcardStr(wildcard bool) string { - if wildcard { - return "*" - } - return " " -} - -// combineText combines all the strings in labelsList to form one giant string. -// Overlapping strings will be merged: "arpa" and "parliament" could yield -// "arparliament". -func combineText(labelsList []string) string { - beforeLength := 0 - for _, s := range labelsList { - beforeLength += len(s) - } - - text := crush(removeSubstrings(labelsList)) - if *v { - fmt.Fprintf(os.Stderr, "crushed %d bytes to become %d bytes\n", beforeLength, len(text)) - } - return text -} - -type byLength []string - -func (s byLength) Len() int { return len(s) } -func (s byLength) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (s byLength) Less(i, j int) bool { return len(s[i]) < len(s[j]) } - -// removeSubstrings returns a copy of its input with any strings removed -// that are substrings of other provided strings. -func removeSubstrings(input []string) []string { - // Make a copy of input. - ss := append(make([]string, 0, len(input)), input...) - sort.Sort(byLength(ss)) - - for i, shortString := range ss { - // For each string, only consider strings higher than it in sort order, i.e. - // of equal length or greater. - for _, longString := range ss[i+1:] { - if strings.Contains(longString, shortString) { - ss[i] = "" - break - } - } - } - - // Remove the empty strings. - sort.Strings(ss) - for len(ss) > 0 && ss[0] == "" { - ss = ss[1:] - } - return ss -} - -// crush combines a list of strings, taking advantage of overlaps. It returns a -// single string that contains each input string as a substring. -func crush(ss []string) string { - maxLabelLen := 0 - for _, s := range ss { - if maxLabelLen < len(s) { - maxLabelLen = len(s) - } - } - - for prefixLen := maxLabelLen; prefixLen > 0; prefixLen-- { - prefixes := makePrefixMap(ss, prefixLen) - for i, s := range ss { - if len(s) <= prefixLen { - continue - } - mergeLabel(ss, i, prefixLen, prefixes) - } - } - - return strings.Join(ss, "") -} - -// mergeLabel merges the label at ss[i] with the first available matching label -// in prefixMap, where the last "prefixLen" characters in ss[i] match the first -// "prefixLen" characters in the matching label. -// It will merge ss[i] repeatedly until no more matches are available. -// All matching labels merged into ss[i] are replaced by "". -func mergeLabel(ss []string, i, prefixLen int, prefixes prefixMap) { - s := ss[i] - suffix := s[len(s)-prefixLen:] - for _, j := range prefixes[suffix] { - // Empty strings mean "already used." Also avoid merging with self. - if ss[j] == "" || i == j { - continue - } - if *v { - fmt.Fprintf(os.Stderr, "%d-length overlap at (%4d,%4d): %q and %q share %q\n", - prefixLen, i, j, ss[i], ss[j], suffix) - } - ss[i] += ss[j][prefixLen:] - ss[j] = "" - // ss[i] has a new suffix, so merge again if possible. - // Note: we only have to merge again at the same prefix length. Shorter - // prefix lengths will be handled in the next iteration of crush's for loop. - // Can there be matches for longer prefix lengths, introduced by the merge? - // I believe that any such matches would by necessity have been eliminated - // during substring removal or merged at a higher prefix length. For - // instance, in crush("abc", "cde", "bcdef"), combining "abc" and "cde" - // would yield "abcde", which could be merged with "bcdef." However, in - // practice "cde" would already have been elimintated by removeSubstrings. - mergeLabel(ss, i, prefixLen, prefixes) - return - } -} - -// prefixMap maps from a prefix to a list of strings containing that prefix. The -// list of strings is represented as indexes into a slice of strings stored -// elsewhere. -type prefixMap map[string][]int - -// makePrefixMap constructs a prefixMap from a slice of strings. -func makePrefixMap(ss []string, prefixLen int) prefixMap { - prefixes := make(prefixMap) - for i, s := range ss { - // We use < rather than <= because if a label matches on a prefix equal to - // its full length, that's actually a substring match handled by - // removeSubstrings. - if prefixLen < len(s) { - prefix := s[:prefixLen] - prefixes[prefix] = append(prefixes[prefix], i) - } - } - - return prefixes -} |