// Copyright 2017 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package pipeline provides tools for creating translation pipelines. // // NOTE: UNDER DEVELOPMENT. API MAY CHANGE. package pipeline import ( "bytes" "encoding/json" "fmt" "go/build" "go/parser" "io/ioutil" "log" "os" "path/filepath" "regexp" "strings" "text/template" "unicode" "golang.org/x/text/internal" "golang.org/x/text/language" "golang.org/x/text/runes" "golang.org/x/tools/go/loader" ) const ( extractFile = "extracted.gotext.json" outFile = "out.gotext.json" gotextSuffix = "gotext.json" ) // Config contains configuration for the translation pipeline. type Config struct { // Supported indicates the languages for which data should be generated. // The default is to support all locales for which there are matching // translation files. Supported []language.Tag // --- Extraction SourceLanguage language.Tag Packages []string // --- File structure // Dir is the root dir for all operations. Dir string // TranslationsPattern is a regular expression to match incoming translation // files. These files may appear in any directory rooted at Dir. // language for the translation files is determined as follows: // 1. From the Language field in the file. // 2. If not present, from a valid language tag in the filename, separated // by dots (e.g. "en-US.json" or "incoming.pt_PT.xmb"). // 3. If not present, from a the closest subdirectory in which the file // is contained that parses as a valid language tag. TranslationsPattern string // OutPattern defines the location for translation files for a certain // language. The default is "{{.Dir}}/{{.Language}}/out.{{.Ext}}" OutPattern string // Format defines the file format for generated translation files. // The default is XMB. Alternatives are GetText, XLIFF, L20n, GoText. Format string Ext string // TODO: // Actions are additional actions to be performed after the initial extract // and merge. // Actions []struct { // Name string // Options map[string]string // } // --- Generation // GenFile may be in a different package. It is not defined, it will // be written to stdout. GenFile string // GenPackage is the package or relative path into which to generate the // file. If not specified it is relative to the current directory. GenPackage string // DeclareVar defines a variable to which to assing the generated Catalog. DeclareVar string // SetDefault determines whether to assign the generated Catalog to // message.DefaultCatalog. The default for this is true if DeclareVar is // not defined, false otherwise. SetDefault bool // TODO: // - Printf-style configuration // - Template-style configuration // - Extraction options // - Rewrite options // - Generation options } // Operations: // - extract: get the strings // - disambiguate: find messages with the same key, but possible different meaning. // - create out: create a list of messages that need translations // - load trans: load the list of current translations // - merge: assign list of translations as done // - (action)expand: analyze features and create example sentences for each version. // - (action)googletrans: pre-populate messages with automatic translations. // - (action)export: send out messages somewhere non-standard // - (action)import: load messages from somewhere non-standard // - vet program: don't pass "foo" + var + "bar" strings. Not using funcs for translated strings. // - vet trans: coverage: all translations/ all features. // - generate: generate Go code // State holds all accumulated information on translations during processing. type State struct { Config Config Package string program *loader.Program Extracted Messages `json:"messages"` // Messages includes all messages for which there need to be translations. // Duplicates may be eliminated. Generation will be done from these messages // (usually after merging). Messages []Messages // Translations are incoming translations for the application messages. Translations []Messages } func (s *State) dir() string { if d := s.Config.Dir; d != "" { return d } return "./locales" } func outPattern(s *State) (string, error) { c := s.Config pat := c.OutPattern if pat == "" { pat = "{{.Dir}}/{{.Language}}/out.{{.Ext}}" } ext := c.Ext if ext == "" { ext = c.Format } if ext == "" { ext = gotextSuffix } t, err := template.New("").Parse(pat) if err != nil { return "", wrap(err, "error parsing template") } buf := bytes.Buffer{} err = t.Execute(&buf, map[string]string{ "Dir": s.dir(), "Language": "%s", "Ext": ext, }) return filepath.FromSlash(buf.String()), wrap(err, "incorrect OutPattern") } var transRE = regexp.MustCompile(`.*\.` + gotextSuffix) // Import loads existing translation files. func (s *State) Import() error { outPattern, err := outPattern(s) if err != nil { return err } re := transRE if pat := s.Config.TranslationsPattern; pat != "" { if re, err = regexp.Compile(pat); err != nil { return wrapf(err, "error parsing regexp %q", s.Config.TranslationsPattern) } } x := importer{s, outPattern, re} return x.walkImport(s.dir(), s.Config.SourceLanguage) } type importer struct { state *State outPattern string transFile *regexp.Regexp } func (i *importer) walkImport(path string, tag language.Tag) error { files, err := ioutil.ReadDir(path) if err != nil { return nil } for _, f := range files { name := f.Name() tag := tag if f.IsDir() { if t, err := language.Parse(name); err == nil { tag = t } // We ignore errors if err := i.walkImport(filepath.Join(path, name), tag); err != nil { return err } continue } for _, l := range strings.Split(name, ".") { if t, err := language.Parse(l); err == nil { tag = t } } file := filepath.Join(path, name) // TODO: Should we skip files that match output files? if fmt.Sprintf(i.outPattern, tag) == file { continue } // TODO: handle different file formats. if !i.transFile.MatchString(name) { continue } b, err := ioutil.ReadFile(file) if err != nil { return wrap(err, "read file failed") } var translations Messages if err := json.Unmarshal(b, &translations); err != nil { return wrap(err, "parsing translation file failed") } i.state.Translations = append(i.state.Translations, translations) } return nil } // Merge merges the extracted messages with the existing translations. func (s *State) Merge() error { if s.Messages != nil { panic("already merged") } // Create an index for each unique message. // Duplicates are okay as long as the substitution arguments are okay as // well. // Top-level messages are okay to appear in multiple substitution points. // Collect key equivalence. msgs := []*Message{} keyToIDs := map[string]*Message{} for _, m := range s.Extracted.Messages { m := m if prev, ok := keyToIDs[m.Key]; ok { if err := checkEquivalence(&m, prev); err != nil { warnf("Key %q matches conflicting messages: %v and %v", m.Key, prev.ID, m.ID) // TODO: track enough information so that the rewriter can // suggest/disambiguate messages. } // TODO: add position to message. continue } i := len(msgs) msgs = append(msgs, &m) keyToIDs[m.Key] = msgs[i] } // Messages with different keys may still refer to the same translated // message (e.g. different whitespace). Filter these. idMap := map[string]bool{} filtered := []*Message{} for _, m := range msgs { found := false for _, id := range m.ID { found = found || idMap[id] } if !found { filtered = append(filtered, m) } for _, id := range m.ID { idMap[id] = true } } // Build index of translations. translations := map[language.Tag]map[string]Message{} languages := append([]language.Tag{}, s.Config.Supported...) for _, t := range s.Translations { tag := t.Language if _, ok := translations[tag]; !ok { translations[tag] = map[string]Message{} languages = append(languages, tag) } for _, m := range t.Messages { if !m.Translation.IsEmpty() { for _, id := range m.ID { if _, ok := translations[tag][id]; ok { warnf("Duplicate translation in locale %q for message %q", tag, id) } translations[tag][id] = m } } } } languages = internal.UniqueTags(languages) for _, tag := range languages { ms := Messages{Language: tag} for _, orig := range filtered { m := *orig m.Key = "" m.Position = "" for _, id := range m.ID { if t, ok := translations[tag][id]; ok { m.Translation = t.Translation if t.TranslatorComment != "" { m.TranslatorComment = t.TranslatorComment m.Fuzzy = t.Fuzzy } break } } if tag == s.Config.SourceLanguage && m.Translation.IsEmpty() { m.Translation = m.Message if m.TranslatorComment == "" { m.TranslatorComment = "Copied from source." m.Fuzzy = true } } // TODO: if translation is empty: pre-expand based on available // linguistic features. This may also be done as a plugin. ms.Messages = append(ms.Messages, m) } s.Messages = append(s.Messages, ms) } return nil } // Export writes out the messages to translation out files. func (s *State) Export() error { path, err := outPattern(s) if err != nil { return wrap(err, "export failed") } for _, out := range s.Messages { // TODO: inject translations from existing files to avoid retranslation. data, err := json.MarshalIndent(out, "", " ") if err != nil { return wrap(err, "JSON marshal failed") } file := fmt.Sprintf(path, out.Language) if err := os.MkdirAll(filepath.Dir(file), 0755); err != nil { return wrap(err, "dir create failed") } if err := ioutil.WriteFile(file, data, 0644); err != nil { return wrap(err, "write failed") } } return nil } var ( ws = runes.In(unicode.White_Space).Contains notWS = runes.NotIn(unicode.White_Space).Contains ) func trimWS(s string) (trimmed, leadWS, trailWS string) { trimmed = strings.TrimRightFunc(s, ws) trailWS = s[len(trimmed):] if i := strings.IndexFunc(trimmed, notWS); i > 0 { leadWS = trimmed[:i] trimmed = trimmed[i:] } return trimmed, leadWS, trailWS } // NOTE: The command line tool already prefixes with "gotext:". var ( wrap = func(err error, msg string) error { if err == nil { return nil } return fmt.Errorf("%s: %v", msg, err) } wrapf = func(err error, msg string, args ...interface{}) error { if err == nil { return nil } return wrap(err, fmt.Sprintf(msg, args...)) } errorf = fmt.Errorf ) func warnf(format string, args ...interface{}) { // TODO: don't log. log.Printf(format, args...) } func loadPackages(conf *loader.Config, args []string) (*loader.Program, error) { if len(args) == 0 { args = []string{"."} } conf.Build = &build.Default conf.ParserMode = parser.ParseComments // Use the initial packages from the command line. args, err := conf.FromArgs(args, false) if err != nil { return nil, wrap(err, "loading packages failed") } // Load, parse and type-check the whole program. return conf.Load() }