// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package pipeline import ( "bytes" "fmt" "go/ast" "go/constant" "go/format" "go/token" "go/types" "path" "path/filepath" "strings" "unicode" "unicode/utf8" fmtparser "golang.org/x/text/internal/format" "golang.org/x/tools/go/loader" ) // TODO: // - merge information into existing files // - handle different file formats (PO, XLIFF) // - handle features (gender, plural) // - message rewriting // - %m substitutions // - `msg:"etc"` tags // - msg/Msg top-level vars and strings. // Extract extracts all strings form the package defined in Config. func Extract(c *Config) (*State, error) { conf := loader.Config{} prog, err := loadPackages(&conf, c.Packages) if err != nil { return nil, wrap(err, "") } // print returns Go syntax for the specified node. print := func(n ast.Node) string { var buf bytes.Buffer format.Node(&buf, conf.Fset, n) return buf.String() } var messages []Message for _, info := range prog.AllPackages { for _, f := range info.Files { // Associate comments with nodes. cmap := ast.NewCommentMap(prog.Fset, f, f.Comments) getComment := func(n ast.Node) string { cs := cmap.Filter(n).Comments() if len(cs) > 0 { return strings.TrimSpace(cs[0].Text()) } return "" } // Find function calls. ast.Inspect(f, func(n ast.Node) bool { call, ok := n.(*ast.CallExpr) if !ok { return true } // Skip calls of functions other than // (*message.Printer).{Sp,Fp,P}rintf. sel, ok := call.Fun.(*ast.SelectorExpr) if !ok { return true } meth := info.Selections[sel] if meth == nil || meth.Kind() != types.MethodVal { return true } // TODO: remove cheap hack and check if the type either // implements some interface or is specifically of type // "golang.org/x/text/message".Printer. m, ok := extractFuncs[path.Base(meth.Recv().String())] if !ok { return true } fmtType, ok := m[meth.Obj().Name()] if !ok { return true } // argn is the index of the format string. argn := fmtType.arg if argn >= len(call.Args) { return true } args := call.Args[fmtType.arg:] fmtMsg, ok := msgStr(info, args[0]) if !ok { // TODO: identify the type of the format argument. If it // is not a string, multiple keys may be defined. return true } comment := "" key := []string{} if ident, ok := args[0].(*ast.Ident); ok { key = append(key, ident.Name) if v, ok := ident.Obj.Decl.(*ast.ValueSpec); ok && v.Comment != nil { // TODO: get comment above ValueSpec as well comment = v.Comment.Text() } } arguments := []argument{} args = args[1:] simArgs := make([]interface{}, len(args)) for i, arg := range args { expr := print(arg) val := "" if v := info.Types[arg].Value; v != nil { val = v.ExactString() simArgs[i] = val switch arg.(type) { case *ast.BinaryExpr, *ast.UnaryExpr: expr = val } } arguments = append(arguments, argument{ ArgNum: i + 1, Type: info.Types[arg].Type.String(), UnderlyingType: info.Types[arg].Type.Underlying().String(), Expr: expr, Value: val, Comment: getComment(arg), Position: posString(conf, info, arg.Pos()), // TODO report whether it implements // interfaces plural.Interface, // gender.Interface. }) } msg := "" ph := placeholders{index: map[string]string{}} trimmed, _, _ := trimWS(fmtMsg) p := fmtparser.Parser{} p.Reset(simArgs) for p.SetFormat(trimmed); p.Scan(); { switch p.Status { case fmtparser.StatusText: msg += p.Text() case fmtparser.StatusSubstitution, fmtparser.StatusBadWidthSubstitution, fmtparser.StatusBadPrecSubstitution: arguments[p.ArgNum-1].used = true arg := arguments[p.ArgNum-1] sub := p.Text() if !p.HasIndex { r, sz := utf8.DecodeLastRuneInString(sub) sub = fmt.Sprintf("%s[%d]%c", sub[:len(sub)-sz], p.ArgNum, r) } msg += fmt.Sprintf("{%s}", ph.addArg(&arg, sub)) } } key = append(key, msg) // Add additional Placeholders that can be used in translations // that are not present in the string. for _, arg := range arguments { if arg.used { continue } ph.addArg(&arg, fmt.Sprintf("%%[%d]v", arg.ArgNum)) } if c := getComment(call.Args[0]); c != "" { comment = c } messages = append(messages, Message{ ID: key, Key: fmtMsg, Message: Text{Msg: msg}, // TODO(fix): this doesn't get the before comment. Comment: comment, Placeholders: ph.slice, Position: posString(conf, info, call.Lparen), }) return true }) } } return &State{ Config: *c, program: prog, Extracted: Messages{ Language: c.SourceLanguage, Messages: messages, }, }, nil } func posString(conf loader.Config, info *loader.PackageInfo, pos token.Pos) string { p := conf.Fset.Position(pos) file := fmt.Sprintf("%s:%d:%d", filepath.Base(p.Filename), p.Line, p.Column) return filepath.Join(info.Pkg.Path(), file) } // extractFuncs indicates the types and methods for which to extract strings, // and which argument to extract. // TODO: use the types in conf.Import("golang.org/x/text/message") to extract // the correct instances. var extractFuncs = map[string]map[string]extractType{ // TODO: Printer -> *golang.org/x/text/message.Printer "message.Printer": { "Printf": extractType{arg: 0, format: true}, "Sprintf": extractType{arg: 0, format: true}, "Fprintf": extractType{arg: 1, format: true}, "Lookup": extractType{arg: 0}, }, } type extractType struct { // format indicates if the next arg is a formatted string or whether to // concatenate all arguments format bool // arg indicates the position of the argument to extract. arg int } func getID(arg *argument) string { s := getLastComponent(arg.Expr) s = strip(s) s = strings.Replace(s, " ", "", -1) // For small variable names, use user-defined types for more info. if len(s) <= 2 && arg.UnderlyingType != arg.Type { s = getLastComponent(arg.Type) } return strings.Title(s) } // strip is a dirty hack to convert function calls to placeholder IDs. func strip(s string) string { s = strings.Map(func(r rune) rune { if unicode.IsSpace(r) || r == '-' { return '_' } if !unicode.In(r, unicode.Letter, unicode.Mark, unicode.Number) { return -1 } return r }, s) // Strip "Get" from getter functions. if strings.HasPrefix(s, "Get") || strings.HasPrefix(s, "get") { if len(s) > len("get") { r, _ := utf8.DecodeRuneInString(s) if !unicode.In(r, unicode.Ll, unicode.M) { // not lower or mark s = s[len("get"):] } } } return s } type placeholders struct { index map[string]string slice []Placeholder } func (p *placeholders) addArg(arg *argument, sub string) (id string) { id = getID(arg) id1 := id alt, ok := p.index[id1] for i := 1; ok && alt != sub; i++ { id1 = fmt.Sprintf("%s_%d", id, i) alt, ok = p.index[id1] } p.index[id1] = sub p.slice = append(p.slice, Placeholder{ ID: id1, String: sub, Type: arg.Type, UnderlyingType: arg.UnderlyingType, ArgNum: arg.ArgNum, Expr: arg.Expr, Comment: arg.Comment, }) return id1 } func getLastComponent(s string) string { return s[1+strings.LastIndexByte(s, '.'):] } func msgStr(info *loader.PackageInfo, e ast.Expr) (s string, ok bool) { v := info.Types[e].Value if v == nil || v.Kind() != constant.String { return "", false } s = constant.StringVal(v) // Only record strings with letters. for _, r := range s { if unicode.In(r, unicode.L) { return s, true } } return "", false }