// Copyright 2015 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package number import ( "errors" "unicode/utf8" ) // This file contains a parser for the CLDR number patterns as described in // http://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns. // // The following BNF is derived from this standard. // // pattern := subpattern (';' subpattern)? // subpattern := affix? number exponent? affix? // number := decimal | sigDigits // decimal := '#'* '0'* ('.' fraction)? | '#' | '0' // fraction := '0'* '#'* // sigDigits := '#'* '@' '@'* '#'* // exponent := 'E' '+'? '0'* '0' // padSpec := '*' \L // // Notes: // - An affix pattern may contain any runes, but runes with special meaning // should be escaped. // - Sequences of digits, '#', and '@' in decimal and sigDigits may have // interstitial commas. // TODO: replace special characters in affixes (-, +, ¤) with control codes. // Pattern holds information for formatting numbers. It is designed to hold // information from CLDR number patterns. // // This pattern is precompiled for all patterns for all languages. Even though // the number of patterns is not very large, we want to keep this small. // // This type is only intended for internal use. type Pattern struct { RoundingContext Affix string // includes prefix and suffix. First byte is prefix length. Offset uint16 // Offset into Affix for prefix and suffix NegOffset uint16 // Offset into Affix for negative prefix and suffix or 0. PadRune rune FormatWidth uint16 GroupingSize [2]uint8 Flags PatternFlag } // A RoundingContext indicates how a number should be converted to digits. // It contains all information needed to determine the "visible digits" as // required by the pluralization rules. type RoundingContext struct { // TODO: unify these two fields so that there is a more unambiguous meaning // of how precision is handled. MaxSignificantDigits int16 // -1 is unlimited MaxFractionDigits int16 // -1 is unlimited Increment uint32 IncrementScale uint8 // May differ from printed scale. Mode RoundingMode DigitShift uint8 // Number of decimals to shift. Used for % and ‰. // Number of digits. MinIntegerDigits uint8 MaxIntegerDigits uint8 MinFractionDigits uint8 MinSignificantDigits uint8 MinExponentDigits uint8 } // RoundSignificantDigits returns the number of significant digits an // implementation of Convert may round to or n < 0 if there is no maximum or // a maximum is not recommended. func (r *RoundingContext) RoundSignificantDigits() (n int) { if r.MaxFractionDigits == 0 && r.MaxSignificantDigits > 0 { return int(r.MaxSignificantDigits) } else if r.isScientific() && r.MaxIntegerDigits == 1 { if r.MaxSignificantDigits == 0 || int(r.MaxFractionDigits+1) == int(r.MaxSignificantDigits) { // Note: don't add DigitShift: it is only used for decimals. return int(r.MaxFractionDigits) + 1 } } return -1 } // RoundFractionDigits returns the number of fraction digits an implementation // of Convert may round to or n < 0 if there is no maximum or a maximum is not // recommended. func (r *RoundingContext) RoundFractionDigits() (n int) { if r.MinExponentDigits == 0 && r.MaxSignificantDigits == 0 && r.MaxFractionDigits >= 0 { return int(r.MaxFractionDigits) + int(r.DigitShift) } return -1 } // SetScale fixes the RoundingContext to a fixed number of fraction digits. func (r *RoundingContext) SetScale(scale int) { r.MinFractionDigits = uint8(scale) r.MaxFractionDigits = int16(scale) } func (r *RoundingContext) SetPrecision(prec int) { r.MaxSignificantDigits = int16(prec) } func (r *RoundingContext) isScientific() bool { return r.MinExponentDigits > 0 } func (f *Pattern) needsSep(pos int) bool { p := pos - 1 size := int(f.GroupingSize[0]) if size == 0 || p == 0 { return false } if p == size { return true } if p -= size; p < 0 { return false } // TODO: make second groupingsize the same as first if 0 so that we can // avoid this check. if x := int(f.GroupingSize[1]); x != 0 { size = x } return p%size == 0 } // A PatternFlag is a bit mask for the flag field of a Pattern. type PatternFlag uint8 const ( AlwaysSign PatternFlag = 1 << iota ElideSign // Use space instead of plus sign. AlwaysSign must be true. AlwaysExpSign AlwaysDecimalSeparator ParenthesisForNegative // Common pattern. Saves space. PadAfterNumber PadAfterAffix PadBeforePrefix = 0 // Default PadAfterPrefix = PadAfterAffix PadBeforeSuffix = PadAfterNumber PadAfterSuffix = PadAfterNumber | PadAfterAffix PadMask = PadAfterNumber | PadAfterAffix ) type parser struct { *Pattern leadingSharps int pos int err error doNotTerminate bool groupingCount uint hasGroup bool buf []byte } func (p *parser) setError(err error) { if p.err == nil { p.err = err } } func (p *parser) updateGrouping() { if p.hasGroup && 0 < p.groupingCount && p.groupingCount < 255 { p.GroupingSize[1] = p.GroupingSize[0] p.GroupingSize[0] = uint8(p.groupingCount) } p.groupingCount = 0 p.hasGroup = true } var ( // TODO: more sensible and localizeable error messages. errMultiplePadSpecifiers = errors.New("format: pattern has multiple pad specifiers") errInvalidPadSpecifier = errors.New("format: invalid pad specifier") errInvalidQuote = errors.New("format: invalid quote") errAffixTooLarge = errors.New("format: prefix or suffix exceeds maximum UTF-8 length of 256 bytes") errDuplicatePercentSign = errors.New("format: duplicate percent sign") errDuplicatePermilleSign = errors.New("format: duplicate permille sign") errUnexpectedEnd = errors.New("format: unexpected end of pattern") ) // ParsePattern extracts formatting information from a CLDR number pattern. // // See http://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns. func ParsePattern(s string) (f *Pattern, err error) { p := parser{Pattern: &Pattern{}} s = p.parseSubPattern(s) if s != "" { // Parse negative sub pattern. if s[0] != ';' { p.setError(errors.New("format: error parsing first sub pattern")) return nil, p.err } neg := parser{Pattern: &Pattern{}} // just for extracting the affixes. s = neg.parseSubPattern(s[len(";"):]) p.NegOffset = uint16(len(p.buf)) p.buf = append(p.buf, neg.buf...) } if s != "" { p.setError(errors.New("format: spurious characters at end of pattern")) } if p.err != nil { return nil, p.err } if affix := string(p.buf); affix == "\x00\x00" || affix == "\x00\x00\x00\x00" { // No prefix or suffixes. p.NegOffset = 0 } else { p.Affix = affix } if p.Increment == 0 { p.IncrementScale = 0 } return p.Pattern, nil } func (p *parser) parseSubPattern(s string) string { s = p.parsePad(s, PadBeforePrefix) s = p.parseAffix(s) s = p.parsePad(s, PadAfterPrefix) s = p.parse(p.number, s) p.updateGrouping() s = p.parsePad(s, PadBeforeSuffix) s = p.parseAffix(s) s = p.parsePad(s, PadAfterSuffix) return s } func (p *parser) parsePad(s string, f PatternFlag) (tail string) { if len(s) >= 2 && s[0] == '*' { r, sz := utf8.DecodeRuneInString(s[1:]) if p.PadRune != 0 { p.err = errMultiplePadSpecifiers } else { p.Flags |= f p.PadRune = r } return s[1+sz:] } return s } func (p *parser) parseAffix(s string) string { x := len(p.buf) p.buf = append(p.buf, 0) // placeholder for affix length s = p.parse(p.affix, s) n := len(p.buf) - x - 1 if n > 0xFF { p.setError(errAffixTooLarge) } p.buf[x] = uint8(n) return s } // state implements a state transition. It returns the new state. A state // function may set an error on the parser or may simply return on an incorrect // token and let the next phase fail. type state func(r rune) state // parse repeatedly applies a state function on the given string until a // termination condition is reached. func (p *parser) parse(fn state, s string) (tail string) { for i, r := range s { p.doNotTerminate = false if fn = fn(r); fn == nil || p.err != nil { return s[i:] } p.FormatWidth++ } if p.doNotTerminate { p.setError(errUnexpectedEnd) } return "" } func (p *parser) affix(r rune) state { switch r { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '#', '@', '.', '*', ',', ';': return nil case '\'': p.FormatWidth-- return p.escapeFirst case '%': if p.DigitShift != 0 { p.setError(errDuplicatePercentSign) } p.DigitShift = 2 case '\u2030': // ‰ Per mille if p.DigitShift != 0 { p.setError(errDuplicatePermilleSign) } p.DigitShift = 3 // TODO: handle currency somehow: ¤, ¤¤, ¤¤¤, ¤¤¤¤ } p.buf = append(p.buf, string(r)...) return p.affix } func (p *parser) escapeFirst(r rune) state { switch r { case '\'': p.buf = append(p.buf, "\\'"...) return p.affix default: p.buf = append(p.buf, '\'') p.buf = append(p.buf, string(r)...) } return p.escape } func (p *parser) escape(r rune) state { switch r { case '\'': p.FormatWidth-- p.buf = append(p.buf, '\'') return p.affix default: p.buf = append(p.buf, string(r)...) } return p.escape } // number parses a number. The BNF says the integer part should always have // a '0', but that does not appear to be the case according to the rest of the // documentation. We will allow having only '#' numbers. func (p *parser) number(r rune) state { switch r { case '#': p.groupingCount++ p.leadingSharps++ case '@': p.groupingCount++ p.leadingSharps = 0 p.MaxFractionDigits = -1 return p.sigDigits(r) case ',': if p.leadingSharps == 0 { // no leading commas return nil } p.updateGrouping() case 'E': p.MaxIntegerDigits = uint8(p.leadingSharps) return p.exponent case '.': // allow ".##" etc. p.updateGrouping() return p.fraction case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': return p.integer(r) default: return nil } return p.number } func (p *parser) integer(r rune) state { if !('0' <= r && r <= '9') { var next state switch r { case 'E': if p.leadingSharps > 0 { p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits } next = p.exponent case '.': next = p.fraction case ',': next = p.integer } p.updateGrouping() return next } p.Increment = p.Increment*10 + uint32(r-'0') p.groupingCount++ p.MinIntegerDigits++ return p.integer } func (p *parser) sigDigits(r rune) state { switch r { case '@': p.groupingCount++ p.MaxSignificantDigits++ p.MinSignificantDigits++ case '#': return p.sigDigitsFinal(r) case 'E': p.updateGrouping() return p.normalizeSigDigitsWithExponent() default: p.updateGrouping() return nil } return p.sigDigits } func (p *parser) sigDigitsFinal(r rune) state { switch r { case '#': p.groupingCount++ p.MaxSignificantDigits++ case 'E': p.updateGrouping() return p.normalizeSigDigitsWithExponent() default: p.updateGrouping() return nil } return p.sigDigitsFinal } func (p *parser) normalizeSigDigitsWithExponent() state { p.MinIntegerDigits, p.MaxIntegerDigits = 1, 1 p.MinFractionDigits = p.MinSignificantDigits - 1 p.MaxFractionDigits = p.MaxSignificantDigits - 1 p.MinSignificantDigits, p.MaxSignificantDigits = 0, 0 return p.exponent } func (p *parser) fraction(r rune) state { switch r { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': p.Increment = p.Increment*10 + uint32(r-'0') p.IncrementScale++ p.MinFractionDigits++ p.MaxFractionDigits++ case '#': p.MaxFractionDigits++ case 'E': if p.leadingSharps > 0 { p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits } return p.exponent default: return nil } return p.fraction } func (p *parser) exponent(r rune) state { switch r { case '+': // Set mode and check it wasn't already set. if p.Flags&AlwaysExpSign != 0 || p.MinExponentDigits > 0 { break } p.Flags |= AlwaysExpSign p.doNotTerminate = true return p.exponent case '0': p.MinExponentDigits++ return p.exponent } // termination condition if p.MinExponentDigits == 0 { p.setError(errors.New("format: need at least one digit")) } return nil }