Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
592 lines
15 KiB
592 lines
15 KiB
package toml |
|
|
|
import ( |
|
"fmt" |
|
"strconv" |
|
"strings" |
|
"time" |
|
"unicode" |
|
"unicode/utf8" |
|
) |
|
|
|
type parser struct { |
|
mapping map[string]interface{} |
|
types map[string]tomlType |
|
lx *lexer |
|
|
|
// A list of keys in the order that they appear in the TOML data. |
|
ordered []Key |
|
|
|
// the full key for the current hash in scope |
|
context Key |
|
|
|
// the base key name for everything except hashes |
|
currentKey string |
|
|
|
// rough approximation of line number |
|
approxLine int |
|
|
|
// A map of 'key.group.names' to whether they were created implicitly. |
|
implicits map[string]bool |
|
} |
|
|
|
type parseError string |
|
|
|
func (pe parseError) Error() string { |
|
return string(pe) |
|
} |
|
|
|
func parse(data string) (p *parser, err error) { |
|
defer func() { |
|
if r := recover(); r != nil { |
|
var ok bool |
|
if err, ok = r.(parseError); ok { |
|
return |
|
} |
|
panic(r) |
|
} |
|
}() |
|
|
|
p = &parser{ |
|
mapping: make(map[string]interface{}), |
|
types: make(map[string]tomlType), |
|
lx: lex(data), |
|
ordered: make([]Key, 0), |
|
implicits: make(map[string]bool), |
|
} |
|
for { |
|
item := p.next() |
|
if item.typ == itemEOF { |
|
break |
|
} |
|
p.topLevel(item) |
|
} |
|
|
|
return p, nil |
|
} |
|
|
|
func (p *parser) panicf(format string, v ...interface{}) { |
|
msg := fmt.Sprintf("Near line %d (last key parsed '%s'): %s", |
|
p.approxLine, p.current(), fmt.Sprintf(format, v...)) |
|
panic(parseError(msg)) |
|
} |
|
|
|
func (p *parser) next() item { |
|
it := p.lx.nextItem() |
|
if it.typ == itemError { |
|
p.panicf("%s", it.val) |
|
} |
|
return it |
|
} |
|
|
|
func (p *parser) bug(format string, v ...interface{}) { |
|
panic(fmt.Sprintf("BUG: "+format+"\n\n", v...)) |
|
} |
|
|
|
func (p *parser) expect(typ itemType) item { |
|
it := p.next() |
|
p.assertEqual(typ, it.typ) |
|
return it |
|
} |
|
|
|
func (p *parser) assertEqual(expected, got itemType) { |
|
if expected != got { |
|
p.bug("Expected '%s' but got '%s'.", expected, got) |
|
} |
|
} |
|
|
|
func (p *parser) topLevel(item item) { |
|
switch item.typ { |
|
case itemCommentStart: |
|
p.approxLine = item.line |
|
p.expect(itemText) |
|
case itemTableStart: |
|
kg := p.next() |
|
p.approxLine = kg.line |
|
|
|
var key Key |
|
for ; kg.typ != itemTableEnd && kg.typ != itemEOF; kg = p.next() { |
|
key = append(key, p.keyString(kg)) |
|
} |
|
p.assertEqual(itemTableEnd, kg.typ) |
|
|
|
p.establishContext(key, false) |
|
p.setType("", tomlHash) |
|
p.ordered = append(p.ordered, key) |
|
case itemArrayTableStart: |
|
kg := p.next() |
|
p.approxLine = kg.line |
|
|
|
var key Key |
|
for ; kg.typ != itemArrayTableEnd && kg.typ != itemEOF; kg = p.next() { |
|
key = append(key, p.keyString(kg)) |
|
} |
|
p.assertEqual(itemArrayTableEnd, kg.typ) |
|
|
|
p.establishContext(key, true) |
|
p.setType("", tomlArrayHash) |
|
p.ordered = append(p.ordered, key) |
|
case itemKeyStart: |
|
kname := p.next() |
|
p.approxLine = kname.line |
|
p.currentKey = p.keyString(kname) |
|
|
|
val, typ := p.value(p.next()) |
|
p.setValue(p.currentKey, val) |
|
p.setType(p.currentKey, typ) |
|
p.ordered = append(p.ordered, p.context.add(p.currentKey)) |
|
p.currentKey = "" |
|
default: |
|
p.bug("Unexpected type at top level: %s", item.typ) |
|
} |
|
} |
|
|
|
// Gets a string for a key (or part of a key in a table name). |
|
func (p *parser) keyString(it item) string { |
|
switch it.typ { |
|
case itemText: |
|
return it.val |
|
case itemString, itemMultilineString, |
|
itemRawString, itemRawMultilineString: |
|
s, _ := p.value(it) |
|
return s.(string) |
|
default: |
|
p.bug("Unexpected key type: %s", it.typ) |
|
panic("unreachable") |
|
} |
|
} |
|
|
|
// value translates an expected value from the lexer into a Go value wrapped |
|
// as an empty interface. |
|
func (p *parser) value(it item) (interface{}, tomlType) { |
|
switch it.typ { |
|
case itemString: |
|
return p.replaceEscapes(it.val), p.typeOfPrimitive(it) |
|
case itemMultilineString: |
|
trimmed := stripFirstNewline(stripEscapedWhitespace(it.val)) |
|
return p.replaceEscapes(trimmed), p.typeOfPrimitive(it) |
|
case itemRawString: |
|
return it.val, p.typeOfPrimitive(it) |
|
case itemRawMultilineString: |
|
return stripFirstNewline(it.val), p.typeOfPrimitive(it) |
|
case itemBool: |
|
switch it.val { |
|
case "true": |
|
return true, p.typeOfPrimitive(it) |
|
case "false": |
|
return false, p.typeOfPrimitive(it) |
|
} |
|
p.bug("Expected boolean value, but got '%s'.", it.val) |
|
case itemInteger: |
|
if !numUnderscoresOK(it.val) { |
|
p.panicf("Invalid integer %q: underscores must be surrounded by digits", |
|
it.val) |
|
} |
|
val := strings.Replace(it.val, "_", "", -1) |
|
num, err := strconv.ParseInt(val, 10, 64) |
|
if err != nil { |
|
// Distinguish integer values. Normally, it'd be a bug if the lexer |
|
// provides an invalid integer, but it's possible that the number is |
|
// out of range of valid values (which the lexer cannot determine). |
|
// So mark the former as a bug but the latter as a legitimate user |
|
// error. |
|
if e, ok := err.(*strconv.NumError); ok && |
|
e.Err == strconv.ErrRange { |
|
|
|
p.panicf("Integer '%s' is out of the range of 64-bit "+ |
|
"signed integers.", it.val) |
|
} else { |
|
p.bug("Expected integer value, but got '%s'.", it.val) |
|
} |
|
} |
|
return num, p.typeOfPrimitive(it) |
|
case itemFloat: |
|
parts := strings.FieldsFunc(it.val, func(r rune) bool { |
|
switch r { |
|
case '.', 'e', 'E': |
|
return true |
|
} |
|
return false |
|
}) |
|
for _, part := range parts { |
|
if !numUnderscoresOK(part) { |
|
p.panicf("Invalid float %q: underscores must be "+ |
|
"surrounded by digits", it.val) |
|
} |
|
} |
|
if !numPeriodsOK(it.val) { |
|
// As a special case, numbers like '123.' or '1.e2', |
|
// which are valid as far as Go/strconv are concerned, |
|
// must be rejected because TOML says that a fractional |
|
// part consists of '.' followed by 1+ digits. |
|
p.panicf("Invalid float %q: '.' must be followed "+ |
|
"by one or more digits", it.val) |
|
} |
|
val := strings.Replace(it.val, "_", "", -1) |
|
num, err := strconv.ParseFloat(val, 64) |
|
if err != nil { |
|
if e, ok := err.(*strconv.NumError); ok && |
|
e.Err == strconv.ErrRange { |
|
|
|
p.panicf("Float '%s' is out of the range of 64-bit "+ |
|
"IEEE-754 floating-point numbers.", it.val) |
|
} else { |
|
p.panicf("Invalid float value: %q", it.val) |
|
} |
|
} |
|
return num, p.typeOfPrimitive(it) |
|
case itemDatetime: |
|
var t time.Time |
|
var ok bool |
|
var err error |
|
for _, format := range []string{ |
|
"2006-01-02T15:04:05Z07:00", |
|
"2006-01-02T15:04:05", |
|
"2006-01-02", |
|
} { |
|
t, err = time.ParseInLocation(format, it.val, time.Local) |
|
if err == nil { |
|
ok = true |
|
break |
|
} |
|
} |
|
if !ok { |
|
p.panicf("Invalid TOML Datetime: %q.", it.val) |
|
} |
|
return t, p.typeOfPrimitive(it) |
|
case itemArray: |
|
array := make([]interface{}, 0) |
|
types := make([]tomlType, 0) |
|
|
|
for it = p.next(); it.typ != itemArrayEnd; it = p.next() { |
|
if it.typ == itemCommentStart { |
|
p.expect(itemText) |
|
continue |
|
} |
|
|
|
val, typ := p.value(it) |
|
array = append(array, val) |
|
types = append(types, typ) |
|
} |
|
return array, p.typeOfArray(types) |
|
case itemInlineTableStart: |
|
var ( |
|
hash = make(map[string]interface{}) |
|
outerContext = p.context |
|
outerKey = p.currentKey |
|
) |
|
|
|
p.context = append(p.context, p.currentKey) |
|
p.currentKey = "" |
|
for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() { |
|
if it.typ != itemKeyStart { |
|
p.bug("Expected key start but instead found %q, around line %d", |
|
it.val, p.approxLine) |
|
} |
|
if it.typ == itemCommentStart { |
|
p.expect(itemText) |
|
continue |
|
} |
|
|
|
// retrieve key |
|
k := p.next() |
|
p.approxLine = k.line |
|
kname := p.keyString(k) |
|
|
|
// retrieve value |
|
p.currentKey = kname |
|
val, typ := p.value(p.next()) |
|
// make sure we keep metadata up to date |
|
p.setType(kname, typ) |
|
p.ordered = append(p.ordered, p.context.add(p.currentKey)) |
|
hash[kname] = val |
|
} |
|
p.context = outerContext |
|
p.currentKey = outerKey |
|
return hash, tomlHash |
|
} |
|
p.bug("Unexpected value type: %s", it.typ) |
|
panic("unreachable") |
|
} |
|
|
|
// numUnderscoresOK checks whether each underscore in s is surrounded by |
|
// characters that are not underscores. |
|
func numUnderscoresOK(s string) bool { |
|
accept := false |
|
for _, r := range s { |
|
if r == '_' { |
|
if !accept { |
|
return false |
|
} |
|
accept = false |
|
continue |
|
} |
|
accept = true |
|
} |
|
return accept |
|
} |
|
|
|
// numPeriodsOK checks whether every period in s is followed by a digit. |
|
func numPeriodsOK(s string) bool { |
|
period := false |
|
for _, r := range s { |
|
if period && !isDigit(r) { |
|
return false |
|
} |
|
period = r == '.' |
|
} |
|
return !period |
|
} |
|
|
|
// establishContext sets the current context of the parser, |
|
// where the context is either a hash or an array of hashes. Which one is |
|
// set depends on the value of the `array` parameter. |
|
// |
|
// Establishing the context also makes sure that the key isn't a duplicate, and |
|
// will create implicit hashes automatically. |
|
func (p *parser) establishContext(key Key, array bool) { |
|
var ok bool |
|
|
|
// Always start at the top level and drill down for our context. |
|
hashContext := p.mapping |
|
keyContext := make(Key, 0) |
|
|
|
// We only need implicit hashes for key[0:-1] |
|
for _, k := range key[0 : len(key)-1] { |
|
_, ok = hashContext[k] |
|
keyContext = append(keyContext, k) |
|
|
|
// No key? Make an implicit hash and move on. |
|
if !ok { |
|
p.addImplicit(keyContext) |
|
hashContext[k] = make(map[string]interface{}) |
|
} |
|
|
|
// If the hash context is actually an array of tables, then set |
|
// the hash context to the last element in that array. |
|
// |
|
// Otherwise, it better be a table, since this MUST be a key group (by |
|
// virtue of it not being the last element in a key). |
|
switch t := hashContext[k].(type) { |
|
case []map[string]interface{}: |
|
hashContext = t[len(t)-1] |
|
case map[string]interface{}: |
|
hashContext = t |
|
default: |
|
p.panicf("Key '%s' was already created as a hash.", keyContext) |
|
} |
|
} |
|
|
|
p.context = keyContext |
|
if array { |
|
// If this is the first element for this array, then allocate a new |
|
// list of tables for it. |
|
k := key[len(key)-1] |
|
if _, ok := hashContext[k]; !ok { |
|
hashContext[k] = make([]map[string]interface{}, 0, 5) |
|
} |
|
|
|
// Add a new table. But make sure the key hasn't already been used |
|
// for something else. |
|
if hash, ok := hashContext[k].([]map[string]interface{}); ok { |
|
hashContext[k] = append(hash, make(map[string]interface{})) |
|
} else { |
|
p.panicf("Key '%s' was already created and cannot be used as "+ |
|
"an array.", keyContext) |
|
} |
|
} else { |
|
p.setValue(key[len(key)-1], make(map[string]interface{})) |
|
} |
|
p.context = append(p.context, key[len(key)-1]) |
|
} |
|
|
|
// setValue sets the given key to the given value in the current context. |
|
// It will make sure that the key hasn't already been defined, account for |
|
// implicit key groups. |
|
func (p *parser) setValue(key string, value interface{}) { |
|
var tmpHash interface{} |
|
var ok bool |
|
|
|
hash := p.mapping |
|
keyContext := make(Key, 0) |
|
for _, k := range p.context { |
|
keyContext = append(keyContext, k) |
|
if tmpHash, ok = hash[k]; !ok { |
|
p.bug("Context for key '%s' has not been established.", keyContext) |
|
} |
|
switch t := tmpHash.(type) { |
|
case []map[string]interface{}: |
|
// The context is a table of hashes. Pick the most recent table |
|
// defined as the current hash. |
|
hash = t[len(t)-1] |
|
case map[string]interface{}: |
|
hash = t |
|
default: |
|
p.bug("Expected hash to have type 'map[string]interface{}', but "+ |
|
"it has '%T' instead.", tmpHash) |
|
} |
|
} |
|
keyContext = append(keyContext, key) |
|
|
|
if _, ok := hash[key]; ok { |
|
// Typically, if the given key has already been set, then we have |
|
// to raise an error since duplicate keys are disallowed. However, |
|
// it's possible that a key was previously defined implicitly. In this |
|
// case, it is allowed to be redefined concretely. (See the |
|
// `tests/valid/implicit-and-explicit-after.toml` test in `toml-test`.) |
|
// |
|
// But we have to make sure to stop marking it as an implicit. (So that |
|
// another redefinition provokes an error.) |
|
// |
|
// Note that since it has already been defined (as a hash), we don't |
|
// want to overwrite it. So our business is done. |
|
if p.isImplicit(keyContext) { |
|
p.removeImplicit(keyContext) |
|
return |
|
} |
|
|
|
// Otherwise, we have a concrete key trying to override a previous |
|
// key, which is *always* wrong. |
|
p.panicf("Key '%s' has already been defined.", keyContext) |
|
} |
|
hash[key] = value |
|
} |
|
|
|
// setType sets the type of a particular value at a given key. |
|
// It should be called immediately AFTER setValue. |
|
// |
|
// Note that if `key` is empty, then the type given will be applied to the |
|
// current context (which is either a table or an array of tables). |
|
func (p *parser) setType(key string, typ tomlType) { |
|
keyContext := make(Key, 0, len(p.context)+1) |
|
for _, k := range p.context { |
|
keyContext = append(keyContext, k) |
|
} |
|
if len(key) > 0 { // allow type setting for hashes |
|
keyContext = append(keyContext, key) |
|
} |
|
p.types[keyContext.String()] = typ |
|
} |
|
|
|
// addImplicit sets the given Key as having been created implicitly. |
|
func (p *parser) addImplicit(key Key) { |
|
p.implicits[key.String()] = true |
|
} |
|
|
|
// removeImplicit stops tagging the given key as having been implicitly |
|
// created. |
|
func (p *parser) removeImplicit(key Key) { |
|
p.implicits[key.String()] = false |
|
} |
|
|
|
// isImplicit returns true if the key group pointed to by the key was created |
|
// implicitly. |
|
func (p *parser) isImplicit(key Key) bool { |
|
return p.implicits[key.String()] |
|
} |
|
|
|
// current returns the full key name of the current context. |
|
func (p *parser) current() string { |
|
if len(p.currentKey) == 0 { |
|
return p.context.String() |
|
} |
|
if len(p.context) == 0 { |
|
return p.currentKey |
|
} |
|
return fmt.Sprintf("%s.%s", p.context, p.currentKey) |
|
} |
|
|
|
func stripFirstNewline(s string) string { |
|
if len(s) == 0 || s[0] != '\n' { |
|
return s |
|
} |
|
return s[1:] |
|
} |
|
|
|
func stripEscapedWhitespace(s string) string { |
|
esc := strings.Split(s, "\\\n") |
|
if len(esc) > 1 { |
|
for i := 1; i < len(esc); i++ { |
|
esc[i] = strings.TrimLeftFunc(esc[i], unicode.IsSpace) |
|
} |
|
} |
|
return strings.Join(esc, "") |
|
} |
|
|
|
func (p *parser) replaceEscapes(str string) string { |
|
var replaced []rune |
|
s := []byte(str) |
|
r := 0 |
|
for r < len(s) { |
|
if s[r] != '\\' { |
|
c, size := utf8.DecodeRune(s[r:]) |
|
r += size |
|
replaced = append(replaced, c) |
|
continue |
|
} |
|
r += 1 |
|
if r >= len(s) { |
|
p.bug("Escape sequence at end of string.") |
|
return "" |
|
} |
|
switch s[r] { |
|
default: |
|
p.bug("Expected valid escape code after \\, but got %q.", s[r]) |
|
return "" |
|
case 'b': |
|
replaced = append(replaced, rune(0x0008)) |
|
r += 1 |
|
case 't': |
|
replaced = append(replaced, rune(0x0009)) |
|
r += 1 |
|
case 'n': |
|
replaced = append(replaced, rune(0x000A)) |
|
r += 1 |
|
case 'f': |
|
replaced = append(replaced, rune(0x000C)) |
|
r += 1 |
|
case 'r': |
|
replaced = append(replaced, rune(0x000D)) |
|
r += 1 |
|
case '"': |
|
replaced = append(replaced, rune(0x0022)) |
|
r += 1 |
|
case '\\': |
|
replaced = append(replaced, rune(0x005C)) |
|
r += 1 |
|
case 'u': |
|
// At this point, we know we have a Unicode escape of the form |
|
// `uXXXX` at [r, r+5). (Because the lexer guarantees this |
|
// for us.) |
|
escaped := p.asciiEscapeToUnicode(s[r+1 : r+5]) |
|
replaced = append(replaced, escaped) |
|
r += 5 |
|
case 'U': |
|
// At this point, we know we have a Unicode escape of the form |
|
// `uXXXX` at [r, r+9). (Because the lexer guarantees this |
|
// for us.) |
|
escaped := p.asciiEscapeToUnicode(s[r+1 : r+9]) |
|
replaced = append(replaced, escaped) |
|
r += 9 |
|
} |
|
} |
|
return string(replaced) |
|
} |
|
|
|
func (p *parser) asciiEscapeToUnicode(bs []byte) rune { |
|
s := string(bs) |
|
hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32) |
|
if err != nil { |
|
p.bug("Could not parse '%s' as a hexadecimal number, but the "+ |
|
"lexer claims it's OK: %s", s, err) |
|
} |
|
if !utf8.ValidRune(rune(hex)) { |
|
p.panicf("Escaped character '\\u%s' is not valid UTF-8.", s) |
|
} |
|
return rune(hex) |
|
} |
|
|
|
func isStringType(ty itemType) bool { |
|
return ty == itemString || ty == itemMultilineString || |
|
ty == itemRawString || ty == itemRawMultilineString |
|
}
|
|
|