// Mirrored from the Mirocod CRNP platform (https://git.mirocod.ru).
// Package shlex provides a simple lexical analysis like Unix shell. |
|
package shlex |
|
|
|
import ( |
|
"bufio" |
|
"errors" |
|
"io" |
|
"strings" |
|
"unicode" |
|
) |
|
|
|
var (
	// ErrNoClosing is returned when the input ends inside a quoted
	// string whose closing quotation mark was never seen.
	ErrNoClosing = errors.New("no closing quotation")

	// ErrNoEscaped is returned when the input ends immediately after
	// an escape character, leaving nothing to escape.
	ErrNoEscaped = errors.New("no escaped character")
)
|
|
|
// Tokenizer is the interface that classifies a token according to
// words, whitespaces, quotations, escapes and escaped quotations.
type Tokenizer interface {
	// IsWord reports whether r may appear inside an unquoted word.
	IsWord(rune) bool
	// IsWhitespace reports whether r separates tokens.
	IsWhitespace(rune) bool
	// IsQuote reports whether r opens and closes a quoted string.
	IsQuote(rune) bool
	// IsEscape reports whether r is an escape character.
	IsEscape(rune) bool
	// IsEscapedQuote reports whether r is a quote character inside
	// which escape characters are honored (see Lexer.readToken).
	IsEscapedQuote(rune) bool
}
|
|
|
// DefaultTokenizer implements a simple tokenizer like Unix shell.
type DefaultTokenizer struct{}

// IsWord reports whether r can appear inside an unquoted word:
// an underscore, a letter, or a numeric rune.
func (t *DefaultTokenizer) IsWord(r rune) bool {
	if r == '_' {
		return true
	}
	return unicode.IsLetter(r) || unicode.IsNumber(r)
}

// IsQuote reports whether r opens (and closes) a quoted string:
// single or double quote.
func (t *DefaultTokenizer) IsQuote(r rune) bool {
	return r == '\'' || r == '"'
}

// IsWhitespace reports whether r separates tokens.
func (t *DefaultTokenizer) IsWhitespace(r rune) bool {
	return unicode.IsSpace(r)
}

// IsEscape reports whether r is the escape character (backslash).
func (t *DefaultTokenizer) IsEscape(r rune) bool {
	return r == '\\'
}

// IsEscapedQuote reports whether r is a quote inside which escape
// characters are honored; only double quotes qualify.
func (t *DefaultTokenizer) IsEscapedQuote(r rune) bool {
	return r == '"'
}
|
|
|
// Lexer represents a lexical analyzer.
type Lexer struct {
	reader          *bufio.Reader // buffered source of input runes
	tokenizer       Tokenizer     // rune classifier; DefaultTokenizer unless replaced via SetTokenizer
	posix           bool          // apply POSIX-style quoting/escaping rules
	whitespacesplit bool          // split only on whitespace; other non-word runes join the current token
}
|
|
|
// NewLexer creates a new Lexer reading from io.Reader. This Lexer |
|
// has a DefaultTokenizer according to posix and whitespacesplit |
|
// rules. |
|
func NewLexer(r io.Reader, posix, whitespacesplit bool) *Lexer { |
|
return &Lexer{ |
|
reader: bufio.NewReader(r), |
|
tokenizer: &DefaultTokenizer{}, |
|
posix: posix, |
|
whitespacesplit: whitespacesplit, |
|
} |
|
} |
|
|
|
// NewLexerString creates a new Lexer reading from a string. This |
|
// Lexer has a DefaultTokenizer according to posix and whitespacesplit |
|
// rules. |
|
func NewLexerString(s string, posix, whitespacesplit bool) *Lexer { |
|
return NewLexer(strings.NewReader(s), posix, whitespacesplit) |
|
} |
|
|
|
// Split splits a string according to posix or non-posix rules. |
|
func Split(s string, posix bool) ([]string, error) { |
|
return NewLexerString(s, posix, true).Split() |
|
} |
|
|
|
// SetTokenizer sets a Tokenizer, replacing the DefaultTokenizer
// installed by NewLexer.
func (l *Lexer) SetTokenizer(t Tokenizer) {
	l.tokenizer = t
}
|
|
|
func (l *Lexer) Split() ([]string, error) { |
|
result := make([]string, 0) |
|
for { |
|
token, err := l.readToken() |
|
if token != "" { |
|
result = append(result, token) |
|
} |
|
|
|
if err == io.EOF { |
|
break |
|
} else if err != nil { |
|
return result, err |
|
} |
|
} |
|
return result, nil |
|
} |
|
|
|
// readToken scans the input for the next single token, driving a small
// state machine keyed on the previous significant rune:
//
//	' ' (whitespace) — between tokens
//	'a'              — inside a word
//	a quote rune     — inside a quoted string opened by that rune
//	an escape rune   — immediately after an escape character
//
// It returns io.EOF when the input is exhausted, ErrNoClosing when the
// input ends inside an unterminated quote, and ErrNoEscaped when it
// ends right after an escape character.
func (l *Lexer) readToken() (string, error) {
	t := l.tokenizer
	token := ""
	quoted := false
	state := ' '        // current state; starts "between tokens"
	escapedstate := ' ' // state to restore once an escape sequence ends
scanning:
	for {
		next, _, err := l.reader.ReadRune()
		if err != nil {
			// Read failure (typically io.EOF): report an unterminated
			// quote or dangling escape, otherwise pass the error up.
			if t.IsQuote(state) {
				return token, ErrNoClosing
			} else if t.IsEscape(state) {
				return token, ErrNoEscaped
			}
			return token, err
		}

		switch {
		case t.IsWhitespace(state):
			// Between tokens: decide what the next rune starts.
			switch {
			case t.IsWhitespace(next):
				break scanning
			case l.posix && t.IsEscape(next):
				escapedstate = 'a'
				state = next
			case t.IsWord(next):
				token += string(next)
				state = 'a'
			case t.IsQuote(next):
				// Non-POSIX mode keeps the quote character in the token.
				if !l.posix {
					token += string(next)
				}
				state = next
			default:
				// Any other rune: token becomes that single rune.
				token = string(next)
				if l.whitespacesplit {
					state = 'a'
				} else if token != "" || (l.posix && quoted) {
					break scanning
				}
			}
		case t.IsQuote(state):
			// Inside a quoted string; state holds the opening quote rune.
			quoted = true
			switch {
			case next == state:
				// Matching closing quote.
				if !l.posix {
					token += string(next)
					break scanning
				} else {
					state = 'a'
				}
			case l.posix && t.IsEscape(next) && t.IsEscapedQuote(state):
				// Escapes are honored only inside "escaped-quote" quotes
				// (double quotes for the DefaultTokenizer).
				escapedstate = state
				state = next
			default:
				token += string(next)
			}
		case t.IsEscape(state):
			// Rune immediately following an escape character.
			if t.IsQuote(escapedstate) && next != state && next != escapedstate {
				// Inside quotes the escape char only escapes itself or
				// the quote; otherwise it is kept literally.
				token += string(state)
			}
			token += string(next)
			state = escapedstate
		case t.IsWord(state):
			// Inside a word.
			switch {
			case t.IsWhitespace(next):
				if token != "" || (l.posix && quoted) {
					break scanning
				}
			case l.posix && t.IsQuote(next):
				state = next
			case l.posix && t.IsEscape(next):
				escapedstate = 'a'
				state = next
			case t.IsWord(next) || t.IsQuote(next):
				token += string(next)
			default:
				if l.whitespacesplit {
					token += string(next)
				} else if token != "" {
					// Push the delimiter back; it starts the next token.
					// NOTE(review): UnreadRune's error is ignored — per
					// bufio docs it can only fail when the previous op
					// was not ReadRune, which cannot happen here.
					l.reader.UnreadRune()
					break scanning
				}
			}
		}
	}
	return token, nil
}
|
|
|