Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
219 lines
5.6 KiB
219 lines
5.6 KiB
// Provide string-matching based on fnmatch.3 |
|
package fnmatch |
|
|
|
// There are a few issues that I believe to be bugs, but this implementation is |
|
// based as closely as possible on BSD fnmatch. These bugs are present in the |
|
// source of BSD fnmatch, and so are replicated here. The issues are as follows: |
|
// |
|
// * FNM_PERIOD is no longer observed after the first * in a pattern |
|
// This only applies to matches done with FNM_PATHNAME as well |
|
// * FNM_PERIOD doesn't apply to ranges. According to the documentation, |
|
// a period must be matched explicitly, but a range will match it too |
|
|
|
import ( |
|
"unicode" |
|
"unicode/utf8" |
|
) |
|
|
|
const ( |
|
FNM_NOESCAPE = (1 << iota) |
|
FNM_PATHNAME |
|
FNM_PERIOD |
|
|
|
FNM_LEADING_DIR |
|
FNM_CASEFOLD |
|
|
|
FNM_IGNORECASE = FNM_CASEFOLD |
|
FNM_FILE_NAME = FNM_PATHNAME |
|
) |
|
|
|
func unpackRune(str *string) rune { |
|
rune, size := utf8.DecodeRuneInString(*str) |
|
*str = (*str)[size:] |
|
return rune |
|
} |
|
|
|
// Matches the pattern against the string, with the given flags, |
|
// and returns true if the match is successful. |
|
// This function should match fnmatch.3 as closely as possible. |
|
func Match(pattern, s string, flags int) bool { |
|
// The implementation for this function was patterned after the BSD fnmatch.c |
|
// source found at http://src.gnu-darwin.org/src/contrib/csup/fnmatch.c.html |
|
noescape := (flags&FNM_NOESCAPE != 0) |
|
pathname := (flags&FNM_PATHNAME != 0) |
|
period := (flags&FNM_PERIOD != 0) |
|
leadingdir := (flags&FNM_LEADING_DIR != 0) |
|
casefold := (flags&FNM_CASEFOLD != 0) |
|
// the following is some bookkeeping that the original fnmatch.c implementation did not do |
|
// We are forced to do this because we're not keeping indexes into C strings but rather |
|
// processing utf8-encoded strings. Use a custom unpacker to maintain our state for us |
|
sAtStart := true |
|
sLastAtStart := true |
|
sLastSlash := false |
|
sLastUnpacked := rune(0) |
|
unpackS := func() rune { |
|
sLastSlash = (sLastUnpacked == '/') |
|
sLastUnpacked = unpackRune(&s) |
|
sLastAtStart = sAtStart |
|
sAtStart = false |
|
return sLastUnpacked |
|
} |
|
for len(pattern) > 0 { |
|
c := unpackRune(&pattern) |
|
switch c { |
|
case '?': |
|
if len(s) == 0 { |
|
return false |
|
} |
|
sc := unpackS() |
|
if pathname && sc == '/' { |
|
return false |
|
} |
|
if period && sc == '.' && (sLastAtStart || (pathname && sLastSlash)) { |
|
return false |
|
} |
|
case '*': |
|
// collapse multiple *'s |
|
// don't use unpackRune here, the only char we care to detect is ASCII |
|
for len(pattern) > 0 && pattern[0] == '*' { |
|
pattern = pattern[1:] |
|
} |
|
if period && s[0] == '.' && (sAtStart || (pathname && sLastUnpacked == '/')) { |
|
return false |
|
} |
|
// optimize for patterns with * at end or before / |
|
if len(pattern) == 0 { |
|
if pathname { |
|
return leadingdir || (strchr(s, '/') == -1) |
|
} else { |
|
return true |
|
} |
|
return !(pathname && strchr(s, '/') >= 0) |
|
} else if pathname && pattern[0] == '/' { |
|
offset := strchr(s, '/') |
|
if offset == -1 { |
|
return false |
|
} else { |
|
// we already know our pattern and string have a /, skip past it |
|
s = s[offset:] // use unpackS here to maintain our bookkeeping state |
|
unpackS() |
|
pattern = pattern[1:] // we know / is one byte long |
|
break |
|
} |
|
} |
|
// general case, recurse |
|
for test := s; len(test) > 0; unpackRune(&test) { |
|
// I believe the (flags &^ FNM_PERIOD) is a bug when FNM_PATHNAME is specified |
|
// but this follows exactly from how fnmatch.c implements it |
|
if Match(pattern, test, (flags &^ FNM_PERIOD)) { |
|
return true |
|
} else if pathname && test[0] == '/' { |
|
break |
|
} |
|
} |
|
return false |
|
case '[': |
|
if len(s) == 0 { |
|
return false |
|
} |
|
if pathname && s[0] == '/' { |
|
return false |
|
} |
|
sc := unpackS() |
|
if !rangematch(&pattern, sc, flags) { |
|
return false |
|
} |
|
case '\\': |
|
if !noescape { |
|
if len(pattern) > 0 { |
|
c = unpackRune(&pattern) |
|
} |
|
} |
|
fallthrough |
|
default: |
|
if len(s) == 0 { |
|
return false |
|
} |
|
sc := unpackS() |
|
switch { |
|
case sc == c: |
|
case casefold && unicode.ToLower(sc) == unicode.ToLower(c): |
|
default: |
|
return false |
|
} |
|
} |
|
} |
|
return len(s) == 0 || (leadingdir && s[0] == '/') |
|
} |
|
|
|
func rangematch(pattern *string, test rune, flags int) bool { |
|
if len(*pattern) == 0 { |
|
return false |
|
} |
|
casefold := (flags&FNM_CASEFOLD != 0) |
|
noescape := (flags&FNM_NOESCAPE != 0) |
|
if casefold { |
|
test = unicode.ToLower(test) |
|
} |
|
var negate, matched bool |
|
if (*pattern)[0] == '^' || (*pattern)[0] == '!' { |
|
negate = true |
|
(*pattern) = (*pattern)[1:] |
|
} |
|
for !matched && len(*pattern) > 1 && (*pattern)[0] != ']' { |
|
c := unpackRune(pattern) |
|
if !noescape && c == '\\' { |
|
if len(*pattern) > 1 { |
|
c = unpackRune(pattern) |
|
} else { |
|
return false |
|
} |
|
} |
|
if casefold { |
|
c = unicode.ToLower(c) |
|
} |
|
if (*pattern)[0] == '-' && len(*pattern) > 1 && (*pattern)[1] != ']' { |
|
unpackRune(pattern) // skip the - |
|
c2 := unpackRune(pattern) |
|
if !noescape && c2 == '\\' { |
|
if len(*pattern) > 0 { |
|
c2 = unpackRune(pattern) |
|
} else { |
|
return false |
|
} |
|
} |
|
if casefold { |
|
c2 = unicode.ToLower(c2) |
|
} |
|
// this really should be more intelligent, but it looks like |
|
// fnmatch.c does simple int comparisons, therefore we will as well |
|
if c <= test && test <= c2 { |
|
matched = true |
|
} |
|
} else if c == test { |
|
matched = true |
|
} |
|
} |
|
// skip past the rest of the pattern |
|
ok := false |
|
for !ok && len(*pattern) > 0 { |
|
c := unpackRune(pattern) |
|
if c == '\\' && len(*pattern) > 0 { |
|
unpackRune(pattern) |
|
} else if c == ']' { |
|
ok = true |
|
} |
|
} |
|
return ok && matched != negate |
|
} |
|
|
|
// define strchr because strings.Index() seems a bit overkill |
|
// returns the index of c in s, or -1 if there is no match |
|
func strchr(s string, c rune) int { |
|
for i, sc := range s { |
|
if sc == c { |
|
return i |
|
} |
|
} |
|
return -1 |
|
}
|
|
|