Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
229 lines
5.0 KiB
229 lines
5.0 KiB
package substring |
|
|
|
import ( |
|
"bytes" |
|
"regexp" |
|
|
|
"github.com/toqueteos/trie" |
|
) |
|
|
|
// BytesMatcher is the contract shared by every byte-slice matcher in this
// package.
type BytesMatcher interface {
	// Match reports whether b satisfies the matcher.
	Match(b []byte) bool

	// MatchIndex returns an implementation-defined index when b satisfies
	// the matcher; the implementations in this file return -1 when it does
	// not.
	MatchIndex(b []byte) int
}
|
|
|
// regexp

// regexpBytes matches byte slices against a compiled regular expression.
type regexpBytes struct{ re *regexp.Regexp }

// BytesRegexp returns a matcher backed by the regular expression pat.
// It panics when pat does not compile, because it relies on
// regexp.MustCompile.
func BytesRegexp(pat string) *regexpBytes { return &regexpBytes{regexp.MustCompile(pat)} }

// Match reports whether the expression matches anywhere in b.
func (m *regexpBytes) Match(b []byte) bool { return m.re.Match(b) }

// MatchIndex returns the end offset of the leftmost match of the expression
// in b, or -1 when the expression does not match.
func (m *regexpBytes) MatchIndex(b []byte) int {
	// FindIndex yields [start, end]; callers get the end offset.
	if loc := m.re.FindIndex(b); loc != nil {
		return loc[1]
	}
	return -1
}
|
|
|
// exact

// exactBytes matches only inputs whose bytes are identical to pat.
type exactBytes struct{ pat []byte }

// BytesExact returns a matcher that accepts exactly the bytes of pat.
func BytesExact(pat string) *exactBytes { return &exactBytes{[]byte(pat)} }

// Match reports whether b has the same length and contents as the pattern.
// A nil slice and an empty slice are treated alike.
func (m *exactBytes) Match(b []byte) bool { return bytes.Equal(b, m.pat) }

// MatchIndex returns len(b) when b equals the pattern, or -1 otherwise.
func (m *exactBytes) MatchIndex(b []byte) int {
	if m.Match(b) {
		return len(b)
	}
	return -1
}
|
|
|
// any: the stored pattern is the haystack and the argument of Match is the
// needle.

// anyBytes holds the bytes that inputs are searched for in.
type anyBytes struct {
	pat []byte
}

// BytesAny returns a matcher that succeeds when its input occurs inside pat.
func BytesAny(pat string) *anyBytes {
	return &anyBytes{pat: []byte(pat)}
}

// Match reports whether b occurs somewhere inside the stored pattern.
func (m *anyBytes) Match(b []byte) bool {
	return bytes.Contains(m.pat, b)
}

// MatchIndex returns the offset just past the first occurrence of b inside
// the stored pattern, or -1 when b does not occur in it.
func (m *anyBytes) MatchIndex(b []byte) int {
	start := bytes.Index(m.pat, b)
	if start < 0 {
		return -1
	}
	return start + len(b)
}
|
|
|
// has: the argument of Match is the haystack and the stored pattern is the
// needle.

// hasBytes holds the bytes that are searched for inside each input.
type hasBytes struct {
	pat []byte
}

// BytesHas returns a matcher that succeeds when pat occurs inside its input.
func BytesHas(pat string) *hasBytes {
	return &hasBytes{pat: []byte(pat)}
}

// Match reports whether the stored pattern occurs somewhere inside b.
func (m *hasBytes) Match(b []byte) bool {
	return bytes.Contains(b, m.pat)
}

// MatchIndex returns the offset in b just past the first occurrence of the
// stored pattern, or -1 when the pattern does not occur in b.
func (m *hasBytes) MatchIndex(b []byte) int {
	start := bytes.Index(b, m.pat)
	if start < 0 {
		return -1
	}
	return start + len(m.pat)
}
|
|
|
// prefix

// prefixBytes matches inputs that begin with pat.
type prefixBytes struct{ pat []byte }

// BytesPrefix returns a matcher that accepts inputs starting with pat.
func BytesPrefix(pat string) *prefixBytes {
	return &prefixBytes{pat: []byte(pat)}
}

// Match reports whether b begins with the stored pattern.
func (m *prefixBytes) Match(b []byte) bool {
	return bytes.HasPrefix(b, m.pat)
}

// MatchIndex returns the length of the stored pattern when b begins with it,
// or -1 otherwise.
func (m *prefixBytes) MatchIndex(b []byte) int {
	if !bytes.HasPrefix(b, m.pat) {
		return -1
	}
	return len(m.pat)
}
|
|
|
// prefixes |
|
type prefixesBytes struct { |
|
t *trie.Trie |
|
} |
|
|
|
func BytesPrefixes(pats ...string) *prefixesBytes { |
|
t := trie.New() |
|
for _, pat := range pats { |
|
t.Insert([]byte(pat)) |
|
} |
|
return &prefixesBytes{t} |
|
} |
|
func (m *prefixesBytes) Match(b []byte) bool { return m.t.PrefixIndex(b) >= 0 } |
|
func (m *prefixesBytes) MatchIndex(b []byte) int { |
|
if idx := m.t.PrefixIndex(b); idx >= 0 { |
|
return idx |
|
} |
|
return -1 |
|
} |
|
|
|
// suffix

// suffixBytes matches inputs that end with pat.
type suffixBytes struct{ pat []byte }

// BytesSuffix returns a matcher that accepts inputs ending with pat.
func BytesSuffix(pat string) *suffixBytes {
	return &suffixBytes{pat: []byte(pat)}
}

// Match reports whether b ends with the stored pattern.
func (m *suffixBytes) Match(b []byte) bool {
	return bytes.HasSuffix(b, m.pat)
}

// MatchIndex returns the length of the stored pattern when b ends with it,
// or -1 otherwise. Note that the value is the pattern length, not an offset
// into b.
func (m *suffixBytes) MatchIndex(b []byte) int {
	if !bytes.HasSuffix(b, m.pat) {
		return -1
	}
	return len(m.pat)
}
|
|
|
// suffixes |
|
type suffixesBytes struct { |
|
t *trie.Trie |
|
} |
|
|
|
func BytesSuffixes(pats ...string) *suffixesBytes { |
|
t := trie.New() |
|
for _, pat := range pats { |
|
t.Insert(reverse([]byte(pat))) |
|
} |
|
return &suffixesBytes{t} |
|
} |
|
func (m *suffixesBytes) Match(b []byte) bool { |
|
return m.t.PrefixIndex(reverse(b)) >= 0 |
|
} |
|
func (m *suffixesBytes) MatchIndex(b []byte) int { |
|
if idx := m.t.PrefixIndex(reverse(b)); idx >= 0 { |
|
return idx |
|
} |
|
return -1 |
|
} |
|
|
|
// after |
|
type afterBytes struct { |
|
first []byte |
|
matcher BytesMatcher |
|
} |
|
|
|
func BytesAfter(first string, m BytesMatcher) *afterBytes { return &afterBytes{[]byte(first), m} } |
|
func (a *afterBytes) Match(b []byte) bool { |
|
if idx := bytes.Index(b, a.first); idx >= 0 { |
|
return a.matcher.Match(b[idx+len(a.first):]) |
|
} |
|
return false |
|
} |
|
func (a *afterBytes) MatchIndex(b []byte) int { |
|
if idx := bytes.Index(b, a.first); idx >= 0 { |
|
return idx + a.matcher.MatchIndex(b[idx:]) |
|
} |
|
return -1 |
|
} |
|
|
|
// and, returns true iff all matchers return true |
|
type andBytes struct{ matchers []BytesMatcher } |
|
|
|
func BytesAnd(m ...BytesMatcher) *andBytes { return &andBytes{m} } |
|
func (a *andBytes) Match(b []byte) bool { |
|
for _, m := range a.matchers { |
|
if !m.Match(b) { |
|
return false |
|
} |
|
} |
|
return true |
|
} |
|
func (a *andBytes) MatchIndex(b []byte) int { |
|
longest := 0 |
|
for _, m := range a.matchers { |
|
if idx := m.MatchIndex(b); idx < 0 { |
|
return -1 |
|
} else if idx > longest { |
|
longest = idx |
|
} |
|
} |
|
return longest |
|
} |
|
|
|
// or, returns true iff any matcher returns true |
|
type orBytes struct{ matchers []BytesMatcher } |
|
|
|
func BytesOr(m ...BytesMatcher) *orBytes { return &orBytes{m} } |
|
func (o *orBytes) Match(b []byte) bool { |
|
for _, m := range o.matchers { |
|
if m.Match(b) { |
|
return true |
|
} |
|
} |
|
return false |
|
} |
|
func (o *orBytes) MatchIndex(b []byte) int { |
|
for _, m := range o.matchers { |
|
if idx := m.MatchIndex(b); idx >= 0 { |
|
return idx |
|
} |
|
} |
|
return -1 |
|
} |
|
|
|
type suffixGroupBytes struct { |
|
suffix BytesMatcher |
|
matchers []BytesMatcher |
|
} |
|
|
|
func BytesSuffixGroup(s string, m ...BytesMatcher) *suffixGroupBytes { |
|
return &suffixGroupBytes{BytesSuffix(s), m} |
|
} |
|
func (sg *suffixGroupBytes) Match(b []byte) bool { |
|
if sg.suffix.Match(b) { |
|
return BytesOr(sg.matchers...).Match(b) |
|
} |
|
return false |
|
} |
|
func (sg *suffixGroupBytes) MatchIndex(b []byte) int { |
|
if sg.suffix.MatchIndex(b) >= 0 { |
|
return BytesOr(sg.matchers...).MatchIndex(b) |
|
} |
|
return -1 |
|
}
|
|
|