Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
622 lines
14 KiB
622 lines
14 KiB
package cascadia |
|
|
|
import ( |
|
"bytes" |
|
"fmt" |
|
"regexp" |
|
"strings" |
|
|
|
"golang.org/x/net/html" |
|
) |
|
|
|
// the Selector type, and functions for creating them |
|
|
|
// A Selector is a function which tells whether a node matches or not. |
|
type Selector func(*html.Node) bool |
|
|
|
// hasChildMatch returns whether n has any child that matches a. |
|
func hasChildMatch(n *html.Node, a Selector) bool { |
|
for c := n.FirstChild; c != nil; c = c.NextSibling { |
|
if a(c) { |
|
return true |
|
} |
|
} |
|
return false |
|
} |
|
|
|
// hasDescendantMatch performs a depth-first search of n's descendants, |
|
// testing whether any of them match a. It returns true as soon as a match is |
|
// found, or false if no match is found. |
|
func hasDescendantMatch(n *html.Node, a Selector) bool { |
|
for c := n.FirstChild; c != nil; c = c.NextSibling { |
|
if a(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) { |
|
return true |
|
} |
|
} |
|
return false |
|
} |
|
|
|
// Compile parses a selector and returns, if successful, a Selector object |
|
// that can be used to match against html.Node objects. |
|
func Compile(sel string) (Selector, error) { |
|
p := &parser{s: sel} |
|
compiled, err := p.parseSelectorGroup() |
|
if err != nil { |
|
return nil, err |
|
} |
|
|
|
if p.i < len(sel) { |
|
return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i) |
|
} |
|
|
|
return compiled, nil |
|
} |
|
|
|
// MustCompile is like Compile, but panics instead of returning an error. |
|
func MustCompile(sel string) Selector { |
|
compiled, err := Compile(sel) |
|
if err != nil { |
|
panic(err) |
|
} |
|
return compiled |
|
} |
|
|
|
// MatchAll returns a slice of the nodes that match the selector, |
|
// from n and its children. |
|
func (s Selector) MatchAll(n *html.Node) []*html.Node { |
|
return s.matchAllInto(n, nil) |
|
} |
|
|
|
func (s Selector) matchAllInto(n *html.Node, storage []*html.Node) []*html.Node { |
|
if s(n) { |
|
storage = append(storage, n) |
|
} |
|
|
|
for child := n.FirstChild; child != nil; child = child.NextSibling { |
|
storage = s.matchAllInto(child, storage) |
|
} |
|
|
|
return storage |
|
} |
|
|
|
// Match returns true if the node matches the selector. |
|
func (s Selector) Match(n *html.Node) bool { |
|
return s(n) |
|
} |
|
|
|
// MatchFirst returns the first node that matches s, from n and its children. |
|
func (s Selector) MatchFirst(n *html.Node) *html.Node { |
|
if s.Match(n) { |
|
return n |
|
} |
|
|
|
for c := n.FirstChild; c != nil; c = c.NextSibling { |
|
m := s.MatchFirst(c) |
|
if m != nil { |
|
return m |
|
} |
|
} |
|
return nil |
|
} |
|
|
|
// Filter returns the nodes in nodes that match the selector. |
|
func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) { |
|
for _, n := range nodes { |
|
if s(n) { |
|
result = append(result, n) |
|
} |
|
} |
|
return result |
|
} |
|
|
|
// typeSelector returns a Selector that matches elements with a given tag name. |
|
func typeSelector(tag string) Selector { |
|
tag = toLowerASCII(tag) |
|
return func(n *html.Node) bool { |
|
return n.Type == html.ElementNode && n.Data == tag |
|
} |
|
} |
|
|
|
// toLowerASCII returns s with all ASCII capital letters lowercased. |
|
func toLowerASCII(s string) string { |
|
var b []byte |
|
for i := 0; i < len(s); i++ { |
|
if c := s[i]; 'A' <= c && c <= 'Z' { |
|
if b == nil { |
|
b = make([]byte, len(s)) |
|
copy(b, s) |
|
} |
|
b[i] = s[i] + ('a' - 'A') |
|
} |
|
} |
|
|
|
if b == nil { |
|
return s |
|
} |
|
|
|
return string(b) |
|
} |
|
|
|
// attributeSelector returns a Selector that matches elements |
|
// where the attribute named key satisifes the function f. |
|
func attributeSelector(key string, f func(string) bool) Selector { |
|
key = toLowerASCII(key) |
|
return func(n *html.Node) bool { |
|
if n.Type != html.ElementNode { |
|
return false |
|
} |
|
for _, a := range n.Attr { |
|
if a.Key == key && f(a.Val) { |
|
return true |
|
} |
|
} |
|
return false |
|
} |
|
} |
|
|
|
// attributeExistsSelector returns a Selector that matches elements that have |
|
// an attribute named key. |
|
func attributeExistsSelector(key string) Selector { |
|
return attributeSelector(key, func(string) bool { return true }) |
|
} |
|
|
|
// attributeEqualsSelector returns a Selector that matches elements where |
|
// the attribute named key has the value val. |
|
func attributeEqualsSelector(key, val string) Selector { |
|
return attributeSelector(key, |
|
func(s string) bool { |
|
return s == val |
|
}) |
|
} |
|
|
|
// attributeNotEqualSelector returns a Selector that matches elements where |
|
// the attribute named key does not have the value val. |
|
func attributeNotEqualSelector(key, val string) Selector { |
|
key = toLowerASCII(key) |
|
return func(n *html.Node) bool { |
|
if n.Type != html.ElementNode { |
|
return false |
|
} |
|
for _, a := range n.Attr { |
|
if a.Key == key && a.Val == val { |
|
return false |
|
} |
|
} |
|
return true |
|
} |
|
} |
|
|
|
// attributeIncludesSelector returns a Selector that matches elements where |
|
// the attribute named key is a whitespace-separated list that includes val. |
|
func attributeIncludesSelector(key, val string) Selector { |
|
return attributeSelector(key, |
|
func(s string) bool { |
|
for s != "" { |
|
i := strings.IndexAny(s, " \t\r\n\f") |
|
if i == -1 { |
|
return s == val |
|
} |
|
if s[:i] == val { |
|
return true |
|
} |
|
s = s[i+1:] |
|
} |
|
return false |
|
}) |
|
} |
|
|
|
// attributeDashmatchSelector returns a Selector that matches elements where |
|
// the attribute named key equals val or starts with val plus a hyphen. |
|
func attributeDashmatchSelector(key, val string) Selector { |
|
return attributeSelector(key, |
|
func(s string) bool { |
|
if s == val { |
|
return true |
|
} |
|
if len(s) <= len(val) { |
|
return false |
|
} |
|
if s[:len(val)] == val && s[len(val)] == '-' { |
|
return true |
|
} |
|
return false |
|
}) |
|
} |
|
|
|
// attributePrefixSelector returns a Selector that matches elements where |
|
// the attribute named key starts with val. |
|
func attributePrefixSelector(key, val string) Selector { |
|
return attributeSelector(key, |
|
func(s string) bool { |
|
if strings.TrimSpace(s) == "" { |
|
return false |
|
} |
|
return strings.HasPrefix(s, val) |
|
}) |
|
} |
|
|
|
// attributeSuffixSelector returns a Selector that matches elements where |
|
// the attribute named key ends with val. |
|
func attributeSuffixSelector(key, val string) Selector { |
|
return attributeSelector(key, |
|
func(s string) bool { |
|
if strings.TrimSpace(s) == "" { |
|
return false |
|
} |
|
return strings.HasSuffix(s, val) |
|
}) |
|
} |
|
|
|
// attributeSubstringSelector returns a Selector that matches nodes where |
|
// the attribute named key contains val. |
|
func attributeSubstringSelector(key, val string) Selector { |
|
return attributeSelector(key, |
|
func(s string) bool { |
|
if strings.TrimSpace(s) == "" { |
|
return false |
|
} |
|
return strings.Contains(s, val) |
|
}) |
|
} |
|
|
|
// attributeRegexSelector returns a Selector that matches nodes where |
|
// the attribute named key matches the regular expression rx |
|
func attributeRegexSelector(key string, rx *regexp.Regexp) Selector { |
|
return attributeSelector(key, |
|
func(s string) bool { |
|
return rx.MatchString(s) |
|
}) |
|
} |
|
|
|
// intersectionSelector returns a selector that matches nodes that match |
|
// both a and b. |
|
func intersectionSelector(a, b Selector) Selector { |
|
return func(n *html.Node) bool { |
|
return a(n) && b(n) |
|
} |
|
} |
|
|
|
// unionSelector returns a selector that matches elements that match |
|
// either a or b. |
|
func unionSelector(a, b Selector) Selector { |
|
return func(n *html.Node) bool { |
|
return a(n) || b(n) |
|
} |
|
} |
|
|
|
// negatedSelector returns a selector that matches elements that do not match a. |
|
func negatedSelector(a Selector) Selector { |
|
return func(n *html.Node) bool { |
|
if n.Type != html.ElementNode { |
|
return false |
|
} |
|
return !a(n) |
|
} |
|
} |
|
|
|
// writeNodeText writes the text contained in n and its descendants to b. |
|
func writeNodeText(n *html.Node, b *bytes.Buffer) { |
|
switch n.Type { |
|
case html.TextNode: |
|
b.WriteString(n.Data) |
|
case html.ElementNode: |
|
for c := n.FirstChild; c != nil; c = c.NextSibling { |
|
writeNodeText(c, b) |
|
} |
|
} |
|
} |
|
|
|
// nodeText returns the text contained in n and its descendants. |
|
func nodeText(n *html.Node) string { |
|
var b bytes.Buffer |
|
writeNodeText(n, &b) |
|
return b.String() |
|
} |
|
|
|
// nodeOwnText returns the contents of the text nodes that are direct |
|
// children of n. |
|
func nodeOwnText(n *html.Node) string { |
|
var b bytes.Buffer |
|
for c := n.FirstChild; c != nil; c = c.NextSibling { |
|
if c.Type == html.TextNode { |
|
b.WriteString(c.Data) |
|
} |
|
} |
|
return b.String() |
|
} |
|
|
|
// textSubstrSelector returns a selector that matches nodes that |
|
// contain the given text. |
|
func textSubstrSelector(val string) Selector { |
|
return func(n *html.Node) bool { |
|
text := strings.ToLower(nodeText(n)) |
|
return strings.Contains(text, val) |
|
} |
|
} |
|
|
|
// ownTextSubstrSelector returns a selector that matches nodes that |
|
// directly contain the given text |
|
func ownTextSubstrSelector(val string) Selector { |
|
return func(n *html.Node) bool { |
|
text := strings.ToLower(nodeOwnText(n)) |
|
return strings.Contains(text, val) |
|
} |
|
} |
|
|
|
// textRegexSelector returns a selector that matches nodes whose text matches |
|
// the specified regular expression |
|
func textRegexSelector(rx *regexp.Regexp) Selector { |
|
return func(n *html.Node) bool { |
|
return rx.MatchString(nodeText(n)) |
|
} |
|
} |
|
|
|
// ownTextRegexSelector returns a selector that matches nodes whose text |
|
// directly matches the specified regular expression |
|
func ownTextRegexSelector(rx *regexp.Regexp) Selector { |
|
return func(n *html.Node) bool { |
|
return rx.MatchString(nodeOwnText(n)) |
|
} |
|
} |
|
|
|
// hasChildSelector returns a selector that matches elements |
|
// with a child that matches a. |
|
func hasChildSelector(a Selector) Selector { |
|
return func(n *html.Node) bool { |
|
if n.Type != html.ElementNode { |
|
return false |
|
} |
|
return hasChildMatch(n, a) |
|
} |
|
} |
|
|
|
// hasDescendantSelector returns a selector that matches elements |
|
// with any descendant that matches a. |
|
func hasDescendantSelector(a Selector) Selector { |
|
return func(n *html.Node) bool { |
|
if n.Type != html.ElementNode { |
|
return false |
|
} |
|
return hasDescendantMatch(n, a) |
|
} |
|
} |
|
|
|
// nthChildSelector returns a selector that implements :nth-child(an+b). |
|
// If last is true, implements :nth-last-child instead. |
|
// If ofType is true, implements :nth-of-type instead. |
|
func nthChildSelector(a, b int, last, ofType bool) Selector { |
|
return func(n *html.Node) bool { |
|
if n.Type != html.ElementNode { |
|
return false |
|
} |
|
|
|
parent := n.Parent |
|
if parent == nil { |
|
return false |
|
} |
|
|
|
if parent.Type == html.DocumentNode { |
|
return false |
|
} |
|
|
|
i := -1 |
|
count := 0 |
|
for c := parent.FirstChild; c != nil; c = c.NextSibling { |
|
if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) { |
|
continue |
|
} |
|
count++ |
|
if c == n { |
|
i = count |
|
if !last { |
|
break |
|
} |
|
} |
|
} |
|
|
|
if i == -1 { |
|
// This shouldn't happen, since n should always be one of its parent's children. |
|
return false |
|
} |
|
|
|
if last { |
|
i = count - i + 1 |
|
} |
|
|
|
i -= b |
|
if a == 0 { |
|
return i == 0 |
|
} |
|
|
|
return i%a == 0 && i/a >= 0 |
|
} |
|
} |
|
|
|
// simpleNthChildSelector returns a selector that implements :nth-child(b). |
|
// If ofType is true, implements :nth-of-type instead. |
|
func simpleNthChildSelector(b int, ofType bool) Selector { |
|
return func(n *html.Node) bool { |
|
if n.Type != html.ElementNode { |
|
return false |
|
} |
|
|
|
parent := n.Parent |
|
if parent == nil { |
|
return false |
|
} |
|
|
|
if parent.Type == html.DocumentNode { |
|
return false |
|
} |
|
|
|
count := 0 |
|
for c := parent.FirstChild; c != nil; c = c.NextSibling { |
|
if c.Type != html.ElementNode || (ofType && c.Data != n.Data) { |
|
continue |
|
} |
|
count++ |
|
if c == n { |
|
return count == b |
|
} |
|
if count >= b { |
|
return false |
|
} |
|
} |
|
return false |
|
} |
|
} |
|
|
|
// simpleNthLastChildSelector returns a selector that implements |
|
// :nth-last-child(b). If ofType is true, implements :nth-last-of-type |
|
// instead. |
|
func simpleNthLastChildSelector(b int, ofType bool) Selector { |
|
return func(n *html.Node) bool { |
|
if n.Type != html.ElementNode { |
|
return false |
|
} |
|
|
|
parent := n.Parent |
|
if parent == nil { |
|
return false |
|
} |
|
|
|
if parent.Type == html.DocumentNode { |
|
return false |
|
} |
|
|
|
count := 0 |
|
for c := parent.LastChild; c != nil; c = c.PrevSibling { |
|
if c.Type != html.ElementNode || (ofType && c.Data != n.Data) { |
|
continue |
|
} |
|
count++ |
|
if c == n { |
|
return count == b |
|
} |
|
if count >= b { |
|
return false |
|
} |
|
} |
|
return false |
|
} |
|
} |
|
|
|
// onlyChildSelector returns a selector that implements :only-child. |
|
// If ofType is true, it implements :only-of-type instead. |
|
func onlyChildSelector(ofType bool) Selector { |
|
return func(n *html.Node) bool { |
|
if n.Type != html.ElementNode { |
|
return false |
|
} |
|
|
|
parent := n.Parent |
|
if parent == nil { |
|
return false |
|
} |
|
|
|
if parent.Type == html.DocumentNode { |
|
return false |
|
} |
|
|
|
count := 0 |
|
for c := parent.FirstChild; c != nil; c = c.NextSibling { |
|
if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) { |
|
continue |
|
} |
|
count++ |
|
if count > 1 { |
|
return false |
|
} |
|
} |
|
|
|
return count == 1 |
|
} |
|
} |
|
|
|
// inputSelector is a Selector that matches input, select, textarea and button elements. |
|
func inputSelector(n *html.Node) bool { |
|
return n.Type == html.ElementNode && (n.Data == "input" || n.Data == "select" || n.Data == "textarea" || n.Data == "button") |
|
} |
|
|
|
// emptyElementSelector is a Selector that matches empty elements. |
|
func emptyElementSelector(n *html.Node) bool { |
|
if n.Type != html.ElementNode { |
|
return false |
|
} |
|
|
|
for c := n.FirstChild; c != nil; c = c.NextSibling { |
|
switch c.Type { |
|
case html.ElementNode, html.TextNode: |
|
return false |
|
} |
|
} |
|
|
|
return true |
|
} |
|
|
|
// descendantSelector returns a Selector that matches an element if |
|
// it matches d and has an ancestor that matches a. |
|
func descendantSelector(a, d Selector) Selector { |
|
return func(n *html.Node) bool { |
|
if !d(n) { |
|
return false |
|
} |
|
|
|
for p := n.Parent; p != nil; p = p.Parent { |
|
if a(p) { |
|
return true |
|
} |
|
} |
|
|
|
return false |
|
} |
|
} |
|
|
|
// childSelector returns a Selector that matches an element if |
|
// it matches d and its parent matches a. |
|
func childSelector(a, d Selector) Selector { |
|
return func(n *html.Node) bool { |
|
return d(n) && n.Parent != nil && a(n.Parent) |
|
} |
|
} |
|
|
|
// siblingSelector returns a Selector that matches an element |
|
// if it matches s2 and in is preceded by an element that matches s1. |
|
// If adjacent is true, the sibling must be immediately before the element. |
|
func siblingSelector(s1, s2 Selector, adjacent bool) Selector { |
|
return func(n *html.Node) bool { |
|
if !s2(n) { |
|
return false |
|
} |
|
|
|
if adjacent { |
|
for n = n.PrevSibling; n != nil; n = n.PrevSibling { |
|
if n.Type == html.TextNode || n.Type == html.CommentNode { |
|
continue |
|
} |
|
return s1(n) |
|
} |
|
return false |
|
} |
|
|
|
// Walk backwards looking for element that matches s1 |
|
for c := n.PrevSibling; c != nil; c = c.PrevSibling { |
|
if s1(c) { |
|
return true |
|
} |
|
} |
|
|
|
return false |
|
} |
|
} |
|
|
|
// rootSelector implements :root |
|
func rootSelector(n *html.Node) bool { |
|
if n.Type != html.ElementNode { |
|
return false |
|
} |
|
if n.Parent == nil { |
|
return false |
|
} |
|
return n.Parent.Type == html.DocumentNode |
|
}
|
|
|