@ -39,7 +39,7 @@ import (
"golang.org/x/net/html"
cssparser "github.com/chris-ramon /douceur/parser"
"github.com/aymerick /douceur/parser"
)
var (
@ -286,7 +286,7 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
case html . StartTagToken :
mostRecentlyStartedToken = strings . ToLower ( token . Data )
mostRecentlyStartedToken = normaliseElementName ( token . Data )
aps , ok := p . elsAndAttrs [ token . Data ]
if ! ok {
@ -329,7 +329,7 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
case html . EndTagToken :
if mostRecentlyStartedToken == strings . ToLower ( token . Data ) {
if mostRecentlyStartedToken == normaliseElementName ( token . Data ) {
mostRecentlyStartedToken = ""
}
@ -407,11 +407,11 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
if ! skipElementContent {
switch mostRecentlyStartedToken {
case "script" :
case ` script ` :
// not encouraged, but if a policy allows JavaScript we
// should not HTML escape it as that would break the output
buff . WriteString ( token . Data )
case "style" :
case ` style ` :
// not encouraged, but if a policy allows CSS styles we
// should not HTML escape it as that would break the output
buff . WriteString ( token . Data )
@ -721,6 +721,26 @@ func (p *Policy) sanitizeAttrs(
}
}
if p . requireCrossOriginAnonymous && len ( cleanAttrs ) > 0 {
switch elementName {
case "audio" , "img" , "link" , "script" , "video" :
var crossOriginFound bool
for _ , htmlAttr := range cleanAttrs {
if htmlAttr . Key == "crossorigin" {
crossOriginFound = true
htmlAttr . Val = "anonymous"
}
}
if ! crossOriginFound {
crossOrigin := html . Attribute { }
crossOrigin . Key = "crossorigin"
crossOrigin . Val = "anonymous"
cleanAttrs = append ( cleanAttrs , crossOrigin )
}
}
}
return cleanAttrs
}
@ -744,7 +764,7 @@ func (p *Policy) sanitizeStyles(attr html.Attribute, elementName string) html.At
if len ( attr . Val ) > 0 && attr . Val [ len ( attr . Val ) - 1 ] != ';' {
attr . Val = attr . Val + ";"
}
decs , err := css parser. ParseDeclarations ( attr . Val )
decs , err := parser . ParseDeclarations ( attr . Val )
if err != nil {
attr . Val = ""
return attr
@ -944,3 +964,23 @@ func (p *Policy) matchRegex(elementName string) (map[string]attrPolicy, bool) {
}
return aps , matched
}
// normaliseElementName takes a HTML element like <script> which is user input
// and returns a lower case version of it that is immune to UTF-8 to ASCII
// conversion tricks (like the use of upper case cyrillic i scrİpt which a
// strings.ToLower would convert to script). Instead this func will preserve
// all non-ASCII as their escaped equivalent, i.e. \u0130 which reveals the
// characters when lower cased
func normaliseElementName ( str string ) string {
// that useful QuoteToASCII put quote marks at the start and end
// so those are trimmed off
return strings . TrimSuffix (
strings . TrimPrefix (
strings . ToLower (
strconv . QuoteToASCII ( str ) ,
) ,
` " ` ) ,
` " ` ,
)
}