Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
133 lines
3.2 KiB
133 lines
3.2 KiB
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
|
|
|
package mdstripper |
|
|
|
import ( |
|
"bytes" |
|
"sync" |
|
|
|
"io" |
|
|
|
"code.gitea.io/gitea/modules/log" |
|
"code.gitea.io/gitea/modules/markup/common" |
|
|
|
"github.com/yuin/goldmark" |
|
"github.com/yuin/goldmark/ast" |
|
"github.com/yuin/goldmark/extension" |
|
"github.com/yuin/goldmark/parser" |
|
"github.com/yuin/goldmark/renderer" |
|
"github.com/yuin/goldmark/renderer/html" |
|
"github.com/yuin/goldmark/text" |
|
) |
|
|
|
// stripRenderer walks a goldmark AST, writing the plain text it finds to an
// io.Writer and collecting link destinations on the side.
type stripRenderer struct {
	// links accumulates, in walk order, every link passed to processLink.
	links []string
	// empty stays true until processString has written its first text; while
	// it is true doubleSpace writes nothing, so the output never starts with
	// a separator.
	empty bool
}
|
|
|
func (r *stripRenderer) Render(w io.Writer, source []byte, doc ast.Node) error { |
|
return ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) { |
|
if !entering { |
|
return ast.WalkContinue, nil |
|
} |
|
switch v := n.(type) { |
|
case *ast.Text: |
|
if !v.IsRaw() { |
|
_, prevSibIsText := n.PreviousSibling().(*ast.Text) |
|
coalesce := prevSibIsText |
|
r.processString( |
|
w, |
|
v.Text(source), |
|
coalesce) |
|
if v.SoftLineBreak() { |
|
r.doubleSpace(w) |
|
} |
|
} |
|
return ast.WalkContinue, nil |
|
case *ast.Link: |
|
r.processLink(w, v.Destination) |
|
return ast.WalkSkipChildren, nil |
|
case *ast.AutoLink: |
|
r.processLink(w, v.URL(source)) |
|
return ast.WalkSkipChildren, nil |
|
} |
|
return ast.WalkContinue, nil |
|
}) |
|
} |
|
|
|
func (r *stripRenderer) doubleSpace(w io.Writer) { |
|
if !r.empty { |
|
_, _ = w.Write([]byte{'\n'}) |
|
} |
|
} |
|
|
|
func (r *stripRenderer) processString(w io.Writer, text []byte, coalesce bool) { |
|
// Always break-up words |
|
if !coalesce { |
|
r.doubleSpace(w) |
|
} |
|
_, _ = w.Write(text) |
|
r.empty = false |
|
} |
|
|
|
func (r *stripRenderer) processLink(w io.Writer, link []byte) { |
|
// Links are processed out of band |
|
r.links = append(r.links, string(link)) |
|
} |
|
|
|
// GetLinks returns the list of link data collected while parsing |
|
func (r *stripRenderer) GetLinks() []string { |
|
return r.links |
|
} |
|
|
|
// AddOptions adds given option to this renderer. |
|
func (r *stripRenderer) AddOptions(...renderer.Option) { |
|
// no-op |
|
} |
|
|
|
// StripMarkdown parses markdown content by removing all markup and code blocks |
|
// in order to extract links and other references |
|
func StripMarkdown(rawBytes []byte) (string, []string) { |
|
buf, links := StripMarkdownBytes(rawBytes) |
|
return string(buf), links |
|
} |
|
|
|
var stripParser parser.Parser |
|
var once = sync.Once{} |
|
|
|
// StripMarkdownBytes parses markdown content by removing all markup and code blocks |
|
// in order to extract links and other references |
|
func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) { |
|
once.Do(func() { |
|
gdMarkdown := goldmark.New( |
|
goldmark.WithExtensions(extension.Table, |
|
extension.Strikethrough, |
|
extension.TaskList, |
|
extension.DefinitionList, |
|
common.FootnoteExtension, |
|
common.Linkify, |
|
), |
|
goldmark.WithParserOptions( |
|
parser.WithAttribute(), |
|
parser.WithAutoHeadingID(), |
|
), |
|
goldmark.WithRendererOptions( |
|
html.WithUnsafe(), |
|
), |
|
) |
|
stripParser = gdMarkdown.Parser() |
|
}) |
|
stripper := &stripRenderer{ |
|
links: make([]string, 0, 10), |
|
empty: true, |
|
} |
|
reader := text.NewReader(rawBytes) |
|
doc := stripParser.Parse(reader) |
|
var buf bytes.Buffer |
|
if err := stripper.Render(&buf, rawBytes, doc); err != nil { |
|
log.Error("Unable to strip: %v", err) |
|
} |
|
return buf.Bytes(), stripper.GetLinks() |
|
}
|
|
|