From 52d333f0841be774b5ed116cd75434814bc09957 Mon Sep 17 00:00:00 2001
From: mrsdizzie <info@mrsdizzie.com>
Date: Wed, 21 Oct 2020 22:37:50 -0400
Subject: [PATCH] Add better error checking for inline html diff code (#13251)

* Fix error in diff html rendering (#13191)

* Fix error in diff html rendering

Was missing an optional whitespace check in regex. Also noticed a rare case where diff.Type == Equal would be empty and thus get a newline attached. Fixed that too.

Fixes #13177

* Update services/gitdiff/gitdiff.go

Co-authored-by: zeripath <art27@cantab.net>

* Update gitdiff_test.go

* fmt

Co-authored-by: zeripath <art27@cantab.net>

* Add better error checking for inline html diff code (#13239)

* Add better error checking for inline html diff code

A better fix for #13191 which cleans up this code a bit and adds basic checking which should avoid writing broken HTML in future situations.

* Update gitdiff_test.go

* better regex

Co-authored-by: zeripath <art27@cantab.net>
---
 services/gitdiff/gitdiff.go      | 88 +++++++++++++++++++---------------------
 services/gitdiff/gitdiff_test.go | 13 +++++-
 2 files changed, 53 insertions(+), 48 deletions(-)

diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go
index 91105399db..164e1c0ca5 100644
--- a/services/gitdiff/gitdiff.go
+++ b/services/gitdiff/gitdiff.go
@@ -181,64 +181,60 @@ var (
 	removedCodePrefix = []byte(`<span class="removed-code">`)
 	codeTagSuffix     = []byte(`</span>`)
 )
-var addSpanRegex = regexp.MustCompile(`<span [class="[a-z]*]*$`)
+var trailingSpanRegex = regexp.MustCompile(`<span\s*[[:alpha:]="]*?[>]?$`)
+
+// shouldWriteInline represents combinations where we manually write inline changes
+func shouldWriteInline(diff diffmatchpatch.Diff, lineType DiffLineType) bool {
+	if true &&
+		diff.Type == diffmatchpatch.DiffEqual ||
+		diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd ||
+		diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel {
+		return true
+	}
+	return false
+}
 
 func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineType) template.HTML {
 	buf := bytes.NewBuffer(nil)
-	var addSpan string
-	for i := range diffs {
-		switch {
-		case diffs[i].Type == diffmatchpatch.DiffEqual:
-			// Looking for the case where our 3rd party diff library previously detected a string difference
-			// in the middle of a span class because we highlight them first. This happens when added/deleted code
-			// also changes the chroma class name, either partially or fully. If found, just move the openining span code forward into the next section
-			// see TestDiffToHTML for examples
-			if len(addSpan) > 0 {
-				diffs[i].Text = addSpan + diffs[i].Text
-				addSpan = ""
+	match := ""
+
+	for _, diff := range diffs {
+		if shouldWriteInline(diff, lineType) {
+			if len(match) > 0 {
+				diff.Text = match + diff.Text
+				match = ""
 			}
-			m := addSpanRegex.FindStringSubmatchIndex(diffs[i].Text)
+			// Chroma HTML syntax highlighting is done before diffing individual lines in order to maintain consistency.
+			// Since inline changes might split in the middle of a chroma span tag, make we manually put it back together
+			// before writing so we don't try insert added/removed code spans in the middle of an existing chroma span
+			// and create broken HTML.
+			m := trailingSpanRegex.FindStringSubmatchIndex(diff.Text)
 			if m != nil {
-				addSpan = diffs[i].Text[m[0]:m[1]]
-				buf.WriteString(strings.TrimSuffix(diffs[i].Text, addSpan))
-			} else {
-				addSpan = ""
-				buf.WriteString(getLineContent(diffs[i].Text))
-			}
-		case diffs[i].Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd:
-			if len(addSpan) > 0 {
-				diffs[i].Text = addSpan + diffs[i].Text
-				addSpan = ""
+				match = diff.Text[m[0]:m[1]]
+				diff.Text = strings.TrimSuffix(diff.Text, match)
 			}
-			// Print existing closing span first before opening added-code span so it doesn't unintentionally close it
-			if strings.HasPrefix(diffs[i].Text, "</span>") {
+			// Print an existing closing span first before opening added/remove-code span so it doesn't unintentionally close it
+			if strings.HasPrefix(diff.Text, "</span>") {
 				buf.WriteString("</span>")
-				diffs[i].Text = strings.TrimPrefix(diffs[i].Text, "</span>")
+				diff.Text = strings.TrimPrefix(diff.Text, "</span>")
 			}
-			m := addSpanRegex.FindStringSubmatchIndex(diffs[i].Text)
-			if m != nil {
-				addSpan = diffs[i].Text[m[0]:m[1]]
-				diffs[i].Text = strings.TrimSuffix(diffs[i].Text, addSpan)
+			// If we weren't able to fix it then this should avoid broken HTML by not inserting more spans below
+			// The previous/next diff section will contain the rest of the tag that is missing here
+			if strings.Count(diff.Text, "<") != strings.Count(diff.Text, ">") {
+				buf.WriteString(diff.Text)
+				continue
 			}
+		}
+		switch {
+		case diff.Type == diffmatchpatch.DiffEqual:
+			buf.WriteString(diff.Text)
+		case diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd:
 			buf.Write(addedCodePrefix)
-			buf.WriteString(getLineContent(diffs[i].Text))
+			buf.WriteString(diff.Text)
 			buf.Write(codeTagSuffix)
-		case diffs[i].Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel:
-			if len(addSpan) > 0 {
-				diffs[i].Text = addSpan + diffs[i].Text
-				addSpan = ""
-			}
-			if strings.HasPrefix(diffs[i].Text, "</span>") {
-				buf.WriteString("</span>")
-				diffs[i].Text = strings.TrimPrefix(diffs[i].Text, "</span>")
-			}
-			m := addSpanRegex.FindStringSubmatchIndex(diffs[i].Text)
-			if m != nil {
-				addSpan = diffs[i].Text[m[0]:m[1]]
-				diffs[i].Text = strings.TrimSuffix(diffs[i].Text, addSpan)
-			}
+		case diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel:
 			buf.Write(removedCodePrefix)
-			buf.WriteString(getLineContent(diffs[i].Text))
+			buf.WriteString(diff.Text)
 			buf.Write(codeTagSuffix)
 		}
 	}
diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go
index 64cd4f1c21..e7eeca7004 100644
--- a/services/gitdiff/gitdiff_test.go
+++ b/services/gitdiff/gitdiff_test.go
@@ -50,7 +50,7 @@ func TestDiffToHTML(t *testing.T) {
 		{Type: dmp.DiffEqual, Text: "</span> <span class=\"p\">{</span>"},
 	}, DiffLineAdd))
 
-	assertEqual(t, "<span class=\"nx\">tagURL</span> <span class=\"o\">:=</span> <span class=\"removed-code\"><span class=\"nx\">fmt</span><span class=\"p\">.</span><span class=\"nf\">Sprintf</span><span class=\"p\">(</span><span class=\"s\">&#34;## [%s](%s/%s/%s/%s?q=&amp;type=all&amp;state=closed&amp;milestone=%d) - %s&#34;</span><span class=\"p\">,</span> <span class=\"nx\">ge</span><span class=\"p\">.</span><span class=\"nx\">Milestone\"</span></span><span class=\"p\">,</span> <span class=\"nx\">ge</span><span class=\"p\">.</span><span class=\"nx\">BaseURL</span><span class=\"p\">,</span> <span class=\"nx\">ge</span><span class=\"p\">.</span><span class=\"nx\">Owner</span><span class=\"p\">,</span> <span class=\"nx\">ge</span><span class=\"p\">.</span><span class=\"nx\">Repo</span><span class=\"p\">,</span> <span class=\"nx\"><span class=\"removed-code\">from</span><span class=\"p\">,</span> <span class=\"nx\">milestoneID</span><span class=\"p\">,</span> <span class=\"nx\">time</span><span class=\"p\">.</span><span class=\"nf\">Now</span><span class=\"p\">(</span><span class=\"p\">)</span><span class=\"p\">.</span><span class=\"nf\">Format</span><span class=\"p\">(</span><span class=\"s\">&#34;2006-01-02&#34;</span><span class=\"p\">)</span></span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
+	assertEqual(t, "<span class=\"nx\">tagURL</span> <span class=\"o\">:=</span> <span class=\"removed-code\"><span class=\"nx\">fmt</span><span class=\"p\">.</span><span class=\"nf\">Sprintf</span><span class=\"p\">(</span><span class=\"s\">&#34;## [%s](%s/%s/%s/%s?q=&amp;type=all&amp;state=closed&amp;milestone=%d) - %s&#34;</span><span class=\"p\">,</span> <span class=\"nx\">ge</span><span class=\"p\">.</span><span class=\"nx\">Milestone\"</span></span><span class=\"p\">,</span> <span class=\"nx\">ge</span><span class=\"p\">.</span><span class=\"nx\">BaseURL</span><span class=\"p\">,</span> <span class=\"nx\">ge</span><span class=\"p\">.</span><span class=\"nx\">Owner</span><span class=\"p\">,</span> <span class=\"nx\">ge</span><span class=\"p\">.</span><span class=\"nx\">Repo</span><span class=\"p\">,</span> <span class=\"removed-code\"><span class=\"nx\">from</span><span class=\"p\">,</span> <span class=\"nx\">milestoneID</span><span class=\"p\">,</span> <span class=\"nx\">time</span><span class=\"p\">.</span><span class=\"nf\">Now</span><span class=\"p\">(</span><span class=\"p\">)</span><span class=\"p\">.</span><span class=\"nf\">Format</span><span class=\"p\">(</span><span class=\"s\">&#34;2006-01-02&#34;</span><span class=\"p\">)</span></span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
 		{Type: dmp.DiffEqual, Text: "<span class=\"nx\">tagURL</span> <span class=\"o\">:=</span> <span class=\"n"},
 		{Type: dmp.DiffDelete, Text: "x\">fmt</span><span class=\"p\">.</span><span class=\"nf\">Sprintf</span><span class=\"p\">(</span><span class=\"s\">&#34;## [%s](%s/%s/%s/%s?q=&amp;type=all&amp;state=closed&amp;milestone=%d) - %s&#34;</span><span class=\"p\">,</span> <span class=\"nx\">ge</span><span class=\"p\">.</span><span class=\"nx\">Milestone\""},
 		{Type: dmp.DiffInsert, Text: "f\">getGiteaTagURL</span><span class=\"p\">(</span><span class=\"nx\">client"},
@@ -60,7 +60,7 @@ func TestDiffToHTML(t *testing.T) {
 		{Type: dmp.DiffEqual, Text: "</span><span class=\"p\">)</span>"},
 	}, DiffLineDel))
 
-	assertEqual(t, "<span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nf\">WrapperRenderer</span><span class=\"p\">(</span><span class=\"nx\">w</span><span class=\"p\">,</span> <span class=\"nx\"><span class=\"removed-code\">language</span></span><span class=\"removed-code\"><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
+	assertEqual(t, "<span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nf\">WrapperRenderer</span><span class=\"p\">(</span><span class=\"nx\">w</span><span class=\"p\">,</span> <span class=\"removed-code\"><span class=\"nx\">language</span></span><span class=\"removed-code\"><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
 		{Type: dmp.DiffEqual, Text: "<span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nf\">WrapperRenderer</span><span class=\"p\">(</span><span class=\"nx\">w</span><span class=\"p\">,</span> <span class=\"nx\">"},
 		{Type: dmp.DiffDelete, Text: "language</span><span "},
 		{Type: dmp.DiffEqual, Text: "c"},
@@ -74,6 +74,15 @@ func TestDiffToHTML(t *testing.T) {
 		{Type: dmp.DiffInsert, Text: "lass=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs"},
 		{Type: dmp.DiffEqual, Text: "</span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>"},
 	}, DiffLineAdd))
+
+	assertEqual(t, "<span class=\"k\">print</span><span class=\"added-code\"></span><span class=\"added-code\"><span class=\"p\">(</span></span><span class=\"sa\"></span><span class=\"s2\">&#34;</span><span class=\"s2\">// </span><span class=\"s2\">&#34;</span><span class=\"p\">,</span> <span class=\"n\">sys</span><span class=\"o\">.</span><span class=\"n\">argv</span><span class=\"added-code\"><span class=\"p\">)</span></span>", diffToHTML("", []dmp.Diff{
+		{Type: dmp.DiffEqual, Text: "<span class=\"k\">print</span>"},
+		{Type: dmp.DiffInsert, Text: "<span"},
+		{Type: dmp.DiffEqual, Text: " "},
+		{Type: dmp.DiffInsert, Text: "class=\"p\">(</span>"},
+		{Type: dmp.DiffEqual, Text: "<span class=\"sa\"></span><span class=\"s2\">&#34;</span><span class=\"s2\">// </span><span class=\"s2\">&#34;</span><span class=\"p\">,</span> <span class=\"n\">sys</span><span class=\"o\">.</span><span class=\"n\">argv</span>"},
+		{Type: dmp.DiffInsert, Text: "<span class=\"p\">)</span>"},
+	}, DiffLineAdd))
 }
 
 func TestParsePatch_singlefile(t *testing.T) {