Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2244 lines
66 KiB
2244 lines
66 KiB
/** |
|
* dmp.go |
|
* |
|
* Go language implementation of Google Diff, Match, and Patch library |
|
* |
|
* Original library is Copyright (c) 2006 Google Inc. |
|
* http://code.google.com/p/google-diff-match-patch/ |
|
* |
|
* Copyright (c) 2012 Sergi Mansilla <sergi.mansilla@gmail.com> |
|
* https://github.com/sergi/go-diff |
|
* |
|
* See included LICENSE file for license details. |
|
*/ |
|
|
|
// Package diffmatchpatch offers robust algorithms to perform the |
|
// operations required for synchronizing plain text. |
|
package diffmatchpatch |
|
|
|
import ( |
|
"bytes" |
|
"errors" |
|
"fmt" |
|
"html" |
|
"math" |
|
"net/url" |
|
"regexp" |
|
"strconv" |
|
"strings" |
|
"time" |
|
"unicode/utf8" |
|
) |
|
|
|
// The data structure representing a diff is an array of tuples: |
|
// [[DiffDelete, 'Hello'], [DiffInsert, 'Goodbye'], [DiffEqual, ' world.']] |
|
// which means: delete 'Hello', add 'Goodbye' and keep ' world.' |
|
|
|
// Operation defines the operation of a diff item. |
|
type Operation int8 |
|
|
|
const ( |
|
// DiffDelete item represents a delete diff. |
|
DiffDelete Operation = -1 |
|
// DiffInsert item represents an insert diff. |
|
DiffInsert Operation = 1 |
|
// DiffEqual item represents an equal diff. |
|
DiffEqual Operation = 0 |
|
) |
|
|
|
// unescaper unescapes selected chars for compatibility with JavaScript's encodeURI. |
|
// In speed critical applications this could be dropped since the |
|
// receiving application will certainly decode these fine. |
|
// Note that this function is case-sensitive. Thus "%3F" would not be |
|
// unescaped. But this is ok because it is only called with the output of |
|
// HttpUtility.UrlEncode which returns lowercase hex. |
|
// |
|
// Example: "%3f" -> "?", "%24" -> "$", etc. |
|
var unescaper = strings.NewReplacer( |
|
"%21", "!", "%7E", "~", "%27", "'", |
|
"%28", "(", "%29", ")", "%3B", ";", |
|
"%2F", "/", "%3F", "?", "%3A", ":", |
|
"%40", "@", "%26", "&", "%3D", "=", |
|
"%2B", "+", "%24", "$", "%2C", ",", "%23", "#", "%2A", "*") |
|
|
|
// Define some regex patterns for matching boundaries. |
|
var ( |
|
nonAlphaNumericRegex = regexp.MustCompile(`[^a-zA-Z0-9]`) |
|
whitespaceRegex = regexp.MustCompile(`\s`) |
|
linebreakRegex = regexp.MustCompile(`[\r\n]`) |
|
blanklineEndRegex = regexp.MustCompile(`\n\r?\n$`) |
|
blanklineStartRegex = regexp.MustCompile(`^\r?\n\r?\n`) |
|
) |
|
|
|
func splice(slice []Diff, index int, amount int, elements ...Diff) []Diff { |
|
return append(slice[:index], append(elements, slice[index+amount:]...)...) |
|
} |
|
|
|
// indexOf returns the first index of pattern in str, starting at str[i]. |
|
func indexOf(str string, pattern string, i int) int { |
|
if i > len(str)-1 { |
|
return -1 |
|
} |
|
if i <= 0 { |
|
return strings.Index(str, pattern) |
|
} |
|
ind := strings.Index(str[i:], pattern) |
|
if ind == -1 { |
|
return -1 |
|
} |
|
return ind + i |
|
} |
|
|
|
// lastIndexOf returns the last index of pattern in str, starting at str[i]. |
|
func lastIndexOf(str string, pattern string, i int) int { |
|
if i < 0 { |
|
return -1 |
|
} |
|
if i >= len(str) { |
|
return strings.LastIndex(str, pattern) |
|
} |
|
_, size := utf8.DecodeRuneInString(str[i:]) |
|
return strings.LastIndex(str[:i+size], pattern) |
|
} |
|
|
|
// Return the index of pattern in target, starting at target[i]. |
|
func runesIndexOf(target, pattern []rune, i int) int { |
|
if i > len(target)-1 { |
|
return -1 |
|
} |
|
if i <= 0 { |
|
return runesIndex(target, pattern) |
|
} |
|
ind := runesIndex(target[i:], pattern) |
|
if ind == -1 { |
|
return -1 |
|
} |
|
return ind + i |
|
} |
|
|
|
func min(x, y int) int { |
|
if x < y { |
|
return x |
|
} |
|
return y |
|
} |
|
|
|
func max(x, y int) int { |
|
if x > y { |
|
return x |
|
} |
|
return y |
|
} |
|
|
|
func runesEqual(r1, r2 []rune) bool { |
|
if len(r1) != len(r2) { |
|
return false |
|
} |
|
for i, c := range r1 { |
|
if c != r2[i] { |
|
return false |
|
} |
|
} |
|
return true |
|
} |
|
|
|
// The equivalent of strings.Index for rune slices. |
|
func runesIndex(r1, r2 []rune) int { |
|
last := len(r1) - len(r2) |
|
for i := 0; i <= last; i++ { |
|
if runesEqual(r1[i:i+len(r2)], r2) { |
|
return i |
|
} |
|
} |
|
return -1 |
|
} |
|
|
|
// Diff represents one diff operation |
|
type Diff struct { |
|
Type Operation |
|
Text string |
|
} |
|
|
|
// Patch represents one patch operation. |
|
type Patch struct { |
|
diffs []Diff |
|
start1 int |
|
start2 int |
|
length1 int |
|
length2 int |
|
} |
|
|
|
// String emulates GNU diff's format. |
|
// Header: @@ -382,8 +481,9 @@ |
|
// Indicies are printed as 1-based, not 0-based. |
|
func (p *Patch) String() string { |
|
var coords1, coords2 string |
|
|
|
if p.length1 == 0 { |
|
coords1 = strconv.Itoa(p.start1) + ",0" |
|
} else if p.length1 == 1 { |
|
coords1 = strconv.Itoa(p.start1 + 1) |
|
} else { |
|
coords1 = strconv.Itoa(p.start1+1) + "," + strconv.Itoa(p.length1) |
|
} |
|
|
|
if p.length2 == 0 { |
|
coords2 = strconv.Itoa(p.start2) + ",0" |
|
} else if p.length2 == 1 { |
|
coords2 = strconv.Itoa(p.start2 + 1) |
|
} else { |
|
coords2 = strconv.Itoa(p.start2+1) + "," + strconv.Itoa(p.length2) |
|
} |
|
|
|
var text bytes.Buffer |
|
_, _ = text.WriteString("@@ -" + coords1 + " +" + coords2 + " @@\n") |
|
|
|
// Escape the body of the patch with %xx notation. |
|
for _, aDiff := range p.diffs { |
|
switch aDiff.Type { |
|
case DiffInsert: |
|
_, _ = text.WriteString("+") |
|
case DiffDelete: |
|
_, _ = text.WriteString("-") |
|
case DiffEqual: |
|
_, _ = text.WriteString(" ") |
|
} |
|
|
|
_, _ = text.WriteString(strings.Replace(url.QueryEscape(aDiff.Text), "+", " ", -1)) |
|
_, _ = text.WriteString("\n") |
|
} |
|
|
|
return unescaper.Replace(text.String()) |
|
} |
|
|
|
// DiffMatchPatch holds the configuration for diff-match-patch operations. |
|
type DiffMatchPatch struct { |
|
// Number of seconds to map a diff before giving up (0 for infinity). |
|
DiffTimeout time.Duration |
|
// Cost of an empty edit operation in terms of edit characters. |
|
DiffEditCost int |
|
// How far to search for a match (0 = exact location, 1000+ = broad match). |
|
// A match this many characters away from the expected location will add |
|
// 1.0 to the score (0.0 is a perfect match). |
|
MatchDistance int |
|
// When deleting a large block of text (over ~64 characters), how close do |
|
// the contents have to be to match the expected contents. (0.0 = perfection, |
|
// 1.0 = very loose). Note that MatchThreshold controls how closely the |
|
// end points of a delete need to match. |
|
PatchDeleteThreshold float64 |
|
// Chunk size for context length. |
|
PatchMargin int |
|
// The number of bits in an int. |
|
MatchMaxBits int |
|
// At what point is no match declared (0.0 = perfection, 1.0 = very loose). |
|
MatchThreshold float64 |
|
} |
|
|
|
// New creates a new DiffMatchPatch object with default parameters. |
|
func New() *DiffMatchPatch { |
|
// Defaults. |
|
return &DiffMatchPatch{ |
|
DiffTimeout: time.Second, |
|
DiffEditCost: 4, |
|
MatchThreshold: 0.5, |
|
MatchDistance: 1000, |
|
PatchDeleteThreshold: 0.5, |
|
PatchMargin: 4, |
|
MatchMaxBits: 32, |
|
} |
|
} |
|
|
|
// DiffMain finds the differences between two texts. |
|
func (dmp *DiffMatchPatch) DiffMain(text1, text2 string, checklines bool) []Diff { |
|
return dmp.DiffMainRunes([]rune(text1), []rune(text2), checklines) |
|
} |
|
|
|
// DiffMainRunes finds the differences between two rune sequences. |
|
func (dmp *DiffMatchPatch) DiffMainRunes(text1, text2 []rune, checklines bool) []Diff { |
|
var deadline time.Time |
|
if dmp.DiffTimeout > 0 { |
|
deadline = time.Now().Add(dmp.DiffTimeout) |
|
} |
|
return dmp.diffMainRunes(text1, text2, checklines, deadline) |
|
} |
|
|
|
func (dmp *DiffMatchPatch) diffMainRunes(text1, text2 []rune, checklines bool, deadline time.Time) []Diff { |
|
if runesEqual(text1, text2) { |
|
var diffs []Diff |
|
if len(text1) > 0 { |
|
diffs = append(diffs, Diff{DiffEqual, string(text1)}) |
|
} |
|
return diffs |
|
} |
|
// Trim off common prefix (speedup). |
|
commonlength := commonPrefixLength(text1, text2) |
|
commonprefix := text1[:commonlength] |
|
text1 = text1[commonlength:] |
|
text2 = text2[commonlength:] |
|
|
|
// Trim off common suffix (speedup). |
|
commonlength = commonSuffixLength(text1, text2) |
|
commonsuffix := text1[len(text1)-commonlength:] |
|
text1 = text1[:len(text1)-commonlength] |
|
text2 = text2[:len(text2)-commonlength] |
|
|
|
// Compute the diff on the middle block. |
|
diffs := dmp.diffCompute(text1, text2, checklines, deadline) |
|
|
|
// Restore the prefix and suffix. |
|
if len(commonprefix) != 0 { |
|
diffs = append([]Diff{Diff{DiffEqual, string(commonprefix)}}, diffs...) |
|
} |
|
if len(commonsuffix) != 0 { |
|
diffs = append(diffs, Diff{DiffEqual, string(commonsuffix)}) |
|
} |
|
|
|
return dmp.DiffCleanupMerge(diffs) |
|
} |
|
|
|
// diffCompute finds the differences between two rune slices. Assumes that the texts do not |
|
// have any common prefix or suffix. |
|
func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, deadline time.Time) []Diff { |
|
diffs := []Diff{} |
|
if len(text1) == 0 { |
|
// Just add some text (speedup). |
|
return append(diffs, Diff{DiffInsert, string(text2)}) |
|
} else if len(text2) == 0 { |
|
// Just delete some text (speedup). |
|
return append(diffs, Diff{DiffDelete, string(text1)}) |
|
} |
|
|
|
var longtext, shorttext []rune |
|
if len(text1) > len(text2) { |
|
longtext = text1 |
|
shorttext = text2 |
|
} else { |
|
longtext = text2 |
|
shorttext = text1 |
|
} |
|
|
|
if i := runesIndex(longtext, shorttext); i != -1 { |
|
op := DiffInsert |
|
// Swap insertions for deletions if diff is reversed. |
|
if len(text1) > len(text2) { |
|
op = DiffDelete |
|
} |
|
// Shorter text is inside the longer text (speedup). |
|
return []Diff{ |
|
Diff{op, string(longtext[:i])}, |
|
Diff{DiffEqual, string(shorttext)}, |
|
Diff{op, string(longtext[i+len(shorttext):])}, |
|
} |
|
} else if len(shorttext) == 1 { |
|
// Single character string. |
|
// After the previous speedup, the character can't be an equality. |
|
return []Diff{ |
|
Diff{DiffDelete, string(text1)}, |
|
Diff{DiffInsert, string(text2)}, |
|
} |
|
// Check to see if the problem can be split in two. |
|
} else if hm := dmp.diffHalfMatch(text1, text2); hm != nil { |
|
// A half-match was found, sort out the return data. |
|
text1A := hm[0] |
|
text1B := hm[1] |
|
text2A := hm[2] |
|
text2B := hm[3] |
|
midCommon := hm[4] |
|
// Send both pairs off for separate processing. |
|
diffsA := dmp.diffMainRunes(text1A, text2A, checklines, deadline) |
|
diffsB := dmp.diffMainRunes(text1B, text2B, checklines, deadline) |
|
// Merge the results. |
|
return append(diffsA, append([]Diff{Diff{DiffEqual, string(midCommon)}}, diffsB...)...) |
|
} else if checklines && len(text1) > 100 && len(text2) > 100 { |
|
return dmp.diffLineMode(text1, text2, deadline) |
|
} |
|
return dmp.diffBisect(text1, text2, deadline) |
|
} |
|
|
|
// diffLineMode does a quick line-level diff on both []runes, then rediff the parts for |
|
// greater accuracy. This speedup can produce non-minimal diffs. |
|
func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time) []Diff { |
|
// Scan the text on a line-by-line basis first. |
|
text1, text2, linearray := dmp.diffLinesToRunes(text1, text2) |
|
|
|
diffs := dmp.diffMainRunes(text1, text2, false, deadline) |
|
|
|
// Convert the diff back to original text. |
|
diffs = dmp.DiffCharsToLines(diffs, linearray) |
|
// Eliminate freak matches (e.g. blank lines) |
|
diffs = dmp.DiffCleanupSemantic(diffs) |
|
|
|
// Rediff any replacement blocks, this time character-by-character. |
|
// Add a dummy entry at the end. |
|
diffs = append(diffs, Diff{DiffEqual, ""}) |
|
|
|
pointer := 0 |
|
countDelete := 0 |
|
countInsert := 0 |
|
|
|
// NOTE: Rune slices are slower than using strings in this case. |
|
textDelete := "" |
|
textInsert := "" |
|
|
|
for pointer < len(diffs) { |
|
switch diffs[pointer].Type { |
|
case DiffInsert: |
|
countInsert++ |
|
textInsert += diffs[pointer].Text |
|
case DiffDelete: |
|
countDelete++ |
|
textDelete += diffs[pointer].Text |
|
case DiffEqual: |
|
// Upon reaching an equality, check for prior redundancies. |
|
if countDelete >= 1 && countInsert >= 1 { |
|
// Delete the offending records and add the merged ones. |
|
diffs = splice(diffs, pointer-countDelete-countInsert, |
|
countDelete+countInsert) |
|
|
|
pointer = pointer - countDelete - countInsert |
|
a := dmp.diffMainRunes([]rune(textDelete), []rune(textInsert), false, deadline) |
|
for j := len(a) - 1; j >= 0; j-- { |
|
diffs = splice(diffs, pointer, 0, a[j]) |
|
} |
|
pointer = pointer + len(a) |
|
} |
|
|
|
countInsert = 0 |
|
countDelete = 0 |
|
textDelete = "" |
|
textInsert = "" |
|
} |
|
pointer++ |
|
} |
|
|
|
return diffs[:len(diffs)-1] // Remove the dummy entry at the end. |
|
} |
|
|
|
// DiffBisect finds the 'middle snake' of a diff, split the problem in two |
|
// and return the recursively constructed diff. |
|
// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. |
|
func (dmp *DiffMatchPatch) DiffBisect(text1, text2 string, deadline time.Time) []Diff { |
|
// Unused in this code, but retained for interface compatibility. |
|
return dmp.diffBisect([]rune(text1), []rune(text2), deadline) |
|
} |
|
|
|
// diffBisect finds the 'middle snake' of a diff, splits the problem in two |
|
// and returns the recursively constructed diff. |
|
// See Myers's 1986 paper: An O(ND) Difference Algorithm and Its Variations. |
|
func (dmp *DiffMatchPatch) diffBisect(runes1, runes2 []rune, deadline time.Time) []Diff { |
|
// Cache the text lengths to prevent multiple calls. |
|
runes1Len, runes2Len := len(runes1), len(runes2) |
|
|
|
maxD := (runes1Len + runes2Len + 1) / 2 |
|
vOffset := maxD |
|
vLength := 2 * maxD |
|
|
|
v1 := make([]int, vLength) |
|
v2 := make([]int, vLength) |
|
for i := range v1 { |
|
v1[i] = -1 |
|
v2[i] = -1 |
|
} |
|
v1[vOffset+1] = 0 |
|
v2[vOffset+1] = 0 |
|
|
|
delta := runes1Len - runes2Len |
|
// If the total number of characters is odd, then the front path will collide |
|
// with the reverse path. |
|
front := (delta%2 != 0) |
|
// Offsets for start and end of k loop. |
|
// Prevents mapping of space beyond the grid. |
|
k1start := 0 |
|
k1end := 0 |
|
k2start := 0 |
|
k2end := 0 |
|
for d := 0; d < maxD; d++ { |
|
// Bail out if deadline is reached. |
|
if !deadline.IsZero() && time.Now().After(deadline) { |
|
break |
|
} |
|
|
|
// Walk the front path one step. |
|
for k1 := -d + k1start; k1 <= d-k1end; k1 += 2 { |
|
k1Offset := vOffset + k1 |
|
var x1 int |
|
|
|
if k1 == -d || (k1 != d && v1[k1Offset-1] < v1[k1Offset+1]) { |
|
x1 = v1[k1Offset+1] |
|
} else { |
|
x1 = v1[k1Offset-1] + 1 |
|
} |
|
|
|
y1 := x1 - k1 |
|
for x1 < runes1Len && y1 < runes2Len { |
|
if runes1[x1] != runes2[y1] { |
|
break |
|
} |
|
x1++ |
|
y1++ |
|
} |
|
v1[k1Offset] = x1 |
|
if x1 > runes1Len { |
|
// Ran off the right of the graph. |
|
k1end += 2 |
|
} else if y1 > runes2Len { |
|
// Ran off the bottom of the graph. |
|
k1start += 2 |
|
} else if front { |
|
k2Offset := vOffset + delta - k1 |
|
if k2Offset >= 0 && k2Offset < vLength && v2[k2Offset] != -1 { |
|
// Mirror x2 onto top-left coordinate system. |
|
x2 := runes1Len - v2[k2Offset] |
|
if x1 >= x2 { |
|
// Overlap detected. |
|
return dmp.diffBisectSplit(runes1, runes2, x1, y1, deadline) |
|
} |
|
} |
|
} |
|
} |
|
// Walk the reverse path one step. |
|
for k2 := -d + k2start; k2 <= d-k2end; k2 += 2 { |
|
k2Offset := vOffset + k2 |
|
var x2 int |
|
if k2 == -d || (k2 != d && v2[k2Offset-1] < v2[k2Offset+1]) { |
|
x2 = v2[k2Offset+1] |
|
} else { |
|
x2 = v2[k2Offset-1] + 1 |
|
} |
|
var y2 = x2 - k2 |
|
for x2 < runes1Len && y2 < runes2Len { |
|
if runes1[runes1Len-x2-1] != runes2[runes2Len-y2-1] { |
|
break |
|
} |
|
x2++ |
|
y2++ |
|
} |
|
v2[k2Offset] = x2 |
|
if x2 > runes1Len { |
|
// Ran off the left of the graph. |
|
k2end += 2 |
|
} else if y2 > runes2Len { |
|
// Ran off the top of the graph. |
|
k2start += 2 |
|
} else if !front { |
|
k1Offset := vOffset + delta - k2 |
|
if k1Offset >= 0 && k1Offset < vLength && v1[k1Offset] != -1 { |
|
x1 := v1[k1Offset] |
|
y1 := vOffset + x1 - k1Offset |
|
// Mirror x2 onto top-left coordinate system. |
|
x2 = runes1Len - x2 |
|
if x1 >= x2 { |
|
// Overlap detected. |
|
return dmp.diffBisectSplit(runes1, runes2, x1, y1, deadline) |
|
} |
|
} |
|
} |
|
} |
|
} |
|
// Diff took too long and hit the deadline or |
|
// number of diffs equals number of characters, no commonality at all. |
|
return []Diff{ |
|
Diff{DiffDelete, string(runes1)}, |
|
Diff{DiffInsert, string(runes2)}, |
|
} |
|
} |
|
|
|
func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int, |
|
deadline time.Time) []Diff { |
|
runes1a := runes1[:x] |
|
runes2a := runes2[:y] |
|
runes1b := runes1[x:] |
|
runes2b := runes2[y:] |
|
|
|
// Compute both diffs serially. |
|
diffs := dmp.diffMainRunes(runes1a, runes2a, false, deadline) |
|
diffsb := dmp.diffMainRunes(runes1b, runes2b, false, deadline) |
|
|
|
return append(diffs, diffsb...) |
|
} |
|
|
|
// DiffLinesToChars splits two texts into a list of strings. Reduces the texts to a string of |
|
// hashes where each Unicode character represents one line. |
|
// It's slightly faster to call DiffLinesToRunes first, followed by DiffMainRunes. |
|
func (dmp *DiffMatchPatch) DiffLinesToChars(text1, text2 string) (string, string, []string) { |
|
chars1, chars2, lineArray := dmp.DiffLinesToRunes(text1, text2) |
|
return string(chars1), string(chars2), lineArray |
|
} |
|
|
|
// DiffLinesToRunes splits two texts into a list of runes. Each rune represents one line. |
|
func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, []string) { |
|
// '\x00' is a valid character, but various debuggers don't like it. |
|
// So we'll insert a junk entry to avoid generating a null character. |
|
lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n' |
|
lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4 |
|
|
|
chars1 := dmp.diffLinesToRunesMunge(text1, &lineArray, lineHash) |
|
chars2 := dmp.diffLinesToRunesMunge(text2, &lineArray, lineHash) |
|
|
|
return chars1, chars2, lineArray |
|
} |
|
|
|
func (dmp *DiffMatchPatch) diffLinesToRunes(text1, text2 []rune) ([]rune, []rune, []string) { |
|
return dmp.DiffLinesToRunes(string(text1), string(text2)) |
|
} |
|
|
|
// diffLinesToRunesMunge splits a text into an array of strings. Reduces the |
|
// texts to a []rune where each Unicode character represents one line. |
|
// We use strings instead of []runes as input mainly because you can't use []rune as a map key. |
|
func (dmp *DiffMatchPatch) diffLinesToRunesMunge(text string, lineArray *[]string, lineHash map[string]int) []rune { |
|
// Walk the text, pulling out a substring for each line. |
|
// text.split('\n') would would temporarily double our memory footprint. |
|
// Modifying text would create many large strings to garbage collect. |
|
lineStart := 0 |
|
lineEnd := -1 |
|
runes := []rune{} |
|
|
|
for lineEnd < len(text)-1 { |
|
lineEnd = indexOf(text, "\n", lineStart) |
|
|
|
if lineEnd == -1 { |
|
lineEnd = len(text) - 1 |
|
} |
|
|
|
line := text[lineStart : lineEnd+1] |
|
lineStart = lineEnd + 1 |
|
lineValue, ok := lineHash[line] |
|
|
|
if ok { |
|
runes = append(runes, rune(lineValue)) |
|
} else { |
|
*lineArray = append(*lineArray, line) |
|
lineHash[line] = len(*lineArray) - 1 |
|
runes = append(runes, rune(len(*lineArray)-1)) |
|
} |
|
} |
|
|
|
return runes |
|
} |
|
|
|
// DiffCharsToLines rehydrates the text in a diff from a string of line hashes to real lines of |
|
// text. |
|
func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff { |
|
hydrated := make([]Diff, 0, len(diffs)) |
|
for _, aDiff := range diffs { |
|
chars := aDiff.Text |
|
text := make([]string, len(chars)) |
|
|
|
for i, r := range chars { |
|
text[i] = lineArray[r] |
|
} |
|
|
|
aDiff.Text = strings.Join(text, "") |
|
hydrated = append(hydrated, aDiff) |
|
} |
|
return hydrated |
|
} |
|
|
|
// DiffCommonPrefix determines the common prefix length of two strings. |
|
func (dmp *DiffMatchPatch) DiffCommonPrefix(text1, text2 string) int { |
|
// Unused in this code, but retained for interface compatibility. |
|
return commonPrefixLength([]rune(text1), []rune(text2)) |
|
} |
|
|
|
// DiffCommonSuffix determines the common suffix length of two strings. |
|
func (dmp *DiffMatchPatch) DiffCommonSuffix(text1, text2 string) int { |
|
// Unused in this code, but retained for interface compatibility. |
|
return commonSuffixLength([]rune(text1), []rune(text2)) |
|
} |
|
|
|
// commonPrefixLength returns the length of the common prefix of two rune slices. |
|
func commonPrefixLength(text1, text2 []rune) int { |
|
short, long := text1, text2 |
|
if len(short) > len(long) { |
|
short, long = long, short |
|
} |
|
for i, r := range short { |
|
if r != long[i] { |
|
return i |
|
} |
|
} |
|
return len(short) |
|
} |
|
|
|
// commonSuffixLength returns the length of the common suffix of two rune slices. |
|
func commonSuffixLength(text1, text2 []rune) int { |
|
n := min(len(text1), len(text2)) |
|
for i := 0; i < n; i++ { |
|
if text1[len(text1)-i-1] != text2[len(text2)-i-1] { |
|
return i |
|
} |
|
} |
|
return n |
|
|
|
// Binary search. |
|
// Performance analysis: http://neil.fraser.name/news/2007/10/09/ |
|
/* |
|
pointermin := 0 |
|
pointermax := math.Min(len(text1), len(text2)) |
|
pointermid := pointermax |
|
pointerend := 0 |
|
for pointermin < pointermid { |
|
if text1[len(text1)-pointermid:len(text1)-pointerend] == |
|
text2[len(text2)-pointermid:len(text2)-pointerend] { |
|
pointermin = pointermid |
|
pointerend = pointermin |
|
} else { |
|
pointermax = pointermid |
|
} |
|
pointermid = math.Floor((pointermax-pointermin)/2 + pointermin) |
|
} |
|
return pointermid |
|
*/ |
|
} |
|
|
|
// DiffCommonOverlap determines if the suffix of one string is the prefix of another. |
|
func (dmp *DiffMatchPatch) DiffCommonOverlap(text1 string, text2 string) int { |
|
// Cache the text lengths to prevent multiple calls. |
|
text1Length := len(text1) |
|
text2Length := len(text2) |
|
// Eliminate the null case. |
|
if text1Length == 0 || text2Length == 0 { |
|
return 0 |
|
} |
|
// Truncate the longer string. |
|
if text1Length > text2Length { |
|
text1 = text1[text1Length-text2Length:] |
|
} else if text1Length < text2Length { |
|
text2 = text2[0:text1Length] |
|
} |
|
textLength := int(math.Min(float64(text1Length), float64(text2Length))) |
|
// Quick check for the worst case. |
|
if text1 == text2 { |
|
return textLength |
|
} |
|
|
|
// Start by looking for a single character match |
|
// and increase length until no match is found. |
|
// Performance analysis: http://neil.fraser.name/news/2010/11/04/ |
|
best := 0 |
|
length := 1 |
|
for { |
|
pattern := text1[textLength-length:] |
|
found := strings.Index(text2, pattern) |
|
if found == -1 { |
|
break |
|
} |
|
length += found |
|
if found == 0 || text1[textLength-length:] == text2[0:length] { |
|
best = length |
|
length++ |
|
} |
|
} |
|
|
|
return best |
|
} |
|
|
|
// DiffHalfMatch checks whether the two texts share a substring which is at |
|
// least half the length of the longer text. This speedup can produce non-minimal diffs. |
|
func (dmp *DiffMatchPatch) DiffHalfMatch(text1, text2 string) []string { |
|
// Unused in this code, but retained for interface compatibility. |
|
runeSlices := dmp.diffHalfMatch([]rune(text1), []rune(text2)) |
|
if runeSlices == nil { |
|
return nil |
|
} |
|
|
|
result := make([]string, len(runeSlices)) |
|
for i, r := range runeSlices { |
|
result[i] = string(r) |
|
} |
|
return result |
|
} |
|
|
|
func (dmp *DiffMatchPatch) diffHalfMatch(text1, text2 []rune) [][]rune { |
|
if dmp.DiffTimeout <= 0 { |
|
// Don't risk returning a non-optimal diff if we have unlimited time. |
|
return nil |
|
} |
|
|
|
var longtext, shorttext []rune |
|
if len(text1) > len(text2) { |
|
longtext = text1 |
|
shorttext = text2 |
|
} else { |
|
longtext = text2 |
|
shorttext = text1 |
|
} |
|
|
|
if len(longtext) < 4 || len(shorttext)*2 < len(longtext) { |
|
return nil // Pointless. |
|
} |
|
|
|
// First check if the second quarter is the seed for a half-match. |
|
hm1 := dmp.diffHalfMatchI(longtext, shorttext, int(float64(len(longtext)+3)/4)) |
|
|
|
// Check again based on the third quarter. |
|
hm2 := dmp.diffHalfMatchI(longtext, shorttext, int(float64(len(longtext)+1)/2)) |
|
|
|
hm := [][]rune{} |
|
if hm1 == nil && hm2 == nil { |
|
return nil |
|
} else if hm2 == nil { |
|
hm = hm1 |
|
} else if hm1 == nil { |
|
hm = hm2 |
|
} else { |
|
// Both matched. Select the longest. |
|
if len(hm1[4]) > len(hm2[4]) { |
|
hm = hm1 |
|
} else { |
|
hm = hm2 |
|
} |
|
} |
|
|
|
// A half-match was found, sort out the return data. |
|
if len(text1) > len(text2) { |
|
return hm |
|
} |
|
|
|
return [][]rune{hm[2], hm[3], hm[0], hm[1], hm[4]} |
|
} |
|
|
|
// diffHalfMatchI checks if a substring of shorttext exist within longtext such that the substring is at least half the length of longtext? |
|
// @param {string} longtext Longer string. |
|
// @param {string} shorttext Shorter string. |
|
// @param {number} i Start index of quarter length substring within longtext. |
|
// @return {Array.<string>} Five element Array, containing the prefix of |
|
// longtext, the suffix of longtext, the prefix of shorttext, the suffix |
|
// of shorttext and the common middle. Or null if there was no match. |
|
func (dmp *DiffMatchPatch) diffHalfMatchI(l, s []rune, i int) [][]rune { |
|
var bestCommonA []rune |
|
var bestCommonB []rune |
|
var bestCommonLen int |
|
var bestLongtextA []rune |
|
var bestLongtextB []rune |
|
var bestShorttextA []rune |
|
var bestShorttextB []rune |
|
|
|
// Start with a 1/4 length substring at position i as a seed. |
|
seed := l[i : i+len(l)/4] |
|
|
|
for j := runesIndexOf(s, seed, 0); j != -1; j = runesIndexOf(s, seed, j+1) { |
|
prefixLength := commonPrefixLength(l[i:], s[j:]) |
|
suffixLength := commonSuffixLength(l[:i], s[:j]) |
|
|
|
if bestCommonLen < suffixLength+prefixLength { |
|
bestCommonA = s[j-suffixLength : j] |
|
bestCommonB = s[j : j+prefixLength] |
|
bestCommonLen = len(bestCommonA) + len(bestCommonB) |
|
bestLongtextA = l[:i-suffixLength] |
|
bestLongtextB = l[i+prefixLength:] |
|
bestShorttextA = s[:j-suffixLength] |
|
bestShorttextB = s[j+prefixLength:] |
|
} |
|
} |
|
|
|
if bestCommonLen*2 < len(l) { |
|
return nil |
|
} |
|
|
|
return [][]rune{ |
|
bestLongtextA, |
|
bestLongtextB, |
|
bestShorttextA, |
|
bestShorttextB, |
|
append(bestCommonA, bestCommonB...), |
|
} |
|
} |
|
|
|
// DiffCleanupSemantic reduces the number of edits by eliminating |
|
// semantically trivial equalities. |
|
func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff { |
|
changes := false |
|
// Stack of indices where equalities are found. |
|
type equality struct { |
|
data int |
|
next *equality |
|
} |
|
var equalities *equality |
|
|
|
var lastequality string |
|
// Always equal to diffs[equalities[equalitiesLength - 1]][1] |
|
var pointer int // Index of current position. |
|
// Number of characters that changed prior to the equality. |
|
var lengthInsertions1, lengthDeletions1 int |
|
// Number of characters that changed after the equality. |
|
var lengthInsertions2, lengthDeletions2 int |
|
|
|
for pointer < len(diffs) { |
|
if diffs[pointer].Type == DiffEqual { // Equality found. |
|
equalities = &equality{ |
|
data: pointer, |
|
next: equalities, |
|
} |
|
lengthInsertions1 = lengthInsertions2 |
|
lengthDeletions1 = lengthDeletions2 |
|
lengthInsertions2 = 0 |
|
lengthDeletions2 = 0 |
|
lastequality = diffs[pointer].Text |
|
} else { // An insertion or deletion. |
|
if diffs[pointer].Type == DiffInsert { |
|
lengthInsertions2 += len(diffs[pointer].Text) |
|
} else { |
|
lengthDeletions2 += len(diffs[pointer].Text) |
|
} |
|
// Eliminate an equality that is smaller or equal to the edits on both |
|
// sides of it. |
|
difference1 := int(math.Max(float64(lengthInsertions1), float64(lengthDeletions1))) |
|
difference2 := int(math.Max(float64(lengthInsertions2), float64(lengthDeletions2))) |
|
if len(lastequality) > 0 && |
|
(len(lastequality) <= difference1) && |
|
(len(lastequality) <= difference2) { |
|
// Duplicate record. |
|
insPoint := equalities.data |
|
diffs = append( |
|
diffs[:insPoint], |
|
append([]Diff{Diff{DiffDelete, lastequality}}, diffs[insPoint:]...)...) |
|
|
|
// Change second copy to insert. |
|
diffs[insPoint+1].Type = DiffInsert |
|
// Throw away the equality we just deleted. |
|
equalities = equalities.next |
|
|
|
if equalities != nil { |
|
equalities = equalities.next |
|
} |
|
if equalities != nil { |
|
pointer = equalities.data |
|
} else { |
|
pointer = -1 |
|
} |
|
|
|
lengthInsertions1 = 0 // Reset the counters. |
|
lengthDeletions1 = 0 |
|
lengthInsertions2 = 0 |
|
lengthDeletions2 = 0 |
|
lastequality = "" |
|
changes = true |
|
} |
|
} |
|
pointer++ |
|
} |
|
|
|
// Normalize the diff. |
|
if changes { |
|
diffs = dmp.DiffCleanupMerge(diffs) |
|
} |
|
diffs = dmp.DiffCleanupSemanticLossless(diffs) |
|
// Find any overlaps between deletions and insertions. |
|
// e.g: <del>abcxxx</del><ins>xxxdef</ins> |
|
// -> <del>abc</del>xxx<ins>def</ins> |
|
// e.g: <del>xxxabc</del><ins>defxxx</ins> |
|
// -> <ins>def</ins>xxx<del>abc</del> |
|
// Only extract an overlap if it is as big as the edit ahead or behind it. |
|
pointer = 1 |
|
for pointer < len(diffs) { |
|
if diffs[pointer-1].Type == DiffDelete && |
|
diffs[pointer].Type == DiffInsert { |
|
deletion := diffs[pointer-1].Text |
|
insertion := diffs[pointer].Text |
|
overlapLength1 := dmp.DiffCommonOverlap(deletion, insertion) |
|
overlapLength2 := dmp.DiffCommonOverlap(insertion, deletion) |
|
if overlapLength1 >= overlapLength2 { |
|
if float64(overlapLength1) >= float64(len(deletion))/2 || |
|
float64(overlapLength1) >= float64(len(insertion))/2 { |
|
|
|
// Overlap found. Insert an equality and trim the surrounding edits. |
|
diffs = append( |
|
diffs[:pointer], |
|
append([]Diff{Diff{DiffEqual, insertion[:overlapLength1]}}, diffs[pointer:]...)...) |
|
//diffs.splice(pointer, 0, |
|
// [DiffEqual, insertion[0 : overlapLength1)]] |
|
diffs[pointer-1].Text = |
|
deletion[0 : len(deletion)-overlapLength1] |
|
diffs[pointer+1].Text = insertion[overlapLength1:] |
|
pointer++ |
|
} |
|
} else { |
|
if float64(overlapLength2) >= float64(len(deletion))/2 || |
|
float64(overlapLength2) >= float64(len(insertion))/2 { |
|
// Reverse overlap found. |
|
// Insert an equality and swap and trim the surrounding edits. |
|
overlap := Diff{DiffEqual, deletion[:overlapLength2]} |
|
diffs = append( |
|
diffs[:pointer], |
|
append([]Diff{overlap}, diffs[pointer:]...)...) |
|
// diffs.splice(pointer, 0, |
|
// [DiffEqual, deletion[0 : overlapLength2)]] |
|
diffs[pointer-1].Type = DiffInsert |
|
diffs[pointer-1].Text = insertion[0 : len(insertion)-overlapLength2] |
|
diffs[pointer+1].Type = DiffDelete |
|
diffs[pointer+1].Text = deletion[overlapLength2:] |
|
pointer++ |
|
} |
|
} |
|
pointer++ |
|
} |
|
pointer++ |
|
} |
|
|
|
return diffs |
|
} |
|
|
|
// DiffCleanupSemanticLossless looks for single edits surrounded on both sides by equalities |
|
// which can be shifted sideways to align the edit to a word boundary. |
|
// e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came. |
|
func (dmp *DiffMatchPatch) DiffCleanupSemanticLossless(diffs []Diff) []Diff { |
|
|
|
/** |
|
* Given two strings, compute a score representing whether the internal |
|
* boundary falls on logical boundaries. |
|
* Scores range from 6 (best) to 0 (worst). |
|
* Closure, but does not reference any external variables. |
|
* @param {string} one First string. |
|
* @param {string} two Second string. |
|
* @return {number} The score. |
|
* @private |
|
*/ |
|
diffCleanupSemanticScore := func(one, two string) int { |
|
if len(one) == 0 || len(two) == 0 { |
|
// Edges are the best. |
|
return 6 |
|
} |
|
|
|
// Each port of this function behaves slightly differently due to |
|
// subtle differences in each language's definition of things like |
|
// 'whitespace'. Since this function's purpose is largely cosmetic, |
|
// the choice has been made to use each language's native features |
|
// rather than force total conformity. |
|
rune1, _ := utf8.DecodeLastRuneInString(one) |
|
rune2, _ := utf8.DecodeRuneInString(two) |
|
char1 := string(rune1) |
|
char2 := string(rune2) |
|
|
|
nonAlphaNumeric1 := nonAlphaNumericRegex.MatchString(char1) |
|
nonAlphaNumeric2 := nonAlphaNumericRegex.MatchString(char2) |
|
whitespace1 := nonAlphaNumeric1 && whitespaceRegex.MatchString(char1) |
|
whitespace2 := nonAlphaNumeric2 && whitespaceRegex.MatchString(char2) |
|
lineBreak1 := whitespace1 && linebreakRegex.MatchString(char1) |
|
lineBreak2 := whitespace2 && linebreakRegex.MatchString(char2) |
|
blankLine1 := lineBreak1 && blanklineEndRegex.MatchString(one) |
|
blankLine2 := lineBreak2 && blanklineEndRegex.MatchString(two) |
|
|
|
if blankLine1 || blankLine2 { |
|
// Five points for blank lines. |
|
return 5 |
|
} else if lineBreak1 || lineBreak2 { |
|
// Four points for line breaks. |
|
return 4 |
|
} else if nonAlphaNumeric1 && !whitespace1 && whitespace2 { |
|
// Three points for end of sentences. |
|
return 3 |
|
} else if whitespace1 || whitespace2 { |
|
// Two points for whitespace. |
|
return 2 |
|
} else if nonAlphaNumeric1 || nonAlphaNumeric2 { |
|
// One point for non-alphanumeric. |
|
return 1 |
|
} |
|
return 0 |
|
} |
|
|
|
pointer := 1 |
|
|
|
// Intentionally ignore the first and last element (don't need checking). |
|
for pointer < len(diffs)-1 { |
|
if diffs[pointer-1].Type == DiffEqual && |
|
diffs[pointer+1].Type == DiffEqual { |
|
|
|
// This is a single edit surrounded by equalities. |
|
equality1 := diffs[pointer-1].Text |
|
edit := diffs[pointer].Text |
|
equality2 := diffs[pointer+1].Text |
|
|
|
// First, shift the edit as far left as possible. |
|
commonOffset := dmp.DiffCommonSuffix(equality1, edit) |
|
if commonOffset > 0 { |
|
commonString := edit[len(edit)-commonOffset:] |
|
equality1 = equality1[0 : len(equality1)-commonOffset] |
|
edit = commonString + edit[:len(edit)-commonOffset] |
|
equality2 = commonString + equality2 |
|
} |
|
|
|
// Second, step character by character right, looking for the best fit. |
|
bestEquality1 := equality1 |
|
bestEdit := edit |
|
bestEquality2 := equality2 |
|
bestScore := diffCleanupSemanticScore(equality1, edit) + |
|
diffCleanupSemanticScore(edit, equality2) |
|
|
|
for len(edit) != 0 && len(equality2) != 0 { |
|
_, sz := utf8.DecodeRuneInString(edit) |
|
if len(equality2) < sz || edit[:sz] != equality2[:sz] { |
|
break |
|
} |
|
equality1 += edit[:sz] |
|
edit = edit[sz:] + equality2[:sz] |
|
equality2 = equality2[sz:] |
|
score := diffCleanupSemanticScore(equality1, edit) + |
|
diffCleanupSemanticScore(edit, equality2) |
|
// The >= encourages trailing rather than leading whitespace on |
|
// edits. |
|
if score >= bestScore { |
|
bestScore = score |
|
bestEquality1 = equality1 |
|
bestEdit = edit |
|
bestEquality2 = equality2 |
|
} |
|
} |
|
|
|
if diffs[pointer-1].Text != bestEquality1 { |
|
// We have an improvement, save it back to the diff. |
|
if len(bestEquality1) != 0 { |
|
diffs[pointer-1].Text = bestEquality1 |
|
} else { |
|
diffs = splice(diffs, pointer-1, 1) |
|
pointer-- |
|
} |
|
|
|
diffs[pointer].Text = bestEdit |
|
if len(bestEquality2) != 0 { |
|
diffs[pointer+1].Text = bestEquality2 |
|
} else { |
|
//splice(diffs, pointer+1, 1) |
|
diffs = append(diffs[:pointer+1], diffs[pointer+2:]...) |
|
pointer-- |
|
} |
|
} |
|
} |
|
pointer++ |
|
} |
|
|
|
return diffs |
|
} |
|
|
|
// DiffCleanupEfficiency reduces the number of edits by eliminating |
|
// operationally trivial equalities. |
|
func (dmp *DiffMatchPatch) DiffCleanupEfficiency(diffs []Diff) []Diff { |
|
changes := false |
|
// Stack of indices where equalities are found. |
|
type equality struct { |
|
data int |
|
next *equality |
|
} |
|
var equalities *equality |
|
// Always equal to equalities[equalitiesLength-1][1] |
|
lastequality := "" |
|
pointer := 0 // Index of current position. |
|
// Is there an insertion operation before the last equality. |
|
preIns := false |
|
// Is there a deletion operation before the last equality. |
|
preDel := false |
|
// Is there an insertion operation after the last equality. |
|
postIns := false |
|
// Is there a deletion operation after the last equality. |
|
postDel := false |
|
for pointer < len(diffs) { |
|
if diffs[pointer].Type == DiffEqual { // Equality found. |
|
if len(diffs[pointer].Text) < dmp.DiffEditCost && |
|
(postIns || postDel) { |
|
// Candidate found. |
|
equalities = &equality{ |
|
data: pointer, |
|
next: equalities, |
|
} |
|
preIns = postIns |
|
preDel = postDel |
|
lastequality = diffs[pointer].Text |
|
} else { |
|
// Not a candidate, and can never become one. |
|
equalities = nil |
|
lastequality = "" |
|
} |
|
postIns = false |
|
postDel = false |
|
} else { // An insertion or deletion. |
|
if diffs[pointer].Type == DiffDelete { |
|
postDel = true |
|
} else { |
|
postIns = true |
|
} |
|
/* |
|
* Five types to be split: |
|
* <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del> |
|
* <ins>A</ins>X<ins>C</ins><del>D</del> |
|
* <ins>A</ins><del>B</del>X<ins>C</ins> |
|
* <ins>A</del>X<ins>C</ins><del>D</del> |
|
* <ins>A</ins><del>B</del>X<del>C</del> |
|
*/ |
|
var sumPres int |
|
if preIns { |
|
sumPres++ |
|
} |
|
if preDel { |
|
sumPres++ |
|
} |
|
if postIns { |
|
sumPres++ |
|
} |
|
if postDel { |
|
sumPres++ |
|
} |
|
if len(lastequality) > 0 && |
|
((preIns && preDel && postIns && postDel) || |
|
((len(lastequality) < dmp.DiffEditCost/2) && sumPres == 3)) { |
|
|
|
insPoint := equalities.data |
|
|
|
// Duplicate record. |
|
diffs = append(diffs[:insPoint], |
|
append([]Diff{Diff{DiffDelete, lastequality}}, diffs[insPoint:]...)...) |
|
|
|
// Change second copy to insert. |
|
diffs[insPoint+1].Type = DiffInsert |
|
// Throw away the equality we just deleted. |
|
equalities = equalities.next |
|
lastequality = "" |
|
|
|
if preIns && preDel { |
|
// No changes made which could affect previous entry, keep going. |
|
postIns = true |
|
postDel = true |
|
equalities = nil |
|
} else { |
|
if equalities != nil { |
|
equalities = equalities.next |
|
} |
|
if equalities != nil { |
|
pointer = equalities.data |
|
} else { |
|
pointer = -1 |
|
} |
|
postIns = false |
|
postDel = false |
|
} |
|
changes = true |
|
} |
|
} |
|
pointer++ |
|
} |
|
|
|
if changes { |
|
diffs = dmp.DiffCleanupMerge(diffs) |
|
} |
|
|
|
return diffs |
|
} |
|
|
|
// DiffCleanupMerge reorders and merges like edit sections. Merge equalities. |
|
// Any edit section can move as long as it doesn't cross an equality. |
|
func (dmp *DiffMatchPatch) DiffCleanupMerge(diffs []Diff) []Diff { |
|
// Add a dummy entry at the end. |
|
diffs = append(diffs, Diff{DiffEqual, ""}) |
|
pointer := 0 |
|
countDelete := 0 |
|
countInsert := 0 |
|
commonlength := 0 |
|
textDelete := []rune(nil) |
|
textInsert := []rune(nil) |
|
|
|
for pointer < len(diffs) { |
|
switch diffs[pointer].Type { |
|
case DiffInsert: |
|
countInsert++ |
|
textInsert = append(textInsert, []rune(diffs[pointer].Text)...) |
|
pointer++ |
|
break |
|
case DiffDelete: |
|
countDelete++ |
|
textDelete = append(textDelete, []rune(diffs[pointer].Text)...) |
|
pointer++ |
|
break |
|
case DiffEqual: |
|
// Upon reaching an equality, check for prior redundancies. |
|
if countDelete+countInsert > 1 { |
|
if countDelete != 0 && countInsert != 0 { |
|
// Factor out any common prefixies. |
|
commonlength = commonPrefixLength(textInsert, textDelete) |
|
if commonlength != 0 { |
|
x := pointer - countDelete - countInsert |
|
if x > 0 && diffs[x-1].Type == DiffEqual { |
|
diffs[x-1].Text += string(textInsert[:commonlength]) |
|
} else { |
|
diffs = append([]Diff{Diff{DiffEqual, string(textInsert[:commonlength])}}, diffs...) |
|
pointer++ |
|
} |
|
textInsert = textInsert[commonlength:] |
|
textDelete = textDelete[commonlength:] |
|
} |
|
// Factor out any common suffixies. |
|
commonlength = commonSuffixLength(textInsert, textDelete) |
|
if commonlength != 0 { |
|
insertIndex := len(textInsert) - commonlength |
|
deleteIndex := len(textDelete) - commonlength |
|
diffs[pointer].Text = string(textInsert[insertIndex:]) + diffs[pointer].Text |
|
textInsert = textInsert[:insertIndex] |
|
textDelete = textDelete[:deleteIndex] |
|
} |
|
} |
|
// Delete the offending records and add the merged ones. |
|
if countDelete == 0 { |
|
diffs = splice(diffs, pointer-countInsert, |
|
countDelete+countInsert, |
|
Diff{DiffInsert, string(textInsert)}) |
|
} else if countInsert == 0 { |
|
diffs = splice(diffs, pointer-countDelete, |
|
countDelete+countInsert, |
|
Diff{DiffDelete, string(textDelete)}) |
|
} else { |
|
diffs = splice(diffs, pointer-countDelete-countInsert, |
|
countDelete+countInsert, |
|
Diff{DiffDelete, string(textDelete)}, |
|
Diff{DiffInsert, string(textInsert)}) |
|
} |
|
|
|
pointer = pointer - countDelete - countInsert + 1 |
|
if countDelete != 0 { |
|
pointer++ |
|
} |
|
if countInsert != 0 { |
|
pointer++ |
|
} |
|
} else if pointer != 0 && diffs[pointer-1].Type == DiffEqual { |
|
// Merge this equality with the previous one. |
|
diffs[pointer-1].Text += diffs[pointer].Text |
|
diffs = append(diffs[:pointer], diffs[pointer+1:]...) |
|
} else { |
|
pointer++ |
|
} |
|
countInsert = 0 |
|
countDelete = 0 |
|
textDelete = nil |
|
textInsert = nil |
|
break |
|
} |
|
} |
|
|
|
if len(diffs[len(diffs)-1].Text) == 0 { |
|
diffs = diffs[0 : len(diffs)-1] // Remove the dummy entry at the end. |
|
} |
|
|
|
// Second pass: look for single edits surrounded on both sides by |
|
// equalities which can be shifted sideways to eliminate an equality. |
|
// e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC |
|
changes := false |
|
pointer = 1 |
|
// Intentionally ignore the first and last element (don't need checking). |
|
for pointer < (len(diffs) - 1) { |
|
if diffs[pointer-1].Type == DiffEqual && |
|
diffs[pointer+1].Type == DiffEqual { |
|
// This is a single edit surrounded by equalities. |
|
if strings.HasSuffix(diffs[pointer].Text, diffs[pointer-1].Text) { |
|
// Shift the edit over the previous equality. |
|
diffs[pointer].Text = diffs[pointer-1].Text + |
|
diffs[pointer].Text[:len(diffs[pointer].Text)-len(diffs[pointer-1].Text)] |
|
diffs[pointer+1].Text = diffs[pointer-1].Text + diffs[pointer+1].Text |
|
diffs = splice(diffs, pointer-1, 1) |
|
changes = true |
|
} else if strings.HasPrefix(diffs[pointer].Text, diffs[pointer+1].Text) { |
|
// Shift the edit over the next equality. |
|
diffs[pointer-1].Text += diffs[pointer+1].Text |
|
diffs[pointer].Text = |
|
diffs[pointer].Text[len(diffs[pointer+1].Text):] + diffs[pointer+1].Text |
|
diffs = splice(diffs, pointer+1, 1) |
|
changes = true |
|
} |
|
} |
|
pointer++ |
|
} |
|
|
|
// If shifts were made, the diff needs reordering and another shift sweep. |
|
if changes { |
|
diffs = dmp.DiffCleanupMerge(diffs) |
|
} |
|
|
|
return diffs |
|
} |
|
|
|
// DiffXIndex returns the equivalent location in s2. |
|
// loc is a location in text1, comAdde and return the equivalent location in |
|
// text2. |
|
// e.g. "The cat" vs "The big cat", 1->1, 5->8 |
|
func (dmp *DiffMatchPatch) DiffXIndex(diffs []Diff, loc int) int { |
|
chars1 := 0 |
|
chars2 := 0 |
|
lastChars1 := 0 |
|
lastChars2 := 0 |
|
lastDiff := Diff{} |
|
for i := 0; i < len(diffs); i++ { |
|
aDiff := diffs[i] |
|
if aDiff.Type != DiffInsert { |
|
// Equality or deletion. |
|
chars1 += len(aDiff.Text) |
|
} |
|
if aDiff.Type != DiffDelete { |
|
// Equality or insertion. |
|
chars2 += len(aDiff.Text) |
|
} |
|
if chars1 > loc { |
|
// Overshot the location. |
|
lastDiff = aDiff |
|
break |
|
} |
|
lastChars1 = chars1 |
|
lastChars2 = chars2 |
|
} |
|
if lastDiff.Type == DiffDelete { |
|
// The location was deleted. |
|
return lastChars2 |
|
} |
|
// Add the remaining character length. |
|
return lastChars2 + (loc - lastChars1) |
|
} |
|
|
|
// DiffPrettyHtml converts a []Diff into a pretty HTML report. |
|
// It is intended as an example from which to write one's own |
|
// display functions. |
|
func (dmp *DiffMatchPatch) DiffPrettyHtml(diffs []Diff) string { |
|
var buff bytes.Buffer |
|
for _, diff := range diffs { |
|
text := strings.Replace(html.EscapeString(diff.Text), "\n", "¶<br>", -1) |
|
switch diff.Type { |
|
case DiffInsert: |
|
_, _ = buff.WriteString("<ins style=\"background:#e6ffe6;\">") |
|
_, _ = buff.WriteString(text) |
|
_, _ = buff.WriteString("</ins>") |
|
case DiffDelete: |
|
_, _ = buff.WriteString("<del style=\"background:#ffe6e6;\">") |
|
_, _ = buff.WriteString(text) |
|
_, _ = buff.WriteString("</del>") |
|
case DiffEqual: |
|
_, _ = buff.WriteString("<span>") |
|
_, _ = buff.WriteString(text) |
|
_, _ = buff.WriteString("</span>") |
|
} |
|
} |
|
return buff.String() |
|
} |
|
|
|
// DiffPrettyText converts a []Diff into a colored text report. |
|
func (dmp *DiffMatchPatch) DiffPrettyText(diffs []Diff) string { |
|
var buff bytes.Buffer |
|
for _, diff := range diffs { |
|
text := diff.Text |
|
|
|
switch diff.Type { |
|
case DiffInsert: |
|
_, _ = buff.WriteString("\x1b[32m") |
|
_, _ = buff.WriteString(text) |
|
_, _ = buff.WriteString("\x1b[0m") |
|
case DiffDelete: |
|
_, _ = buff.WriteString("\x1b[31m") |
|
_, _ = buff.WriteString(text) |
|
_, _ = buff.WriteString("\x1b[0m") |
|
case DiffEqual: |
|
_, _ = buff.WriteString(text) |
|
} |
|
} |
|
|
|
return buff.String() |
|
} |
|
|
|
// DiffText1 computes and returns the source text (all equalities and deletions). |
|
func (dmp *DiffMatchPatch) DiffText1(diffs []Diff) string { |
|
//StringBuilder text = new StringBuilder() |
|
var text bytes.Buffer |
|
|
|
for _, aDiff := range diffs { |
|
if aDiff.Type != DiffInsert { |
|
_, _ = text.WriteString(aDiff.Text) |
|
} |
|
} |
|
return text.String() |
|
} |
|
|
|
// DiffText2 computes and returns the destination text (all equalities and insertions). |
|
func (dmp *DiffMatchPatch) DiffText2(diffs []Diff) string { |
|
var text bytes.Buffer |
|
|
|
for _, aDiff := range diffs { |
|
if aDiff.Type != DiffDelete { |
|
_, _ = text.WriteString(aDiff.Text) |
|
} |
|
} |
|
return text.String() |
|
} |
|
|
|
// DiffLevenshtein computes the Levenshtein distance; the number of inserted, deleted or |
|
// substituted characters. |
|
func (dmp *DiffMatchPatch) DiffLevenshtein(diffs []Diff) int { |
|
levenshtein := 0 |
|
insertions := 0 |
|
deletions := 0 |
|
|
|
for _, aDiff := range diffs { |
|
switch aDiff.Type { |
|
case DiffInsert: |
|
insertions += len(aDiff.Text) |
|
case DiffDelete: |
|
deletions += len(aDiff.Text) |
|
case DiffEqual: |
|
// A deletion and an insertion is one substitution. |
|
levenshtein += max(insertions, deletions) |
|
insertions = 0 |
|
deletions = 0 |
|
} |
|
} |
|
|
|
levenshtein += max(insertions, deletions) |
|
return levenshtein |
|
} |
|
|
|
// DiffToDelta crushes the diff into an encoded string which describes the operations |
|
// required to transform text1 into text2. |
|
// E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. |
|
// Operations are tab-separated. Inserted text is escaped using %xx |
|
// notation. |
|
func (dmp *DiffMatchPatch) DiffToDelta(diffs []Diff) string { |
|
var text bytes.Buffer |
|
for _, aDiff := range diffs { |
|
switch aDiff.Type { |
|
case DiffInsert: |
|
_, _ = text.WriteString("+") |
|
_, _ = text.WriteString(strings.Replace(url.QueryEscape(aDiff.Text), "+", " ", -1)) |
|
_, _ = text.WriteString("\t") |
|
break |
|
case DiffDelete: |
|
_, _ = text.WriteString("-") |
|
_, _ = text.WriteString(strconv.Itoa(utf8.RuneCountInString(aDiff.Text))) |
|
_, _ = text.WriteString("\t") |
|
break |
|
case DiffEqual: |
|
_, _ = text.WriteString("=") |
|
_, _ = text.WriteString(strconv.Itoa(utf8.RuneCountInString(aDiff.Text))) |
|
_, _ = text.WriteString("\t") |
|
break |
|
} |
|
} |
|
delta := text.String() |
|
if len(delta) != 0 { |
|
// Strip off trailing tab character. |
|
delta = delta[0 : utf8.RuneCountInString(delta)-1] |
|
delta = unescaper.Replace(delta) |
|
} |
|
return delta |
|
} |
|
|
|
// DiffFromDelta given the original text1, and an encoded string which describes the |
|
// operations required to transform text1 into text2, comAdde the full diff. |
|
func (dmp *DiffMatchPatch) DiffFromDelta(text1, delta string) (diffs []Diff, err error) { |
|
diffs = []Diff{} |
|
|
|
defer func() { |
|
if r := recover(); r != nil { |
|
err = r.(error) |
|
} |
|
}() |
|
|
|
pointer := 0 // Cursor in text1 |
|
tokens := strings.Split(delta, "\t") |
|
|
|
for _, token := range tokens { |
|
if len(token) == 0 { |
|
// Blank tokens are ok (from a trailing \t). |
|
continue |
|
} |
|
|
|
// Each token begins with a one character parameter which specifies the |
|
// operation of this token (delete, insert, equality). |
|
param := token[1:] |
|
|
|
switch op := token[0]; op { |
|
case '+': |
|
// decode would Diff all "+" to " " |
|
param = strings.Replace(param, "+", "%2b", -1) |
|
param, err = url.QueryUnescape(param) |
|
if err != nil { |
|
return nil, err |
|
} |
|
if !utf8.ValidString(param) { |
|
return nil, fmt.Errorf("invalid UTF-8 token: %q", param) |
|
} |
|
diffs = append(diffs, Diff{DiffInsert, param}) |
|
case '=', '-': |
|
n, err := strconv.ParseInt(param, 10, 0) |
|
if err != nil { |
|
return diffs, err |
|
} else if n < 0 { |
|
return diffs, errors.New("Negative number in DiffFromDelta: " + param) |
|
} |
|
|
|
// remember that string slicing is by byte - we want by rune here. |
|
text := string([]rune(text1)[pointer : pointer+int(n)]) |
|
pointer += int(n) |
|
|
|
if op == '=' { |
|
diffs = append(diffs, Diff{DiffEqual, text}) |
|
} else { |
|
diffs = append(diffs, Diff{DiffDelete, text}) |
|
} |
|
default: |
|
// Anything else is an error. |
|
return diffs, errors.New("Invalid diff operation in DiffFromDelta: " + string(token[0])) |
|
} |
|
} |
|
|
|
if pointer != len([]rune(text1)) { |
|
return diffs, fmt.Errorf("Delta length (%v) smaller than source text length (%v)", pointer, len(text1)) |
|
} |
|
return diffs, err |
|
} |
|
|
|
// MATCH FUNCTIONS |
|
|
|
// MatchMain locates the best instance of 'pattern' in 'text' near 'loc'. |
|
// Returns -1 if no match found. |
|
func (dmp *DiffMatchPatch) MatchMain(text, pattern string, loc int) int { |
|
// Check for null inputs not needed since null can't be passed in C#. |
|
|
|
loc = int(math.Max(0, math.Min(float64(loc), float64(len(text))))) |
|
if text == pattern { |
|
// Shortcut (potentially not guaranteed by the algorithm) |
|
return 0 |
|
} else if len(text) == 0 { |
|
// Nothing to match. |
|
return -1 |
|
} else if loc+len(pattern) <= len(text) && text[loc:loc+len(pattern)] == pattern { |
|
// Perfect match at the perfect spot! (Includes case of null pattern) |
|
return loc |
|
} |
|
// Do a fuzzy compare. |
|
return dmp.MatchBitap(text, pattern, loc) |
|
} |
|
|
|
// MatchBitap locates the best instance of 'pattern' in 'text' near 'loc' using the |
|
// Bitap algorithm. Returns -1 if no match found. |
|
func (dmp *DiffMatchPatch) MatchBitap(text, pattern string, loc int) int { |
|
// Initialise the alphabet. |
|
s := dmp.MatchAlphabet(pattern) |
|
|
|
// Highest score beyond which we give up. |
|
scoreThreshold := dmp.MatchThreshold |
|
// Is there a nearby exact match? (speedup) |
|
bestLoc := indexOf(text, pattern, loc) |
|
if bestLoc != -1 { |
|
scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc, |
|
pattern), scoreThreshold) |
|
// What about in the other direction? (speedup) |
|
bestLoc = lastIndexOf(text, pattern, loc+len(pattern)) |
|
if bestLoc != -1 { |
|
scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc, |
|
pattern), scoreThreshold) |
|
} |
|
} |
|
|
|
// Initialise the bit arrays. |
|
matchmask := 1 << uint((len(pattern) - 1)) |
|
bestLoc = -1 |
|
|
|
var binMin, binMid int |
|
binMax := len(pattern) + len(text) |
|
lastRd := []int{} |
|
for d := 0; d < len(pattern); d++ { |
|
// Scan for the best match; each iteration allows for one more error. |
|
// Run a binary search to determine how far from 'loc' we can stray at |
|
// this error level. |
|
binMin = 0 |
|
binMid = binMax |
|
for binMin < binMid { |
|
if dmp.matchBitapScore(d, loc+binMid, loc, pattern) <= scoreThreshold { |
|
binMin = binMid |
|
} else { |
|
binMax = binMid |
|
} |
|
binMid = (binMax-binMin)/2 + binMin |
|
} |
|
// Use the result from this iteration as the maximum for the next. |
|
binMax = binMid |
|
start := int(math.Max(1, float64(loc-binMid+1))) |
|
finish := int(math.Min(float64(loc+binMid), float64(len(text))) + float64(len(pattern))) |
|
|
|
rd := make([]int, finish+2) |
|
rd[finish+1] = (1 << uint(d)) - 1 |
|
|
|
for j := finish; j >= start; j-- { |
|
var charMatch int |
|
if len(text) <= j-1 { |
|
// Out of range. |
|
charMatch = 0 |
|
} else if _, ok := s[text[j-1]]; !ok { |
|
charMatch = 0 |
|
} else { |
|
charMatch = s[text[j-1]] |
|
} |
|
|
|
if d == 0 { |
|
// First pass: exact match. |
|
rd[j] = ((rd[j+1] << 1) | 1) & charMatch |
|
} else { |
|
// Subsequent passes: fuzzy match. |
|
rd[j] = ((rd[j+1]<<1)|1)&charMatch | (((lastRd[j+1] | lastRd[j]) << 1) | 1) | lastRd[j+1] |
|
} |
|
if (rd[j] & matchmask) != 0 { |
|
score := dmp.matchBitapScore(d, j-1, loc, pattern) |
|
// This match will almost certainly be better than any existing |
|
// match. But check anyway. |
|
if score <= scoreThreshold { |
|
// Told you so. |
|
scoreThreshold = score |
|
bestLoc = j - 1 |
|
if bestLoc > loc { |
|
// When passing loc, don't exceed our current distance from loc. |
|
start = int(math.Max(1, float64(2*loc-bestLoc))) |
|
} else { |
|
// Already passed loc, downhill from here on in. |
|
break |
|
} |
|
} |
|
} |
|
} |
|
if dmp.matchBitapScore(d+1, loc, loc, pattern) > scoreThreshold { |
|
// No hope for a (better) match at greater error levels. |
|
break |
|
} |
|
lastRd = rd |
|
} |
|
return bestLoc |
|
} |
|
|
|
// matchBitapScore computes and returns the score for a match with e errors and x location. |
|
func (dmp *DiffMatchPatch) matchBitapScore(e, x, loc int, pattern string) float64 { |
|
accuracy := float64(e) / float64(len(pattern)) |
|
proximity := math.Abs(float64(loc - x)) |
|
if dmp.MatchDistance == 0 { |
|
// Dodge divide by zero error. |
|
if proximity == 0 { |
|
return accuracy |
|
} |
|
|
|
return 1.0 |
|
} |
|
return accuracy + (proximity / float64(dmp.MatchDistance)) |
|
} |
|
|
|
// MatchAlphabet initialises the alphabet for the Bitap algorithm. |
|
func (dmp *DiffMatchPatch) MatchAlphabet(pattern string) map[byte]int { |
|
s := map[byte]int{} |
|
charPattern := []byte(pattern) |
|
for _, c := range charPattern { |
|
_, ok := s[c] |
|
if !ok { |
|
s[c] = 0 |
|
} |
|
} |
|
i := 0 |
|
|
|
for _, c := range charPattern { |
|
value := s[c] | int(uint(1)<<uint((len(pattern)-i-1))) |
|
s[c] = value |
|
i++ |
|
} |
|
return s |
|
} |
|
|
|
// PATCH FUNCTIONS |
|
|
|
// PatchAddContext increases the context until it is unique, |
|
// but doesn't let the pattern expand beyond MatchMaxBits. |
|
func (dmp *DiffMatchPatch) PatchAddContext(patch Patch, text string) Patch { |
|
if len(text) == 0 { |
|
return patch |
|
} |
|
|
|
pattern := text[patch.start2 : patch.start2+patch.length1] |
|
padding := 0 |
|
|
|
// Look for the first and last matches of pattern in text. If two |
|
// different matches are found, increase the pattern length. |
|
for strings.Index(text, pattern) != strings.LastIndex(text, pattern) && |
|
len(pattern) < dmp.MatchMaxBits-2*dmp.PatchMargin { |
|
padding += dmp.PatchMargin |
|
maxStart := max(0, patch.start2-padding) |
|
minEnd := min(len(text), patch.start2+patch.length1+padding) |
|
pattern = text[maxStart:minEnd] |
|
} |
|
// Add one chunk for good luck. |
|
padding += dmp.PatchMargin |
|
|
|
// Add the prefix. |
|
prefix := text[max(0, patch.start2-padding):patch.start2] |
|
if len(prefix) != 0 { |
|
patch.diffs = append([]Diff{Diff{DiffEqual, prefix}}, patch.diffs...) |
|
} |
|
// Add the suffix. |
|
suffix := text[patch.start2+patch.length1 : min(len(text), patch.start2+patch.length1+padding)] |
|
if len(suffix) != 0 { |
|
patch.diffs = append(patch.diffs, Diff{DiffEqual, suffix}) |
|
} |
|
|
|
// Roll back the start points. |
|
patch.start1 -= len(prefix) |
|
patch.start2 -= len(prefix) |
|
// Extend the lengths. |
|
patch.length1 += len(prefix) + len(suffix) |
|
patch.length2 += len(prefix) + len(suffix) |
|
|
|
return patch |
|
} |
|
|
|
// PatchMake computes a list of patches. |
|
func (dmp *DiffMatchPatch) PatchMake(opt ...interface{}) []Patch { |
|
if len(opt) == 1 { |
|
diffs, _ := opt[0].([]Diff) |
|
text1 := dmp.DiffText1(diffs) |
|
return dmp.PatchMake(text1, diffs) |
|
} else if len(opt) == 2 { |
|
text1 := opt[0].(string) |
|
switch t := opt[1].(type) { |
|
case string: |
|
diffs := dmp.DiffMain(text1, t, true) |
|
if len(diffs) > 2 { |
|
diffs = dmp.DiffCleanupSemantic(diffs) |
|
diffs = dmp.DiffCleanupEfficiency(diffs) |
|
} |
|
return dmp.PatchMake(text1, diffs) |
|
case []Diff: |
|
return dmp.patchMake2(text1, t) |
|
} |
|
} else if len(opt) == 3 { |
|
return dmp.PatchMake(opt[0], opt[2]) |
|
} |
|
return []Patch{} |
|
} |
|
|
|
// patchMake2 computes a list of patches to turn text1 into text2. |
|
// text2 is not provided, diffs are the delta between text1 and text2. |
|
func (dmp *DiffMatchPatch) patchMake2(text1 string, diffs []Diff) []Patch { |
|
// Check for null inputs not needed since null can't be passed in C#. |
|
patches := []Patch{} |
|
if len(diffs) == 0 { |
|
return patches // Get rid of the null case. |
|
} |
|
|
|
patch := Patch{} |
|
charCount1 := 0 // Number of characters into the text1 string. |
|
charCount2 := 0 // Number of characters into the text2 string. |
|
// Start with text1 (prepatchText) and apply the diffs until we arrive at |
|
// text2 (postpatchText). We recreate the patches one by one to determine |
|
// context info. |
|
prepatchText := text1 |
|
postpatchText := text1 |
|
|
|
for i, aDiff := range diffs { |
|
if len(patch.diffs) == 0 && aDiff.Type != DiffEqual { |
|
// A new patch starts here. |
|
patch.start1 = charCount1 |
|
patch.start2 = charCount2 |
|
} |
|
|
|
switch aDiff.Type { |
|
case DiffInsert: |
|
patch.diffs = append(patch.diffs, aDiff) |
|
patch.length2 += len(aDiff.Text) |
|
postpatchText = postpatchText[:charCount2] + |
|
aDiff.Text + postpatchText[charCount2:] |
|
case DiffDelete: |
|
patch.length1 += len(aDiff.Text) |
|
patch.diffs = append(patch.diffs, aDiff) |
|
postpatchText = postpatchText[:charCount2] + postpatchText[charCount2+len(aDiff.Text):] |
|
case DiffEqual: |
|
if len(aDiff.Text) <= 2*dmp.PatchMargin && |
|
len(patch.diffs) != 0 && i != len(diffs)-1 { |
|
// Small equality inside a patch. |
|
patch.diffs = append(patch.diffs, aDiff) |
|
patch.length1 += len(aDiff.Text) |
|
patch.length2 += len(aDiff.Text) |
|
} |
|
if len(aDiff.Text) >= 2*dmp.PatchMargin { |
|
// Time for a new patch. |
|
if len(patch.diffs) != 0 { |
|
patch = dmp.PatchAddContext(patch, prepatchText) |
|
patches = append(patches, patch) |
|
patch = Patch{} |
|
// Unlike Unidiff, our patch lists have a rolling context. |
|
// http://code.google.com/p/google-diff-match-patch/wiki/Unidiff |
|
// Update prepatch text & pos to reflect the application of the |
|
// just completed patch. |
|
prepatchText = postpatchText |
|
charCount1 = charCount2 |
|
} |
|
} |
|
} |
|
|
|
// Update the current character count. |
|
if aDiff.Type != DiffInsert { |
|
charCount1 += len(aDiff.Text) |
|
} |
|
if aDiff.Type != DiffDelete { |
|
charCount2 += len(aDiff.Text) |
|
} |
|
} |
|
|
|
// Pick up the leftover patch if not empty. |
|
if len(patch.diffs) != 0 { |
|
patch = dmp.PatchAddContext(patch, prepatchText) |
|
patches = append(patches, patch) |
|
} |
|
|
|
return patches |
|
} |
|
|
|
// PatchDeepCopy returns an array that is identical to a |
|
// given an array of patches. |
|
func (dmp *DiffMatchPatch) PatchDeepCopy(patches []Patch) []Patch { |
|
patchesCopy := []Patch{} |
|
for _, aPatch := range patches { |
|
patchCopy := Patch{} |
|
for _, aDiff := range aPatch.diffs { |
|
patchCopy.diffs = append(patchCopy.diffs, Diff{ |
|
aDiff.Type, |
|
aDiff.Text, |
|
}) |
|
} |
|
patchCopy.start1 = aPatch.start1 |
|
patchCopy.start2 = aPatch.start2 |
|
patchCopy.length1 = aPatch.length1 |
|
patchCopy.length2 = aPatch.length2 |
|
patchesCopy = append(patchesCopy, patchCopy) |
|
} |
|
return patchesCopy |
|
} |
|
|
|
// PatchApply merges a set of patches onto the text. Returns a patched text, as well |
|
// as an array of true/false values indicating which patches were applied. |
|
func (dmp *DiffMatchPatch) PatchApply(patches []Patch, text string) (string, []bool) { |
|
if len(patches) == 0 { |
|
return text, []bool{} |
|
} |
|
|
|
// Deep copy the patches so that no changes are made to originals. |
|
patches = dmp.PatchDeepCopy(patches) |
|
|
|
nullPadding := dmp.PatchAddPadding(patches) |
|
text = nullPadding + text + nullPadding |
|
patches = dmp.PatchSplitMax(patches) |
|
|
|
x := 0 |
|
// delta keeps track of the offset between the expected and actual |
|
// location of the previous patch. If there are patches expected at |
|
// positions 10 and 20, but the first patch was found at 12, delta is 2 |
|
// and the second patch has an effective expected position of 22. |
|
delta := 0 |
|
results := make([]bool, len(patches)) |
|
for _, aPatch := range patches { |
|
expectedLoc := aPatch.start2 + delta |
|
text1 := dmp.DiffText1(aPatch.diffs) |
|
var startLoc int |
|
endLoc := -1 |
|
if len(text1) > dmp.MatchMaxBits { |
|
// PatchSplitMax will only provide an oversized pattern |
|
// in the case of a monster delete. |
|
startLoc = dmp.MatchMain(text, text1[:dmp.MatchMaxBits], expectedLoc) |
|
if startLoc != -1 { |
|
endLoc = dmp.MatchMain(text, |
|
text1[len(text1)-dmp.MatchMaxBits:], expectedLoc+len(text1)-dmp.MatchMaxBits) |
|
if endLoc == -1 || startLoc >= endLoc { |
|
// Can't find valid trailing context. Drop this patch. |
|
startLoc = -1 |
|
} |
|
} |
|
} else { |
|
startLoc = dmp.MatchMain(text, text1, expectedLoc) |
|
} |
|
if startLoc == -1 { |
|
// No match found. :( |
|
results[x] = false |
|
// Subtract the delta for this failed patch from subsequent patches. |
|
delta -= aPatch.length2 - aPatch.length1 |
|
} else { |
|
// Found a match. :) |
|
results[x] = true |
|
delta = startLoc - expectedLoc |
|
var text2 string |
|
if endLoc == -1 { |
|
text2 = text[startLoc:int(math.Min(float64(startLoc+len(text1)), float64(len(text))))] |
|
} else { |
|
text2 = text[startLoc:int(math.Min(float64(endLoc+dmp.MatchMaxBits), float64(len(text))))] |
|
} |
|
if text1 == text2 { |
|
// Perfect match, just shove the Replacement text in. |
|
text = text[:startLoc] + dmp.DiffText2(aPatch.diffs) + text[startLoc+len(text1):] |
|
} else { |
|
// Imperfect match. Run a diff to get a framework of equivalent |
|
// indices. |
|
diffs := dmp.DiffMain(text1, text2, false) |
|
if len(text1) > dmp.MatchMaxBits && float64(dmp.DiffLevenshtein(diffs))/float64(len(text1)) > dmp.PatchDeleteThreshold { |
|
// The end points match, but the content is unacceptably bad. |
|
results[x] = false |
|
} else { |
|
diffs = dmp.DiffCleanupSemanticLossless(diffs) |
|
index1 := 0 |
|
for _, aDiff := range aPatch.diffs { |
|
if aDiff.Type != DiffEqual { |
|
index2 := dmp.DiffXIndex(diffs, index1) |
|
if aDiff.Type == DiffInsert { |
|
// Insertion |
|
text = text[:startLoc+index2] + aDiff.Text + text[startLoc+index2:] |
|
} else if aDiff.Type == DiffDelete { |
|
// Deletion |
|
startIndex := startLoc + index2 |
|
text = text[:startIndex] + |
|
text[startIndex+dmp.DiffXIndex(diffs, index1+len(aDiff.Text))-index2:] |
|
} |
|
} |
|
if aDiff.Type != DiffDelete { |
|
index1 += len(aDiff.Text) |
|
} |
|
} |
|
} |
|
} |
|
} |
|
x++ |
|
} |
|
// Strip the padding off. |
|
text = text[len(nullPadding) : len(nullPadding)+(len(text)-2*len(nullPadding))] |
|
return text, results |
|
} |
|
|
|
// PatchAddPadding adds some padding on text start and end so that edges can match something. |
|
// Intended to be called only from within patchApply. |
|
func (dmp *DiffMatchPatch) PatchAddPadding(patches []Patch) string { |
|
paddingLength := dmp.PatchMargin |
|
nullPadding := "" |
|
for x := 1; x <= paddingLength; x++ { |
|
nullPadding += string(x) |
|
} |
|
|
|
// Bump all the patches forward. |
|
for i := range patches { |
|
patches[i].start1 += paddingLength |
|
patches[i].start2 += paddingLength |
|
} |
|
|
|
// Add some padding on start of first diff. |
|
if len(patches[0].diffs) == 0 || patches[0].diffs[0].Type != DiffEqual { |
|
// Add nullPadding equality. |
|
patches[0].diffs = append([]Diff{Diff{DiffEqual, nullPadding}}, patches[0].diffs...) |
|
patches[0].start1 -= paddingLength // Should be 0. |
|
patches[0].start2 -= paddingLength // Should be 0. |
|
patches[0].length1 += paddingLength |
|
patches[0].length2 += paddingLength |
|
} else if paddingLength > len(patches[0].diffs[0].Text) { |
|
// Grow first equality. |
|
extraLength := paddingLength - len(patches[0].diffs[0].Text) |
|
patches[0].diffs[0].Text = nullPadding[len(patches[0].diffs[0].Text):] + patches[0].diffs[0].Text |
|
patches[0].start1 -= extraLength |
|
patches[0].start2 -= extraLength |
|
patches[0].length1 += extraLength |
|
patches[0].length2 += extraLength |
|
} |
|
|
|
// Add some padding on end of last diff. |
|
last := len(patches) - 1 |
|
if len(patches[last].diffs) == 0 || patches[last].diffs[len(patches[last].diffs)-1].Type != DiffEqual { |
|
// Add nullPadding equality. |
|
patches[last].diffs = append(patches[last].diffs, Diff{DiffEqual, nullPadding}) |
|
patches[last].length1 += paddingLength |
|
patches[last].length2 += paddingLength |
|
} else if paddingLength > len(patches[last].diffs[len(patches[last].diffs)-1].Text) { |
|
// Grow last equality. |
|
lastDiff := patches[last].diffs[len(patches[last].diffs)-1] |
|
extraLength := paddingLength - len(lastDiff.Text) |
|
patches[last].diffs[len(patches[last].diffs)-1].Text += nullPadding[:extraLength] |
|
patches[last].length1 += extraLength |
|
patches[last].length2 += extraLength |
|
} |
|
|
|
return nullPadding |
|
} |
|
|
|
// PatchSplitMax looks through the patches and breaks up any which are longer than the |
|
// maximum limit of the match algorithm. |
|
// Intended to be called only from within patchApply. |
|
func (dmp *DiffMatchPatch) PatchSplitMax(patches []Patch) []Patch { |
|
patchSize := dmp.MatchMaxBits |
|
for x := 0; x < len(patches); x++ { |
|
if patches[x].length1 <= patchSize { |
|
continue |
|
} |
|
bigpatch := patches[x] |
|
// Remove the big old patch. |
|
patches = append(patches[:x], patches[x+1:]...) |
|
x-- |
|
|
|
start1 := bigpatch.start1 |
|
start2 := bigpatch.start2 |
|
precontext := "" |
|
for len(bigpatch.diffs) != 0 { |
|
// Create one of several smaller patches. |
|
patch := Patch{} |
|
empty := true |
|
patch.start1 = start1 - len(precontext) |
|
patch.start2 = start2 - len(precontext) |
|
if len(precontext) != 0 { |
|
patch.length1 = len(precontext) |
|
patch.length2 = len(precontext) |
|
patch.diffs = append(patch.diffs, Diff{DiffEqual, precontext}) |
|
} |
|
for len(bigpatch.diffs) != 0 && patch.length1 < patchSize-dmp.PatchMargin { |
|
diffType := bigpatch.diffs[0].Type |
|
diffText := bigpatch.diffs[0].Text |
|
if diffType == DiffInsert { |
|
// Insertions are harmless. |
|
patch.length2 += len(diffText) |
|
start2 += len(diffText) |
|
patch.diffs = append(patch.diffs, bigpatch.diffs[0]) |
|
bigpatch.diffs = bigpatch.diffs[1:] |
|
empty = false |
|
} else if diffType == DiffDelete && len(patch.diffs) == 1 && patch.diffs[0].Type == DiffEqual && len(diffText) > 2*patchSize { |
|
// This is a large deletion. Let it pass in one chunk. |
|
patch.length1 += len(diffText) |
|
start1 += len(diffText) |
|
empty = false |
|
patch.diffs = append(patch.diffs, Diff{diffType, diffText}) |
|
bigpatch.diffs = bigpatch.diffs[1:] |
|
} else { |
|
// Deletion or equality. Only take as much as we can stomach. |
|
diffText = diffText[:min(len(diffText), patchSize-patch.length1-dmp.PatchMargin)] |
|
|
|
patch.length1 += len(diffText) |
|
start1 += len(diffText) |
|
if diffType == DiffEqual { |
|
patch.length2 += len(diffText) |
|
start2 += len(diffText) |
|
} else { |
|
empty = false |
|
} |
|
patch.diffs = append(patch.diffs, Diff{diffType, diffText}) |
|
if diffText == bigpatch.diffs[0].Text { |
|
bigpatch.diffs = bigpatch.diffs[1:] |
|
} else { |
|
bigpatch.diffs[0].Text = |
|
bigpatch.diffs[0].Text[len(diffText):] |
|
} |
|
} |
|
} |
|
// Compute the head context for the next patch. |
|
precontext = dmp.DiffText2(patch.diffs) |
|
precontext = precontext[max(0, len(precontext)-dmp.PatchMargin):] |
|
|
|
postcontext := "" |
|
// Append the end context for this patch. |
|
if len(dmp.DiffText1(bigpatch.diffs)) > dmp.PatchMargin { |
|
postcontext = dmp.DiffText1(bigpatch.diffs)[:dmp.PatchMargin] |
|
} else { |
|
postcontext = dmp.DiffText1(bigpatch.diffs) |
|
} |
|
|
|
if len(postcontext) != 0 { |
|
patch.length1 += len(postcontext) |
|
patch.length2 += len(postcontext) |
|
if len(patch.diffs) != 0 && patch.diffs[len(patch.diffs)-1].Type == DiffEqual { |
|
patch.diffs[len(patch.diffs)-1].Text += postcontext |
|
} else { |
|
patch.diffs = append(patch.diffs, Diff{DiffEqual, postcontext}) |
|
} |
|
} |
|
if !empty { |
|
x++ |
|
patches = append(patches[:x], append([]Patch{patch}, patches[x:]...)...) |
|
} |
|
} |
|
} |
|
return patches |
|
} |
|
|
|
// PatchToText takes a list of patches and returns a textual representation. |
|
func (dmp *DiffMatchPatch) PatchToText(patches []Patch) string { |
|
var text bytes.Buffer |
|
for _, aPatch := range patches { |
|
_, _ = text.WriteString(aPatch.String()) |
|
} |
|
return text.String() |
|
} |
|
|
|
// PatchFromText parses a textual representation of patches and returns a List of Patch |
|
// objects. |
|
func (dmp *DiffMatchPatch) PatchFromText(textline string) ([]Patch, error) { |
|
patches := []Patch{} |
|
if len(textline) == 0 { |
|
return patches, nil |
|
} |
|
text := strings.Split(textline, "\n") |
|
textPointer := 0 |
|
patchHeader := regexp.MustCompile("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$") |
|
|
|
var patch Patch |
|
var sign uint8 |
|
var line string |
|
for textPointer < len(text) { |
|
|
|
if !patchHeader.MatchString(text[textPointer]) { |
|
return patches, errors.New("Invalid patch string: " + text[textPointer]) |
|
} |
|
|
|
patch = Patch{} |
|
m := patchHeader.FindStringSubmatch(text[textPointer]) |
|
|
|
patch.start1, _ = strconv.Atoi(m[1]) |
|
if len(m[2]) == 0 { |
|
patch.start1-- |
|
patch.length1 = 1 |
|
} else if m[2] == "0" { |
|
patch.length1 = 0 |
|
} else { |
|
patch.start1-- |
|
patch.length1, _ = strconv.Atoi(m[2]) |
|
} |
|
|
|
patch.start2, _ = strconv.Atoi(m[3]) |
|
|
|
if len(m[4]) == 0 { |
|
patch.start2-- |
|
patch.length2 = 1 |
|
} else if m[4] == "0" { |
|
patch.length2 = 0 |
|
} else { |
|
patch.start2-- |
|
patch.length2, _ = strconv.Atoi(m[4]) |
|
} |
|
textPointer++ |
|
|
|
for textPointer < len(text) { |
|
if len(text[textPointer]) > 0 { |
|
sign = text[textPointer][0] |
|
} else { |
|
textPointer++ |
|
continue |
|
} |
|
|
|
line = text[textPointer][1:] |
|
line = strings.Replace(line, "+", "%2b", -1) |
|
line, _ = url.QueryUnescape(line) |
|
if sign == '-' { |
|
// Deletion. |
|
patch.diffs = append(patch.diffs, Diff{DiffDelete, line}) |
|
} else if sign == '+' { |
|
// Insertion. |
|
patch.diffs = append(patch.diffs, Diff{DiffInsert, line}) |
|
} else if sign == ' ' { |
|
// Minor equality. |
|
patch.diffs = append(patch.diffs, Diff{DiffEqual, line}) |
|
} else if sign == '@' { |
|
// Start of next patch. |
|
break |
|
} else { |
|
// WTF? |
|
return patches, errors.New("Invalid patch mode '" + string(sign) + "' in: " + string(line)) |
|
} |
|
textPointer++ |
|
} |
|
|
|
patches = append(patches, patch) |
|
} |
|
return patches, nil |
|
}
|
|
|