PHP's xdiff_string_diff in JavaScript

Here’s what our current JavaScript equivalent to PHP's xdiff_string_diff looks like.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
module.exports = function xdiff_string_diff (oldData, newData, contextLines, minimal) { // eslint-disable-line camelcase
// discuss at: http://locutus.io/php/xdiff_string_diff
// original by: Brett Zamir (http://brett-zamir.me)
// based on: Imgen Tata (http://www.myipdf.com/)
// bugfixed by: Imgen Tata (http://www.myipdf.com/)
// improved by: Brett Zamir (http://brett-zamir.me)
// note 1: The minimal argument is not currently supported
// example 1: xdiff_string_diff('', 'Hello world!')
// returns 1: '@@ -0,0 +1,1 @@\n+Hello world!'

// (This code was done by Imgen Tata; I have only reformatted for use in Locutus)

// See http://en.wikipedia.org/wiki/Diff#Unified_format
var i = 0
var j = 0
var k = 0
var oriHunkStart
var newHunkStart
var oriHunkEnd
var newHunkEnd
var oriHunkLineNo
var newHunkLineNo
var oriHunkSize
var newHunkSize
var MAX_CONTEXT_LINES = Number.POSITIVE_INFINITY // Potential configuration
var MIN_CONTEXT_LINES = 0
var DEFAULT_CONTEXT_LINES = 3
var HEADER_PREFIX = '@@ ' //
var HEADER_SUFFIX = ' @@'
var ORIGINAL_INDICATOR = '-'
var NEW_INDICATOR = '+'
var RANGE_SEPARATOR = ','
var CONTEXT_INDICATOR = ' '
var DELETION_INDICATOR = '-'
var ADDITION_INDICATOR = '+'
var oriLines
var newLines
var NEW_LINE = '\n'

var _trim = function (text) {
if (typeof text !== 'string') {
throw new Error('String parameter required')
}

return text.replace(/(^\s*)|(\s*$)/g, '')
}

var _verifyType = function (type) {
var args = arguments
var argsLen = arguments.length
var basicTypes = ['number', 'boolean', 'string', 'function', 'object', 'undefined']
var basicType
var i
var j
var typeOfType = typeof type
if (typeOfType !== 'string' && typeOfType !== 'function') {
throw new Error('Bad type parameter')
}

if (argsLen < 2) {
throw new Error('Too few arguments')
}

if (typeOfType === 'string') {
type = _trim(type)

if (type === '') {
throw new Error('Bad type parameter')
}

for (j = 0; j < basicTypes.length; j++) {
basicType = basicTypes[j]

if (basicType === type) {
for (i = 1; i < argsLen; i++) {
if (typeof args[i] !== type) {
throw new Error('Bad type')
}
}

return
}
}

throw new Error('Bad type parameter')
}

// Not basic type. we need to use instanceof operator
for (i = 1; i < argsLen; i++) {
if (!(args[i] instanceof type)) {
throw new Error('Bad type')
}
}
}

var _hasValue = function (array, value) {
var i
_verifyType(Array, array)

for (i = 0; i < array.length; i++) {
if (array[i] === value) {
return true
}
}

return false
}

var _areTypeOf = function (type) {
var args = arguments
var argsLen = arguments.length
var basicTypes = ['number', 'boolean', 'string', 'function', 'object', 'undefined']
var basicType
var i
var j
var typeOfType = typeof type

if (typeOfType !== 'string' && typeOfType !== 'function') {
throw new Error('Bad type parameter')
}

if (argsLen < 2) {
throw new Error('Too few arguments')
}

if (typeOfType === 'string') {
type = _trim(type)

if (type === '') {
return false
}

for (j = 0; j < basicTypes.length; j++) {
basicType = basicTypes[j]

if (basicType === type) {
for (i = 1; i < argsLen; i++) {
if (typeof args[i] !== type) {
return false
}
}

return true
}
}

throw new Error('Bad type parameter')
}

// Not basic type. we need to use instanceof operator
for (i = 1; i < argsLen; i++) {
if (!(args[i] instanceof type)) {
return false
}
}

return true
}

var _getInitializedArray = function (arraySize, initValue) {
var array = []
var i
_verifyType('number', arraySize)

for (i = 0; i < arraySize; i++) {
array.push(initValue)
}

return array
}

var _splitIntoLines = function (text) {
_verifyType('string', text)

if (text === '') {
return []
}
return text.split('\n')
}

var _isEmptyArray = function (obj) {
return _areTypeOf(Array, obj) && obj.length === 0
}

/**
* Finds longest common sequence between two sequences
* @see {@link http://wordaligned.org/articles/longest-common-subsequence}
*/

var _findLongestCommonSequence = function (seq1, seq2, seq1IsInLcs, seq2IsInLcs) {
if (!_areTypeOf(Array, seq1, seq2)) {
throw new Error('Array parameters are required')
}

// Deal with edge case
if (_isEmptyArray(seq1) || _isEmptyArray(seq2)) {
return []
}

// Function to calculate lcs lengths
var lcsLens = function (xs, ys) {
var i
var j
var prev
var curr = _getInitializedArray(ys.length + 1, 0)

for (i = 0; i < xs.length; i++) {
prev = curr.slice(0)
for (j = 0; j < ys.length; j++) {
if (xs[i] === ys[j]) {
curr[j + 1] = prev[j] + 1
} else {
curr[j + 1] = Math.max(curr[j], prev[j + 1])
}
}
}

return curr
}

// Function to find lcs and fill in the array to indicate the optimal longest common sequence
var _findLcs = function (xs, xidx, xIsIn, ys) {
var i
var xb
var xe
var llB
var llE
var pivot
var max
var yb
var ye
var nx = xs.length
var ny = ys.length

if (nx === 0) {
return []
}
if (nx === 1) {
if (_hasValue(ys, xs[0])) {
xIsIn[xidx] = true
return [xs[0]]
}
return []
}
i = Math.floor(nx / 2)
xb = xs.slice(0, i)
xe = xs.slice(i)
llB = lcsLens(xb, ys)
llE = lcsLens(xe.slice(0)
.reverse(), ys.slice(0)
.reverse())

pivot = 0
max = 0
for (j = 0; j <= ny; j++) {
if (llB[j] + llE[ny - j] > max) {
pivot = j
max = llB[j] + llE[ny - j]
}
}
yb = ys.slice(0, pivot)
ye = ys.slice(pivot)
return _findLcs(xb, xidx, xIsIn, yb).concat(_findLcs(xe, xidx + i, xIsIn, ye))
}

// Fill in seq1IsInLcs to find the optimal longest common subsequence of first sequence
_findLcs(seq1, 0, seq1IsInLcs, seq2)
// Fill in seq2IsInLcs to find the optimal longest common subsequence
// of second sequence and return the result
return _findLcs(seq2, 0, seq2IsInLcs, seq1)
}

// First, check the parameters
if (_areTypeOf('string', oldData, newData) === false) {
return false
}

if (oldData === newData) {
return ''
}

if (typeof contextLines !== 'number' ||
contextLines > MAX_CONTEXT_LINES ||
contextLines < MIN_CONTEXT_LINES) {
contextLines = DEFAULT_CONTEXT_LINES
}

oriLines = _splitIntoLines(oldData)
newLines = _splitIntoLines(newData)
var oriLen = oriLines.length
var newLen = newLines.length
var oriIsInLcs = _getInitializedArray(oriLen, false)
var newIsInLcs = _getInitializedArray(newLen, false)
var lcsLen = _findLongestCommonSequence(oriLines, newLines, oriIsInLcs, newIsInLcs).length
var unidiff = ''

if (lcsLen === 0) {
// No common sequence
unidiff = [
HEADER_PREFIX,
ORIGINAL_INDICATOR,
(oriLen > 0 ? '1' : '0'),
RANGE_SEPARATOR,
oriLen,
' ',
NEW_INDICATOR,
(newLen > 0 ? '1' : '0'),
RANGE_SEPARATOR,
newLen,
HEADER_SUFFIX
].join('')

for (i = 0; i < oriLen; i++) {
unidiff += NEW_LINE + DELETION_INDICATOR + oriLines[i]
}

for (j = 0; j < newLen; j++) {
unidiff += NEW_LINE + ADDITION_INDICATOR + newLines[j]
}

return unidiff
}

var leadingContext = []
var trailingContext = []
var actualLeadingContext = []
var actualTrailingContext = []

// Regularize leading context by the contextLines parameter
var regularizeLeadingContext = function (context) {
if (context.length === 0 || contextLines === 0) {
return []
}

var contextStartPos = Math.max(context.length - contextLines, 0)

return context.slice(contextStartPos)
}

// Regularize trailing context by the contextLines parameter
var regularizeTrailingContext = function (context) {
if (context.length === 0 || contextLines === 0) {
return []
}

return context.slice(0, Math.min(contextLines, context.length))
}

// Skip common lines in the beginning
while (i < oriLen && oriIsInLcs[i] === true && newIsInLcs[i] === true) {
leadingContext.push(oriLines[i])
i++
}

j = i
// The index in the longest common sequence
k = i
oriHunkStart = i
newHunkStart = j
oriHunkEnd = i
newHunkEnd = j

while (i < oriLen || j < newLen) {
while (i < oriLen && oriIsInLcs[i] === false) {
i++
}
oriHunkEnd = i

while (j < newLen && newIsInLcs[j] === false) {
j++
}
newHunkEnd = j

// Find the trailing context
trailingContext = []
while (i < oriLen && oriIsInLcs[i] === true && j < newLen && newIsInLcs[j] === true) {
trailingContext.push(oriLines[i])
k++
i++
j++
}

if (k >= lcsLen || // No more in longest common lines
trailingContext.length >= 2 * contextLines) {
// Context break found
if (trailingContext.length < 2 * contextLines) {
// It must be last block of common lines but not a context break
trailingContext = []

// Force break out
i = oriLen
j = newLen

// Update hunk ends to force output to the end
oriHunkEnd = oriLen
newHunkEnd = newLen
}

// Output the diff hunk

// Trim the leading and trailing context block
actualLeadingContext = regularizeLeadingContext(leadingContext)
actualTrailingContext = regularizeTrailingContext(trailingContext)

oriHunkStart -= actualLeadingContext.length
newHunkStart -= actualLeadingContext.length
oriHunkEnd += actualTrailingContext.length
newHunkEnd += actualTrailingContext.length

oriHunkLineNo = oriHunkStart + 1
newHunkLineNo = newHunkStart + 1
oriHunkSize = oriHunkEnd - oriHunkStart
newHunkSize = newHunkEnd - newHunkStart

// Build header
unidiff += [
HEADER_PREFIX,
ORIGINAL_INDICATOR,
oriHunkLineNo,
RANGE_SEPARATOR,
oriHunkSize,
' ',
NEW_INDICATOR,
newHunkLineNo,
RANGE_SEPARATOR,
newHunkSize,
HEADER_SUFFIX,
NEW_LINE
].join('')

// Build the diff hunk content
while (oriHunkStart < oriHunkEnd || newHunkStart < newHunkEnd) {
if (oriHunkStart < oriHunkEnd &&
oriIsInLcs[oriHunkStart] === true &&
newIsInLcs[newHunkStart] === true) {
// The context line
unidiff += CONTEXT_INDICATOR + oriLines[oriHunkStart] + NEW_LINE
oriHunkStart++
newHunkStart++
} else if (oriHunkStart < oriHunkEnd && oriIsInLcs[oriHunkStart] === false) {
// The deletion line
unidiff += DELETION_INDICATOR + oriLines[oriHunkStart] + NEW_LINE
oriHunkStart++
} else if (newHunkStart < newHunkEnd && newIsInLcs[newHunkStart] === false) {
// The additional line
unidiff += ADDITION_INDICATOR + newLines[newHunkStart] + NEW_LINE
newHunkStart++
}
}

// Update hunk position and leading context
oriHunkStart = i
newHunkStart = j
leadingContext = trailingContext
}
}

// Trim the trailing new line if it exists
if (unidiff.length > 0 && unidiff.charAt(unidiff.length) === NEW_LINE) {
unidiff = unidiff.slice(0, -1)
}

return unidiff
}
[ View on GitHub | Edit on GitHub | Source on GitHub ]

How to use

You you can install via npm install locutus and require it via require('locutus/php/xdiff/xdiff_string_diff'). You could also require the xdiff module in full so that you could access xdiff.xdiff_string_diff instead.

If you intend to target the browser, you can then use a module bundler such as Browserify, webpack or rollup.js.

ES5/ES6

This function targets ES5, but as of Locutus 2.0.2 we also support ES6 functions. Locutus transpiles to ES5 before publishing to npm.

A community effort

Not unlike Wikipedia, Locutus is an ongoing community effort. Our philosophy follows The McDonald’s Theory. This means that we don't consider it to be a bad thing that many of our functions are first iterations, which may still have their fair share of issues. We hope that these flaws will inspire others to come up with better ideas.

This way of working also means that we don't offer any production guarantees, and recommend to use Locutus inspiration and learning purposes only.

Notes

  • The minimal argument is not currently supported

Examples

Please note that these examples are distilled from test cases that automatically verify our functions still work correctly. This could explain some quirky ones.

#codeexpected result
1xdiff_string_diff('', 'Hello world!')'@@ -0,0 +1,1 @@\n+Hello world!'

« More PHP xdiff functions