PHP's xdiff_string_diff in JavaScript

Here’s what our current JavaScript equivalent to PHP's xdiff_string_diff looks like.

module.exports = function xdiff_string_diff (oldData, newData, contextLines, minimal) { // eslint-disable-line camelcase
// discuss at: https://locutus.io/php/xdiff_string_diff
// original by: Brett Zamir (https://brett-zamir.me)
// based on: Imgen Tata (https://www.myipdf.com/)
// bugfixed by: Imgen Tata (https://www.myipdf.com/)
// improved by: Brett Zamir (https://brett-zamir.me)
// note 1: The minimal argument is not currently supported
// example 1: xdiff_string_diff('', 'Hello world!')
// returns 1: '@@ -0,0 +1,1 @@\n+Hello world!'
// (This code was done by Imgen Tata; I have only reformatted for use in Locutus)
// See https://en.wikipedia.org/wiki/Diff#Unified_format
let i = 0
let j = 0
let k = 0
let oriHunkStart
let newHunkStart
let oriHunkEnd
let newHunkEnd
let oriHunkLineNo
let newHunkLineNo
let oriHunkSize
let newHunkSize
const MAX_CONTEXT_LINES = Number.POSITIVE_INFINITY // Potential configuration
const MIN_CONTEXT_LINES = 0
const DEFAULT_CONTEXT_LINES = 3
const HEADER_PREFIX = '@@ ' //
const HEADER_SUFFIX = ' @@'
const ORIGINAL_INDICATOR = '-'
const NEW_INDICATOR = '+'
const RANGE_SEPARATOR = ','
const CONTEXT_INDICATOR = ' '
const DELETION_INDICATOR = '-'
const ADDITION_INDICATOR = '+'
let oriLines
let newLines
const NEW_LINE = '\n'
const _trim = function (text) {
if (typeof text !== 'string') {
throw new Error('String parameter required')
}
return text.replace(/(^\s*)|(\s*$)/g, '')
}
const _verifyType = function (type) {
const args = arguments
const argsLen = arguments.length
const basicTypes = ['number', 'boolean', 'string', 'function', 'object', 'undefined']
let basicType
let i
let j
const typeOfType = typeof type
if (typeOfType !== 'string' && typeOfType !== 'function') {
throw new Error('Bad type parameter')
}
if (argsLen < 2) {
throw new Error('Too few arguments')
}
if (typeOfType === 'string') {
type = _trim(type)
if (type === '') {
throw new Error('Bad type parameter')
}
for (j = 0; j < basicTypes.length; j++) {
basicType = basicTypes[j]
if (basicType === type) {
for (i = 1; i < argsLen; i++) {
if (typeof args[i] !== type) {
throw new Error('Bad type')
}
}
return
}
}
throw new Error('Bad type parameter')
}
// Not basic type. we need to use instanceof operator
for (i = 1; i < argsLen; i++) {
if (!(args[i] instanceof type)) {
throw new Error('Bad type')
}
}
}
const _hasValue = function (array, value) {
let i
_verifyType(Array, array)
for (i = 0; i < array.length; i++) {
if (array[i] === value) {
return true
}
}
return false
}
const _areTypeOf = function (type) {
const args = arguments
const argsLen = arguments.length
const basicTypes = ['number', 'boolean', 'string', 'function', 'object', 'undefined']
let basicType
let i
let j
const typeOfType = typeof type
if (typeOfType !== 'string' && typeOfType !== 'function') {
throw new Error('Bad type parameter')
}
if (argsLen < 2) {
throw new Error('Too few arguments')
}
if (typeOfType === 'string') {
type = _trim(type)
if (type === '') {
return false
}
for (j = 0; j < basicTypes.length; j++) {
basicType = basicTypes[j]
if (basicType === type) {
for (i = 1; i < argsLen; i++) {
if (typeof args[i] !== type) {
return false
}
}
return true
}
}
throw new Error('Bad type parameter')
}
// Not basic type. we need to use instanceof operator
for (i = 1; i < argsLen; i++) {
if (!(args[i] instanceof type)) {
return false
}
}
return true
}
const _getInitializedArray = function (arraySize, initValue) {
const array = []
let i
_verifyType('number', arraySize)
for (i = 0; i < arraySize; i++) {
array.push(initValue)
}
return array
}
const _splitIntoLines = function (text) {
_verifyType('string', text)
if (text === '') {
return []
}
return text.split('\n')
}
const _isEmptyArray = function (obj) {
return _areTypeOf(Array, obj) && obj.length === 0
}
/**
* Finds longest common sequence between two sequences
* @see {@link https://wordaligned.org/articles/longest-common-subsequence}
*/
const _findLongestCommonSequence = function (seq1, seq2, seq1IsInLcs, seq2IsInLcs) {
if (!_areTypeOf(Array, seq1, seq2)) {
throw new Error('Array parameters are required')
}
// Deal with edge case
if (_isEmptyArray(seq1) || _isEmptyArray(seq2)) {
return []
}
// Function to calculate lcs lengths
const lcsLens = function (xs, ys) {
let i
let j
let prev
const curr = _getInitializedArray(ys.length + 1, 0)
for (i = 0; i < xs.length; i++) {
prev = curr.slice(0)
for (j = 0; j < ys.length; j++) {
if (xs[i] === ys[j]) {
curr[j + 1] = prev[j] + 1
} else {
curr[j + 1] = Math.max(curr[j], prev[j + 1])
}
}
}
return curr
}
// Function to find lcs and fill in the array to indicate the optimal longest common sequence
var _findLcs = function (xs, xidx, xIsIn, ys) {
let i
let xb
let xe
let llB
let llE
let pivot
let max
let yb
let ye
const nx = xs.length
const ny = ys.length
if (nx === 0) {
return []
}
if (nx === 1) {
if (_hasValue(ys, xs[0])) {
xIsIn[xidx] = true
return [xs[0]]
}
return []
}
i = Math.floor(nx / 2)
xb = xs.slice(0, i)
xe = xs.slice(i)
llB = lcsLens(xb, ys)
llE = lcsLens(xe.slice(0)
.reverse(), ys.slice(0)
.reverse())
pivot = 0
max = 0
for (j = 0; j <= ny; j++) {
if (llB[j] + llE[ny - j] > max) {
pivot = j
max = llB[j] + llE[ny - j]
}
}
yb = ys.slice(0, pivot)
ye = ys.slice(pivot)
return _findLcs(xb, xidx, xIsIn, yb).concat(_findLcs(xe, xidx + i, xIsIn, ye))
}
// Fill in seq1IsInLcs to find the optimal longest common subsequence of first sequence
_findLcs(seq1, 0, seq1IsInLcs, seq2)
// Fill in seq2IsInLcs to find the optimal longest common subsequence
// of second sequence and return the result
return _findLcs(seq2, 0, seq2IsInLcs, seq1)
}
// First, check the parameters
if (_areTypeOf('string', oldData, newData) === false) {
return false
}
if (oldData === newData) {
return ''
}
if (typeof contextLines !== 'number' ||
contextLines > MAX_CONTEXT_LINES ||
contextLines < MIN_CONTEXT_LINES) {
contextLines = DEFAULT_CONTEXT_LINES
}
oriLines = _splitIntoLines(oldData)
newLines = _splitIntoLines(newData)
const oriLen = oriLines.length
const newLen = newLines.length
const oriIsInLcs = _getInitializedArray(oriLen, false)
const newIsInLcs = _getInitializedArray(newLen, false)
const lcsLen = _findLongestCommonSequence(oriLines, newLines, oriIsInLcs, newIsInLcs).length
let unidiff = ''
if (lcsLen === 0) {
// No common sequence
unidiff = [
HEADER_PREFIX,
ORIGINAL_INDICATOR,
(oriLen > 0 ? '1' : '0'),
RANGE_SEPARATOR,
oriLen,
' ',
NEW_INDICATOR,
(newLen > 0 ? '1' : '0'),
RANGE_SEPARATOR,
newLen,
HEADER_SUFFIX
].join('')
for (i = 0; i < oriLen; i++) {
unidiff += NEW_LINE + DELETION_INDICATOR + oriLines[i]
}
for (j = 0; j < newLen; j++) {
unidiff += NEW_LINE + ADDITION_INDICATOR + newLines[j]
}
return unidiff
}
let leadingContext = []
let trailingContext = []
let actualLeadingContext = []
let actualTrailingContext = []
// Regularize leading context by the contextLines parameter
const regularizeLeadingContext = function (context) {
if (context.length === 0 || contextLines === 0) {
return []
}
const contextStartPos = Math.max(context.length - contextLines, 0)
return context.slice(contextStartPos)
}
// Regularize trailing context by the contextLines parameter
const regularizeTrailingContext = function (context) {
if (context.length === 0 || contextLines === 0) {
return []
}
return context.slice(0, Math.min(contextLines, context.length))
}
// Skip common lines in the beginning
while (i < oriLen && oriIsInLcs[i] === true && newIsInLcs[i] === true) {
leadingContext.push(oriLines[i])
i++
}
j = i
// The index in the longest common sequence
k = i
oriHunkStart = i
newHunkStart = j
oriHunkEnd = i
newHunkEnd = j
while (i < oriLen || j < newLen) {
while (i < oriLen && oriIsInLcs[i] === false) {
i++
}
oriHunkEnd = i
while (j < newLen && newIsInLcs[j] === false) {
j++
}
newHunkEnd = j
// Find the trailing context
trailingContext = []
while (i < oriLen && oriIsInLcs[i] === true && j < newLen && newIsInLcs[j] === true) {
trailingContext.push(oriLines[i])
k++
i++
j++
}
if (k >= lcsLen || // No more in longest common lines
trailingContext.length >= 2 * contextLines) {
// Context break found
if (trailingContext.length < 2 * contextLines) {
// It must be last block of common lines but not a context break
trailingContext = []
// Force break out
i = oriLen
j = newLen
// Update hunk ends to force output to the end
oriHunkEnd = oriLen
newHunkEnd = newLen
}
// Output the diff hunk
// Trim the leading and trailing context block
actualLeadingContext = regularizeLeadingContext(leadingContext)
actualTrailingContext = regularizeTrailingContext(trailingContext)
oriHunkStart -= actualLeadingContext.length
newHunkStart -= actualLeadingContext.length
oriHunkEnd += actualTrailingContext.length
newHunkEnd += actualTrailingContext.length
oriHunkLineNo = oriHunkStart + 1
newHunkLineNo = newHunkStart + 1
oriHunkSize = oriHunkEnd - oriHunkStart
newHunkSize = newHunkEnd - newHunkStart
// Build header
unidiff += [
HEADER_PREFIX,
ORIGINAL_INDICATOR,
oriHunkLineNo,
RANGE_SEPARATOR,
oriHunkSize,
' ',
NEW_INDICATOR,
newHunkLineNo,
RANGE_SEPARATOR,
newHunkSize,
HEADER_SUFFIX,
NEW_LINE
].join('')
// Build the diff hunk content
while (oriHunkStart < oriHunkEnd || newHunkStart < newHunkEnd) {
if (oriHunkStart < oriHunkEnd &&
oriIsInLcs[oriHunkStart] === true &&
newIsInLcs[newHunkStart] === true) {
// The context line
unidiff += CONTEXT_INDICATOR + oriLines[oriHunkStart] + NEW_LINE
oriHunkStart++
newHunkStart++
} else if (oriHunkStart < oriHunkEnd && oriIsInLcs[oriHunkStart] === false) {
// The deletion line
unidiff += DELETION_INDICATOR + oriLines[oriHunkStart] + NEW_LINE
oriHunkStart++
} else if (newHunkStart < newHunkEnd && newIsInLcs[newHunkStart] === false) {
// The additional line
unidiff += ADDITION_INDICATOR + newLines[newHunkStart] + NEW_LINE
newHunkStart++
}
}
// Update hunk position and leading context
oriHunkStart = i
newHunkStart = j
leadingContext = trailingContext
}
}
// Trim the trailing new line if it exists
if (unidiff.length > 0 && unidiff.charAt(unidiff.length) === NEW_LINE) {
unidiff = unidiff.slice(0, -1)
}
return unidiff
}
[ View on GitHub | Edit on GitHub | Source on GitHub ]

How to use

You can install via npm install locutus and require it via require('locutus/php/xdiff/xdiff_string_diff'). You could also require the xdiff module in full so that you could access xdiff.xdiff_string_diff instead.

If you intend to target the browser, you can then use a module bundler such as Parcel, webpack, Browserify, or rollup.js. This can be important because Locutus allows modern JavaScript in the source files, meaning it may not work in all browsers without a build/transpile step. Locutus does transpile all functions to ES5 before publishing to npm.

A community effort

Not unlike Wikipedia, Locutus is an ongoing community effort. Our philosophy follows The McDonald’s Theory. This means that we don't consider it to be a bad thing that many of our functions are first iterations, which may still have their fair share of issues. We hope that these flaws will inspire others to come up with better ideas.

This way of working also means that we don't offer any production guarantees, and recommend using Locutus for inspiration and learning purposes only.

Notes

  • The minimal argument is not currently supported

Examples

Please note that these examples are distilled from test cases that automatically verify our functions still work correctly. This could explain some quirky ones.

| # | code | expected result |
|---|------|-----------------|
| 1 | `xdiff_string_diff('', 'Hello world!')` | `'@@ -0,0 +1,1 @@\n+Hello world!'` |

« More PHP xdiff functions


Star