PHP's xdiff_string_diff in JavaScript

How to use

You you can install via yarn add locutus and require this function via const xdiff_string_diff = require('locutus/php/xdiff/xdiff_string_diff').

It is important to use a bundler that supports tree-shaking so that you only ship the functions that you actually use to your browser, instead of all of Locutus, which is massive. Examples are: Parcel, webpack, or rollup.js. For server-side use this is typically less of a concern.

Examples

Please note that these examples are distilled from test cases that automatically verify our functions still work correctly. This could explain some quirky ones.

#codeexpected result
1xdiff_string_diff('', 'Hello world!')'@@ -0,0 +1,1 @@\n+Hello world!'

Notes

  • The minimal argument is not currently supported

Here’s what our current JavaScript equivalent to PHP's xdiff_string_diff looks like.

module.exports = function xdiff_string_diff(oldData, newData, contextLines, minimal) {
// discuss at: https://locutus.io/php/xdiff_string_diff
// original by: Brett Zamir (https://brett-zamir.me)
// based on: Imgen Tata (https://www.myipdf.com/)
// bugfixed by: Imgen Tata (https://www.myipdf.com/)
// improved by: Brett Zamir (https://brett-zamir.me)
// note 1: The minimal argument is not currently supported
// example 1: xdiff_string_diff('', 'Hello world!')
// returns 1: '@@ -0,0 +1,1 @@\n+Hello world!'

// (This code was done by Imgen Tata; I have only reformatted for use in Locutus)

// See https://en.wikipedia.org/wiki/Diff#Unified_format
let i = 0
let j = 0
let k = 0
let oriHunkStart
let newHunkStart
let oriHunkEnd
let newHunkEnd
let oriHunkLineNo
let newHunkLineNo
let oriHunkSize
let newHunkSize
const MAX_CONTEXT_LINES = Number.POSITIVE_INFINITY // Potential configuration
const MIN_CONTEXT_LINES = 0
const DEFAULT_CONTEXT_LINES = 3
const HEADER_PREFIX = '@@ ' //
const HEADER_SUFFIX = ' @@'
const ORIGINAL_INDICATOR = '-'
const NEW_INDICATOR = '+'
const RANGE_SEPARATOR = ','
const CONTEXT_INDICATOR = ' '
const DELETION_INDICATOR = '-'
const ADDITION_INDICATOR = '+'
let oriLines
let newLines
const NEW_LINE = '\n'

const _trim = function (text) {
if (typeof text !== 'string') {
throw new Error('String parameter required')
}

return text.replace(/(^\s*)|(\s*$)/g, '')
}

const _verifyType = function (type) {
const args = arguments
const argsLen = arguments.length
const basicTypes = ['number', 'boolean', 'string', 'function', 'object', 'undefined']
let basicType
let i
let j
const typeOfType = typeof type
if (typeOfType !== 'string' && typeOfType !== 'function') {
throw new Error('Bad type parameter')
}

if (argsLen < 2) {
throw new Error('Too few arguments')
}

if (typeOfType === 'string') {
type = _trim(type)

if (type === '') {
throw new Error('Bad type parameter')
}

for (j = 0; j < basicTypes.length; j++) {
basicType = basicTypes[j]

if (basicType === type) {
for (i = 1; i < argsLen; i++) {
if (typeof args[i] !== type) {
throw new Error('Bad type')
}
}

return
}
}

throw new Error('Bad type parameter')
}

// Not basic type. we need to use instanceof operator
for (i = 1; i < argsLen; i++) {
if (!(args[i] instanceof type)) {
throw new Error('Bad type')
}
}
}

const _hasValue = function (array, value) {
let i
_verifyType(Array, array)

for (i = 0; i < array.length; i++) {
if (array[i] === value) {
return true
}
}

return false
}

const _areTypeOf = function (type) {
const args = arguments
const argsLen = arguments.length
const basicTypes = ['number', 'boolean', 'string', 'function', 'object', 'undefined']
let basicType
let i
let j
const typeOfType = typeof type

if (typeOfType !== 'string' && typeOfType !== 'function') {
throw new Error('Bad type parameter')
}

if (argsLen < 2) {
throw new Error('Too few arguments')
}

if (typeOfType === 'string') {
type = _trim(type)

if (type === '') {
return false
}

for (j = 0; j < basicTypes.length; j++) {
basicType = basicTypes[j]

if (basicType === type) {
for (i = 1; i < argsLen; i++) {
if (typeof args[i] !== type) {
return false
}
}

return true
}
}

throw new Error('Bad type parameter')
}

// Not basic type. we need to use instanceof operator
for (i = 1; i < argsLen; i++) {
if (!(args[i] instanceof type)) {
return false
}
}

return true
}

const _getInitializedArray = function (arraySize, initValue) {
const array = []
let i
_verifyType('number', arraySize)

for (i = 0; i < arraySize; i++) {
array.push(initValue)
}

return array
}

const _splitIntoLines = function (text) {
_verifyType('string', text)

if (text === '') {
return []
}
return text.split('\n')
}

const _isEmptyArray = function (obj) {
return _areTypeOf(Array, obj) && obj.length === 0
}

/**
* Finds longest common sequence between two sequences
* @see {@link https://wordaligned.org/articles/longest-common-subsequence}
*/
const _findLongestCommonSequence = function (seq1, seq2, seq1IsInLcs, seq2IsInLcs) {
if (!_areTypeOf(Array, seq1, seq2)) {
throw new Error('Array parameters are required')
}

// Deal with edge case
if (_isEmptyArray(seq1) || _isEmptyArray(seq2)) {
return []
}

// Function to calculate lcs lengths
const lcsLens = function (xs, ys) {
let i
let j
let prev
const curr = _getInitializedArray(ys.length + 1, 0)

for (i = 0; i < xs.length; i++) {
prev = curr.slice(0)
for (j = 0; j < ys.length; j++) {
if (xs[i] === ys[j]) {
curr[j + 1] = prev[j] + 1
} else {
curr[j + 1] = Math.max(curr[j], prev[j + 1])
}
}
}

return curr
}

// Function to find lcs and fill in the array to indicate the optimal longest common sequence
const _findLcs = function (xs, xidx, xIsIn, ys) {
let i
let xb
let xe
let llB
let llE
let pivot
let max
let yb
let ye
const nx = xs.length
const ny = ys.length

if (nx === 0) {
return []
}
if (nx === 1) {
if (_hasValue(ys, xs[0])) {
xIsIn[xidx] = true
return [xs[0]]
}
return []
}
i = Math.floor(nx / 2)
xb = xs.slice(0, i)
xe = xs.slice(i)
llB = lcsLens(xb, ys)
llE = lcsLens(xe.slice(0).reverse(), ys.slice(0).reverse())

pivot = 0
max = 0
for (j = 0; j <= ny; j++) {
if (llB[j] + llE[ny - j] > max) {
pivot = j
max = llB[j] + llE[ny - j]
}
}
yb = ys.slice(0, pivot)
ye = ys.slice(pivot)
return _findLcs(xb, xidx, xIsIn, yb).concat(_findLcs(xe, xidx + i, xIsIn, ye))
}

// Fill in seq1IsInLcs to find the optimal longest common subsequence of first sequence
_findLcs(seq1, 0, seq1IsInLcs, seq2)
// Fill in seq2IsInLcs to find the optimal longest common subsequence
// of second sequence and return the result
return _findLcs(seq2, 0, seq2IsInLcs, seq1)
}

// First, check the parameters
if (_areTypeOf('string', oldData, newData) === false) {
return false
}

if (oldData === newData) {
return ''
}

if (typeof contextLines !== 'number' || contextLines > MAX_CONTEXT_LINES || contextLines < MIN_CONTEXT_LINES) {
contextLines = DEFAULT_CONTEXT_LINES
}

oriLines = _splitIntoLines(oldData)
newLines = _splitIntoLines(newData)
const oriLen = oriLines.length
const newLen = newLines.length
const oriIsInLcs = _getInitializedArray(oriLen, false)
const newIsInLcs = _getInitializedArray(newLen, false)
const lcsLen = _findLongestCommonSequence(oriLines, newLines, oriIsInLcs, newIsInLcs).length
let unidiff = ''

if (lcsLen === 0) {
// No common sequence
unidiff = [
HEADER_PREFIX,
ORIGINAL_INDICATOR,
oriLen > 0 ? '1' : '0',
RANGE_SEPARATOR,
oriLen,
' ',
NEW_INDICATOR,
newLen > 0 ? '1' : '0',
RANGE_SEPARATOR,
newLen,
HEADER_SUFFIX,
].join('')

for (i = 0; i < oriLen; i++) {
unidiff += NEW_LINE + DELETION_INDICATOR + oriLines[i]
}

for (j = 0; j < newLen; j++) {
unidiff += NEW_LINE + ADDITION_INDICATOR + newLines[j]
}

return unidiff
}

let leadingContext = []
let trailingContext = []
let actualLeadingContext = []
let actualTrailingContext = []

// Regularize leading context by the contextLines parameter
const regularizeLeadingContext = function (context) {
if (context.length === 0 || contextLines === 0) {
return []
}

const contextStartPos = Math.max(context.length - contextLines, 0)

return context.slice(contextStartPos)
}

// Regularize trailing context by the contextLines parameter
const regularizeTrailingContext = function (context) {
if (context.length === 0 || contextLines === 0) {
return []
}

return context.slice(0, Math.min(contextLines, context.length))
}

// Skip common lines in the beginning
while (i < oriLen && oriIsInLcs[i] === true && newIsInLcs[i] === true) {
leadingContext.push(oriLines[i])
i++
}

j = i
// The index in the longest common sequence
k = i
oriHunkStart = i
newHunkStart = j
oriHunkEnd = i
newHunkEnd = j

while (i < oriLen || j < newLen) {
while (i < oriLen && oriIsInLcs[i] === false) {
i++
}
oriHunkEnd = i

while (j < newLen && newIsInLcs[j] === false) {
j++
}
newHunkEnd = j

// Find the trailing context
trailingContext = []
while (i < oriLen && oriIsInLcs[i] === true && j < newLen && newIsInLcs[j] === true) {
trailingContext.push(oriLines[i])
k++
i++
j++
}

if (
k >= lcsLen || // No more in longest common lines
trailingContext.length >= 2 * contextLines
) {
// Context break found
if (trailingContext.length < 2 * contextLines) {
// It must be last block of common lines but not a context break
trailingContext = []

// Force break out
i = oriLen
j = newLen

// Update hunk ends to force output to the end
oriHunkEnd = oriLen
newHunkEnd = newLen
}

// Output the diff hunk

// Trim the leading and trailing context block
actualLeadingContext = regularizeLeadingContext(leadingContext)
actualTrailingContext = regularizeTrailingContext(trailingContext)

oriHunkStart -= actualLeadingContext.length
newHunkStart -= actualLeadingContext.length
oriHunkEnd += actualTrailingContext.length
newHunkEnd += actualTrailingContext.length

oriHunkLineNo = oriHunkStart + 1
newHunkLineNo = newHunkStart + 1
oriHunkSize = oriHunkEnd - oriHunkStart
newHunkSize = newHunkEnd - newHunkStart

// Build header
unidiff += [
HEADER_PREFIX,
ORIGINAL_INDICATOR,
oriHunkLineNo,
RANGE_SEPARATOR,
oriHunkSize,
' ',
NEW_INDICATOR,
newHunkLineNo,
RANGE_SEPARATOR,
newHunkSize,
HEADER_SUFFIX,
NEW_LINE,
].join('')

// Build the diff hunk content
while (oriHunkStart < oriHunkEnd || newHunkStart < newHunkEnd) {
if (oriHunkStart < oriHunkEnd && oriIsInLcs[oriHunkStart] === true && newIsInLcs[newHunkStart] === true) {
// The context line
unidiff += CONTEXT_INDICATOR + oriLines[oriHunkStart] + NEW_LINE
oriHunkStart++
newHunkStart++
} else if (oriHunkStart < oriHunkEnd && oriIsInLcs[oriHunkStart] === false) {
// The deletion line
unidiff += DELETION_INDICATOR + oriLines[oriHunkStart] + NEW_LINE
oriHunkStart++
} else if (newHunkStart < newHunkEnd && newIsInLcs[newHunkStart] === false) {
// The additional line
unidiff += ADDITION_INDICATOR + newLines[newHunkStart] + NEW_LINE
newHunkStart++
}
}

// Update hunk position and leading context
oriHunkStart = i
newHunkStart = j
leadingContext = trailingContext
}
}

// Trim the trailing new line if it exists
if (unidiff.length > 0 && unidiff.charAt(unidiff.length) === NEW_LINE) {
unidiff = unidiff.slice(0, -1)
}

return unidiff
}

A community effort

Not unlike Wikipedia, Locutus is an ongoing community effort. Our philosophy follows The McDonald’s Theory. This means that we assimilate first iterations with imperfections, hoping for others to take issue with-and improve them. This unorthodox approach has worked very well to foster fun and fruitful collaboration, but please be reminded to use our creations at your own risk. THE SOFTWARE IS PROVIDED "AS IS" has never been more true than for Locutus.

Now go and: [ View on GitHub | Edit on GitHub | View Raw ]


« More PHP xdiff functions


Star