Python's difflib.ndiff in TypeScript

✓ Verified: Python 3.12

How to use

Install via yarn add locutus and import: import { ndiff } from 'locutus/python/difflib/ndiff'.

Or with CommonJS: const { ndiff } = require('locutus/python/difflib/ndiff')

Use a bundler that supports tree-shaking so you only ship the functions you actually use. Vite, webpack, Rollup, and Parcel all handle this. For server-side use this is less of a concern.

Examples

These examples are extracted from test cases that automatically verify our functions against their native counterparts.

#	code	expected result
1	`ndiff(['one', 'two', 'three'], ['ore', 'too', 'tree'])`	`['- one', '- two', '+ ore', '+ too', '- three', '? -\n', '+ tree']`
2	`ndiff(['spam\n', 'ham\n', 'eggs\n'], ['spam\n', 'jam\n', 'eggs\n'])`	`[' spam\n', '- ham\n', '? ^\n', '+ jam\n', '? ^\n', ' eggs\n']`
3	`ndiff([], ['x'])`	`['+ x']`

Notes

Returns a Differ-style delta with ‘-‘, ‘+’, ‘ ‘, and ‘?’ prefixed lines.
The default char junk filter ignores spaces and tabs, matching Python’s difflib.

Here's what our current TypeScript equivalent to Python's difflib.ndiff looks like.

type LineJunkPredicate = ((line: string) => boolean) | null
type CharJunkPredicate = ((char: string) => boolean) | null
type SequenceMatch = { aIndex: number; bIndex: number; size: number }
type SequenceOpcodeTag = 'replace' | 'delete' | 'insert' | 'equal'
type SequenceOpcode = [SequenceOpcodeTag, number, number, number, number]

class SequenceMatcher<T> {
  private a: T[] = []
  private b: T[] = []
  private b2j = new Map<T, number[]>()
  private matchingBlocksCache: SequenceMatch[] | undefined
  private opcodesCache: SequenceOpcode[] | undefined
  private readonly isJunk: ((value: T) => boolean) | undefined

  constructor(isJunk?: (value: T) => boolean) {
    this.isJunk = isJunk
  }

  setSeqs(a: T[], b: T[]): void {
    this.setSeq1(a)
    this.setSeq2(b)
  }

  setSeq1(a: T[]): void {
    this.a = [...a]
    this.matchingBlocksCache = undefined
    this.opcodesCache = undefined
  }

  setSeq2(b: T[]): void {
    this.b = [...b]
    this.b2j = new Map()
    this.matchingBlocksCache = undefined
    this.opcodesCache = undefined

    for (let index = 0; index < this.b.length; index++) {
      const value = this.b[index] as T
      const existing = this.b2j.get(value)
      if (existing) {
        existing.push(index)
        continue
      }
      this.b2j.set(value, [index])
    }

    if (this.isJunk) {
      for (const [value] of this.b2j) {
        if (this.isJunk(value)) {
          this.b2j.delete(value)
        }
      }
    }

    if (this.b.length >= 200) {
      const popularThreshold = Math.floor(this.b.length / 100) + 1
      for (const [value, indexes] of this.b2j) {
        if (indexes.length > popularThreshold) {
          this.b2j.delete(value)
        }
      }
    }
  }

  getOpcodes(): SequenceOpcode[] {
    if (this.opcodesCache) {
      return this.opcodesCache
    }

    const out: SequenceOpcode[] = []
    let aIndex = 0
    let bIndex = 0

    for (const block of this.getMatchingBlocks()) {
      const aStart = block.aIndex
      const bStart = block.bIndex
      let tag: SequenceOpcodeTag | null = null

      if (aIndex < aStart && bIndex < bStart) {
        tag = 'replace'
      } else if (aIndex < aStart) {
        tag = 'delete'
      } else if (bIndex < bStart) {
        tag = 'insert'
      }

      if (tag) {
        out.push([tag, aIndex, aStart, bIndex, bStart])
      }

      if (block.size > 0) {
        out.push(['equal', aStart, aStart + block.size, bStart, bStart + block.size])
      }

      aIndex = aStart + block.size
      bIndex = bStart + block.size
    }

    this.opcodesCache = out
    return out
  }

  ratio(): number {
    const total = this.a.length + this.b.length
    if (total === 0) {
      return 1
    }
    const matches = this.getMatchingBlocks().reduce((sum, block) => sum + block.size, 0)
    return (2 * matches) / total
  }

  quickRatio(): number {
    const total = this.a.length + this.b.length
    if (total === 0) {
      return 1
    }

    const bCounts = new Map<T, number>()
    for (const value of this.b) {
      bCounts.set(value, (bCounts.get(value) ?? 0) + 1)
    }

    let matches = 0
    for (const value of this.a) {
      const count = bCounts.get(value) ?? 0
      if (count > 0) {
        matches += 1
        bCounts.set(value, count - 1)
      }
    }

    return (2 * matches) / total
  }

  realQuickRatio(): number {
    const total = this.a.length + this.b.length
    if (total === 0) {
      return 1
    }
    return (2 * Math.min(this.a.length, this.b.length)) / total
  }

  private getMatchingBlocks(): SequenceMatch[] {
    if (this.matchingBlocksCache) {
      return this.matchingBlocksCache
    }

    const queue: Array<[number, number, number, number]> = [[0, this.a.length, 0, this.b.length]]
    const out: SequenceMatch[] = []

    while (queue.length > 0) {
      const next = queue.pop()
      if (!next) {
        continue
      }

      const [alo, ahi, blo, bhi] = next
      const match = this.findLongestMatch(alo, ahi, blo, bhi)
      if (match.size === 0) {
        continue
      }

      out.push(match)

      if (alo < match.aIndex && blo < match.bIndex) {
        queue.push([alo, match.aIndex, blo, match.bIndex])
      }

      const aMatchEnd = match.aIndex + match.size
      const bMatchEnd = match.bIndex + match.size
      if (aMatchEnd < ahi && bMatchEnd < bhi) {
        queue.push([aMatchEnd, ahi, bMatchEnd, bhi])
      }
    }

    out.sort((left, right) => left.aIndex - right.aIndex || left.bIndex - right.bIndex)

    const merged: SequenceMatch[] = []
    for (const block of out) {
      const previous = merged.at(-1)
      if (
        previous &&
        previous.aIndex + previous.size === block.aIndex &&
        previous.bIndex + previous.size === block.bIndex
      ) {
        previous.size += block.size
        continue
      }
      merged.push({ ...block })
    }

    merged.push({ aIndex: this.a.length, bIndex: this.b.length, size: 0 })
    this.matchingBlocksCache = merged
    return merged
  }

  private findLongestMatch(alo: number, ahi: number, blo: number, bhi: number): SequenceMatch {
    let bestI = alo
    let bestJ = blo
    let bestSize = 0
    let j2len = new Map<number, number>()

    for (let i = alo; i < ahi; i++) {
      const newj2len = new Map<number, number>()
      const indexes = this.b2j.get(this.a[i] as T) ?? []

      for (const j of indexes) {
        if (j < blo) {
          continue
        }
        if (j >= bhi) {
          break
        }
        const size = (j2len.get(j - 1) ?? 0) + 1
        newj2len.set(j, size)
        if (size > bestSize) {
          bestI = i - size + 1
          bestJ = j - size + 1
          bestSize = size
        }
      }

      j2len = newj2len
    }

    while (
      bestI > alo &&
      bestJ > blo &&
      !this.isJunk?.(this.b[bestJ - 1] as T) &&
      this.a[bestI - 1] === this.b[bestJ - 1]
    ) {
      bestI -= 1
      bestJ -= 1
      bestSize += 1
    }

    while (
      bestI + bestSize < ahi &&
      bestJ + bestSize < bhi &&
      !this.isJunk?.(this.b[bestJ + bestSize] as T) &&
      this.a[bestI + bestSize] === this.b[bestJ + bestSize]
    ) {
      bestSize += 1
    }

    while (
      bestI > alo &&
      bestJ > blo &&
      !!this.isJunk?.(this.b[bestJ - 1] as T) &&
      this.a[bestI - 1] === this.b[bestJ - 1]
    ) {
      bestI -= 1
      bestJ -= 1
      bestSize += 1
    }

    while (
      bestI + bestSize < ahi &&
      bestJ + bestSize < bhi &&
      !!this.isJunk?.(this.b[bestJ + bestSize] as T) &&
      this.a[bestI + bestSize] === this.b[bestJ + bestSize]
    ) {
      bestSize += 1
    }

    return { aIndex: bestI, bIndex: bestJ, size: bestSize }
  }
}

class Differ {
  private readonly linejunk: ((line: string) => boolean) | undefined
  private readonly charjunk: ((char: string) => boolean) | undefined

  constructor(linejunk: ((line: string) => boolean) | undefined, charjunk: ((char: string) => boolean) | undefined) {
    this.linejunk = linejunk
    this.charjunk = charjunk
  }

  compare(a: string[], b: string[]): string[] {
    const out: string[] = []
    const cruncher = new SequenceMatcher<string>(this.linejunk)
    cruncher.setSeqs(a, b)

    for (const [tag, alo, ahi, blo, bhi] of cruncher.getOpcodes()) {
      if (tag === 'replace') {
        out.push(...this.fancyReplace(a, alo, ahi, b, blo, bhi))
        continue
      }
      if (tag === 'delete') {
        out.push(...this.dump('-', a, alo, ahi))
        continue
      }
      if (tag === 'insert') {
        out.push(...this.dump('+', b, blo, bhi))
        continue
      }
      out.push(...this.dump(' ', a, alo, ahi))
    }

    return out
  }

  private dump(tag: '-' | '+' | ' ', source: string[], lo: number, hi: number): string[] {
    const out: string[] = []
    for (let index = lo; index < hi; index++) {
      out.push(`${tag} ${source[index]}`)
    }
    return out
  }

  private plainReplace(a: string[], alo: number, ahi: number, b: string[], blo: number, bhi: number): string[] {
    const first = bhi - blo < ahi - alo ? this.dump('+', b, blo, bhi) : this.dump('-', a, alo, ahi)
    const second = bhi - blo < ahi - alo ? this.dump('-', a, alo, ahi) : this.dump('+', b, blo, bhi)
    return [...first, ...second]
  }

  private fancyReplace(a: string[], alo: number, ahi: number, b: string[], blo: number, bhi: number): string[] {
    let bestRatio = 0.74
    const cutoff = 0.75
    const cruncher = new SequenceMatcher<string>(this.charjunk)
    let equalI: number | undefined
    let equalJ: number | undefined
    let bestI = alo
    let bestJ = blo

    for (let j = blo; j < bhi; j++) {
      const right = b[j] ?? ''
      cruncher.setSeq2(Array.from(right))

      for (let i = alo; i < ahi; i++) {
        const left = a[i] ?? ''
        if (left === right) {
          if (equalI === undefined) {
            equalI = i
            equalJ = j
          }
          continue
        }

        cruncher.setSeq1(Array.from(left))
        if (cruncher.realQuickRatio() > bestRatio && cruncher.quickRatio() > bestRatio) {
          const ratio = cruncher.ratio()
          if (ratio > bestRatio) {
            bestRatio = ratio
            bestI = i
            bestJ = j
          }
        }
      }
    }

    if (bestRatio < cutoff) {
      if (equalI === undefined || equalJ === undefined) {
        return this.plainReplace(a, alo, ahi, b, blo, bhi)
      }
      bestI = equalI
      bestJ = equalJ
    } else {
      equalI = undefined
      equalJ = undefined
    }

    const out = this.fancyHelper(a, alo, bestI, b, blo, bestJ)
    const leftLine = a[bestI] ?? ''
    const rightLine = b[bestJ] ?? ''

    if (equalI === undefined || equalJ === undefined) {
      let leftTags = ''
      let rightTags = ''
      cruncher.setSeqs(Array.from(leftLine), Array.from(rightLine))

      for (const [tag, ai1, ai2, bj1, bj2] of cruncher.getOpcodes()) {
        const leftLength = ai2 - ai1
        const rightLength = bj2 - bj1
        if (tag === 'replace') {
          leftTags += '^'.repeat(leftLength)
          rightTags += '^'.repeat(rightLength)
          continue
        }
        if (tag === 'delete') {
          leftTags += '-'.repeat(leftLength)
          continue
        }
        if (tag === 'insert') {
          rightTags += '+'.repeat(rightLength)
          continue
        }
        leftTags += ' '.repeat(leftLength)
        rightTags += ' '.repeat(rightLength)
      }

      out.push(...this.qformat(leftLine, rightLine, leftTags, rightTags))
    } else {
      out.push(`  ${leftLine}`)
    }

    out.push(...this.fancyHelper(a, bestI + 1, ahi, b, bestJ + 1, bhi))
    return out
  }

  private fancyHelper(a: string[], alo: number, ahi: number, b: string[], blo: number, bhi: number): string[] {
    if (alo < ahi) {
      if (blo < bhi) {
        return this.fancyReplace(a, alo, ahi, b, blo, bhi)
      }
      return this.dump('-', a, alo, ahi)
    }

    if (blo < bhi) {
      return this.dump('+', b, blo, bhi)
    }

    return []
  }

  private qformat(aline: string, bline: string, atags: string, btags: string): string[] {
    const leftTags = rstripTrailingWhitespace(keepOriginalWhitespace(aline, atags))
    const rightTags = rstripTrailingWhitespace(keepOriginalWhitespace(bline, btags))
    const out = [`- ${aline}`]

    if (leftTags) {
      out.push(`? ${leftTags}\n`)
    }

    out.push(`+ ${bline}`)

    if (rightTags) {
      out.push(`? ${rightTags}\n`)
    }

    return out
  }
}

export function ndiff(
  a: string[] | unknown,
  b: string[] | unknown,
  linejunk: LineJunkPredicate = null,
  charjunk: CharJunkPredicate = isCharacterJunk,
): string[] {
  //      discuss at: https://locutus.io/python/difflib/ndiff/
  // parity verified: Python 3.12
  //     original by: Python Software Foundation (https://www.python.org/)
  //     original by: Kevin van Zonneveld (https://kvz.io)
  //          note 1: Returns a Differ-style delta with '-', '+', ' ', and '?' prefixed lines.
  //          note 2: The default char junk filter ignores spaces and tabs, matching Python's difflib.
  //       example 1: ndiff(['one', 'two', 'three'], ['ore', 'too', 'tree'])
  //       returns 1: ['- one', '- two', '+ ore', '+ too', '- three', '?  -\n', '+ tree']
  //       example 2: ndiff(['spam\n', 'ham\n', 'eggs\n'], ['spam\n', 'jam\n', 'eggs\n'])
  //       returns 2: ['  spam\n', '- ham\n', '? ^\n', '+ jam\n', '? ^\n', '  eggs\n']
  //       example 3: ndiff([], ['x'])
  //       returns 3: ['+ x']

  const left = normalizeLineSequence(a, 'a')
  const right = normalizeLineSequence(b, 'b')
  const lineFilter = normalizeJunkPredicate(linejunk, false)
  const charFilter = normalizeJunkPredicate(charjunk, true)

  return new Differ(lineFilter, charFilter).compare(left, right)
}

function normalizeLineSequence(value: string[] | unknown, paramName: string): string[] {
  if (!Array.isArray(value)) {
    throw new TypeError(`ndiff(): ${paramName} must be an array of strings`)
  }

  for (const line of value) {
    if (typeof line !== 'string') {
      throw new TypeError(`ndiff(): ${paramName} must be an array of strings`)
    }
  }

  return [...value]
}

function isCharacterJunk(char: string): boolean {
  return char === ' ' || char === '\t'
}

function normalizeJunkPredicate(
  predicate: LineJunkPredicate | CharJunkPredicate | undefined,
  useDefault: boolean,
): ((value: string) => boolean) | undefined {
  if (typeof predicate === 'undefined') {
    return useDefault ? isCharacterJunk : undefined
  }

  if (predicate === null) {
    return undefined
  }

  if (typeof predicate === 'function') {
    return predicate
  }

  if (isPythonFalsy(predicate)) {
    return undefined
  }

  throw new TypeError("'int' object is not callable")
}

function isPythonFalsy(value: unknown): boolean {
  if (value === null || typeof value === 'undefined') {
    return true
  }

  if (typeof value === 'boolean') {
    return value === false
  }

  if (typeof value === 'number') {
    return value === 0
  }

  if (typeof value === 'bigint') {
    return value === 0n
  }

  if (typeof value === 'string') {
    return value.length === 0
  }

  if (Array.isArray(value)) {
    return value.length === 0
  }

  if (typeof value === 'object') {
    const prototype = Object.getPrototypeOf(value)
    if (prototype === Object.prototype || prototype === null) {
      return Object.keys(value).length === 0
    }
  }

  return false
}

function keepOriginalWhitespace(line: string, tags: string): string {
  const lineChars = Array.from(line)
  const tagChars = Array.from(tags)
  const out: string[] = []

  for (let index = 0; index < Math.min(lineChars.length, tagChars.length); index++) {
    const lineChar = lineChars[index] ?? ''
    const tagChar = tagChars[index] ?? ''
    out.push(tagChar === ' ' && /\s/u.test(lineChar) ? lineChar : tagChar)
  }

  return out.join('')
}

function rstripTrailingWhitespace(value: string): string {
  return value.replace(/\s+$/u, '')
}

Improve this function

Locutus is a community effort following The McDonald's Theory: we ship first iterations, hoping others will improve them. If you see something that could be better, we'd love your contribution.

View on GitHub · Edit on GitHub · View Raw

Help us add more

Got a rainy Sunday afternoon and a taste for a porting puzzle?

Get inspiration from the Python 3 standard library string page.
Click "New file" in the appropriate folder on GitHub. This will fork the project to your account, directly add the file to it, and send a Pull Request to us.

We will then review it. If it's useful to the project and in line with our contributing guidelines your work will become part of Locutus and you'll be automatically credited in the authors section accordingly.

« More Python difflib functions

Star