PHP's substr in TypeScript

Rosetta Stone: perl/substr · lua/sub · awk/substr

How to use

Install via yarn add locutus and import: import { substr } from 'locutus/php/strings/substr'.

Or with CommonJS: const { substr } = require('locutus/php/strings/substr')

Use a bundler that supports tree-shaking so you only ship the functions you actually use. Vite, webpack, Rollup, and Parcel all handle this. For server-side use this is less of a concern.

Examples

These examples are extracted from test cases that automatically verify our functions against their native counterparts.

#	code	expected result
1	`substr('abcdef', 0, -1)`	`'abcde'`
2	`substr(2, 0, -6)`	`false`
3	`ini_set('unicode.semantics', 'on') substr('a\uD801\uDC00', 0, -1)`	`'a'`
4	`ini_set('unicode.semantics', 'on') substr('a\uD801\uDC00', 0, 2)`	`'a\uD801\uDC00'`
5	`ini_set('unicode.semantics', 'on') substr('a\uD801\uDC00', -1, 1)`	`'\uD801\uDC00'`
6	`ini_set('unicode.semantics', 'on') substr('a\uD801\uDC00z\uD801\uDC00', -3, 2)`	`'\uD801\uDC00z'`
7	`ini_set('unicode.semantics', 'on') substr('a\uD801\uDC00z\uD801\uDC00', -3, -1)`	`'\uD801\uDC00z'`

Notes

When the ‘unicode.semantics’ ini setting (a PHP 6 concept) is ‘on’, surrogate pairs — characters outside the Basic Multilingual Plane — are counted as single characters.

Dependencies

This function uses the following Locutus functions:

_phpCastString (php/_helpers)
ini_get (php/info)

Here's what our current TypeScript equivalent to PHP's substr looks like.

import { _phpCastString as _php_cast_string } from '../_helpers/_phpCastString.ts'
import { ini_get } from '../info/ini_get.ts'

export function substr(input: string | number, start: number, len?: number): string | false {
  //  discuss at: https://locutus.io/php/substr/
  // original by: Martijn Wieringa
  // bugfixed by: T.Wild
  // improved by: Onno Marsman (https://twitter.com/onnomarsman)
  // improved by: Brett Zamir (https://brett-zamir.me)
  //  revised by: Theriault (https://github.com/Theriault)
  //  revised by: Rafał Kukawski
  //      note 1: Handles rare Unicode characters if 'unicode.semantics' ini (PHP6) is set to 'on'
  //      note 2: A negative start that reaches back past the beginning of the string is
  //      note 2: clamped to 0, as in PHP, instead of being rejected with false
  //   example 1: substr('abcdef', 0, -1)
  //   returns 1: 'abcde'
  //   example 2: substr(2, 0, -6)
  //   returns 2: false
  //   example 3: ini_set('unicode.semantics', 'on')
  //   example 3: substr('a\uD801\uDC00', 0, -1)
  //   returns 3: 'a'
  //   example 4: ini_set('unicode.semantics', 'on')
  //   example 4: substr('a\uD801\uDC00', 0, 2)
  //   returns 4: 'a\uD801\uDC00'
  //   example 5: ini_set('unicode.semantics', 'on')
  //   example 5: substr('a\uD801\uDC00', -1, 1)
  //   returns 5: '\uD801\uDC00'
  //   example 6: ini_set('unicode.semantics', 'on')
  //   example 6: substr('a\uD801\uDC00z\uD801\uDC00', -3, 2)
  //   returns 6: '\uD801\uDC00z'
  //   example 7: ini_set('unicode.semantics', 'on')
  //   example 7: substr('a\uD801\uDC00z\uD801\uDC00', -3, -1)
  //   returns 7: '\uD801\uDC00z'
  //   example 8: substr('abc', -5)
  //   returns 8: 'abc'
  //   example 9: substr('abc', -5, 2)
  //   returns 9: 'ab'

  const str = _php_cast_string(input)

  const multibyte = ini_get('unicode.semantics') === 'on'

  // In multibyte mode the string is split so that a surrogate pair occupies a single slot;
  // all offsets below then count those slots instead of UTF-16 code units.
  const chars = multibyte ? str.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]|[\s\S]/g) || [] : null

  const inputLength = chars ? chars.length : str.length

  // A negative start counts back from the end of the string. If it reaches further back
  // than the string is long, begin at the first character rather than failing.
  const from = start < 0 ? Math.max(start + inputLength, 0) : start

  // Without len the result runs to the end of the string. A negative len drops that many
  // characters from the end; a non-negative len is measured from the resolved start.
  let end = inputLength
  if (typeof len !== 'undefined') {
    end = len < 0 ? len + inputLength : len + from
  }

  // A start beyond the string, or an end that falls before the start, has no valid result.
  if (from > inputLength || from > end) {
    return false
  }

  return chars ? chars.slice(from, end).join('') : str.slice(from, end)
}

import { _phpCastString as _php_cast_string } from '../_helpers/_phpCastString.ts'
import { ini_get } from '../info/ini_get.ts'

export function substr(input, start, len) {
  //  discuss at: https://locutus.io/php/substr/
  // original by: Martijn Wieringa
  // bugfixed by: T.Wild
  // improved by: Onno Marsman (https://twitter.com/onnomarsman)
  // improved by: Brett Zamir (https://brett-zamir.me)
  //  revised by: Theriault (https://github.com/Theriault)
  //  revised by: Rafał Kukawski
  //      note 1: Handles rare Unicode characters if 'unicode.semantics' ini (PHP6) is set to 'on'
  //      note 2: A negative start that reaches back past the beginning of the string is
  //      note 2: clamped to 0, as in PHP, instead of being rejected with false
  //   example 1: substr('abcdef', 0, -1)
  //   returns 1: 'abcde'
  //   example 2: substr(2, 0, -6)
  //   returns 2: false
  //   example 3: ini_set('unicode.semantics', 'on')
  //   example 3: substr('a\uD801\uDC00', 0, -1)
  //   returns 3: 'a'
  //   example 4: ini_set('unicode.semantics', 'on')
  //   example 4: substr('a\uD801\uDC00', 0, 2)
  //   returns 4: 'a\uD801\uDC00'
  //   example 5: ini_set('unicode.semantics', 'on')
  //   example 5: substr('a\uD801\uDC00', -1, 1)
  //   returns 5: '\uD801\uDC00'
  //   example 6: ini_set('unicode.semantics', 'on')
  //   example 6: substr('a\uD801\uDC00z\uD801\uDC00', -3, 2)
  //   returns 6: '\uD801\uDC00z'
  //   example 7: ini_set('unicode.semantics', 'on')
  //   example 7: substr('a\uD801\uDC00z\uD801\uDC00', -3, -1)
  //   returns 7: '\uD801\uDC00z'
  //   example 8: substr('abc', -5)
  //   returns 8: 'abc'
  //   example 9: substr('abc', -5, 2)
  //   returns 9: 'ab'

  const str = _php_cast_string(input)

  const multibyte = ini_get('unicode.semantics') === 'on'

  // In multibyte mode the string is split so that a surrogate pair occupies a single slot;
  // all offsets below then count those slots instead of UTF-16 code units.
  const chars = multibyte ? str.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]|[\s\S]/g) || [] : null

  const inputLength = chars ? chars.length : str.length

  // A negative start counts back from the end of the string. If it reaches further back
  // than the string is long, begin at the first character rather than failing.
  const from = start < 0 ? Math.max(start + inputLength, 0) : start

  // Without len the result runs to the end of the string. A negative len drops that many
  // characters from the end; a non-negative len is measured from the resolved start.
  let end = inputLength
  if (typeof len !== 'undefined') {
    end = len < 0 ? len + inputLength : len + from
  }

  // A start beyond the string, or an end that falls before the start, has no valid result.
  if (from > inputLength || from > end) {
    return false
  }

  return chars ? chars.slice(from, end).join('') : str.slice(from, end)
}

// php/_helpers/_phpTypes (Locutus helper dependency)
// The two JavaScript "no value" markers.
type PhpNullish = undefined | null

// Any value whatsoever: the nullish markers plus every non-nullish value (`{}`).
type PhpInput = PhpNullish | {}

// Collection backed by a JavaScript array.
type PhpList<T = PhpInput> = Array<T>

// Collection backed by a plain object with string keys.
type PhpAssoc<T = PhpInput> = Record<string, T>

// Either of the two collection representations.
type PhpArrayLike<T = PhpInput> = PhpAssoc<T> | PhpList<T>

// Type guard: true exactly when `value` is a JavaScript array.
function isPhpList<T = PhpInput>(value: PhpInput): value is PhpList<T> {
  const backedByArray = Array.isArray(value)
  return backedByArray
}

// Type guard: true for any non-null value whose `typeof` is 'object' (arrays included).
function isObjectLike(value: PhpInput): value is PhpArrayLike<PhpInput> {
  if (value === null) {
    return false
  }
  return typeof value === 'object'
}

// Type guard: true for non-null objects that are not arrays.
function isPhpAssocObject<T = PhpInput>(value: PhpInput): value is PhpAssoc<T> {
  if (!isObjectLike(value)) {
    return false
  }
  return !isPhpList(value)
}

// php/_helpers/_phpCastString (Locutus helper dependency)
type CastStringValue = PhpInput

function _phpCastString(value: CastStringValue): string {
  // original by: Rafał Kukawski

  // Dispatch on the runtime type; every supported type maps to a fixed string form.
  switch (typeof value) {
    case 'boolean':
      // true becomes '1', false becomes the empty string
      return value ? '1' : ''

    case 'string':
      return value

    case 'number': {
      if (isNaN(value)) {
        return 'NAN'
      }
      if (isFinite(value)) {
        return `${value}`
      }
      // Only the two infinities remain; the sign decides the prefix.
      const sign = value < 0 ? '-' : ''
      return sign + 'INF'
    }

    case 'undefined':
      return ''

    case 'object':
      if (Array.isArray(value)) {
        return 'Array'
      }
      // typeof null is 'object' as well; null casts to the empty string.
      return value !== null ? 'Object' : ''

    default:
      // functions, symbols and bigints have no string form here
      throw new Error('Unsupported value type')
  }
}

// php/_helpers/_phpRuntimeState (Locutus helper dependency)
// One stored ini setting; ini_get reads its `local_value`.
interface IniEntry {
  local_value?: PhpInput
}

// A locale record: arbitrary string-keyed data plus an optional `sorting` comparator.
type LocaleEntry = PhpAssoc<PhpInput> & {
  sorting?: (left: PhpInput, right: PhpInput) => number
}

// String-keyed map whose values are strings (or undefined).
type LocaleCategoryMap = PhpAssoc<string | undefined>

// Shape of the `$locutus` object kept on the global context; `php` holds the PHP runtime data.
interface LocutusRuntimeContainer {
  php?: PhpAssoc<PhpInput>
}

// The global object as this module sees it: an optional `$locutus` slot plus arbitrary other keys.
type GlobalWithLocutus = {
  $locutus?: LocutusRuntimeContainer
  [key: string]: PhpInput
}

// Normalised view of the runtime data, as returned by ensurePhpRuntimeState.
interface PhpRuntimeState {
  ini: PhpAssoc<IniEntry | undefined>
  locales: PhpAssoc<LocaleEntry | undefined>
  localeCategories: LocaleCategoryMap
  pointers: PhpList<PhpInput>
  // undefined unless a string is stored under `locale_default`
  locale_default: string | undefined
}

// Guards for the individual buckets of the runtime object. Each one only checks that the
// value is a non-array object; the entries inside are not inspected.
const isIniBag = (value: PhpInput): value is PhpAssoc<IniEntry | undefined> => {
  return isPhpAssocObject<IniEntry | undefined>(value)
}

const isLocaleBag = (value: PhpInput): value is PhpAssoc<LocaleEntry | undefined> => {
  return isPhpAssocObject<LocaleEntry | undefined>(value)
}

const isLocaleCategoryBag = (value: PhpInput): value is LocaleCategoryMap => {
  return isPhpAssocObject<string | undefined>(value)
}

// Object that hosts the shared `$locutus` state: `window` when it exists, otherwise
// `global`, otherwise a fresh object private to this module.
const globalContext: GlobalWithLocutus = ((): GlobalWithLocutus => {
  if (typeof window === 'object' && window !== null) {
    return window
  }
  if (typeof global === 'object' && global !== null) {
    return global
  }
  return {}
})()

// Returns `globalContext.$locutus.php`, creating `$locutus` and/or `php` as empty objects
// whenever the stored value is missing or not an object.
const ensurePhpRuntimeObject = (): PhpAssoc<PhpInput> => {
  const existingContainer = globalContext.$locutus
  const container: LocutusRuntimeContainer =
    typeof existingContainer === 'object' && existingContainer !== null ? existingContainer : {}
  if (container !== existingContainer) {
    // nothing usable was stored; publish the fresh container
    globalContext.$locutus = container
  }

  const existingPhp = container.php
  const php: PhpAssoc<PhpInput> = typeof existingPhp === 'object' && existingPhp !== null ? existingPhp : {}
  if (php !== existingPhp) {
    container.php = php
  }

  return php
}

// Reads the four state buckets from the runtime object, replaces any bucket that has the
// wrong shape with an empty one (writing the replacement back), and returns them together
// with the stored default locale.
function ensurePhpRuntimeState(): PhpRuntimeState {
  const php = ensurePhpRuntimeObject()
  const {
    ini: storedIni,
    locales: storedLocales,
    localeCategories: storedCategories,
    pointers: storedPointers,
  } = php

  const ini = isIniBag(storedIni) ? storedIni : {}
  if (ini !== storedIni) {
    php.ini = ini
  }

  const locales = isLocaleBag(storedLocales) ? storedLocales : {}
  if (locales !== storedLocales) {
    php.locales = locales
  }

  const localeCategories = isLocaleCategoryBag(storedCategories) ? storedCategories : {}
  if (localeCategories !== storedCategories) {
    php.localeCategories = localeCategories
  }

  const pointers: PhpList<PhpInput> = Array.isArray(storedPointers) ? storedPointers : []
  if (pointers !== storedPointers) {
    php.pointers = pointers
  }

  // The default locale is only reported when it is a string; it is never written back.
  const storedDefault = php.locale_default

  return {
    ini,
    locales,
    localeCategories,
    pointers,
    locale_default: typeof storedDefault === 'string' ? storedDefault : undefined,
  }
}

// php/info/ini_get (Locutus dependency module)
function ini_get(varname: string): string {
  //  discuss at: https://locutus.io/php/ini_get/
  // original by: Brett Zamir (https://brett-zamir.me)
  //      note 1: The ini values must be set by ini_set or manually within an ini file
  //   example 1: ini_set('date.timezone', 'Asia/Hong_Kong')
  //   example 1: ini_get('date.timezone')
  //   returns 1: 'Asia/Hong_Kong'

  const entry = ensurePhpRuntimeState().ini[varname]

  // Unknown setting: report the empty string.
  if (!entry) {
    return ''
  }

  // A setting without a usable value (undefined or null) also reads as the empty string.
  const localValue = entry.local_value
  if (localValue === undefined || localValue === null) {
    return ''
  }

  return String(localValue)
}

// php/strings/substr (target function module)
const _php_cast_string = _phpCastString;

function substr(input: string | number, start: number, len?: number): string | false {
  //  discuss at: https://locutus.io/php/substr/
  // original by: Martijn Wieringa
  // bugfixed by: T.Wild
  // improved by: Onno Marsman (https://twitter.com/onnomarsman)
  // improved by: Brett Zamir (https://brett-zamir.me)
  //  revised by: Theriault (https://github.com/Theriault)
  //  revised by: Rafał Kukawski
  //      note 1: Handles rare Unicode characters if 'unicode.semantics' ini (PHP6) is set to 'on'
  //      note 2: A negative start that reaches back past the beginning of the string is
  //      note 2: clamped to 0, as in PHP, instead of being rejected with false
  //   example 1: substr('abcdef', 0, -1)
  //   returns 1: 'abcde'
  //   example 2: substr(2, 0, -6)
  //   returns 2: false
  //   example 3: ini_set('unicode.semantics', 'on')
  //   example 3: substr('a\uD801\uDC00', 0, -1)
  //   returns 3: 'a'
  //   example 4: ini_set('unicode.semantics', 'on')
  //   example 4: substr('a\uD801\uDC00', 0, 2)
  //   returns 4: 'a\uD801\uDC00'
  //   example 5: ini_set('unicode.semantics', 'on')
  //   example 5: substr('a\uD801\uDC00', -1, 1)
  //   returns 5: '\uD801\uDC00'
  //   example 6: ini_set('unicode.semantics', 'on')
  //   example 6: substr('a\uD801\uDC00z\uD801\uDC00', -3, 2)
  //   returns 6: '\uD801\uDC00z'
  //   example 7: ini_set('unicode.semantics', 'on')
  //   example 7: substr('a\uD801\uDC00z\uD801\uDC00', -3, -1)
  //   returns 7: '\uD801\uDC00z'
  //   example 8: substr('abc', -5)
  //   returns 8: 'abc'
  //   example 9: substr('abc', -5, 2)
  //   returns 9: 'ab'

  const str = _php_cast_string(input)

  const multibyte = ini_get('unicode.semantics') === 'on'

  // In multibyte mode the string is split so that a surrogate pair occupies a single slot;
  // all offsets below then count those slots instead of UTF-16 code units.
  const chars = multibyte ? str.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]|[\s\S]/g) || [] : null

  const inputLength = chars ? chars.length : str.length

  // A negative start counts back from the end of the string. If it reaches further back
  // than the string is long, begin at the first character rather than failing.
  const from = start < 0 ? Math.max(start + inputLength, 0) : start

  // Without len the result runs to the end of the string. A negative len drops that many
  // characters from the end; a non-negative len is measured from the resolved start.
  let end = inputLength
  if (typeof len !== 'undefined') {
    end = len < 0 ? len + inputLength : len + from
  }

  // A start beyond the string, or an end that falls before the start, has no valid result.
  if (from > inputLength || from > end) {
    return false
  }

  return chars ? chars.slice(from, end).join('') : str.slice(from, end)
}

// php/_helpers/_phpCastString (Locutus helper dependency)
function _phpCastString(value) {
  // original by: Rafał Kukawski

  // Dispatch on the runtime type; every supported type maps to a fixed string form.
  switch (typeof value) {
    case 'boolean':
      // true becomes '1', false becomes the empty string
      return value ? '1' : ''

    case 'string':
      return value

    case 'number': {
      if (isNaN(value)) {
        return 'NAN'
      }
      if (isFinite(value)) {
        return `${value}`
      }
      // Only the two infinities remain; the sign decides the prefix.
      const sign = value < 0 ? '-' : ''
      return sign + 'INF'
    }

    case 'undefined':
      return ''

    case 'object':
      if (Array.isArray(value)) {
        return 'Array'
      }
      // typeof null is 'object' as well; null casts to the empty string.
      return value !== null ? 'Object' : ''

    default:
      // functions, symbols and bigints have no string form here
      throw new Error('Unsupported value type')
  }
}

// php/_helpers/_phpTypes (Locutus helper dependency)

// True for any non-null value whose `typeof` is 'object' (arrays included).
function isObjectLike(value) {
  if (value === null) {
    return false
  }
  return typeof value === 'object'
}

// True for non-null objects that are not arrays.
function isPhpAssocObject(value) {
  if (!isObjectLike(value)) {
    return false
  }
  return !Array.isArray(value)
}

// php/_helpers/_phpRuntimeState (Locutus helper dependency)

// Object that hosts the shared `$locutus` state: `window` when it exists, otherwise
// `global`, otherwise a fresh object private to this module.
const globalContext = (() => {
  if (typeof window === 'object' && window !== null) {
    return window
  }
  if (typeof global === 'object' && global !== null) {
    return global
  }
  return {}
})()

// Returns `globalContext.$locutus.php`, creating `$locutus` and/or `php` as empty objects
// whenever the stored value is missing or not an object.
const ensurePhpRuntimeObject = () => {
  const existingContainer = globalContext.$locutus
  const container = typeof existingContainer === 'object' && existingContainer !== null ? existingContainer : {}
  if (container !== existingContainer) {
    // nothing usable was stored; publish the fresh container
    globalContext.$locutus = container
  }

  const existingPhp = container.php
  const php = typeof existingPhp === 'object' && existingPhp !== null ? existingPhp : {}
  if (php !== existingPhp) {
    container.php = php
  }

  return php
}

// Reads the four state buckets from the runtime object, replaces any bucket that has the
// wrong shape with an empty one (writing the replacement back), and returns them together
// with the stored default locale.
function ensurePhpRuntimeState() {
  const php = ensurePhpRuntimeObject()
  const {
    ini: storedIni,
    locales: storedLocales,
    localeCategories: storedCategories,
    pointers: storedPointers,
  } = php

  const ini = isPhpAssocObject(storedIni) ? storedIni : {}
  if (ini !== storedIni) {
    php.ini = ini
  }

  const locales = isPhpAssocObject(storedLocales) ? storedLocales : {}
  if (locales !== storedLocales) {
    php.locales = locales
  }

  const localeCategories = isPhpAssocObject(storedCategories) ? storedCategories : {}
  if (localeCategories !== storedCategories) {
    php.localeCategories = localeCategories
  }

  const pointers = Array.isArray(storedPointers) ? storedPointers : []
  if (pointers !== storedPointers) {
    php.pointers = pointers
  }

  // The default locale is only reported when it is a string; it is never written back.
  const storedDefault = php.locale_default

  return {
    ini,
    locales,
    localeCategories,
    pointers,
    locale_default: typeof storedDefault === 'string' ? storedDefault : undefined,
  }
}

// php/info/ini_get (Locutus dependency module)
function ini_get(varname) {
  //  discuss at: https://locutus.io/php/ini_get/
  // original by: Brett Zamir (https://brett-zamir.me)
  //      note 1: The ini values must be set by ini_set or manually within an ini file
  //   example 1: ini_set('date.timezone', 'Asia/Hong_Kong')
  //   example 1: ini_get('date.timezone')
  //   returns 1: 'Asia/Hong_Kong'

  const entry = ensurePhpRuntimeState().ini[varname]

  // Unknown setting: report the empty string.
  if (!entry) {
    return ''
  }

  // A setting without a usable value (undefined or null) also reads as the empty string.
  const localValue = entry.local_value
  if (localValue === undefined || localValue === null) {
    return ''
  }

  return String(localValue)
}

// php/strings/substr (target function module)
const _php_cast_string = _phpCastString

function substr(input, start, len) {
  //  discuss at: https://locutus.io/php/substr/
  // original by: Martijn Wieringa
  // bugfixed by: T.Wild
  // improved by: Onno Marsman (https://twitter.com/onnomarsman)
  // improved by: Brett Zamir (https://brett-zamir.me)
  //  revised by: Theriault (https://github.com/Theriault)
  //  revised by: Rafał Kukawski
  //      note 1: Handles rare Unicode characters if 'unicode.semantics' ini (PHP6) is set to 'on'
  //      note 2: A negative start that reaches back past the beginning of the string is
  //      note 2: clamped to 0, as in PHP, instead of being rejected with false
  //   example 1: substr('abcdef', 0, -1)
  //   returns 1: 'abcde'
  //   example 2: substr(2, 0, -6)
  //   returns 2: false
  //   example 3: ini_set('unicode.semantics', 'on')
  //   example 3: substr('a\uD801\uDC00', 0, -1)
  //   returns 3: 'a'
  //   example 4: ini_set('unicode.semantics', 'on')
  //   example 4: substr('a\uD801\uDC00', 0, 2)
  //   returns 4: 'a\uD801\uDC00'
  //   example 5: ini_set('unicode.semantics', 'on')
  //   example 5: substr('a\uD801\uDC00', -1, 1)
  //   returns 5: '\uD801\uDC00'
  //   example 6: ini_set('unicode.semantics', 'on')
  //   example 6: substr('a\uD801\uDC00z\uD801\uDC00', -3, 2)
  //   returns 6: '\uD801\uDC00z'
  //   example 7: ini_set('unicode.semantics', 'on')
  //   example 7: substr('a\uD801\uDC00z\uD801\uDC00', -3, -1)
  //   returns 7: '\uD801\uDC00z'
  //   example 8: substr('abc', -5)
  //   returns 8: 'abc'
  //   example 9: substr('abc', -5, 2)
  //   returns 9: 'ab'

  const str = _php_cast_string(input)

  const multibyte = ini_get('unicode.semantics') === 'on'

  // In multibyte mode the string is split so that a surrogate pair occupies a single slot;
  // all offsets below then count those slots instead of UTF-16 code units.
  const chars = multibyte ? str.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]|[\s\S]/g) || [] : null

  const inputLength = chars ? chars.length : str.length

  // A negative start counts back from the end of the string. If it reaches further back
  // than the string is long, begin at the first character rather than failing.
  const from = start < 0 ? Math.max(start + inputLength, 0) : start

  // Without len the result runs to the end of the string. A negative len drops that many
  // characters from the end; a non-negative len is measured from the resolved start.
  let end = inputLength
  if (typeof len !== 'undefined') {
    end = len < 0 ? len + inputLength : len + from
  }

  // A start beyond the string, or an end that falls before the start, has no valid result.
  if (from > inputLength || from > end) {
    return false
  }

  return chars ? chars.slice(from, end).join('') : str.slice(from, end)
}

Improve this function

Locutus is a community effort following The McDonald's Theory: we ship first iterations, hoping others will improve them. If you see something that could be better, we'd love your contribution.

View on GitHub · Edit on GitHub · View Raw

« More PHP strings functions

Star