PHP's unserialize in TypeScript

How to use

Install via yarn add locutus and import: import { unserialize } from 'locutus/php/var/unserialize'.

Or with CommonJS: const { unserialize } = require('locutus/php/var/unserialize')

Use a bundler that supports tree-shaking so you only ship the functions you actually use. Vite, webpack, Rollup, and Parcel all handle this. For server-side use this is less of a concern.

Examples

These examples are extracted from test cases that automatically verify our functions against their native counterparts.

# | code | expected result
1 | unserialize('a:3:{i:0;s:5:"Kevin";i:1;s:3:"van";i:2;s:9:"Zonneveld";}') | ['Kevin', 'van', 'Zonneveld']
2 | unserialize('a:2:{s:9:"firstName";s:5:"Kevin";s:7:"midName";s:3:"van";}') | {firstName: 'Kevin', midName: 'van'}
3 | unserialize('a:3:{s:2:"ü";s:2:"ü";s:3:"四";s:3:"四";s:4:"𠜎";s:4:"𠜎";}') | {'ü': 'ü', '四': '四', '𠜎': '𠜎'}
4 | unserialize(undefined) | false
5 | unserialize('O:8:"stdClass":1:{s:3:"foo";b:1;}') | { foo: true }
6 | unserialize('a:2:{i:0;N;i:1;s:0:"";}') | [null, ""]
7 | unserialize('S:7:"\\65\\73\\63\\61\\70\\65\\64";') | 'escaped'

Notes

  • We feel the main purpose of this function should be to ease the transport of data between PHP and JS. Aiming for PHP compatibility, we have to translate objects to arrays.

Here's what our current TypeScript equivalent to PHP's unserialize looks like.

// Leaf values the parsers in this file can produce.
type UnserializedScalar = string | number | boolean | null
// Plain keyed container; used for stdClass objects and for arrays whose keys are not 0..n-1.
type UnserializedObject = { [key: string]: UnserializedValue }
// Any value that can come out of the parser: a scalar, a keyed object or a list.
type UnserializedValue = UnserializedScalar | UnserializedObject | UnserializedValue[]

// What every parser returns: the parsed value plus how many characters of input it consumed.
type ParsedResult = [value: UnserializedValue, offset: number]
// What can be handed to the cache; the offset is optional because containers are
// registered before their contents (and therefore their length) are known.
type CacheEntry = [value: UnserializedValue, offset?: number]
// The cache is callable (records the entry's value and returns the entry unchanged)
// and exposes `get` to look a recorded value up by zero-based index.
type CacheFn = (<T extends CacheEntry>(value: T) => T) & { get: (index: number) => UnserializedValue }
// How `unserialize` reports a parse failure: rethrow, console.error, or stay quiet.
type ErrorMode = 'throw' | 'log' | 'silent'
// Accepted input; `unserialize` returns false for anything that is not a string.
type UnserializeInput = string | null | undefined

/**
 * Creates the value cache used to resolve `r:`/`R:` references.
 *
 * The returned function records the first element of every entry it is given
 * (the second element is only the parser offset and is not stored) and hands
 * the entry back untouched, so it can wrap a parser call transparently.
 * Its `get` method returns the value recorded at a zero-based index.
 *
 * @returns a callable cache with a `get(index)` lookup
 * @throws RangeError from `get` when nothing is recorded at the requested index
 */
function initCache(): CacheFn {
  const seen: UnserializedValue[] = []

  function cache<T extends CacheEntry>(entry: T): T {
    seen.push(entry[0])
    return entry
  }

  function get(index: number): UnserializedValue {
    // An index past the end and an empty slot are reported the same way.
    const hit = index < seen.length ? seen[index] : undefined

    if (typeof hit === 'undefined') {
      // References are 1-based in the serialized form, hence the + 1 in the message.
      throw new RangeError(`Can't resolve reference ${index + 1}`)
    }

    return hit
  }

  return Object.assign(cache, { get })
}

/**
 * Reads the type tag at the start of `str` and delegates to the matching parser.
 *
 * Scalar results are recorded in `cache` here; array and object parsers receive
 * the cache and register themselves.
 *
 * @param str   remaining serialized input, starting at a type tag
 * @param cache reference cache shared across the whole parse
 * @returns the parsed value and the number of characters consumed
 * @throws SyntaxError when no tag is found or the tag is not supported
 */
function expectType(str: string, cache: CacheFn): ParsedResult {
  // `N` must be followed by `;`, every other tag by `:`. The last alternative
  // captures unknown tags so they can be named in the error below.
  const tagMatch = /^(?:N(?=;)|[bidsSaOCrR](?=:)|[^:]+(?=:))/g.exec(str)
  const tag = tagMatch === null ? undefined : tagMatch[0]

  if (!tag) {
    throw new SyntaxError('Invalid input: ' + str)
  }

  if (tag === 'N') {
    // `N;` is always two characters long
    return cache([null, 2])
  }
  if (tag === 'b') {
    return cache(expectBool(str))
  }
  if (tag === 'i') {
    return cache(expectInt(str))
  }
  if (tag === 'd') {
    return cache(expectFloat(str))
  }
  if (tag === 's') {
    return cache(expectString(str))
  }
  if (tag === 'S') {
    return cache(expectEscapedString(str))
  }
  if (tag === 'a') {
    return expectArray(str, cache)
  }
  if (tag === 'O') {
    return expectObject(str, cache)
  }
  if (tag === 'C') {
    return expectClass(str, cache)
  }
  if (tag === 'r' || tag === 'R') {
    return expectReference(str, cache)
  }

  throw new SyntaxError(`Invalid or unsupported data type: ${tag}`)
}

/**
 * Parses a serialized boolean (`b:0;` or `b:1;`) at the start of `str`.
 *
 * @param str remaining serialized input
 * @returns the boolean and the number of characters consumed
 * @throws SyntaxError when the input does not start with a valid bool token
 */
function expectBool(str: string): [boolean, number] {
  const found = /^b:([01]);/.exec(str)
  const whole = found === null ? undefined : found[0]
  const flag = found === null ? undefined : found[1]

  if (!whole || !flag) {
    throw new SyntaxError('Invalid bool value, expected 0 or 1')
  }

  return [flag === '1', whole.length]
}

/**
 * Parses a serialized integer (`i:<digits>;`, optional sign) at the start of `str`.
 *
 * @param str remaining serialized input
 * @returns the integer and the number of characters consumed
 * @throws SyntaxError when the input does not start with a valid integer token
 */
function expectInt(str: string): [number, number] {
  const found = /^i:([+-]?\d+);/.exec(str)
  const whole = found === null ? undefined : found[0]
  const digits = found === null ? undefined : found[1]

  if (!whole || !digits) {
    throw new SyntaxError('Expected an integer value')
  }

  const parsed = parseInt(digits, 10)
  return [parsed, whole.length]
}

/**
 * Parses a serialized float (`d:<number>;`) at the start of `str`.
 *
 * Accepts the special tokens `NAN`, `INF` and `-INF`, and ordinary decimal
 * numbers with an optional leading sign and an optional exponent whose sign
 * is itself optional (e.g. `-1.5`, `1.0E+25`, `1e5`).
 *
 * @param str remaining serialized input
 * @returns the number and the number of characters consumed
 * @throws SyntaxError when the input does not start with a valid float token
 */
function expectFloat(str: string): [number, number] {
  // The leading [+-]? is required for negative floats such as `d:-1.5;`,
  // which is exactly what PHP's serialize() emits for negative numbers.
  const reFloat = /^d:(NAN|-?INF|[+-]?(?:\d+\.\d*|\d*\.\d+|\d+)(?:[eE][+-]?\d+)?);/
  const [match, floatMatch] = reFloat.exec(str) || []

  if (!match || !floatMatch) {
    throw new SyntaxError('Expected a float value')
  }

  let floatValue: number

  switch (floatMatch) {
    case 'NAN':
      floatValue = Number.NaN
      break
    case '-INF':
      floatValue = Number.NEGATIVE_INFINITY
      break
    case 'INF':
      floatValue = Number.POSITIVE_INFINITY
      break
    default:
      floatValue = parseFloat(floatMatch)
      break
  }

  return [floatValue, match.length]
}

/**
 * Reads characters from the start of `str` until at least `len` UTF-8 bytes
 * have been accounted for or the input runs out.
 *
 * Serialized lengths are byte counts, while JS strings are UTF-16 code units,
 * so each code unit is weighed by the number of UTF-8 bytes it stands for.
 * With `escapedString` set, a backslash followed by two hex digits is decoded
 * to the character with that code and counted as a single byte.
 *
 * @param str           input positioned just after the opening quote
 * @param len           number of bytes announced in the serialized header
 * @param escapedString whether `\xx` escape sequences should be decoded
 * @returns the decoded text, the number of bytes counted (callers compare this
 *          with `len`) and the number of escape sequences decoded
 */
function readBytes(str: string, len: number, escapedString = false): [string, number, number] {
  const total = str.length
  const pieces: string[] = []
  let byteCount = 0
  let escapes = 0
  let pos = 0
  let prevWasHigh = false

  while (byteCount < len && pos < total) {
    const unit = str.charCodeAt(pos)
    const isHigh = unit >= 0xd800 && unit <= 0xdbff
    const isLow = unit >= 0xdc00 && unit <= 0xdfff
    let piece = str.charAt(pos)

    if (escapedString && piece === '\\') {
      // an escape sequence occupies three source characters: the backslash and two hex digits
      piece = String.fromCharCode(parseInt(str.slice(pos + 1, pos + 3), 16))
      escapes += 1
      pos += 3
    } else {
      pos += 1
    }

    let width: number
    if (isHigh || (isLow && prevWasHigh)) {
      // each half of a surrogate pair accounts for two of the pair's four bytes
      width = 2
    } else if (unit > 0x7ff) {
      // also reached by a low surrogate that has no preceding high surrogate
      width = 3
    } else if (unit > 0x7f) {
      width = 2
    } else {
      width = 1
    }

    if (prevWasHigh && !isLow) {
      // the previous high surrogate was unpaired: top it up from two to three bytes
      width += 1
    }

    byteCount += width
    pieces.push(piece)
    prevWasHigh = isHigh
  }

  return [pieces.join(''), byteCount, escapes]
}

/**
 * Parses a serialized string (`s:<byte length>:"<text>";`) at the start of `str`.
 *
 * The announced length is in bytes, whereas JS strings are UTF-16, so the text
 * is measured with `readBytes` instead of being sliced by character count.
 *
 * @param str remaining serialized input
 * @returns the string and the number of characters consumed
 * @throws SyntaxError on a malformed header, a byte-length mismatch or a missing `";`
 */
function expectString(str: string): [string, number] {
  // the header match includes the opening quote
  const header = /^s:(\d+):"/g.exec(str)
  const prefix = header === null ? undefined : header[0]
  const declared = header === null ? undefined : header[1]

  if (!prefix || !declared) {
    throw new SyntaxError('Expected a string value')
  }

  const expectedBytes = parseInt(declared, 10)
  const body = str.slice(prefix.length)
  const [text, actualBytes] = readBytes(body, expectedBytes)

  if (actualBytes !== expectedBytes) {
    throw new SyntaxError(`Expected string of ${expectedBytes} bytes, but got ${actualBytes}`)
  }

  // strict parsing: the text must be closed by `";`
  if (!body.startsWith('";', text.length)) {
    throw new SyntaxError('Expected ";')
  }

  // header + text + closing `";`
  return [text, prefix.length + text.length + 2]
}

/**
 * Parses an escaped string (`S:<byte length>:"<text>";`) at the start of `str`,
 * where the text may contain `\xx` hex escape sequences.
 *
 * @param str remaining serialized input
 * @returns the decoded string and the number of source characters consumed
 * @throws SyntaxError on a malformed header, a byte-length mismatch or a missing `";`
 */
function expectEscapedString(str: string): [string, number] {
  const reStrLength = /^S:(\d+):"/g // also match the opening " char
  const [match, strLenMatch] = reStrLength.exec(str) || []

  if (!match || !strLenMatch) {
    throw new SyntaxError('Expected an escaped string value')
  }

  const len = parseInt(strLenMatch, 10)

  str = str.slice(match.length)

  const [strMatch, bytes, escapedChars] = readBytes(str, len, true)

  if (bytes !== len) {
    throw new SyntaxError(`Expected escaped string of ${len} bytes, but got ${bytes}`)
  }

  // Every escape sequence is three source characters but yields one decoded
  // character, so the source span is longer than the decoded text.
  const consumedLength = strMatch.length + escapedChars * 2

  str = str.slice(consumedLength)

  // strict parsing, match closing "; chars
  if (!str.startsWith('";')) {
    throw new SyntaxError('Expected ";')
  }

  // The offset must describe the SOURCE span, not the decoded text. Otherwise
  // an escaped string nested in an array or object leaves the enclosing parser
  // pointing into the middle of this token.
  return [strMatch, match.length + consumedLength + 2] // skip last ";
}

/**
 * Parses an array key or object property name at the start of `str`.
 *
 * Tries a plain string, then an escaped string, then an integer; the first
 * parser that succeeds wins and the failures of the earlier ones are discarded.
 *
 * @param str remaining serialized input
 * @returns the key and the number of characters consumed
 * @throws SyntaxError when none of the three forms matches
 */
function expectKeyOrIndex(str: string): [string | number, number] {
  const candidates: Array<(input: string) => [string | number, number]> = [
    expectString,
    expectEscapedString,
    expectInt,
  ]

  for (const parse of candidates) {
    try {
      return parse(str)
    } catch (_err) {
      // deliberate: fall through to the next candidate parser
    }
  }

  throw new SyntaxError('Expected key or index')
}

/**
 * Parses a serialized object at the start of `str`.
 *
 * Format: `O:<class name length>:"<class name>":<prop count>:{<props and values>}`,
 * e.g. `O:8:"stdClass":2:{s:3:"foo";s:3:"bar";s:3:"bar";s:3:"baz";}`.
 * Only `stdClass` is supported; it is returned as a plain keyed object.
 *
 * @param str   remaining serialized input
 * @param cache reference cache; the object is registered before its properties are parsed
 * @returns the object and the number of characters consumed
 * @throws SyntaxError on a malformed header, an unsupported class or a missing `}`
 */
function expectObject(str: string, cache: CacheFn): ParsedResult {
  const reObjectLiteral = /^O:(\d+):"([^"]+)":(\d+):\{/
  const [objectLiteralBeginMatch /* classNameLengthMatch */, , className, propCountMatch] =
    reObjectLiteral.exec(str) || []

  if (!objectLiteralBeginMatch || !propCountMatch) {
    throw new SyntaxError('Invalid input')
  }

  if (className !== 'stdClass') {
    throw new SyntaxError(`Unsupported object type: ${className}`)
  }

  let totalOffset = objectLiteralBeginMatch.length

  const propCount = parseInt(propCountMatch, 10)
  const obj: UnserializedObject = {}
  cache([obj])

  str = str.slice(totalOffset)

  for (let i = 0; i < propCount; i++) {
    const [propName, propLength] = expectKeyOrIndex(str)
    str = str.slice(propLength)
    totalOffset += propLength

    const [propValue, valueLength] = expectType(str, cache)
    str = str.slice(valueLength)
    totalOffset += valueLength

    // Define the property instead of assigning it: `obj['__proto__'] = x` would
    // invoke the prototype setter, silently dropping the key or swapping the
    // result's prototype for data taken from the (possibly untrusted) input.
    Object.defineProperty(obj, String(propName), {
      value: propValue,
      writable: true,
      enumerable: true,
      configurable: true,
    })
  }

  // strict parsing, expect } after object literal
  if (str.charAt(0) !== '}') {
    throw new SyntaxError('Expected }')
  }

  return [obj, totalOffset + 1] // skip final }
}

/**
 * Placeholder for the `C:` (custom serialized class) type; always throws.
 *
 * Proper support would mean invoking a constructor named in the input, which
 * requires either eval-like behaviour (unsafe) or assuming the constructor
 * lives in the global scope (too limiting).
 *
 * @throws Error unconditionally
 */
function expectClass(_str: string, _cache: CacheFn): ParsedResult {
  throw new Error('Not yet implemented')
}

/**
 * Parses a reference (`r:<n>;` or `R:<n>;`, n >= 1) at the start of `str`
 * and resolves it against the values recorded in `cache`.
 *
 * @param str   remaining serialized input
 * @param cache reference cache to resolve against
 * @returns the referenced value and the number of characters consumed
 * @throws SyntaxError when the input does not start with a valid reference token;
 *         whatever `cache.get` throws for an unknown index propagates unchanged
 */
function expectReference(str: string, cache: CacheFn): ParsedResult {
  const found = /^[rR]:([1-9]\d*);/.exec(str)
  const whole = found === null ? undefined : found[0]
  const oneBased = found === null ? undefined : found[1]

  if (!whole || !oneBased) {
    throw new SyntaxError('Expected reference value')
  }

  // serialized references count from 1, the cache from 0
  const target = cache.get(parseInt(oneBased, 10) - 1)

  return [target, whole.length]
}

/**
 * Parses a serialized array (`a:<count>:{<key><value>...}`) at the start of `str`.
 *
 * The header and the closing brace are handled here; the key/value pairs are
 * read by `expectArrayItems`.
 *
 * @param str   remaining serialized input
 * @param cache reference cache shared across the whole parse
 * @returns the parsed container and the number of characters consumed
 * @throws SyntaxError on a malformed header or a missing closing `}`
 */
function expectArray(str: string, cache: CacheFn): ParsedResult {
  const header = /^a:(\d+):{/.exec(str)
  const opening = header === null ? undefined : header[0]
  const declaredCount = header === null ? undefined : header[1]

  if (!opening || !declaredCount) {
    throw new SyntaxError('Expected array length annotation')
  }

  const body = str.slice(opening.length)
  const [value, consumed] = expectArrayItems(body, parseInt(declaredCount, 10), cache)

  // strict parsing: the items must be followed directly by the closing brace
  if (body.charAt(consumed) !== '}') {
    throw new SyntaxError('Expected }')
  }

  // header + items + closing brace
  return [value, opening.length + consumed + 1]
}

/**
 * Parses `expectedItems` key/value pairs from the start of `str`.
 *
 * If the keys are exactly the integers 0, 1, 2, ... in order, the values are
 * returned as a list; otherwise a keyed object is returned.
 *
 * Note: the container registered in `cache` is the keyed object, so a later
 * reference to a list-like array resolves to that object, not to the returned list.
 *
 * @param str           input positioned just after the array's opening `{`
 * @param expectedItems number of key/value pairs announced in the array header
 * @param cache         reference cache shared across the whole parse
 * @returns the list or keyed object, and the number of characters consumed
 * @throws SyntaxError when a key or value cannot be parsed
 */
function expectArrayItems(
  str: string,
  expectedItems = 0,
  cache: CacheFn,
): [UnserializedObject | UnserializedValue[], number] {
  let totalOffset = 0
  let hasContinousIndexes = true
  let lastIndex = -1
  const items: UnserializedObject = {}
  cache([items])

  for (let i = 0; i < expectedItems; i++) {
    const [key, keyLength] = expectKeyOrIndex(str)

    if (typeof key === 'number') {
      hasContinousIndexes = hasContinousIndexes && key === lastIndex + 1
      lastIndex = key
    } else {
      // any string key makes the container an object
      hasContinousIndexes = false
    }

    str = str.slice(keyLength)
    totalOffset += keyLength

    // references are resolved immediately, so if duplicate key overwrites previous array index
    // the old value is anyway resolved
    // fixme: but next time the same reference should point to the new value
    const [value, valueLength] = expectType(str, cache)
    str = str.slice(valueLength)
    totalOffset += valueLength

    // Define the property instead of assigning it: `items['__proto__'] = x` would
    // invoke the prototype setter, silently dropping the key or swapping the
    // result's prototype for data taken from the (possibly untrusted) input.
    Object.defineProperty(items, String(key), {
      value,
      writable: true,
      enumerable: true,
      configurable: true,
    })
  }

  if (hasContinousIndexes) {
    return [Object.values(items), totalOffset]
  }

  return [items, totalOffset]
}

/**
 * Parses a string in PHP's serialize() format into a JS value.
 *
 * @param str       serialized input; anything that is not a string yields `false`
 * @param errorMode what to do when parsing fails: `'throw'` rethrows the error,
 *                  `'log'` (default) prints it with console.error, `'silent'` does nothing
 * @returns the parsed value, or `false` for non-string input or a failed parse
 *          (unless `errorMode` is `'throw'`)
 */
export function unserialize(str: UnserializeInput, errorMode: ErrorMode = 'log'): UnserializedValue | false {
  // discuss at: https://locutus.io/php/unserialize/
  // original by: Arpad Ray (mailto:arpad@php.net)
  // improved by: Pedro Tainha (https://www.pedrotainha.com)
  // improved by: Kevin van Zonneveld (https://kvz.io)
  // improved by: Kevin van Zonneveld (https://kvz.io)
  // improved by: Chris
  // improved by: James
  // improved by: Le Torbi
  // improved by: Eli Skeggs
  // bugfixed by: dptr1988
  // bugfixed by: Kevin van Zonneveld (https://kvz.io)
  // bugfixed by: Brett Zamir (https://brett-zamir.me)
  // bugfixed by: philippsimon (https://github.com/philippsimon/)
  // revised by: d3x
  // input by: Brett Zamir (https://brett-zamir.me)
  // input by: Martin (https://www.erlenwiese.de/)
  // input by: kilops
  // input by: Jaroslaw Czarniak
  // input by: lovasoa (https://github.com/lovasoa/)
  // improved by: Rafał Kukawski
  // reimplemented by: Rafał Kukawski
  // note 1: We feel the main purpose of this function should be
  // note 1: to ease the transport of data between php & js
  // note 1: Aiming for PHP-compatibility, we have to translate objects to arrays
  // example 1: unserialize('a:3:{i:0;s:5:"Kevin";i:1;s:3:"van";i:2;s:9:"Zonneveld";}')
  // returns 1: ['Kevin', 'van', 'Zonneveld']
  // example 2: unserialize('a:2:{s:9:"firstName";s:5:"Kevin";s:7:"midName";s:3:"van";}')
  // returns 2: {firstName: 'Kevin', midName: 'van'}
  // example 3: unserialize('a:3:{s:2:"ü";s:2:"ü";s:3:"四";s:3:"四";s:4:"𠜎";s:4:"𠜎";}')
  // returns 3: {'ü': 'ü', '四': '四', '𠜎': '𠜎'}
  // example 4: unserialize(undefined)
  // returns 4: false
  // example 5: unserialize('O:8:"stdClass":1:{s:3:"foo";b:1;}')
  // returns 5: { foo: true }
  // example 6: unserialize('a:2:{i:0;N;i:1;s:0:"";}')
  // returns 6: [null, ""]
  // example 7: unserialize('S:7:"\\65\\73\\63\\61\\70\\65\\64";')
  // returns 7: 'escaped'

  // Non-string input is not an error; it simply has nothing to parse.
  if (typeof str !== 'string') {
    return false
  }

  try {
    const [value] = expectType(str, initCache())
    return value
  } catch (err) {
    switch (errorMode) {
      case 'throw':
        throw err
      case 'log':
        console.error(err)
        break
      default:
        // silent mode: swallow the error
        break
    }

    return false
  }
}

Improve this function

Locutus is a community effort following The McDonald's Theory: we ship first iterations, hoping others will improve them. If you see something that could be better, we'd love your contribution.

View on GitHub · Edit on GitHub · View Raw


« More PHP var functions


Star