Install via `yarn add locutus`, then import the function as an ES module:
`import { is_unicode } from 'locutus/php/var/is_unicode'`
Or with CommonJS: `const { is_unicode } = require('locutus/php/var/is_unicode')`
Use a bundler that supports tree-shaking so you only ship the functions you actually use. Vite, webpack, Rollup, and Parcel all handle this. For server-side use, this is less of a concern.
Examples
These examples are extracted from test cases that automatically verify our functions against their native counterparts.
| # | code | expected result |
|---|------|-----------------|
| 1 | `is_unicode('We the peoples of the United Nations...!')` | `true` |
Notes
Almost all strings in JavaScript should be Unicode
Here's what our current TypeScript equivalent to PHP's is_unicode looks like.
/**
 * Reports whether a value is a string made only of well-formed UTF-16:
 * every high surrogate (U+D800–U+DBFF) is immediately followed by a low
 * surrogate (U+DC00–U+DFFF), and no low surrogate appears on its own.
 *
 * @param vr - The value to inspect.
 * @returns `true` if `vr` is a string with no unpaired surrogates;
 *          `false` for non-strings and for strings with a lone surrogate.
 */
export function is_unicode(vr: UnicodeValue): vr is string {
  //  discuss at: https://locutus.io/php/is_unicode/
  // original by: Brett Zamir (https://brett-zamir.me)
  //      note 1: Almost all strings in JavaScript should be Unicode
  //   example 1: is_unicode('We the peoples of the United Nations...!')
  //   returns 1: true

  if (typeof vr !== 'string') {
    return false
  }

  // If surrogates occur outside of high-low pairs, then this is not Unicode.
  // A single pass over the code units also catches a lone surrogate at the
  // very start or end of the string and a stray low surrogate directly after
  // a valid pair, which neighbour-based regex matching can miss.
  for (let i = 0; i < vr.length; i++) {
    const unit = vr.charCodeAt(i)

    if (unit >= 0xd800 && unit <= 0xdbff) {
      // charCodeAt yields NaN past the end of the string, so a trailing high
      // surrogate fails the range test below.
      const next = vr.charCodeAt(i + 1)
      if (!(next >= 0xdc00 && next <= 0xdfff)) {
        // If high not followed by low surrogate
        return false
      }
      // Skip the low half of the pair that was just validated.
      i++
    } else if (unit >= 0xdc00 && unit <= 0xdfff) {
      // If low not preceded by high surrogate
      return false
    }
  }

  return true
}
/**
 * Reports whether a value is a string made only of well-formed UTF-16:
 * every high surrogate (U+D800–U+DBFF) is immediately followed by a low
 * surrogate (U+DC00–U+DFFF), and no low surrogate appears on its own.
 *
 * @param {*} vr The value to inspect.
 * @returns {boolean} `true` if `vr` is a string with no unpaired surrogates;
 *          `false` for non-strings and for strings with a lone surrogate.
 */
export function is_unicode(vr) {
  //  discuss at: https://locutus.io/php/is_unicode/
  // original by: Brett Zamir (https://brett-zamir.me)
  //      note 1: Almost all strings in JavaScript should be Unicode
  //   example 1: is_unicode('We the peoples of the United Nations...!')
  //   returns 1: true

  if (typeof vr !== 'string') {
    return false
  }

  // If surrogates occur outside of high-low pairs, then this is not Unicode.
  // A single pass over the code units also catches a lone surrogate at the
  // very start or end of the string and a stray low surrogate directly after
  // a valid pair, which neighbour-based regex matching can miss.
  for (let i = 0; i < vr.length; i++) {
    const unit = vr.charCodeAt(i)

    if (unit >= 0xd800 && unit <= 0xdbff) {
      // charCodeAt yields NaN past the end of the string, so a trailing high
      // surrogate fails the range test below.
      const next = vr.charCodeAt(i + 1)
      if (!(next >= 0xdc00 && next <= 0xdfff)) {
        // If high not followed by low surrogate
        return false
      }
      // Skip the low half of the pair that was just validated.
      i++
    } else if (unit >= 0xdc00 && unit <= 0xdfff) {
      // If low not preceded by high surrogate
      return false
    }
  }

  return true
}
Improve this function
Locutus is a community effort following The McDonald's Theory: we ship first iterations, hoping others will improve them.
If you see something that could be better, we'd love your contribution.