Install via `yarn add locutus` and import:
`import { strlen } from 'locutus/php/strings/strlen'`.
Or with CommonJS: `const { strlen } = require('locutus/php/strings/strlen')`.
Use a bundler that supports tree-shaking so you only ship the functions you actually use.
Vite, webpack, Rollup, and Parcel all handle this. For server-side use, this is less of a concern.
Examples
These examples are extracted from test cases that automatically verify our functions against their native counterparts.
Notes: This may look like overkill, but in order to be truly faithful to handling all Unicode
characters, and to this function in PHP, which counts the number of characters rather than
the number of bytes, something like this is really necessary.
Dependencies
This function uses the following Locutus functions: `ini_get` (php/info/ini_get).
Here's what our current TypeScript equivalent to PHP's strlen looks like.
import { ini_get } from'../info/ini_get.ts'
export function strlen(string: string): number {
  //  discuss at: https://locutus.io/php/strlen/
  // original by: Kevin van Zonneveld (https://kvz.io)
  // improved by: Sakimori
  // improved by: Kevin van Zonneveld (https://kvz.io)
  //    input by: Kirk Strobeck
  // bugfixed by: Onno Marsman (https://twitter.com/onnomarsman)
  //  revised by: Brett Zamir (https://brett-zamir.me)
  //      note 1: May look like overkill, but in order to be truly faithful to handling all Unicode
  //      note 1: characters and to this function in PHP which does not count the number of bytes
  //      note 1: but counts the number of characters, something like this is really necessary.
  //   example 1: strlen('Kevin van Zonneveld')
  //   returns 1: 19
  //   example 2: ini_set('unicode.semantics', 'on')
  //   example 2: strlen('A\ud87e\udc04Z')
  //   returns 2: 3

  /**
   * Counts the characters in `string`, treating a UTF-16 surrogate pair as a
   * single character.
   *
   * @param string - The text to measure.
   * @returns The number of characters (code points) in the text.
   * @throws Error when the text contains a high surrogate that is not followed
   *   by a low surrogate, or a low surrogate that is not preceded by a high one.
   *
   * NOTE(review): `ini_get` is imported next to this function but is never
   * called in this excerpt; confirm whether a `unicode.semantics` check
   * belongs here.
   */

  // Coerce once so the length/charCodeAt calls below always see a primitive string.
  const str = string + ''

  // Returns the whole character starting at `index`, or `false` when `index`
  // points at the low half of a pair that was already consumed.
  const getWholeChar = function (text: string, index: number): string | false {
    const code = text.charCodeAt(index)

    if (code >= 0xd800 && code <= 0xdbff) {
      // High surrogate (could change last hex to 0xDB7F to
      // treat high private surrogates as single characters)
      if (text.length <= index + 1) {
        throw new Error('High surrogate without following low surrogate')
      }
      const next = text.charCodeAt(index + 1)
      if (next < 0xdc00 || next > 0xdfff) {
        throw new Error('High surrogate without following low surrogate')
      }
      return text.charAt(index) + text.charAt(index + 1)
    } else if (code >= 0xdc00 && code <= 0xdfff) {
      // Low surrogate
      if (index === 0) {
        throw new Error('Low surrogate without preceding high surrogate')
      }
      const prev = text.charCodeAt(index - 1)
      if (prev < 0xd800 || prev > 0xdbff) {
        // (could change last hex to 0xDB7F to treat high private surrogates
        // as single characters)
        throw new Error('Low surrogate without preceding high surrogate')
      }
      // We can pass over low surrogates now as the second
      // component in a pair which we have already processed
      return false
    }

    return text.charAt(index)
  }

  let lgth = 0
  for (let i = 0; i < str.length; i++) {
    // The first half of a surrogate pair stands for the whole character;
    // the second half is skipped so the pair is counted once.
    if (getWholeChar(str, i) === false) {
      continue
    }
    lgth++
  }

  return lgth
}
import { ini_get } from'../info/ini_get.ts'
export function strlen(string) {
  //  discuss at: https://locutus.io/php/strlen/
  // original by: Kevin van Zonneveld (https://kvz.io)
  // improved by: Sakimori
  // improved by: Kevin van Zonneveld (https://kvz.io)
  //    input by: Kirk Strobeck
  // bugfixed by: Onno Marsman (https://twitter.com/onnomarsman)
  //  revised by: Brett Zamir (https://brett-zamir.me)
  //      note 1: May look like overkill, but in order to be truly faithful to handling all Unicode
  //      note 1: characters and to this function in PHP which does not count the number of bytes
  //      note 1: but counts the number of characters, something like this is really necessary.
  //   example 1: strlen('Kevin van Zonneveld')
  //   returns 1: 19
  //   example 2: ini_set('unicode.semantics', 'on')
  //   example 2: strlen('A\ud87e\udc04Z')
  //   returns 2: 3

  // Counts the characters in `string`, treating a UTF-16 surrogate pair as one
  // character. Returns that count as a number and throws an Error on an
  // unpaired high or low surrogate.
  // NOTE(review): `ini_get` is imported next to this function but is never
  // called in this excerpt; confirm whether a `unicode.semantics` check
  // belongs here.

  // Coerce once so the length/charCodeAt calls below always see a primitive string.
  const str = string + ''

  // Returns the whole character starting at `index`, or `false` when `index`
  // points at the low half of a pair that was already consumed.
  const getWholeChar = function (text, index) {
    const code = text.charCodeAt(index)

    if (code >= 0xd800 && code <= 0xdbff) {
      // High surrogate (could change last hex to 0xDB7F to
      // treat high private surrogates as single characters)
      if (text.length <= index + 1) {
        throw new Error('High surrogate without following low surrogate')
      }
      const next = text.charCodeAt(index + 1)
      if (next < 0xdc00 || next > 0xdfff) {
        throw new Error('High surrogate without following low surrogate')
      }
      return text.charAt(index) + text.charAt(index + 1)
    } else if (code >= 0xdc00 && code <= 0xdfff) {
      // Low surrogate
      if (index === 0) {
        throw new Error('Low surrogate without preceding high surrogate')
      }
      const prev = text.charCodeAt(index - 1)
      if (prev < 0xd800 || prev > 0xdbff) {
        // (could change last hex to 0xDB7F to treat high private surrogates
        // as single characters)
        throw new Error('Low surrogate without preceding high surrogate')
      }
      // We can pass over low surrogates now as the second
      // component in a pair which we have already processed
      return false
    }

    return text.charAt(index)
  }

  let lgth = 0
  for (let i = 0; i < str.length; i++) {
    // The first half of a surrogate pair stands for the whole character;
    // the second half is skipped so the pair is counted once.
    if (getWholeChar(str, i) === false) {
      continue
    }
    lgth++
  }

  // The character count is the result, matching the sibling listings that end in `return lgth`.
  return lgth
}
// Purpose: returns the `local_value` of an ini entry converted to a string, or '' when the entry is missing or its local value is undefined or null.
// NOTE(review): this excerpt is incomplete. `entry` is never defined in the visible text (presumably the ini record looked up by `varname`; confirm against the full module), and several keywords are fused with their neighbours (`functionini_get`, `returnString`, `return''`).
// php/info/ini_get (Locutus dependency module) functionini_get(varname: string): string { // discuss at: https://locutus.io/php/ini_get/ // original by: Brett Zamir (https://brett-zamir.me) // note 1: The ini values must be set by ini_set or manually within an ini file // example 1: ini_set('date.timezone', 'Asia/Hong_Kong') // example 1: ini_get('date.timezone') // returns 1: 'Asia/Hong_Kong'
if (entry && entry.local_value !== undefined) { if (entry.local_value === null) { return'' } returnString(entry.local_value) }
return'' }
// php/strings/strlen (target function module)
function strlen(string: string): number {
  //  discuss at: https://locutus.io/php/strlen/
  // original by: Kevin van Zonneveld (https://kvz.io)
  // improved by: Sakimori
  // improved by: Kevin van Zonneveld (https://kvz.io)
  //    input by: Kirk Strobeck
  // bugfixed by: Onno Marsman (https://twitter.com/onnomarsman)
  //  revised by: Brett Zamir (https://brett-zamir.me)
  //      note 1: May look like overkill, but in order to be truly faithful to handling all Unicode
  //      note 1: characters and to this function in PHP which does not count the number of bytes
  //      note 1: but counts the number of characters, something like this is really necessary.
  //   example 1: strlen('Kevin van Zonneveld')
  //   returns 1: 19
  //   example 2: ini_set('unicode.semantics', 'on')
  //   example 2: strlen('A\ud87e\udc04Z')
  //   returns 2: 3

  /**
   * Counts the characters in `string`, treating a UTF-16 surrogate pair as a
   * single character.
   *
   * @param string - The text to measure.
   * @returns The number of characters (code points) in the text.
   * @throws Error when the text contains a high surrogate that is not followed
   *   by a low surrogate, or a low surrogate that is not preceded by a high one.
   *
   * NOTE(review): this excerpt never calls `ini_get`, although the listing
   * bundles it as a dependency; confirm whether a `unicode.semantics` check
   * belongs here.
   */

  // Coerce once so the length/charCodeAt calls below always see a primitive string.
  const str = string + ''

  // Returns the whole character starting at `index`, or `false` when `index`
  // points at the low half of a pair that was already consumed.
  const getWholeChar = function (text: string, index: number): string | false {
    const code = text.charCodeAt(index)

    if (code >= 0xd800 && code <= 0xdbff) {
      // High surrogate (could change last hex to 0xDB7F to
      // treat high private surrogates as single characters)
      if (text.length <= index + 1) {
        throw new Error('High surrogate without following low surrogate')
      }
      const next = text.charCodeAt(index + 1)
      if (next < 0xdc00 || next > 0xdfff) {
        throw new Error('High surrogate without following low surrogate')
      }
      return text.charAt(index) + text.charAt(index + 1)
    } else if (code >= 0xdc00 && code <= 0xdfff) {
      // Low surrogate
      if (index === 0) {
        throw new Error('Low surrogate without preceding high surrogate')
      }
      const prev = text.charCodeAt(index - 1)
      if (prev < 0xd800 || prev > 0xdbff) {
        // (could change last hex to 0xDB7F to treat high private surrogates
        // as single characters)
        throw new Error('Low surrogate without preceding high surrogate')
      }
      // We can pass over low surrogates now as the second
      // component in a pair which we have already processed
      return false
    }

    return text.charAt(index)
  }

  let lgth = 0
  for (let i = 0; i < str.length; i++) {
    // The first half of a surrogate pair stands for the whole character;
    // the second half is skipped so the pair is counted once.
    if (getWholeChar(str, i) === false) {
      continue
    }
    lgth++
  }

  // The declared return type is `number`, so the character count is the result.
  return lgth
}
// Purpose: returns the `local_value` of an ini entry converted to a string, or '' when the entry is missing or its local value is undefined or null.
// NOTE(review): this excerpt is incomplete. `entry` is never defined in the visible text (presumably the ini record looked up by `varname`; confirm against the full module), and several keywords are fused with their neighbours (`functionini_get`, `returnString`, `return''`).
// php/info/ini_get (Locutus dependency module) functionini_get(varname) { // discuss at: https://locutus.io/php/ini_get/ // original by: Brett Zamir (https://brett-zamir.me) // note 1: The ini values must be set by ini_set or manually within an ini file // example 1: ini_set('date.timezone', 'Asia/Hong_Kong') // example 1: ini_get('date.timezone') // returns 1: 'Asia/Hong_Kong'
if (entry && entry.local_value !== undefined) { if (entry.local_value === null) { return'' } returnString(entry.local_value) }
return'' }
// php/strings/strlen (target function module)
function strlen(string) {
  //  discuss at: https://locutus.io/php/strlen/
  // original by: Kevin van Zonneveld (https://kvz.io)
  // improved by: Sakimori
  // improved by: Kevin van Zonneveld (https://kvz.io)
  //    input by: Kirk Strobeck
  // bugfixed by: Onno Marsman (https://twitter.com/onnomarsman)
  //  revised by: Brett Zamir (https://brett-zamir.me)
  //      note 1: May look like overkill, but in order to be truly faithful to handling all Unicode
  //      note 1: characters and to this function in PHP which does not count the number of bytes
  //      note 1: but counts the number of characters, something like this is really necessary.
  //   example 1: strlen('Kevin van Zonneveld')
  //   returns 1: 19
  //   example 2: ini_set('unicode.semantics', 'on')
  //   example 2: strlen('A\ud87e\udc04Z')
  //   returns 2: 3

  // Counts the characters in `string`, treating a UTF-16 surrogate pair as one
  // character. Returns that count as a number and throws an Error on an
  // unpaired high or low surrogate.
  // NOTE(review): this excerpt never calls `ini_get`, although the listing
  // bundles it as a dependency; confirm whether a `unicode.semantics` check
  // belongs here.

  // Coerce once so the length/charCodeAt calls below always see a primitive string.
  const str = string + ''

  // Returns the whole character starting at `index`, or `false` when `index`
  // points at the low half of a pair that was already consumed.
  const getWholeChar = function (text, index) {
    const code = text.charCodeAt(index)

    if (code >= 0xd800 && code <= 0xdbff) {
      // High surrogate (could change last hex to 0xDB7F to
      // treat high private surrogates as single characters)
      if (text.length <= index + 1) {
        throw new Error('High surrogate without following low surrogate')
      }
      const next = text.charCodeAt(index + 1)
      if (next < 0xdc00 || next > 0xdfff) {
        throw new Error('High surrogate without following low surrogate')
      }
      return text.charAt(index) + text.charAt(index + 1)
    } else if (code >= 0xdc00 && code <= 0xdfff) {
      // Low surrogate
      if (index === 0) {
        throw new Error('Low surrogate without preceding high surrogate')
      }
      const prev = text.charCodeAt(index - 1)
      if (prev < 0xd800 || prev > 0xdbff) {
        // (could change last hex to 0xDB7F to treat high private surrogates
        // as single characters)
        throw new Error('Low surrogate without preceding high surrogate')
      }
      // We can pass over low surrogates now as the second
      // component in a pair which we have already processed
      return false
    }

    return text.charAt(index)
  }

  let lgth = 0
  for (let i = 0; i < str.length; i++) {
    // The first half of a surrogate pair stands for the whole character;
    // the second half is skipped so the pair is counted once.
    if (getWholeChar(str, i) === false) {
      continue
    }
    lgth++
  }

  return lgth
}
Improve this function
Locutus is a community effort following The McDonald's Theory: we ship first iterations, hoping others will improve them.
If you see something that could be better, we'd love your contribution.