PHP's str_word_count in TypeScript

How to use

Install via yarn add locutus and import: import { str_word_count } from 'locutus/php/strings/str_word_count'.

Or with CommonJS: const { str_word_count } = require('locutus/php/strings/str_word_count')

Use a bundler that supports tree-shaking so you only ship the functions you actually use. Vite, webpack, Rollup, and Parcel all handle this. For server-side use this is less of a concern.

Examples

These examples are extracted from test cases that automatically verify our functions against their native counterparts.

#codeexpected result
1str_word_count("Hello fri3nd, you're\r\n looking good today!", 1)['Hello', 'fri', 'nd', "you're", 'looking', 'good', 'today']
2str_word_count("Hello fri3nd, you're\r\n looking good today!", 2){0: 'Hello', 6: 'fri', 10: 'nd', 14: "you're", 29: 'looking', 46: 'good', 51: 'today'}
3str_word_count("Hello fri3nd, you're\r\n looking good today!", 1, '\u00e0\u00e1\u00e3\u00e73')['Hello', 'fri3nd', "you're", 'looking', 'good', 'today']
4str_word_count('hey', 2){0: 'hey'}

Dependencies

This function uses the following Locutus functions:

Here's what our current TypeScript equivalent to PHP's str_word_count looks like.

import { ctype_alpha as ctypeAlpha } from '../ctype/ctype_alpha.ts'

export function str_word_count(
str: string,
format?: 0 | 1 | 2,
charlist?: string,
): number | string[] | { [key: number]: string } {
// discuss at: https://locutus.io/php/str_word_count/
// original by: Ole Vrijenhoek
// bugfixed by: Kevin van Zonneveld (https://kvz.io)
// bugfixed by: Brett Zamir (https://brett-zamir.me)
// bugfixed by: Brett Zamir (https://brett-zamir.me)
// input by: Bug?
// improved by: Brett Zamir (https://brett-zamir.me)
// example 1: str_word_count("Hello fri3nd, you're\r\n looking good today!", 1)
// returns 1: ['Hello', 'fri', 'nd', "you're", 'looking', 'good', 'today']
// example 2: str_word_count("Hello fri3nd, you're\r\n looking good today!", 2)
// returns 2: {0: 'Hello', 6: 'fri', 10: 'nd', 14: "you're", 29: 'looking', 46: 'good', 51: 'today'}
// example 3: str_word_count("Hello fri3nd, you're\r\n looking good today!", 1, '\u00e0\u00e1\u00e3\u00e73')
// returns 3: ['Hello', 'fri3nd', "you're", 'looking', 'good', 'today']
// example 4: str_word_count('hey', 2)
// returns 4: {0: 'hey'}

const len = str.length
const cl = charlist?.length ?? 0
let chr = ''
let tmpStr = ''
let c = ''
const wArr: string[] = []
let wC = 0
const assoc: { [key: number]: string } = {}
let aC = 0
let reg: RegExp | null = null
let match = false

const _pregQuote = function (value: string): string {
return (value + '').replace(/([\\.+*?[^\]$(){}=!<>|:])/g, '\\$1')
}
const _getWholeChar = function (value: string, index: number): string | false {
// Use for rare cases of non-BMP characters
const code = value.charCodeAt(index)
if (code < 0xd800 || code > 0xdfff) {
return value.charAt(index)
}
if (code >= 0xd800 && code <= 0xdbff) {
// High surrogate (could change last hex to 0xDB7F to treat high private surrogates as single
// characters)
if (value.length <= index + 1) {
throw new Error('High surrogate without following low surrogate')
}
const next = value.charCodeAt(index + 1)
if (next < 0xdc00 || next > 0xdfff) {
throw new Error('High surrogate without following low surrogate')
}
return value.charAt(index) + value.charAt(index + 1)
}
// Low surrogate (0xDC00 <= code && code <= 0xDFFF)
if (index === 0) {
throw new Error('Low surrogate without preceding high surrogate')
}
const prev = value.charCodeAt(index - 1)
if (prev < 0xd800 || prev > 0xdbff) {
// (could change last hex to 0xDB7F to treat high private surrogates as single characters)
throw new Error('Low surrogate without preceding high surrogate')
}
// We can pass over low surrogates now as the second component in a pair which we have already
// processed
return false
}

if (cl && typeof charlist === 'string') {
const firstChar = _getWholeChar(charlist, 0)
let pattern = '^(' + _pregQuote(firstChar === false ? '' : firstChar)
for (let i = 1; i < cl; i++) {
const wholeChar = _getWholeChar(charlist, i)
if (wholeChar === false) {
continue
}
chr = wholeChar
pattern += '|' + _pregQuote(chr)
}
pattern += ')$'
reg = new RegExp(pattern)
}

for (let i = 0; i < len; i++) {
const wholeChar = _getWholeChar(str, i)
if (wholeChar === false) {
continue
}
c = wholeChar
// No hyphen at beginning or end unless allowed in charlist (or locale)
// No apostrophe at beginning unless allowed in charlist (or locale)
// @todo: Make this more readable
match =
ctypeAlpha(c) ||
(reg !== null && reg.test(c)) ||
(i !== 0 && i !== len - 1 && c === '-') ||
(i !== 0 && c === "'")
if (match) {
if (tmpStr === '' && format === 2) {
aC = i
}
tmpStr = tmpStr + c
}
if (i === len - 1 || (!match && tmpStr !== '')) {
if (format !== 2) {
wArr.push(tmpStr)
} else {
assoc[aC] = tmpStr
}
tmpStr = ''
wC++
}
}

if (!format) {
return wC
} else if (format === 1) {
return wArr
} else if (format === 2) {
return assoc
}

throw new Error('You have supplied an incorrect format')
}

Improve this function

Locutus is a community effort following The McDonald's Theory: we ship first iterations, hoping others will improve them. If you see something that could be better, we'd love your contribution.

View on GitHub · Edit on GitHub · View Raw


« More PHP strings functions


Star