PHP's substr in JavaScript

Here’s what our current JavaScript equivalent to PHP's substr looks like.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
module.exports = function substr (str, start, len) {
// discuss at: http://locutus.io/php/substr/
// original by: Martijn Wieringa
// bugfixed by: T.Wild
// improved by: Onno Marsman (https://twitter.com/onnomarsman)
// improved by: Brett Zamir (http://brett-zamir.me)
// revised by: Theriault (https://github.com/Theriault)
// note 1: Handles rare Unicode characters if 'unicode.semantics' ini (PHP6) is set to 'on'
// example 1: substr('abcdef', 0, -1)
// returns 1: 'abcde'
// example 2: substr(2, 0, -6)
// returns 2: false
// example 3: ini_set('unicode.semantics', 'on')
// example 3: substr('a\uD801\uDC00', 0, -1)
// returns 3: 'a'
// example 4: ini_set('unicode.semantics', 'on')
// example 4: substr('a\uD801\uDC00', 0, 2)
// returns 4: 'a\uD801\uDC00'
// example 5: ini_set('unicode.semantics', 'on')
// example 5: substr('a\uD801\uDC00', -1, 1)
// returns 5: '\uD801\uDC00'
// example 6: ini_set('unicode.semantics', 'on')
// example 6: substr('a\uD801\uDC00z\uD801\uDC00', -3, 2)
// returns 6: '\uD801\uDC00z'
// example 7: ini_set('unicode.semantics', 'on')
// example 7: substr('a\uD801\uDC00z\uD801\uDC00', -3, -1)
// returns 7: '\uD801\uDC00z'
// test: skip-3 skip-4 skip-5 skip-6 skip-7

str += ''
var end = str.length

var iniVal = (typeof require !== 'undefined' ? require('../info/ini_get')('unicode.emantics') : undefined) || 'off'

if (iniVal === 'off') {
// assumes there are no non-BMP characters;
// if there may be such characters, then it is best to turn it on (critical in true XHTML/XML)
if (start < 0) {
start += end
}
if (typeof len !== 'undefined') {
if (len < 0) {
end = len + end
} else {
end = len + start
}
}

// PHP returns false if start does not fall within the string.
// PHP returns false if the calculated end comes before the calculated start.
// PHP returns an empty string if start and end are the same.
// Otherwise, PHP returns the portion of the string from start to end.
if (start >= str.length || start < 0 || start > end) {
return false
}

return str.slice(start, end)
}

// Full-blown Unicode including non-Basic-Multilingual-Plane characters
var i = 0
var allBMP = true
var es = 0
var el = 0
var se = 0
var ret = ''

for (i = 0; i < str.length; i++) {
if (/[\uD800-\uDBFF]/.test(str.charAt(i)) && /[\uDC00-\uDFFF]/.test(str.charAt(i + 1))) {
allBMP = false
break
}
}

if (!allBMP) {
if (start < 0) {
for (i = end - 1, es = (start += end); i >= es; i--) {
if (/[\uDC00-\uDFFF]/.test(str.charAt(i)) && /[\uD800-\uDBFF]/.test(str.charAt(i - 1))) {
start--
es--
}
}
} else {
var surrogatePairs = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g
while ((surrogatePairs.exec(str)) !== null) {
var li = surrogatePairs.lastIndex
if (li - 2 < start) {
start++
} else {
break
}
}
}

if (start >= end || start < 0) {
return false
}
if (len < 0) {
for (i = end - 1, el = (end += len); i >= el; i--) {
if (/[\uDC00-\uDFFF]/.test(str.charAt(i)) && /[\uD800-\uDBFF]/.test(str.charAt(i - 1))) {
end--
el--
}
}
if (start > end) {
return false
}
return str.slice(start, end)
} else {
se = start + len
for (i = start; i < se; i++) {
ret += str.charAt(i)
if (/[\uD800-\uDBFF]/.test(str.charAt(i)) && /[\uDC00-\uDFFF]/.test(str.charAt(i + 1))) {
// Go one further, since one of the "characters" is part of a surrogate pair
se++
}
}
return ret
}
}
}
[ View on GitHub | Edit on GitHub | Source on GitHub ]

How to use

You can install it via `npm install locutus` and require it via `require('locutus/php/strings/substr')`. You could also require the strings module in full so that you could access `strings.substr` instead.

If you intend to target the browser, you can then use a module bundler such as Browserify, webpack or rollup.js.

ES5/ES6

This function targets ES5, but as of Locutus 2.0.2 we also support ES6 functions. Locutus transpiles to ES5 before publishing to npm.

A community effort

Not unlike Wikipedia, Locutus is an ongoing community effort. Our philosophy follows The McDonald’s Theory. This means that we don't consider it to be a bad thing that many of our functions are first iterations, which may still have their fair share of issues. We hope that these flaws will inspire others to come up with better ideas.

This way of working also means that we don't offer any production guarantees, and recommend using Locutus for inspiration and learning purposes only.

Notes

  • Handles rare Unicode characters if ‘unicode.semantics’ ini (PHP6) is set to ‘on’

Examples

Please note that these examples are distilled from test cases that automatically verify our functions still work correctly. This could explain some quirky ones.

| # | code | expected result |
|---|------|-----------------|
| 1 | `substr('abcdef', 0, -1)` | `'abcde'` |
| 2 | `substr(2, 0, -6)` | `false` |
| 3 | `ini_set('unicode.semantics', 'on')` `substr('a\uD801\uDC00', 0, -1)` | `'a'` |
| 4 | `ini_set('unicode.semantics', 'on')` `substr('a\uD801\uDC00', 0, 2)` | `'a\uD801\uDC00'` |
| 5 | `ini_set('unicode.semantics', 'on')` `substr('a\uD801\uDC00', -1, 1)` | `'\uD801\uDC00'` |
| 6 | `ini_set('unicode.semantics', 'on')` `substr('a\uD801\uDC00z\uD801\uDC00', -3, 2)` | `'\uD801\uDC00z'` |
| 7 | `ini_set('unicode.semantics', 'on')` `substr('a\uD801\uDC00z\uD801\uDC00', -3, -1)` | `'\uD801\uDC00z'` |

« More PHP strings functions