Many solutions here rely on multiple `replace` or `split` steps to handle edge cases. A more direct approach is to use a single, prioritized regex for tokenization.
Here's a short and sweet solution with optional acronym handling; an i18n version follows further below.
/**
 * Converts a string to PascalCase by tokenizing it with a single regex.
 *
 * Only ASCII letters and digits form tokens; every other character
 * (whitespace, punctuation, non-ASCII letters) acts as a separator and is
 * dropped from the result.
 *
 * @param {string} str - Text to convert.
 * @param {boolean} [keepAcronyms=false] - When true, capitals that are not
 *   followed by a lowercase letter are kept as-is ("APIResponse" stays
 *   "APIResponse"); when false, such runs are folded to a single capitalized
 *   word ("ApiResponse").
 * @returns {string} The PascalCase form, or '' when `str` contains no tokens.
 */
function toPascalCase(str, keepAcronyms = false) {
  // Alternatives are tried in priority order:
  //   1. Capitalized word          (Foo)
  //   2. Capital(s) not followed by a lowercase letter. With keepAcronyms each
  //      capital is its own token, so the mapping below leaves it uppercase;
  //      otherwise the whole run is one token and gets folded to "Xxx".
  //   3. Lowercase run             (foo)
  //   4. Digit run                 (123)
  const tokenPattern = keepAcronyms
    ? /([A-Z][a-z]+|[A-Z](?![a-z])|[a-z]+|\d+)/g
    : /([A-Z][a-z]+|[A-Z]+(?![a-z])|[a-z]+|\d+)/g;

  const tokens = str.match(tokenPattern) ?? [];

  // Use locale-independent casing: the toLocale* variants follow the host's
  // default locale, which on e.g. a Turkish system maps "i" -> "İ" and
  // "I" -> "ı" and would make the output machine-dependent.
  return tokens
    .map((word) => word[0].toUpperCase() + word.slice(1).toLowerCase())
    .join('');
}
// Sample inputs shown in the demo table.
const cases = [
  'foo bar baz',
  'alllower',
  'ALLCAPS',
  'IM_A_SHOUTER',
  'PascalCase',
  'APIResponse',
  'send-HTTP-Request',
  'foo123bar',
  '_mixed-|seps|__in this:here.string*',
  '!--whack-¿?-string--121-**%',
  'AbcDeFGhiJKL',
];
// result w/ acronyms off = ['FooBarBaz', 'Alllower', 'Allcaps', 'ImAShouter', 'PascalCase', 'ApiResponse', 'SendHttpRequest', 'Foo123Bar', 'MixedSepsInThisHereString', 'WhackString121', 'AbcDeFGhiJkl'];
// result w/ acronyms on = ['FooBarBaz', 'Alllower', 'ALLCAPS', 'IMASHOUTER', 'PascalCase', 'APIResponse', 'SendHTTPRequest', 'Foo123Bar', 'MixedSepsInThisHereString', 'WhackString121', 'AbcDeFGhiJKL'];

// Append one row per sample to the #results table body:
// the raw input, the default conversion, and the acronym-preserving conversion.
const tbody = document.querySelector('#results tbody');
for (const sample of cases) {
  const row = document.createElement('tr');
  const columns = [sample, toPascalCase(sample), toPascalCase(sample, true)];
  for (const text of columns) {
    const cell = document.createElement('td');
    cell.textContent = text;
    row.appendChild(cell);
  }
  tbody.appendChild(row);
}
/* Render the table text at 75% of the inherited font size. */
table {
font-size: 75%;
}
/* Set left text alignment on table rows. */
tr {
text-align: left;
}
/* Add right padding to every cell except the last one in its row. */
td:not(:last-child) {
padding-right: 1em;
}
<!-- Results table: the demo script looks up "#results tbody" and appends one row per sample
     (input, output with keepAcronyms=false, output with keepAcronyms=true). -->
<table id="results">
<thead>
<tr>
<th>Input</th>
<th>acro false</th>
<th>acro true</th>
</tr>
</thead>
<tbody></tbody>
</table>
And here is the i18n version, along with Node.js tests.
/**
 * Unicode-aware PascalCase conversion.
 *
 * The input is NFC-normalized, then tokenized with Unicode property escapes:
 * letters (`\p{L}`) and numeric characters (`\p{N}`) form tokens, everything
 * else (punctuation, whitespace, symbols, emoji) is a separator and dropped.
 * A word break is also inserted wherever Han/Hiragana/Katakana/Hangul text
 * touches an ASCII letter, so "foo世界bar" yields "Foo世界Bar". Only ASCII
 * `[A-Za-z]` participates in that boundary rule.
 *
 * @param {string} str - Text to convert.
 * @param {boolean} [keepAcronyms=false] - When true, uppercase letters not
 *   followed by a lowercase letter are kept as-is ("XMLHttpRequest" stays
 *   "XMLHttpRequest"); when false, such runs are folded ("XmlHttpRequest").
 * @returns {string} The PascalCase form, or '' when `str` contains no tokens.
 */
function toPascalCaseI18n(str, keepAcronyms = false) {
  // Priority order: Capitalized word, uppercase letter(s) not followed by a
  // lowercase letter, lowercase run, any other letter run (caseless scripts),
  // numeric run.
  const tokenPattern = keepAcronyms
    ? /([\p{Lu}][\p{Ll}]+|[\p{Lu}](?![\p{Ll}])|[\p{Ll}]+|[\p{L}]+|\p{N}+)/gu
    : /([\p{Lu}][\p{Ll}]+|[\p{Lu}]+(?![\p{Ll}])|[\p{Ll}]+|[\p{L}]+|\p{N}+)/gu;

  const tokens = str
    .normalize('NFC')
    // Insert a separator when switching between CJK and Latin; without it the
    // catch-all letter alternative would swallow "世界bar" as a single token.
    .replace(/([\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}])(?=[A-Za-z])/gu, '$1 ')
    .replace(/([A-Za-z])(?=[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}])/gu, '$1 ')
    .match(tokenPattern) ?? [];

  return tokens
    .map((word) => {
      // Take the first *code point*, not the first UTF-16 unit: indexing with
      // word[0] would split a surrogate pair and leave supplementary-plane
      // letters (e.g. Deseret) uncapitalized.
      const [head] = word;
      // Locale-independent casing keeps the result identical on every host;
      // the toLocale* variants would turn "i" into "İ" under a Turkish locale.
      return head.toUpperCase() + word.slice(head.length).toLowerCase();
    })
    .join('');
}
import { test } from 'node:test';
import { strictEqual } from 'node:assert';
test('toPascalCaseI18n', () => {
  // Each row: [input, expected with keepAcronyms=false, expected with keepAcronyms=true].
  const table = [
    // ASCII casing styles
    ['alllower', 'Alllower', 'Alllower'],
    ['ALLCAPS', 'Allcaps', 'ALLCAPS'],
    ['IM_A_SHOUTER', 'ImAShouter', 'IMASHOUTER'],
    ['PascalCase', 'PascalCase', 'PascalCase'],
    ['camelCase', 'CamelCase', 'CamelCase'],
    // Separators and punctuation noise
    ['foo bar baz', 'FooBarBaz', 'FooBarBaz'],
    ['_foo', 'Foo', 'Foo'],
    ['foo_', 'Foo', 'Foo'],
    ['_mixed-|seps|__in this:here.string*', 'MixedSepsInThisHereString', 'MixedSepsInThisHereString'],
    ['!--whack-¿?-string--121-**%', 'WhackString121', 'WhackString121'],
    // Digits
    ['number42', 'Number42', 'Number42'],
    ['foo123bar', 'Foo123Bar', 'Foo123Bar'],
    ['42#number', '42Number', '42Number'],
    ['123 456', '123456', '123456'],
    ['(555) 123-4567', '5551234567', '5551234567'],
    // Acronym runs
    ['AbcDeFGhiJKL', 'AbcDeFGhiJkl', 'AbcDeFGhiJKL'],
    ['XMLHttpRequest', 'XmlHttpRequest', 'XMLHttpRequest'],
    ['APIResponse', 'ApiResponse', 'APIResponse'],
    // Empty input
    ['', '', ''],
    // Non-ASCII scripts
    ['ça.roule', 'ÇaRoule', 'ÇaRoule'], // Accented Latin
    ['добрий-день', 'ДобрийДень', 'ДобрийДень'], // Cyrillic
    ['٤٥٦bar12', '٤٥٦Bar12', '٤٥٦Bar12'], // Arabic-Indic digits mixed with Latin letters and ASCII digits
    ['مرحبا-بالعالم', 'مرحبابالعالم', 'مرحبابالعالم'], // Arabic letters (caseless) around a hyphen
    ['αβγ-δεζ', 'ΑβγΔεζ', 'ΑβγΔεζ'], // Greek
    ['İstanbul', 'İstanbul', 'İstanbul'], // Turkish dotted / dotless I variants
    ['istanbul', 'Istanbul', 'Istanbul'],
    ['ışık', 'Işık', 'Işık'],
    ['résumé', 'Résumé', 'Résumé'], // Accented letters; NOTE(review): meant to cover combining diacritics — confirm the literal's form
    ['שלום-עולם', 'שלוםעולם', 'שלוםעולם'], // Hebrew
    ['你好-世界', '你好世界', '你好世界'], // CJK ideographs
    ['foo世界bar', 'Foo世界Bar', 'Foo世界Bar'], // Mixed CJK + Latin
    ['Foo123bar', 'Foo123Bar', 'Foo123Bar'], // NOTE(review): originally labelled "full-width digit (U+FF11, U+FF12)" — confirm the literal's digits
    ['foo😀bar', 'FooBar', 'FooBar'], // Emoji acts as a separator
    ['ÉCOLE', 'École', 'ÉCOLE'], // Accented capital
  ];

  for (const [input, expectFalse, expectTrue] of table) {
    const gotFalse = toPascalCaseI18n(input, false);
    strictEqual(gotFalse, expectFalse, `Failed for input: "${input}" (keepAcronyms=false). Expected "${expectFalse}", got "${gotFalse}"`);

    const gotTrue = toPascalCaseI18n(input, true);
    strictEqual(gotTrue, expectTrue, `Failed for input: "${input}" (keepAcronyms=true). Expected "${expectTrue}", got "${gotTrue}"`);
  }
});