Reports

Many solutions here rely on multiple replace or split steps to handle edge cases. A more direct approach is to use a single, prioritized regex for tokenization.

Here’s a short and sweet solution with optional acronym handling; an i18n version follows further below.

/**
 * Converts an ASCII string to PascalCase.
 *
 * The input is tokenized with a single prioritized regex: a capitalized word,
 * a run of capitals, a run of lowercase letters, or a run of digits. Every
 * other character acts as a separator and is dropped. Each token is then
 * rewritten as "first character upper, remainder lower" and the tokens are
 * concatenated.
 *
 * @param {string} str - Text to convert.
 * @param {boolean} [keepAcronyms=false] - When true, every capital that is not
 *   followed by a lowercase letter becomes its own one-letter token, so runs
 *   of capitals survive unchanged ("APIResponse" stays "APIResponse"). When
 *   false, such a run is one token and is folded ("ApiResponse").
 * @returns {string} The PascalCase result, or '' when no token is found.
 */
function toPascalCase(str, keepAcronyms = false) {
  // Alternatives are ordered by priority; the negative lookahead stops a run
  // of capitals from swallowing the first letter of a following capitalized
  // word (e.g. the "R" in "APIResponse").
  const tokenPattern = keepAcronyms
    ? /([A-Z][a-z]+|[A-Z](?![a-z])|[a-z]+|\d+)/g
    : /([A-Z][a-z]+|[A-Z]+(?![a-z])|[a-z]+|\d+)/g;

  const tokens = str.match(tokenPattern) ?? [];

  // Use the locale-independent case mappings: the toLocale* variants follow
  // the host's default locale, so under e.g. a Turkish locale "i" would
  // upper-case to "İ" and "I" would lower-case to "ı".
  return tokens
    .map((token) => `${token[0].toUpperCase()}${token.slice(1).toLowerCase()}`)
    .join('');
}

// Sample inputs rendered in the demo table, one row per entry.
const cases = [
  'foo bar baz',
  'alllower',
  'ALLCAPS',
  'IM_A_SHOUTER',
  'PascalCase',
  'APIResponse',
  'send-HTTP-Request',
  'foo123bar',
  '_mixed-|seps|__in this:here.string*',
  '!--whack-¿?-string--121-**%',
  'AbcDeFGhiJKL',
];
// result w/ acronyms off = ['FooBarBaz', 'Alllower', 'Allcaps', 'ImAShouter', 'PascalCase', 'ApiResponse', 'SendHttpRequest', 'Foo123Bar', 'MixedSepsInThisHereString', 'WhackString121', 'AbcDeFGhiJkl'];
// result w/ acronyms on  = ['FooBarBaz', 'Alllower', 'ALLCAPS', 'IMASHOUTER', 'PascalCase', 'APIResponse', 'SendHTTPRequest', 'Foo123Bar', 'MixedSepsInThisHereString', 'WhackString121', 'AbcDeFGhiJKL'];

// Fill the results table: each row shows the raw input, then the conversion
// with acronym preservation off, then with it on.
const tbody = document.querySelector('#results tbody');
for (const input of cases) {
  const row = document.createElement('tr');
  const columns = [input, toPascalCase(input), toPascalCase(input, true)];
  for (const text of columns) {
    const cell = document.createElement('td');
    // textContent (not innerHTML) so the sample strings are shown verbatim.
    cell.textContent = text;
    row.appendChild(cell);
  }
  tbody.appendChild(row);
}

/* Render the table text at 75% of the inherited font size. */
table {
  font-size: 75%;
}

/* Left-align text within table rows. */
tr {
  text-align: left;
}

/* Put a 1em gap after every data cell except the last one in its row. */
td:not(:last-child) {
  padding-right: 1em;
}

<!-- Results table: one header row with three columns (input, acronyms off,
     acronyms on). The tbody starts empty; the demo script looks it up via
     '#results tbody' and appends one row per sample input. -->
<table id="results">
  <thead>
    <tr>
      <th>Input</th>
      <th>acro false</th>
      <th>acro true</th>
    </tr>
  </thead>
  <tbody></tbody>
</table>

And here is the i18n version, along with Node.js tests.

/**
 * Converts a string in any script to PascalCase using Unicode-aware
 * tokenization.
 *
 * The input is NFC-normalized, a space is inserted wherever CJK text
 * (Han, Hiragana, Katakana, Hangul) touches an ASCII letter, and the result is
 * tokenized with a single prioritized regex: capitalized word, run of
 * uppercase letters, run of lowercase letters, run of any other letters, or
 * run of numeric characters. Everything else is a separator and is dropped.
 * Each token is rewritten as "first code point upper, remainder lower" and the
 * tokens are concatenated.
 *
 * @param {string} str - Text to convert.
 * @param {boolean} [keepAcronyms=false] - When true, every uppercase letter
 *   not followed by a lowercase letter is its own token, so runs of capitals
 *   survive unchanged ("XMLHttpRequest"). When false, such a run is one token
 *   and is folded ("XmlHttpRequest").
 * @returns {string} The PascalCase result, or '' when no token is found.
 */
function toPascalCaseI18n(str, keepAcronyms = false) {
  const re = keepAcronyms
    ? /([\p{Lu}][\p{Ll}]+|[\p{Lu}](?![\p{Ll}])|[\p{Ll}]+|[\p{L}]+|\p{N}+)/gu
    : /([\p{Lu}][\p{Ll}]+|[\p{Lu}]+(?![\p{Ll}])|[\p{Ll}]+|[\p{L}]+|\p{N}+)/gu;

  const tokens = str
    // Compose base letter + combining mark sequences so they tokenize as letters.
    .normalize('NFC')
    // Insert a separator when switching between CJK and Latin
    .replace(/([\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}])(?=[A-Za-z])/gu, '$1 ')
    .replace(/([A-Za-z])(?=[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}])/gu, '$1 ')
    .match(re) ?? [];

  return tokens
    .map((token) => {
      // Take the first whole code point, not the first UTF-16 unit: the regex
      // runs in `u` mode, so a token may start with a supplementary-plane
      // letter, and splitting its surrogate pair would leave it uncapitalized.
      const head = String.fromCodePoint(token.codePointAt(0));
      // Locale-independent mappings keep the output identical on every host
      // (the toLocale* variants would turn "i" into "İ" under a Turkish locale).
      return head.toUpperCase() + token.slice(head.length).toLowerCase();
    })
    .join('');
}

import { test } from 'node:test';
import { strictEqual } from 'node:assert';

// Table-driven check of toPascalCaseI18n. Each row is
// [input, expected with keepAcronyms=false, expected with keepAcronyms=true].
test('toPascalCaseI18n', () => {
  const words = [
    ['alllower', 'Alllower', 'Alllower'],
    ['ALLCAPS', 'Allcaps', 'ALLCAPS'],
    ['IM_A_SHOUTER', 'ImAShouter', 'IMASHOUTER'],
    ['PascalCase', 'PascalCase', 'PascalCase'],
    ['camelCase', 'CamelCase', 'CamelCase'],
    ['foo bar baz', 'FooBarBaz', 'FooBarBaz'],
    ['_foo', 'Foo', 'Foo'],
    ['foo_', 'Foo', 'Foo'],
    ['_mixed-|seps|__in this:here.string*', 'MixedSepsInThisHereString', 'MixedSepsInThisHereString'],
    ['!--whack-¿?-string--121-**%', 'WhackString121', 'WhackString121'],
    ['number42', 'Number42', 'Number42'],
    ['foo123bar', 'Foo123Bar', 'Foo123Bar'],
    ['42#number', '42Number', '42Number'],
    ['123 456', '123456', '123456'],
    ['(555) 123-4567', '5551234567', '5551234567'],
    ['AbcDeFGhiJKL', 'AbcDeFGhiJkl', 'AbcDeFGhiJKL'],
    ['XMLHttpRequest', 'XmlHttpRequest', 'XMLHttpRequest'],
    ['APIResponse', 'ApiResponse', 'APIResponse'],
    ['', '', ''],
    ['ça.roule', 'ÇaRoule', 'ÇaRoule'],
    ['добрий-день', 'ДобрийДень', 'ДобрийДень'],
    ['٤٥٦bar12', '٤٥٦Bar12', '٤٥٦Bar12'], // Eastern Arabic-Indic digits
    ['مرحبا-بالعالم', 'مرحبابالعالم', 'مرحبابالعالم'], // Arabic
    ['αβγ-δεζ', 'ΑβγΔεζ', 'ΑβγΔεζ'], // Greek
    ['İstanbul', 'İstanbul', 'İstanbul'], // Turkish dotted / dotless I
    ['istanbul', 'Istanbul', 'Istanbul'],
    ['ışık', 'Işık', 'Işık'],
    ['résumé', 'Résumé', 'Résumé'], // Accented letters (e.g., é, or e + combining acute)
    ['שלום-עולם', 'שלוםעולם', 'שלוםעולם'], // Hebrew
    ['你好-世界', '你好世界', '你好世界'], // CJK
    ['foo世界bar', 'Foo世界Bar', 'Foo世界Bar'], // Mixed CJK + Latin
    ['Ｆｏｏ１２３ｂａｒ', 'Ｆｏｏ１２３Ｂａｒ', 'Ｆｏｏ１２３Ｂａｒ'], // Full-width letters and digits
    ['foo😀bar', 'FooBar', 'FooBar'], // Emoji as noise
    ['ÉCOLE', 'École', 'ÉCOLE'], // Accented capital
  ];

  for (const [input, expectFalse, expectTrue] of words) {
    const actualFalse = toPascalCaseI18n(input, false);
    strictEqual(actualFalse, expectFalse, `Failed for input: "${input}" (keepAcronyms=false). Expected "${expectFalse}", got "${actualFalse}"`);

    const actualTrue = toPascalCaseI18n(input, true);
    strictEqual(actualTrue, expectTrue, `Failed for input: "${input}" (keepAcronyms=true). Expected "${expectTrue}", got "${actualTrue}"`);
  }
});

79719043