diff options
| author | Adam Mathes <adam@adammathes.com> | 2026-02-13 21:34:48 -0800 |
|---|---|---|
| committer | Adam Mathes <adam@adammathes.com> | 2026-02-13 21:34:48 -0800 |
| commit | 76cb9c2a39d477a64824a985ade40507e3bbade1 (patch) | |
| tree | 41e997aa9c6f538d3a136af61dae9424db2005a9 /vanilla/node_modules/@exodus/bytes/utf8.js | |
| parent | 819a39a21ac992b1393244a4c283bbb125208c69 (diff) | |
| download | neko-76cb9c2a39d477a64824a985ade40507e3bbade1.tar.gz neko-76cb9c2a39d477a64824a985ade40507e3bbade1.tar.bz2 neko-76cb9c2a39d477a64824a985ade40507e3bbade1.zip | |
feat(vanilla): add testing infrastructure and tests (NK-wjnczv)
Diffstat (limited to 'vanilla/node_modules/@exodus/bytes/utf8.js')
| -rw-r--r-- | vanilla/node_modules/@exodus/bytes/utf8.js | 66 |
1 files changed, 66 insertions, 0 deletions
// From: diff --git a/vanilla/node_modules/@exodus/bytes/utf8.js b/vanilla/node_modules/@exodus/bytes/utf8.js
// (new file mode 100644, index 0000000..86b0022, hunk @@ -0,0 +1,66 @@)
import { typedView } from './array.js'
import { assertU8, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
import { nativeDecoder, nativeEncoder } from './fallback/platform.js'
import * as js from './fallback/utf8.auto.js'

// ignoreBOM: true leaves a BOM in place, i.e. it stays in the decoded output.
// Nothing should be stripped unexpectedly.
const decoderLoose = nativeDecoder
const decoderFatal = nativeDecoder
  ? new TextDecoder('utf-8', { ignoreBOM: true, fatal: true })
  : null
const { isWellFormed } = String.prototype

/**
 * Decides whether `res`, which contains an EF BF BD sequence, is still a faithful
 * encoding of `str` (i.e. the replacement character was really present in the input).
 *
 * @param {string} str - the original input string
 * @param {Uint8Array} res - the bytes produced by the native encoder for `str`
 * @returns {boolean} true when the input is accepted as-is, false when it must be rejected
 */
function roundTripsCleanly(str, res) {
  if (js.decodeFast && !nativeDecoder && str.length < 1e7) {
    // Per the original author this is ~2x faster than decode in Hermes.
    // The comparison keeps the encodeURI call from being optimized out.
    try {
      return encodeURI(str) !== null
    } catch {
      // encodeURI threw, so the string is rejected
      return false
    }
  }

  return str === decode(res)
}

/**
 * Validates natively encoded output in strict mode and hands the bytes back.
 *
 * @param {string} str - the string that was encoded
 * @param {boolean} loose - when truthy, no validation is performed
 * @param {Uint8Array} res - encoder output for `str`
 * @returns {Uint8Array} `res`, unchanged
 * @throws {TypeError} with E_STRICT_UNICODE when strict validation fails
 */
function deLoose(str, loose, res) {
  // Lengths match only for ASCII input, which is automatically fine
  if (loose || str.length === res.length) return res

  if (isWellFormed) {
    // A fast native check is available
    if (!isWellFormed.call(str)) throw new TypeError(E_STRICT_UNICODE)
    return res
  }

  // No native check: look for EFBFBD (a 3-byte sequence) in the output
  const lastStart = res.length - 3
  for (let from = 0; from <= lastStart; ) {
    const at = res.indexOf(0xef, from)
    if (at === -1 || at > lastStart) break
    from = at + 1
    if (res[at + 1] !== 0xbf || res[at + 2] !== 0xbd) continue

    // A replacement char is in the output: recheck that the input was encoded correctly
    if (roundTripsCleanly(str, res)) return res
    throw new TypeError(E_STRICT_UNICODE)
  }

  return res
}

/**
 * Encodes a string to UTF-8 bytes.
 *
 * @param {string} str - the string to encode
 * @param {boolean} [loose=false] - forwarded to the validation step / JS encoder
 * @returns {Uint8Array} the encoded bytes
 * @throws {TypeError} with E_STRING when `str` is not a string
 */
function encode(str, loose = false) {
  if (typeof str !== 'string') throw new TypeError(E_STRING)
  if (str.length === 0) return new Uint8Array() // faster than Uint8Array.of

  // No reason to use unescape + encodeURIComponent: it's slower than JS on normal engines,
  // and modern Hermes already has TextEncoder
  if (!nativeEncoder && js.encode) return js.encode(str, loose)

  return deLoose(str, loose, nativeEncoder.encode(str))
}

/**
 * Decodes UTF-8 bytes to a string.
 *
 * @param {Uint8Array} arr - the bytes to decode (checked with assertU8)
 * @param {boolean} [loose=false] - selects the loose decoder instead of the fatal one
 * @returns {string} the decoded string, '' for zero-length input
 */
function decode(arr, loose = false) {
  assertU8(arr)
  if (arr.byteLength === 0) return ''

  if (!nativeDecoder && js.decodeFast) return js.decodeFast(arr, loose)

  // Node.js and browsers
  const decoder = loose ? decoderLoose : decoderFatal
  return decoder.decode(arr)
}

// Public API: each wrapper fixes the `loose` flag; the encoders pass the bytes and `format` to typedView
export const utf8fromString = (str, format = 'uint8') => {
  return typedView(encode(str, false), format)
}
export const utf8fromStringLoose = (str, format = 'uint8') => {
  return typedView(encode(str, true), format)
}
export const utf8toString = (arr) => {
  return decode(arr, false)
}
export const utf8toStringLoose = (arr) => {
  return decode(arr, true)
}
