aboutsummaryrefslogtreecommitdiffstats
path: root/vanilla/node_modules/@exodus/bytes/single-byte.js
diff options
context:
space:
mode:
authorAdam Mathes <adam@adammathes.com>2026-02-13 21:34:48 -0800
committerAdam Mathes <adam@adammathes.com>2026-02-13 21:34:48 -0800
commit76cb9c2a39d477a64824a985ade40507e3bbade1 (patch)
tree41e997aa9c6f538d3a136af61dae9424db2005a9 /vanilla/node_modules/@exodus/bytes/single-byte.js
parent819a39a21ac992b1393244a4c283bbb125208c69 (diff)
downloadneko-76cb9c2a39d477a64824a985ade40507e3bbade1.tar.gz
neko-76cb9c2a39d477a64824a985ade40507e3bbade1.tar.bz2
neko-76cb9c2a39d477a64824a985ade40507e3bbade1.zip
feat(vanilla): add testing infrastructure and tests (NK-wjnczv)
Diffstat (limited to 'vanilla/node_modules/@exodus/bytes/single-byte.js')
-rw-r--r--vanilla/node_modules/@exodus/bytes/single-byte.js135
1 files changed, 135 insertions, 0 deletions
diff --git a/vanilla/node_modules/@exodus/bytes/single-byte.js b/vanilla/node_modules/@exodus/bytes/single-byte.js
new file mode 100644
index 0000000..ba17324
--- /dev/null
+++ b/vanilla/node_modules/@exodus/bytes/single-byte.js
@@ -0,0 +1,135 @@
+import { assertU8, E_STRING } from './fallback/_utils.js'
+import { nativeDecoderLatin1, nativeEncoder } from './fallback/platform.js'
+import { encodeAscii, encodeAsciiPrefix, encodeLatin1 } from './fallback/latin1.js'
+import { assertEncoding, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
+
+const { TextDecoder, btoa } = globalThis
+
+let windows1252works
+
+// prettier-ignore
+const skipNative = new Set([
+ 'iso-8859-1', 'iso-8859-9', 'iso-8859-11', // non-WHATWG
+ 'iso-8859-6', 'iso-8859-8', 'iso-8859-8-i', // slow in all 3 engines
+ 'iso-8859-16', // iso-8859-16 is somehow broken in WebKit, at least on CI
+])
+
+function shouldUseNative(enc) {
+ // https://issues.chromium.org/issues/468458388
+ // Also might be incorrectly imlemented on platforms as Latin1 (e.g. in Node.js) or regress
+ // This is the most significant single-byte encoding, 'ascii' and 'latin1' alias to this
+ // Even after Chrome bug is fixed, this should serve as a quick correctness check that it's actually windows-1252
+ if (enc === 'windows-1252') {
+ if (windows1252works === undefined) {
+ windows1252works = false
+ try {
+ const u = new Uint8Array(9) // using 9 bytes is significant to catch the bug
+ u[8] = 128
+ windows1252works = new TextDecoder(enc).decode(u).codePointAt(8) === 0x20_ac
+ } catch {}
+ }
+
+ return windows1252works
+ }
+
+ return !skipNative.has(enc)
+}
+
+export function createSinglebyteDecoder(encoding, loose = false) {
+ if (typeof loose !== 'boolean') throw new TypeError('loose option should be boolean')
+ assertEncoding(encoding)
+
+ if (nativeDecoderLatin1 && shouldUseNative(encoding)) {
+ // In try, as not all encodings might be implemented in all engines which have native TextDecoder
+ try {
+ const decoder = new TextDecoder(encoding, { fatal: !loose })
+ return (arr) => {
+ assertU8(arr)
+ if (arr.byteLength === 0) return ''
+ return decoder.decode(arr)
+ }
+ } catch {}
+ }
+
+ const jsDecoder = encodingDecoder(encoding)
+ return (arr) => {
+ assertU8(arr)
+ if (arr.byteLength === 0) return ''
+ return jsDecoder(arr, loose)
+ }
+}
+
+const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
+
+function encode(s, m) {
+ const len = s.length
+ const x = new Uint8Array(len)
+ let i = nativeEncoder ? 0 : encodeAsciiPrefix(x, s)
+
+ for (const len3 = len - 3; i < len3; i += 4) {
+ const x0 = s.charCodeAt(i), x1 = s.charCodeAt(i + 1), x2 = s.charCodeAt(i + 2), x3 = s.charCodeAt(i + 3) // prettier-ignore
+ const c0 = m[x0], c1 = m[x1], c2 = m[x2], c3 = m[x3] // prettier-ignore
+ if ((!c0 && x0) || (!c1 && x1) || (!c2 && x2) || (!c3 && x3)) return null
+
+ x[i] = c0
+ x[i + 1] = c1
+ x[i + 2] = c2
+ x[i + 3] = c3
+ }
+
+ for (; i < len; i++) {
+ const x0 = s.charCodeAt(i)
+ const c0 = m[x0]
+ if (!c0 && x0) return null
+ x[i] = c0
+ }
+
+ return x
+}
+
+// fromBase64+btoa path is faster on everything where fromBase64 is fast
+const useLatin1btoa = Uint8Array.fromBase64 && btoa
+
+export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
+ // TODO: replacement, truncate (replacement will need varying length)
+ if (mode !== 'fatal') throw new Error('Unsupported mode')
+ const m = encodeMap(encoding) // asserts
+ const isLatin1 = encoding === 'iso-8859-1'
+
+ // No single-byte encoder produces surrogate pairs, so any surrogate is invalid
+ // This needs special treatment only to decide how many replacement chars to output, one or two
+ // Not much use in running isWellFormed, most likely cause of error is unmapped chars, not surrogate pairs
+ return (s) => {
+ if (typeof s !== 'string') throw new TypeError(E_STRING)
+ if (isLatin1) {
+ // max limit is to not produce base64 strings that are too long
+ if (useLatin1btoa && s.length >= 1024 && s.length < 1e8) {
+ try {
+ return Uint8Array.fromBase64(btoa(s)) // fails on non-latin1
+ } catch {
+ throw new TypeError(E_STRICT)
+ }
+ }
+
+ if (NON_LATIN.test(s)) throw new TypeError(E_STRICT)
+ return encodeLatin1(s)
+ }
+
+ // Instead of an ASCII regex check, encode optimistically - this is faster
+ // Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
+ if (nativeEncoder && !NON_LATIN.test(s)) {
+ try {
+ return encodeAscii(s, E_STRICT)
+ } catch {}
+ }
+
+ const res = encode(s, m)
+ if (!res) throw new TypeError(E_STRICT)
+ return res
+ }
+}
+
+export const latin1toString = /* @__PURE__ */ createSinglebyteDecoder('iso-8859-1')
+export const latin1fromString = /* @__PURE__ */ createSinglebyteEncoder('iso-8859-1')
+export const windows1252toString = /* @__PURE__ */ createSinglebyteDecoder('windows-1252')
+export const windows1252fromString = /* @__PURE__ */ createSinglebyteEncoder('windows-1252')