aboutsummaryrefslogtreecommitdiffstats
path: root/vanilla/node_modules/@exodus/bytes/fallback/multi-byte.table.js
diff options
context:
space:
mode:
Diffstat (limited to 'vanilla/node_modules/@exodus/bytes/fallback/multi-byte.table.js')
-rw-r--r--vanilla/node_modules/@exodus/bytes/fallback/multi-byte.table.js118
1 files changed, 118 insertions, 0 deletions
diff --git a/vanilla/node_modules/@exodus/bytes/fallback/multi-byte.table.js b/vanilla/node_modules/@exodus/bytes/fallback/multi-byte.table.js
new file mode 100644
index 0000000..c7dfeb2
--- /dev/null
+++ b/vanilla/node_modules/@exodus/bytes/fallback/multi-byte.table.js
@@ -0,0 +1,118 @@
+import { fromBase64url } from '@exodus/bytes/base64.js'
+import { utf16toString } from '@exodus/bytes/utf16.js'
+import loadEncodings from './multi-byte.encodings.cjs'
+
+export const sizes = {
+ jis0208: 11_104,
+ jis0212: 7211,
+ 'euc-kr': 23_750,
+ gb18030: 23_940,
+ big5: 19_782,
+}
+
+// This is huge. It's _much_ smaller than https://npmjs.com/text-encoding though
+// Exactly as mapped by the index table
+// 0,x - hole of x empty elements
+// n,c - continious [c, ...] of length n
+// $.. - references to common chunks
+// -{x} - same as 1,{x}
+
+// See tests/multi-byte.test.js to verify that this data decodes exactly into the encoding spec tables
+
+let indices
+const tables = new Map()
+/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
+
+function loadBase64(str) {
+ const x = fromBase64url(str)
+ const len = x.length
+ const len2 = len >> 1
+ const y = new Uint8Array(len)
+ let a = -1, b = 0 // prettier-ignore
+ for (let i = 0, j = 0; i < len; i += 2, j++) {
+ a = (a + x[j] + 1) & 0xff
+ b = (b + x[len2 + j]) & 0xff
+ y[i] = a
+ y[i + 1] = b
+ }
+
+ return y
+}
+
+function unwrap(res, t, pos) {
+ let code = 0
+ for (let i = 0; i < t.length; i++) {
+ let x = t[i]
+ if (typeof x === 'number') {
+ if (x === 0) {
+ pos += t[++i]
+ } else {
+ if (x < 0) {
+ code -= x
+ x = 1
+ } else {
+ code += t[++i]
+ }
+
+ for (let k = 0; k < x; k++, pos++, code++) {
+ if (code <= 0xff_ff) {
+ res[pos] = code
+ } else {
+ const c = String.fromCodePoint(code)
+ res[pos] = (c.charCodeAt(0) << 16) | c.charCodeAt(1)
+ }
+ }
+ }
+ } else if (x[0] === '$' && Object.hasOwn(indices, x)) {
+ pos = unwrap(res, indices[x], pos) // self-reference using shared chunks
+ } else {
+ let last
+ // splits by codepoints
+ for (const c of utf16toString(loadBase64(x), 'uint8-le')) {
+ last = c
+ res[pos++] = c.length === 1 ? c.charCodeAt(0) : (c.charCodeAt(0) << 16) | c.charCodeAt(1)
+ }
+
+ code = last.codePointAt(0) + 1
+ }
+ }
+
+ return pos
+}
+
+export function getTable(id) {
+ const cached = tables.get(id)
+ if (cached) return cached
+
+ if (!indices) indices = loadEncodings() // lazy-load
+ if (!Object.hasOwn(indices, id)) throw new Error('Unknown encoding')
+ if (!indices[id]) throw new Error('Table already used (likely incorrect bundler dedupe)')
+
+ let res
+ if (id.endsWith('-ranges')) {
+ res = []
+ let a = 0, b = 0 // prettier-ignore
+ const idx = indices[id]
+ while (idx.length > 0) res.push([(a += idx.shift()), (b += idx.shift())]) // destroying, we remove it later anyway
+ } else if (id.endsWith('-katakana')) {
+ let a = -1
+ res = new Uint16Array(indices[id].map((x) => (a += x + 1)))
+ } else if (id === 'big5') {
+ res = new Uint32Array(sizes[id]) // single or double charcodes
+ unwrap(res, indices[id], 0)
+ // Pointer code updates are embedded into the table
+ // These are skipped in encoder as encoder uses only pointers >= (0xA1 - 0x81) * 157
+ res[1133] = 0xca_03_04
+ res[1135] = 0xca_03_0c
+ res[1164] = 0xea_03_04
+ res[1166] = 0xea_03_0c
+ } else {
+ if (!Object.hasOwn(sizes, id)) throw new Error('Unknown encoding')
+ res = new Uint16Array(sizes[id])
+ unwrap(res, indices[id], 0)
+ }
+
+ indices[id] = null // gc
+ tables.set(id, res)
+ return res
+}