diff options
Diffstat (limited to 'vanilla/node_modules/@exodus/bytes/fallback/multi-byte.table.js')
| -rw-r--r-- | vanilla/node_modules/@exodus/bytes/fallback/multi-byte.table.js | 118 |
1 files changed, 118 insertions, 0 deletions
diff --git a/vanilla/node_modules/@exodus/bytes/fallback/multi-byte.table.js b/vanilla/node_modules/@exodus/bytes/fallback/multi-byte.table.js new file mode 100644 index 0000000..c7dfeb2 --- /dev/null +++ b/vanilla/node_modules/@exodus/bytes/fallback/multi-byte.table.js @@ -0,0 +1,118 @@ +import { fromBase64url } from '@exodus/bytes/base64.js' +import { utf16toString } from '@exodus/bytes/utf16.js' +import loadEncodings from './multi-byte.encodings.cjs' + +export const sizes = { + jis0208: 11_104, + jis0212: 7211, + 'euc-kr': 23_750, + gb18030: 23_940, + big5: 19_782, +} + +// This is huge. It's _much_ smaller than https://npmjs.com/text-encoding though +// Exactly as mapped by the index table +// 0,x - hole of x empty elements +// n,c - continious [c, ...] of length n +// $.. - references to common chunks +// -{x} - same as 1,{x} + +// See tests/multi-byte.test.js to verify that this data decodes exactly into the encoding spec tables + +let indices +const tables = new Map() +/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */ + +function loadBase64(str) { + const x = fromBase64url(str) + const len = x.length + const len2 = len >> 1 + const y = new Uint8Array(len) + let a = -1, b = 0 // prettier-ignore + for (let i = 0, j = 0; i < len; i += 2, j++) { + a = (a + x[j] + 1) & 0xff + b = (b + x[len2 + j]) & 0xff + y[i] = a + y[i + 1] = b + } + + return y +} + +function unwrap(res, t, pos) { + let code = 0 + for (let i = 0; i < t.length; i++) { + let x = t[i] + if (typeof x === 'number') { + if (x === 0) { + pos += t[++i] + } else { + if (x < 0) { + code -= x + x = 1 + } else { + code += t[++i] + } + + for (let k = 0; k < x; k++, pos++, code++) { + if (code <= 0xff_ff) { + res[pos] = code + } else { + const c = String.fromCodePoint(code) + res[pos] = (c.charCodeAt(0) << 16) | c.charCodeAt(1) + } + } + } + } else if (x[0] === '$' && Object.hasOwn(indices, x)) { + pos = unwrap(res, indices[x], pos) // self-reference using shared chunks + } else { + let last + // splits by codepoints + for (const c of utf16toString(loadBase64(x), 'uint8-le')) { + last = c + res[pos++] = c.length === 1 ? c.charCodeAt(0) : (c.charCodeAt(0) << 16) | c.charCodeAt(1) + } + + code = last.codePointAt(0) + 1 + } + } + + return pos +} + +export function getTable(id) { + const cached = tables.get(id) + if (cached) return cached + + if (!indices) indices = loadEncodings() // lazy-load + if (!Object.hasOwn(indices, id)) throw new Error('Unknown encoding') + if (!indices[id]) throw new Error('Table already used (likely incorrect bundler dedupe)') + + let res + if (id.endsWith('-ranges')) { + res = [] + let a = 0, b = 0 // prettier-ignore + const idx = indices[id] + while (idx.length > 0) res.push([(a += idx.shift()), (b += idx.shift())]) // destroying, we remove it later anyway + } else if (id.endsWith('-katakana')) { + let a = -1 + res = new Uint16Array(indices[id].map((x) => (a += x + 1))) + } else if (id === 'big5') { + res = new Uint32Array(sizes[id]) // single or double charcodes + unwrap(res, indices[id], 0) + // Pointer code updates are embedded into the table + // These are skipped in encoder as encoder uses only pointers >= (0xA1 - 0x81) * 157 + res[1133] = 0xca_03_04 + res[1135] = 0xca_03_0c + res[1164] = 0xea_03_04 + res[1166] = 0xea_03_0c + } else { + if (!Object.hasOwn(sizes, id)) throw new Error('Unknown encoding') + res = new Uint16Array(sizes[id]) + unwrap(res, indices[id], 0) + } + + indices[id] = null // gc + tables.set(id, res) + return res +} |
