diff options
Diffstat (limited to 'vanilla/node_modules/@exodus/bytes/fallback/multi-byte.js')
| -rw-r--r-- | vanilla/node_modules/@exodus/bytes/fallback/multi-byte.js | 962 |
1 files changed, 0 insertions, 962 deletions
diff --git a/vanilla/node_modules/@exodus/bytes/fallback/multi-byte.js b/vanilla/node_modules/@exodus/bytes/fallback/multi-byte.js deleted file mode 100644 index ff9d9cb..0000000 --- a/vanilla/node_modules/@exodus/bytes/fallback/multi-byte.js +++ /dev/null @@ -1,962 +0,0 @@ -import { E_STRING } from './_utils.js' -import { asciiPrefix, decodeAscii, decodeLatin1, decodeUCS2, encodeAscii } from './latin1.js' -import { getTable } from './multi-byte.table.js' - -export const E_STRICT = 'Input is not well-formed for this encoding' - -/* Decoders */ - -// If the decoder is not cleared properly, state can be preserved between non-streaming calls! -// See comment about fatal stream - -// All except iso-2022-jp are ASCII supersets -// When adding something that is not an ASCII superset, ajust the ASCII fast path -const mappers = { - // https://encoding.spec.whatwg.org/#euc-kr-decoder - 'euc-kr': (err) => { - const euc = getTable('euc-kr') - let lead = 0 - let oi = 0 - let o16 - - const decodeLead = (b) => { - if (b < 0x41 || b > 0xfe) { - lead = 0 - o16[oi++] = err() - if (b < 128) o16[oi++] = b - } else { - const p = euc[(lead - 0x81) * 190 + b - 0x41] - lead = 0 - if (p) { - o16[oi++] = p - } else { - o16[oi++] = err() - if (b < 128) o16[oi++] = b - } - } - } - - const decode = (arr, start, end, stream) => { - let i = start - o16 = new Uint16Array(end - start + (lead ? 1 : 0)) // there are pairs but they consume more than one byte - oi = 0 - - // Fast path - if (!lead) { - for (const last1 = end - 1; i < last1; ) { - const l = arr[i] - if (l < 128) { - o16[oi++] = l - i++ - } else { - if (l === 0x80 || l === 0xff) break - const b = arr[i + 1] - if (b < 0x41 || b === 0xff) break - const p = euc[(l - 0x81) * 190 + b - 0x41] - if (!p) break - o16[oi++] = p - i += 2 - } - } - } - - if (lead && i < end) decodeLead(arr[i++]) - while (i < end) { - const b = arr[i++] - if (b < 128) { - o16[oi++] = b - } else if (b === 0x80 || b === 0xff) { - o16[oi++] = err() - } else { - lead = b - if (i < end) decodeLead(arr[i++]) - } - } - - if (lead && !stream) { - lead = 0 - o16[oi++] = err() - } - - const res = decodeUCS2(o16, oi) - o16 = null - return res - } - - return { decode, isAscii: () => lead === 0 } - }, - // https://encoding.spec.whatwg.org/#euc-jp-decoder - 'euc-jp': (err) => { - const jis0208 = getTable('jis0208') - const jis0212 = getTable('jis0212') - let j12 = false - let lead = 0 - let oi = 0 - let o16 - - const decodeLead = (b) => { - if (lead === 0x8e && b >= 0xa1 && b <= 0xdf) { - lead = 0 - o16[oi++] = 0xfe_c0 + b - } else if (lead === 0x8f && b >= 0xa1 && b <= 0xfe) { - j12 = true - lead = b - } else { - let cp - if (lead >= 0xa1 && lead <= 0xfe && b >= 0xa1 && b <= 0xfe) { - cp = (j12 ? jis0212 : jis0208)[(lead - 0xa1) * 94 + b - 0xa1] - } - - lead = 0 - j12 = false - if (cp) { - o16[oi++] = cp - } else { - o16[oi++] = err() - if (b < 128) o16[oi++] = b - } - } - } - - const decode = (arr, start, end, stream) => { - let i = start - o16 = new Uint16Array(end - start + (lead ? 1 : 0)) - oi = 0 - - // Fast path, non-j12 - // lead = 0 means j12 = 0 - if (!lead) { - for (const last1 = end - 1; i < last1; ) { - const l = arr[i] - if (l < 128) { - o16[oi++] = l - i++ - } else { - const b = arr[i + 1] - if (l === 0x8e && b >= 0xa1 && b <= 0xdf) { - o16[oi++] = 0xfe_c0 + b - i += 2 - } else { - if (l < 0xa1 || l === 0xff || b < 0xa1 || b === 0xff) break - const cp = jis0208[(l - 0xa1) * 94 + b - 0xa1] - if (!cp) break - o16[oi++] = cp - i += 2 - } - } - } - } - - if (lead && i < end) decodeLead(arr[i++]) - if (lead && i < end) decodeLead(arr[i++]) // could be two leads, but no more - while (i < end) { - const b = arr[i++] - if (b < 128) { - o16[oi++] = b - } else if ((b < 0xa1 && b !== 0x8e && b !== 0x8f) || b === 0xff) { - o16[oi++] = err() - } else { - lead = b - if (i < end) decodeLead(arr[i++]) - if (lead && i < end) decodeLead(arr[i++]) // could be two leads - } - } - - if (lead && !stream) { - lead = 0 - j12 = false // can be true only when lead is non-zero - o16[oi++] = err() - } - - const res = decodeUCS2(o16, oi) - o16 = null - return res - } - - return { decode, isAscii: () => lead === 0 } // j12 can be true only when lead is non-zero - }, - // https://encoding.spec.whatwg.org/#iso-2022-jp-decoder - 'iso-2022-jp': (err) => { - const jis0208 = getTable('jis0208') - let dState = 1 - let oState = 1 - let lead = 0 // 0 or 0x21-0x7e - let out = false - - const bytes = (pushback, b) => { - if (dState < 5 && b === 0x1b) { - dState = 6 // escape start - return - } - - switch (dState) { - case 1: - case 2: - // ASCII, Roman (common) - out = false - if (dState === 2) { - if (b === 0x5c) return 0xa5 - if (b === 0x7e) return 0x20_3e - } - - if (b <= 0x7f && b !== 0x0e && b !== 0x0f) return b - return err() - case 3: - // Katakana - out = false - if (b >= 0x21 && b <= 0x5f) return 0xff_40 + b - return err() - case 4: - // Leading byte - out = false - if (b < 0x21 || b > 0x7e) return err() - lead = b - dState = 5 - return - case 5: - // Trailing byte - out = false - if (b === 0x1b) { - dState = 6 // escape start - return err() - } - - dState = 4 - if (b >= 0x21 && b <= 0x7e) { - const cp = jis0208[(lead - 0x21) * 94 + b - 0x21] - if (cp) return cp - } - - return err() - case 6: - // Escape start - if (b === 0x24 || b === 0x28) { - lead = b - dState = 7 - return - } - - out = false - dState = oState - pushback.push(b) - return err() - case 7: { - // Escape - const l = lead - lead = 0 - let s - if (l === 0x28) { - // eslint-disable-next-line unicorn/prefer-switch - if (b === 0x42) { - s = 1 - } else if (b === 0x4a) { - s = 2 - } else if (b === 0x49) { - s = 3 - } - } else if (l === 0x24 && (b === 0x40 || b === 0x42)) { - s = 4 - } - - if (s) { - dState = oState = s - const output = out - out = true - return output ? err() : undefined - } - - out = false - dState = oState - pushback.push(b, l) - return err() - } - } - } - - const eof = (pushback) => { - if (dState < 5) return null - out = false - switch (dState) { - case 5: - dState = 4 - return err() - case 6: - dState = oState - return err() - case 7: { - dState = oState - pushback.push(lead) - lead = 0 - return err() - } - } - } - - const decode = (arr, start, end, stream) => { - const o16 = new Uint16Array(end - start + 2) // err in eof + lead from state - let oi = 0 - let i = start - const pushback = [] // local and auto-cleared - - // First, dump everything until EOF - // Same as the full loop, but without EOF handling - while (i < end || pushback.length > 0) { - const c = bytes(pushback, pushback.length > 0 ? pushback.pop() : arr[i++]) - if (c !== undefined) o16[oi++] = c // 16-bit - } - - // Then, dump EOF. This needs the same loop as the characters can be pushed back - if (!stream) { - while (i <= end || pushback.length > 0) { - if (i < end || pushback.length > 0) { - const c = bytes(pushback, pushback.length > 0 ? pushback.pop() : arr[i++]) - if (c !== undefined) o16[oi++] = c // 16-bit - } else { - const c = eof(pushback) - if (c === null) break // clean exit - o16[oi++] = c - } - } - } - - // Chrome and WebKit fail on this, we don't: completely destroy the old decoder state when finished streaming - // > If this’s do not flush is false, then set this’s decoder to a new instance of this’s encoding’s decoder, - // > Set this’s do not flush to options["stream"] - if (!stream) { - dState = oState = 1 - lead = 0 - out = false - } - - return decodeUCS2(o16, oi) - } - - return { decode, isAscii: () => false } - }, - // https://encoding.spec.whatwg.org/#shift_jis-decoder - shift_jis: (err) => { - const jis0208 = getTable('jis0208') - let lead = 0 - let oi = 0 - let o16 - - const decodeLead = (b) => { - const l = lead - lead = 0 - if (b >= 0x40 && b <= 0xfc && b !== 0x7f) { - const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41) - if (p >= 8836 && p <= 10_715) { - o16[oi++] = 0xe0_00 - 8836 + p - return - } - - const cp = jis0208[p] - if (cp) { - o16[oi++] = cp - return - } - } - - o16[oi++] = err() - if (b < 128) o16[oi++] = b - } - - const decode = (arr, start, end, stream) => { - o16 = new Uint16Array(end - start + (lead ? 1 : 0)) - oi = 0 - let i = start - - // Fast path - if (!lead) { - for (const last1 = end - 1; i < last1; ) { - const l = arr[i] - if (l <= 0x80) { - o16[oi++] = l - i++ - } else if (l >= 0xa1 && l <= 0xdf) { - o16[oi++] = 0xfe_c0 + l - i++ - } else { - if (l === 0xa0 || l > 0xfc) break - const b = arr[i + 1] - if (b < 0x40 || b > 0xfc || b === 0x7f) break - const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41) - if (p >= 8836 && p <= 10_715) { - o16[oi++] = 0xe0_00 - 8836 + p - i += 2 - } else { - const cp = jis0208[p] - if (!cp) break - o16[oi++] = cp - i += 2 - } - } - } - } - - if (lead && i < end) decodeLead(arr[i++]) - while (i < end) { - const b = arr[i++] - if (b <= 0x80) { - o16[oi++] = b // 0x80 is allowed - } else if (b >= 0xa1 && b <= 0xdf) { - o16[oi++] = 0xfe_c0 + b - } else if (b === 0xa0 || b > 0xfc) { - o16[oi++] = err() - } else { - lead = b - if (i < end) decodeLead(arr[i++]) - } - } - - if (lead && !stream) { - lead = 0 - o16[oi++] = err() - } - - const res = decodeUCS2(o16, oi) - o16 = null - return res - } - - return { decode, isAscii: () => lead === 0 } - }, - // https://encoding.spec.whatwg.org/#gbk-decoder - gbk: (err) => mappers.gb18030(err), // 10.1.1. GBK’s decoder is gb18030’s decoder - // https://encoding.spec.whatwg.org/#gb18030-decoder - gb18030: (err) => { - const gb18030 = getTable('gb18030') - const gb18030r = getTable('gb18030-ranges') - let g1 = 0, g2 = 0, g3 = 0 // prettier-ignore - const index = (p) => { - if ((p > 39_419 && p < 189_000) || p > 1_237_575) return - if (p === 7457) return 0xe7_c7 - let a = 0, b = 0 // prettier-ignore - for (const [c, d] of gb18030r) { - if (c > p) break - a = c - b = d - } - - return b + p - a - } - - // g1 is 0 or 0x81-0xfe - // g2 is 0 or 0x30-0x39 - // g3 is 0 or 0x81-0xfe - - const decode = (arr, start, end, stream) => { - const o16 = new Uint16Array(end - start + (g1 ? 3 : 0)) // even with pushback it's at most 1 char per byte - let oi = 0 - let i = start - const pushback = [] // local and auto-cleared - - // Fast path for 2-byte only - // pushback is always empty ad start, and g1 = 0 means g2 = g3 = 0 - if (g1 === 0) { - for (const last1 = end - 1; i < last1; ) { - const b = arr[i] - if (b < 128) { - o16[oi++] = b - i++ - } else if (b === 0x80) { - o16[oi++] = 0x20_ac - i++ - } else { - if (b === 0xff) break - const n = arr[i + 1] - let cp - if (n < 0x7f) { - if (n < 0x40) break - cp = gb18030[(b - 0x81) * 190 + n - 0x40] - } else { - if (n === 0xff || n === 0x7f) break - cp = gb18030[(b - 0x81) * 190 + n - 0x41] - } - - if (!cp) break - o16[oi++] = cp // 16-bit - i += 2 - } - } - } - - // First, dump everything until EOF - // Same as the full loop, but without EOF handling - while (i < end || pushback.length > 0) { - const b = pushback.length > 0 ? pushback.pop() : arr[i++] - if (g1) { - // g2 can be set only when g1 is set, g3 can be set only when g2 is set - // hence, 3 checks for g3 is faster than 3 checks for g1 - if (g2) { - if (g3) { - if (b <= 0x39 && b >= 0x30) { - const p = index( - (g1 - 0x81) * 12_600 + (g2 - 0x30) * 1260 + (g3 - 0x81) * 10 + b - 0x30 - ) - g1 = g2 = g3 = 0 - if (p === undefined) { - o16[oi++] = err() - } else if (p <= 0xff_ff) { - o16[oi++] = p // Can validly return replacement - } else { - const d = p - 0x1_00_00 - o16[oi++] = 0xd8_00 | (d >> 10) - o16[oi++] = 0xdc_00 | (d & 0x3_ff) - } - } else { - pushback.push(b, g3, g2) - g1 = g2 = g3 = 0 - o16[oi++] = err() - } - } else if (b >= 0x81 && b <= 0xfe) { - g3 = b - } else { - pushback.push(b, g2) - g1 = g2 = 0 - o16[oi++] = err() - } - } else if (b <= 0x39 && b >= 0x30) { - g2 = b - } else { - let cp - if (b >= 0x40 && b <= 0xfe && b !== 0x7f) { - cp = gb18030[(g1 - 0x81) * 190 + b - (b < 0x7f ? 0x40 : 0x41)] - } - - g1 = 0 - if (cp) { - o16[oi++] = cp // 16-bit - } else { - o16[oi++] = err() - if (b < 128) o16[oi++] = b // can be processed immediately - } - } - } else if (b < 128) { - o16[oi++] = b - } else if (b === 0x80) { - o16[oi++] = 0x20_ac - } else if (b === 0xff) { - o16[oi++] = err() - } else { - g1 = b - } - } - - // if g1 = 0 then g2 = g3 = 0 - if (g1 && !stream) { - g1 = g2 = g3 = 0 - o16[oi++] = err() - } - - return decodeUCS2(o16, oi) - } - - return { decode, isAscii: () => g1 === 0 } // if g1 = 0 then g2 = g3 = 0 - }, - // https://encoding.spec.whatwg.org/#big5 - big5: (err) => { - // The only decoder which returns multiple codepoints per byte, also has non-charcode codepoints - // We store that as strings - const big5 = getTable('big5') - let lead = 0 - let oi = 0 - let o16 - - const decodeLead = (b) => { - if (b < 0x40 || (b > 0x7e && b < 0xa1) || b === 0xff) { - lead = 0 - o16[oi++] = err() - if (b < 128) o16[oi++] = b - } else { - const p = big5[(lead - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)] - lead = 0 - if (p > 0x1_00_00) { - o16[oi++] = p >> 16 - o16[oi++] = p & 0xff_ff - } else if (p) { - o16[oi++] = p - } else { - o16[oi++] = err() - if (b < 128) o16[oi++] = b - } - } - } - - // eslint-disable-next-line sonarjs/no-identical-functions - const decode = (arr, start, end, stream) => { - let i = start - o16 = new Uint16Array(end - start + (lead ? 1 : 0)) // there are pairs but they consume more than one byte - oi = 0 - - // Fast path - if (!lead) { - for (const last1 = end - 1; i < last1; ) { - const l = arr[i] - if (l < 128) { - o16[oi++] = l - i++ - } else { - if (l === 0x80 || l === 0xff) break - const b = arr[i + 1] - if (b < 0x40 || (b > 0x7e && b < 0xa1) || b === 0xff) break - const p = big5[(l - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)] - if (p > 0x1_00_00) { - o16[oi++] = p >> 16 - o16[oi++] = p & 0xff_ff - } else { - if (!p) break - o16[oi++] = p - } - - i += 2 - } - } - } - - if (lead && i < end) decodeLead(arr[i++]) - while (i < end) { - const b = arr[i++] - if (b < 128) { - o16[oi++] = b - } else if (b === 0x80 || b === 0xff) { - o16[oi++] = err() - } else { - lead = b - if (i < end) decodeLead(arr[i++]) - } - } - - if (lead && !stream) { - lead = 0 - o16[oi++] = err() - } - - const res = decodeUCS2(o16, oi) - o16 = null - return res - } - - return { decode, isAscii: () => lead === 0 } - }, -} - -export const isAsciiSuperset = (enc) => enc !== 'iso-2022-jp' // all others are ASCII supersets and can use fast path - -export function multibyteDecoder(enc, loose = false) { - if (typeof loose !== 'boolean') throw new TypeError('loose option should be boolean') - if (!Object.hasOwn(mappers, enc)) throw new RangeError('Unsupported encoding') - - // Input is assumed to be typechecked already - let mapper - const asciiSuperset = isAsciiSuperset(enc) - let streaming // because onErr is cached in mapper - const onErr = loose - ? () => 0xff_fd - : () => { - // The correct way per spec seems to be not destoying the decoder state in stream mode, even when fatal - // Decoders big5, euc-jp, euc-kr, shift_jis, gb18030 / gbk - all clear state before throwing unless EOF, so not affected - // iso-2022-jp is the only tricky one one where this !stream check matters in non-stream mode - if (!streaming) mapper = null // destroy state, effectively the same as 'do not flush' = false, but early - throw new TypeError(E_STRICT) - } - - return (arr, stream = false) => { - let res = '' - if (asciiSuperset && (!mapper || mapper.isAscii?.())) { - const prefixLen = asciiPrefix(arr) - if (prefixLen === arr.length) return decodeAscii(arr) // ascii - res = decodeLatin1(arr, 0, prefixLen) // TODO: check if decodeAscii with subarray is faster for small prefixes too - } - - streaming = stream // affects onErr - if (!mapper) mapper = mappers[enc](onErr) - return res + mapper.decode(arr, res.length, arr.length, stream) - } -} - -/* Encoders */ - -const maps = new Map() -const e7 = [[148, 236], [149, 237], [150, 243]] // prettier-ignore -const e8 = [[30, 89], [38, 97], [43, 102], [44, 103], [50, 109], [67, 126], [84, 144], [100, 160]] // prettier-ignore -const preencoders = { - __proto__: null, - big5: (p) => ((((p / 157) | 0) + 0x81) << 8) | ((p % 157 < 0x3f ? 0x40 : 0x62) + (p % 157)), - shift_jis: (p) => { - const l = (p / 188) | 0 - const t = p % 188 - return ((l + (l < 0x1f ? 0x81 : 0xc1)) << 8) | ((t < 0x3f ? 0x40 : 0x41) + t) - }, - 'iso-2022-jp': (p) => ((((p / 94) | 0) + 0x21) << 8) | ((p % 94) + 0x21), - 'euc-jp': (p) => ((((p / 94) | 0) + 0xa1) << 8) | ((p % 94) + 0xa1), - 'euc-kr': (p) => ((((p / 190) | 0) + 0x81) << 8) | ((p % 190) + 0x41), - gb18030: (p) => ((((p / 190) | 0) + 0x81) << 8) | ((p % 190 < 0x3f ? 0x40 : 0x41) + (p % 190)), -} - -preencoders.gbk = preencoders.gb18030 - -// We accept that encoders use non-trivial amount of mem, for perf -// most are are 128 KiB mem, big5 is 380 KiB, lazy-loaded at first use -function getMap(id, size, ascii) { - const cached = maps.get(id) - if (cached) return cached - let tname = id - const sjis = id === 'shift_jis' - const iso2022jp = id === 'iso-2022-jp' - if (iso2022jp) tname = 'jis0208' - if (id === 'gbk') tname = 'gb18030' - if (id === 'euc-jp' || sjis) tname = 'jis0208' - const table = getTable(tname) - const map = new Uint16Array(size) - const enc = preencoders[id] || ((p) => p + 1) - for (let i = 0; i < table.length; i++) { - const c = table[i] - if (!c) continue - if (id === 'big5') { - if (i < 5024) continue // this also skips multi-codepoint strings - // In big5, all return first entries except for these - if ( - map[c] && - c !== 0x25_50 && - c !== 0x25_5e && - c !== 0x25_61 && - c !== 0x25_6a && - c !== 0x53_41 && - c !== 0x53_45 - ) { - continue - } - } else { - if (sjis && i >= 8272 && i <= 8835) continue - if (map[c]) continue - } - - if (c > 0xff_ff) { - // always a single codepoint here - const s = String.fromCharCode(c >> 16, c & 0xff_ff) - map[s.codePointAt(0)] = enc(i) - } else { - map[c] = enc(i) - } - } - - if (ascii) for (let i = 0; i < 0x80; i++) map[i] = i - if (sjis || id === 'euc-jp') { - if (sjis) map[0x80] = 0x80 - const d = sjis ? 0xfe_c0 : 0x70_c0 - for (let i = 0xff_61; i <= 0xff_9f; i++) map[i] = i - d - map[0x22_12] = map[0xff_0d] - map[0xa5] = 0x5c - map[0x20_3e] = 0x7e - } else if (tname === 'gb18030') { - if (id === 'gbk') map[0x20_ac] = 0x80 - for (let i = 0xe7_8d; i <= 0xe7_93; i++) map[i] = i - 0x40_b4 - for (const [a, b] of e7) map[0xe7_00 | a] = 0xa6_00 | b - for (const [a, b] of e8) map[0xe8_00 | a] = 0xfe_00 | b - } - - maps.set(id, map) - return map -} - -const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex -let gb18030r, katakana - -export function multibyteEncoder(enc, onError) { - if (!Object.hasOwn(mappers, enc)) throw new RangeError('Unsupported encoding') - const size = enc === 'big5' ? 0x2_f8_a7 : 0x1_00_00 // for big5, max codepoint in table + 1 - const iso2022jp = enc === 'iso-2022-jp' - const gb18030 = enc === 'gb18030' - const ascii = isAsciiSuperset(enc) - const width = iso2022jp ? 5 : gb18030 ? 4 : 2 - const tailsize = iso2022jp ? 3 : 0 - const map = getMap(enc, size, ascii) - if (gb18030 && !gb18030r) gb18030r = getTable('gb18030-ranges') - if (iso2022jp && !katakana) katakana = getTable('iso-2022-jp-katakana') - return (str) => { - if (typeof str !== 'string') throw new TypeError(E_STRING) - if (ascii && !NON_LATIN.test(str)) { - try { - return encodeAscii(str, E_STRICT) - } catch {} - } - - const length = str.length - const u8 = new Uint8Array(length * width + tailsize) - let i = 0 - - if (ascii) { - while (i < length) { - const x = str.charCodeAt(i) - if (x >= 128) break - u8[i++] = x - } - } - - // eslint-disable-next-line unicorn/consistent-function-scoping - const err = (code) => { - if (onError) return onError(code, u8, i) - throw new TypeError(E_STRICT) - } - - if (!map || map.length < size) /* c8 ignore next */ throw new Error('Unreachable') // Important for perf - - if (iso2022jp) { - let state = 0 // 0 = ASCII, 1 = Roman, 2 = jis0208 - const restore = () => { - state = 0 - u8[i++] = 0x1b - u8[i++] = 0x28 - u8[i++] = 0x42 - } - - for (let j = 0; j < length; j++) { - let x = str.charCodeAt(j) - if (x >= 0xd8_00 && x < 0xe0_00) { - if (state === 2) restore() - if (x >= 0xdc_00 || j + 1 === length) { - i += err(x) // lone - } else { - const x1 = str.charCodeAt(j + 1) - if (x1 < 0xdc_00 || x1 >= 0xe0_00) { - i += err(x) // lone - } else { - j++ // consume x1 - i += err(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10))) - } - } - } else if (x < 0x80) { - if (state === 2 || (state === 1 && (x === 0x5c || x === 0x7e))) restore() - if (x === 0xe || x === 0xf || x === 0x1b) { - i += err(0xff_fd) // 12.2.2. step 3: This returns U+FFFD rather than codePoint to prevent attacks - } else { - u8[i++] = x - } - } else if (x === 0xa5 || x === 0x20_3e) { - if (state !== 1) { - state = 1 - u8[i++] = 0x1b - u8[i++] = 0x28 - u8[i++] = 0x4a - } - - u8[i++] = x === 0xa5 ? 0x5c : 0x7e - } else { - if (x === 0x22_12) x = 0xff_0d - if (x >= 0xff_61 && x <= 0xff_9f) x = katakana[x - 0xff_61] - const e = map[x] - if (e) { - if (state !== 2) { - state = 2 - u8[i++] = 0x1b - u8[i++] = 0x24 - u8[i++] = 0x42 - } - - u8[i++] = e >> 8 - u8[i++] = e & 0xff - } else { - if (state === 2) restore() - i += err(x) - } - } - } - - if (state) restore() - } else if (gb18030) { - // Deduping this branch hurts other encoders perf - const encode = (cp) => { - let a = 0, b = 0 // prettier-ignore - for (const [c, d] of gb18030r) { - if (d > cp) break - a = c - b = d - } - - let rp = cp === 0xe7_c7 ? 7457 : a + cp - b - u8[i++] = 0x81 + ((rp / 12_600) | 0) - rp %= 12_600 - u8[i++] = 0x30 + ((rp / 1260) | 0) - rp %= 1260 - u8[i++] = 0x81 + ((rp / 10) | 0) - u8[i++] = 0x30 + (rp % 10) - } - - for (let j = i; j < length; j++) { - const x = str.charCodeAt(j) - if (x >= 0xd8_00 && x < 0xe0_00) { - if (x >= 0xdc_00 || j + 1 === length) { - i += err(x) // lone - } else { - const x1 = str.charCodeAt(j + 1) - if (x1 < 0xdc_00 || x1 >= 0xe0_00) { - i += err(x) // lone - } else { - j++ // consume x1 - encode(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10))) - } - } - } else { - const e = map[x] - if (e & 0xff_00) { - u8[i++] = e >> 8 - u8[i++] = e & 0xff - } else if (e || x === 0) { - u8[i++] = e - } else if (x === 0xe5_e5) { - i += err(x) - } else { - encode(x) - } - } - } - } else { - const long = - enc === 'big5' - ? (x) => { - const e = map[x] - if (e & 0xff_00) { - u8[i++] = e >> 8 - u8[i++] = e & 0xff - } else if (e || x === 0) { - u8[i++] = e - } else { - i += err(x) - } - } - : (x) => { - i += err(x) - } - - for (let j = i; j < length; j++) { - const x = str.charCodeAt(j) - if (x >= 0xd8_00 && x < 0xe0_00) { - if (x >= 0xdc_00 || j + 1 === length) { - i += err(x) // lone - } else { - const x1 = str.charCodeAt(j + 1) - if (x1 < 0xdc_00 || x1 >= 0xe0_00) { - i += err(x) // lone - } else { - j++ // consume x1 - long(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10))) - } - } - } else { - const e = map[x] - if (e & 0xff_00) { - u8[i++] = e >> 8 - u8[i++] = e & 0xff - } else if (e || x === 0) { - u8[i++] = e - } else { - i += err(x) - } - } - } - } - - return i === u8.length ? u8 : u8.subarray(0, i) - } -} |
