vanilla/node_modules/@exodus/bytes/fallback/latin1.js


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151

import {
  nativeEncoder,
  nativeDecoder,
  nativeDecoderLatin1,
  nativeBuffer,
  encodeCharcodes,
  isHermes,
  isDeno,
  isLE,
} from './platform.js'

const atob = /* @__PURE__ */ (() => globalThis.atob)()
const web64 = /* @__PURE__ */ (() => Uint8Array.prototype.toBase64)()

// See http://stackoverflow.com/a/22747272/680742, which says that lowest limit is in Chrome, with 0xffff args
// On Hermes, actual max is 0x20_000 minus current stack depth, 1/16 of that should be safe
const maxFunctionArgs = 0x20_00

// toBase64+atob path is faster on everything where fromBase64 is fast
const useLatin1atob = web64 && atob

export function asciiPrefix(arr) {
  let p = 0 // verified ascii bytes
  const length = arr.length
  // Threshold tested on Hermes (worse on <=48, better on >=52)
  // Also on v8 arrs of size <=64 might be on heap and using Uint32Array on them is unoptimal
  if (length > 64) {
    // Speedup with u32
    const u32start = (4 - (arr.byteOffset & 3)) % 4 // offset start by this many bytes for alignment
    for (; p < u32start; p++) if (arr[p] >= 0x80) return p
    const u32length = ((arr.byteLength - u32start) / 4) | 0
    const u32 = new Uint32Array(arr.buffer, arr.byteOffset + u32start, u32length)
    let i = 0
    for (const last3 = u32length - 3; ; p += 16, i += 4) {
      if (i >= last3) break // loop is fast enough for moving this here to be _very_ useful, likely due to array access checks
      const a = u32[i]
      const b = u32[i + 1]
      const c = u32[i + 2]
      const d = u32[i + 3]
      // "(a | b | c | d) & mask" is slower on Hermes though faster on v8
      if (a & 0x80_80_80_80 || b & 0x80_80_80_80 || c & 0x80_80_80_80 || d & 0x80_80_80_80) break
    }

    for (; i < u32length; p += 4, i++) if (u32[i] & 0x80_80_80_80) break
  }

  for (; p < length; p++) if (arr[p] >= 0x80) return p
  return length
}

// Capable of decoding Uint16Array to UTF-16 as well as Uint8Array to Latin-1
export function decodeLatin1(arr, start = 0, stop = arr.length) {
  start |= 0
  stop |= 0
  const total = stop - start
  if (total === 0) return ''

  if (
    useLatin1atob &&
    total >= 256 &&
    total < 1e8 &&
    arr.toBase64 === web64 &&
    arr.BYTES_PER_ELEMENT === 1
  ) {
    const sliced = start === 0 && stop === arr.length ? arr : arr.subarray(start, stop)
    return atob(sliced.toBase64())
  }

  if (total > maxFunctionArgs) {
    let prefix = ''
    for (let i = start; i < stop; ) {
      const i1 = Math.min(stop, i + maxFunctionArgs)
      prefix += String.fromCharCode.apply(String, arr.subarray(i, i1))
      i = i1
    }

    return prefix
  }

  const sliced = start === 0 && stop === arr.length ? arr : arr.subarray(start, stop)
  return String.fromCharCode.apply(String, sliced)
}

// Unchecked for well-formedness, raw. Expects Uint16Array input
export const decodeUCS2 =
  nativeBuffer && isLE && !isDeno
    ? (u16, stop = u16.length) => {
        // TODO: fast path for BE, perhaps faster path for Deno. Note that decoder replaces, this function doesn't
        if (stop > 32) return nativeBuffer.from(u16.buffer, u16.byteOffset, stop * 2).ucs2Slice() // from 64 bytes, below are in heap
        return decodeLatin1(u16, 0, stop)
      }
    : (u16, stop = u16.length) => decodeLatin1(u16, 0, stop)

// Does not check input, uses best available method
// Building an array for this is only faster than proper string concatenation when TextDecoder or native Buffer are available
export const decodeAscii = nativeBuffer
  ? (a) =>
      // Buffer is faster on Node.js (but only for long enough data), if we know that output is ascii
      a.byteLength >= 0x3_00 && !isDeno
        ? nativeBuffer.from(a.buffer, a.byteOffset, a.byteLength).latin1Slice(0, a.byteLength) // .latin1Slice is faster than .asciiSlice
        : nativeDecoder.decode(a) // On Node.js, utf8 decoder is faster than latin1
  : nativeDecoderLatin1
    ? (a) => nativeDecoderLatin1.decode(a) // On browsers (specifically WebKit), latin1 decoder is faster than utf8
    : (a) =>
        decodeLatin1(
          a instanceof Uint8Array ? a : new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
        )

/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */

export function encodeAsciiPrefix(x, s) {
  let i = 0
  for (const len3 = s.length - 3; i < len3; i += 4) {
    const x0 = s.charCodeAt(i), x1 = s.charCodeAt(i + 1), x2 = s.charCodeAt(i + 2), x3 = s.charCodeAt(i + 3) // prettier-ignore
    if ((x0 | x1 | x2 | x3) >= 128) break
    x[i] = x0
    x[i + 1] = x1
    x[i + 2] = x2
    x[i + 3] = x3
  }

  return i
}

/* eslint-enable @exodus/mutable/no-param-reassign-prop-only */

// Warning: can be used only on checked strings, converts strings to 8-bit
export const encodeLatin1 = (str) => encodeCharcodes(str, new Uint8Array(str.length))

// Expects nativeEncoder to be present
const useEncodeInto = /* @__PURE__ */ (() => isHermes && nativeEncoder?.encodeInto)()
export const encodeAscii = useEncodeInto
  ? (str, ERR) => {
      // Much faster in Hermes
      const codes = new Uint8Array(str.length + 4) // overshoot by a full utf8 char
      const info = nativeEncoder.encodeInto(str, codes)
      if (info.read !== str.length || info.written !== str.length) throw new SyntaxError(ERR) // non-ascii
      return codes.subarray(0, str.length)
    }
  : nativeBuffer
    ? (str, ERR) => {
        // TextEncoder is slow on Node.js 24 / 25 (was ok on 22)
        const codes = nativeBuffer.from(str, 'utf8') // ascii/latin1 coerces, we need to check
        if (codes.length !== str.length) throw new SyntaxError(ERR) // non-ascii
        return new Uint8Array(codes.buffer, codes.byteOffset, codes.byteLength)
      }
    : (str, ERR) => {
        const codes = nativeEncoder.encode(str)
        if (codes.length !== str.length) throw new SyntaxError(ERR) // non-ascii
        return codes
      }