aboutsummaryrefslogtreecommitdiffstats
path: root/vanilla/node_modules/@exodus/bytes/single-byte.d.ts
blob: ab6d6a2a3be04b8300dba32e4a432e8c17c472e7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
/**
 * Decode / encode the legacy single-byte encodings according to the
 * [Encoding standard](https://encoding.spec.whatwg.org/)
 * ([§9](https://encoding.spec.whatwg.org/#legacy-single-byte-encodings),
 * [§14.5](https://encoding.spec.whatwg.org/#x-user-defined)),
 * and [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859) `iso-8859-*` mappings.
 *
 * ```js
 * import { createSinglebyteDecoder, createSinglebyteEncoder } from '@exodus/bytes/single-byte.js'
 * import { windows1252toString, windows1252fromString } from '@exodus/bytes/single-byte.js'
 * import { latin1toString, latin1fromString } from '@exodus/bytes/single-byte.js'
 * ```
 *
 * > [!WARNING]
 * > This is a lower-level API for single-byte encodings.
 * > It might not match what you expect, as it supports both WHATWG and unicode.org encodings under
 * > different names, with the main intended use case for the latter being either non-web or legacy contexts.
 * >
 * > For a safe WHATWG Encoding-compatible API, see `@exodus/bytes/encoding.js` import (and variants of it).
 * >
 * > Make sure you know what you are doing, and check the documentation, when using encodings from this file directly.
 *
 * Supports all single-byte encodings listed in the WHATWG Encoding standard:
 * `ibm866`, `iso-8859-2`, `iso-8859-3`, `iso-8859-4`, `iso-8859-5`, `iso-8859-6`, `iso-8859-7`, `iso-8859-8`,
 * `iso-8859-8-i`, `iso-8859-10`, `iso-8859-13`, `iso-8859-14`, `iso-8859-15`, `iso-8859-16`, `koi8-r`, `koi8-u`,
 * `macintosh`, `windows-874`, `windows-1250`, `windows-1251`, `windows-1252`, `windows-1253`, `windows-1254`,
 * `windows-1255`, `windows-1256`, `windows-1257`, `windows-1258`, `x-mac-cyrillic` and `x-user-defined`.
 *
 * Also supports `iso-8859-1`, `iso-8859-9`, `iso-8859-11` as defined at
 * [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859)
 * (and all other `iso-8859-*` encodings there as they match WHATWG).
 *
 * > [!NOTE]
 * > While all `iso-8859-*` encodings supported by the [WHATWG Encoding standard](https://encoding.spec.whatwg.org/) match
 * > [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859), the WHATWG Encoding spec doesn't support
 * > `iso-8859-1`, `iso-8859-9`, `iso-8859-11`, and instead maps them as labels to `windows-1252`, `windows-1254`, `windows-874`.\
 * > `createSinglebyteDecoder()` (unlike `TextDecoder` or `legacyHookDecode()`) does not do such mapping,
 * > so its results will differ from `TextDecoder` for those encoding names.
 *
 * ```js
 * > new TextDecoder('iso-8859-1').encoding
 * 'windows-1252'
 * > new TextDecoder('iso-8859-9').encoding
 * 'windows-1254'
 * > new TextDecoder('iso-8859-11').encoding
 * 'windows-874'
 * > new TextDecoder('iso-8859-9').decode(Uint8Array.of(0x80, 0x81, 0xd0))
 * '€\x81Ğ' // this is actually decoded according to windows-1254 per TextDecoder spec
 * > createSinglebyteDecoder('iso-8859-9')(Uint8Array.of(0x80, 0x81, 0xd0))
 * '\x80\x81Ğ' // this is iso-8859-9 as defined at https://unicode.org/Public/MAPPINGS/ISO8859/8859-9.txt
 * ```
 *
 * All WHATWG Encoding spec [`windows-*` encodings](https://encoding.spec.whatwg.org/#windows-874) are supersets of
 * corresponding [unicode.org encodings](https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/), meaning that
 * they encode/decode all the old valid (non-replacement) strings / byte sequences identically, but can also support
 * a wider range of inputs.
 *
 * @module @exodus/bytes/single-byte.js
 */

/// <reference types="node" />

import type { Uint8ArrayBuffer } from './array.js';

/**
 * Create a decoder for a supported single-byte encoding, identified by its lowercased name.
 *
 * Returns a function `decode(arr)` that decodes bytes to a string.
 *
 * By default, decoding throws on bytes that have no mapping in the encoding;
 * pass `loose = true` to have such bytes replaced with the replacement character instead.
 *
 * > [!NOTE]
 * > The name is not resolved as a WHATWG label: `'iso-8859-1'`, `'iso-8859-9'` and `'iso-8859-11'`
 * > select the unicode.org mappings, not `windows-1252`, `windows-1254` and `windows-874` as
 * > `TextDecoder` would, so results can differ from `TextDecoder` for those names.
 *
 * @param encoding - The lowercased encoding name (e.g., 'iso-8859-1', 'windows-1252')
 * @param loose - If true, replaces unmapped bytes with the replacement character instead of throwing (default: false)
 * @returns A function that decodes bytes to a string
 */
export function createSinglebyteDecoder(
  encoding: string,
  loose?: boolean
): (arr: Uint8Array) => string;

/**
 * Create an encoder for a supported single-byte encoding, identified by its lowercased name.
 *
 * Returns a function `encode(string)` that encodes a string to bytes.
 *
 * In `'fatal'` mode (the default, and currently the only supported mode), encoding throws on
 * non-well-formed strings and on any code point that cannot be encoded in the target encoding.
 *
 * @param encoding - The lowercased encoding name (e.g., 'iso-8859-1', 'windows-1252')
 * @param options - Encoding options
 * @param options.mode - Encoding mode (default: 'fatal'). Currently, only 'fatal' mode is supported.
 * @returns A function that encodes a string to bytes
 */
export function createSinglebyteEncoder(
  encoding: string,
  options?: { mode?: 'fatal' }
): (string: string) => Uint8ArrayBuffer;

/**
 * Decode `iso-8859-1` bytes to a string.
 *
 * Every byte value can be decoded in this encoding, so there is no loose variant:
 * there are no unmapped bytes to replace.
 *
 * Same as:
 * ```js
 * const latin1toString = createSinglebyteDecoder('iso-8859-1')
 * ```
 *
 * > [!NOTE]
 * > This differs from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as both of
 * > those are aliases for `new TextDecoder('windows-1252')`.
 *
 * @param arr - The bytes to decode
 * @returns The decoded string
 */
export function latin1toString(arr: Uint8Array): string;

/**
 * Encode a string to `iso-8859-1` bytes.
 *
 * Throws on non-well-formed strings and on any code point that cannot be encoded in `iso-8859-1`.
 *
 * Same as:
 * ```js
 * const latin1fromString = createSinglebyteEncoder('iso-8859-1', { mode: 'fatal' })
 * ```
 *
 * @param string - The string to encode
 * @returns The encoded bytes
 */
export function latin1fromString(string: string): Uint8ArrayBuffer;

/**
 * Decode `windows-1252` bytes to a string.
 *
 * Every byte value can be decoded in this encoding, so there is no loose variant:
 * there are no unmapped bytes to replace.
 *
 * Same as:
 * ```js
 * const windows1252toString = createSinglebyteDecoder('windows-1252')
 * ```
 *
 * @param arr - The bytes to decode
 * @returns The decoded string
 */
export function windows1252toString(arr: Uint8Array): string;

/**
 * Encode a string to `windows-1252` bytes.
 *
 * Throws on non-well-formed strings and on any code point that cannot be encoded in `windows-1252`.
 *
 * Same as:
 * ```js
 * const windows1252fromString = createSinglebyteEncoder('windows-1252', { mode: 'fatal' })
 * ```
 *
 * @param string - The string to encode
 * @returns The encoded bytes
 */
export function windows1252fromString(string: string): Uint8ArrayBuffer;