diff options
| author | Adam Mathes <adam@adammathes.com> | 2026-02-13 21:34:48 -0800 |
|---|---|---|
| committer | Adam Mathes <adam@adammathes.com> | 2026-02-13 21:34:48 -0800 |
| commit | 76cb9c2a39d477a64824a985ade40507e3bbade1 (patch) | |
| tree | 41e997aa9c6f538d3a136af61dae9424db2005a9 /vanilla/node_modules/css-tree/lib/tokenizer/char-code-definitions.js | |
| parent | 819a39a21ac992b1393244a4c283bbb125208c69 (diff) | |
| download | neko-76cb9c2a39d477a64824a985ade40507e3bbade1.tar.gz neko-76cb9c2a39d477a64824a985ade40507e3bbade1.tar.bz2 neko-76cb9c2a39d477a64824a985ade40507e3bbade1.zip | |
feat(vanilla): add testing infrastructure and tests (NK-wjnczv)
Diffstat (limited to 'vanilla/node_modules/css-tree/lib/tokenizer/char-code-definitions.js')
| -rw-r--r-- | vanilla/node_modules/css-tree/lib/tokenizer/char-code-definitions.js | 212 |
1 files changed, 212 insertions, 0 deletions
diff --git a/vanilla/node_modules/css-tree/lib/tokenizer/char-code-definitions.js b/vanilla/node_modules/css-tree/lib/tokenizer/char-code-definitions.js new file mode 100644 index 0000000..715572a --- /dev/null +++ b/vanilla/node_modules/css-tree/lib/tokenizer/char-code-definitions.js @@ -0,0 +1,212 @@ +const EOF = 0; + +// https://drafts.csswg.org/css-syntax-3/ +// § 4.2. Definitions + +// digit +// A code point between U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9). +export function isDigit(code) { + return code >= 0x0030 && code <= 0x0039; +} + +// hex digit +// A digit, or a code point between U+0041 LATIN CAPITAL LETTER A (A) and U+0046 LATIN CAPITAL LETTER F (F), +// or a code point between U+0061 LATIN SMALL LETTER A (a) and U+0066 LATIN SMALL LETTER F (f). +export function isHexDigit(code) { + return ( + isDigit(code) || // 0 .. 9 + (code >= 0x0041 && code <= 0x0046) || // A .. F + (code >= 0x0061 && code <= 0x0066) // a .. f + ); +} + +// uppercase letter +// A code point between U+0041 LATIN CAPITAL LETTER A (A) and U+005A LATIN CAPITAL LETTER Z (Z). +export function isUppercaseLetter(code) { + return code >= 0x0041 && code <= 0x005A; +} + +// lowercase letter +// A code point between U+0061 LATIN SMALL LETTER A (a) and U+007A LATIN SMALL LETTER Z (z). +export function isLowercaseLetter(code) { + return code >= 0x0061 && code <= 0x007A; +} + +// letter +// An uppercase letter or a lowercase letter. +export function isLetter(code) { + return isUppercaseLetter(code) || isLowercaseLetter(code); +} + +// non-ASCII code point +// A code point with a value equal to or greater than U+0080 <control>. +// +// 2024-09-02: The latest spec narrows the range for non-ASCII characters (see https://github.com/csstree/csstree/issues/188). +// However, all modern browsers support a wider range, and strictly following the latest spec could result +// in some CSS being parsed incorrectly, even though it works in the browser. Therefore, this function adheres +// to the previous, broader definition of non-ASCII characters. +export function isNonAscii(code) { + return code >= 0x0080; +} + +// name-start code point +// A letter, a non-ASCII code point, or U+005F LOW LINE (_). +export function isNameStart(code) { + return isLetter(code) || isNonAscii(code) || code === 0x005F; +} + +// name code point +// A name-start code point, a digit, or U+002D HYPHEN-MINUS (-). +export function isName(code) { + return isNameStart(code) || isDigit(code) || code === 0x002D; +} + +// non-printable code point +// A code point between U+0000 NULL and U+0008 BACKSPACE, or U+000B LINE TABULATION, +// or a code point between U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE, or U+007F DELETE. +export function isNonPrintable(code) { + return ( + (code >= 0x0000 && code <= 0x0008) || + (code === 0x000B) || + (code >= 0x000E && code <= 0x001F) || + (code === 0x007F) + ); +} + +// newline +// U+000A LINE FEED. Note that U+000D CARRIAGE RETURN and U+000C FORM FEED are not included in this definition, +// as they are converted to U+000A LINE FEED during preprocessing. +// TODO: we doesn't do a preprocessing, so check a code point for U+000D CARRIAGE RETURN and U+000C FORM FEED +export function isNewline(code) { + return code === 0x000A || code === 0x000D || code === 0x000C; +} + +// whitespace +// A newline, U+0009 CHARACTER TABULATION, or U+0020 SPACE. +export function isWhiteSpace(code) { + return isNewline(code) || code === 0x0020 || code === 0x0009; +} + +// § 4.3.8. Check if two code points are a valid escape +export function isValidEscape(first, second) { + // If the first code point is not U+005C REVERSE SOLIDUS (\), return false. + if (first !== 0x005C) { + return false; + } + + // Otherwise, if the second code point is a newline or EOF, return false. + if (isNewline(second) || second === EOF) { + return false; + } + + // Otherwise, return true. + return true; +} + +// § 4.3.9. Check if three code points would start an identifier +export function isIdentifierStart(first, second, third) { + // Look at the first code point: + + // U+002D HYPHEN-MINUS + if (first === 0x002D) { + // If the second code point is a name-start code point or a U+002D HYPHEN-MINUS, + // or the second and third code points are a valid escape, return true. Otherwise, return false. + return ( + isNameStart(second) || + second === 0x002D || + isValidEscape(second, third) + ); + } + + // name-start code point + if (isNameStart(first)) { + // Return true. + return true; + } + + // U+005C REVERSE SOLIDUS (\) + if (first === 0x005C) { + // If the first and second code points are a valid escape, return true. Otherwise, return false. + return isValidEscape(first, second); + } + + // anything else + // Return false. + return false; +} + +// § 4.3.10. Check if three code points would start a number +export function isNumberStart(first, second, third) { + // Look at the first code point: + + // U+002B PLUS SIGN (+) + // U+002D HYPHEN-MINUS (-) + if (first === 0x002B || first === 0x002D) { + // If the second code point is a digit, return true. + if (isDigit(second)) { + return 2; + } + + // Otherwise, if the second code point is a U+002E FULL STOP (.) + // and the third code point is a digit, return true. + // Otherwise, return false. + return second === 0x002E && isDigit(third) ? 3 : 0; + } + + // U+002E FULL STOP (.) + if (first === 0x002E) { + // If the second code point is a digit, return true. Otherwise, return false. + return isDigit(second) ? 2 : 0; + } + + // digit + if (isDigit(first)) { + // Return true. + return 1; + } + + // anything else + // Return false. + return 0; +} + +// +// Misc +// + +// detect BOM (https://en.wikipedia.org/wiki/Byte_order_mark) +export function isBOM(code) { + // UTF-16BE + if (code === 0xFEFF) { + return 1; + } + + // UTF-16LE + if (code === 0xFFFE) { + return 1; + } + + return 0; +} + +// Fast code category +// Only ASCII code points has a special meaning, that's why we define a maps for 0..127 codes only +const CATEGORY = new Array(0x80); +export const EofCategory = 0x80; +export const WhiteSpaceCategory = 0x82; +export const DigitCategory = 0x83; +export const NameStartCategory = 0x84; +export const NonPrintableCategory = 0x85; + +for (let i = 0; i < CATEGORY.length; i++) { + CATEGORY[i] = + isWhiteSpace(i) && WhiteSpaceCategory || + isDigit(i) && DigitCategory || + isNameStart(i) && NameStartCategory || + isNonPrintable(i) && NonPrintableCategory || + i || EofCategory; +} + +export function charCodeCategory(code) { + return code < 0x80 ? CATEGORY[code] : NameStartCategory; +} |
