diff options
Diffstat (limited to 'vanilla/node_modules/css-tree/cjs/tokenizer/char-code-definitions.cjs')
| -rw-r--r-- | vanilla/node_modules/css-tree/cjs/tokenizer/char-code-definitions.cjs | 236 |
1 files changed, 236 insertions, 0 deletions
diff --git a/vanilla/node_modules/css-tree/cjs/tokenizer/char-code-definitions.cjs b/vanilla/node_modules/css-tree/cjs/tokenizer/char-code-definitions.cjs new file mode 100644 index 0000000..ebd4b58 --- /dev/null +++ b/vanilla/node_modules/css-tree/cjs/tokenizer/char-code-definitions.cjs @@ -0,0 +1,236 @@ +'use strict'; + +const EOF = 0; + +// https://drafts.csswg.org/css-syntax-3/ +// § 4.2. Definitions + +// digit +// A code point between U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9). +function isDigit(code) { + return code >= 0x0030 && code <= 0x0039; +} + +// hex digit +// A digit, or a code point between U+0041 LATIN CAPITAL LETTER A (A) and U+0046 LATIN CAPITAL LETTER F (F), +// or a code point between U+0061 LATIN SMALL LETTER A (a) and U+0066 LATIN SMALL LETTER F (f). +function isHexDigit(code) { + return ( + isDigit(code) || // 0 .. 9 + (code >= 0x0041 && code <= 0x0046) || // A .. F + (code >= 0x0061 && code <= 0x0066) // a .. f + ); +} + +// uppercase letter +// A code point between U+0041 LATIN CAPITAL LETTER A (A) and U+005A LATIN CAPITAL LETTER Z (Z). +function isUppercaseLetter(code) { + return code >= 0x0041 && code <= 0x005A; +} + +// lowercase letter +// A code point between U+0061 LATIN SMALL LETTER A (a) and U+007A LATIN SMALL LETTER Z (z). +function isLowercaseLetter(code) { + return code >= 0x0061 && code <= 0x007A; +} + +// letter +// An uppercase letter or a lowercase letter. +function isLetter(code) { + return isUppercaseLetter(code) || isLowercaseLetter(code); +} + +// non-ASCII code point +// A code point with a value equal to or greater than U+0080 <control>. +// +// 2024-09-02: The latest spec narrows the range for non-ASCII characters (see https://github.com/csstree/csstree/issues/188). +// However, all modern browsers support a wider range, and strictly following the latest spec could result +// in some CSS being parsed incorrectly, even though it works in the browser. Therefore, this function adheres +// to the previous, broader definition of non-ASCII characters. +function isNonAscii(code) { + return code >= 0x0080; +} + +// name-start code point +// A letter, a non-ASCII code point, or U+005F LOW LINE (_). +function isNameStart(code) { + return isLetter(code) || isNonAscii(code) || code === 0x005F; +} + +// name code point +// A name-start code point, a digit, or U+002D HYPHEN-MINUS (-). +function isName(code) { + return isNameStart(code) || isDigit(code) || code === 0x002D; +} + +// non-printable code point +// A code point between U+0000 NULL and U+0008 BACKSPACE, or U+000B LINE TABULATION, +// or a code point between U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE, or U+007F DELETE. +function isNonPrintable(code) { + return ( + (code >= 0x0000 && code <= 0x0008) || + (code === 0x000B) || + (code >= 0x000E && code <= 0x001F) || + (code === 0x007F) + ); +} + +// newline +// U+000A LINE FEED. Note that U+000D CARRIAGE RETURN and U+000C FORM FEED are not included in this definition, +// as they are converted to U+000A LINE FEED during preprocessing. +// TODO: we doesn't do a preprocessing, so check a code point for U+000D CARRIAGE RETURN and U+000C FORM FEED +function isNewline(code) { + return code === 0x000A || code === 0x000D || code === 0x000C; +} + +// whitespace +// A newline, U+0009 CHARACTER TABULATION, or U+0020 SPACE. +function isWhiteSpace(code) { + return isNewline(code) || code === 0x0020 || code === 0x0009; +} + +// § 4.3.8. Check if two code points are a valid escape +function isValidEscape(first, second) { + // If the first code point is not U+005C REVERSE SOLIDUS (\), return false. + if (first !== 0x005C) { + return false; + } + + // Otherwise, if the second code point is a newline or EOF, return false. + if (isNewline(second) || second === EOF) { + return false; + } + + // Otherwise, return true. + return true; +} + +// § 4.3.9. Check if three code points would start an identifier +function isIdentifierStart(first, second, third) { + // Look at the first code point: + + // U+002D HYPHEN-MINUS + if (first === 0x002D) { + // If the second code point is a name-start code point or a U+002D HYPHEN-MINUS, + // or the second and third code points are a valid escape, return true. Otherwise, return false. + return ( + isNameStart(second) || + second === 0x002D || + isValidEscape(second, third) + ); + } + + // name-start code point + if (isNameStart(first)) { + // Return true. + return true; + } + + // U+005C REVERSE SOLIDUS (\) + if (first === 0x005C) { + // If the first and second code points are a valid escape, return true. Otherwise, return false. + return isValidEscape(first, second); + } + + // anything else + // Return false. + return false; +} + +// § 4.3.10. Check if three code points would start a number +function isNumberStart(first, second, third) { + // Look at the first code point: + + // U+002B PLUS SIGN (+) + // U+002D HYPHEN-MINUS (-) + if (first === 0x002B || first === 0x002D) { + // If the second code point is a digit, return true. + if (isDigit(second)) { + return 2; + } + + // Otherwise, if the second code point is a U+002E FULL STOP (.) + // and the third code point is a digit, return true. + // Otherwise, return false. + return second === 0x002E && isDigit(third) ? 3 : 0; + } + + // U+002E FULL STOP (.) + if (first === 0x002E) { + // If the second code point is a digit, return true. Otherwise, return false. + return isDigit(second) ? 2 : 0; + } + + // digit + if (isDigit(first)) { + // Return true. + return 1; + } + + // anything else + // Return false. + return 0; +} + +// +// Misc +// + +// detect BOM (https://en.wikipedia.org/wiki/Byte_order_mark) +function isBOM(code) { + // UTF-16BE + if (code === 0xFEFF) { + return 1; + } + + // UTF-16LE + if (code === 0xFFFE) { + return 1; + } + + return 0; +} + +// Fast code category +// Only ASCII code points has a special meaning, that's why we define a maps for 0..127 codes only +const CATEGORY = new Array(0x80); +const EofCategory = 0x80; +const WhiteSpaceCategory = 0x82; +const DigitCategory = 0x83; +const NameStartCategory = 0x84; +const NonPrintableCategory = 0x85; + +for (let i = 0; i < CATEGORY.length; i++) { + CATEGORY[i] = + isWhiteSpace(i) && WhiteSpaceCategory || + isDigit(i) && DigitCategory || + isNameStart(i) && NameStartCategory || + isNonPrintable(i) && NonPrintableCategory || + i || EofCategory; +} + +function charCodeCategory(code) { + return code < 0x80 ? CATEGORY[code] : NameStartCategory; +} + +exports.DigitCategory = DigitCategory; +exports.EofCategory = EofCategory; +exports.NameStartCategory = NameStartCategory; +exports.NonPrintableCategory = NonPrintableCategory; +exports.WhiteSpaceCategory = WhiteSpaceCategory; +exports.charCodeCategory = charCodeCategory; +exports.isBOM = isBOM; +exports.isDigit = isDigit; +exports.isHexDigit = isHexDigit; +exports.isIdentifierStart = isIdentifierStart; +exports.isLetter = isLetter; +exports.isLowercaseLetter = isLowercaseLetter; +exports.isName = isName; +exports.isNameStart = isNameStart; +exports.isNewline = isNewline; +exports.isNonAscii = isNonAscii; +exports.isNonPrintable = isNonPrintable; +exports.isNumberStart = isNumberStart; +exports.isUppercaseLetter = isUppercaseLetter; +exports.isValidEscape = isValidEscape; +exports.isWhiteSpace = isWhiteSpace; |
