diff options
| author | Adam Mathes <adam@adammathes.com> | 2026-02-13 21:34:48 -0800 |
|---|---|---|
| committer | Adam Mathes <adam@adammathes.com> | 2026-02-13 21:34:48 -0800 |
| commit | 76cb9c2a39d477a64824a985ade40507e3bbade1 (patch) | |
| tree | 41e997aa9c6f538d3a136af61dae9424db2005a9 /vanilla/node_modules/css-tree/cjs/tokenizer/index.cjs | |
| parent | 819a39a21ac992b1393244a4c283bbb125208c69 (diff) | |
| download | neko-76cb9c2a39d477a64824a985ade40507e3bbade1.tar.gz neko-76cb9c2a39d477a64824a985ade40507e3bbade1.tar.bz2 neko-76cb9c2a39d477a64824a985ade40507e3bbade1.zip | |
feat(vanilla): add testing infrastructure and tests (NK-wjnczv)
Diffstat (limited to 'vanilla/node_modules/css-tree/cjs/tokenizer/index.cjs')
| -rw-r--r-- | vanilla/node_modules/css-tree/cjs/tokenizer/index.cjs | 554 |
1 files changed, 554 insertions, 0 deletions
'use strict';

const types = require('./types.cjs');
const charCodeDefinitions = require('./char-code-definitions.cjs');
const utils = require('./utils.cjs');
const names = require('./names.cjs');
const OffsetToLocation = require('./OffsetToLocation.cjs');
const TokenStream = require('./TokenStream.cjs');

// Code points the tokenizer dispatches on, named as in the CSS Syntax spec.
const EXCLAMATION_MARK = 0x0021; // !
const QUOTATION_MARK = 0x0022; // "
const NUMBER_SIGN = 0x0023; // #
const PERCENTAGE_SIGN = 0x0025; // %
const APOSTROPHE = 0x0027; // '
const LEFT_PARENTHESIS = 0x0028; // (
const RIGHT_PARENTHESIS = 0x0029; // )
const ASTERISK = 0x002A; // *
const PLUS_SIGN = 0x002B; // +
const COMMA = 0x002C; // ,
const HYPHEN_MINUS = 0x002D; // -
const FULL_STOP = 0x002E; // .
const SOLIDUS = 0x002F; // /
const COLON = 0x003A; // :
const SEMICOLON = 0x003B; // ;
const LESS_THAN_SIGN = 0x003C; // <
const GREATER_THAN_SIGN = 0x003E; // >
const COMMERCIAL_AT = 0x0040; // @
const LEFT_SQUARE_BRACKET = 0x005B; // [
const REVERSE_SOLIDUS = 0x005C; // \
const RIGHT_SQUARE_BRACKET = 0x005D; // ]
const LEFT_CURLY_BRACKET = 0x007B; // {
const RIGHT_CURLY_BRACKET = 0x007D; // }

/**
 * Splits CSS source text into tokens, following the "Consume a token"
 * algorithm of CSS Syntax Module Level 3 (§ 4.3.1), and reports each token
 * through a callback. No EOF token is reported; the loop simply stops at
 * the end of the input.
 *
 * @param {*} source - CSS text; coerced with `String()`, any falsy value is
 *     treated as an empty string.
 * @param {function(number, number, number): void} onToken - invoked once per
 *     token with `(type, start, end)`, where `type` is a value from
 *     `./types.cjs` and `start`/`end` are offsets into the coerced source.
 */
function tokenize(source, onToken) {
    const input = String(source || '');
    const inputLength = input.length;

    // The isBOM() result is used directly as the initial offset
    // (presumably 1 when the input starts with a BOM, 0 otherwise).
    let start = charCodeDefinitions.isBOM(peek(0));
    let offset = start;
    let type;

    // Char code at `position`, or 0 once past the end of the input.
    function peek(position) {
        return position < inputLength ? input.charCodeAt(position) : 0;
    }

    // Marks the current token as `tokenType` and consumes one code unit.
    function takeOne(tokenType) {
        type = tokenType;
        offset++;
    }

    // Skips the remnants of a malformed url and marks the token as <bad-url-token>.
    function consumeBadUrl() {
        offset = utils.consumeBadUrlRemnants(input, offset);
        type = types.BadUrl;
    }

    // § 4.3.3. Consume a numeric token
    function consumeNumericToken() {
        // Consume the number itself first.
        offset = utils.consumeNumber(input, offset);

        if (charCodeDefinitions.isIdentifierStart(peek(offset), peek(offset + 1), peek(offset + 2))) {
            // An identifier right after the number is its unit: <dimension-token>.
            type = types.Dimension;
            offset = utils.consumeName(input, offset);
        } else if (peek(offset) === PERCENTAGE_SIGN) {
            // A trailing "%" is consumed: <percentage-token>.
            type = types.Percentage;
            offset++;
        } else {
            // Plain <number-token>.
            type = types.Number;
        }
    }

    // § 4.3.4. Consume an ident-like token
    function consumeIdentLikeToken() {
        const nameStart = offset;

        // Consume a name.
        offset = utils.consumeName(input, offset);

        const isUrlCall =
            utils.cmpStr(input, nameStart, offset, 'url') &&
            peek(offset) === LEFT_PARENTHESIS;

        if (isUrlCall) {
            // Look past the "(" and any whitespace to see what the argument starts with.
            offset = utils.findWhiteSpaceEnd(input, offset + 1);

            const firstArgCode = peek(offset);

            if (firstArgCode === QUOTATION_MARK || firstArgCode === APOSTROPHE) {
                // Quoted argument: emit only "url(" as a <function-token>;
                // the whitespace and the string are tokenized separately.
                type = types.Function;
                offset = nameStart + 4;
                return;
            }

            // Unquoted argument: the whole thing is a <url-token>.
            consumeUrlToken();
            return;
        }

        if (peek(offset) === LEFT_PARENTHESIS) {
            // A name followed by "(" is a <function-token>; the "(" is part of it.
            type = types.Function;
            offset++;
            return;
        }

        // Otherwise it is an <ident-token>.
        type = types.Ident;
    }

    // § 4.3.5. Consume a string token
    // When no ending code point is passed, the current input code point
    // (the opening quote) is consumed and used as the terminator.
    function consumeStringToken(endingCodePoint) {
        const terminator = endingCodePoint || peek(offset++);

        // Assume a well-formed <string-token> until proven otherwise.
        type = types.String;

        for (; offset < inputLength; offset++) {
            const code = input.charCodeAt(offset);
            const category = charCodeDefinitions.charCodeCategory(code);

            // Ending code point: consume it and return the <string-token>.
            if (category === terminator) {
                offset++;
                return;
            }

            if (category === charCodeDefinitions.WhiteSpaceCategory) {
                if (charCodeDefinitions.isNewline(code)) {
                    // Unescaped newline is a parse error: <bad-string-token>.
                    // The newline itself is included in the token.
                    offset += utils.getNewlineLength(input, offset, code);
                    type = types.BadString;
                    return;
                }
                continue;
            }

            // Anything that is not a backslash is simply part of the string.
            if (category !== REVERSE_SOLIDUS) {
                continue;
            }

            // A backslash as the very last code unit: nothing to do.
            if (offset === inputLength - 1) {
                continue;
            }

            const nextCode = peek(offset + 1);

            if (charCodeDefinitions.isNewline(nextCode)) {
                // Escaped newline: consume it.
                offset += utils.getNewlineLength(input, offset + 1, nextCode);
            } else if (charCodeDefinitions.isValidEscape(code, nextCode)) {
                // Valid escape: consume the escaped code point.
                // "- 1" compensates for the loop's own increment.
                offset = utils.consumeEscaped(input, offset) - 1;
            }
        }

        // Reaching EOF is a parse error, but the token stays a <string-token>.
    }

    // § 4.3.6. Consume a url token
    // Expects the initial "url(" to be consumed already and the value to be
    // unquoted, like url(foo); quoted values are handled as a <function-token>
    // by consumeIdentLikeToken().
    function consumeUrlToken() {
        type = types.Url;

        // Consume as much leading whitespace as possible.
        offset = utils.findWhiteSpaceEnd(input, offset);

        for (; offset < inputLength; offset++) {
            const code = input.charCodeAt(offset);
            const category = charCodeDefinitions.charCodeCategory(code);

            // ")" closes the <url-token>.
            if (category === RIGHT_PARENTHESIS) {
                offset++;
                return;
            }

            if (category === charCodeDefinitions.WhiteSpaceCategory) {
                // Whitespace may only be followed by ")" or EOF.
                offset = utils.findWhiteSpaceEnd(input, offset);

                if (peek(offset) === RIGHT_PARENTHESIS || offset >= inputLength) {
                    // Consume the ")" when present (EOF here is a parse error).
                    if (offset < inputLength) {
                        offset++;
                    }
                    return;
                }

                consumeBadUrl();
                return;
            }

            // Quotes, "(" and non-printable code points are parse errors.
            if (category === QUOTATION_MARK ||
                category === APOSTROPHE ||
                category === LEFT_PARENTHESIS ||
                category === charCodeDefinitions.NonPrintableCategory) {
                consumeBadUrl();
                return;
            }

            if (category === REVERSE_SOLIDUS) {
                if (charCodeDefinitions.isValidEscape(code, peek(offset + 1))) {
                    // "- 1" compensates for the loop's own increment.
                    offset = utils.consumeEscaped(input, offset) - 1;
                    continue;
                }

                // A backslash that does not start a valid escape is a parse error.
                consumeBadUrl();
                return;
            }

            // Anything else is part of the url value.
        }

        // Reaching EOF is a parse error, but the token stays a <url-token>.
    }

    // https://drafts.csswg.org/css-syntax-3/#consume-token
    // § 4.3.1. Consume a token
    while (offset < inputLength) {
        const code = input.charCodeAt(offset);

        switch (charCodeDefinitions.charCodeCategory(code)) {
            // whitespace: consume as much as possible
            case charCodeDefinitions.WhiteSpaceCategory:
                type = types.WhiteSpace;
                offset = utils.findWhiteSpaceEnd(input, offset + 1);
                break;

            // " or ' starts a string
            case QUOTATION_MARK:
            case APOSTROPHE:
                consumeStringToken();
                break;

            // # followed by a name code point or a valid escape is a <hash-token>
            // (the spec's "id" type flag is not tracked here)
            case NUMBER_SIGN:
                if (charCodeDefinitions.isName(peek(offset + 1)) ||
                    charCodeDefinitions.isValidEscape(peek(offset + 1), peek(offset + 2))) {
                    type = types.Hash;
                    offset = utils.consumeName(input, offset + 1);
                } else {
                    takeOne(types.Delim);
                }
                break;

            case LEFT_PARENTHESIS:
                takeOne(types.LeftParenthesis);
                break;

            case RIGHT_PARENTHESIS:
                takeOne(types.RightParenthesis);
                break;

            // + or . either starts a number or is a delimiter
            case PLUS_SIGN:
            case FULL_STOP:
                if (charCodeDefinitions.isNumberStart(code, peek(offset + 1), peek(offset + 2))) {
                    consumeNumericToken();
                } else {
                    takeOne(types.Delim);
                }
                break;

            case COMMA:
                takeOne(types.Comma);
                break;

            // - may start a number, a CDC (-->), an identifier, or be a delimiter
            case HYPHEN_MINUS: {
                const next = peek(offset + 1);
                const afterNext = peek(offset + 2);

                if (charCodeDefinitions.isNumberStart(code, next, afterNext)) {
                    consumeNumericToken();
                } else if (next === HYPHEN_MINUS && afterNext === GREATER_THAN_SIGN) {
                    type = types.CDC;
                    offset = offset + 3;
                } else if (charCodeDefinitions.isIdentifierStart(code, next, afterNext)) {
                    consumeIdentLikeToken();
                } else {
                    takeOne(types.Delim);
                }
                break;
            }

            // "/*" opens a comment running to the first "*/" or to EOF
            case SOLIDUS:
                if (peek(offset + 1) === ASTERISK) {
                    const closing = input.indexOf('*/', offset + 2);

                    type = types.Comment;
                    offset = closing === -1 ? inputLength : closing + 2;
                } else {
                    takeOne(types.Delim);
                }
                break;

            case COLON:
                takeOne(types.Colon);
                break;

            case SEMICOLON:
                takeOne(types.Semicolon);
                break;

            // "<!--" is a <CDO-token>
            case LESS_THAN_SIGN:
                if (peek(offset + 1) === EXCLAMATION_MARK &&
                    peek(offset + 2) === HYPHEN_MINUS &&
                    peek(offset + 3) === HYPHEN_MINUS) {
                    type = types.CDO;
                    offset = offset + 4;
                } else {
                    takeOne(types.Delim);
                }
                break;

            // @ followed by an identifier is an <at-keyword-token>
            case COMMERCIAL_AT:
                if (charCodeDefinitions.isIdentifierStart(peek(offset + 1), peek(offset + 2), peek(offset + 3))) {
                    type = types.AtKeyword;
                    offset = utils.consumeName(input, offset + 1);
                } else {
                    takeOne(types.Delim);
                }
                break;

            case LEFT_SQUARE_BRACKET:
                takeOne(types.LeftSquareBracket);
                break;

            // a valid escape starts an ident-like token; a lone backslash
            // is a parse error reported as a delimiter
            case REVERSE_SOLIDUS:
                if (charCodeDefinitions.isValidEscape(code, peek(offset + 1))) {
                    consumeIdentLikeToken();
                } else {
                    takeOne(types.Delim);
                }
                break;

            case RIGHT_SQUARE_BRACKET:
                takeOne(types.RightSquareBracket);
                break;

            case LEFT_CURLY_BRACKET:
                takeOne(types.LeftCurlyBracket);
                break;

            case RIGHT_CURLY_BRACKET:
                takeOne(types.RightCurlyBracket);
                break;

            // digit
            case charCodeDefinitions.DigitCategory:
                consumeNumericToken();
                break;

            // name-start code point
            case charCodeDefinitions.NameStartCategory:
                consumeIdentLikeToken();
                break;

            // anything else is a <delim-token>
            default:
                takeOne(types.Delim);
        }

        // Report the token and make its end the start of the next one.
        const tokenStart = start;

        start = offset;
        onToken(type, tokenStart, offset);
    }
}

// Token type constants
exports.AtKeyword = types.AtKeyword;
exports.BadString = types.BadString;
exports.BadUrl = types.BadUrl;
exports.CDC = types.CDC;
exports.CDO = types.CDO;
exports.Colon = types.Colon;
exports.Comma = types.Comma;
exports.Comment = types.Comment;
exports.Delim = types.Delim;
exports.Dimension = types.Dimension;
exports.EOF = types.EOF;
exports.Function = types.Function;
exports.Hash = types.Hash;
exports.Ident = types.Ident;
exports.LeftCurlyBracket = types.LeftCurlyBracket;
exports.LeftParenthesis = types.LeftParenthesis;
exports.LeftSquareBracket = types.LeftSquareBracket;
exports.Number = types.Number;
exports.Percentage = types.Percentage;
exports.RightCurlyBracket = types.RightCurlyBracket;
exports.RightParenthesis = types.RightParenthesis;
exports.RightSquareBracket = types.RightSquareBracket;
exports.Semicolon = types.Semicolon;
exports.String = types.String;
exports.Url = types.Url;
exports.WhiteSpace = types.WhiteSpace;
exports.tokenTypes = types;

// Char code categories and predicates
exports.DigitCategory = charCodeDefinitions.DigitCategory;
exports.EofCategory = charCodeDefinitions.EofCategory;
exports.NameStartCategory = charCodeDefinitions.NameStartCategory;
exports.NonPrintableCategory = charCodeDefinitions.NonPrintableCategory;
exports.WhiteSpaceCategory = charCodeDefinitions.WhiteSpaceCategory;
exports.charCodeCategory = charCodeDefinitions.charCodeCategory;
exports.isBOM = charCodeDefinitions.isBOM;
exports.isDigit = charCodeDefinitions.isDigit;
exports.isHexDigit = charCodeDefinitions.isHexDigit;
exports.isIdentifierStart = charCodeDefinitions.isIdentifierStart;
exports.isLetter = charCodeDefinitions.isLetter;
exports.isLowercaseLetter = charCodeDefinitions.isLowercaseLetter;
exports.isName = charCodeDefinitions.isName;
exports.isNameStart = charCodeDefinitions.isNameStart;
exports.isNewline = charCodeDefinitions.isNewline;
exports.isNonAscii = charCodeDefinitions.isNonAscii;
exports.isNonPrintable = charCodeDefinitions.isNonPrintable;
exports.isNumberStart = charCodeDefinitions.isNumberStart;
exports.isUppercaseLetter = charCodeDefinitions.isUppercaseLetter;
exports.isValidEscape = charCodeDefinitions.isValidEscape;
exports.isWhiteSpace = charCodeDefinitions.isWhiteSpace;

// Low-level scanning helpers
exports.cmpChar = utils.cmpChar;
exports.cmpStr = utils.cmpStr;
exports.consumeBadUrlRemnants = utils.consumeBadUrlRemnants;
exports.consumeEscaped = utils.consumeEscaped;
exports.consumeName = utils.consumeName;
exports.consumeNumber = utils.consumeNumber;
exports.decodeEscaped = utils.decodeEscaped;
exports.findDecimalNumberEnd = utils.findDecimalNumberEnd;
exports.findWhiteSpaceEnd = utils.findWhiteSpaceEnd;
exports.findWhiteSpaceStart = utils.findWhiteSpaceStart;
exports.getNewlineLength = utils.getNewlineLength;

// Supporting tables and classes, and the tokenizer entry point
exports.tokenNames = names;
exports.OffsetToLocation = OffsetToLocation.OffsetToLocation;
exports.TokenStream = TokenStream.TokenStream;
exports.tokenize = tokenize;
