1 files changed, 513 insertions, 0 deletions
diff --git a/vanilla/node_modules/css-tree/lib/tokenizer/index.js b/vanilla/node_modules/css-tree/lib/tokenizer/index.js
new file mode 100644
index 0000000..16df44c
--- /dev/null
+++ b/vanilla/node_modules/css-tree/lib/tokenizer/index.js
@@ -0,0 +1,513 @@
+import * as TYPE from './types.js';
+import {
+    isNewline,
+    isName,
+    isValidEscape,
+    isNumberStart,
+    isIdentifierStart,
+    isBOM,
+    charCodeCategory,
+    WhiteSpaceCategory,
+    DigitCategory,
+    NameStartCategory,
+    NonPrintableCategory
+} from './char-code-definitions.js';
+import {
+    cmpStr,
+    getNewlineLength,
+    findWhiteSpaceEnd,
+    consumeEscaped,
+    consumeName,
+    consumeNumber,
+    consumeBadUrlRemnants
+} from './utils.js';
+
+export function tokenize(source, onToken) { // Scans source as CSS and calls onToken(type, start, end) once per token found.
+    function getCharCode(offset) {
+        return offset < sourceLength ? source.charCodeAt(offset) : 0; // reads at or past the end of source yield 0
+    }
+
+    // § 4.3.3. Consume a numeric token (sets `type` to Dimension, Percentage or Number and advances `offset`)
+    function consumeNumericToken() {
+        // Consume a number and let number be the result.
+        offset = consumeNumber(source, offset);
+
+        // If the next 3 input code points would start an identifier, then:
+        if (isIdentifierStart(getCharCode(offset), getCharCode(offset + 1), getCharCode(offset + 2))) {
+            // Create a <dimension-token> with the same value and type flag as number, and a unit set initially to the empty string.
+            // Consume a name. Set the <dimension-token>’s unit to the returned value.
+            // Return the <dimension-token>.
+            type = TYPE.Dimension;
+            offset = consumeName(source, offset);
+            return;
+        }
+
+        // Otherwise, if the next input code point is U+0025 PERCENTAGE SIGN (%), consume it.
+        if (getCharCode(offset) === 0x0025) {
+            // Create a <percentage-token> with the same value as number, and return it.
+            type = TYPE.Percentage;
+            offset++;
+            return;
+        }
+
+        // Otherwise, create a <number-token> with the same value and type flag as number, and return it.
+        type = TYPE.Number;
+    }
+
+    // § 4.3.4. Consume an ident-like token (sets `type` to Function or Ident, or delegates to consumeUrlToken())
+    function consumeIdentLikeToken() {
+        const nameStartOffset = offset;
+
+        // Consume a name, and let string be the result.
+        offset = consumeName(source, offset);
+
+        // If string’s value is an ASCII case-insensitive match for "url",
+        // and the next input code point is U+0028 LEFT PARENTHESIS ((), consume it.
+        if (cmpStr(source, nameStartOffset, offset, 'url') && getCharCode(offset) === 0x0028) {
+            // While the next two input code points are whitespace, consume the next input code point.
+            offset = findWhiteSpaceEnd(source, offset + 1);
+
+            // If the next one or two input code points are U+0022 QUOTATION MARK ("), U+0027 APOSTROPHE ('),
+            // or whitespace followed by U+0022 QUOTATION MARK (") or U+0027 APOSTROPHE ('),
+            // then create a <function-token> with its value set to string and return it.
+            if (getCharCode(offset) === 0x0022 ||
+                getCharCode(offset) === 0x0027) {
+                type = TYPE.Function;
+                offset = nameStartOffset + 4; // back up to just after "url(" (assumes the name is the 3 literal characters — confirm in cmpStr) so the whitespace and quoted string are tokenized separately
+                return;
+            }
+
+            // Otherwise, consume a url token, and return it.
+            consumeUrlToken();
+            return;
+        }
+
+        // Otherwise, if the next input code point is U+0028 LEFT PARENTHESIS ((), consume it.
+        // Create a <function-token> with its value set to string and return it.
+        if (getCharCode(offset) === 0x0028) {
+            type = TYPE.Function;
+            offset++;
+            return;
+        }
+
+        // Otherwise, create an <ident-token> with its value set to string and return it.
+        type = TYPE.Ident;
+    }
+
+    // § 4.3.5. Consume a string token (sets `type` to String or BadString and advances `offset`)
+    function consumeStringToken(endingCodePoint) {
+        // This algorithm may be called with an ending code point, which denotes the code point
+        // that ends the string. If an ending code point is not specified,
+        // the current input code point is used (and consumed); the calls in the main loop below omit it.
+        if (!endingCodePoint) {
+            endingCodePoint = getCharCode(offset++);
+        }
+
+        // Initially create a <string-token> with its value set to the empty string.
+        type = TYPE.String;
+
+        // Repeatedly consume the next input code point from the stream:
+        for (; offset < source.length; offset++) {
+            const code = source.charCodeAt(offset);
+
+            switch (charCodeCategory(code)) {
+                // ending code point (NOTE(review): assumes charCodeCategory() returns the code itself for quote characters — confirm)
+                case endingCodePoint:
+                    // Return the <string-token>.
+                    offset++;
+                    return;
+
+                    // EOF
+                    // There is no explicit EOF case: the loop condition stops the scan at the
+                    // end of source and the token keeps the String type set above
+                    // (the spec calls this a parse error but still returns the <string-token>).
+
+                // newline
+                case WhiteSpaceCategory:
+                    if (isNewline(code)) {
+                        // This is a parse error: create a <bad-string-token> and return it.
+                        // NOTE(review): the spec reconsumes the newline; here offset is advanced by getNewlineLength(), so the newline appears to be included in the token.
+                        offset += getNewlineLength(source, offset, code);
+                        type = TYPE.BadString;
+                        return;
+                    }
+                    break;
+
+                // U+005C REVERSE SOLIDUS (\)
+                case 0x005C:
+                    // If the next input code point is EOF, do nothing.
+                    if (offset === source.length - 1) {
+                        break;
+                    }
+
+                    const nextCode = getCharCode(offset + 1);
+
+                    // Otherwise, if the next input code point is a newline, consume it.
+                    if (isNewline(nextCode)) {
+                        offset += getNewlineLength(source, offset + 1, nextCode);
+                    } else if (isValidEscape(code, nextCode)) {
+                        // Otherwise, (the stream starts with a valid escape) consume
+                        // an escaped code point and append the returned code point to
+                        // the <string-token>’s value.
+                        offset = consumeEscaped(source, offset) - 1; // -1 compensates for the loop's offset++
+                    }
+                    break;
+
+                // anything else
+                // Append the current input code point to the <string-token>’s value.
+            }
+        }
+    }
+
+    // § 4.3.6. Consume a url token
+    // Note: This algorithm assumes that the initial "url(" has already been consumed.
+    // This algorithm also assumes that it’s being called to consume an "unquoted" value, like url(foo).
+    // A quoted value, like url("foo"), is parsed as a <function-token>. Consume an ident-like token
+    // automatically handles this distinction; this algorithm shouldn’t be called directly otherwise.
+    function consumeUrlToken() {
+        // Initially create a <url-token> with its value set to the empty string.
+        type = TYPE.Url;
+
+        // Consume as much whitespace as possible.
+        offset = findWhiteSpaceEnd(source, offset);
+
+        // Repeatedly consume the next input code point from the stream:
+        for (; offset < source.length; offset++) {
+            const code = source.charCodeAt(offset);
+
+            switch (charCodeCategory(code)) {
+                // U+0029 RIGHT PARENTHESIS ())
+                case 0x0029:
+                    // Return the <url-token>.
+                    offset++;
+                    return;
+
+                    // EOF
+                    // There is no explicit EOF case: the loop condition stops the scan at the
+                    // end of source and the token keeps the Url type set above
+                    // (the spec calls this a parse error but still returns the <url-token>).
+
+                // whitespace
+                case WhiteSpaceCategory:
+                    // Consume as much whitespace as possible.
+                    offset = findWhiteSpaceEnd(source, offset);
+
+                    // If the next input code point is U+0029 RIGHT PARENTHESIS ()) or EOF,
+                    // consume it and return the <url-token>
+                    // (if EOF was encountered, this is a parse error);
+                    if (getCharCode(offset) === 0x0029 || offset >= source.length) {
+                        if (offset < source.length) {
+                            offset++;
+                        }
+                        return;
+                    }
+
+                    // otherwise, consume the remnants of a bad url, create a <bad-url-token>,
+                    // and return it.
+                    offset = consumeBadUrlRemnants(source, offset);
+                    type = TYPE.BadUrl;
+                    return;
+
+                // U+0022 QUOTATION MARK (")
+                // U+0027 APOSTROPHE (')
+                // U+0028 LEFT PARENTHESIS (()
+                // non-printable code point
+                case 0x0022:
+                case 0x0027:
+                case 0x0028:
+                case NonPrintableCategory:
+                    // This is a parse error. Consume the remnants of a bad url,
+                    // create a <bad-url-token>, and return it.
+                    offset = consumeBadUrlRemnants(source, offset);
+                    type = TYPE.BadUrl;
+                    return;
+
+                // U+005C REVERSE SOLIDUS (\)
+                case 0x005C:
+                    // If the stream starts with a valid escape, consume an escaped code point and
+                    // append the returned code point to the <url-token>’s value.
+                    if (isValidEscape(code, getCharCode(offset + 1))) {
+                        offset = consumeEscaped(source, offset) - 1; // -1 compensates for the loop's offset++
+                        break;
+                    }
+
+                    // Otherwise, this is a parse error. Consume the remnants of a bad url,
+                    // create a <bad-url-token>, and return it.
+                    offset = consumeBadUrlRemnants(source, offset);
+                    type = TYPE.BadUrl;
+                    return;
+
+                // anything else
+                // Append the current input code point to the <url-token>’s value.
+            }
+        }
+    }
+
+    // ensure source is a string (any falsy input, e.g. null or undefined, becomes the empty string)
+    source = String(source || '');
+
+    const sourceLength = source.length;
+    let start = isBOM(getCharCode(0)); // result is used as the first offset — presumably the BOM length, 0 when absent; confirm in char-code-definitions.js
+    let offset = start;
+    let type;
+
+    // https://drafts.csswg.org/css-syntax-3/#consume-token
+    // § 4.3.1. Consume a token (each iteration sets `type`, advances `offset` and reports one token)
+    while (offset < sourceLength) {
+        const code = source.charCodeAt(offset);
+
+        switch (charCodeCategory(code)) {
+            // whitespace
+            case WhiteSpaceCategory:
+                // Consume as much whitespace as possible. Return a <whitespace-token>.
+                type = TYPE.WhiteSpace;
+                offset = findWhiteSpaceEnd(source, offset + 1);
+                break;
+
+            // U+0022 QUOTATION MARK (")
+            case 0x0022:
+                // Consume a string token and return it.
+                consumeStringToken();
+                break;
+
+            // U+0023 NUMBER SIGN (#)
+            case 0x0023:
+                // If the next input code point is a name code point or the next two input code points are a valid escape, then:
+                if (isName(getCharCode(offset + 1)) || isValidEscape(getCharCode(offset + 1), getCharCode(offset + 2))) {
+                    // Create a <hash-token>.
+                    type = TYPE.Hash;
+
+                    // If the next 3 input code points would start an identifier, set the <hash-token>’s type flag to "id".
+                    // if (isIdentifierStart(getCharCode(offset + 1), getCharCode(offset + 2), getCharCode(offset + 3))) {
+                    //     // TODO: set id flag
+                    // }
+
+                    // Consume a name, and set the <hash-token>’s value to the returned string.
+                    offset = consumeName(source, offset + 1);
+
+                    // Return the <hash-token>.
+                } else {
+                    // Otherwise, return a <delim-token> with its value set to the current input code point.
+                    type = TYPE.Delim;
+                    offset++;
+                }
+
+                break;
+
+            // U+0027 APOSTROPHE (')
+            case 0x0027:
+                // Consume a string token and return it.
+                consumeStringToken();
+                break;
+
+            // U+0028 LEFT PARENTHESIS (()
+            case 0x0028:
+                // Return a <(-token>.
+                type = TYPE.LeftParenthesis;
+                offset++;
+                break;
+
+            // U+0029 RIGHT PARENTHESIS ())
+            case 0x0029:
+                // Return a <)-token>.
+                type = TYPE.RightParenthesis;
+                offset++;
+                break;
+
+            // U+002B PLUS SIGN (+)
+            case 0x002B:
+                // If the input stream starts with a number, ...
+                if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
+                    // ... reconsume the current input code point, consume a numeric token, and return it.
+                    consumeNumericToken();
+                } else {
+                    // Otherwise, return a <delim-token> with its value set to the current input code point.
+                    type = TYPE.Delim;
+                    offset++;
+                }
+                break;
+
+            // U+002C COMMA (,)
+            case 0x002C:
+                // Return a <comma-token>.
+                type = TYPE.Comma;
+                offset++;
+                break;
+
+            // U+002D HYPHEN-MINUS (-)
+            case 0x002D:
+                // If the input stream starts with a number, reconsume the current input code point, consume a numeric token, and return it.
+                if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
+                    consumeNumericToken();
+                } else {
+                    // Otherwise, if the next 2 input code points are U+002D HYPHEN-MINUS U+003E GREATER-THAN SIGN (->), consume them and return a <CDC-token>.
+                    if (getCharCode(offset + 1) === 0x002D &&
+                        getCharCode(offset + 2) === 0x003E) {
+                        type = TYPE.CDC;
+                        offset = offset + 3;
+                    } else {
+                        // Otherwise, if the input stream starts with an identifier, ...
+                        if (isIdentifierStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
+                            // ... reconsume the current input code point, consume an ident-like token, and return it.
+                            consumeIdentLikeToken();
+                        } else {
+                            // Otherwise, return a <delim-token> with its value set to the current input code point.
+                            type = TYPE.Delim;
+                            offset++;
+                        }
+                    }
+                }
+                break;
+
+            // U+002E FULL STOP (.)
+            case 0x002E:
+                // If the input stream starts with a number, ...
+                if (isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
+                    // ... reconsume the current input code point, consume a numeric token, and return it.
+                    consumeNumericToken();
+                } else {
+                    // Otherwise, return a <delim-token> with its value set to the current input code point.
+                    type = TYPE.Delim;
+                    offset++;
+                }
+
+                break;
+
+            // U+002F SOLIDUS (/)
+            case 0x002F:
+                // If the current U+002F SOLIDUS (/) is immediately followed by a U+002A ASTERISK (*), ...
+                if (getCharCode(offset + 1) === 0x002A) {
+                    // ... consume them and all following code points up to and including the first U+002A ASTERISK (*)
+                    // followed by a U+002F SOLIDUS (/), or up to an EOF code point.
+                    type = TYPE.Comment;
+                    offset = source.indexOf('*/', offset + 2);
+                    offset = offset === -1 ? source.length : offset + 2; // an unterminated comment extends to the end of source
+                } else {
+                    type = TYPE.Delim;
+                    offset++;
+                }
+                break;
+
+            // U+003A COLON (:)
+            case 0x003A:
+                // Return a <colon-token>.
+                type = TYPE.Colon;
+                offset++;
+                break;
+
+            // U+003B SEMICOLON (;)
+            case 0x003B:
+                // Return a <semicolon-token>.
+                type = TYPE.Semicolon;
+                offset++;
+                break;
+
+            // U+003C LESS-THAN SIGN (<)
+            case 0x003C:
+                // If the next 3 input code points are U+0021 EXCLAMATION MARK U+002D HYPHEN-MINUS U+002D HYPHEN-MINUS (!--), ...
+                if (getCharCode(offset + 1) === 0x0021 &&
+                    getCharCode(offset + 2) === 0x002D &&
+                    getCharCode(offset + 3) === 0x002D) {
+                    // ... consume them and return a <CDO-token>.
+                    type = TYPE.CDO;
+                    offset = offset + 4;
+                } else {
+                    // Otherwise, return a <delim-token> with its value set to the current input code point.
+                    type = TYPE.Delim;
+                    offset++;
+                }
+
+                break;
+
+            // U+0040 COMMERCIAL AT (@)
+            case 0x0040:
+                // If the next 3 input code points would start an identifier, ...
+                if (isIdentifierStart(getCharCode(offset + 1), getCharCode(offset + 2), getCharCode(offset + 3))) {
+                    // ... consume a name, create an <at-keyword-token> with its value set to the returned value, and return it.
+                    type = TYPE.AtKeyword;
+                    offset = consumeName(source, offset + 1);
+                } else {
+                    // Otherwise, return a <delim-token> with its value set to the current input code point.
+                    type = TYPE.Delim;
+                    offset++;
+                }
+
+                break;
+
+            // U+005B LEFT SQUARE BRACKET ([)
+            case 0x005B:
+                // Return a <[-token>.
+                type = TYPE.LeftSquareBracket;
+                offset++;
+                break;
+
+            // U+005C REVERSE SOLIDUS (\)
+            case 0x005C:
+                // If the input stream starts with a valid escape, ...
+                if (isValidEscape(code, getCharCode(offset + 1))) {
+                    // ... reconsume the current input code point, consume an ident-like token, and return it.
+                    consumeIdentLikeToken();
+                } else {
+                    // Otherwise, this is a parse error. Return a <delim-token> with its value set to the current input code point.
+                    type = TYPE.Delim;
+                    offset++;
+                }
+                break;
+
+            // U+005D RIGHT SQUARE BRACKET (])
+            case 0x005D:
+                // Return a <]-token>.
+                type = TYPE.RightSquareBracket;
+                offset++;
+                break;
+
+            // U+007B LEFT CURLY BRACKET ({)
+            case 0x007B:
+                // Return a <{-token>.
+                type = TYPE.LeftCurlyBracket;
+                offset++;
+                break;
+
+            // U+007D RIGHT CURLY BRACKET (})
+            case 0x007D:
+                // Return a <}-token>.
+                type = TYPE.RightCurlyBracket;
+                offset++;
+                break;
+
+            // digit
+            case DigitCategory:
+                // Reconsume the current input code point, consume a numeric token, and return it.
+                consumeNumericToken();
+                break;
+
+            // name-start code point
+            case NameStartCategory:
+                // Reconsume the current input code point, consume an ident-like token, and return it.
+                consumeIdentLikeToken();
+                break;
+
+                // EOF
+                // There is no explicit EOF case: the while condition ends the loop
+                // at the end of source, so onToken is never called with an
+                // <EOF-token>.
+
+            // anything else
+            default:
+                // Return a <delim-token> with its value set to the current input code point.
+                type = TYPE.Delim;
+                offset++;
+        }
+
+        // report the token as (type, start offset, end offset); `start = offset` also makes the next token begin where this one ended
+        onToken(type, start, start = offset);
+    }
+}
+
+export * from './types.js';
+export * as tokenTypes from './types.js';
+export { default as tokenNames } from './names.js';
+export * from './char-code-definitions.js';
+export * from './utils.js';
+export * from './OffsetToLocation.js';
+export * from './TokenStream.js';