From afa87af01c79a9baa539f2992d32154d2a4739bd Mon Sep 17 00:00:00 2001 From: Adam Mathes Date: Sat, 14 Feb 2026 14:46:37 -0800 Subject: task: delete vanilla js prototype\n\n- Removed vanilla/ directory and web/dist/vanilla directory\n- Updated Makefile, Dockerfile, and CI workflow to remove vanilla references\n- Cleaned up web/web.go to remove vanilla embed and routes\n- Verified build and tests pass\n\nCloses NK-2tcnmq --- .../parse5/dist/tokenizer/preprocessor.js | 196 --------------------- 1 file changed, 196 deletions(-) delete mode 100644 vanilla/node_modules/parse5/dist/tokenizer/preprocessor.js (limited to 'vanilla/node_modules/parse5/dist/tokenizer/preprocessor.js') diff --git a/vanilla/node_modules/parse5/dist/tokenizer/preprocessor.js b/vanilla/node_modules/parse5/dist/tokenizer/preprocessor.js deleted file mode 100644 index 530407b..0000000 --- a/vanilla/node_modules/parse5/dist/tokenizer/preprocessor.js +++ /dev/null @@ -1,196 +0,0 @@ -import { CODE_POINTS as $, getSurrogatePairCodePoint, isControlCodePoint, isSurrogate, isSurrogatePair, isUndefinedCodePoint, } from '../common/unicode.js'; -import { ERR } from '../common/error-codes.js'; -//Const -const DEFAULT_BUFFER_WATERLINE = 1 << 16; -//Preprocessor -//NOTE: HTML input preprocessing -//(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream) -export class Preprocessor { - constructor(handler) { - this.handler = handler; - this.html = ''; - this.pos = -1; - // NOTE: Initial `lastGapPos` is -2, to ensure `col` on initialisation is 0 - this.lastGapPos = -2; - this.gapStack = []; - this.skipNextNewLine = false; - this.lastChunkWritten = false; - this.endOfChunkHit = false; - this.bufferWaterline = DEFAULT_BUFFER_WATERLINE; - this.isEol = false; - this.lineStartPos = 0; - this.droppedBufferSize = 0; - this.line = 1; - //NOTE: avoid reporting errors twice on advance/retreat - this.lastErrOffset = -1; - } - /** The column on the current line. If we just saw a gap (eg. a surrogate pair), return the index before. */ - get col() { - return this.pos - this.lineStartPos + Number(this.lastGapPos !== this.pos); - } - get offset() { - return this.droppedBufferSize + this.pos; - } - getError(code, cpOffset) { - const { line, col, offset } = this; - const startCol = col + cpOffset; - const startOffset = offset + cpOffset; - return { - code, - startLine: line, - endLine: line, - startCol, - endCol: startCol, - startOffset, - endOffset: startOffset, - }; - } - _err(code) { - if (this.handler.onParseError && this.lastErrOffset !== this.offset) { - this.lastErrOffset = this.offset; - this.handler.onParseError(this.getError(code, 0)); - } - } - _addGap() { - this.gapStack.push(this.lastGapPos); - this.lastGapPos = this.pos; - } - _processSurrogate(cp) { - //NOTE: try to peek a surrogate pair - if (this.pos !== this.html.length - 1) { - const nextCp = this.html.charCodeAt(this.pos + 1); - if (isSurrogatePair(nextCp)) { - //NOTE: we have a surrogate pair. Peek pair character and recalculate code point. - this.pos++; - //NOTE: add a gap that should be avoided during retreat - this._addGap(); - return getSurrogatePairCodePoint(cp, nextCp); - } - } - //NOTE: we are at the end of a chunk, therefore we can't infer the surrogate pair yet. - else if (!this.lastChunkWritten) { - this.endOfChunkHit = true; - return $.EOF; - } - //NOTE: isolated surrogate - this._err(ERR.surrogateInInputStream); - return cp; - } - willDropParsedChunk() { - return this.pos > this.bufferWaterline; - } - dropParsedChunk() { - if (this.willDropParsedChunk()) { - this.html = this.html.substring(this.pos); - this.lineStartPos -= this.pos; - this.droppedBufferSize += this.pos; - this.pos = 0; - this.lastGapPos = -2; - this.gapStack.length = 0; - } - } - write(chunk, isLastChunk) { - if (this.html.length > 0) { - this.html += chunk; - } - else { - this.html = chunk; - } - this.endOfChunkHit = false; - this.lastChunkWritten = isLastChunk; - } - insertHtmlAtCurrentPos(chunk) { - this.html = this.html.substring(0, this.pos + 1) + chunk + this.html.substring(this.pos + 1); - this.endOfChunkHit = false; - } - startsWith(pattern, caseSensitive) { - // Check if our buffer has enough characters - if (this.pos + pattern.length > this.html.length) { - this.endOfChunkHit = !this.lastChunkWritten; - return false; - } - if (caseSensitive) { - return this.html.startsWith(pattern, this.pos); - } - for (let i = 0; i < pattern.length; i++) { - const cp = this.html.charCodeAt(this.pos + i) | 0x20; - if (cp !== pattern.charCodeAt(i)) { - return false; - } - } - return true; - } - peek(offset) { - const pos = this.pos + offset; - if (pos >= this.html.length) { - this.endOfChunkHit = !this.lastChunkWritten; - return $.EOF; - } - const code = this.html.charCodeAt(pos); - return code === $.CARRIAGE_RETURN ? $.LINE_FEED : code; - } - advance() { - this.pos++; - //NOTE: LF should be in the last column of the line - if (this.isEol) { - this.isEol = false; - this.line++; - this.lineStartPos = this.pos; - } - if (this.pos >= this.html.length) { - this.endOfChunkHit = !this.lastChunkWritten; - return $.EOF; - } - let cp = this.html.charCodeAt(this.pos); - //NOTE: all U+000D CARRIAGE RETURN (CR) characters must be converted to U+000A LINE FEED (LF) characters - if (cp === $.CARRIAGE_RETURN) { - this.isEol = true; - this.skipNextNewLine = true; - return $.LINE_FEED; - } - //NOTE: any U+000A LINE FEED (LF) characters that immediately follow a U+000D CARRIAGE RETURN (CR) character - //must be ignored. - if (cp === $.LINE_FEED) { - this.isEol = true; - if (this.skipNextNewLine) { - // `line` will be bumped again in the recursive call. - this.line--; - this.skipNextNewLine = false; - this._addGap(); - return this.advance(); - } - } - this.skipNextNewLine = false; - if (isSurrogate(cp)) { - cp = this._processSurrogate(cp); - } - //OPTIMIZATION: first check if code point is in the common allowed - //range (ASCII alphanumeric, whitespaces, big chunk of BMP) - //before going into detailed performance cost validation. - const isCommonValidRange = this.handler.onParseError === null || - (cp > 0x1f && cp < 0x7f) || - cp === $.LINE_FEED || - cp === $.CARRIAGE_RETURN || - (cp > 0x9f && cp < 64976); - if (!isCommonValidRange) { - this._checkForProblematicCharacters(cp); - } - return cp; - } - _checkForProblematicCharacters(cp) { - if (isControlCodePoint(cp)) { - this._err(ERR.controlCharacterInInputStream); - } - else if (isUndefinedCodePoint(cp)) { - this._err(ERR.noncharacterInInputStream); - } - } - retreat(count) { - this.pos -= count; - while (this.pos < this.lastGapPos) { - this.lastGapPos = this.gapStack.pop(); - this.pos--; - } - this.isEol = false; - } -} -- cgit v1.2.3