// Source: vanilla/node_modules/tldts-core/src/extract-hostname.ts
// (added by commit 76cb9c2a39d4, "feat(vanilla): add testing infrastructure
// and tests (NK-wjnczv)").

/**
 * Tell whether `code` is a character allowed inside a URL scheme:
 * ASCII letters, ASCII digits, '+', '-' or '.'.
 *
 * The raw code is compared (no `| 32` case folding) so that control
 * characters such as 0x0D or 0x10 are not mistaken for '-' or '0'.
 */
function isSchemeCharCode(code: number): boolean {
  return (
    (code >= 97 && code <= 122) || // [a, z]
    (code >= 65 && code <= 90) || // [A, Z]
    (code >= 48 && code <= 57) || // [0, 9]
    code === 46 || // '.'
    code === 45 || // '-'
    code === 43 // '+'
  );
}

/**
 * Extract the hostname part of `url` using a single forward scan, without
 * allocating intermediate strings.
 *
 * When `urlIsValidHostname` is `false` the function, in order: rejects
 * `data:` URLs, trims leading/trailing characters with code <= 32, skips a
 * leading `//` or a `scheme:/` prefix (plus any extra slashes), cuts the
 * string at the first '/', '?' or '#', drops everything up to the last '@',
 * unwraps a bracketed IPv6 literal or strips a `:port` suffix, and lowercases
 * the result if an upper-case ASCII letter was seen.
 *
 * When `urlIsValidHostname` is `true` none of the above happens; only
 * trailing dots are trimmed and the value is returned without lowercasing.
 *
 * @param url - URL we want to extract a hostname from.
 * @param urlIsValidHostname - hint from caller; true if `url` is already a valid hostname.
 * @returns The extracted hostname (possibly an empty string), or `null` when
 * `url` is a `data:` URL, has a scheme containing invalid characters, or
 * starts its host with '[' without a matching ']' after it.
 */
export default function extractHostname(
  url: string,
  urlIsValidHostname: boolean,
): string | null {
  let start = 0;
  let end: number = url.length;
  let hasUpper = false;

  // If url is not already a valid hostname, then try to extract hostname.
  if (!urlIsValidHostname) {
    // Special handling of data URLs
    if (url.startsWith('data:')) {
      return null;
    }

    // Trim leading spaces
    while (start < url.length && url.charCodeAt(start) <= 32) {
      start += 1;
    }

    // Trim trailing spaces (always keeps at least one character)
    while (end > start + 1 && url.charCodeAt(end - 1) <= 32) {
      end -= 1;
    }

    // Skip scheme.
    if (
      url.charCodeAt(start) === 47 /* '/' */ &&
      url.charCodeAt(start + 1) === 47 /* '/' */
    ) {
      // Protocol-relative URL: '//host/...'
      start += 2;
    } else {
      const indexOfProtocol = url.indexOf(':/', start);
      if (indexOfProtocol !== -1) {
        // Implement fast-path for common protocols. We expect most protocols
        // should be one of these 4 and thus we will not need to perform the
        // more expansive validity check most of the time.
        const protocolSize = indexOfProtocol - start;
        const c0 = url.charCodeAt(start);
        const c1 = url.charCodeAt(start + 1);
        const c2 = url.charCodeAt(start + 2);
        const c3 = url.charCodeAt(start + 3);
        const c4 = url.charCodeAt(start + 4);

        // Becomes false when the ':/' we found is not a scheme delimiter.
        let hasScheme = true;

        if (
          protocolSize === 5 &&
          c0 === 104 /* 'h' */ &&
          c1 === 116 /* 't' */ &&
          c2 === 116 /* 't' */ &&
          c3 === 112 /* 'p' */ &&
          c4 === 115 /* 's' */
        ) {
          // https
        } else if (
          protocolSize === 4 &&
          c0 === 104 /* 'h' */ &&
          c1 === 116 /* 't' */ &&
          c2 === 116 /* 't' */ &&
          c3 === 112 /* 'p' */
        ) {
          // http
        } else if (
          protocolSize === 3 &&
          c0 === 119 /* 'w' */ &&
          c1 === 115 /* 's' */ &&
          c2 === 115 /* 's' */
        ) {
          // wss
        } else if (
          protocolSize === 2 &&
          c0 === 119 /* 'w' */ &&
          c1 === 115 /* 's' */
        ) {
          // ws
        } else {
          // Check that scheme is valid
          let isValidScheme = true;
          for (let i = start; i < indexOfProtocol; i += 1) {
            const code = url.charCodeAt(i);
            if (
              code === 35 || // '#'
              code === 47 || // '/'
              code === 63 // '?'
            ) {
              // The ':/' sits after the authority already ended (e.g. in
              // 'example.com/a?u=http://b.com'), so it does not delimit a
              // scheme: parse the input as a scheme-less URL instead.
              hasScheme = false;
              break;
            }
            if (!isSchemeCharCode(code)) {
              isValidScheme = false;
            }
          }

          if (hasScheme && !isValidScheme) {
            return null;
          }
        }

        if (hasScheme) {
          // Skip 0, 1 or more '/' after ':/'
          start = indexOfProtocol + 2;
          while (url.charCodeAt(start) === 47 /* '/' */) {
            start += 1;
          }
        }
      }
    }

    // Detect first occurrence of '/', '?' or '#'. We also keep track of the
    // last occurrence of '@', ']' or ':' to speed-up subsequent parsing of
    // (respectively), identifier, ipv6 or port.
    let indexOfIdentifier = -1;
    let indexOfClosingBracket = -1;
    let indexOfPort = -1;
    for (let i = start; i < end; i += 1) {
      const code: number = url.charCodeAt(i);
      if (
        code === 35 || // '#'
        code === 47 || // '/'
        code === 63 // '?'
      ) {
        end = i;
        break;
      } else if (code === 64) {
        // '@'
        indexOfIdentifier = i;
      } else if (code === 93) {
        // ']'
        indexOfClosingBracket = i;
      } else if (code === 58) {
        // ':'
        indexOfPort = i;
      } else if (code >= 65 && code <= 90) {
        hasUpper = true;
      }
    }

    // Detect identifier: '@'
    if (
      indexOfIdentifier !== -1 &&
      indexOfIdentifier > start &&
      indexOfIdentifier < end
    ) {
      start = indexOfIdentifier + 1;
    }

    // Handle ipv6 addresses
    if (url.charCodeAt(start) === 91 /* '[' */) {
      // The closing bracket must come after the opening one; a ']' that only
      // appears earlier (e.g. inside the identifier part) does not count.
      if (indexOfClosingBracket > start) {
        return url.slice(start + 1, indexOfClosingBracket).toLowerCase();
      }
      return null;
    } else if (indexOfPort !== -1 && indexOfPort > start && indexOfPort < end) {
      // Detect port: ':'
      end = indexOfPort;
    }
  }

  // Trim trailing dots
  while (end > start + 1 && url.charCodeAt(end - 1) === 46 /* '.' */) {
    end -= 1;
  }

  // Avoid the slice when the whole input is the hostname.
  const hostname: string =
    start !== 0 || end !== url.length ? url.slice(start, end) : url;

  if (hasUpper) {
    return hostname.toLowerCase();
  }

  return hostname;
}