Skip to main content
Module

std/node/url.ts

Deno standard library
Go to Latest
File
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466
// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license.
// Copyright Joyent, Inc. and other Node contributors.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to permit
// persons to whom the Software is furnished to do so, subject to the
// following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
// USE OR OTHER DEALINGS IN THE SOFTWARE.
import { ERR_INVALID_ARG_TYPE, ERR_INVALID_ARG_VALUE, ERR_INVALID_FILE_URL_HOST, ERR_INVALID_FILE_URL_PATH, ERR_INVALID_URL, ERR_INVALID_URL_SCHEME,} from "./internal/errors.ts";import { validateString } from "./internal/validators.mjs";import { CHAR_0, CHAR_9, CHAR_AT, CHAR_BACKWARD_SLASH, CHAR_CARRIAGE_RETURN, CHAR_CIRCUMFLEX_ACCENT, CHAR_DOT, CHAR_DOUBLE_QUOTE, CHAR_FORM_FEED, CHAR_FORWARD_SLASH, CHAR_GRAVE_ACCENT, CHAR_HASH, CHAR_HYPHEN_MINUS, CHAR_LEFT_ANGLE_BRACKET, CHAR_LEFT_CURLY_BRACKET, CHAR_LEFT_SQUARE_BRACKET, CHAR_LINE_FEED, CHAR_LOWERCASE_A, CHAR_LOWERCASE_Z, CHAR_NO_BREAK_SPACE, CHAR_PERCENT, CHAR_PLUS, CHAR_QUESTION_MARK, CHAR_RIGHT_ANGLE_BRACKET, CHAR_RIGHT_CURLY_BRACKET, CHAR_RIGHT_SQUARE_BRACKET, CHAR_SEMICOLON, CHAR_SINGLE_QUOTE, CHAR_SPACE, CHAR_TAB, CHAR_UNDERSCORE, CHAR_UPPERCASE_A, CHAR_UPPERCASE_Z, CHAR_VERTICAL_LINE, CHAR_ZERO_WIDTH_NOBREAK_SPACE,} from "../path/_constants.ts";import * as path from "./path.ts";import { toASCII } from "./internal/idna.ts";import { isWindows, osType } from "../_util/os.ts";import { encodeStr, hexTable } from "./internal/querystring.ts";import querystring from "./querystring.ts";import type { ParsedUrlQuery, ParsedUrlQueryInput } from "./querystring.ts";
// Global single-character matchers. They are not referenced by the code
// visible in this part of the file; presumably they serve helpers defined
// further down (verify before removing).
const forwardSlashRegEx = /\//g;
const percentRegEx = /%/g;
const backslashRegEx = /\\/g;
const newlineRegEx = /\n/g;
const carriageReturnRegEx = /\r/g;
const tabRegEx = /\t/g;

// Reference: RFC 3986, RFC 1808, RFC 2396

// The patterns below live at module scope so each is compiled only once,
// when the module is first loaded.

// A leading scheme such as "http:" (case-insensitive).
const protocolPattern = /^[a-z0-9.+-]+:/i;
// A trailing ":<digits>" (the digits may be absent).
const portPattern = /:[0-9]*$/;
// "//user@host" at the very start of the input.
const hostPattern = /^\/\/[^@/]+@[^@/]+/;

// Special case for a simple path URL: group 1 is the path, group 2 the
// optional "?query".
const simplePathPattern = /^(\/\/?(?!\/)[^?\s]*)(\?[^\s]*)?$/;

// Protocols that can allow "unsafe" and "unwise" chars.
const unsafeProtocol = new Set(["javascript", "javascript:"]);

// Protocols that never have a hostname.
const hostlessProtocol = new Set(["javascript", "javascript:"]);

// Protocols that always contain a // bit. Every scheme is registered both
// bare and with its trailing colon.
const slashedProtocol = new Set(
  ["http", "https", "ftp", "gopher", "file", "ws", "wss"].flatMap(
    (scheme) => [scheme, `${scheme}:`],
  ),
);

// Hostnames longer than this are discarded by the parser.
const hostnameMaxLen = 255;
// Lookup table indexed by ASCII code (0x00 - 0x7F): an entry of 1 means the
// character may appear in the auth component without being percent-escaped.
// The unescaped set is:
//   ! - . _ ~ ' ( ) * :
//   digits
//   alpha (uppercase)
//   alpha (lowercase)
const noEscapeAuth = new Int8Array(128);
for (
  const ch of "!'()*-.:_~" +
    "0123456789" +
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
    "abcdefghijklmnopqrstuvwxyz"
) {
  noEscapeAuth[ch.charCodeAt(0)] = 1;
}
// This prevents some common spoofing bugs due to our use of IDNA toASCII. For
// compatibility, the set of characters we use here is the *intersection* of
// "forbidden host code point" in the WHATWG URL Standard [1] and the
// characters in the host parsing loop in Url.prototype.urlParse, with the
// following additions:
//
// - ':' since this could cause a "protocol spoofing" bug
// - '@' since this could cause parts of the hostname to be confused with auth
// - '[' and ']' since this could cause a non-IPv6 hostname to be interpreted
//   as IPv6 by isIpv6Hostname (defined further down in this file)
//
// [1]: https://url.spec.whatwg.org/#forbidden-host-code-point
const forbiddenHostChars = /[\0\t\n\r #%/:<>?@[\\\]^|]/;
// For IPv6, permit '[', ']', and ':'.
const forbiddenHostCharsIpv6 = /[\0\t\n\r #%/<>?@\\^|]/;
// Re-export the global WHATWG `URL` constructor under this module's `URL`
// name. The local alias avoids declaring a module-level binding called `URL`.
const _url = URL;
export { _url as URL };
/**
 * Legacy URL API: a mutable record of URL components.
 *
 * Every component starts out as `null`. `urlParse()` fills the record from a
 * string, `format()` serializes it back, and `resolve()` / `resolveObject()`
 * resolve a relative reference against it.
 */
export class Url {
  /** Lower-cased scheme including the trailing ":" (e.g. "http:"). */
  public protocol: string | null;
  /** `true` when the "//" following the scheme was consumed by the parser. */
  public slashes: boolean | null;
  /** Decoded text found before the last "@" of the authority. */
  public auth: string | null;
  /** Hostname plus ":port" when a port is present; IPv6 brackets are kept. */
  public host: string | null;
  /** Port digits without the leading ":". */
  public port: string | null;
  /** Lower-cased hostname; IPv6 brackets are stripped. */
  public hostname: string | null;
  /** Fragment including the leading "#". */
  public hash: string | null;
  /** Query including the leading "?". */
  public search: string | null;
  /** Query without the "?", or the parsed object when parsing was requested. */
  public query: string | ParsedUrlQuery | null;
  /** Path portion of the URL. */
  public pathname: string | null;
  /** `pathname` followed by `search`. */
  public path: string | null;
  /** Full serialized URL as produced by `format()`. */
  public href: string | null;
  // resolveObject() copies components between instances by key name.
  [key: string]: unknown;

  constructor() {
    this.protocol = null;
    this.slashes = null;
    this.auth = null;
    this.host = null;
    this.port = null;
    this.hostname = null;
    this.hash = null;
    this.search = null;
    this.query = null;
    this.pathname = null;
    this.path = null;
    this.href = null;
  }

  /**
   * Splits `this.host` into `this.hostname` and `this.port`.
   * A bare trailing ":" is dropped from the hostname without setting a port.
   */
  #parseHost() {
    let host = this.host || "";
    let port: RegExpExecArray | null | string = portPattern.exec(host);
    if (port) {
      port = port[0];
      if (port !== ":") {
        this.port = port.slice(1);
      }
      host = host.slice(0, host.length - port.length);
    }
    if (host) this.hostname = host;
  }

  /**
   * Resolves `relative` against this URL.
   * @param relative The reference to resolve.
   * @returns The resolved URL, serialized with `format()`.
   */
  public resolve(relative: string) {
    return this.resolveObject(parse(relative, false, true)).format();
  }

  /**
   * Resolves `relative` against this URL and returns a new `Url`.
   *
   * This instance is not modified. A `relative` passed as a `Url` instance
   * may have its host-related fields rewritten while resolving.
   *
   * @param relative The reference to resolve; strings are parsed with
   *   slashes denoting a host.
   * @returns A new `Url` whose `href` is already formatted.
   */
  public resolveObject(relative: string | Url) {
    if (typeof relative === "string") {
      const rel = new Url();
      rel.urlParse(relative, false, true);
      relative = rel;
    }

    // Start from a shallow copy of this URL.
    const result = new Url();
    const tkeys = Object.keys(this);
    for (let tk = 0; tk < tkeys.length; tk++) {
      const tkey = tkeys[tk];
      result[tkey] = this[tkey];
    }

    // Hash is always overridden, no matter what.
    // even href="" will remove it.
    result.hash = relative.hash;

    // If the relative url is empty, then there's nothing left to do here.
    if (relative.href === "") {
      result.href = result.format();
      return result;
    }

    // Hrefs like //foo/bar always cut to the protocol.
    if (relative.slashes && !relative.protocol) {
      // Take everything except the protocol from relative
      const rkeys = Object.keys(relative);
      for (let rk = 0; rk < rkeys.length; rk++) {
        const rkey = rkeys[rk];
        if (rkey !== "protocol") result[rkey] = relative[rkey];
      }

      // urlParse appends trailing / to urls like http://www.example.com
      if (
        result.protocol &&
        slashedProtocol.has(result.protocol) &&
        result.hostname &&
        !result.pathname
      ) {
        result.path = result.pathname = "/";
      }

      result.href = result.format();
      return result;
    }

    if (relative.protocol && relative.protocol !== result.protocol) {
      // If it's a known url protocol, then changing
      // the protocol does weird things
      // first, if it's not file:, then we MUST have a host,
      // and if there was a path
      // to begin with, then we MUST have a path.
      // if it is file:, then the host is dropped,
      // because that's known to be hostless.
      // anything else is assumed to be absolute.
      if (!slashedProtocol.has(relative.protocol)) {
        const keys = Object.keys(relative);
        for (let v = 0; v < keys.length; v++) {
          const k = keys[v];
          result[k] = relative[k];
        }
        result.href = result.format();
        return result;
      }

      result.protocol = relative.protocol;
      if (
        !relative.host &&
        !/^file:?$/.test(relative.protocol) &&
        !hostlessProtocol.has(relative.protocol)
      ) {
        // Promote the first non-empty path segment to be the host.
        const relPath = (relative.pathname || "").split("/");
        while (relPath.length && !(relative.host = relPath.shift() || null));
        if (!relative.host) relative.host = "";
        if (!relative.hostname) relative.hostname = "";
        if (relPath[0] !== "") relPath.unshift("");
        if (relPath.length < 2) relPath.unshift("");
        result.pathname = relPath.join("/");
      } else {
        result.pathname = relative.pathname;
      }
      result.search = relative.search;
      result.query = relative.query;
      result.host = relative.host || "";
      result.auth = relative.auth;
      result.hostname = relative.hostname || relative.host;
      result.port = relative.port;
      // To support http.request
      if (result.pathname || result.search) {
        const p = result.pathname || "";
        const s = result.search || "";
        result.path = p + s;
      }
      result.slashes = result.slashes || relative.slashes;
      result.href = result.format();
      return result;
    }

    const isSourceAbs = result.pathname && result.pathname.charAt(0) === "/";
    const isRelAbs = relative.host ||
      (relative.pathname && relative.pathname.charAt(0) === "/");
    let mustEndAbs: string | boolean | number | null = isRelAbs ||
      isSourceAbs || (result.host && relative.pathname);
    const removeAllDots = mustEndAbs;
    let srcPath = (result.pathname && result.pathname.split("/")) || [];
    const relPath = (relative.pathname && relative.pathname.split("/")) || [];
    const noLeadingSlashes = result.protocol &&
      !slashedProtocol.has(result.protocol);

    // If the url is a non-slashed url, then relative
    // links like ../.. should be able
    // to crawl up to the hostname, as well. This is strange.
    // result.protocol has already been set by now.
    // Later on, put the first path part into the host field.
    if (noLeadingSlashes) {
      result.hostname = "";
      result.port = null;
      if (result.host) {
        if (srcPath[0] === "") srcPath[0] = result.host;
        else srcPath.unshift(result.host);
      }
      result.host = "";
      if (relative.protocol) {
        relative.hostname = null;
        relative.port = null;
        result.auth = null;
        if (relative.host) {
          if (relPath[0] === "") relPath[0] = relative.host;
          else relPath.unshift(relative.host);
        }
        relative.host = null;
      }
      mustEndAbs = mustEndAbs && (relPath[0] === "" || srcPath[0] === "");
    }

    if (isRelAbs) {
      // it's absolute.
      if (relative.host || relative.host === "") {
        if (result.host !== relative.host) result.auth = null;
        result.host = relative.host;
        result.port = relative.port;
      }
      if (relative.hostname || relative.hostname === "") {
        if (result.hostname !== relative.hostname) result.auth = null;
        result.hostname = relative.hostname;
      }
      result.search = relative.search;
      result.query = relative.query;
      srcPath = relPath;
      // Fall through to the dot-handling below.
    } else if (relPath.length) {
      // it's relative
      // throw away the existing file, and take the new path instead.
      if (!srcPath) srcPath = [];
      srcPath.pop();
      srcPath = srcPath.concat(relPath);
      result.search = relative.search;
      result.query = relative.query;
    } else if (relative.search !== null && relative.search !== undefined) {
      // Just pull out the search.
      // like href='?foo'.
      // Put this after the other two cases because it simplifies the booleans
      if (noLeadingSlashes) {
        result.hostname = result.host = srcPath.shift() || null;
        // Occasionally the auth can get stuck only in host.
        // This especially happens in cases like
        // url.resolveObject('mailto:local1@domain1', 'local2@domain2')
        const authInHost = result.host && result.host.indexOf("@") > 0 &&
          result.host.split("@");
        if (authInHost) {
          result.auth = authInHost.shift() || null;
          result.host = result.hostname = authInHost.shift() || null;
        }
      }
      result.search = relative.search;
      result.query = relative.query;
      // To support http.request
      if (result.pathname !== null || result.search !== null) {
        result.path = (result.pathname ? result.pathname : "") +
          (result.search ? result.search : "");
      }
      result.href = result.format();
      return result;
    }

    if (!srcPath.length) {
      // No path at all. All other things were already handled above.
      result.pathname = null;
      // To support http.request
      if (result.search) {
        result.path = "/" + result.search;
      } else {
        result.path = null;
      }
      result.href = result.format();
      return result;
    }

    // If a url ENDs in . or .., then it must get a trailing slash.
    // however, if it ends in anything else non-slashy,
    // then it must NOT get a trailing slash.
    let last = srcPath.slice(-1)[0];
    const hasTrailingSlash =
      ((result.host || relative.host || srcPath.length > 1) &&
        (last === "." || last === "..")) ||
      last === "";

    // Strip single dots, resolve double dots to parent dir
    // if the path tries to go above the root, `up` ends up > 0
    let up = 0;
    for (let i = srcPath.length - 1; i >= 0; i--) {
      last = srcPath[i];
      if (last === ".") {
        srcPath.splice(i, 1);
      } else if (last === "..") {
        srcPath.splice(i, 1);
        up++;
      } else if (up) {
        srcPath.splice(i, 1);
        up--;
      }
    }

    // If the path is allowed to go above the root, restore leading ..s
    if (!mustEndAbs && !removeAllDots) {
      while (up--) {
        srcPath.unshift("..");
      }
    }

    if (
      mustEndAbs &&
      srcPath[0] !== "" &&
      (!srcPath[0] || srcPath[0].charAt(0) !== "/")
    ) {
      srcPath.unshift("");
    }

    if (hasTrailingSlash && !srcPath.join("/").endsWith("/")) {
      srcPath.push("");
    }

    const isAbsolute = srcPath[0] === "" ||
      (srcPath[0] && srcPath[0].charAt(0) === "/");

    // put the host back
    if (noLeadingSlashes) {
      result.hostname = result.host = isAbsolute
        ? ""
        : srcPath.length
        ? srcPath.shift() || null
        : "";
      // Occasionally the auth can get stuck only in host.
      // This especially happens in cases like
      // url.resolveObject('mailto:local1@domain1', 'local2@domain2')
      const authInHost = result.host && result.host.indexOf("@") > 0
        ? result.host.split("@")
        : false;
      if (authInHost) {
        result.auth = authInHost.shift() || null;
        result.host = result.hostname = authInHost.shift() || null;
      }
    }

    mustEndAbs = mustEndAbs || (result.host && srcPath.length);

    if (mustEndAbs && !isAbsolute) {
      srcPath.unshift("");
    }

    if (!srcPath.length) {
      result.pathname = null;
      result.path = null;
    } else {
      result.pathname = srcPath.join("/");
    }

    // To support http.request
    if (result.pathname !== null || result.search !== null) {
      result.path = (result.pathname ? result.pathname : "") +
        (result.search ? result.search : "");
    }
    result.auth = relative.auth || result.auth;
    result.slashes = result.slashes || relative.slashes;
    result.href = result.format();
    return result;
  }

  /**
   * Serializes the components of this record into a URL string.
   *
   * `host` wins over `hostname`/`port`, and `search` wins over `query`.
   * "#" and "?" inside `pathname`, and "#" inside `search`, are
   * percent-encoded so they cannot change how the result is split.
   *
   * @returns The serialized URL.
   */
  format() {
    let auth = this.auth || "";
    if (auth) {
      auth = encodeStr(auth, noEscapeAuth, hexTable);
      auth += "@";
    }

    let protocol = this.protocol || "";
    let pathname = this.pathname || "";
    let hash = this.hash || "";
    let host = "";
    let query = "";

    if (this.host) {
      host = auth + this.host;
    } else if (this.hostname) {
      // A hostname containing ":" that is not already bracketed is wrapped
      // in "[...]".
      host = auth +
        (this.hostname.includes(":") && !isIpv6Hostname(this.hostname)
          ? "[" + this.hostname + "]"
          : this.hostname);
      if (this.port) {
        host += ":" + this.port;
      }
    }

    if (this.query !== null && typeof this.query === "object") {
      query = querystring.stringify(this.query);
    }

    let search = this.search || (query && "?" + query) || "";

    if (protocol && protocol.charCodeAt(protocol.length - 1) !== 58 /* : */) {
      protocol += ":";
    }

    // Percent-encode "#" and "?" inside the pathname, copying the untouched
    // runs between them.
    let newPathname = "";
    let lastPos = 0;
    for (let i = 0; i < pathname.length; ++i) {
      switch (pathname.charCodeAt(i)) {
        case CHAR_HASH:
          if (i - lastPos > 0) {
            newPathname += pathname.slice(lastPos, i);
          }
          newPathname += "%23";
          lastPos = i + 1;
          break;
        case CHAR_QUESTION_MARK:
          if (i - lastPos > 0) {
            newPathname += pathname.slice(lastPos, i);
          }
          newPathname += "%3F";
          lastPos = i + 1;
          break;
      }
    }
    if (lastPos > 0) {
      if (lastPos !== pathname.length) {
        pathname = newPathname + pathname.slice(lastPos);
      } else pathname = newPathname;
    }

    // Only the slashedProtocols get the //. Not mailto:, xmpp:, etc.
    // unless they had them to begin with.
    if (this.slashes || slashedProtocol.has(protocol)) {
      if (this.slashes || host) {
        if (pathname && pathname.charCodeAt(0) !== CHAR_FORWARD_SLASH) {
          pathname = "/" + pathname;
        }
        host = "//" + host;
      } else if (
        protocol.length >= 4 &&
        protocol.charCodeAt(0) === 102 /* f */ &&
        protocol.charCodeAt(1) === 105 /* i */ &&
        protocol.charCodeAt(2) === 108 /* l */ &&
        protocol.charCodeAt(3) === 101 /* e */
      ) {
        host = "//";
      }
    }

    search = search.replace(/#/g, "%23");

    if (hash && hash.charCodeAt(0) !== CHAR_HASH) {
      hash = "#" + hash;
    }
    if (search && search.charCodeAt(0) !== CHAR_QUESTION_MARK) {
      search = "?" + search;
    }

    return protocol + host + pathname + search + hash;
  }

  /**
   * Parses `url` into the components of this record.
   *
   * @param url The string to parse; surrounding whitespace is trimmed.
   * @param parseQueryString When `true`, `query` is set to the result of
   *   `querystring.parse()` (an empty object when there is no query).
   * @param slashesDenoteHost When `true`, a leading "//" introduces a host
   *   even without a scheme.
   * @returns This instance.
   * @throws ERR_INVALID_URL when the hostname is empty or contains a
   *   forbidden character after IDNA conversion, or when a bracketed IPv6
   *   hostname contains a forbidden character.
   */
  public urlParse(
    url: string,
    parseQueryString: boolean,
    slashesDenoteHost: boolean,
  ) {
    validateString(url, "url");

    // Copy chrome, IE, opera backslash-handling behavior.
    // Back slashes before the query string get converted to forward slashes
    // See: https://code.google.com/p/chromium/issues/detail?id=25916
    let hasHash = false;
    // Whether an "@" occurs before the first "?" or "#". Such input may carry
    // "user@host" and must go through host parsing, never the fast path.
    let hasAt = false;
    let start = -1;
    let end = -1;
    let rest = "";
    let lastPos = 0;
    for (let i = 0, inWs = false, split = false; i < url.length; ++i) {
      const code = url.charCodeAt(i);

      // Find first and last non-whitespace characters for trimming
      const isWs = code === CHAR_SPACE ||
        code === CHAR_TAB ||
        code === CHAR_CARRIAGE_RETURN ||
        code === CHAR_LINE_FEED ||
        code === CHAR_FORM_FEED ||
        code === CHAR_NO_BREAK_SPACE ||
        code === CHAR_ZERO_WIDTH_NOBREAK_SPACE;
      if (start === -1) {
        if (isWs) continue;
        lastPos = start = i;
      } else if (inWs) {
        if (!isWs) {
          end = -1;
          inWs = false;
        }
      } else if (isWs) {
        end = i;
        inWs = true;
      }

      // Only convert backslashes while we haven't seen a split character
      if (!split) {
        switch (code) {
          case CHAR_AT:
            hasAt = true;
            break;
          case CHAR_HASH:
            hasHash = true;
          // Fall through
          case CHAR_QUESTION_MARK:
            split = true;
            break;
          case CHAR_BACKWARD_SLASH:
            if (i - lastPos > 0) rest += url.slice(lastPos, i);
            rest += "/";
            lastPos = i + 1;
            break;
        }
      } else if (!hasHash && code === CHAR_HASH) {
        hasHash = true;
      }
    }

    // Check if string was non-empty (including strings with only whitespace)
    if (start !== -1) {
      if (lastPos === start) {
        // We didn't convert any backslashes

        if (end === -1) {
          if (start === 0) rest = url;
          else rest = url.slice(start);
        } else {
          rest = url.slice(start, end);
        }
      } else if (end === -1 && lastPos < url.length) {
        // We converted some backslashes and have only part of the entire string
        rest += url.slice(lastPos);
      } else if (end !== -1 && lastPos < end) {
        // We converted some backslashes and have only part of the entire string
        rest += url.slice(lastPos, end);
      }
    }

    if (!slashesDenoteHost && !hasHash && !hasAt) {
      // Try fast path regexp
      const simplePath = simplePathPattern.exec(rest);
      if (simplePath) {
        this.path = rest;
        this.href = rest;
        this.pathname = simplePath[1];
        if (simplePath[2]) {
          this.search = simplePath[2];
          if (parseQueryString) {
            this.query = querystring.parse(this.search.slice(1));
          } else {
            this.query = this.search.slice(1);
          }
        } else if (parseQueryString) {
          this.search = null;
          this.query = Object.create(null);
        }
        return this;
      }
    }

    let proto: RegExpExecArray | null | string = protocolPattern.exec(rest);
    let lowerProto = "";
    if (proto) {
      proto = proto[0];
      lowerProto = proto.toLowerCase();
      this.protocol = lowerProto;
      rest = rest.slice(proto.length);
    }

    // Figure out if it's got a host
    // user@server is *always* interpreted as a hostname, and url
    // resolution will treat //foo/bar as host=foo,path=bar because that's
    // how the browser resolves relative URLs.
    let slashes;
    if (slashesDenoteHost || proto || hostPattern.test(rest)) {
      slashes = rest.charCodeAt(0) === CHAR_FORWARD_SLASH &&
        rest.charCodeAt(1) === CHAR_FORWARD_SLASH;
      if (slashes && !(proto && hostlessProtocol.has(lowerProto))) {
        rest = rest.slice(2);
        this.slashes = true;
      }
    }

    if (
      !hostlessProtocol.has(lowerProto) &&
      (slashes || (proto && !slashedProtocol.has(proto)))
    ) {
      // there's a hostname.
      // the first instance of /, ?, ;, or # ends the host.
      //
      // If there is an @ in the hostname, then non-host chars *are* allowed
      // to the left of the last @ sign, unless some host-ending character
      // comes *before* the @-sign.
      // URLs are obnoxious.
      //
      // ex:
      // http://a@b@c/ => user:a@b host:c
      // http://a@b?@c => user:a host:b path:/?@c

      let hostEnd = -1;
      let atSign = -1;
      let nonHost = -1;
      for (let i = 0; i < rest.length; ++i) {
        switch (rest.charCodeAt(i)) {
          case CHAR_TAB:
          case CHAR_LINE_FEED:
          case CHAR_CARRIAGE_RETURN:
          case CHAR_SPACE:
          case CHAR_DOUBLE_QUOTE:
          case CHAR_PERCENT:
          case CHAR_SINGLE_QUOTE:
          case CHAR_SEMICOLON:
          case CHAR_LEFT_ANGLE_BRACKET:
          case CHAR_RIGHT_ANGLE_BRACKET:
          case CHAR_BACKWARD_SLASH:
          case CHAR_CIRCUMFLEX_ACCENT:
          case CHAR_GRAVE_ACCENT:
          case CHAR_LEFT_CURLY_BRACKET:
          case CHAR_VERTICAL_LINE:
          case CHAR_RIGHT_CURLY_BRACKET:
            // Characters that are never ever allowed in a hostname from RFC 2396
            if (nonHost === -1) nonHost = i;
            break;
          case CHAR_HASH:
          case CHAR_FORWARD_SLASH:
          case CHAR_QUESTION_MARK:
            // Find the first instance of any host-ending characters
            if (nonHost === -1) nonHost = i;
            hostEnd = i;
            break;
          case CHAR_AT:
            // At this point, either we have an explicit point where the
            // auth portion cannot go past, or the last @ char is the decider.
            atSign = i;
            nonHost = -1;
            break;
        }
        if (hostEnd !== -1) break;
      }
      start = 0;
      if (atSign !== -1) {
        this.auth = decodeURIComponent(rest.slice(0, atSign));
        start = atSign + 1;
      }
      if (nonHost === -1) {
        this.host = rest.slice(start);
        rest = "";
      } else {
        this.host = rest.slice(start, nonHost);
        rest = rest.slice(nonHost);
      }

      // pull out port.
      this.#parseHost();

      // We've indicated that there is a hostname,
      // so even if it's empty, it has to be present.
      if (typeof this.hostname !== "string") this.hostname = "";

      const hostname = this.hostname;

      // If hostname begins with [ and ends with ]
      // assume that it's an IPv6 address.
      const ipv6Hostname = isIpv6Hostname(hostname);

      // validate a little.
      if (!ipv6Hostname) {
        rest = getHostname(this, rest, hostname);
      }

      if (this.hostname.length > hostnameMaxLen) {
        this.hostname = "";
      } else {
        // Hostnames are always lower case.
        this.hostname = this.hostname.toLowerCase();
      }

      if (this.hostname !== "") {
        if (ipv6Hostname) {
          if (forbiddenHostCharsIpv6.test(this.hostname)) {
            throw new ERR_INVALID_URL(url);
          }
        } else {
          // IDNA Support: Returns a punycoded representation of "domain".
          // It only converts parts of the domain name that
          // have non-ASCII characters, i.e. it doesn't matter if
          // you call it with a domain that already is ASCII-only.

          // NOTE(review): upstream Node passes `true` (lenient mode) as a
          // second argument here; this call passes only the hostname.
          // Confirm the helper's default matches the intended leniency.
          this.hostname = toASCII(this.hostname);

          // Prevent two potential routes of hostname spoofing.
          // 1. If this.hostname is empty, it must have become empty due to toASCII
          //    since we checked this.hostname above.
          // 2. If any of forbiddenHostChars appears in this.hostname, it must have
          //    also gotten in due to toASCII. This is since getHostname would have
          //    filtered them out otherwise.
          // Rather than trying to correct this by moving the non-host part into
          // the pathname as we've done in getHostname, throw an exception to
          // convey the severity of this issue.
          if (this.hostname === "" || forbiddenHostChars.test(this.hostname)) {
            throw new ERR_INVALID_URL(url);
          }
        }
      }

      const p = this.port ? ":" + this.port : "";
      const h = this.hostname || "";
      this.host = h + p;

      // strip [ and ] from the hostname
      // the host field still retains them, though
      if (ipv6Hostname) {
        this.hostname = this.hostname.slice(1, -1);
        if (rest[0] !== "/") {
          rest = "/" + rest;
        }
      }
    }

    // Now rest is set to the post-host stuff.
    // Chop off any delim chars.
    if (!unsafeProtocol.has(lowerProto)) {
      // First, make 100% sure that any "autoEscape" chars get
      // escaped, even if encodeURIComponent doesn't think they
      // need to be.
      rest = autoEscapeStr(rest);
    }

    // Locate the first "?" that precedes any "#", and the first "#".
    let questionIdx = -1;
    let hashIdx = -1;
    for (let i = 0; i < rest.length; ++i) {
      const code = rest.charCodeAt(i);
      if (code === CHAR_HASH) {
        this.hash = rest.slice(i);
        hashIdx = i;
        break;
      } else if (code === CHAR_QUESTION_MARK && questionIdx === -1) {
        questionIdx = i;
      }
    }

    if (questionIdx !== -1) {
      if (hashIdx === -1) {
        this.search = rest.slice(questionIdx);
        this.query = rest.slice(questionIdx + 1);
      } else {
        this.search = rest.slice(questionIdx, hashIdx);
        this.query = rest.slice(questionIdx + 1, hashIdx);
      }
      if (parseQueryString) {
        this.query = querystring.parse(this.query);
      }
    } else if (parseQueryString) {
      // No query string, but parseQueryString still requested
      this.search = null;
      this.query = Object.create(null);
    }

    const useQuestionIdx = questionIdx !== -1 &&
      (hashIdx === -1 || questionIdx < hashIdx);
    const firstIdx = useQuestionIdx ? questionIdx : hashIdx;
    if (firstIdx === -1) {
      if (rest.length > 0) this.pathname = rest;
    } else if (firstIdx > 0) {
      this.pathname = rest.slice(0, firstIdx);
    }
    if (slashedProtocol.has(lowerProto) && this.hostname && !this.pathname) {
      this.pathname = "/";
    }

    // To support http.request
    if (this.pathname || this.search) {
      const p = this.pathname || "";
      const s = this.search || "";
      this.path = p + s;
    }

    // Finally, reconstruct the href based on what has been validated.
    this.href = this.format();
    return this;
  }
}
/**
 * Plain-object description of a URL accepted by `format()`.
 * Every member is optional and may also be `null` or `undefined`.
 */
interface UrlObject {
  // Scheme and authority.
  /** Scheme; a trailing ":" is appended when formatting if it is missing. */
  protocol?: string | null | undefined;
  /** Whether "//" should follow the scheme. */
  slashes?: boolean | null | undefined;
  /** Authentication part, emitted before the host followed by "@". */
  auth?: string | null | undefined;
  /** Full host; when set, it is used instead of `hostname` and `port`. */
  host?: string | null | undefined;
  /** Host name, used when `host` is not set. */
  hostname?: string | null | undefined;
  /** Port appended to `hostname` when `host` is not set. */
  port?: string | number | null | undefined;

  // Path, query and fragment.
  /** Path portion. */
  pathname?: string | null | undefined;
  /** Query string; when set, it is used instead of `query`. */
  search?: string | null | undefined;
  /** Query; an object value is stringified when `search` is not set. */
  query?: string | null | ParsedUrlQueryInput | undefined;
  /** Fragment; a leading "#" is added when formatting if it is missing. */
  hash?: string | null | undefined;

  /** Complete serialized URL. */
  href?: string | null | undefined;
}
/**
 * Serializes a URL given as a string, a WHATWG `URL`, a legacy `Url`, or a
 * plain URL-like object.
 *
 * @param urlObject The value to serialize. Strings are parsed first.
 * @param options Serialization switches; only consulted for WHATWG `URL`
 *   instances.
 * @returns The serialized URL.
 * @throws ERR_INVALID_ARG_TYPE when `urlObject` is neither a string nor a
 *   non-null object.
 */
export function format(
  urlObject: string | URL | Url | UrlObject,
  options?: {
    auth: boolean;
    fragment: boolean;
    search: boolean;
    unicode: boolean;
  },
): string {
  // Strings are parsed into a legacy Url and serialized from there.
  if (typeof urlObject === "string") {
    return parse(urlObject, true, false).format();
  }

  if (typeof urlObject !== "object" || urlObject === null) {
    throw new ERR_INVALID_ARG_TYPE(
      "urlObject",
      ["Object", "string"],
      urlObject,
    );
  }

  if (urlObject instanceof Url) {
    return urlObject.format();
  }

  if (urlObject instanceof URL) {
    return formatWhatwg(urlObject, options);
  }

  // Any other object is formatted as if it were a legacy Url record.
  return Url.prototype.format.call(urlObject);
}
/**
 * The URL object has both a `toString()` method and `href` property that return string serializations of the URL.
 * These are not, however, customizable in any way.
 * This method allows for basic customization of the output.
 * @see Tested in `parallel/test-url-format-whatwg.js`.
 * @param urlObject A WHATWG `URL`, or a string that is parsed into one.
 * @param options Every field is optional; a missing, `null` or `undefined` field keeps its default.
 * @param options.auth `true` if the serialized URL string should include the username and password, `false` otherwise. **Default**: `true`.
 * @param options.fragment `true` if the serialized URL string should include the fragment, `false` otherwise. **Default**: `true`.
 * @param options.search `true` if the serialized URL string should include the search query, `false` otherwise. **Default**: `true`.
 * @param options.unicode `true` if Unicode characters appearing in the host component of the URL string should be encoded directly as opposed to being Punycode encoded. **Default**: `false`. Currently ignored (see TODO below).
 * @returns a customizable serialization of a URL `String` representation of a `WHATWG URL` object.
 * @throws ERR_INVALID_ARG_TYPE when `options` is truthy but not an object.
 */
function formatWhatwg(
  urlObject: string | URL,
  options?: {
    auth?: boolean;
    fragment?: boolean;
    search?: boolean;
    unicode?: boolean;
  },
): string {
  if (typeof urlObject === "string") {
    urlObject = new URL(urlObject);
  }
  if (options && typeof options !== "object") {
    throw new ERR_INVALID_ARG_TYPE("options", "object", options);
  }

  const includeAuth = options?.auth ?? true;
  const includeFragment = options?.fragment ?? true;
  const includeSearch = options?.search ?? true;

  let ret = urlObject.protocol;

  // A WHATWG URL never reports a null host, so the presence of an authority
  // is read from the serialization: hostless URLs such as "mailto:" have no
  // "//" after the scheme and must not gain one.
  const hasAuthority = urlObject.href.startsWith(`${urlObject.protocol}//`);
  if (hasAuthority) {
    ret += "//";
    const hasUsername = !!urlObject.username;
    const hasPassword = !!urlObject.password;
    if (includeAuth && (hasUsername || hasPassword)) {
      if (hasUsername) {
        ret += urlObject.username;
      }
      if (hasPassword) {
        ret += `:${urlObject.password}`;
      }
      ret += "@";
    }
    // TODO(wafuwfu13): Support unicode option
    // ret += options.unicode ?
    //   domainToUnicode(urlObject.hostname) : urlObject.hostname;
    // `host` already contains ":port"; use `hostname` so the port is not
    // emitted twice.
    ret += urlObject.hostname;
    if (urlObject.port) {
      ret += `:${urlObject.port}`;
    }
  }

  ret += urlObject.pathname;

  if (includeSearch && urlObject.search) {
    ret += urlObject.search;
  }
  if (includeFragment && urlObject.hash) {
    ret += urlObject.hash;
  }

  return ret;
}
/**
 * Reports whether `hostname` is wrapped in square brackets, i.e. its first
 * character is `[` and its last character is `]`.
 */
function isIpv6Hostname(hostname: string) {
  const firstCode = hostname.charCodeAt(0);
  const lastCode = hostname.charCodeAt(hostname.length - 1);
  return firstCode === CHAR_LEFT_SQUARE_BRACKET &&
    lastCode === CHAR_RIGHT_SQUARE_BRACKET;
}
/**
 * Scans `hostname` for the first character that is not allowed in a host.
 *
 * Allowed characters are ASCII letters, ASCII digits, `.`, `-`, `+`, `_` and
 * anything above code unit 127.
 *
 * @param self The `Url` whose `hostname` is truncated when an invalid
 * character is found.
 * @param rest The part of the URL that follows the hostname.
 * @param hostname The candidate hostname.
 * @returns `rest` unchanged when every character is allowed; otherwise `/`
 * followed by the hostname's tail (from the first invalid character on) and
 * `rest`.
 */
function getHostname(self: Url, rest: string, hostname: string) {
  for (let idx = 0; idx < hostname.length; ++idx) {
    const c = hostname.charCodeAt(idx);

    const isLetter = (c >= CHAR_LOWERCASE_A && c <= CHAR_LOWERCASE_Z) ||
      (c >= CHAR_UPPERCASE_A && c <= CHAR_UPPERCASE_Z);
    const isDigit = c >= CHAR_0 && c <= CHAR_9;
    const isAllowedSymbol = c === CHAR_DOT ||
      c === CHAR_HYPHEN_MINUS ||
      c === CHAR_PLUS ||
      c === CHAR_UNDERSCORE;

    if (isLetter || isDigit || isAllowedSymbol || c > 127) {
      continue;
    }

    // Invalid host character: keep the valid prefix as the hostname and push
    // everything from here on back in front of the remaining input.
    self.hostname = hostname.slice(0, idx);
    return `/${hostname.slice(idx)}${rest}`;
  }
  return rest;
}
// Escape table indexed by character code (0 - 125). An entry is either the
// percent-encoded form of that character or an empty string when the character
// is left untouched.
// Using Array is faster than Object/Map
const escapedCodes: string[] = Array.from({ length: 126 }, () => "");
for (
  const code of [
    9, // \t
    10, // \n
    13, // \r
    32, // space
    34, // "
    39, // '
    60, // <
    62, // >
    92, // \
    94, // ^
    96, // `
    123, // {
    124, // |
    125, // }
  ]
) {
  escapedCodes[code] = `%${code.toString(16).toUpperCase().padStart(2, "0")}`;
}
// Automatically escape all delimiters and unwise characters from RFC 2396.
// Also escape single quotes in case of an XSS attack.
// Returns the escaped string, or the input itself when nothing needed
// escaping.
function autoEscapeStr(rest: string) {
  let result = "";
  // Index of the first character not yet copied into `result`.
  let copyFrom = 0;

  for (let pos = 0; pos < rest.length; ++pos) {
    const replacement = escapedCodes[rest.charCodeAt(pos)];
    if (!replacement) {
      continue;
    }
    // Flush the run of ordinary characters preceding this one.
    if (pos > copyFrom) {
      result += rest.slice(copyFrom, pos);
    }
    result += replacement;
    copyFrom = pos + 1;
  }

  // Nothing has been escaped.
  if (copyFrom === 0) {
    return rest;
  }

  // Append any ordinary characters left at the end.
  return copyFrom < rest.length ? result + rest.slice(copyFrom) : result;
}
/**
 * The url.urlParse() method takes a URL string, parses it, and returns a URL object.
 *
 * @see Tested in `parallel/test-url-parse-format.js`.
 * @param url The URL string to parse. A value that is already a `Url` instance is returned as is.
 * @param parseQueryString If `true`, the query property will always be set to an object returned by the querystring module's parse() method. If `false`,
 * the query property on the returned URL object will be an unparsed, undecoded string. Default: `false`.
 * @param slashesDenoteHost If `true`, the first token after the literal string // and preceding the next / will be interpreted as the host. Default: `false`.
 * @returns The parsed `Url` object.
 */
export function parse(
  url: string | Url,
  parseQueryString = false,
  slashesDenoteHost = false,
): Url {
  if (url instanceof Url) return url;

  const urlObject = new Url();
  urlObject.urlParse(url, parseQueryString, slashesDenoteHost);
  return urlObject;
}
/**
 * The url.resolve() method resolves a target URL relative to a base URL in a
 * manner similar to that of a Web browser resolving an anchor tag HREF.
 *
 * @see https://nodejs.org/api/url.html#urlresolvefrom-to
 * @param from The base URL; parsed with slashes denoting the host.
 * @param to The target URL to resolve against `from`.
 * @legacy
 */
export function resolve(from: string, to: string) {
  const base = parse(from, false, true);
  return base.resolve(to);
}
/**
 * Resolves `relative` against `source` through `Url.prototype.resolveObject`.
 *
 * @param source The base URL, either as a string or as a `Url`.
 * @param relative The URL to resolve against `source`.
 * @returns `relative` itself, unchanged, when `source` is falsy; otherwise
 * whatever `resolveObject` returns for the parsed `source`.
 */
export function resolveObject(source: string | Url, relative: string) {
  if (source) {
    const base = parse(source, false, true);
    return base.resolveObject(relative);
  }
  return relative;
}
/**
 * This function ensures the correct decodings of percent-encoded characters as
 * well as ensuring a cross-platform valid absolute path string.
 *
 * @see Tested in `parallel/test-fileurltopath.js`.
 * @param path The file URL string or URL object to convert to a path.
 * @returns The fully-resolved platform-specific Node.js file path.
 * @throws ERR_INVALID_ARG_TYPE when `path` is neither a string nor a `URL`.
 * @throws ERR_INVALID_URL_SCHEME when the URL's protocol is not `file:`.
 */
export function fileURLToPath(path: string | URL): string {
  const fileUrl = typeof path === "string" ? new URL(path) : path;
  if (!(fileUrl instanceof URL)) {
    throw new ERR_INVALID_ARG_TYPE("path", ["string", "URL"], path);
  }

  if (fileUrl.protocol !== "file:") {
    throw new ERR_INVALID_URL_SCHEME("file");
  }

  if (isWindows) {
    return getPathFromURLWin(fileUrl);
  }
  return getPathFromURLPosix(fileUrl);
}
/**
 * Converts a `file:` URL into a Windows path.
 *
 * @param url The URL to convert.
 * @returns A UNC path (`\\host\…`) when the URL has a hostname, otherwise a
 * drive-letter path (`C:\…`).
 * @throws ERR_INVALID_FILE_URL_PATH when the pathname contains a
 * percent-encoded `/` or `\`, or when a host-less URL does not start with a
 * drive letter followed by `:`.
 */
function getPathFromURLWin(url: URL): string {
  const rawPathname = url.pathname;

  // Visit every "%" and reject the escapes that would decode to a separator.
  for (
    let at = rawPathname.indexOf("%");
    at !== -1;
    at = rawPathname.indexOf("%", at + 1)
  ) {
    const firstDigit = rawPathname[at + 1];
    // OR-ing with 0x20 lower-cases an ASCII letter, so "F"/"f" and "C"/"c"
    // compare equal.
    const secondDigit = rawPathname.codePointAt(at + 2)! | 0x20;
    const isEncodedSlash = firstDigit === "2" && secondDigit === 102; // 2f 2F /
    const isEncodedBackslash = firstDigit === "5" && secondDigit === 99; // 5c 5C \
    if (isEncodedSlash || isEncodedBackslash) {
      throw new ERR_INVALID_FILE_URL_PATH(
        "must not include encoded \\ or / characters",
      );
    }
  }

  const decoded = decodeURIComponent(
    rawPathname.replace(forwardSlashRegEx, "\\"),
  );

  const host = url.hostname;
  if (host !== "") {
    // TODO(bartlomieju): add support for punycode encodings
    return `\\\\${host}${decoded}`;
  }

  // Otherwise, it's a local path that requires a drive letter
  const driveLetter = decoded.codePointAt(1)! | 0x20;
  const isDriveLetter = driveLetter >= CHAR_LOWERCASE_A &&
    driveLetter <= CHAR_LOWERCASE_Z; // a..z A..Z
  if (!isDriveLetter || decoded[2] !== ":") {
    throw new ERR_INVALID_FILE_URL_PATH("must be absolute");
  }
  // Drop the leading separator that precedes the drive letter.
  return decoded.slice(1);
}
/**
 * Converts a `file:` URL into a POSIX path.
 *
 * @param url The URL to convert.
 * @returns The percent-decoded pathname.
 * @throws ERR_INVALID_FILE_URL_HOST when the URL has a non-empty hostname.
 * @throws ERR_INVALID_FILE_URL_PATH when the pathname contains a
 * percent-encoded `/`.
 */
function getPathFromURLPosix(url: URL): string {
  if (url.hostname !== "") {
    throw new ERR_INVALID_FILE_URL_HOST(osType);
  }

  const rawPathname = url.pathname;
  // Visit every "%" and reject "%2f" / "%2F".
  for (
    let at = rawPathname.indexOf("%");
    at !== -1;
    at = rawPathname.indexOf("%", at + 1)
  ) {
    // OR-ing with 0x20 lower-cases an ASCII letter, so "F" matches "f" (102).
    const secondDigit = rawPathname.codePointAt(at + 2)! | 0x20;
    if (rawPathname[at + 1] === "2" && secondDigit === 102) {
      throw new ERR_INVALID_FILE_URL_PATH(
        "must not include encoded / characters",
      );
    }
  }

  return decodeURIComponent(rawPathname);
}
/**
 * Percent-encodes the characters of a file path that the URL `pathname`
 * setter would otherwise mishandle:
 * - %: The percent character is the only character not encoded by the
 *      `pathname` setter.
 * - \: Backslash is encoded on non-windows platforms since it's a valid
 *      character but the `pathname` setter replaces it by a forward slash.
 * - LF: The newline character is stripped out by the `pathname` setter.
 *       (See whatwg/url#419)
 * - CR: The carriage return character is also stripped out by the `pathname`
 *       setter.
 * - TAB: The tab character is also stripped out by the `pathname` setter.
 *
 * @param filepath The path to encode.
 * @returns The path with the characters above replaced by their escapes.
 */
function encodePathChars(filepath: string): string {
  // Only run the regex when the character is actually present.
  const substitute = (
    text: string,
    needle: string,
    pattern: RegExp,
    escape: string,
  ): string => (text.includes(needle) ? text.replace(pattern, escape) : text);

  // "%" goes first so the escapes added below are not encoded again.
  let encoded = substitute(filepath, "%", percentRegEx, "%25");
  // In posix, backslash is a valid character in paths:
  if (!isWindows) {
    encoded = substitute(encoded, "\\", backslashRegEx, "%5C");
  }
  encoded = substitute(encoded, "\n", newlineRegEx, "%0A");
  encoded = substitute(encoded, "\r", carriageReturnRegEx, "%0D");
  encoded = substitute(encoded, "\t", tabRegEx, "%09");
  return encoded;
}
/**
 * This function ensures that `filepath` is resolved absolutely, and that the
 * URL control characters are correctly encoded when converting into a File URL.
 *
 * @see Tested in `parallel/test-url-pathtofileurl.js`.
 * @param filepath The file path string to convert to a file URL.
 * @returns The file URL object.
 * @throws ERR_INVALID_ARG_VALUE on Windows when a UNC path lacks a server
 * name or a resource path.
 */
export function pathToFileURL(filepath: string): URL {
  const outURL = new URL("file://");

  if (isWindows && filepath.startsWith("\\\\")) {
    // UNC path format: \\server\share\resource
    const segments = filepath.split("\\");
    if (segments.length <= 3) {
      throw new ERR_INVALID_ARG_VALUE(
        "filepath",
        filepath,
        "Missing UNC resource path",
      );
    }
    const serverName = segments[2];
    if (serverName.length === 0) {
      throw new ERR_INVALID_ARG_VALUE(
        "filepath",
        filepath,
        "Empty UNC servername",
      );
    }

    // TODO(wafuwafu13): To be `outURL.hostname = domainToASCII(hostname)` once `domainToASCII` are implemented
    outURL.hostname = serverName;
    outURL.pathname = encodePathChars(segments.slice(3).join("/"));
    return outURL;
  }

  let absolutePath = path.resolve(filepath);

  // path.resolve strips trailing slashes so we must add them back
  const lastCode = filepath.charCodeAt(filepath.length - 1);
  const inputEndsWithSeparator = lastCode === CHAR_FORWARD_SLASH ||
    (isWindows && lastCode === CHAR_BACKWARD_SLASH);
  if (inputEndsWithSeparator && !absolutePath.endsWith(path.sep)) {
    absolutePath += "/";
  }

  outURL.pathname = encodePathChars(absolutePath);
  return outURL;
}
/** Options object in the shape produced by `urlToHttpOptions()`. */
interface HttpOptions {
  /** Protocol to use. */
  protocol: string;
  /** A domain name or IP address of the server to issue the request to. */
  hostname: string;
  /** The fragment portion of the URL. */
  hash: string;
  /** The serialized query portion of the URL. */
  search: string;
  /** The path portion of the URL. */
  pathname: string;
  /** Request path: the pathname followed by the query string, if any. */
  path: string;
  /** The serialized URL. */
  href: string;
  /** Port of the remote server; only set when the URL has an explicit port. */
  port?: number;
  /** Basic authentication, i.e. `'user:password'`; only set when the URL carries credentials. */
  auth?: string;
}
/**
 * This utility function converts a URL object into an ordinary options object
 * as expected by the `http.request()` and `https.request()` APIs.
 *
 * @see Tested in `parallel/test-url-urltooptions.js`.
 * @param url The `WHATWG URL` object to convert to an options object.
 * @returns An options object with these fields:
 * - `protocol`: Protocol to use.
 * - `hostname`: A domain name or IP address of the server to issue the request
 *   to. The square brackets around a bracketed (IPv6) hostname are removed.
 * - `hash`: The fragment portion of the URL.
 * - `search`: The serialized query portion of the URL.
 * - `pathname`: The path portion of the URL.
 * - `path`: Request path, i.e. pathname plus query string if any.
 *   E.G. `'/index.html?page=12'`.
 * - `href`: The serialized URL.
 * - `port`: Port of remote server, as a number. Only present when the URL has
 *   an explicit port.
 * - `auth`: Basic authentication i.e. `'user:password'` (percent-decoded).
 *   Only present when the URL has a username or a password.
 */
export function urlToHttpOptions(url: URL): HttpOptions {
  const { hostname, pathname, search } = url;

  // Strip the brackets from a bracketed hostname.
  const bareHostname = typeof hostname === "string" && hostname.startsWith("[")
    ? hostname.slice(1, -1)
    : hostname;

  const options: HttpOptions = {
    protocol: url.protocol,
    hostname: bareHostname,
    hash: url.hash,
    search,
    pathname,
    path: `${pathname || ""}${search || ""}`,
    href: url.href,
  };

  if (url.port !== "") {
    options.port = Number(url.port);
  }

  if (url.username || url.password) {
    const user = decodeURIComponent(url.username);
    const password = decodeURIComponent(url.password);
    options.auth = `${user}:${password}`;
  }

  return options;
}
// Re-export `URLSearchParams` (presumably the runtime's global; no local
// declaration is visible here) as a named export of this module. It is bound to
// a local alias first and exported under its original name.
const URLSearchParams_ = URLSearchParams;
export { URLSearchParams_ as URLSearchParams };
// Default export: bundles the module's functions together with the `Url`,
// `URL` and `URLSearchParams` classes into a single object, so the module can
// also be consumed through a default import.
export default {
  parse,
  format,
  resolve,
  resolveObject,
  fileURLToPath,
  pathToFileURL,
  urlToHttpOptions,
  Url,
  URL,
  URLSearchParams,
};