// x/css/core/lexer/lexer.ts
// CSS Lexer & Parser implementation for Deno
import dbg from "../../debug/debug.js";import type { Token } from "../../ast/mod.ts";
const debug = dbg("lex");
/**
 * Convert a CSS string into an array of lexical tokens.
 *
 * The lexer is a character-at-a-time state machine. Every token records the
 * line/column where it started and ended. An @-rule value that is still open
 * when the input ends (no trailing semi-colon or newline) is emitted rather
 * than dropped.
 *
 * @param css CSS source text.
 * @returns The lexical tokens, in source order.
 */
export function lex(css: string): Token[] {
  /** Description of a supported @-rule. */
  interface AtRule {
    /** Keyword as written after the `@`. */
    name: string;
    /** Token type to report instead of `name` (used for vendor-prefixed rules). */
    type?: string;
    /** Vendor prefix recorded on the token. */
    prefix?: string;
    /** State entered after the keyword; "at-group" when omitted. */
    state?: string;
  }

  let start = 0; // Debug timer start.
  let buffer = ""; // Character accumulator
  let ch: string; // Current character
  let column = 0; // Current source column number
  let cursor = -1; // Current source cursor position
  let depth = 0; // Current nesting depth
  let line = 1; // Current source line number
  let state = "before-selector"; // Current state
  const stack = [state]; // State stack
  let token: Token = {}; // Current token
  const tokens: Token[] = []; // Token accumulator

  // Supported @-rules, in roughly descending order of usage probability.
  const atRules: AtRule[] = [
    { name: "media" },
    { name: "keyframes" },
    { name: "-webkit-keyframes", type: "keyframes", prefix: "-webkit-" },
    { name: "-moz-keyframes", type: "keyframes", prefix: "-moz-" },
    { name: "-ms-keyframes", type: "keyframes", prefix: "-ms-" },
    { name: "-o-keyframes", type: "keyframes", prefix: "-o-" },
    { name: "font-face" },
    { name: "import", state: "before-at-value" },
    { name: "charset", state: "before-at-value" },
    { name: "supports" },
    { name: "viewport" },
    { name: "namespace", state: "before-at-value" },
    { name: "document" },
    { name: "-moz-document", type: "document", prefix: "-moz-" },
    { name: "page" },
  ];

  // -- Functions ------------------------------------------------------------

  /**
   * Advance the character cursor and return the next character.
   *
   * @returns The next character (`undefined` at runtime once past the end).
   */
  function getCh(): string {
    skip();
    return css[cursor];
  }

  /**
   * Return the state at the given index in the stack.
   * The stack is LIFO so indexing is from the right.
   *
   * @param index Distance from the top of the stack; 0 or omitted returns
   *   the current state.
   * @returns The requested state.
   */
  function getState(index?: number): string {
    return index ? stack[stack.length - 1 - index] : state;
  }

  /**
   * Look ahead for a string that must start exactly at the next position.
   *
   * @param str The string to look for.
   * @returns Whether the string was found.
   */
  function isNextString(str: string): boolean {
    return css.startsWith(str, cursor + 1);
  }

  /**
   * Find a substring at or after the current cursor position.
   *
   * @param str The substring to look for.
   * @returns The offset of the match relative to the cursor, or `false` when
   *   there is no match or the match sits exactly at the cursor.
   */
  function find(str: string): number | false {
    // Search in place instead of copying the remainder of the input.
    const absolute = css.indexOf(str, cursor);
    if (absolute === -1) {
      return false;
    }
    const pos = absolute - cursor;
    return pos > 0 ? pos : false;
  }

  /**
   * Determine whether a character is next.
   *
   * @param ch Character.
   * @returns Whether the character is next.
   */
  function isNextChar(ch: string): boolean {
    return ch === peek(1);
  }

  /**
   * Return the character at the given cursor offset. The offset is relative
   * to the cursor, so negative values move backwards.
   *
   * @param offset Cursor offset; 0 or omitted is treated as 1.
   * @returns The character at that position.
   */
  function peek(offset?: number): string {
    return css[cursor + (offset || 1)];
  }

  /**
   * Remove the current state from the stack and set the new current state.
   *
   * @returns The removed state.
   */
  function popState(): string | undefined {
    const removed = stack.pop();
    state = stack[stack.length - 1];
    return removed;
  }

  /**
   * Set the current state and add it to the stack.
   *
   * @param newState The new state.
   * @returns The new stack length.
   */
  function pushState(newState: string): number {
    state = newState;
    stack.push(state);
    return stack.length;
  }

  /**
   * Replace the current state with a new state.
   *
   * @param newState The new state.
   * @returns The replaced state.
   */
  function replaceState(newState: string): string {
    const previousState = state;
    stack[stack.length - 1] = state = newState;
    return previousState;
  }

  /**
   * Move the character cursor forward, keeping `line` and `column` in sync.
   * Negative numbers are not supported!
   *
   * @param n Number of characters to skip; 0 or omitted is treated as 1.
   */
  function skip(n?: number): void {
    const count = n || 1;

    if (count === 1) {
      // Leaving a newline starts a fresh line at column 1.
      if (css[cursor] === "\n") {
        line++;
        column = 1;
      } else {
        column++;
      }
      cursor++;
      return;
    }

    // Count the newlines in the skipped span (which starts at the current
    // character) and measure the text after the last of them.
    const skipped = css.slice(cursor, cursor + count).split("\n");
    if (skipped.length > 1) {
      line += skipped.length - 1;
      column = 1;
    }
    column += skipped[skipped.length - 1].length;
    cursor += count;
  }

  /**
   * Stamp the end position on the current token, add it to the pile and
   * reset the buffer and the current token.
   */
  function addToken(): void {
    token.end = {
      line: line,
      col: column,
    };

    debug("addToken:", JSON.stringify(token, null, 2));

    tokens.push(token);

    buffer = "";
    token = {};
  }

  /**
   * Start a new current token at the present source position.
   *
   * @param type Token type.
   */
  function initializeToken(type: string): void {
    token = {
      type: type,
      start: {
        line: line,
        col: column,
      },
    };
  }

  // -- Main Loop ------------------------------------------------------------

  /*
    The main loop is a state machine that reads in one character at a time,
    and determines what to do based on the current state and character.
    This is implemented as a series of nested `switch` statements and the
    case orders have been mildly optimized based on rough probabilities
    calculated by processing a small sample of real-world CSS.

    Further optimization (such as a dispatch table) shouldn't be necessary
    since the total number of cases is very low.
  */

  start = Date.now();

  while ((ch = getCh())) {
    debug(ch, getState());

    switch (ch) {
      // Space
      case " ":
        switch (getState()) {
          case "selector":
          case "value":
          case "value-paren":
          case "at-group":
          case "at-value":
          case "comment":
          case "double-string":
          case "single-string":
            buffer += ch;
            break;
        }
        break;

      // Newline or tab
      case "\n":
      case "\t":
      case "\r":
      case "\f":
        switch (getState()) {
          case "value":
          case "value-paren":
          case "at-group":
          case "comment":
          case "single-string":
          case "double-string":
          case "selector":
            buffer += ch;
            break;

          case "at-value":
            // Tokenize an @-rule if a semi-colon was omitted.
            if ("\n" === ch) {
              token.value = buffer.trim();
              addToken();
              popState();
            }
            break;
        }
        break;

      case ":":
        switch (getState()) {
          case "name":
            token.name = buffer.trim();
            buffer = "";
            replaceState("before-value");
            break;

          case "before-selector":
            buffer += ch;
            initializeToken("selector");
            pushState("selector");
            break;

          case "before-value":
            replaceState("value");
            buffer += ch;
            break;

          default:
            buffer += ch;
            break;
        }
        break;

      case ";":
        switch (getState()) {
          case "name":
          case "before-value":
          case "value":
            // Tokenize a declaration; if the value is empty skip it.
            if (buffer.trim().length > 0) {
              token.value = buffer.trim();
              addToken();
            }
            replaceState("before-name");
            break;

          case "value-paren":
            // Insignificant semi-colon
            buffer += ch;
            break;

          case "at-value":
            // Tokenize an @-rule
            token.value = buffer.trim();
            addToken();
            popState();
            break;

          case "before-name":
            // Extraneous semi-colon
            break;

          default:
            buffer += ch;
            break;
        }
        break;

      case "{":
        switch (getState()) {
          case "selector":
            // If the sequence is `\{` then assume that the brace should be
            // escaped.
            if (peek(-1) === "\\") {
              buffer += ch;
              break;
            }

            // Tokenize a selector
            token.text = buffer.trim();
            addToken();
            replaceState("before-name");
            depth = depth + 1;
            break;

          case "at-group":
            // Tokenize an @-group
            token.name = buffer.trim();
            switch (token.type) {
              case "font-face":
              case "viewport":
              case "page":
                // These groups contain declarations directly.
                pushState("before-name");
                break;

              default:
                pushState("before-selector");
            }

            addToken();
            depth = depth + 1;
            break;

          case "name":
          case "at-rule":
            // Tokenize a declaration or an @-rule
            token.name = buffer.trim();
            addToken();
            pushState("before-name");
            depth = depth + 1;
            break;

          case "comment":
          case "double-string":
          case "single-string":
            // Ignore braces in comments and strings
            buffer += ch;
            break;

          case "before-value":
            replaceState("value");
            buffer += ch;
            break;
        }
        break;

      case "}":
        switch (getState()) {
          case "before-name":
          case "name":
          case "before-value":
          case "value":
            // If the buffer contains anything, it is a value
            if (buffer) {
              token.value = buffer.trim();
            }

            // If the current token has a name and a value it should be
            // tokenized.
            if (token.name && token.value) {
              addToken();
            }

            // Leave the block
            initializeToken("end");
            addToken();
            popState();

            // We might need to leave again.
            // XXX: What about 3 levels deep?
            if ("at-group" === getState()) {
              initializeToken("at-group-end");
              addToken();
              popState();
            }

            if (depth > 0) {
              depth = depth - 1;
            }
            break;

          case "at-group":
          case "before-selector":
          case "selector":
            // If the sequence is `\}` then assume that the brace should be
            // escaped.
            if (peek(-1) === "\\") {
              buffer += ch;
              break;
            }

            // Leave block if in an at-group
            if (depth > 0 && "at-group" === getState(1)) {
              initializeToken("at-group-end");
              addToken();
            }

            if (depth > 1) {
              popState();
            }

            if (depth > 0) {
              depth = depth - 1;
            }
            break;

          case "double-string":
          case "single-string":
          case "comment":
            // Ignore braces in comments and strings.
            buffer += ch;
            break;
        }
        break;

      // Strings
      case '"':
      case "'":
        switch (getState()) {
          case "double-string":
            if ('"' === ch && "\\" !== peek(-1)) {
              popState();
            }
            break;

          case "single-string":
            if ("'" === ch && "\\" !== peek(-1)) {
              popState();
            }
            break;

          case "before-at-value":
            replaceState("at-value");
            pushState('"' === ch ? "double-string" : "single-string");
            break;

          case "before-value":
            replaceState("value");
            pushState('"' === ch ? "double-string" : "single-string");
            break;

          case "comment":
            // Ignore strings within comments.
            break;

          default:
            if ("\\" !== peek(-1)) {
              pushState('"' === ch ? "double-string" : "single-string");
            }
        }

        // The quote itself is always kept in the buffer.
        buffer += ch;
        break;

      // Comments
      case "/":
        switch (getState()) {
          case "comment":
          case "double-string":
          case "single-string":
            // Ignore
            buffer += ch;
            break;

          case "before-value":
          case "selector":
          case "name":
          case "value":
            if (isNextChar("*")) {
              // Ignore comments in selectors, properties and values. They
              // are difficult to represent in the AST.
              const pos = find("*/");

              if (pos !== false) {
                // Land on the closing `/`; the next getCh() steps past it.
                skip(pos + 1);
              }
            } else {
              if (getState() === "before-value") {
                replaceState("value");
              }
              buffer += ch;
            }
            break;

          default:
            if (isNextChar("*")) {
              // Create a comment token
              initializeToken("comment");
              pushState("comment");
              skip();
            } else {
              buffer += ch;
            }
            break;
        }
        break;

      // Comment end or universal selector
      case "*":
        switch (getState()) {
          case "comment":
            if (isNextChar("/")) {
              // Tokenize a comment
              token.text = buffer; // Don't trim()!
              skip();
              addToken();
              popState();
            } else {
              buffer += ch;
            }
            break;

          case "before-selector":
            buffer += ch;
            initializeToken("selector");
            pushState("selector");
            break;

          case "before-value":
            replaceState("value");
            buffer += ch;
            break;

          default:
            buffer += ch;
        }
        break;

      // @-rules
      case "@":
        switch (getState()) {
          case "comment":
          case "double-string":
          case "single-string":
            buffer += ch;
            break;

          case "before-value":
            replaceState("value");
            buffer += ch;
            break;

          default: {
            // Iterate over the supported @-rules and attempt to tokenize one.
            let tokenized = false;

            for (const rule of atRules) {
              if (!isNextString(rule.name)) {
                continue;
              }

              tokenized = true;

              initializeToken(rule.name);
              pushState(rule.state || "at-group");
              skip(rule.name.length);

              if (rule.prefix) {
                token.prefix = rule.prefix;
              }

              if (rule.type) {
                token.type = rule.type;
              }
              break;
            }

            if (!tokenized) {
              buffer += ch;
            }
            break;
          }
        }
        break;

      // Parentheses are tracked to disambiguate semi-colons, such as within
      // a data URI.
      case "(":
        switch (getState()) {
          case "value":
            pushState("value-paren");
            break;

          case "before-value":
            replaceState("value");
            break;
        }

        buffer += ch;
        break;

      case ")":
        switch (getState()) {
          case "value-paren":
            popState();
            break;

          case "before-value":
            replaceState("value");
            break;
        }

        buffer += ch;
        break;

      default:
        switch (getState()) {
          case "before-selector":
            initializeToken("selector");
            pushState("selector");
            break;

          case "before-name":
            initializeToken("property");
            replaceState("name");
            break;

          case "before-value":
            replaceState("value");
            break;

          case "before-at-value":
            replaceState("at-value");
            break;
        }

        buffer += ch;
        break;
    }
  }

  // An @-rule value that runs to the end of the input has neither a
  // semi-colon nor a newline to terminate it; emit it instead of dropping it.
  if (getState() === "at-value") {
    token.value = buffer.trim();
    addToken();
    popState();
  }

  debug("ran in", Date.now() - start + "ms");

  return tokens;
}