// /core/lexer/lexer.ts | css@0.3.0
//
// CSS Lexer & Parser implementation for Deno
import dbg from "../../debug/debug.js";import type { Token } from "../../ast/mod.ts";
// Logger used by `lex` below, obtained from the imported `dbg` factory.
// NOTE(review): "lex" is presumably the namespace used to tag/filter this
// module's debug output — confirm against ../../debug/debug.js.
const debug = dbg("lex");
/**
 * Convert a CSS string into an array of lexical tokens.
 *
 * The lexer is a single-pass state machine: it reads one character at a
 * time and decides what to do from the current character and the current
 * state. States are kept on a stack so that nested constructs (strings,
 * parentheses, comments, blocks inside @-groups) can return to the state
 * that enclosed them.
 *
 * Every emitted token carries `start` and `end` positions (`line`/`col`)
 * plus, depending on its type, `text`, `name`, `value` and `prefix`.
 *
 * @param css CSS source text.
 * @returns The lexical tokens, in source order.
 */
export function lex(css: string): Token[] {
  /** Description of one supported @-rule. */
  interface AtRuleSpec {
    /** Keyword as written after the `@`. */
    name: string;
    /** Token type to report instead of `name` (used for vendor-prefixed rules). */
    type?: string;
    /** Vendor prefix recorded on the token. */
    prefix?: string;
    /** State entered after the keyword; defaults to "at-group". */
    state?: string;
  }

  let buffer = ""; // Character accumulator
  let ch: string; // Current character
  let column = 0; // Current source column number
  let cursor = -1; // Current source cursor position
  let depth = 0; // Current nesting depth
  let line = 1; // Current source line number
  let state = "before-selector"; // Current state
  const stack = [state]; // State stack
  let token: Token = {}; // Current token
  const tokens: Token[] = []; // Token accumulator

  // Supported @-rules, in roughly descending order of usage probability.
  // The first entry whose name matches the input wins.
  const atRules: readonly AtRuleSpec[] = [
    { name: "media" },
    { name: "keyframes" },
    { name: "-webkit-keyframes", type: "keyframes", prefix: "-webkit-" },
    { name: "-moz-keyframes", type: "keyframes", prefix: "-moz-" },
    { name: "-ms-keyframes", type: "keyframes", prefix: "-ms-" },
    { name: "-o-keyframes", type: "keyframes", prefix: "-o-" },
    { name: "font-face" },
    { name: "import", state: "before-at-value" },
    { name: "charset", state: "before-at-value" },
    { name: "supports" },
    { name: "viewport" },
    { name: "namespace", state: "before-at-value" },
    { name: "document" },
    { name: "-moz-document", type: "document", prefix: "-moz-" },
    { name: "page" },
  ];

  // -- Functions ------------------------------------------------------------

  /**
   * Advance the character cursor and return the next character.
   *
   * @returns The next character (`undefined` at runtime once the input is
   *   exhausted, which is what terminates the main loop).
   */
  function getCh(): string {
    skip();
    return css[cursor];
  }

  /**
   * Return the state at the given index in the stack.
   * The stack is LIFO so indexing is from the right.
   *
   * @param index Distance from the top of the stack; omitted or 0 returns
   *   the current state.
   * @returns The requested state.
   */
  function getState(index?: number): string {
    return index ? stack[stack.length - 1 - index] : state;
  }

  /**
   * Look ahead for a string that must start exactly at the next position.
   *
   * @param str The string to look for.
   * @returns Whether the string was found.
   */
  function isNextString(str: string): boolean {
    return css.startsWith(str, cursor + 1);
  }

  /**
   * Find a substring that may begin anywhere at or after `cursor + offset`.
   *
   * @param str The substring to look for.
   * @param offset Where to start searching, relative to the cursor
   *   (defaults to the next position).
   * @returns The match position relative to the cursor, or `false` if the
   *   substring does not occur.
   */
  function find(str: string, offset = 1): number | false {
    // Search in place: slicing the remaining input for every lookup would
    // copy the rest of the stylesheet each time.
    const absolute = css.indexOf(str, cursor + offset);
    return absolute === -1 ? false : absolute - cursor;
  }

  /**
   * Determine whether a character is next.
   *
   * @param expected Character to compare with the next input character.
   * @returns Whether the character is next.
   */
  function isNextChar(expected: string): boolean {
    return expected === peek(1);
  }

  /**
   * Return the character at the given cursor offset. The offset is relative
   * to the cursor, so negative values move backwards.
   *
   * @param offset Cursor offset; a falsy offset is treated as 1.
   * @returns The character at that position.
   */
  function peek(offset: number): string {
    return css[cursor + (offset || 1)];
  }

  /**
   * Remove the current state from the stack and make the state beneath it
   * current.
   *
   * @returns The removed state.
   */
  function popState(): string | undefined {
    const removed = stack.pop();
    state = stack[stack.length - 1];
    return removed;
  }

  /**
   * Set the current state and add it to the stack.
   *
   * @param newState The new state.
   * @returns The new stack length.
   */
  function pushState(newState: string): number {
    state = newState;
    stack.push(state);
    return stack.length;
  }

  /**
   * Replace the current state with a new state.
   *
   * @param newState The new state.
   * @returns The replaced state.
   */
  function replaceState(newState: string): string {
    const previousState = state;
    stack[stack.length - 1] = state = newState;
    return previousState;
  }

  /**
   * Move the character cursor forward, keeping `line` and `column` in sync.
   * Negative numbers are not supported!
   *
   * @param n Number of characters to skip; omitted or 0 means 1.
   */
  function skip(n?: number): void {
    const count = n || 1;

    if (count === 1) {
      // Leaving a newline starts a new line; anything else moves one column.
      if (css[cursor] === "\n") {
        line++;
        column = 1;
      } else {
        column++;
      }
      cursor++;
      return;
    }

    // Count the newlines inside the skipped span and restart the column
    // after the last one.
    const segments = css.slice(cursor, cursor + count).split("\n");
    if (segments.length > 1) {
      line += segments.length - 1;
      column = 1;
    }
    column += segments[segments.length - 1].length;
    cursor += count;
  }

  /**
   * Stamp the end position on the current token, add it to the pile and
   * reset the buffer and the current token.
   */
  function addToken(): void {
    token.end = {
      line: line,
      col: column,
    };

    debug("addToken:", JSON.stringify(token, null, 2));

    tokens.push(token);

    buffer = "";
    token = {};
  }

  /**
   * Start a new current token at the current source position.
   *
   * @param type Token type.
   */
  function initializeToken(type: string): void {
    token = {
      type: type,
      start: {
        line: line,
        col: column,
      },
    };
  }

  // -- Main Loop ------------------------------------------------------------

  /*
  The main loop is a state machine that reads in one character at a time,
  and determines what to do based on the current state and character.
  This is implemented as a series of nested `switch` statements and the
  case orders have been mildly optimized based on rough probabilities
  calculated by processing a small sample of real-world CSS.
  Further optimization (such as a dispatch table) shouldn't be necessary
  since the total number of cases is very low.

  Line and column bookkeeping happens in `skip()`, not here.
  */

  const start = Date.now(); // Debug timer start.

  while ((ch = getCh())) {
    debug(ch, getState());

    switch (ch) {
      // Space
      case " ":
        switch (getState()) {
          case "selector":
          case "value":
          case "value-paren":
          case "at-group":
          case "at-value":
          case "comment":
          case "double-string":
          case "single-string":
            buffer += ch;
            break;
        }
        break;

      // Newline or tab
      case "\n":
      case "\t":
      case "\r":
      case "\f":
        switch (getState()) {
          case "value":
          case "value-paren":
          case "at-group":
          case "comment":
          case "single-string":
          case "double-string":
          case "selector":
            buffer += ch;
            break;

          case "at-value":
            // Tokenize an @-rule if a semi-colon was omitted.
            if ("\n" === ch) {
              token.value = buffer.trim();
              addToken();
              popState();
            }
            break;
        }
        break;

      case ":":
        switch (getState()) {
          case "name":
            token.name = buffer.trim();
            buffer = "";
            replaceState("before-value");
            break;

          case "before-selector":
            buffer += ch;
            initializeToken("selector");
            pushState("selector");
            break;

          case "before-value":
            replaceState("value");
            buffer += ch;
            break;

          default:
            buffer += ch;
            break;
        }
        break;

      case ";":
        switch (getState()) {
          case "name":
          case "before-value":
          case "value":
            // Tokenize a declaration; an empty value means the declaration
            // is skipped.
            if (buffer.trim().length > 0) {
              token.value = buffer.trim();
              addToken();
            }
            replaceState("before-name");
            break;

          case "value-paren":
            // Insignificant semi-colon
            buffer += ch;
            break;

          case "at-value":
            // Tokenize an @-rule
            token.value = buffer.trim();
            addToken();
            popState();
            break;

          case "before-name":
            // Extraneous semi-colon
            break;

          default:
            buffer += ch;
            break;
        }
        break;

      case "{":
        switch (getState()) {
          case "selector":
            // If the sequence is `\{` then assume that the brace should be escaped.
            if (peek(-1) === "\\") {
              buffer += ch;
              break;
            }

            // Tokenize a selector
            token.text = buffer.trim();
            addToken();
            replaceState("before-name");
            depth = depth + 1;
            break;

          case "at-group":
            // Tokenize an @-group
            token.name = buffer.trim();
            switch (token.type) {
              // These groups contain declarations directly.
              case "font-face":
              case "viewport":
              case "page":
                pushState("before-name");
                break;

              // Every other group contains nested rules.
              default:
                pushState("before-selector");
            }

            addToken();
            depth = depth + 1;
            break;

          case "name":
          case "at-rule":
            // Tokenize a declaration or an @-rule
            token.name = buffer.trim();
            addToken();
            pushState("before-name");
            depth = depth + 1;
            break;

          case "comment":
          case "double-string":
          case "single-string":
            // Ignore braces in comments and strings
            buffer += ch;
            break;

          case "before-value":
            replaceState("value");
            buffer += ch;
            break;
        }
        break;

      case "}":
        switch (getState()) {
          case "before-name":
          case "name":
          case "before-value":
          case "value":
            // If the buffer contains anything, it is a value
            if (buffer) {
              token.value = buffer.trim();
            }

            // If the current token has a name and a value it should be tokenized.
            if (token.name && token.value) {
              addToken();
            }

            // Leave the block
            initializeToken("end");
            addToken();
            popState();

            // We might need to leave again.
            // XXX: What about 3 levels deep?
            if ("at-group" === getState()) {
              initializeToken("at-group-end");
              addToken();
              popState();
            }

            if (depth > 0) {
              depth = depth - 1;
            }
            break;

          case "at-group":
          case "before-selector":
          case "selector":
            // If the sequence is `\}` then assume that the brace should be escaped.
            if (peek(-1) === "\\") {
              buffer += ch;
              break;
            }

            if (depth > 0) {
              // Leave block if in an at-group
              if ("at-group" === getState(1)) {
                initializeToken("at-group-end");
                addToken();
              }
            }

            if (depth > 1) {
              popState();
            }

            if (depth > 0) {
              depth = depth - 1;
            }
            break;

          case "double-string":
          case "single-string":
          case "comment":
            // Ignore braces in comments and strings.
            buffer += ch;
            break;
        }
        break;

      // Strings
      case '"':
      case "'":
        switch (getState()) {
          case "double-string":
            // Only an unescaped matching quote closes the string.
            if ('"' === ch && "\\" !== peek(-1)) {
              popState();
            }
            break;

          case "single-string":
            if ("'" === ch && "\\" !== peek(-1)) {
              popState();
            }
            break;

          case "before-at-value":
            replaceState("at-value");
            pushState('"' === ch ? "double-string" : "single-string");
            break;

          case "before-value":
            replaceState("value");
            pushState('"' === ch ? "double-string" : "single-string");
            break;

          case "comment":
            // Ignore strings within comments.
            break;

          default:
            if ("\\" !== peek(-1)) {
              pushState('"' === ch ? "double-string" : "single-string");
            }
        }

        // The quote itself is always kept, whatever the state.
        buffer += ch;
        break;

      // Comments
      case "/":
        switch (getState()) {
          case "comment":
          case "double-string":
          case "single-string":
            // Ignore
            buffer += ch;
            break;

          case "before-value":
          case "selector":
          case "name":
          case "value":
            if (isNextChar("*")) {
              // Ignore comments in selectors, properties and values. They are
              // difficult to represent in the AST.
              // Start looking after the two-character opener so that the
              // `*` of `/*` cannot double as the `*` of the terminator
              // (i.e. `/*/` does not close the comment).
              const pos = find("*/", 2);

              // Land on the closing `/`; the next read moves past it. An
              // unterminated comment leaves the cursor where it is.
              if (pos !== false) {
                skip(pos + 1);
              }
            } else {
              if (getState() === "before-value") replaceState("value");
              buffer += ch;
            }
            break;

          default:
            if (isNextChar("*")) {
              // Create a comment token
              initializeToken("comment");
              pushState("comment");
              skip();
            } else {
              buffer += ch;
            }
            break;
        }
        break;

      // Comment end or universal selector
      case "*":
        switch (getState()) {
          case "comment":
            if (isNextChar("/")) {
              // Tokenize a comment
              token.text = buffer; // Don't trim()!
              skip();
              addToken();
              popState();
            } else {
              buffer += ch;
            }
            break;

          case "before-selector":
            buffer += ch;
            initializeToken("selector");
            pushState("selector");
            break;

          case "before-value":
            replaceState("value");
            buffer += ch;
            break;

          default:
            buffer += ch;
        }
        break;

      // @-rules
      case "@":
        switch (getState()) {
          case "comment":
          case "double-string":
          case "single-string":
            buffer += ch;
            break;

          case "before-value":
            replaceState("value");
            buffer += ch;
            break;

          default: {
            // Pick the first supported @-rule whose keyword follows the `@`.
            const rule = atRules.find((candidate) =>
              isNextString(candidate.name)
            );

            if (rule === undefined) {
              // Not a rule we know: keep the `@` as ordinary text.
              buffer += ch;
              break;
            }

            // The token starts at the `@`, so initialize before skipping
            // over the keyword.
            initializeToken(rule.name);
            pushState(rule.state || "at-group");
            skip(rule.name.length);

            if (rule.prefix) {
              token.prefix = rule.prefix;
            }

            // Vendor-prefixed rules report the unprefixed type.
            if (rule.type) {
              token.type = rule.type;
            }
            break;
          }
        }
        break;

      // Parentheses are tracked to disambiguate semi-colons, such as within a
      // data URI.
      case "(":
        switch (getState()) {
          case "value":
            pushState("value-paren");
            break;

          case "before-value":
            replaceState("value");
            break;
        }

        buffer += ch;
        break;

      case ")":
        switch (getState()) {
          case "value-paren":
            popState();
            break;

          case "before-value":
            replaceState("value");
            break;
        }

        buffer += ch;
        break;

      default:
        // Any other character starts whatever the current "before-*" state
        // is waiting for, and is always accumulated.
        switch (getState()) {
          case "before-selector":
            initializeToken("selector");
            pushState("selector");
            break;

          case "before-name":
            initializeToken("property");
            replaceState("name");
            break;

          case "before-value":
            replaceState("value");
            break;

          case "before-at-value":
            replaceState("at-value");
            break;
        }

        buffer += ch;
        break;
    }
  }

  debug("ran in", Date.now() - start + "ms");

  return tokens;
}