/src/block-lexer.ts | markdown@v2.0.0

Deno Markdown module forked from https://github.com/ts-stack/markdown/tree/bb47aa8e625e89e6aa84f49a98536a3089dee831
Latest
File
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
/** * @license * * Copyright (c) 2011-2014, Christopher Jeffrey. (MIT Licensed) * https://github.com/chjj/marked * * Copyright (c) 2018, Костя Третяк. (MIT Licensed) * https://github.com/ts-stack/markdown */
import { ExtendRegexp } from "./extend-regexp.ts";import {  Align,  LexerReturns,  Links,  MarkedOptions,  RulesBlockBase,  RulesBlockGfm,  RulesBlockTables,  Token,  TokenType,  Obj} from "./interfaces.ts";import { Marked } from "./marked.ts";import { load } from "https://deno.land/std/encoding/_yaml/loader/loader.ts";
/**
 * Block-level Markdown lexer.
 *
 * Splits Markdown source into a flat list of block tokens (headings, code,
 * lists, tables, ...), collects link reference definitions and extracts a
 * leading YAML front-matter block into `meta`.
 *
 * The grammar is selected from `options.gfm` / `options.tables` and each
 * grammar is built once and cached on the class.
 */
export class BlockLexer<T extends typeof BlockLexer> {
  /**
   * Extra user-supplied rules. A match of rule `i` produces a token of type
   * `"simpleRule" + (i + 1)` carrying the raw `execArr`.
   */
  static simpleRules: RegExp[] = [];
  /**
   * Cached base block grammar (built lazily by `getRulesBase()`).
   */
  protected static rulesBase: RulesBlockBase;
  /**
   * GFM Block Grammar.
   */
  protected static rulesGfm: RulesBlockGfm;
  /**
   * GFM + Tables Block Grammar.
   */
  protected static rulesTables: RulesBlockTables;
  protected rules!: RulesBlockBase | RulesBlockGfm | RulesBlockTables;
  protected options: MarkedOptions;
  protected links: Links = {};
  protected tokens: Token[] = [];
  protected frontmatter: Obj = {};
  protected hasRulesGfm!: boolean;
  protected hasRulesTables!: boolean;

  /**
   * @param staticThis The class whose static grammar getters and
   * `simpleRules` are used by this instance.
   * @param options Lexer options; falls back to `Marked.options` when omitted.
   */
  constructor(protected staticThis: typeof BlockLexer, options?: object) {
    this.options = options || Marked.options;
    this.setRules();
  }

  /**
   * Accepts Markdown text and returns object with tokens and links.
   *
   * @param src String of markdown source to be compiled.
   * @param options Hash of options.
   * @param top Whether `src` is lexed as top-level content (enables
   * paragraphs, tables and link definitions).
   * @param isBlockQuote Forwarded unchanged to `getTokens()`.
   */
  static lex(
    src: string,
    options?: MarkedOptions,
    top?: boolean,
    isBlockQuote?: boolean,
  ): LexerReturns {
    const lexer = new this(this, options);
    return lexer.getTokens(src, top, isBlockQuote);
  }

  /**
   * Builds (once) and returns the base block grammar.
   *
   * The literal regexps below contain placeholder words (`bull`, `hr`, `def`,
   * `comment`, `closed`, `closing`, `tag`, ...) that are filled in through
   * `ExtendRegexp.setGroup()` before the final expressions are produced.
   */
  protected static getRulesBase(): RulesBlockBase {
    if (this.rulesBase) {
      return this.rulesBase;
    }

    const base: RulesBlockBase = {
      newline: /^\n+/,
      code: /^( {4}[^\n]+\n*)+/,
      hr: /^( *[-*_]){3,} *(?:\n+|$)/,
      heading: /^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)/,
      lheading: /^([^\n]+)\n *(=|-){2,} *(?:\n+|$)/,
      blockquote: /^( *>[^\n]+(\n[^\n]+)*\n*)+/,
      list: /^( *)(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?!\1bull )\n*|\s*$)/,
      html:
        /^ *(?:comment *(?:\n|\s*$)|closed *(?:\n{2,}|\s*$)|closing *(?:\n{2,}|\s*$))/,
      def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +["(]([^\n]+)[")])? *(?:\n+|$)/,
      paragraph:
        /^((?:[^\n]+\n?(?!hr|heading|lheading|blockquote|tag|def))+)\n*/,
      text: /^[^\n]+/,
      bullet: /(?:[*+-]|\d+\.)/,
      item: /^( *)(bull) [^\n]*(?:\n(?!\1bull )[^\n]*)*/,
    };

    // `item` is global + multiline so that `String.prototype.match` returns
    // every list item of a list block at once.
    base.item = new ExtendRegexp(base.item, "gm")
      .setGroup(/bull/g, base.bullet)
      .getRegexp();

    base.list = new ExtendRegexp(base.list)
      .setGroup(/bull/g, base.bullet)
      .setGroup("hr", "\\n+(?=\\1?(?:[-*_] *){3,}(?:\\n+|$))")
      .setGroup("def", "\\n+(?=" + base.def.source + ")")
      .getRegexp();

    // Any tag name except the inline-level ones listed here.
    const tag = "(?!(?:" +
      "a|em|strong|small|s|cite|q|dfn|abbr|data|time|code" +
      "|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo" +
      "|span|br|wbr|ins|del|img)\\b)\\w+(?!:/|[^\\w\\s@]*@)\\b";

    base.html = new ExtendRegexp(base.html)
      .setGroup("comment", /<!--[\s\S]*?-->/)
      .setGroup("closed", /<(tag)[\s\S]+?<\/\1>/)
      .setGroup("closing", /<tag(?:"[^"]*"|'[^']*'|[^'">])*?>/)
      .setGroup(/tag/g, tag)
      .getRegexp();

    base.paragraph = new ExtendRegexp(base.paragraph)
      .setGroup("hr", base.hr)
      .setGroup("heading", base.heading)
      .setGroup("lheading", base.lheading)
      .setGroup("blockquote", base.blockquote)
      .setGroup("tag", "<" + tag)
      .setGroup("def", base.def)
      .getRegexp();

    return (this.rulesBase = base);
  }

  /**
   * Builds (once) and returns the GFM grammar: the base grammar plus fenced
   * code blocks, a stricter heading rule (space required after the `#`s) and
   * a paragraph rule that additionally stops before fences and lists.
   */
  protected static getRulesGfm(): RulesBlockGfm {
    if (this.rulesGfm) {
      return this.rulesGfm;
    }

    const base = this.getRulesBase();

    const gfm: RulesBlockGfm = {
      ...base,
      fences: /^ *(`{3,}|~{3,})[ \.]*(\S+)? *\n([\s\S]*?)\s*\1 *(?:\n+|$)/,
      // Placeholder; the real paragraph rule is assembled below.
      paragraph: /^/,
      heading: /^ *(#{1,6}) +([^\n]+?) *#* *(?:\n+|$)/,
    };

    // The fences/list sources are embedded inside the paragraph regexp, which
    // shifts their capture groups, so the (first) back-reference of each is
    // renumbered accordingly.
    const group1 = gfm.fences.source.replace("\\1", "\\2");
    const group2 = base.list.source.replace("\\1", "\\3");

    gfm.paragraph = new ExtendRegexp(base.paragraph)
      .setGroup("(?!", `(?!${group1}|${group2}|`)
      .getRegexp();

    return (this.rulesGfm = gfm);
  }

  /**
   * Builds (once) and returns the GFM grammar extended with the two table
   * rules: `nptable` (no leading pipe) and `table` (leading pipe).
   */
  protected static getRulesTable(): RulesBlockTables {
    if (this.rulesTables) {
      return this.rulesTables;
    }

    return (this.rulesTables = {
      ...this.getRulesGfm(),
      nptable:
        /^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*/,
      table: /^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*/,
    });
  }

  /**
   * Selects the grammar matching `options.gfm` / `options.tables` and records
   * which optional rule sets (`fences`, `table`) it provides.
   */
  protected setRules() {
    if (!this.options.gfm) {
      this.rules = this.staticThis.getRulesBase();
    } else if (this.options.tables) {
      this.rules = this.staticThis.getRulesTable();
    } else {
      this.rules = this.staticThis.getRulesGfm();
    }

    this.hasRulesGfm = (this.rules as RulesBlockGfm).fences !== undefined;
    this.hasRulesTables = (this.rules as RulesBlockTables).table !== undefined;
  }

  /**
   * Converts the cells of a table delimiter row (e.g. `:--`, `--:`, `:-:`)
   * into alignment values; cells without a colon marker yield `""`.
   */
  protected parseTableAlign(markers: string[]): Align[] {
    return markers.map((marker): Align => {
      if (/^ *-+: *$/.test(marker)) {
        return "right";
      }
      if (/^ *:-+: *$/.test(marker)) {
        return "center";
      }
      if (/^ *:-+ *$/.test(marker)) {
        return "left";
      }
      return "";
    });
  }

  /**
   * Lexing.
   *
   * Repeatedly matches the block rules against the start of the remaining
   * input, in a fixed order, consuming the matched text and appending tokens
   * to `this.tokens`. Nested content (blockquotes, list items) is lexed by
   * recursive calls that append to the same token list.
   *
   * @param src Markdown source to tokenize.
   * @param top Whether `src` is top-level content. Tables, link definitions
   * and paragraphs are only recognised when this is truthy.
   * @param isBlockQuote Only forwarded to the recursive call for list items.
   * @returns The accumulated tokens, link definitions and front-matter data.
   * @throws Error when no rule is able to consume the remaining input.
   */
  protected getTokens(
    src: string,
    top?: boolean,
    isBlockQuote?: boolean,
  ): LexerReturns {
    let nextPart = src;
    let execArr: RegExpExecArray | null;

    mainLoop:
    while (nextPart) {
      // newline (deliberately no `continue`: the following rules are tried
      // on the remaining input within the same iteration)
      if ((execArr = this.rules.newline.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);

        if (execArr[0].length > 1) {
          this.tokens.push({ type: TokenType.space });
        }
      }

      // code
      if ((execArr = this.rules.code.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);
        const code = execArr[0].replace(/^ {4}/gm, "");

        this.tokens.push({
          type: TokenType.code,
          text: !this.options.pedantic ? code.replace(/\n+$/, "") : code,
        });
        continue;
      }

      // fences code (gfm)
      if (
        this.hasRulesGfm &&
        (execArr = (this.rules as RulesBlockGfm).fences.exec(nextPart))
      ) {
        nextPart = nextPart.substring(execArr[0].length);

        this.tokens.push({
          type: TokenType.code,
          lang: execArr[2],
          text: execArr[3] || "",
        });
        continue;
      }

      // heading
      if ((execArr = this.rules.heading.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);
        this.tokens.push({
          type: TokenType.heading,
          depth: execArr[1].length,
          text: execArr[2],
        });
        continue;
      }

      // table no leading pipe (gfm)
      if (
        top && this.hasRulesTables &&
        (execArr = (this.rules as RulesBlockTables).nptable.exec(nextPart))
      ) {
        nextPart = nextPart.substring(execArr[0].length);

        const item: Token = {
          type: TokenType.table,
          header: execArr[1].replace(/^ *| *\| *$/g, "").split(/ *\| */),
          align: this.parseTableAlign(
            execArr[2].replace(/^ *|\| *$/g, "").split(/ *\| */),
          ),
          cells: execArr[3]
            .replace(/\n$/, "")
            .split("\n")
            .map((row) => row.split(/ *\| */)),
        };

        this.tokens.push(item);
        continue;
      }

      // lheading
      if ((execArr = this.rules.lheading.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);

        this.tokens.push({
          type: TokenType.heading,
          depth: execArr[2] === "=" ? 1 : 2,
          text: execArr[1],
        });
        continue;
      }

      // hr (or front matter at the very start of the document)
      if ((execArr = this.rules.hr.exec(nextPart))) {
        // Front matter is only considered while nothing but blank space has
        // been tokenized so far (`every` is also true for an empty list).
        const nothingBefore = this.tokens.every(
          (token) => token.type === TokenType.space,
        );

        if (nothingBefore) {
          const fmArr = /^(?:\-\-\-)(.*?)(?:\-\-\-|\.\.\.)/s.exec(nextPart);

          if (fmArr) {
            nextPart = nextPart.substring(fmArr[0].length);
            // NOTE(security): this parses YAML taken straight from the
            // document; make sure the loader's schema is acceptable for
            // untrusted input. Loader errors propagate to the caller.
            // An empty block presumably yields no value, so fall back to an
            // empty object to keep `meta` usable.
            this.frontmatter = (load(fmArr[1]) as Obj) || {};
            continue;
          }
          // Not front matter (e.g. `***`, or `---` without a closing
          // delimiter): fall through and treat it as an ordinary rule.
          // Skipping this would leave the input unconsumed and loop forever.
        }

        nextPart = nextPart.substring(execArr[0].length);
        this.tokens.push({ type: TokenType.hr });
        continue;
      }

      // blockquote
      if ((execArr = this.rules.blockquote.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);
        this.tokens.push({ type: TokenType.blockquoteStart });
        const str = execArr[0].replace(/^ *> ?/gm, "");

        // The quoted content is lexed recursively without `top`, so it is
        // treated as non-top-level content (no tables, link definitions or
        // paragraph tokens inside the quote).
        this.getTokens(str);
        this.tokens.push({ type: TokenType.blockquoteEnd });
        continue;
      }

      // list
      if ((execArr = this.rules.list.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);
        const bull: string = execArr[2];

        // Ordered bullets ("1.") are longer than one character.
        this.tokens.push(
          { type: TokenType.listStart, ordered: bull.length > 1 },
        );

        // Get each top-level item.
        const items: string[] = execArr[0].match(this.rules.item) || [];
        const length = items.length;

        let next = false;

        for (let i = 0; i < length; i++) {
          let item = items[i];

          // Remove the list item's bullet so it is seen as the next token.
          let space = item.length;
          item = item.replace(/^ *([*+-]|\d+\.) +/, "");

          // Outdent whatever the list item contains. Hacky.
          if (item.indexOf("\n ") !== -1) {
            // `space` becomes the width of the removed bullet prefix.
            space -= item.length;
            item = !this.options.pedantic
              ? item.replace(new RegExp("^ {1," + space + "}", "gm"), "")
              : item.replace(/^ {1,4}/gm, "");
          }

          // Determine whether the next list item belongs here.
          // Backpedal if it does not belong in this list.
          if (this.options.smartLists && i !== length - 1) {
            const bb = this.staticThis.getRulesBase().bullet.exec(items[i + 1]);
            const blockBullet = bb ? bb[0] : "";

            if (
              bull !== blockBullet &&
              !(bull.length > 1 && blockBullet.length > 1)
            ) {
              // Hand the remaining items back to the input and make this
              // the last iteration.
              nextPart = items.slice(i + 1).join("\n") + nextPart;
              i = length - 1;
            }
          }

          // Determine whether item is loose or not.
          // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/
          // for discount behavior.
          let loose = next || /\n\n(?!\s*$)/.test(item);

          if (i !== length - 1) {
            next = item.charAt(item.length - 1) === "\n";

            if (!loose) {
              loose = next;
            }
          }

          this.tokens.push(
            {
              type: loose ? TokenType.looseItemStart : TokenType.listItemStart,
            },
          );

          // Recurse.
          this.getTokens(item, false, isBlockQuote);
          this.tokens.push({ type: TokenType.listItemEnd });
        }

        this.tokens.push({ type: TokenType.listEnd });
        continue;
      }

      // html
      if ((execArr = this.rules.html.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);
        const attr = execArr[1];
        const isPre = attr === "pre" || attr === "script" || attr === "style";

        this.tokens.push({
          type: this.options.sanitize ? TokenType.paragraph : TokenType.html,
          pre: !this.options.sanitizer && isPre,
          text: execArr[0],
        });
        continue;
      }

      // def
      if (top && (execArr = this.rules.def.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);

        // Link labels are stored lower-cased.
        this.links[execArr[1].toLowerCase()] = {
          href: execArr[2],
          title: execArr[3],
        };
        continue;
      }

      // table (gfm)
      if (
        top && this.hasRulesTables &&
        (execArr = (this.rules as RulesBlockTables).table.exec(nextPart))
      ) {
        nextPart = nextPart.substring(execArr[0].length);

        const item: Token = {
          type: TokenType.table,
          header: execArr[1].replace(/^ *| *\| *$/g, "").split(/ *\| */),
          align: this.parseTableAlign(
            execArr[2].replace(/^ *|\| *$/g, "").split(/ *\| */),
          ),
          cells: execArr[3]
            .replace(/(?: *\| *)?\n$/, "")
            .split("\n")
            .map((row) =>
              row.replace(/^ *\| *| *\| *$/g, "").split(/ *\| */)
            ),
        };

        this.tokens.push(item);
        continue;
      }

      // simple rules
      if (this.staticThis.simpleRules.length) {
        const simpleRules = this.staticThis.simpleRules;
        for (let i = 0; i < simpleRules.length; i++) {
          if ((execArr = simpleRules[i].exec(nextPart))) {
            nextPart = nextPart.substring(execArr[0].length);
            const type = "simpleRule" + (i + 1);
            this.tokens.push({ type, execArr });
            continue mainLoop;
          }
        }
      }

      // top-level paragraph
      if (top && (execArr = this.rules.paragraph.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);

        if (execArr[1].slice(-1) === "\n") {
          this.tokens.push({
            type: TokenType.paragraph,
            text: execArr[1].slice(0, -1),
          });
        } else {
          this.tokens.push({
            type: this.tokens.length > 0 ? TokenType.paragraph : TokenType.text,
            text: execArr[1],
          });
        }
        continue;
      }

      // text
      // Top-level should never reach here.
      if ((execArr = this.rules.text.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);
        this.tokens.push({ type: TokenType.text, text: execArr[0] });
        continue;
      }

      // No rule consumed anything: bail out instead of spinning forever.
      if (nextPart) {
        throw new Error(
          "Infinite loop on byte: " + nextPart.charCodeAt(0) +
            `, near text '${nextPart.slice(0, 30)}...'`,
        );
      }
    }

    return { tokens: this.tokens, links: this.links, meta: this.frontmatter };
  }
}