Skip to main content
Module

x/markdown/src/block-lexer.ts

Deno Markdown module forked from https://github.com/ts-stack/markdown/tree/bb47aa8e625e89e6aa84f49a98536a3089dee831
Latest
File
/** * @license * * Copyright (c) 2011-2014, Christopher Jeffrey. (MIT Licensed) * https://github.com/chjj/marked * * Copyright (c) 2018, Костя Третяк. (MIT Licensed) * https://github.com/ts-stack/markdown */
import { ExtendRegexp } from "./extend-regexp.ts";import { Align, LexerReturns, Links, MarkedOptions, RulesBlockBase, RulesBlockGfm, RulesBlockTables, Token, TokenType, Obj} from "./interfaces.ts";import { Marked } from "./marked.ts";import { load } from "https://deno.land/std/encoding/_yaml/loader/loader.ts";
/**
 * Block-level Markdown lexer.
 *
 * Splits a Markdown source string into a flat list of block tokens
 * (headings, code, lists, tables, ...), collects link reference definitions
 * and extracts a leading YAML front-matter block when one is present.
 */
export class BlockLexer<T extends typeof BlockLexer> {
  /** Extra user-supplied rules; a match yields a `simpleRule<N>` token (1-based). */
  static simpleRules: RegExp[] = [];

  /** Cached base block grammar (built lazily by `getRulesBase`). */
  protected static rulesBase: RulesBlockBase;

  /**
   * GFM Block Grammar.
   */
  protected static rulesGfm: RulesBlockGfm;

  /**
   * GFM + Tables Block Grammar.
   */
  protected static rulesTables: RulesBlockTables;

  /** Grammar selected for this instance by `setRules`. */
  protected rules!: RulesBlockBase | RulesBlockGfm | RulesBlockTables;
  protected options: MarkedOptions;
  /** Link reference definitions, keyed by lower-cased label. */
  protected links: Links = {};
  /** Tokens produced so far; shared by the recursive `getTokens` calls. */
  protected tokens: Token[] = [];
  /** Data parsed from the front-matter block, `{}` when there is none. */
  protected frontmatter: Obj = {};
  /** True when the selected grammar defines `fences`. */
  protected hasRulesGfm!: boolean;
  /** True when the selected grammar defines `table`. */
  protected hasRulesTables!: boolean;

  /**
   * @param staticThis Class whose static grammar getters and `simpleRules` are used.
   * @param options Lexer options; falls back to `Marked.options` when omitted.
   */
  constructor(protected staticThis: typeof BlockLexer, options?: object) {
    this.options = options || Marked.options;
    this.setRules();
  }

  /**
   * Accepts Markdown text and returns object with tokens and links.
   *
   * @param src String of markdown source to be compiled.
   * @param options Hash of options.
   * @param top Whether `src` is lexed as top-level content.
   * @param isBlockQuote Forwarded unchanged to `getTokens`.
   */
  static lex(
    src: string,
    options?: MarkedOptions,
    top?: boolean,
    isBlockQuote?: boolean,
  ): LexerReturns {
    const lexer = new this(this, options);
    return lexer.getTokens(src, top, isBlockQuote);
  }

  /**
   * Builds (once) and returns the base block grammar.
   *
   * The literal patterns contain placeholder words (`bull`, `hr`, `def`,
   * `tag`, ...) that are substituted through `ExtendRegexp#setGroup`.
   */
  protected static getRulesBase(): RulesBlockBase {
    if (this.rulesBase) {
      return this.rulesBase;
    }

    const base: RulesBlockBase = {
      newline: /^\n+/,
      code: /^( {4}[^\n]+\n*)+/,
      hr: /^( *[-*_]){3,} *(?:\n+|$)/,
      heading: /^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)/,
      lheading: /^([^\n]+)\n *(=|-){2,} *(?:\n+|$)/,
      blockquote: /^( *>[^\n]+(\n[^\n]+)*\n*)+/,
      list: /^( *)(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?!\1bull )\n*|\s*$)/,
      html:
        /^ *(?:comment *(?:\n|\s*$)|closed *(?:\n{2,}|\s*$)|closing *(?:\n{2,}|\s*$))/,
      def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +["(]([^\n]+)[")])? *(?:\n+|$)/,
      paragraph:
        /^((?:[^\n]+\n?(?!hr|heading|lheading|blockquote|tag|def))+)\n*/,
      text: /^[^\n]+/,
      bullet: /(?:[*+-]|\d+\.)/,
      item: /^( *)(bull) [^\n]*(?:\n(?!\1bull )[^\n]*)*/,
    };

    base.item = new ExtendRegexp(base.item, "gm")
      .setGroup(/bull/g, base.bullet)
      .getRegexp();

    base.list = new ExtendRegexp(base.list)
      .setGroup(/bull/g, base.bullet)
      .setGroup("hr", "\\n+(?=\\1?(?:[-*_] *){3,}(?:\\n+|$))")
      .setGroup("def", "\\n+(?=" + base.def.source + ")")
      .getRegexp();

    // Matches a tag name that is NOT one of the listed inline elements.
    const tag = "(?!(?:" +
      "a|em|strong|small|s|cite|q|dfn|abbr|data|time|code" +
      "|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo" +
      "|span|br|wbr|ins|del|img)\\b)\\w+(?!:/|[^\\w\\s@]*@)\\b";

    base.html = new ExtendRegexp(base.html)
      .setGroup("comment", /<!--[\s\S]*?-->/)
      .setGroup("closed", /<(tag)[\s\S]+?<\/\1>/)
      .setGroup("closing", /<tag(?:"[^"]*"|'[^']*'|[^'">])*?>/)
      .setGroup(/tag/g, tag)
      .getRegexp();

    base.paragraph = new ExtendRegexp(base.paragraph)
      .setGroup("hr", base.hr)
      .setGroup("heading", base.heading)
      .setGroup("lheading", base.lheading)
      .setGroup("blockquote", base.blockquote)
      .setGroup("tag", "<" + tag)
      .setGroup("def", base.def)
      .getRegexp();

    return (this.rulesBase = base);
  }

  /**
   * Builds (once) and returns the GFM grammar: the base grammar plus fenced
   * code blocks, a heading rule that requires a space after the `#` run, and
   * a paragraph rule that also stops before fences and lists.
   */
  protected static getRulesGfm(): RulesBlockGfm {
    if (this.rulesGfm) {
      return this.rulesGfm;
    }

    const base = this.getRulesBase();

    const gfm: RulesBlockGfm = {
      ...base,
      ...{
        fences:
          /^ *(`{3,}|~{3,})[ \.]*(\S+)? *\n([\s\S]*?)\s*\1 *(?:\n+|$)/,
        paragraph: /^/,
        heading: /^ *(#{1,6}) +([^\n]+?) *#* *(?:\n+|$)/,
      },
    };

    // The sources are embedded inside the paragraph pattern, so their
    // back-references are renumbered before being spliced in.
    const group1 = gfm.fences.source.replace("\\1", "\\2");
    const group2 = base.list.source.replace("\\1", "\\3");

    gfm.paragraph = new ExtendRegexp(base.paragraph)
      .setGroup("(?!", `(?!${group1}|${group2}|`)
      .getRegexp();

    return (this.rulesGfm = gfm);
  }

  /**
   * Builds (once) and returns the GFM grammar extended with the two table
   * rules (`nptable`: no leading pipe, `table`: leading pipe).
   */
  protected static getRulesTable(): RulesBlockTables {
    if (this.rulesTables) {
      return this.rulesTables;
    }

    return (this.rulesTables = {
      ...this.getRulesGfm(),
      ...{
        nptable:
          /^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*/,
        table:
          /^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*/,
      },
    });
  }

  /**
   * Converts one cell of a table delimiter row (e.g. `:--`, `--:`, `:-:`)
   * into its alignment value; anything else maps to `""`.
   */
  private static toAlign(spec: string): Align {
    if (/^ *-+: *$/.test(spec)) {
      return "right";
    }
    if (/^ *:-+: *$/.test(spec)) {
      return "center";
    }
    if (/^ *:-+ *$/.test(spec)) {
      return "left";
    }
    return "";
  }

  /**
   * Selects the grammar matching `options.gfm` / `options.tables` and
   * records which optional rule sets it provides.
   */
  protected setRules() {
    if (this.options.gfm) {
      if (this.options.tables) {
        this.rules = this.staticThis.getRulesTable();
      } else {
        this.rules = this.staticThis.getRulesGfm();
      }
    } else {
      this.rules = this.staticThis.getRulesBase();
    }

    this.hasRulesGfm = (this.rules as RulesBlockGfm).fences !== undefined;
    this.hasRulesTables = (this.rules as RulesBlockTables).table !== undefined;
  }

  /**
   * Lexing.
   *
   * Repeatedly matches the block rules against the start of the remaining
   * input, consuming the matched text and appending tokens to `this.tokens`.
   * Blockquotes and list items are lexed by recursive calls that append to
   * the same token list.
   *
   * @param src Markdown source to tokenize.
   * @param top When truthy, the table, link-definition and paragraph rules
   * are enabled.
   * @param isBlockQuote Only forwarded to the recursive call for list items.
   * @returns The accumulated tokens, link definitions and front-matter data.
   * @throws Error when no rule can consume the remaining input.
   */
  protected getTokens(
    src: string,
    top?: boolean,
    isBlockQuote?: boolean,
  ): LexerReturns {
    let nextPart = src;
    let execArr: RegExpExecArray | null;

    mainLoop:
    while (nextPart) {
      // newline (deliberately no `continue`: the other rules run on the rest)
      if ((execArr = this.rules.newline.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);

        if (execArr[0].length > 1) {
          this.tokens.push({ type: TokenType.space });
        }
      }

      // code
      if ((execArr = this.rules.code.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);
        const code = execArr[0].replace(/^ {4}/gm, "");

        this.tokens.push({
          type: TokenType.code,
          text: !this.options.pedantic ? code.replace(/\n+$/, "") : code,
        });
        continue;
      }

      // fences code (gfm)
      if (
        this.hasRulesGfm &&
        (execArr = (this.rules as RulesBlockGfm).fences.exec(nextPart))
      ) {
        nextPart = nextPart.substring(execArr[0].length);

        this.tokens.push({
          type: TokenType.code,
          lang: execArr[2],
          text: execArr[3] || "",
        });
        continue;
      }

      // heading
      if ((execArr = this.rules.heading.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);
        this.tokens.push({
          type: TokenType.heading,
          depth: execArr[1].length,
          text: execArr[2],
        });
        continue;
      }

      // table no leading pipe (gfm)
      if (
        top && this.hasRulesTables &&
        (execArr = (this.rules as RulesBlockTables).nptable.exec(nextPart))
      ) {
        nextPart = nextPart.substring(execArr[0].length);

        const header = execArr[1].replace(/^ *| *\| *$/g, "").split(/ *\| */);
        const align = execArr[2]
          .replace(/^ *|\| *$/g, "")
          .split(/ *\| */)
          .map((spec) => BlockLexer.toAlign(spec));
        const cells = execArr[3]
          .replace(/\n$/, "")
          .split("\n")
          .map((row) => row.split(/ *\| */));

        this.tokens.push({ type: TokenType.table, header, align, cells });
        continue;
      }

      // lheading
      if ((execArr = this.rules.lheading.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);

        this.tokens.push({
          type: TokenType.heading,
          depth: execArr[2] === "=" ? 1 : 2,
          text: execArr[1],
        });
        continue;
      }

      // hr / front matter
      if ((execArr = this.rules.hr.exec(nextPart))) {
        // Front matter is only looked for while nothing but blank space has
        // been tokenized so far (`every` is also true for an empty list).
        const onlySpaceSoFar = this.tokens.every(
          (token) => token.type === TokenType.space,
        );
        const fmArr = onlySpaceSoFar
          ? /^(?:\-\-\-)(.*?)(?:\-\-\-|\.\.\.)/s.exec(nextPart)
          : null;

        if (fmArr) {
          // Grabs front-matter data and parses it into a JavaScript object.
          // NOTE(review): `load` presumably throws on malformed YAML, which
          // would propagate to the caller — confirm this is acceptable.
          nextPart = nextPart.substring(fmArr[0].length);
          this.frontmatter = load(fmArr[1]) as Obj;
        } else {
          // Not front matter (e.g. a leading `***` or an unterminated `---`):
          // consume it as an ordinary rule. Without this the input was never
          // advanced and the loop would spin forever.
          nextPart = nextPart.substring(execArr[0].length);
          this.tokens.push({ type: TokenType.hr });
        }
        continue;
      }

      // blockquote
      if ((execArr = this.rules.blockquote.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);
        this.tokens.push({ type: TokenType.blockquoteStart });
        const str = execArr[0].replace(/^ *> ?/gm, "");

        // NOTE(review): the quoted content is lexed without `top`, so the
        // rules guarded by `top` (tables, defs, paragraphs) do not apply
        // inside blockquotes — confirm this is intended.
        this.getTokens(str);
        this.tokens.push({ type: TokenType.blockquoteEnd });
        continue;
      }

      // list
      if ((execArr = this.rules.list.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);
        const bull: string = execArr[2];

        this.tokens.push(
          { type: TokenType.listStart, ordered: bull.length > 1 },
        );

        // Get each top-level item.
        const items: string[] = execArr[0].match(this.rules.item) || [];
        const length = items.length;

        let next = false;

        // Index loop on purpose: `i` is advanced manually when backpedalling.
        for (let i = 0; i < length; i++) {
          let item = items[i];

          // Remove the list item's bullet so it is seen as the next token.
          let space = item.length;
          item = item.replace(/^ *([*+-]|\d+\.) +/, "");

          // Outdent whatever the list item contains. Hacky.
          if (item.includes("\n ")) {
            space -= item.length;
            item = !this.options.pedantic
              ? item.replace(new RegExp("^ {1," + space + "}", "gm"), "")
              : item.replace(/^ {1,4}/gm, "");
          }

          // Determine whether the next list item belongs here.
          // Backpedal if it does not belong in this list.
          if (this.options.smartLists && i !== length - 1) {
            const bb = this.staticThis.getRulesBase().bullet.exec(items[i + 1]);
            const blockBullet = bb ? bb[0] : "";

            if (
              bull !== blockBullet &&
              !(bull.length > 1 && blockBullet.length > 1)
            ) {
              // Hand the remaining items back to the main loop and make this
              // the last iteration.
              nextPart = items.slice(i + 1).join("\n") + nextPart;
              i = length - 1;
            }
          }

          // Determine whether item is loose or not.
          // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/
          // for discount behavior.
          let loose = next || /\n\n(?!\s*$)/.test(item);

          if (i !== length - 1) {
            next = item.charAt(item.length - 1) === "\n";

            if (!loose) {
              loose = next;
            }
          }

          this.tokens.push(
            {
              type: loose ? TokenType.looseItemStart : TokenType.listItemStart,
            },
          );

          // Recurse.
          this.getTokens(item, false, isBlockQuote);
          this.tokens.push({ type: TokenType.listItemEnd });
        }

        this.tokens.push({ type: TokenType.listEnd });
        continue;
      }

      // html
      if ((execArr = this.rules.html.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);
        const attr = execArr[1];
        const isPre = attr === "pre" || attr === "script" || attr === "style";

        this.tokens.push({
          type: this.options.sanitize ? TokenType.paragraph : TokenType.html,
          pre: !this.options.sanitizer && isPre,
          text: execArr[0],
        });
        continue;
      }

      // def
      if (top && (execArr = this.rules.def.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);

        this.links[execArr[1].toLowerCase()] = {
          href: execArr[2],
          title: execArr[3],
        };
        continue;
      }

      // table (gfm)
      if (
        top && this.hasRulesTables &&
        (execArr = (this.rules as RulesBlockTables).table.exec(nextPart))
      ) {
        nextPart = nextPart.substring(execArr[0].length);

        const header = execArr[1].replace(/^ *| *\| *$/g, "").split(/ *\| */);
        const align = execArr[2]
          .replace(/^ *|\| *$/g, "")
          .split(/ *\| */)
          .map((spec) => BlockLexer.toAlign(spec));
        const cells = execArr[3]
          .replace(/(?: *\| *)?\n$/, "")
          .split("\n")
          .map((row) => row.replace(/^ *\| *| *\| *$/g, "").split(/ *\| */));

        this.tokens.push({ type: TokenType.table, header, align, cells });
        continue;
      }

      // simple rules
      const simpleRules = this.staticThis.simpleRules;
      for (let i = 0; i < simpleRules.length; i++) {
        if ((execArr = simpleRules[i].exec(nextPart))) {
          nextPart = nextPart.substring(execArr[0].length);
          const type = "simpleRule" + (i + 1);
          this.tokens.push({ type, execArr });
          continue mainLoop;
        }
      }

      // top-level paragraph
      if (top && (execArr = this.rules.paragraph.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);

        if (execArr[1].slice(-1) === "\n") {
          this.tokens.push({
            type: TokenType.paragraph,
            text: execArr[1].slice(0, -1),
          });
        } else {
          this.tokens.push({
            type: this.tokens.length > 0 ? TokenType.paragraph : TokenType.text,
            text: execArr[1],
          });
        }
        continue;
      }

      // text
      // Top-level should never reach here.
      if ((execArr = this.rules.text.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);
        this.tokens.push({ type: TokenType.text, text: execArr[0] });
        continue;
      }

      // Nothing consumed any input: fail loudly instead of looping forever.
      if (nextPart) {
        throw new Error(
          "Infinite loop on byte: " + nextPart.charCodeAt(0) +
            `, near text '${nextPart.slice(0, 30)}...'`,
        );
      }
    }

    return { tokens: this.tokens, links: this.links, meta: this.frontmatter };
  }
}