Skip to main content
Module

x/nunjucks/src/lexer.js

A powerful templating engine with inheritance, asynchronous control, and more (Jinja 2 inspired)
Go to Latest
File
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556
import * as lib from "./lib.js";
// Character classes used by the tokenizer while scanning code inside tags.
const whitespaceChars = " \n\t\r\u00A0";
const delimChars = "()[]{}%*-+~/#,:|.<>=!";
const intChars = "0123456789";

// Default tag delimiters; each can be overridden through `opts.tags`.
const BLOCK_START = "{%";
const BLOCK_END = "%}";
const VARIABLE_START = "{{";
const VARIABLE_END = "}}";
const COMMENT_START = "{#";
const COMMENT_END = "#}";

// Token type names: template text and tag boundaries.
const TOKEN_STRING = "string";
const TOKEN_WHITESPACE = "whitespace";
const TOKEN_DATA = "data";
const TOKEN_BLOCK_START = "block-start";
const TOKEN_BLOCK_END = "block-end";
const TOKEN_VARIABLE_START = "variable-start";
const TOKEN_VARIABLE_END = "variable-end";
const TOKEN_COMMENT = "comment";

// Token type names: punctuation and operators.
const TOKEN_LEFT_PAREN = "left-paren";
const TOKEN_RIGHT_PAREN = "right-paren";
const TOKEN_LEFT_BRACKET = "left-bracket";
const TOKEN_RIGHT_BRACKET = "right-bracket";
const TOKEN_LEFT_CURLY = "left-curly";
const TOKEN_RIGHT_CURLY = "right-curly";
const TOKEN_OPERATOR = "operator";
const TOKEN_COMMA = "comma";
const TOKEN_COLON = "colon";
const TOKEN_TILDE = "tilde";
const TOKEN_PIPE = "pipe";

// Token type names: literals and identifiers.
const TOKEN_INT = "int";
const TOKEN_FLOAT = "float";
const TOKEN_BOOLEAN = "boolean";
const TOKEN_NONE = "none";
const TOKEN_SYMBOL = "symbol";
const TOKEN_SPECIAL = "special";
const TOKEN_REGEX = "regex";
/**
 * Build a token record.
 *
 * @param {string} type - One of the TOKEN_* type names.
 * @param {*} value - The token payload (text, or an object for regex tokens).
 * @param {number} lineno - Line on which the token starts.
 * @param {number} colno - Column at which the token starts.
 * @returns {{type: string, value: *, lineno: number, colno: number}}
 */
function token(type, value, lineno, colno) {
  const record = {
    type: type,
    value: value,
    lineno: lineno,
    colno: colno,
  };
  return record;
}
/**
 * Incremental lexer for template source.
 *
 * The tokenizer walks `str` one character at a time, tracking the current
 * `index`, `lineno` and `colno`. It has two modes: outside of tags it emits
 * data/comment tokens and looks for tag openers; inside a block or variable
 * tag (`in_code === true`) it emits code tokens until the matching closer.
 */
class Tokenizer {
  /**
   * @param {string} str - Template source to tokenize.
   * @param {object} [opts] - Optional settings.
   * @param {object} [opts.tags] - Delimiter overrides (`blockStart`,
   *   `blockEnd`, `variableStart`, `variableEnd`, `commentStart`,
   *   `commentEnd`); missing or falsy entries fall back to the defaults.
   * @param {boolean} [opts.trimBlocks] - Drop one newline after a block end.
   * @param {boolean} [opts.lstripBlocks] - Strip whitespace-only text that
   *   precedes a block start on the same line.
   */
  constructor(str, opts) {
    this.str = str;
    this.index = 0;
    this.len = str.length;
    this.lineno = 0;
    this.colno = 0;

    this.in_code = false;

    const options = opts || {};
    const tags = options.tags || {};
    this.tags = {
      BLOCK_START: tags.blockStart || BLOCK_START,
      BLOCK_END: tags.blockEnd || BLOCK_END,
      VARIABLE_START: tags.variableStart || VARIABLE_START,
      VARIABLE_END: tags.variableEnd || VARIABLE_END,
      COMMENT_START: tags.commentStart || COMMENT_START,
      COMMENT_END: tags.commentEnd || COMMENT_END,
    };

    this.trimBlocks = !!options.trimBlocks;
    this.lstripBlocks = !!options.lstripBlocks;
  }

  /**
   * Consume and return the next token, or `null` once the input is exhausted.
   *
   * @returns {{type: string, value: *, lineno: number, colno: number} | null}
   * @throws {Error} On an unexpected comment end, an unterminated comment, or
   *   when no text could be extracted while parsing code.
   */
  nextToken() {
    // Tokens are stamped with the position at which they start.
    const lineno = this.lineno;
    const colno = this.colno;
    let tok;

    if (this.in_code) {
      // We are inside a block/variable tag, so parse the input as code
      let cur = this.current();

      if (this.isFinished()) {
        // We have nothing else to parse
        return null;
      } else if (cur === '"' || cur === "'") {
        // We've hit a string
        return token(TOKEN_STRING, this._parseString(cur), lineno, colno);
      } else if ((tok = this._extract(whitespaceChars))) {
        // We hit some whitespace
        return token(TOKEN_WHITESPACE, tok, lineno, colno);
      } else if (
        (tok = this._extractString(this.tags.BLOCK_END)) ||
        (tok = this._extractString("-" + this.tags.BLOCK_END))
      ) {
        // Special check for the block end tag
        //
        // It is a requirement that start and end tags are composed of
        // delimiter characters (%{}[] etc), and our code always
        // breaks on delimiters so we can assume the token parsing
        // doesn't consume these elsewhere
        this.in_code = false;
        if (this.trimBlocks) {
          cur = this.current();
          if (cur === "\n") {
            // Skip newline
            this.forward();
          } else if (cur === "\r") {
            // Skip CRLF newline
            this.forward();
            cur = this.current();
            if (cur === "\n") {
              this.forward();
            } else {
              // Was not a CRLF, so go back
              this.back();
            }
          }
        }
        return token(TOKEN_BLOCK_END, tok, lineno, colno);
      } else if (
        (tok = this._extractString(this.tags.VARIABLE_END)) ||
        (tok = this._extractString("-" + this.tags.VARIABLE_END))
      ) {
        // Special check for variable end tag (see above)
        this.in_code = false;
        return token(TOKEN_VARIABLE_END, tok, lineno, colno);
      } else if (cur === "r" && this.str.charAt(this.index + 1) === "/") {
        // Skip past 'r/'.
        this.forwardN(2);

        // Extract until the end of the regex -- / ends it, \/ does not.
        let regexBody = "";
        while (!this.isFinished()) {
          if (this.current() === "/" && this.previous() !== "\\") {
            this.forward();
            break;
          } else {
            regexBody += this.current();
            this.forward();
          }
        }

        // Check for flags.
        // The possible flags are according to
        // https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/RegExp
        const POSSIBLE_FLAGS = ["g", "i", "m", "y"];
        let regexFlags = "";
        while (!this.isFinished()) {
          const isCurrentAFlag = POSSIBLE_FLAGS.indexOf(this.current()) !== -1;
          if (isCurrentAFlag) {
            regexFlags += this.current();
            this.forward();
          } else {
            break;
          }
        }

        return token(
          TOKEN_REGEX,
          {
            body: regexBody,
            flags: regexFlags,
          },
          lineno,
          colno,
        );
      } else if (delimChars.indexOf(cur) !== -1) {
        // We've hit a delimiter (a special char like a bracket)
        this.forward();
        const complexOps = ["==", "===", "!=", "!==", "<=", ">=", "//", "**"];
        const curComplex = cur + this.current();
        let type;

        if (lib.indexOf(complexOps, curComplex) !== -1) {
          this.forward();
          cur = curComplex;

          // See if this is a strict equality/inequality comparator
          if (lib.indexOf(complexOps, curComplex + this.current()) !== -1) {
            cur = curComplex + this.current();
            this.forward();
          }
        }

        switch (cur) {
          case "(":
            type = TOKEN_LEFT_PAREN;
            break;
          case ")":
            type = TOKEN_RIGHT_PAREN;
            break;
          case "[":
            type = TOKEN_LEFT_BRACKET;
            break;
          case "]":
            type = TOKEN_RIGHT_BRACKET;
            break;
          case "{":
            type = TOKEN_LEFT_CURLY;
            break;
          case "}":
            type = TOKEN_RIGHT_CURLY;
            break;
          case ",":
            type = TOKEN_COMMA;
            break;
          case ":":
            type = TOKEN_COLON;
            break;
          case "~":
            type = TOKEN_TILDE;
            break;
          case "|":
            type = TOKEN_PIPE;
            break;
          default:
            type = TOKEN_OPERATOR;
        }

        return token(type, cur, lineno, colno);
      } else {
        // We are not at whitespace or a delimiter, so extract the
        // text and parse it
        tok = this._extractUntil(whitespaceChars + delimChars);

        if (tok.match(/^[-+]?[0-9]+$/)) {
          if (this.current() === ".") {
            this.forward();
            const dec = this._extract(intChars);
            return token(TOKEN_FLOAT, tok + "." + dec, lineno, colno);
          } else {
            return token(TOKEN_INT, tok, lineno, colno);
          }
        } else if (tok.match(/^(true|false)$/)) {
          return token(TOKEN_BOOLEAN, tok, lineno, colno);
        } else if (tok === "none") {
          return token(TOKEN_NONE, tok, lineno, colno);
          /*
           * Added to make the test `null is null` evaluate truthily.
           * Otherwise, Nunjucks will look up null in the context and
           * return `undefined`, which is not what we want. This *may* have
           * consequences if someone is using null in their templates as a
           * variable.
           */
        } else if (tok === "null") {
          return token(TOKEN_NONE, tok, lineno, colno);
        } else if (tok) {
          return token(TOKEN_SYMBOL, tok, lineno, colno);
        } else {
          throw new Error("Unexpected value while parsing: " + tok);
        }
      }
    } else {
      // Parse out the template text, breaking on tag
      // delimiters because we need to look for block/variable start
      // tags (don't use the full delimChars for optimization)
      const beginChars = (this.tags.BLOCK_START.charAt(0) +
        this.tags.VARIABLE_START.charAt(0) +
        this.tags.COMMENT_START.charAt(0) +
        this.tags.COMMENT_END.charAt(0));

      if (this.isFinished()) {
        return null;
      } else if (
        (tok = this._extractString(this.tags.BLOCK_START + "-")) ||
        (tok = this._extractString(this.tags.BLOCK_START))
      ) {
        this.in_code = true;
        return token(TOKEN_BLOCK_START, tok, lineno, colno);
      } else if (
        (tok = this._extractString(this.tags.VARIABLE_START + "-")) ||
        (tok = this._extractString(this.tags.VARIABLE_START))
      ) {
        this.in_code = true;
        return token(TOKEN_VARIABLE_START, tok, lineno, colno);
      } else {
        tok = "";
        let data;
        let inComment = false;

        if (this._matches(this.tags.COMMENT_START)) {
          inComment = true;
          tok = this._extractString(this.tags.COMMENT_START);
        }

        // Continually consume text, breaking on the tag delimiter
        // characters and checking to see if it's a start tag.
        //
        // We could hit the end of the template in the middle of
        // our looping, so check for the null return value from
        // _extractUntil
        while ((data = this._extractUntil(beginChars)) !== null) {
          tok += data;

          if (
            (this._matches(this.tags.BLOCK_START) ||
              this._matches(this.tags.VARIABLE_START) ||
              this._matches(this.tags.COMMENT_START)) &&
            !inComment
          ) {
            if (
              this.lstripBlocks &&
              this._matches(this.tags.BLOCK_START) &&
              this.colno > 0 &&
              this.colno <= tok.length
            ) {
              const lastLine = tok.slice(-this.colno);
              if (/^\s+$/.test(lastLine)) {
                // Remove block leading whitespace from beginning of the string
                tok = tok.slice(0, -this.colno);
                if (!tok.length) {
                  // All data removed, collapse to avoid unnecessary nodes
                  // by returning next token (block start)
                  return this.nextToken();
                }
              }
            }
            // If it is a start tag, stop looping
            break;
          } else if (this._matches(this.tags.COMMENT_END)) {
            if (!inComment) {
              throw new Error("unexpected end of comment");
            }
            tok += this._extractString(this.tags.COMMENT_END);
            break;
          } else {
            // It does not match any tag, so add the character and
            // carry on
            tok += this.current();
            this.forward();
          }
        }

        if (data === null && inComment) {
          throw new Error("expected end of comment, got end of file");
        }

        return token(
          inComment ? TOKEN_COMMENT : TOKEN_DATA,
          tok,
          lineno,
          colno,
        );
      }
    }
  }

  /**
   * Consume a quoted string starting at the opening `delimiter` and return
   * its contents with `\n`, `\t` and `\r` escapes decoded; any other escaped
   * character is kept literally. The closing delimiter is consumed as well.
   *
   * @param {string} delimiter - The quote character that opened the string.
   * @returns {string}
   */
  _parseString(delimiter) {
    // Step over the opening quote.
    this.forward();

    let str = "";

    while (!this.isFinished() && this.current() !== delimiter) {
      const cur = this.current();

      if (cur === "\\") {
        this.forward();
        switch (this.current()) {
          case "n":
            str += "\n";
            break;
          case "t":
            str += "\t";
            break;
          case "r":
            str += "\r";
            break;
          default:
            str += this.current();
        }
        this.forward();
      } else {
        str += cur;
        this.forward();
      }
    }

    // Step over the closing quote.
    this.forward();
    return str;
  }

  /**
   * Test whether the input at the current position starts with `str`.
   *
   * @param {string} str
   * @returns {boolean|null} `null` when fewer than `str.length` characters
   *   remain, otherwise whether the upcoming characters equal `str`.
   */
  _matches(str) {
    if (this.index + str.length > this.len) {
      return null;
    }

    const m = this.str.slice(this.index, this.index + str.length);
    return m === str;
  }

  /**
   * If the input at the current position starts with `str`, consume it.
   *
   * @param {string} str
   * @returns {string|null} `str` when it was consumed, otherwise `null`.
   */
  _extractString(str) {
    if (this._matches(str)) {
      this.forwardN(str.length);
      return str;
    }
    return null;
  }

  /**
   * Consume characters up to (not including) the first one found in
   * `charString`. With no `charString`, everything up to the end is consumed.
   *
   * @param {string} [charString]
   * @returns {string|null} See `_extractMatching`.
   */
  _extractUntil(charString) {
    // Extract all non-matching chars, with the default matching set
    // to everything
    return this._extractMatching(true, charString || "");
  }

  /**
   * Consume the run of characters that are all contained in `charString`.
   *
   * @param {string} charString
   * @returns {string|null} See `_extractMatching`.
   */
  _extract(charString) {
    // Extract all matching chars (no default, so charString must be
    // explicit)
    return this._extractMatching(false, charString);
  }

  /**
   * Pull out characters until a breaking char is hit.
   * If `breakOnMatch` is false, a non-matching char stops it.
   * If `breakOnMatch` is true, a matching char stops it.
   *
   * @param {boolean} breakOnMatch
   * @param {string} charString - Set of characters to test against.
   * @returns {string|null} `null` at end of input, `""` when the very first
   *   character already stops the scan, otherwise the consumed text.
   */
  _extractMatching(breakOnMatch, charString) {
    if (this.isFinished()) {
      return null;
    }

    const first = charString.indexOf(this.current());

    // Only proceed if the first character doesn't meet our condition
    if (
      (breakOnMatch && first === -1) ||
      (!breakOnMatch && first !== -1)
    ) {
      let t = this.current();
      this.forward();

      // And pull out all the chars one at a time until we hit a
      // breaking char
      let idx = charString.indexOf(this.current());

      while (
        ((breakOnMatch && idx === -1) ||
          (!breakOnMatch && idx !== -1)) &&
        !this.isFinished()
      ) {
        t += this.current();
        this.forward();

        idx = charString.indexOf(this.current());
      }

      return t;
    }

    return "";
  }

  /**
   * Match `regex` against the unparsed remainder and consume the match.
   *
   * @param {RegExp} regex
   * @returns {RegExpMatchArray|null} The match result, or `null` if none.
   */
  _extractRegex(regex) {
    const matches = this.currentStr().match(regex);
    if (!matches) {
      return null;
    }

    // Move forward whatever was matched
    this.forwardN(matches[0].length);

    return matches;
  }

  /** @returns {boolean} Whether the whole input has been consumed. */
  isFinished() {
    return this.index >= this.len;
  }

  /** Advance `n` characters, updating line/column as it goes. */
  forwardN(n) {
    for (let i = 0; i < n; i++) {
      this.forward();
    }
  }

  /** Advance one character; crossing a "\n" starts a new line at column 0. */
  forward() {
    this.index++;

    if (this.previous() === "\n") {
      this.lineno++;
      this.colno = 0;
    } else {
      this.colno++;
    }
  }

  /** Step back `n` characters, updating line/column as it goes. */
  backN(n) {
    for (let i = 0; i < n; i++) {
      this.back();
    }
  }

  /**
   * Step back one character, undoing the line/column change `forward()` made.
   */
  back() {
    this.index--;

    if (this.current() === "\n") {
      // We are back on the newline that ended the previous line, so its
      // column is its distance from the start of that line.
      this.lineno--;

      // Guard index 0: lastIndexOf clamps a negative fromIndex to 0 and
      // would otherwise report the newline we are standing on.
      const prevNewline = this.index > 0
        ? this.str.lastIndexOf("\n", this.index - 1)
        : -1;

      // The line starts right after `prevNewline` (or at 0 when it is -1).
      this.colno = this.index - prevNewline - 1;
    } else {
      this.colno--;
    }
  }

  /** @returns {string} The current character, or "" at end of input. */
  current() {
    if (!this.isFinished()) {
      return this.str.charAt(this.index);
    }
    return "";
  }

  /** @returns {string} What's left of the unparsed string ("" at the end). */
  currentStr() {
    if (!this.isFinished()) {
      return this.str.slice(this.index);
    }
    return "";
  }

  /** @returns {string} The character just before the current position. */
  previous() {
    return this.str.charAt(this.index - 1);
  }
}
// Public lexer API: a `lex` factory plus every token type name, so callers
// can compare `token.type` against the exported constants.
const lexer = {
  /**
   * Create a tokenizer over `src`.
   *
   * @param {string} src - Template source.
   * @param {object} [opts] - Forwarded unchanged to the Tokenizer constructor.
   * @returns {Tokenizer}
   */
  lex(src, opts) {
    const tokenizer = new Tokenizer(src, opts);
    return tokenizer;
  },

  TOKEN_STRING,
  TOKEN_WHITESPACE,
  TOKEN_DATA,
  TOKEN_BLOCK_START,
  TOKEN_BLOCK_END,
  TOKEN_VARIABLE_START,
  TOKEN_VARIABLE_END,
  TOKEN_COMMENT,
  TOKEN_LEFT_PAREN,
  TOKEN_RIGHT_PAREN,
  TOKEN_LEFT_BRACKET,
  TOKEN_RIGHT_BRACKET,
  TOKEN_LEFT_CURLY,
  TOKEN_RIGHT_CURLY,
  TOKEN_OPERATOR,
  TOKEN_COMMA,
  TOKEN_COLON,
  TOKEN_TILDE,
  TOKEN_PIPE,
  TOKEN_INT,
  TOKEN_FLOAT,
  TOKEN_BOOLEAN,
  TOKEN_NONE,
  TOKEN_SYMBOL,
  TOKEN_SPECIAL,
  TOKEN_REGEX,
};

export default lexer;