// x/denjucks/src/lexer.js (Deno templating engine lexer module)
'use strict';
import lib from './lib.js';
var whitespaceChars = ' \n\t\r\u00A0';
var delimChars = '()[]{}%*-+~/#,:|.<>=!';
var intChars = '0123456789';

var BLOCK_START = '{%';
var BLOCK_END = '%}';
var VARIABLE_START = '{{';
var VARIABLE_END = '}}';
var COMMENT_START = '{#';
var COMMENT_END = '#}';

var TOKEN_STRING = 'string';
var TOKEN_WHITESPACE = 'whitespace';
var TOKEN_DATA = 'data';
var TOKEN_BLOCK_START = 'block-start';
var TOKEN_BLOCK_END = 'block-end';
var TOKEN_VARIABLE_START = 'variable-start';
var TOKEN_VARIABLE_END = 'variable-end';
var TOKEN_COMMENT = 'comment';
var TOKEN_LEFT_PAREN = 'left-paren';
var TOKEN_RIGHT_PAREN = 'right-paren';
var TOKEN_LEFT_BRACKET = 'left-bracket';
var TOKEN_RIGHT_BRACKET = 'right-bracket';
var TOKEN_LEFT_CURLY = 'left-curly';
var TOKEN_RIGHT_CURLY = 'right-curly';
var TOKEN_OPERATOR = 'operator';
var TOKEN_COMMA = 'comma';
var TOKEN_COLON = 'colon';
var TOKEN_TILDE = 'tilde';
var TOKEN_PIPE = 'pipe';
var TOKEN_INT = 'int';
var TOKEN_FLOAT = 'float';
var TOKEN_BOOLEAN = 'boolean';
var TOKEN_NONE = 'none';
var TOKEN_SYMBOL = 'symbol';
var TOKEN_SPECIAL = 'special';
var TOKEN_REGEX = 'regex';
function token(type, value, lineno, colno) { return { type: type, value: value, lineno: lineno, colno: colno };}
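// For example, token(TOKEN_SYMBOL, 'name', 0, 9) produces
// { type: 'symbol', value: 'name', lineno: 0, colno: 9 }.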
function Tokenizer(str, opts) {
    this.str = str;
    this.index = 0;
    this.len = str.length;
    this.lineno = 0;
    this.colno = 0;

    this.in_code = false;

    opts = opts || {};

    var tags = opts.tags || {};
    this.tags = {
        BLOCK_START: tags.blockStart || BLOCK_START,
        BLOCK_END: tags.blockEnd || BLOCK_END,
        VARIABLE_START: tags.variableStart || VARIABLE_START,
        VARIABLE_END: tags.variableEnd || VARIABLE_END,
        COMMENT_START: tags.commentStart || COMMENT_START,
        COMMENT_END: tags.commentEnd || COMMENT_END
    };

    this.trimBlocks = !!opts.trimBlocks;
    this.lstripBlocks = !!opts.lstripBlocks;
}
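// Illustrative: the delimiters and whitespace handling can be customized via
// opts (the tag values below are made up for the example):
//
//   new Tokenizer(src, {
//       tags: { blockStart: '<%', blockEnd: '%>' },
//       trimBlocks: true,
//       lstripBlocks: true
//   });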
Tokenizer.prototype.nextToken = function() {
    var lineno = this.lineno;
    var colno = this.colno;
    var tok;

    if(this.in_code) {
        // Otherwise, if we are in a block parse it as code
        var cur = this.current();

        if(this.is_finished()) {
            // We have nothing else to parse
            return null;
        }
        else if(cur === '"' || cur === '\'') {
            // We've hit a string
            return token(TOKEN_STRING, this.parseString(cur), lineno, colno);
        }
        else if((tok = this._extract(whitespaceChars))) {
            // We hit some whitespace
            return token(TOKEN_WHITESPACE, tok, lineno, colno);
        }
        else if((tok = this._extractString(this.tags.BLOCK_END)) ||
                (tok = this._extractString('-' + this.tags.BLOCK_END))) {
            // Special check for the block end tag
            //
            // It is a requirement that start and end tags are composed of
            // delimiter characters (%{}[] etc), and our code always
            // breaks on delimiters so we can assume the token parsing
            // doesn't consume these elsewhere
            this.in_code = false;
            if(this.trimBlocks) {
                cur = this.current();
                if(cur === '\n') {
                    // Skip newline
                    this.forward();
                } else if(cur === '\r') {
                    // Skip CRLF newline
                    this.forward();
                    cur = this.current();
                    if(cur === '\n') {
                        this.forward();
                    } else {
                        // Was not a CRLF, so go back
                        this.back();
                    }
                }
            }
            return token(TOKEN_BLOCK_END, tok, lineno, colno);
        }
        else if((tok = this._extractString(this.tags.VARIABLE_END)) ||
                (tok = this._extractString('-' + this.tags.VARIABLE_END))) {
            // Special check for variable end tag (see above)
            this.in_code = false;
            return token(TOKEN_VARIABLE_END, tok, lineno, colno);
        }
        else if(cur === 'r' && this.str.charAt(this.index + 1) === '/') {
            // Skip past 'r/'.
            this.forwardN(2);

            // Extract until the end of the regex -- / ends it, \/ does not.
            var regexBody = '';
            while (!this.is_finished()) {
                if (this.current() === '/' && this.previous() !== '\\') {
                    this.forward();
                    break;
                } else {
                    regexBody += this.current();
                    this.forward();
                }
            }

            // Check for flags.
            // The possible flags are according to
            // https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/RegExp
            var POSSIBLE_FLAGS = ['g', 'i', 'm', 'y'];
            var regexFlags = '';
            while (!this.is_finished()) {
                var isCurrentAFlag = POSSIBLE_FLAGS.indexOf(this.current()) !== -1;
                if (isCurrentAFlag) {
                    regexFlags += this.current();
                    this.forward();
                } else {
                    break;
                }
            }

            return token(TOKEN_REGEX, {body: regexBody, flags: regexFlags}, lineno, colno);
        }
        else if(delimChars.indexOf(cur) !== -1) {
            // We've hit a delimiter (a special char like a bracket)
            this.forward();
            var complexOps = ['==', '===', '!=', '!==', '<=', '>=', '//', '**'];
            var curComplex = cur + this.current();
            var type;

            if(lib.indexOf(complexOps, curComplex) !== -1) {
                this.forward();
                cur = curComplex;

                // See if this is a strict equality/inequality comparator
                if(lib.indexOf(complexOps, curComplex + this.current()) !== -1) {
                    cur = curComplex + this.current();
                    this.forward();
                }
            }

            switch(cur) {
            case '(': type = TOKEN_LEFT_PAREN; break;
            case ')': type = TOKEN_RIGHT_PAREN; break;
            case '[': type = TOKEN_LEFT_BRACKET; break;
            case ']': type = TOKEN_RIGHT_BRACKET; break;
            case '{': type = TOKEN_LEFT_CURLY; break;
            case '}': type = TOKEN_RIGHT_CURLY; break;
            case ',': type = TOKEN_COMMA; break;
            case ':': type = TOKEN_COLON; break;
            case '~': type = TOKEN_TILDE; break;
            case '|': type = TOKEN_PIPE; break;
            default: type = TOKEN_OPERATOR;
            }

            return token(type, cur, lineno, colno);
        }
        else {
            // We are not at whitespace or a delimiter, so extract the
            // text and parse it
            tok = this._extractUntil(whitespaceChars + delimChars);

            if(tok.match(/^[-+]?[0-9]+$/)) {
                if(this.current() === '.') {
                    this.forward();
                    var dec = this._extract(intChars);
                    return token(TOKEN_FLOAT, tok + '.' + dec, lineno, colno);
                } else {
                    return token(TOKEN_INT, tok, lineno, colno);
                }
            } else if(tok.match(/^(true|false)$/)) {
                return token(TOKEN_BOOLEAN, tok, lineno, colno);
            } else if(tok === 'none') {
                return token(TOKEN_NONE, tok, lineno, colno);
            } else if(tok) {
                return token(TOKEN_SYMBOL, tok, lineno, colno);
            } else {
                throw new Error('Unexpected value while parsing: ' + tok);
            }
        }
    } else {
        // Parse out the template text, breaking on tag
        // delimiters because we need to look for block/variable start
        // tags (don't use the full delimChars for optimization)
        var beginChars = (this.tags.BLOCK_START.charAt(0) +
                          this.tags.VARIABLE_START.charAt(0) +
                          this.tags.COMMENT_START.charAt(0) +
                          this.tags.COMMENT_END.charAt(0));

        if(this.is_finished()) {
            return null;
        }
        else if((tok = this._extractString(this.tags.BLOCK_START + '-')) ||
                (tok = this._extractString(this.tags.BLOCK_START))) {
            this.in_code = true;
            return token(TOKEN_BLOCK_START, tok, lineno, colno);
        }
        else if((tok = this._extractString(this.tags.VARIABLE_START + '-')) ||
                (tok = this._extractString(this.tags.VARIABLE_START))) {
            this.in_code = true;
            return token(TOKEN_VARIABLE_START, tok, lineno, colno);
        }
        else {
            tok = '';
            var data;
            var in_comment = false;

            if(this._matches(this.tags.COMMENT_START)) {
                in_comment = true;
                tok = this._extractString(this.tags.COMMENT_START);
            }

            // Continually consume text, breaking on the tag delimiter
            // characters and checking to see if it's a start tag.
            //
            // We could hit the end of the template in the middle of
            // our looping, so check for the null return value from
            // _extractUntil
            while((data = this._extractUntil(beginChars)) !== null) {
                tok += data;

                if((this._matches(this.tags.BLOCK_START) ||
                    this._matches(this.tags.VARIABLE_START) ||
                    this._matches(this.tags.COMMENT_START)) &&
                   !in_comment) {
                    if(this.lstripBlocks &&
                       this._matches(this.tags.BLOCK_START) &&
                       this.colno > 0 &&
                       this.colno <= tok.length) {
                        var lastLine = tok.slice(-this.colno);
                        if(/^\s+$/.test(lastLine)) {
                            // Remove block leading whitespace from beginning of the string
                            tok = tok.slice(0, -this.colno);
                            if(!tok.length) {
                                // All data removed, collapse to avoid unnecessary nodes
                                // by returning next token (block start)
                                return this.nextToken();
                            }
                        }
                    }

                    // If it is a start tag, stop looping
                    break;
                } else if(this._matches(this.tags.COMMENT_END)) {
                    if(!in_comment) {
                        throw new Error('unexpected end of comment');
                    }
                    tok += this._extractString(this.tags.COMMENT_END);
                    break;
                } else {
                    // It does not match any tag, so add the character and
                    // carry on
                    tok += this.current();
                    this.forward();
                }
            }

            if(data === null && in_comment) {
                throw new Error('expected end of comment, got end of file');
            }

            return token(in_comment ? TOKEN_COMMENT : TOKEN_DATA, tok, lineno, colno);
        }
    }

    throw new Error('Could not parse text');
};
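// Illustrative token stream for the default tags: given the input
// 'Hello {{ name }}!', repeated nextToken() calls should yield roughly
//   data('Hello '), variable-start('{{'), whitespace(' '), symbol('name'),
//   whitespace(' '), variable-end('}}'), data('!'),
// and then null once the input is exhausted.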
Tokenizer.prototype.parseString = function(delimiter) {
    this.forward();

    var str = '';

    while(!this.is_finished() && this.current() !== delimiter) {
        var cur = this.current();

        if(cur === '\\') {
            this.forward();
            switch(this.current()) {
            case 'n': str += '\n'; break;
            case 't': str += '\t'; break;
            case 'r': str += '\r'; break;
            default: str += this.current();
            }
            this.forward();
        } else {
            str += cur;
            this.forward();
        }
    }

    this.forward();
    return str;
};
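// Illustrative: with the index on the opening quote of the source text "a\nb",
// parseString('"') returns 'a' + newline + 'b' and leaves the index just past
// the closing quote; unrecognized escapes simply drop the backslash and keep
// the following character.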
Tokenizer.prototype._matches = function(str) { if(this.index + str.length > this.len) { return null; }
var m = this.str.slice(this.index, this.index + str.length); return m === str;};
Tokenizer.prototype._extractString = function(str) { if(this._matches(str)) { this.index += str.length; return str; } return null;};
Tokenizer.prototype._extractUntil = function(charString) {
    // Extract all non-matching chars, with the default matching set
    // to everything
    return this._extractMatching(true, charString || '');
};

Tokenizer.prototype._extract = function(charString) {
    // Extract all matching chars (no default, so charString must be
    // explicit)
    return this._extractMatching(false, charString);
};

Tokenizer.prototype._extractMatching = function (breakOnMatch, charString) {
    // Pull out characters until a breaking char is hit.
    // If breakOnMatch is false, a non-matching char stops it.
    // If breakOnMatch is true, a matching char stops it.
if(this.is_finished()) { return null; }
var first = charString.indexOf(this.current());
    // Only proceed if the first character doesn't meet our condition
    if((breakOnMatch && first === -1) ||
       (!breakOnMatch && first !== -1)) {
        var t = this.current();
        this.forward();

        // And pull out all the chars one at a time until we hit a
        // breaking char
        var idx = charString.indexOf(this.current());

        while(((breakOnMatch && idx === -1) ||
               (!breakOnMatch && idx !== -1)) && !this.is_finished()) {
            t += this.current();
            this.forward();

            idx = charString.indexOf(this.current());
        }
return t; }
return '';};
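// Illustrative behaviour (assuming the unparsed remainder is '123.45 rest'):
//   this._extract(intChars)             -> '123'    (stops at the '.')
//   this._extractUntil(whitespaceChars) -> '123.45' (stops at the space)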
Tokenizer.prototype._extractRegex = function(regex) {
    var matches = this.currentStr().match(regex);
    if(!matches) {
        return null;
    }

    // Move forward whatever was matched
    this.forwardN(matches[0].length);

    return matches;
};
Tokenizer.prototype.is_finished = function() { return this.index >= this.len;};
Tokenizer.prototype.forwardN = function(n) { for(var i=0; i<n; i++) { this.forward(); }};
Tokenizer.prototype.forward = function() { this.index++;
if(this.previous() === '\n') { this.lineno++; this.colno = 0; } else { this.colno++; }};
Tokenizer.prototype.backN = function(n) { for(var i=0; i<n; i++) { this.back(); }};
Tokenizer.prototype.back = function() {
    this.index--;

    if(this.current() === '\n') {
        this.lineno--;

        var idx = this.str.lastIndexOf('\n', this.index - 1);
        if(idx === -1) {
            this.colno = this.index;
        } else {
            this.colno = this.index - idx;
        }
    } else {
        this.colno--;
    }
};
// current returns the current character
Tokenizer.prototype.current = function() {
    if(!this.is_finished()) {
        return this.str.charAt(this.index);
    }
    return '';
};

// currentStr returns what's left of the unparsed string
Tokenizer.prototype.currentStr = function() {
    if(!this.is_finished()) {
        return this.str.substr(this.index);
    }
    return '';
};
Tokenizer.prototype.previous = function() { return this.str.charAt(this.index-1);};
export default {
    lex: function(src, opts) {
        return new Tokenizer(src, opts);
    },

    TOKEN_STRING: TOKEN_STRING,
    TOKEN_WHITESPACE: TOKEN_WHITESPACE,
    TOKEN_DATA: TOKEN_DATA,
    TOKEN_BLOCK_START: TOKEN_BLOCK_START,
    TOKEN_BLOCK_END: TOKEN_BLOCK_END,
    TOKEN_VARIABLE_START: TOKEN_VARIABLE_START,
    TOKEN_VARIABLE_END: TOKEN_VARIABLE_END,
    TOKEN_COMMENT: TOKEN_COMMENT,
    TOKEN_LEFT_PAREN: TOKEN_LEFT_PAREN,
    TOKEN_RIGHT_PAREN: TOKEN_RIGHT_PAREN,
    TOKEN_LEFT_BRACKET: TOKEN_LEFT_BRACKET,
    TOKEN_RIGHT_BRACKET: TOKEN_RIGHT_BRACKET,
    TOKEN_LEFT_CURLY: TOKEN_LEFT_CURLY,
    TOKEN_RIGHT_CURLY: TOKEN_RIGHT_CURLY,
    TOKEN_OPERATOR: TOKEN_OPERATOR,
    TOKEN_COMMA: TOKEN_COMMA,
    TOKEN_COLON: TOKEN_COLON,
    TOKEN_TILDE: TOKEN_TILDE,
    TOKEN_PIPE: TOKEN_PIPE,
    TOKEN_INT: TOKEN_INT,
    TOKEN_FLOAT: TOKEN_FLOAT,
    TOKEN_BOOLEAN: TOKEN_BOOLEAN,
    TOKEN_NONE: TOKEN_NONE,
    TOKEN_SYMBOL: TOKEN_SYMBOL,
    TOKEN_SPECIAL: TOKEN_SPECIAL,
    TOKEN_REGEX: TOKEN_REGEX
};
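// Usage sketch: only lex() and nextToken() are taken from this module; the
// importing module and its path are hypothetical.
//
//   import lexer from './lexer.js';
//
//   const tokens = lexer.lex('Hello {{ name }}!');
//   let tok;
//   while ((tok = tokens.nextToken()) !== null) {
//       console.log(tok.type, JSON.stringify(tok.value));
//   }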