// x/ohm_js/test/test-ohm-syntax.js
// A library and language for building parsers, interpreters, compilers, etc.
import test from 'ava-spec';
import fs from 'fs';import * as ohm from '../index.mjs';import {buildGrammar} from '../src/buildGrammar.js';
// Grammar sources used as fixtures by this test file. Both paths are relative,
// so they resolve against the process's current working directory.
const arithmeticGrammarSource = fs.readFileSync('test/arithmetic.ohm', 'utf8');
const ohmGrammarSource = fs.readFileSync('src/ohm-grammar.ohm', 'utf8');

// `describe` is read off the default export of 'ava-spec'.
const describe = test.describe;
// --------------------------------------------------------------------
// Helpers
// --------------------------------------------------------------------
/**
 * Asserts that two grammars are equivalent, recursing up the chain of
 * super-grammars.
 *
 * At each level only `namespaceName`, `name`, `ruleDecls` and `rules` are
 * deep-compared. The `constructors` property is skipped on purpose: it holds
 * closures, which would cause spurious failures if compared.
 *
 * @param {object} t - Assertion object; `t.is` and `t.deepEqual` are used.
 * @param {object|undefined} expected - The reference grammar.
 * @param {object|undefined} actual - The grammar under test.
 */
function compareGrammars(t, expected, actual) {
  // Catches the case where one side is undefined and the other isn't.
  t.is(typeof actual, typeof expected);

  if (!expected || !actual) {
    return;
  }

  // Super-grammars are compared first, then this level's own properties.
  // `superGrammar` itself and `constructors` are left out of the list below.
  compareGrammars(t, expected.superGrammar, actual.superGrammar);
  for (const prop of ['namespaceName', 'name', 'ruleDecls', 'rules']) {
    t.deepEqual(actual[prop], expected[prop]);
  }
}
/**
 * Creates a function that evaluates a `tree` attribute on nodes of grammar `g`.
 *
 * The attribute's actions give every nonterminal the shape
 * `['id', <n>, <ctorName>, ...childTrees]`, where `<n>` comes from a counter
 * that is bumped each time the nonterminal action runs. Terminals evaluate to
 * their source string and iteration nodes to an array of their children's
 * trees.
 *
 * @param {object} g - Grammar; only `g.createSemantics()` is called on it.
 * @returns {Function} `makeTree(node)`, which returns `s(node).tree`. It also
 *     carries a `_getNextId()` method returning the current counter value.
 */
function buildTreeNodeWithUniqueId(g) {
  let idCounter = 0;

  const treeActions = {
    _iter(...children) {
      return children.map(c => c.tree);
    },
    _nonterminal(...children) {
      // The id is taken before the children's `tree` attributes are read.
      const header = ['id', idCounter++, this.ctorName];
      return [...header, ...children.map(child => child.tree)];
    },
    _terminal() {
      return this.sourceString;
    },
  };
  const semantics = g.createSemantics().addAttribute('tree', treeActions);

  const makeTree = node => semantics(node).tree;
  makeTree._getNextId = () => idCounter;
  return makeTree;
}
/**
 * Asserts that a match result reports success.
 *
 * Both accessors are checked: `succeeded()` must return `true` and `failed()`
 * must return `false`.
 *
 * @param {object} t - Assertion object; `t.is` is used.
 * @param {object} matchResult - Object exposing `succeeded()` and `failed()`.
 * @param {string} [optMessage] - Message forwarded to both assertions.
 */
function assertSucceeds(t, matchResult, optMessage) {
  const didSucceed = matchResult.succeeded();
  t.is(didSucceed, true, optMessage);

  const didFail = matchResult.failed();
  t.is(didFail, false, optMessage);
}
/**
 * Asserts that a match result reports failure.
 *
 * Both accessors are checked: `succeeded()` must return `false` and `failed()`
 * must return `true`.
 *
 * @param {object} t - Assertion object; `t.is` is used.
 * @param {object} matchResult - Object exposing `succeeded()` and `failed()`.
 * @param {string} [optMessage] - Message forwarded to both assertions.
 */
function assertFails(t, matchResult, optMessage) {
  const didSucceed = matchResult.succeeded();
  t.is(didSucceed, false, optMessage);

  const didFail = matchResult.failed();
  t.is(didFail, true, optMessage);
}
// --------------------------------------------------------------------
// Tests
// --------------------------------------------------------------------
// A one-character terminal: matches exactly that character and nothing else.
test('char', t => {
  const grammar = ohm.grammar('M { bang = "!" }');
  const terminalActions = {
    _terminal() {
      return this.sourceString;
    },
  };
  const semantics = grammar.createSemantics().addAttribute('v', terminalActions);

  // Recognition: trailing input and empty input are both rejected.
  assertSucceeds(t, grammar.match('!'));
  assertFails(t, grammar.match('!a'));
  assertFails(t, grammar.match(''));

  // The terminal's attribute value is its source text.
  const cst = grammar.match('!');
  t.is(semantics(cst).v, '!');
});
// Multi-character terminals, including every escape form used in the literal.
test('string', t => {
  const grammar = ohm.grammar('M { foo = "foo\\b\\n\\r\\t\\\\\\"\\u01bcff\\x8f" }');
  const terminalActions = {
    _terminal() {
      return this.sourceString;
    },
  };
  const semantics = grammar.createSemantics().addAttribute('v', terminalActions);

  // The input the grammar's escaped literal is expected to denote.
  const escapedInput = 'foo\b\n\r\t\\"\u01bcff\x8f';

  assertSucceeds(t, grammar.match(escapedInput));
  assertFails(t, grammar.match('foo1'));
  assertFails(t, grammar.match('bar'));

  const cst = grammar.match(escapedInput);
  t.is(semantics(cst).v, escapedInput);

  const buildBadGrammar = () => {
    ohm.grammar('G { r = "\\w" }');
  };
  t.throws(
    buildBadGrammar,
    {message: /Expected "\\""/},
    'unrecognized escape characters are parse errors',
  );
});
// `\u{...}` escapes inside grammar terminals, plus literal astral characters.
test('unicode code point escapes', t => {
  const twoEscapes = ohm.grammar(String.raw`G { start = "\u{78}\u{78}" }`);
  assertSucceeds(t, twoEscapes.match('\u{78}\u{78}'));

  const escapedEmoji = ohm.grammar(String.raw`G { start = "\u{1F920}" }`);
  assertSucceeds(t, escapedEmoji.match('🤠'));

  const literalEmoji = ohm.grammar(String.raw`G { start = "🤠" }`);
  assertSucceeds(t, literalEmoji.match('🤠'));

  const twoRules = ohm.grammar(String.raw`G { a = "😬" b="🤠" }`);
  assertSucceeds(t, twoRules.match('🤠', 'b'));

  // More than 6 hex digits is just a parse error. (We'd like to make this nicer.)
  // NOTE(review): the terminal in this grammar source has no closing quote;
  // it is kept exactly as written -- confirm that this is intentional.
  const tooManyDigits = () => ohm.grammar(String.raw`G { start = "\u{0000000} }`);
  t.throws(tooManyDigits, {
    message: /Expected "\\"" or not "\\\\"/,
  });

  const outOfRange = () => ohm.grammar('G { start = "\\u{FFFFFF}" }');
  t.throws(outOfRange, {
    message: /U\+FFFFFF is not a valid Unicode code point/,
  });
});
// Exercises the `lower`, `upper` and `letter` rules on non-ASCII input, using
// a grammar that declares no rules of its own.
describe('unicode', test => {
  const grammar = ohm.grammar('M {}');

  test('recognition', t => {
    // lower
    assertSucceeds(t, grammar.match('a', 'lower'));
    assertSucceeds(t, grammar.match('\u00E9', 'lower'), 'small letter e with acute');
    assertSucceeds(t, grammar.match('\u03C9', 'lower'), 'Greek small letter Omega');
    assertFails(t, grammar.match('`', 'lower'));
    assertFails(t, grammar.match('\u20AC', 'lower'), 'Euro sign');
    assertFails(t, grammar.match('\u01C0', 'lower'), 'Latin letter dental click');

    // upper
    assertSucceeds(t, grammar.match('Z', 'upper'));
    assertSucceeds(t, grammar.match('\u03A9', 'upper'), 'Greek capital letter Omega');
    assertFails(t, grammar.match('[', 'upper'));
    assertFails(t, grammar.match('\u20AC', 'upper'), 'Euro sign');
    assertFails(t, grammar.match('\u01C0', 'upper'), 'Latin letter dental click');

    // letter
    assertSucceeds(t, grammar.match('\u01C0', 'letter'), 'dental click is a letter');
    assertSucceeds(t, grammar.match(['\u01C0'], 'letter'), 'dental click in a list');
  });

  test('semantic actions', t => {
    const doublingActions = {
      _terminal() {
        return this.sourceString + this.sourceString;
      },
    };
    const semantics = grammar.createSemantics().addAttribute('v', doublingActions);
    const result = grammar.match('\u01C0', 'letter');
    t.is(semantics(result).v, '\u01C0\u01C0');
  });
});
// Character ranges: both endpoints must be single-character terminals.
test('ranges', t => {
  const grammar = ohm.grammar('M { charRange = "0".."9" }');
  const semantics = grammar.createSemantics().addAttribute('v', {
    _terminal() {
      return this.sourceString;
    },
  });

  assertSucceeds(t, grammar.match('6', 'charRange'));
  assertFails(t, grammar.match('x', 'charRange'));
  t.is(semantics(grammar.match('4', 'charRange')).v, '4');

  // [grammar source, expected error pattern, assertion message]
  const invalidRanges = [
    ['M { charRange = "ab".."c" }', /Expected "}"/, 'from-terminal must have length 1'],
    ['M { charRange = "ab".."cd" }', /Expected "}"/, 'from-terminal must have length 1'],
    ['M { charRange = "a".."bc" }', /Expected "\\""/, 'to-terminal must have length 1'],
  ];
  for (const [source, pattern, description] of invalidRanges) {
    t.throws(
      () => {
        ohm.grammar(source);
      },
      {message: pattern},
      description,
    );
  }
});
// Ranges whose endpoints lie outside the Basic Multilingual Plane, written
// first as literal characters and then as `\u{...}` escapes.
test('ranges w/ code points > 0xFFFF', t => {
  const literalGrammar = ohm.grammar(` G { face = "😇".."😈" notFace = ~face any } `);

  // Every emoji by code point: https://emojipedia.org/emoji/
  assertFails(t, literalGrammar.match('😆')); // just below
  assertSucceeds(t, literalGrammar.match('😇'));
  assertSucceeds(t, literalGrammar.match('😈'));
  assertFails(t, literalGrammar.match('😉')); // just above

  assertSucceeds(t, literalGrammar.match('x', 'notFace'));

  const valActions = {
    _terminal() {
      return this.sourceString;
    },
  };

  const literalSemantics = literalGrammar.createSemantics().addAttribute('val', valActions);
  t.is(literalSemantics(literalGrammar.match('😈')).val, '😈');

  // Test the same thing, but using Unicode code point escapes.
  const escapedGrammar = ohm.grammar(String.raw`G { face = "\u{1F607}".."\u{1F608}" }`);
  assertFails(t, escapedGrammar.match('😆')); // just below
  assertSucceeds(t, escapedGrammar.match('😇'));
  assertSucceeds(t, escapedGrammar.match('😈'));
  assertFails(t, escapedGrammar.match('😉')); // just above

  const escapedSemantics = escapedGrammar.createSemantics().addAttribute('val', valActions);
  t.is(escapedSemantics(escapedGrammar.match('😈')).val, '😈');
});
// Edge cases for ranges: multi-code-point endpoints, a range starting at
// U+0000, and input containing an unpaired high surrogate.
test('ranges w/ code points > 0xFFFF, special cases', t => {
  // "Peace hand sign" is two code points, so this should fail.
  const buildMultiCodePointRange = () => ohm.grammar('G { start = "✌️".."✌️" }');
  t.throws(buildMultiCodePointRange);

  const valActions = {
    _terminal() {
      return this.sourceString;
    },
  };

  const wideGrammar = ohm.grammar('G { face = "\u{0}".."\u{1F608}" }');
  assertSucceeds(t, wideGrammar.match('😇'));
  const wideSemantics = wideGrammar.createSemantics().addAttribute('val', valActions);
  t.is(wideSemantics(wideGrammar.match('😈')).val, '😈');

  const surrogateGrammar = ohm.grammar(String.raw` G { start = "\u{1F603}".."\u{1F603}" | "\uD800".."\uFFFF" "x" -- fallback } `);
  // Try matching against a string where the first unit is a high surrogate,
  // but the second unit is *not* a low surrogate.
  assertSucceeds(t, surrogateGrammar.match('\u{D83D}x'));
});
// `any` is compared against `.` in a /u regex: each is expected to consume a
// whole code point, even when that code point takes two UTF-16 units.
test('any consumes an entire code point', t => {
  const grammar = ohm.grammar('G { start = any any }');
  const twoCodePoints = /../u; // The regex equivalent of `any any`.

  // Sanity-check the UTF-16 lengths of the inputs used below.
  t.is('😇'.length, 2);
  t.is('😇!'.length, 3);
  t.is('😇😇'.length, 4);

  for (const input of ['😇😇', '😇!', '!😇']) {
    t.is(grammar.match(input).succeeded(), true);
    t.truthy(twoCodePoints.exec(input));
  }

  t.is('👋🏿'.length, 4); // Skin color modifier is a separate code point.
  t.is(grammar.match('👋🏿').succeeded(), true);
  t.truthy(twoCodePoints.exec('👋🏿'));
});
// Alternation between two single-character terminals.
describe('alt', test => {
  const grammar = ohm.grammar('M { altTest = "a" | "b" }');
  const semantics = grammar.createSemantics().addAttribute('v', {
    _terminal() {
      return this.sourceString;
    },
  });

  test('recognition', t => {
    assertFails(t, grammar.match(''));
    assertSucceeds(t, grammar.match('a'));
    assertSucceeds(t, grammar.match('b'));
    assertFails(t, grammar.match('ab'));
  });

  test('semantic actions', t => {
    // Each alternative evaluates to its own source text.
    for (const input of ['a', 'b']) {
      t.is(semantics(grammar.match(input)).v, input);
    }
  });
});
// Same expectations as a plain two-way alternation, but the rule body in the
// definition begins with a leading `|`.
describe("rule bodies in defs can start with a |, and it's a no-op", test => {
  const grammar = ohm.grammar('M { altTest = | "a" | "b" }');
  const semantics = grammar.createSemantics().addAttribute('v', {
    _terminal() {
      return this.sourceString;
    },
  });

  test('recognition', t => {
    assertFails(t, grammar.match(''));
    assertSucceeds(t, grammar.match('a'));
    assertSucceeds(t, grammar.match('b'));
    assertFails(t, grammar.match('ab'));
  });

  test('semantic actions', t => {
    for (const input of ['a', 'b']) {
      t.is(semantics(grammar.match(input)).v, input);
    }
  });
});
// Overrides `space` with a body that begins with a leading `|`. After the
// override, a real space or tab is expected not to match `space`.
describe("rule bodies in overrides can start with a |, and it's a no-op", test => {
  const grammar = ohm.grammar('M { space := | "a" | "b" }');
  const semantics = grammar.createSemantics().addAttribute('v', {
    _terminal() {
      return this.sourceString;
    },
  });

  test('recognition', t => {
    assertFails(t, grammar.match('', 'space'));
    assertSucceeds(t, grammar.match('a', 'space'));
    assertSucceeds(t, grammar.match('b', 'space'));
    assertFails(t, grammar.match(' ', 'space'));
    assertFails(t, grammar.match('\t', 'space'));
  });

  test('semantic actions', t => {
    for (const input of ['a', 'b']) {
      t.is(semantics(grammar.match(input, 'space')).v, input);
    }
  });
});
// Extends `space` with a body that begins with a leading `|`. After the
// extension, a real space or tab is expected to still match `space`.
describe("rule bodies in extends can start with a |, and it's a no-op", test => {
  const grammar = ohm.grammar('M { space += | "a" | "b" }');
  const semantics = grammar.createSemantics().addAttribute('v', {
    _terminal() {
      return this.sourceString;
    },
  });

  test('recognition', t => {
    assertFails(t, grammar.match('', 'space'));
    assertSucceeds(t, grammar.match('a', 'space'));
    assertSucceeds(t, grammar.match('b', 'space'));
    assertSucceeds(t, grammar.match(' ', 'space'));
    assertSucceeds(t, grammar.match('\t', 'space'));
  });

  test('semantic actions', t => {
    for (const input of ['a', 'b']) {
      t.is(semantics(grammar.match(input, 'space')).v, input);
    }
  });
});
// A sequence of three terminals; the rule's action receives one argument per
// element of the sequence.
describe('seq', test => {
  const grammar = ohm.grammar('M { start = "a" "bc" "z" }');

  test('recognition', t => {
    assertFails(t, grammar.match('a'));
    assertFails(t, grammar.match('bc'));
    assertSucceeds(t, grammar.match('abcz'));
    assertFails(t, grammar.match('abbz'));
  });

  test('semantic actions', t => {
    const matched = grammar.match('abcz');
    const seqActions = {
      start(x, y, z) {
        return [x.sourceString, y.sourceString, z.sourceString];
      },
    };
    const semantics = grammar.createSemantics().addAttribute('v', seqActions);
    t.deepEqual(semantics(matched).v, ['a', 'bc', 'z']);
  });
});
// An alternation whose branches are both three-element sequences.
describe('alts and seqs together', test => {
  const grammar = ohm.grammar('M { start = "a" "b" "c" | "1" "2" "3" }');

  test('recognition', t => {
    assertFails(t, grammar.match('ab'));
    assertFails(t, grammar.match('12'));
    assertSucceeds(t, grammar.match('abc'));
    assertSucceeds(t, grammar.match('123'));
  });

  test('semantic actions', t => {
    // The single `start` action serves both branches; it reports the first
    // and last element and ignores the middle one.
    const firstAndLast = {
      start(x, _, y) {
        return [x.sourceString, y.sourceString];
      },
    };
    const semantics = grammar.createSemantics().addAttribute('v', firstAndLast);
    t.deepEqual(semantics(grammar.match('abc')).v, ['a', 'c']);
    t.deepEqual(semantics(grammar.match('123')).v, ['1', '3']);
  });
});
// Repetition operators: `+` needs at least one match, `*` accepts none.
describe('kleene-* and kleene-+', test => {
  const grammar = ohm.grammar(` M { number = digit+ digits = digit* sss = &number number } `);

  test('recognition', t => {
    assertFails(t, grammar.match('1234a', 'number'));
    assertSucceeds(t, grammar.match('1234', 'number'));
    assertSucceeds(t, grammar.match('5', 'number'));
    assertFails(t, grammar.match('', 'number'));

    assertFails(t, grammar.match('1234a', 'digits'));
    assertSucceeds(t, grammar.match('1234', 'digits'));
    assertSucceeds(t, grammar.match('5', 'digits'));
    assertSucceeds(t, grammar.match('', 'digits'));
  });

  test('semantic actions', t => {
    const taggingActions = {
      number(digits) {
        return ['digits', digits.children.map(c => c.v)];
      },
      digit(expr) {
        return ['digit', expr.v];
      },
      _terminal() {
        return this.sourceString;
      },
    };
    const semantics = grammar.createSemantics().addAttribute('v', taggingActions);
    const expected = [
      'digits',
      [
        ['digit', '1'],
        ['digit', '2'],
        ['digit', '3'],
        ['digit', '4'],
      ],
    ];
    t.deepEqual(semantics(grammar.match('1234', 'number')).v, expected);
  });

  test('semantic actions are evaluated lazily', t => {
    const toTree = buildTreeNodeWithUniqueId(grammar);
    // `sss` contains `number` twice (once under `&`). Both occurrences are
    // expected to yield this same subtree, with only five ids handed out.
    const numberTree = [
      'id',
      1,
      'number',
      [
        ['id', 2, 'digit', '1'],
        ['id', 3, 'digit', '2'],
        ['id', 4, 'digit', '3'],
      ],
    ];
    t.deepEqual(toTree(grammar.match('123', 'sss')), ['id', 0, 'sss', numberTree, numberTree]);
    t.is(toTree._getNextId(), 5);
  });
});
// The optional operator `?`.
describe('opt', test => {
  const grammar = ohm.grammar('M { name = "dr"? "warth" }');

  test('recognition', t => {
    assertSucceeds(t, grammar.match('drwarth'));
    assertSucceeds(t, grammar.match('warth'));
    assertFails(t, grammar.match('mrwarth'));
  });

  test('semantic actions', t => {
    const nameActions = {
      name(title, last) {
        // First child's value of the optional node, or undefined if it has none.
        const titleValues = title.children.map(c => c.v);
        return [titleValues[0], last.sourceString];
      },
      _terminal() {
        return this.sourceString;
      },
    };
    const semantics = grammar.createSemantics().addAttribute('v', nameActions);
    t.deepEqual(semantics(grammar.match('drwarth')).v, ['dr', 'warth']);
    t.deepEqual(semantics(grammar.match('warth')).v, [undefined, 'warth']);
  });
});
// Negative lookahead `~`: the `start` action is declared with one parameter.
describe('not', test => {
  const grammar = ohm.grammar('M { start = ~"hello" any* }');

  test('recognition', t => {
    assertSucceeds(t, grammar.match('yello world'));
    assertFails(t, grammar.match('hello world'));
  });

  test('semantic actions', t => {
    const startActions = {
      start(x) {
        return x.sourceString;
      },
    };
    const semantics = grammar.createSemantics().addAttribute('v', startActions);
    t.is(semantics(grammar.match('yello world')).v, 'yello world');
  });
});
// Positive lookahead `&`: the `start` action is declared with two parameters,
// the first of which is the lookahead.
describe('lookahead', test => {
  const grammar = ohm.grammar('M { start = &"hello" any* }');

  test('recognition', t => {
    assertSucceeds(t, grammar.match('hello world'));
    assertFails(t, grammar.match('hell! world'));
  });

  test('semantic actions', t => {
    const startActions = {
      start(x, _) {
        return x.sourceString;
      },
    };
    const semantics = grammar.createSemantics().addAttribute('v', startActions);
    t.is(semantics(grammar.match('hello world')).v, 'hello');
  });
});
// Left-recursion tests. The outer group holds the simple case directly; the
// other left-recursion groups are nested inside it, each with its own grammar.
describe('simple left recursion', test => {
  const grammar = ohm.grammar(` M { number = numberRec | digit numberRec = number digit } `);

  test('recognition', t => {
    assertFails(t, grammar.match('', 'number'));
    assertFails(t, grammar.match('a', 'number'));
    assertSucceeds(t, grammar.match('1', 'number'));
    assertSucceeds(t, grammar.match('12', 'number'));
    assertSucceeds(t, grammar.match('123', 'number'));
    assertSucceeds(t, grammar.match('7276218173', 'number'));
  });

  test('semantic actions', t => {
    const matched = grammar.match('1234', 'number');

    // 'v' computes the numeric value of the matched digits.
    const valueActions = {
      numberRec(n, d) {
        return n.v * 10 + d.v;
      },
      digit(expr) {
        return expr.v.charCodeAt(0) - '0'.charCodeAt(0);
      },
      _terminal() {
        return this.sourceString;
      },
    };
    // 't' builds a tagged tree that mirrors the shape of the parse.
    const treeActions = {
      number(expr) {
        return ['number', expr.t];
      },
      numberRec(n, d) {
        return ['numberRec', n.t, d.t];
      },
      _terminal() {
        return this.sourceString;
      },
    };
    const semantics = grammar
      .createSemantics()
      .addAttribute('v', valueActions)
      .addAttribute('t', treeActions);

    t.is(semantics(matched).v, 1234);
    t.deepEqual(semantics(matched).t, [
      'number',
      [
        'numberRec',
        ['number', ['numberRec', ['number', ['numberRec', ['number', '1'], '2']], '3']],
        '4',
      ],
    ]);
  });

  describe('simple left recursion, with non-involved rules', test => {
    const grammar = ohm.grammar(` M { add = addRec | pri addRec = add "+" pri pri = priX | priY priX = "x" priY = "y" } `);

    test('recognition', t => {
      assertSucceeds(t, grammar.match('x+y+x', 'add'));
    });

    test('semantic actions', t => {
      const addActions = {
        addRec(x, _, y) {
          return [x.v, '+', y.v];
        },
        _terminal() {
          return this.sourceString;
        },
      };
      const semantics = grammar.createSemantics().addAttribute('v', addActions);
      t.deepEqual(semantics(grammar.match('x+y+x', 'add')).v, [['x', '+', 'y'], '+', 'x']);
    });
  });

  describe('indirect left recursion', test => {
    const grammar = ohm.grammar(` M { number = foo | digit foo = bar bar = baz baz = qux qux = quux quux = numberRec numberRec = number digit } `);

    test('recognition', t => {
      assertFails(t, grammar.match('', 'number'));
      assertFails(t, grammar.match('a', 'number'));
      assertSucceeds(t, grammar.match('1', 'number'));
      assertSucceeds(t, grammar.match('123', 'number'));
      assertSucceeds(t, grammar.match('7276218173', 'number'));
    });

    test('semantic actions', t => {
      const pairActions = {
        numberRec(n, d) {
          return [n.v, d.v];
        },
        _terminal() {
          return this.sourceString;
        },
      };
      const semantics = grammar.createSemantics().addAttribute('v', pairActions);
      t.deepEqual(semantics(grammar.match('1234', 'number')).v, [[['1', '2'], '3'], '4']);
    });
  });

  describe('nested left recursion', test => {
    const grammar = ohm.grammar(` M { addExp = addExpRec | mulExp addExpRec = addExp "+" mulExp mulExp = mulExpRec | priExp mulExpRec = mulExp "*" priExp priExp = "0".."9" sss = &addExp addExp } `);

    test('recognition', t => {
      assertSucceeds(t, grammar.match('1'));
      assertSucceeds(t, grammar.match('2+3'));
      assertFails(t, grammar.match('4+'));
      assertSucceeds(t, grammar.match('5*6'));
      assertSucceeds(t, grammar.match('7*8+9+0'));
    });

    test('semantic actions', t => {
      const matched = grammar.match('1*2+3+4*5');

      // 't': tagged tree mirroring the parse.
      const treeActions = {
        addExp(expr) {
          return ['addExp', expr.t];
        },
        addExpRec(x, _, y) {
          return ['addExpRec', x.t, y.t];
        },
        mulExp(expr) {
          return ['mulExp', expr.t];
        },
        mulExpRec(x, _, y) {
          return ['mulExpRec', x.t, y.t];
        },
        _terminal() {
          return this.sourceString;
        },
      };
      // 'v': numeric value of the expression.
      const valueActions = {
        addExp(expr) {
          return expr.v;
        },
        addExpRec(x, _, y) {
          return x.v + y.v;
        },
        mulExp(expr) {
          return expr.v;
        },
        mulExpRec(x, _, y) {
          return x.v * y.v;
        },
        priExp(expr) {
          return parseInt(expr.v);
        },
        _terminal() {
          return this.sourceString;
        },
      };
      // 'p': fully parenthesized rendering of the expression.
      const parenActions = {
        addExpRec(x, _, y) {
          return '(' + x.p + '+' + y.p + ')';
        },
        mulExpRec(x, _, y) {
          return '(' + x.p + '*' + y.p + ')';
        },
        _terminal() {
          return this.sourceString;
        },
      };
      const semantics = grammar
        .createSemantics()
        .addAttribute('t', treeActions)
        .addAttribute('v', valueActions)
        .addAttribute('p', parenActions);

      t.deepEqual(semantics(matched).t, [
        'addExp',
        [
          'addExpRec',
          [
            'addExp',
            [
              'addExpRec',
              ['addExp', ['mulExp', ['mulExpRec', ['mulExp', '1'], '2']]],
              ['mulExp', '3'],
            ],
          ],
          ['mulExp', ['mulExpRec', ['mulExp', '4'], '5']],
        ],
      ]);
      t.is(semantics(matched).v, 25);
      t.is(semantics(matched).p, '(((1*2)+3)+(4*5))');
    });

    test('semantic actions are evaluated lazily', t => {
      const matched = grammar.match('1*2+3+4*5', 'sss');
      const toTree = buildTreeNodeWithUniqueId(grammar);
      // `sss` contains `addExp` twice (once under `&`). Both occurrences are
      // expected to yield this same subtree, with only 18 ids handed out.
      const addExpTree = [
        'id',
        1,
        'addExp',
        [
          'id',
          2,
          'addExpRec',
          [
            'id',
            3,
            'addExp',
            [
              'id',
              4,
              'addExpRec',
              [
                'id',
                5,
                'addExp',
                [
                  'id',
                  6,
                  'mulExp',
                  [
                    'id',
                    7,
                    'mulExpRec',
                    ['id', 8, 'mulExp', ['id', 9, 'priExp', '1']],
                    '*',
                    ['id', 10, 'priExp', '2'],
                  ],
                ],
              ],
              '+',
              ['id', 11, 'mulExp', ['id', 12, 'priExp', '3']],
            ],
          ],
          '+',
          [
            'id',
            13,
            'mulExp',
            [
              'id',
              14,
              'mulExpRec',
              ['id', 15, 'mulExp', ['id', 16, 'priExp', '4']],
              '*',
              ['id', 17, 'priExp', '5'],
            ],
          ],
        ],
      ];
      t.deepEqual(toTree(matched), ['id', 0, 'sss', addExpTree, addExpTree]);
      t.is(toTree._getNextId(), 18);
    });
  });

  describe('nested and indirect left recursion', test => {
    const grammar = ohm.grammar(` G { addExp = a | c a = b b = addExpRec addExpRec = addExp "+" mulExp c = d d = mulExp mulExp = e | g e = f f = mulExpRec g = h h = priExp mulExpRec = mulExp "*" priExp priExp = "0".."9" } `);

    test('recognition', t => {
      assertSucceeds(t, grammar.match('1'));
      assertSucceeds(t, grammar.match('2+3'));
      assertFails(t, grammar.match('4+'));
      assertSucceeds(t, grammar.match('5*6'));
      assertSucceeds(t, grammar.match('7+8*9+0'));
    });

    test('semantic actions', t => {
      const infixActions = {
        addExpRec(x, _, y) {
          return [x.t, '+', y.t];
        },
        mulExpRec(x, _, y) {
          return [x.t, '*', y.t];
        },
        _terminal() {
          return this.sourceString;
        },
      };
      const semantics = grammar.createSemantics().addAttribute('t', infixActions);
      t.deepEqual(semantics(grammar.match('7+8*9+0')).t, [['7', '+', ['8', '*', '9']], '+', '0']);
    });
  });

  describe('tricky left recursion (different heads at same position)', test => {
    const grammar = ohm.grammar(` G { tricky = &foo bar foo = fooRec | digit fooRec = bar digit bar = barRec | digit barRec = foo digit } `);

    test('recognition', t => {
      assertSucceeds(t, grammar.match('1234', 'tricky'));
    });

    test('semantic actions', t => {
      const matched = grammar.match('1234', 'tricky');
      // TODO: perhaps just use JSON.stringify(f) here, and compare the result?
      const treeActions = {
        tricky(_, x) {
          return ['tricky', x.t];
        },
        foo(expr) {
          return ['foo', expr.t];
        },
        fooRec(x, y) {
          return ['fooRec', x.t, y.t];
        },
        bar(expr) {
          return ['bar', expr.t];
        },
        barRec(x, y) {
          return ['barRec', x.t, y.t];
        },
        _terminal() {
          return this.sourceString;
        },
      };
      const semantics = grammar.createSemantics().addAttribute('t', treeActions);
      t.deepEqual(semantics(matched).t, [
        'tricky',
        [
          'bar',
          ['barRec', ['foo', ['fooRec', ['bar', ['barRec', ['foo', '1'], '2']], '3']], '4'],
        ],
      ]);
    });
  });
});
// Grammar inheritance: declaring a super-grammar, and overriding (`:=`) and
// extending (`+=`) inherited rules.
//
// The callback parameter is named `test`, like every other `describe` in this
// file, so the tests registered directly in this group go through the function
// the group is given rather than the module-level `test`.
describe('inheritance', test => {
  test('no namespace', t => {
    t.throws(
      () => {
        ohm.grammar('G2 <: G1 {}');
      },
      {message: /Grammar G1 is not declared/},
    );
  });

  test('empty namespace', t => {
    t.throws(
      () => {
        ohm.grammar('G2 <: G1 {}', {});
      },
      {message: /Grammar G1 is not declared in namespace/},
    );
  });

  test('duplicate definition', t => {
    t.throws(
      () => {
        ohm.grammars('G1 { foo = "foo" } G2 <: G1 { foo = "bar" }');
      },
      {
        // eslint-disable-next-line max-len
        message: /Duplicate declaration for rule 'foo' in grammar 'G2' \(originally declared in 'G1'\)/,
      },
      'throws if rule is already declared in super-grammar',
    );
  });

  describe('override', test => {
    // Shared namespace; some tests below add further grammars to it.
    const ns = ohm.grammars('G1 { number = digit+ } G2 <: G1 { digit := "a".."z" }');

    test('it checks that rule exists in super-grammar', t => {
      t.throws(
        () => {
          ohm.grammar('G3 <: G1 { foo := "foo" }', ns);
        },
        {message: /Cannot override rule foo because it is not declared in G1/},
      );
    });

    test("shouldn't matter if arities aren't the same", t => {
      // It's OK for the semantic action "API" of a grammar to be different
      // from that of its super-grammar.

      // arity(overriding rule) > arity(overridden rule)
      ns.M1 = ohm.grammar('M1 { foo = "foo" }');
      ohm.grammar('M2 <: M1 { foo := "foo" "bar" }', ns);

      // arity(overriding rule) < arity(overridden rule)
      ns.M3 = ohm.grammar('M3 { foo = digit digit }', ns);
      ns.M4 = ohm.grammar('M4 <: M3 { foo := digit }', ns);
      // Reaching this point without an exception is the assertion.
      t.pass();
    });

    test('should be ok to add new cases', t => {
      t.truthy(ohm.grammar('G { space := "foo" -- newCaseLabel }'));
    });

    test('recognition', t => {
      assertSucceeds(t, ns.G1.match('1234', 'number'));
      assertFails(t, ns.G1.match('hello', 'number'));
      assertFails(t, ns.G1.match('h3llo', 'number'));

      assertFails(t, ns.G2.match('1234', 'number'));
      assertSucceeds(t, ns.G2.match('hello', 'number'));
      assertFails(t, ns.G2.match('h3llo', 'number'));
    });

    test('semantic actions', t => {
      const s = ns.G2.createSemantics().addAttribute('v', {
        number(digits) {
          return ['number', digits.children.map(c => c.v)];
        },
        digit(d) {
          return ['digit', d.v];
        },
        _terminal() {
          return this.sourceString;
        },
      });
      const expected = [
        'number',
        [
          ['digit', 'a'],
          ['digit', 'b'],
          ['digit', 'c'],
          ['digit', 'd'],
        ],
      ];
      t.deepEqual(s(ns.G2.match('abcd', 'number')).v, expected);
    });
  });

  describe('extend', test => {
    // Shared namespace; the arity test below adds further grammars to it.
    const ns = ohm.grammars('G1 { foo = "aaa" "bbb" } G2 <: G1 { foo += "111" "222" }');

    test('recognition', t => {
      assertSucceeds(t, ns.G1.match('aaabbb'));
      assertFails(t, ns.G1.match('111222'));

      assertSucceeds(t, ns.G2.match('aaabbb'));
      assertSucceeds(t, ns.G2.match('111222'));
    });

    test('semantic actions', t => {
      const s = ns.G2.createSemantics().addAttribute('v', {
        foo(x, y) {
          return [x.sourceString, y.sourceString];
        },
      });
      t.deepEqual(s(ns.G2.match('aaabbb')).v, ['aaa', 'bbb']);
      t.deepEqual(s(ns.G2.match('111222')).v, ['111', '222']);
    });

    test('should check that rule exists in super-grammar', t => {
      t.throws(
        () => {
          ohm.grammar('G3 <: G1 { bar += "bar" }', ns);
        },
        {message: /Cannot extend rule bar because it is not declared in G1/},
      );
    });

    test('should make sure rule arities are compatible', t => {
      // An extending rule must produce the same number of values
      // as the underlying rule. This is to ensure the semantic
      // action "API" doesn't change.

      // In both expected messages below, the caret line is padded so that `^`
      // sits under the reported column: a 6-character gutter (the width of
      // '> 1 | ') plus (column - 1) spaces.

      // Too many:
      ns.M1 = ohm.grammar('M1 { foo = "foo" bar = "bar" baz = "baz" }');
      try {
        ohm.grammar('M2 <: M1 { foo += bar baz }', ns);
        t.fail('Expected an exception to be thrown');
      } catch (e) {
        t.is(
          e.message,
          [
            'Line 1, col 19:',
            '> 1 | M2 <: M1 { foo += bar baz }',
            ' '.repeat(24) + '^~~~~~~',
            'Rule foo involves an alternation which has inconsistent arity (expected 1, got 2)',
          ].join('\n'),
        );
      }

      // Too few:
      ns.M3 = ohm.grammar('M3 { foo = digit digit }');
      try {
        ohm.grammar('M4 <: M3 { foo += digit }', ns);
        t.fail('Expected an exception to be thrown');
      } catch (e) {
        t.is(
          e.message,
          [
            'Line 1, col 19:',
            '> 1 | M4 <: M3 { foo += digit }',
            ' '.repeat(24) + '^~~~~',
            'Rule foo involves an alternation which has inconsistent arity (expected 2, got 1)',
          ].join('\n'),
        );
      }
    });

    test('should be ok to add new cases', t => {
      t.truthy(ohm.grammar('G { space += "foo" -- newCaseLabel }'));
    });
  });
});
// `...` in an overriding rule body stands for the body being overridden.
test('override with "..."', t => {
  // Whether `input` matches the `letter` rule of grammar `g`.
  const matchesLetter = (g, input) => g.match(input, 'letter').succeeded();

  const newCaseFirst = ohm.grammar('G { letter := "@" | ... }');
  t.is(matchesLetter(newCaseFirst, '@'), true);
  t.is(matchesLetter(newCaseFirst, 'a'), true);

  const newCaseLast = ohm.grammar('G { letter := ... | "@" }');
  t.is(matchesLetter(newCaseLast, '@'), true);
  t.is(matchesLetter(newCaseLast, 'a'), true);

  const newCasesAround = ohm.grammar('G { letter := "3" | ... | "@" }');
  t.is(matchesLetter(newCasesAround, '@'), true);
  t.is(matchesLetter(newCasesAround, 'a'), true);
  t.is(matchesLetter(newCasesAround, '3'), true);

  t.truthy(ohm.grammar('G { letter := ... }'), 'it allows `...` as the whole body');

  // Check that the branches are evaluated in the correct order.
  const emptyFirst = ohm.grammar('G { letter := "" | ... }');
  t.is(matchesLetter(emptyFirst, ''), true);
  t.is(matchesLetter(emptyFirst, 'a'), false);
  const longerLast = ohm.grammar('G { letter := ... | "ab" }');
  t.is(matchesLetter(longerLast, 'a'), true);
  t.is(matchesLetter(longerLast, 'ab'), false);

  const parameterized = ohm.grammar(` G { Start = ListOf<letter, ","> ListOf<elem, sep> := "✌️" | ... }`);
  t.is(parameterized.match('✌️').succeeded(), true, 'it works on parameterized rules');

  t.throws(
    () => ohm.grammar('G { doesNotExist := ... }'),
    {message: /Cannot override rule doesNotExist/},
    'it gives the correct error message when overriding non-existent rule',
  );

  t.throws(
    () => ohm.grammar('G { foo = ... }'),
    {message: /Expected "}"/},
    "it's not allowed in a rule definition",
  );

  t.throws(
    () => ohm.grammar('G { letter += ... }'),
    {message: /Expected "}"/},
    "it's not allowed when extending",
  );

  t.throws(() => ohm.grammar('G { letter := "@" "#" | ... }'), {
    message: /inconsistent arity/,
  });

  t.throws(
    () => ohm.grammar('G { letter := ... | "@" | ... }'),
    {message: /at most once/},
    "'...' can appear at most once in a rule body",
  );

  /*
    TODO:
    - [ ] improve error message (inconsistent arity seems backwards)
    - [ ] improve error message when using `...` in a rule definition/extension
    - [ ] unify Extend and Combine?
    - [ ] using '...' when overriding a non-existent rule
  */
});
// Bindings: arity checking across alternation branches, and the order in
// which child attributes are evaluated.
describe('bindings', test => {
  test('inconsistent arity in alts is an error', t => {
    // `t.throws` makes the test fail when no exception is thrown at all; a
    // bare try/catch would finish without running a single assertion.
    //
    // The caret line is padded so that `^` sits under column 21: a
    // 6-character gutter (the width of '> 1 | ') plus 20 spaces.
    t.throws(() => ohm.grammar('G { foo = "a" "c" | "b" }'), {
      message: [
        'Line 1, col 21:',
        '> 1 | G { foo = "a" "c" | "b" }',
        ' '.repeat(26) + '^~~',
        'Rule foo involves an alternation which has inconsistent arity (expected 2, got 1)',
      ].join('\n'),
    });
  });

  test('by default, bindings are evaluated lazily', t => {
    const g = ohm.grammar(` G { foo = bar baz bar = "a" baz = "b" } `);

    // `bar` and `baz` stamp their result with a running id, so the ids show
    // the order in which the `foo` action asked for each child's value.
    let id = 0;
    let s = g.createSemantics().addAttribute('v', {
      foo(x, y) {
        // x is read first, then y.
        const xv = x.v;
        const yv = y.v;
        return {
          x: xv,
          y: yv,
        };
      },
      bar(expr) {
        return ['bar', expr.v, id++];
      },
      baz(expr) {
        return ['baz', expr.v, id++];
      },
      _terminal() {
        return this.sourceString;
      },
    });
    t.deepEqual(s(g.match('ab')).v, {
      x: ['bar', 'a', 0],
      y: ['baz', 'b', 1],
    });

    id = 0;
    s = g.createSemantics().addAttribute('v', {
      foo(x, y) {
        // Same as above, but y is read before x, so the ids swap.
        const yv = y.v;
        const xv = x.v;
        return {
          x: xv,
          y: yv,
        };
      },
      bar(expr) {
        return ['bar', expr.v, id++];
      },
      baz(expr) {
        return ['baz', expr.v, id++];
      },
      _terminal() {
        return this.sourceString;
      },
    });
    t.deepEqual(s(g.match('ab')).v, {
      x: ['bar', 'a', 1],
      y: ['baz', 'b', 0],
    });
  });
});
// Inline rule declarations (`-- caseName`), exercised through the arithmetic
// grammar fixture and two grammars derived from it.
test('inline rule declarations', t => {
  // Returns an evaluator for grammar `g`: given a match result, it computes
  // the value of the 'v' attribute defined by the actions below.
  function makeEval(g) {
    const evalActions = {
      addExp_plus(x, op, y) {
        return x.v + y.v;
      },
      addExp_minus(x, op, y) {
        return x.v - y.v;
      },
      mulExp_times(x, op, y) {
        return x.v * y.v;
      },
      mulExp_divide(x, op, y) {
        return x.v / y.v;
      },
      priExp_paren(oparen, e, cparen) {
        return e.v;
      },
      number_rec(n, d) {
        return n.v * 10 + d.v;
      },
      digit(expr) {
        return expr.v.charCodeAt(0) - '0'.charCodeAt(0);
      },
      _terminal() {
        return this.sourceString;
      },
    };
    const semantics = g.createSemantics().addAttribute('v', evalActions);
    return node => semantics(node).v;
  }

  const ns = {};
  const Arithmetic = ohm.grammar(arithmeticGrammarSource);
  ns.Arithmetic = Arithmetic;

  assertSucceeds(t, Arithmetic.match('1*(2+3)-4/5'), 'expr is recognized');
  const evalArithmetic = makeEval(Arithmetic);
  t.is(evalArithmetic(Arithmetic.match('10*(2+123)-4/5')), 1249.2, 'semantic action works');

  // Overriding addExp may redeclare the inline case name `minus`.
  const good = ohm.grammar(
    ` Good <: Arithmetic { addExp := addExp "~" mulExp -- minus | mulExp } `,
    ns,
  );
  t.is(makeEval(good)(good.match('2*3~4')), 2);

  // Extending addExp with the same case name is expected to be rejected.
  const buildBadExtension = () => {
    ohm.grammar('Bad <: Arithmetic { addExp += addExp "~" mulExp -- minus }', ns);
  };
  t.throws(buildBadExtension, {
    message: /rule 'addExp_minus' in grammar 'Bad' \(originally declared in 'Arithmetic'\)/,
  });

  const buildNestedInlineRule = () => {
    ohm.grammar('Bad { start = "a" ("b" -- bad\n) }');
  };
  t.throws(buildNestedInlineRule, undefined, 'inline rules must be at the top level');
});
// Tests for the distinction between lexical rules (lowercase names) and
// syntactic rules (capitalized names): which may call which, and when
// whitespace is skipped implicitly.
describe('lexical vs. syntactic rules', test => {
  test("can't call syntactic rule from lexical rule, but not the other way around", t => {
    t.truthy(ohm.grammar('G { foo = bar bar = "bar" }'), 'lexical calling lexical');
    t.throws(
      () => {
        ohm.grammar('G { foo = Bar Bar = "bar" }');
      },
      {
        message: /Cannot apply syntactic rule Bar from here \(inside a lexical context\)/,
      },
      'lexical calling syntactic',
    );
    t.truthy(ohm.grammar('G { Foo = bar bar = "bar" }'), 'syntactic calling lexical');
    t.truthy(ohm.grammar('G { Foo = Bar Bar = "bar" }'), 'syntactic calling syntactic');
  });

  test("lexical rules don't skip spaces implicitly", t => {
    const g = ohm.grammar('G { start = "foo" "bar" }');
    assertSucceeds(t, g.match('foobar', 'start'));
    assertFails(t, g.match('foo bar'));
    assertFails(t, g.match(' foo bar '));
  });

  test('syntactic rules skip spaces implicitly', t => {
    const g = ohm.grammar('G { Start = "foo" "bar" }');
    assertSucceeds(t, g.match('foobar'));
    assertSucceeds(t, g.match('foo bar'));
    assertSucceeds(t, g.match(' foo bar '));
  });

  test('mixing lexical and syntactic rules works as expected', t => {
    const g = ohm.grammar(` G { Start = foo bar foo = "foo" bar = "bar" } `);
    assertSucceeds(t, g.match('foobar'));
    assertSucceeds(t, g.match('foo bar'));
    assertSucceeds(t, g.match(' foo bar '));
  });

  // TODO: write more tests for this operator (e.g., to ensure that it's "transparent", arity-wise)
  // and maybe move it somewhere else.
  test('lexification operator works as expected', t => {
    const g = ohm.grammar(` G { ArrowFun = name #(spacesNoNl "=>") "{}" name = "x" | "y" spacesNoNl = " "* } `);
    assertSucceeds(t, g.match('x => {}'));
    assertSucceeds(t, g.match(' y => \n\n \n{}'));
    // A newline between the name and the arrow is not matched by `spacesNoNl`.
    assertFails(t, g.match('x \n => {}'));

    // Inside `#(...)` the context is lexical, so applying the syntactic rule R is an error.
    t.throws(
      () => {
        ohm.grammar('G { R = #("a" R) | "b" "c" }');
      },
      {
        message: /Cannot apply syntactic rule R from here \(inside a lexical context\)/,
      },
    );
  });
});
// Checks that iteration, lookahead and negative lookahead in syntactic rules
// leave the following space for the `#(space ...)` part to match.
test('space skipping semantics', t => {
  const grammar = ohm.grammar(` G { Iter = ">" letter+ #(space) Lookahead = ">" &letter #(space letter) NegLookahead = ">" ~digit #(space letter) } `);

  // Each entry is [input, start rule, assertion message].
  const expectations = [
    ['> a b ', 'Iter', "iter doesn't consume trailing space"],
    ['> a', 'Lookahead', "lookahead doesn't consume anything"],
    ['> a', 'NegLookahead', "negative lookahead doesn't consume anything"],
  ];

  for (const [input, startRule, description] of expectations) {
    assertSucceeds(t, grammar.match(input, startRule), description);
  }
});
// https://github.com/ohmjs/ohm/issues/282
// Regression test: a `//` comment may follow a case name, and may also appear
// after or instead of a grammar definition.
test('single-line comment after case name (#282)', t => {
  const {ohmGrammar} = ohm;

  // The line break after `// ok` is significant: a single-line comment runs to
  // the end of the line, so the next alternative and the closing brace must be
  // on later lines.
  assertSucceeds(
    t,
    ohmGrammar.match(`G {
      Start = -- foo // ok
        | "x"
    }`),
  );
  assertSucceeds(t, ohmGrammar.match('G {Start = -- foo // A comment\n}'));
  assertSucceeds(t, ohmGrammar.match('G {} // This works too'));
  assertSucceeds(t, ohmGrammar.match('// And this'));
});
// Checks that the Ohm grammar, loaded from its own source, can recognize
// grammars (including itself) and can be used to rebuild a working copy of itself.
describe('bootstrap', test => {
  const ns = ohm.grammars(ohmGrammarSource);

  // Builds a grammar from the Ohm grammar source, matched by `ns.Ohm`.
  const buildFromOhmSource = () =>
    buildGrammar(ns.Ohm.match(ohmGrammarSource, 'Grammar'), {}, ns.Ohm);

  test('it can recognize arithmetic grammar', t => {
    const result = ns.Ohm.match(arithmeticGrammarSource, 'Grammar');
    assertSucceeds(t, result);
  });

  test('it can recognize itself', t => {
    const result = ns.Ohm.match(ohmGrammarSource, 'Grammar');
    assertSucceeds(t, result);
  });

  test('it can produce a grammar that works', t => {
    const rebuilt = buildFromOhmSource();
    assertSucceeds(
      t,
      rebuilt.match(ohmGrammarSource, 'Grammar'),
      'Ohm grammar can recognize itself',
    );

    const Arithmetic = buildGrammar(
      rebuilt.match(arithmeticGrammarSource, 'Grammar'),
      {},
      rebuilt,
    );

    // Single-child rules just forward the value of their only child.
    const passThrough = expr => expr.v;
    const semantics = Arithmetic.createSemantics().addAttribute('v', {
      exp: passThrough,
      addExp: passThrough,
      addExp_plus: (x, op, y) => x.v + y.v,
      addExp_minus: (x, op, y) => x.v - y.v,
      mulExp: passThrough,
      mulExp_times: (x, op, y) => x.v * y.v,
      mulExp_divide: (x, op, y) => x.v / y.v,
      priExp: passThrough,
      priExp_paren: (oparen, e, cparen) => e.v,
      number: passThrough,
      number_rec: (n, d) => n.v * 10 + d.v,
      digit: expr => expr.v.charCodeAt(0) - '0'.charCodeAt(0),
      // Needs `this`, so it stays a method rather than an arrow function.
      _terminal() {
        return this.sourceString;
      },
    });

    const value = semantics(Arithmetic.match('10*(2+123)-4/5')).v;
    t.is(value, 1249.2);
  });

  test('full bootstrap!', t => {
    const firstGen = buildFromOhmSource();
    const secondGen = buildGrammar(
      firstGen.match(ohmGrammarSource, 'Grammar'),
      {},
      firstGen,
    );
    // make their namespaceName properties the same
    secondGen.namespaceName = firstGen.namespaceName;
    compareGrammars(t, firstGen, secondGen);
  });
});