// src/Grammar.js | ohm_js@v17.1.0
//
// A library and language for building parsers, interpreters, compilers, etc.
import {Matcher} from './Matcher.js';import {Semantics} from './Semantics.js';import * as common from './common.js';import * as errors from './errors.js';import * as pexprs from './pexprs.js';
// --------------------------------------------------------------------
// Private stuff
// --------------------------------------------------------------------
// Names of the special semantic actions. These keys are accepted in an action
// dictionary even though they are not rule names, and their expected arity is 0.
const SPECIAL_ACTION_NAMES = ['_iter', '_terminal', '_nonterminal', '_default'];
// Return the rule objects of `grammar`, ordered by rule name. Only the own
// enumerable keys of `grammar.rules` are considered.
function getSortedRuleValues(grammar) {
  const ruleNames = Object.keys(grammar.rules);
  ruleNames.sort();
  const values = [];
  for (const ruleName of ruleNames) {
    values.push(grammar.rules[ruleName]);
  }
  return values;
}
// Until ES2019, JSON was not a valid subset of JavaScript because U+2028 (line separator)
// and U+2029 (paragraph separator) are allowed in JSON string literals, but not in JS.
// This function encodes those two characters as `\u2028` / `\u2029` escape sequences so
// that the resulting string represents both valid JSON and valid JavaScript (for ES2018
// and below).
// See https://v8.dev/features/subsume-json for more details.
const jsonToJS = str => str.replace(/\u2028/g, '\\u2028').replace(/\u2029/g, '\\u2029');
let ohmGrammar;let buildGrammar;
/**
 * A named collection of rules, optionally derived from a supergrammar.
 */
export class Grammar {
  /**
   * @param {string} name - the grammar's name
   * @param {Grammar|undefined} superGrammar - the grammar this one derives from, if any
   * @param {Object} rules - maps rule names to rule info objects (`body`, `formals`, ...)
   * @param {string} [optDefaultStartRule] - rule to start from when none is specified
   * @throws {Error} if `optDefaultStartRule` is given but is not a key in `rules`
   */
  constructor(name, superGrammar, rules, optDefaultStartRule) {
    this.name = name;
    this.superGrammar = superGrammar;
    this.rules = rules;
    if (optDefaultStartRule) {
      if (!(optDefaultStartRule in rules)) {
        throw new Error(
            `Invalid start rule: '${optDefaultStartRule}' is not a rule in grammar '${name}'`,
        );
      }
      this.defaultStartRule = optDefaultStartRule;
    }
    this._matchStateInitializer = undefined;
    this.supportsIncrementalParsing = true;
  }

  // Return a new Matcher for this grammar.
  matcher() {
    return new Matcher(this);
  }

  // Return true if the grammar is a built-in grammar, otherwise false.
  // NOTE: This might give an unexpected result if called before BuiltInRules is defined!
  isBuiltIn() {
    return this === Grammar.ProtoBuiltInRules || this === Grammar.BuiltInRules;
  }

  // Return true if `g` is structurally equal to this grammar: same name, same default
  // start rule, equal supergrammars, and the same set of rule names where each pair of
  // same-named rules agrees on description, formals, and the string form of its body.
  equals(g) {
    if (this === g) {
      return true;
    }
    // Do the cheapest comparisons first.
    if (
      g == null ||
      this.name !== g.name ||
      this.defaultStartRule !== g.defaultStartRule
    ) {
      return false;
    }
    // A grammar without a supergrammar can only equal another grammar without one, so
    // guard against calling `.equals` on a missing supergrammar.
    const sameSuperGrammar =
      this.superGrammar === g.superGrammar ||
      (this.superGrammar != null && this.superGrammar.equals(g.superGrammar));
    if (!sameSuperGrammar) {
      return false;
    }

    // The rule *names* must match too; comparing only the sorted values would treat
    // two grammars that differ in a rule's name (but not its body) as equal.
    const myNames = Object.keys(this.rules).sort();
    const otherNames = Object.keys(g.rules).sort();
    if (
      myNames.length !== otherNames.length ||
      myNames.some((ruleName, i) => ruleName !== otherNames[i])
    ) {
      return false;
    }

    const myRules = getSortedRuleValues(this);
    const otherRules = getSortedRuleValues(g);
    return (
      myRules.length === otherRules.length &&
      myRules.every((rule, i) => {
        return (
          rule.description === otherRules[i].description &&
          rule.formals.join(',') === otherRules[i].formals.join(',') &&
          rule.body.toString() === otherRules[i].body.toString()
        );
      })
    );
  }

  // Match `input` with a fresh matcher, optionally starting from `optStartApplication`,
  // and return the matcher's result.
  match(input, optStartApplication) {
    const m = this.matcher();
    m.replaceInputRange(0, 0, input);
    return m.match(optStartApplication);
  }

  // Like `match`, but returns the result of the matcher's `trace` method.
  trace(input, optStartApplication) {
    const m = this.matcher();
    m.replaceInputRange(0, 0, input);
    return m.trace(optStartApplication);
  }

  createSemantics() {
    return Semantics.createSemantics(this);
  }

  extendSemantics(superSemantics) {
    return Semantics.createSemantics(this, superSemantics._getSemantics());
  }

  // Check that every key in `actionDict` corresponds to a semantic action, and that it maps to
  // a function of the correct arity. If not, throw an exception.
  // `what` and `name` are only used to build the error message; the thrown Error carries the
  // individual problem strings in its `problems` property.
  _checkTopDownActionDict(what, name, actionDict) {
    const problems = [];

    // eslint-disable-next-line guard-for-in
    for (const k in actionDict) {
      const v = actionDict[k];
      const isSpecialAction = SPECIAL_ACTION_NAMES.includes(k);

      if (!isSpecialAction && !(k in this.rules)) {
        problems.push(`'${k}' is not a valid semantic action for '${this.name}'`);
        continue;
      }
      if (typeof v !== 'function') {
        problems.push(`'${k}' must be a function in an action dictionary for '${this.name}'`);
        continue;
      }
      const actual = v.length;
      const expected = this._topDownActionArity(k);
      if (actual !== expected) {
        let details;
        if (k === '_iter' || k === '_nonterminal') {
          details =
            `it should use a rest parameter, e.g. \`${k}(...children) {}\`. ` +
            'NOTE: this is new in Ohm v16 — see https://ohmjs.org/d/ati for details.';
        } else {
          details = `expected ${expected}, got ${actual}`;
        }
        problems.push(`Semantic action '${k}' has the wrong arity: ${details}`);
      }
    }
    if (problems.length > 0) {
      const prettyProblems = problems.map(problem => '- ' + problem);
      const error = new Error(
          [
            `Found errors in the action dictionary of the '${name}' ${what}:`,
            ...prettyProblems,
          ].join('\n'),
      );
      error.problems = problems;
      throw error;
    }
  }

  // Return the expected arity for a semantic action named `actionName`, which
  // is either a rule name or a special action name like '_nonterminal'.
  _topDownActionArity(actionName) {
    // All special actions have an expected arity of 0, though all but _terminal
    // are expected to use the rest parameter syntax (e.g. `_iter(...children)`).
    // This is considered to have arity 0, i.e. `((...args) => {}).length` is 0.
    return SPECIAL_ACTION_NAMES.includes(actionName) ?
      0 :
      this.rules[actionName].body.getArity();
  }

  // Return true if some grammar in this grammar's supergrammar chain equals `grammar`.
  // The grammar itself is not considered; only its ancestors are.
  _inheritsFrom(grammar) {
    let g = this.superGrammar;
    while (g) {
      if (g.equals(grammar)) {
        return true;
      }
      g = g.superGrammar;
    }
    return false;
  }

  // Return a string containing this grammar's recipe: a bracketed list of the form
  // ["grammar", metaInfo, name, superGrammar, startRule, rules]. Every element except the
  // supergrammar is JSON-encoded; the whole string is passed through `jsonToJS`.
  // If `superGrammarExpr` is given, it is spliced in verbatim as the supergrammar element.
  toRecipe(superGrammarExpr = undefined) {
    const metaInfo = {};
    // Include the grammar source if it is available.
    if (this.source) {
      metaInfo.source = this.source.contents;
    }

    let startRule = null;
    if (this.defaultStartRule) {
      startRule = this.defaultStartRule;
    }

    const rules = {};
    Object.keys(this.rules).forEach(ruleName => {
      const ruleInfo = this.rules[ruleName];
      const {body} = ruleInfo;
      const isDefinition = !this.superGrammar || !this.superGrammar.rules[ruleName];

      let operation;
      if (isDefinition) {
        operation = 'define';
      } else {
        operation = body instanceof pexprs.Extend ? 'extend' : 'override';
      }

      const ruleMetaInfo = {};
      if (ruleInfo.source && this.source) {
        const adjusted = ruleInfo.source.relativeTo(this.source);
        ruleMetaInfo.sourceInterval = [adjusted.startIdx, adjusted.endIdx];
      }

      // Only rules defined by this grammar carry their description in the recipe.
      const description = isDefinition ? ruleInfo.description : null;
      const bodyRecipe = body.outputRecipe(ruleInfo.formals, this.source);

      rules[ruleName] = [
        operation, // "define"/"extend"/"override"
        ruleMetaInfo,
        description,
        ruleInfo.formals,
        bodyRecipe,
      ];
    });

    // If the caller provided an expression to use for the supergrammar, use that.
    // Otherwise, if the supergrammar is a user grammar, use its recipe inline.
    let superGrammarOutput = 'null';
    if (superGrammarExpr) {
      superGrammarOutput = superGrammarExpr;
    } else if (this.superGrammar && !this.superGrammar.isBuiltIn()) {
      superGrammarOutput = this.superGrammar.toRecipe();
    }

    const recipeElements = [
      ...['grammar', metaInfo, this.name].map(JSON.stringify),
      superGrammarOutput,
      ...[startRule, rules].map(JSON.stringify),
    ];
    return jsonToJS(`[${recipeElements.join(',')}]`);
  }

  // TODO: Come up with better names for these methods.
  // TODO: Write the analog of these methods for inherited attributes.
  toOperationActionDictionaryTemplate() {
    return this._toOperationOrAttributeActionDictionaryTemplate();
  }

  toAttributeActionDictionaryTemplate() {
    return this._toOperationOrAttributeActionDictionaryTemplate();
  }

  // Return the source text of an object literal with one empty action function per rule
  // that is enumerable on `this.rules`.
  _toOperationOrAttributeActionDictionaryTemplate() {
    // TODO: add the super-grammar's templates at the right place, e.g., a case for AddExpr_plus
    // should appear next to other cases of AddExpr.

    const sb = new common.StringBuffer();
    sb.append('{');

    let first = true;
    // eslint-disable-next-line guard-for-in
    for (const ruleName in this.rules) {
      const {body} = this.rules[ruleName];
      if (first) {
        first = false;
      } else {
        sb.append(',');
      }
      sb.append('\n');
      sb.append('  ');
      this.addSemanticActionTemplate(ruleName, body, sb);
    }

    sb.append('\n}');
    return sb.contents();
  }

  // Append to `sb` an empty action function for `ruleName`, with one `_` parameter per
  // expected argument. `body` is accepted but not used here.
  addSemanticActionTemplate(ruleName, body, sb) {
    sb.append(ruleName);
    sb.append(': function(');
    const arity = this._topDownActionArity(ruleName);
    sb.append(common.repeat('_', arity).join(', '));
    sb.append(') {\n');
    sb.append('  }');
  }

  // Parse a string which expresses a rule application in this grammar, and return the
  // resulting Apply node.
  // Throws `errors.undeclaredRule` if the rule is not in this grammar, and
  // `errors.wrongNumberOfParameters` if the argument count differs from the rule's formals.
  parseApplication(str) {
    let app;
    if (!str.includes('<')) {
      // simple application
      app = new pexprs.Apply(str);
    } else {
      // parameterized application
      const cst = ohmGrammar.match(str, 'Base_application');
      app = buildGrammar(cst, {});
    }

    // Ensure that the application is valid.
    if (!(app.ruleName in this.rules)) {
      throw errors.undeclaredRule(app.ruleName, this.name);
    }
    const {formals} = this.rules[app.ruleName];
    if (formals.length !== app.args.length) {
      const {source} = this.rules[app.ruleName];
      throw errors.wrongNumberOfParameters(
          app.ruleName,
          formals.length,
          app.args.length,
          source,
      );
    }
    return app;
  }

  // Run the match-state initializer on `state`, if one has been installed.
  _setUpMatchState(state) {
    if (this._matchStateInitializer) {
      this._matchStateInitializer(state);
    }
  }
}
// The following grammar contains a few rules that couldn't be written in "userland".
// At the bottom of src/main.js, we create a sub-grammar of this grammar that's called
// `BuiltInRules`. That grammar contains several convenience rules, e.g., `letter` and
// `digit`, and is implicitly the super-grammar of any grammar whose super-grammar
// isn't specified.
Grammar.ProtoBuiltInRules = new Grammar(
    'ProtoBuiltInRules', // name
    undefined, // supergrammar
    {
      any: {
        body: pexprs.any,
        formals: [],
        description: 'any character',
        primitive: true,
      },
      end: {
        body: pexprs.end,
        formals: [],
        description: 'end of input',
        primitive: true,
      },

      caseInsensitive: {
        body: new pexprs.CaseInsensitiveTerminal(new pexprs.Param(0)),
        formals: ['str'],
        primitive: true,
      },
      lower: {
        body: new pexprs.UnicodeChar('Ll'),
        formals: [],
        description: 'a lowercase letter',
        primitive: true,
      },
      upper: {
        body: new pexprs.UnicodeChar('Lu'),
        formals: [],
        description: 'an uppercase letter',
        primitive: true,
      },
      // Union of Lt (titlecase), Lm (modifier), and Lo (other), i.e. any letter not in Ll or Lu.
      unicodeLtmo: {
        body: new pexprs.UnicodeChar('Ltmo'),
        formals: [],
        description: 'a Unicode character in Lt, Lm, or Lo',
        primitive: true,
      },

      // These rules are not truly primitive (they could be written in userland) but are defined
      // here for bootstrapping purposes.
      spaces: {
        body: new pexprs.Star(new pexprs.Apply('space')),
        formals: [],
      },
      space: {
        body: new pexprs.Range('\x00', ' '),
        formals: [],
        description: 'a space',
      },
    },
);
// This method is called from main.js once Ohm has loaded.
// It stores `grammar` and `builderFn` in the module-level `ohmGrammar` and `buildGrammar`
// variables, which `parseApplication` uses to parse parameterized applications.
Grammar.initApplicationParser = function(grammar, builderFn) {
  ohmGrammar = grammar;
  buildGrammar = builderFn;
};