
x/pgsql_ast_parser/lexer.ts

Yet another simple Postgres SQL parser
import { compile, keywords, Token } from 'https://deno.land/x/moo@0.5.1-deno.2/mod.ts';
import { sqlKeywords } from './keywords.ts';
import { NodeLocation, PGComment } from './syntax/ast.ts';
// build keywords
const keywordsMap: any = {};
for (const k of sqlKeywords) {
    keywordsMap['kw_' + k.toLowerCase()] = k;
}
const caseInsensitiveKeywords = (map: any) => {
    const transform = keywords(map);
    return (text: string) => transform(text.toUpperCase());
}
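// Illustration (sketch, not part of the original module): `caseInsensitiveKeywords` wraps
// moo's `keywords()` transform so that keyword lookup ignores case. Assuming 'SELECT' appears
// in `sqlKeywords`, every casing of the word resolves to the same token type:
//
//   const kw = caseInsensitiveKeywords(keywordsMap);
//   kw('select'); // => 'kw_select'
//   kw('Select'); // => 'kw_select'
//   kw('banana'); // => undefined (not a keyword, so it stays a plain `word` token)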

// build lexer
export const lexer = compile({
    word: {
        match: /[eE](?!')[A-Za-z0-9_]*|[a-df-zA-DF-Z_][A-Za-z0-9_]*/,
        type: caseInsensitiveKeywords(keywordsMap),
        value: x => x.toLowerCase(),
    },
    wordQuoted: {
        match: /"(?:[^"\*]|"")+"/,
        type: () => 'quoted_word',
        value: x => x.substring(1, x.length - 1),
    },
    string: {
        match: /'(?:[^']|\'\')*'/,
        value: x => {
            return x.substring(1, x.length - 1)
                .replace(/''/g, '\'');
        },
    },
    eString: {
        match: /\b(?:e|E)'(?:[^'\\]|[\r\n\s]|(?:\\\s)|(?:\\\n)|(?:\\.)|(?:\'\'))+'/,
        value: x => {
            return x.substring(2, x.length - 1)
                .replace(/''/g, '\'')
                .replace(/\\([\s\n])/g, (_, x) => x)
                .replace(/\\./g, m => JSON.parse('"' + m + '"'));
        },
    },
    qparam: {
        match: /\$\d+/,
    },
    commentLine: /\-\-.*?$[\s\r\n]*/,
    commentFullOpen: /(?<!\/)\/\*/,
    commentFullClose: /\*\/[\s\r\n]*/,
    star: '*',
    comma: ',',
    space: {
        match: /[\s\t\n\v\f\r]+/,
        lineBreaks: true,
    },
    int: /\-?\d+(?![\.\d])/,
    float: /\-?(?:(?:\d*\.\d+)|(?:\d+\.\d*))/,
    // word: /[a-zA-Z][A-Za-z0-9_\-]*/,
    lparen: '(',
    rparen: ')',
    lbracket: '[',
    rbracket: ']',
    semicolon: ';',
    dot: /\.(?!\d)/,
    op_cast: '::',
    op_plus: '+',
    op_eq: '=',
    op_neq: {
        match: /(?:!=)|(?:\<\>)/,
        value: () => '!=',
    },
    op_minus: /(?<!\-)\-(?!\-)(?!\>)/,
    op_div: /(?<!\/)\/(?!\/)/,
    op_like: /(?<!\!)~~(?!\*)/, // ~~ = LIKE
    op_ilike: /(?<!\!)~~\*/, // ~~* = ILIKE
    op_not_like: /\!~~(?!\*)/, // !~~ = NOT LIKE
    op_not_ilike: /\!~~\*/, // !~~* = NOT ILIKE
    op_mod: '%',
    op_exp: '^',
    op_member: /\-\>(?!\>)/,
    op_membertext: '->>',
    op_additive: {
        // group other additive operators
        match: ['||', '-', '#-', '&&'],
    },
    op_compare: {
        // group other comparison operators
        // ... to add: "IN" and "NOT IN" that are matched by keywords
        match: ['>', '>=', '<', '<=', '@>', '<@', '?', '?|', '?&', '#>>', '>>', '<<', '~'],
    },
    ops_others: {
        // referenced as (any other operator) in https://www.postgresql.org/docs/12/sql-syntax-lexical.html#SQL-PRECEDENCE
        // see also https://www.postgresql.org/docs/9.0/functions-math.html
        match: ['|', '&', '^', '#'],
    },
    codeblock: {
        match: /\$\$(?:.|[\s\t\n\v\f\r])*?\$\$/s,
        lineBreaks: true,
        value: (x: string) => x.substring(2, x.length - 2),
    },
});
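// Illustration only (not part of the original module): a minimal sketch of driving the
// compiled lexer with moo's standard reset()/next() API. The SQL text is an arbitrary
// example, and the expected output assumes the keywords used below are in `sqlKeywords`.
function exampleTokenTypes(sql: string): string[] {
    const types: string[] = [];
    lexer.reset(sql);
    let tok: Token | undefined;
    while (tok = lexer.next()) {
        types.push(tok.type ?? '');
    }
    return types;
}
// e.g. exampleTokenTypes('SELECT * FROM users') should yield something like
// ['kw_select', 'star', 'kw_from', 'word'], since whitespace is skipped by the wrapper below.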
lexer.next = (next => () => {
    let tok: Token | undefined;
    let commentFull: { nested: number; offset: number; text: string; } | null = null;
    while (tok = next.call(lexer)) {
        // js regex can't be recursive, so we'll keep track of nested opens (/*) and closes (*/).
        if (tok.type === 'commentFullOpen') {
            if (commentFull === null) {
                // initial open - start collecting content
                commentFull = { nested: 0, offset: tok.offset, text: tok.text };
                continue;
            }
            commentFull.nested++;
        }
        if (commentFull != null) {
            // collect comment content
            commentFull.text += tok.text;
            if (tok.type === 'commentFullClose') {
                if (commentFull.nested === 0) {
                    // finish comment, if not nested
                    comments?.push(makeComment(commentFull));
                    commentFull = null;
                    continue;
                }
                commentFull.nested--;
            }
            continue;
        }
        if (tok.type === 'space') {
            continue;
        }
        if (tok.type === 'commentLine') {
            comments?.push(makeComment(tok));
            continue;
        }
        break;
    }
    if (trackingLoc && tok) {
        const start = tok.offset;
        const loc: NodeLocation = {
            start,
            end: start + tok.text.length,
        };
        (tok as any)._location = loc;
    }
    return tok;
})(lexer.next);
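// Illustration (sketch, not part of the original module): with the wrapper above in place,
// whitespace and comments never surface as tokens; line comments are collected directly and
// block comments are accumulated until their outermost `*/`, so nesting is handled too.
// Using the `exampleTokenTypes` sketch above, something like
// exampleTokenTypes('SELECT /* outer /* inner */ outer again */ 1') is expected to yield
// ['kw_select', 'int'] - the whole nested block comment is consumed as one unit.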
export const lexerAny: any = lexer;
let comments: PGComment[] | null = null;
const makeComment = ({ offset, text }: { offset: number; text: string }): PGComment => ({
    _location: { start: offset, end: offset + text.length },
    comment: text,
});
export function trackingComments<T>(act: () => T): { ast: T; comments: PGComment[] } {
    if (comments) {
        throw new Error('WAT ? Recursive comments tracking 🤔🤨 ?');
    }
    try {
        comments = [];
        const ast = act();
        return { comments, ast };
    } finally {
        comments = null;
    }
}
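// Illustration (sketch, not part of the original module): the intended use of
// trackingComments() is to wrap a parse call so that comments seen while lexing are returned
// alongside the AST. `parseSomehow` below is a hypothetical stand-in for whatever parser
// consumes this lexer.
//
//   const { ast, comments } = trackingComments(() => parseSomehow('SELECT 1; -- done'));
//   // `comments` now holds PGComment entries with their source locations.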
let trackingLoc = false;
export function tracking<T>(act: () => T): T {
    if (trackingLoc) {
        return act();
    }
    try {
        trackingLoc = true;
        return act();
    } finally {
        trackingLoc = false;
    }
}
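// Illustration (sketch, not part of the original module): tracking() enables location
// tracking for the duration of a callback. While it is active, the wrapped lexer.next()
// above attaches a `_location` to every token, and track()/box() below propagate those
// ranges onto the values built from them.
//
//   // `parseSomehow` is again a hypothetical parse entry point.
//   const ast = tracking(() => parseSomehow('SELECT 1'));
//   // nodes passed through track() now carry a `_location` { start, end } range.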
export function track(xs: any, ret: any) {
    if (!trackingLoc || !ret || typeof ret !== 'object') {
        return ret;
    }
    const start = seek(xs, true);
    const end = seek(xs, false);
    if (!start || !end) {
        return ret;
    }
    if (start === end) {
        ret._location = start;
    } else {
        const loc: NodeLocation = {
            start: start.start,
            end: end.end,
        };
        ret._location = loc;
    }
    return ret;
}
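// Illustration (sketch, not part of the original module): track() stamps a node with the
// span covered by the first and last located items found in `xs` (tokens or already-tracked
// nodes). A grammar action might use it roughly like this:
//
//   // `matched` would be the array of matched tokens/sub-nodes for the rule
//   // return track(matched, { type: 'integer', value: 42 });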
const literal = Symbol('_literal');
const doubleQuotedSym = Symbol('_doublequoted');
export function box(xs: any, value: any, doubleQuoted?: boolean) {
    if (!trackingLoc && !doubleQuoted) {
        return value;
    }
    return track(xs, { [literal]: value, [doubleQuotedSym]: doubleQuoted });
}

function unwrapNoBox(e: any[]): any {
    if (Array.isArray(e) && e.length === 1) {
        e = unwrapNoBox(e[0]);
    }
    if (Array.isArray(e) && !e.length) {
        return null;
    }
    return e;
}

export function doubleQuoted(value: any) {
    const uw = unwrapNoBox(value);
    if (typeof value === 'object' && uw?.[doubleQuotedSym]) {
        return { doubleQuoted: true };
    }
    return undefined;
}

export function unbox(value: any): any {
    if (typeof value === 'object') {
        return value?.[literal] ?? value;
    }
    return value;
}
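// Illustration (sketch, not part of the original module): box() wraps a value so it can carry
// metadata (a source location and whether it came from a double-quoted identifier), while
// unbox() recovers the underlying literal unchanged.
//
//   const boxed = box([], 'users', true); // first argument is normally the matched tokens
//   unbox(boxed);          // => 'users'
//   doubleQuoted(boxed);   // => { doubleQuoted: true }
//   unbox('raw value');    // => 'raw value' (non-boxed values pass through as-is)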

function seek(xs: any, start: boolean): NodeLocation | null {
    if (!xs) {
        return null;
    }
    if (Array.isArray(xs)) {
        const diff = start ? 1 : -1;
        for (let i = start ? 0 : xs.length - 1; i >= 0 && i < xs.length; i += diff) {
            const v = seek(xs[i], start);
            if (v) {
                return v;
            }
        }
        return null;
    }
    if (typeof xs !== 'object') {
        return null;
    }
    return xs._location;
}