// Character classes used when scanning names. `nameStart` is tested against
// the first character of a tag or attribute name and `nameBody` against the
// characters that follow it.
const nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/;
const nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/;

// Same classes with `#` additionally allowed, used for the text between `&`
// and `;` so that numeric references such as `#65` / `#x41` are accepted.
const entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/;
const entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/;

/**
 * Named character entities.
 *
 * The literal below stores most entries as numeric character codes; the loop
 * that follows replaces every numeric entry with the corresponding
 * one-character string, so after module evaluation all values are strings.
 */
export const ENTITIES: {[key: string]: number | string} = {
  amp: '&',
  gt: '>',
  lt: '<',
  quot: '"',
  apos: "'",
  AElig: 198, Aacute: 193, Acirc: 194, Agrave: 192, Aring: 197, Atilde: 195,
  Auml: 196, Ccedil: 199, ETH: 208, Eacute: 201, Ecirc: 202, Egrave: 200,
  Euml: 203, Iacute: 205, Icirc: 206, Igrave: 204, Iuml: 207, Ntilde: 209,
  Oacute: 211, Ocirc: 212, Ograve: 210, Oslash: 216, Otilde: 213, Ouml: 214,
  THORN: 222, Uacute: 218, Ucirc: 219, Ugrave: 217, Uuml: 220, Yacute: 221,
  aacute: 225, acirc: 226, aelig: 230, agrave: 224, aring: 229, atilde: 227,
  auml: 228, ccedil: 231, eacute: 233, ecirc: 234, egrave: 232, eth: 240,
  euml: 235, iacute: 237, icirc: 238, igrave: 236, iuml: 239, ntilde: 241,
  oacute: 243, ocirc: 244, ograve: 242, oslash: 248, otilde: 245, ouml: 246,
  szlig: 223, thorn: 254, uacute: 250, ucirc: 251, ugrave: 249, uuml: 252,
  yacute: 253, yuml: 255, copy: 169, reg: 174, nbsp: 160, iexcl: 161,
  cent: 162, pound: 163, curren: 164, yen: 165, brvbar: 166, sect: 167,
  uml: 168, ordf: 170, laquo: 171, not: 172, shy: 173, macr: 175, deg: 176,
  plusmn: 177, sup1: 185, sup2: 178, sup3: 179, acute: 180, micro: 181,
  para: 182, middot: 183, cedil: 184, ordm: 186, raquo: 187, frac14: 188,
  frac12: 189, frac34: 190,
  iquest: 191, times: 215, divide: 247, OElig: 338, oelig: 339, Scaron: 352,
  scaron: 353, Yuml: 376, fnof: 402, circ: 710, tilde: 732, Alpha: 913,
  Beta: 914, Gamma: 915, Delta: 916, Epsilon: 917, Zeta: 918, Eta: 919,
  Theta: 920, Iota: 921, Kappa: 922, Lambda: 923, Mu: 924, Nu: 925, Xi: 926,
  Omicron: 927, Pi: 928, Rho: 929, Sigma: 931, Tau: 932, Upsilon: 933,
  Phi: 934, Chi: 935, Psi: 936, Omega: 937, alpha: 945, beta: 946, gamma: 947,
  delta: 948, epsilon: 949, zeta: 950, eta: 951, theta: 952, iota: 953,
  kappa: 954, lambda: 955, mu: 956, nu: 957, xi: 958, omicron: 959, pi: 960,
  rho: 961, sigmaf: 962, sigma: 963, tau: 964, upsilon: 965, phi: 966,
  chi: 967, psi: 968, omega: 969, thetasym: 977, upsih: 978, piv: 982,
  ensp: 8194, emsp: 8195, thinsp: 8201, zwnj: 8204, zwj: 8205, lrm: 8206,
  rlm: 8207, ndash: 8211, mdash: 8212, lsquo: 8216, rsquo: 8217, sbquo: 8218,
  ldquo: 8220, rdquo: 8221, bdquo: 8222, dagger: 8224, Dagger: 8225,
  bull: 8226, hellip: 8230, permil: 8240, prime: 8242, Prime: 8243,
  lsaquo: 8249, rsaquo: 8250, oline: 8254, frasl: 8260, euro: 8364,
  image: 8465, weierp: 8472, real: 8476, trade: 8482, alefsym: 8501,
  larr: 8592, uarr: 8593, rarr: 8594, darr: 8595, harr: 8596, crarr: 8629,
  lArr: 8656, uArr: 8657, rArr: 8658, dArr: 8659, hArr: 8660, forall: 8704,
  part: 8706, exist: 8707, empty: 8709, nabla: 8711, isin: 8712, notin: 8713,
  ni: 8715, prod: 8719, sum: 8721, minus: 8722, lowast: 8727, radic: 8730,
  prop: 8733, infin: 8734, ang: 8736, and: 8743, or: 8744, cap: 8745,
  cup: 8746, int: 8747, there4: 8756, sim: 8764, cong: 8773, asymp: 8776,
  ne: 8800, equiv: 8801, le: 8804, ge: 8805, sub: 8834, sup: 8835, nsub: 8836,
  sube: 8838, supe: 8839, oplus: 8853, otimes: 8855, perp: 8869, sdot: 8901,
  lceil: 8968, rceil: 8969, lfloor: 8970, rfloor: 8971, lang: 9001,
  rang: 9002, loz: 9674, spades: 9824, clubs: 9827, hearts: 9829, diams: 9830,
};

// Replace each numeric code with its character; string entries stay as-is.
for (const [entityName, codeOrText] of Object.entries(ENTITIES)) {
  if (typeof codeOrText === 'number') {
    ENTITIES[entityName] = String.fromCharCode(codeOrText);
  }
}
/**
 * Open-ended object shape: any string-named member is allowed and is typed
 * `any`.
 */
interface SAXInterface {
  [member: string]: any;
}
/**
 * Character-at-a-time state machine that tokenizes XML-like markup and
 * reports what it finds through `on<event>` handler properties.
 *
 * The class holds the parsing state and logic; per-parse configuration
 * (`opt`, `strict`, initial `state`, ...) is assigned by `SAXParser`'s
 * constructor. Handlers are invoked only when they exist as own properties of
 * the instance (see `emit`).
 */
export class SAX implements SAXInterface {
  [key: string]: any;

  /** Names of the events a parser can report (without the `on` prefix). */
  EVENTS: string[];
  /** Entity table consulted by `parseEntity`; starts as a copy of the module table. */
  ENTITIES: {[key: string]: number | string} = {
    ...ENTITIES,
  };
  /** The five predefined XML entities. */
  protected XML_ENTITIES: {[key: string]: string} = {
    amp: '&',
    gt: '>',
    lt: '<',
    quot: '"',
    apos: "'",
  };
  // Used as a counter while STATE is initialised, then re-pointed at STATE in
  // the constructor so states can be read as `this.S.NAME`.
  protected S: any = 0;
  protected opt: any;
  protected trackPosition = false;
  protected column = 0;
  protected line = 0;
  /** The character currently being processed by `write`. */
  protected c = '';
  protected error: any;
  /** The quote character that opened the quoted run currently being read. */
  protected q = '';
  protected bufferCheckPosition: any;
  protected closed = false;
  /** Stack of currently open tags. */
  protected tags: any[] = [];
  /** Name of the String method applied to names in non-strict mode. */
  protected looseCase = '';
  protected closedRoot = false;
  protected sawRoot = false;
  protected strict = false;
  protected tag: any;
  protected strictEntities: any;
  protected state: any;
  protected noscript = false;
  /** Pending `[name, value]` pairs, collected only when `opt.xmlns` is set. */
  protected attribList: any[] = [];
  protected ns: any;
  protected position = 0;
  private STATE: {[index: string]: any} = {
    BEGIN: this.S++,
    BEGIN_WHITESPACE: this.S++,
    TEXT: this.S++,
    TEXT_ENTITY: this.S++,
    OPEN_WAKA: this.S++,
    SGML_DECL: this.S++,
    SGML_DECL_QUOTED: this.S++,
    DOCTYPE: this.S++,
    DOCTYPE_QUOTED: this.S++,
    DOCTYPE_DTD: this.S++,
    DOCTYPE_DTD_QUOTED: this.S++,
    COMMENT_STARTING: this.S++,
    COMMENT: this.S++,
    COMMENT_ENDING: this.S++,
    COMMENT_ENDED: this.S++,
    CDATA: this.S++,
    CDATA_ENDING: this.S++,
    CDATA_ENDING_2: this.S++,
    PROC_INST: this.S++,
    PROC_INST_BODY: this.S++,
    PROC_INST_ENDING: this.S++,
    OPEN_TAG: this.S++,
    OPEN_TAG_SLASH: this.S++,
    ATTRIB: this.S++,
    ATTRIB_NAME: this.S++,
    ATTRIB_NAME_SAW_WHITE: this.S++,
    ATTRIB_VALUE: this.S++,
    ATTRIB_VALUE_QUOTED: this.S++,
    ATTRIB_VALUE_CLOSED: this.S++,
    ATTRIB_VALUE_UNQUOTED: this.S++,
    ATTRIB_VALUE_ENTITY_Q: this.S++,
    ATTRIB_VALUE_ENTITY_U: this.S++,
    CLOSE_TAG: this.S++,
    CLOSE_TAG_SAW_WHITE: this.S++,
    SCRIPT: this.S++,
    SCRIPT_ENDING: this.S++,
  };
  /** Names of the string accumulator fields managed by the buffer helpers. */
  private readonly BUFFERS: string[];
  private parser: (strict: boolean, opt: any) => SAXParser;
  private CDATA = '[CDATA[';
  private DOCTYPE = 'DOCTYPE';
  private XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace';
  private XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/';
  protected rootNS: {} = {
    xml: this.XML_NAMESPACE,
    xmlns: this.XMLNS_NAMESPACE,
  };
  private comment: any;
  private sgmlDecl: any;
  private textNode = '';
  private tagName: any;
  private doctype: any;
  private procInstName: any;
  private procInstBody: any;
  private entity = '';
  private attribName: any;
  private attribValue: any;
  private cdata = '';
  private script = '';
  private startTagPosition = 0;

  constructor() {
    this.BUFFERS = [
      'comment',
      'sgmlDecl',
      'textNode',
      'tagName',
      'doctype',
      'procInstName',
      'procInstBody',
      'entity',
      'attribName',
      'attribValue',
      'cdata',
      'script',
    ];
    this.EVENTS = [
      'text',
      'processinginstruction',
      'sgmldeclaration',
      'doctype',
      'comment',
      'opentagstart',
      'attribute',
      'opentag',
      'closetag',
      'opencdata',
      'cdata',
      'closecdata',
      'error',
      'end',
      'ready',
      'script',
      'opennamespace',
      'closenamespace',
    ];

    this.S = 0;

    // Add the reverse (number -> name) mapping. Iterate over a snapshot of
    // the keys so the entries added here are never themselves visited.
    for (const stateName of Object.keys(this.STATE)) {
      this.STATE[this.STATE[stateName]] = stateName;
    }

    this.S = this.STATE;

    this.parser = (strict, opt) => new SAXParser(strict, opt);
  }

  /** Returns the character at `i`, or `''` when `i` is past the end of `chunk`. */
  private static charAt(chunk: string, i: number) {
    let result = '';
    if (i < chunk.length) {
      result = chunk.charAt(i);
    }
    return result;
  }

  /** True for space, line feed, carriage return and tab. */
  private static isWhitespace(c: string) {
    return c === ' ' || c === '\n' || c === '\r' || c === '\t';
  }

  /** True for a single or double quote character. */
  private static isQuote(c: string) {
    return c === '"' || c === "'";
  }

  /** True when `c` terminates an unquoted attribute value (`>` or whitespace). */
  private static isAttribEnd(c: string) {
    return c === '>' || SAX.isWhitespace(c);
  }

  private static isMatch(regex: RegExp, c: string) {
    return regex.test(c);
  }

  private static notMatch(regex: RegExp, c: string) {
    return !SAX.isMatch(regex, c);
  }

  /**
   * Splits a name at `:` into `{prefix, local}`. A name without a colon has
   * an empty prefix. When `attribute` is truthy, the bare name `xmlns` is
   * reported as prefix `xmlns` with an empty local part.
   */
  private static qname(name: string, attribute?: string | boolean) {
    const i = name.indexOf(':');
    const qualName = i < 0 ? ['', name] : name.split(':');
    let prefix = qualName[0];
    let local = qualName[1];

    if (attribute && name === 'xmlns') {
      prefix = 'xmlns';
      local = '';
    }

    return {prefix, local};
  }

  /**
   * Feeds a chunk of input to the state machine.
   *
   * @param chunk Text to parse. `null` ends the parse (delegates to `end`);
   *     any other object is converted with `toString()`.
   * @returns `this`, or the result of `end()` / `errorFunction()` for the
   *     `null` and write-after-close cases.
   * @throws The stored error, if a previous error has not been cleared.
   */
  write(chunk: null | object | string) {
    if (this.error) {
      throw this.error;
    }
    if (this.closed) {
      return this.errorFunction(
        'Cannot write after close. Assign an onready handler.'
      );
    }
    if (chunk === null) {
      return this.end();
    }
    if (typeof chunk === 'object') {
      chunk = chunk.toString();
    }
    let i = 0;
    let c: string;
    while (true) {
      c = SAX.charAt(chunk, i++);
      this.c = c;

      if (!c) {
        break;
      }

      if (this.trackPosition) {
        this.position++;
        if (c === '\n') {
          this.line++;
          this.column = 0;
        } else {
          this.column++;
        }
      }

      switch (this.state) {
        case this.S.BEGIN:
          this.state = this.S.BEGIN_WHITESPACE;
          // A leading byte-order mark is skipped.
          if (c === '\uFEFF') {
            continue;
          }
          this.beginWhiteSpace(c);
          continue;

        case this.S.BEGIN_WHITESPACE:
          this.beginWhiteSpace(c);
          continue;

        case this.S.TEXT:
          if (this.sawRoot && !this.closedRoot) {
            // Inside the root element: consume the whole run of plain text
            // in one go instead of one character per loop iteration.
            const starti = i - 1;
            while (c && c !== '<' && c !== '&') {
              c = SAX.charAt(chunk, i++);
              if (c && this.trackPosition) {
                this.position++;
                if (c === '\n') {
                  this.line++;
                  this.column = 0;
                } else {
                  this.column++;
                }
              }
            }
            this.textNode += chunk.substring(starti, i - 1);
          }
          if (
            c === '<' &&
            !(this.sawRoot && this.closedRoot && !this.strict)
          ) {
            this.state = this.S.OPEN_WAKA;
            this.startTagPosition = this.position;
          } else {
            if (
              !SAX.isWhitespace(c) &&
              (!this.sawRoot || this.closedRoot)
            ) {
              this.strictFail('Text data outside of root node.');
            }
            if (c === '&') {
              this.state = this.S.TEXT_ENTITY;
            } else {
              this.textNode += c;
            }
          }
          continue;

        case this.S.SCRIPT:
          if (c === '<') {
            this.state = this.S.SCRIPT_ENDING;
          } else {
            this.script += c;
          }
          continue;

        case this.S.SCRIPT_ENDING:
          if (c === '/') {
            this.state = this.S.CLOSE_TAG;
          } else {
            this.script += '<' + c;
            this.state = this.S.SCRIPT;
          }
          continue;

        case this.S.OPEN_WAKA:
          if (c === '!') {
            this.state = this.S.SGML_DECL;
            this.sgmlDecl = '';
          } else if (SAX.isWhitespace(c)) {
            // Whitespace directly after '<' is ignored.
          } else if (SAX.isMatch(nameStart, c)) {
            this.state = this.S.OPEN_TAG;
            this.tagName = c;
          } else if (c === '/') {
            this.state = this.S.CLOSE_TAG;
            this.tagName = '';
          } else if (c === '?') {
            this.state = this.S.PROC_INST;
            this.procInstName = this.procInstBody = '';
          } else {
            this.strictFail('Unencoded <');
            // Re-insert the whitespace that was skipped after the '<'.
            if (this.startTagPosition + 1 < this.position) {
              const pad = this.position - this.startTagPosition;
              c = new Array(pad).join(' ') + c;
            }
            this.textNode += '<' + c;
            this.state = this.S.TEXT;
          }
          continue;

        case this.S.SGML_DECL:
          if ((this.sgmlDecl + c).toUpperCase() === this.CDATA) {
            this.emitNode('onopencdata');
            this.state = this.S.CDATA;
            this.sgmlDecl = '';
            this.cdata = '';
          } else if (this.sgmlDecl + c === '--') {
            this.state = this.S.COMMENT;
            this.comment = '';
            this.sgmlDecl = '';
          } else if ((this.sgmlDecl + c).toUpperCase() === this.DOCTYPE) {
            this.state = this.S.DOCTYPE;
            if (this.doctype || this.sawRoot) {
              this.strictFail('Inappropriately located doctype declaration');
            }
            this.doctype = '';
            this.sgmlDecl = '';
          } else if (c === '>') {
            this.emitNode('onsgmldeclaration', this.sgmlDecl);
            this.sgmlDecl = '';
            this.state = this.S.TEXT;
          } else if (SAX.isQuote(c)) {
            this.state = this.S.SGML_DECL_QUOTED;
            // Remember which quote opened the run; SGML_DECL_QUOTED compares
            // against it to find the matching close quote.
            this.q = c;
            this.sgmlDecl += c;
          } else {
            this.sgmlDecl += c;
          }
          continue;

        case this.S.SGML_DECL_QUOTED:
          if (c === this.q) {
            this.state = this.S.SGML_DECL;
            this.q = '';
          }
          this.sgmlDecl += c;
          continue;

        case this.S.DOCTYPE:
          if (c === '>') {
            this.state = this.S.TEXT;
            this.emitNode('ondoctype', this.doctype);
            // Marks that a doctype has been seen (checked in SGML_DECL).
            this.doctype = true;
          } else {
            this.doctype += c;
            if (c === '[') {
              this.state = this.S.DOCTYPE_DTD;
            } else if (SAX.isQuote(c)) {
              this.state = this.S.DOCTYPE_QUOTED;
              this.q = c;
            }
          }
          continue;

        case this.S.DOCTYPE_QUOTED:
          this.doctype += c;
          if (c === this.q) {
            this.q = '';
            this.state = this.S.DOCTYPE;
          }
          continue;

        case this.S.DOCTYPE_DTD:
          this.doctype += c;
          if (c === ']') {
            this.state = this.S.DOCTYPE;
          } else if (SAX.isQuote(c)) {
            this.state = this.S.DOCTYPE_DTD_QUOTED;
            this.q = c;
          }
          continue;

        case this.S.DOCTYPE_DTD_QUOTED:
          this.doctype += c;
          if (c === this.q) {
            this.state = this.S.DOCTYPE_DTD;
            this.q = '';
          }
          continue;

        case this.S.COMMENT:
          if (c === '-') {
            this.state = this.S.COMMENT_ENDING;
          } else {
            this.comment += c;
          }
          continue;

        case this.S.COMMENT_ENDING:
          if (c === '-') {
            this.state = this.S.COMMENT_ENDED;
            this.comment = this.textApplyOptions(this.comment);
            if (this.comment) {
              this.emitNode('oncomment', this.comment);
            }
            this.comment = '';
          } else {
            this.comment += '-' + c;
            this.state = this.S.COMMENT;
          }
          continue;

        case this.S.COMMENT_ENDED:
          if (c !== '>') {
            this.strictFail('Malformed comment');
            this.comment += '--' + c;
            this.state = this.S.COMMENT;
          } else {
            this.state = this.S.TEXT;
          }
          continue;

        case this.S.CDATA:
          if (c === ']') {
            this.state = this.S.CDATA_ENDING;
          } else {
            this.cdata += c;
          }
          continue;

        case this.S.CDATA_ENDING:
          if (c === ']') {
            this.state = this.S.CDATA_ENDING_2;
          } else {
            this.cdata += ']' + c;
            this.state = this.S.CDATA;
          }
          continue;

        case this.S.CDATA_ENDING_2:
          if (c === '>') {
            if (this.cdata) {
              this.emitNode('oncdata', this.cdata);
            }
            this.emitNode('onclosecdata');
            this.cdata = '';
            this.state = this.S.TEXT;
          } else if (c === ']') {
            this.cdata += ']';
          } else {
            this.cdata += ']]' + c;
            this.state = this.S.CDATA;
          }
          continue;

        case this.S.PROC_INST:
          if (c === '?') {
            this.state = this.S.PROC_INST_ENDING;
          } else if (SAX.isWhitespace(c)) {
            this.state = this.S.PROC_INST_BODY;
          } else {
            this.procInstName += c;
          }
          continue;

        case this.S.PROC_INST_BODY:
          if (!this.procInstBody && SAX.isWhitespace(c)) {
            continue;
          } else if (c === '?') {
            this.state = this.S.PROC_INST_ENDING;
          } else {
            this.procInstBody += c;
          }
          continue;

        case this.S.PROC_INST_ENDING:
          if (c === '>') {
            this.emitNode('onprocessinginstruction', {
              name: this.procInstName,
              body: this.procInstBody,
            });
            this.procInstName = this.procInstBody = '';
            this.state = this.S.TEXT;
          } else {
            this.procInstBody += '?' + c;
            this.state = this.S.PROC_INST_BODY;
          }
          continue;

        case this.S.OPEN_TAG:
          if (SAX.isMatch(nameBody, c)) {
            this.tagName += c;
          } else {
            this.newTag();
            if (c === '>') {
              this.openTag();
            } else if (c === '/') {
              this.state = this.S.OPEN_TAG_SLASH;
            } else {
              if (!SAX.isWhitespace(c)) {
                this.strictFail('Invalid character in tag name');
              }
              this.state = this.S.ATTRIB;
            }
          }
          continue;

        case this.S.OPEN_TAG_SLASH:
          if (c === '>') {
            this.openTag(true);
            this.closeTag();
          } else {
            this.strictFail('Forward-slash in opening tag not followed by >');
            this.state = this.S.ATTRIB;
          }
          continue;

        case this.S.ATTRIB:
          if (SAX.isWhitespace(c)) {
            continue;
          } else if (c === '>') {
            this.openTag();
          } else if (c === '/') {
            this.state = this.S.OPEN_TAG_SLASH;
          } else if (SAX.isMatch(nameStart, c)) {
            this.attribName = c;
            this.attribValue = '';
            this.state = this.S.ATTRIB_NAME;
          } else {
            this.strictFail('Invalid attribute name');
          }
          continue;

        case this.S.ATTRIB_NAME:
          if (c === '=') {
            this.state = this.S.ATTRIB_VALUE;
          } else if (c === '>') {
            this.strictFail('Attribute without value');
            this.attribValue = this.attribName;
            this.attrib();
            this.openTag();
          } else if (SAX.isWhitespace(c)) {
            this.state = this.S.ATTRIB_NAME_SAW_WHITE;
          } else if (SAX.isMatch(nameBody, c)) {
            this.attribName += c;
          } else {
            this.strictFail('Invalid attribute name');
          }
          continue;

        case this.S.ATTRIB_NAME_SAW_WHITE:
          if (c === '=') {
            this.state = this.S.ATTRIB_VALUE;
          } else if (SAX.isWhitespace(c)) {
            continue;
          } else {
            this.strictFail('Attribute without value');
            this.tag.attributes[this.attribName] = '';
            this.attribValue = '';
            this.emitNode('onattribute', {
              name: this.attribName,
              value: '',
            });
            this.attribName = '';
            if (c === '>') {
              this.openTag();
            } else if (SAX.isMatch(nameStart, c)) {
              this.attribName = c;
              this.state = this.S.ATTRIB_NAME;
            } else {
              this.strictFail('Invalid attribute name');
              this.state = this.S.ATTRIB;
            }
          }
          continue;

        case this.S.ATTRIB_VALUE:
          if (SAX.isWhitespace(c)) {
            continue;
          } else if (SAX.isQuote(c)) {
            this.q = c;
            this.state = this.S.ATTRIB_VALUE_QUOTED;
          } else {
            this.strictFail('Unquoted attribute value');
            this.state = this.S.ATTRIB_VALUE_UNQUOTED;
            this.attribValue = c;
          }
          continue;

        case this.S.ATTRIB_VALUE_QUOTED:
          if (c !== this.q) {
            if (c === '&') {
              this.state = this.S.ATTRIB_VALUE_ENTITY_Q;
            } else {
              this.attribValue += c;
            }
            continue;
          }
          this.attrib();
          this.q = '';
          this.state = this.S.ATTRIB_VALUE_CLOSED;
          continue;

        case this.S.ATTRIB_VALUE_CLOSED:
          if (SAX.isWhitespace(c)) {
            this.state = this.S.ATTRIB;
          } else if (c === '>') {
            this.openTag();
          } else if (c === '/') {
            this.state = this.S.OPEN_TAG_SLASH;
          } else if (SAX.isMatch(nameStart, c)) {
            this.strictFail('No whitespace between attributes');
            this.attribName = c;
            this.attribValue = '';
            this.state = this.S.ATTRIB_NAME;
          } else {
            this.strictFail('Invalid attribute name');
          }
          continue;

        case this.S.ATTRIB_VALUE_UNQUOTED:
          if (!SAX.isAttribEnd(c)) {
            if (c === '&') {
              this.state = this.S.ATTRIB_VALUE_ENTITY_U;
            } else {
              this.attribValue += c;
            }
            continue;
          }
          this.attrib();
          if (c === '>') {
            this.openTag();
          } else {
            this.state = this.S.ATTRIB;
          }
          continue;

        case this.S.CLOSE_TAG:
          if (!this.tagName) {
            if (SAX.isWhitespace(c)) {
              continue;
            } else if (SAX.notMatch(nameStart, c)) {
              if (this.script) {
                this.script += '</' + c;
                this.state = this.S.SCRIPT;
              } else {
                this.strictFail('Invalid tagname in closing tag.');
              }
            } else {
              this.tagName = c;
            }
          } else if (c === '>') {
            this.closeTag();
          } else if (SAX.isMatch(nameBody, c)) {
            this.tagName += c;
          } else if (this.script) {
            this.script += '</' + this.tagName;
            this.tagName = '';
            this.state = this.S.SCRIPT;
          } else {
            if (!SAX.isWhitespace(c)) {
              this.strictFail('Invalid tagname in closing tag');
            }
            this.state = this.S.CLOSE_TAG_SAW_WHITE;
          }
          continue;

        case this.S.CLOSE_TAG_SAW_WHITE:
          if (SAX.isWhitespace(c)) {
            continue;
          }
          if (c === '>') {
            this.closeTag();
          } else {
            this.strictFail('Invalid characters in closing tag');
          }
          continue;

        case this.S.TEXT_ENTITY:
        case this.S.ATTRIB_VALUE_ENTITY_Q:
        case this.S.ATTRIB_VALUE_ENTITY_U: {
          // Pick the state to go back to and the buffer the decoded entity
          // is appended to, depending on where the '&' was seen.
          let returnState: any;
          let buffer: string;
          switch (this.state) {
            case this.S.TEXT_ENTITY:
              returnState = this.S.TEXT;
              buffer = 'textNode';
              break;

            case this.S.ATTRIB_VALUE_ENTITY_Q:
              returnState = this.S.ATTRIB_VALUE_QUOTED;
              buffer = 'attribValue';
              break;

            case this.S.ATTRIB_VALUE_ENTITY_U:
              returnState = this.S.ATTRIB_VALUE_UNQUOTED;
              buffer = 'attribValue';
              break;

            default:
              throw new Error('Unknown state: ' + this.state);
          }

          if (c === ';') {
            this[buffer] += this.parseEntity();
            this.entity = '';
            this.state = returnState;
          } else if (
            SAX.isMatch(this.entity.length ? entityBody : entityStart, c)
          ) {
            this.entity += c;
          } else {
            this.strictFail('Invalid character in entity name');
            this[buffer] += '&' + this.entity + c;
            this.entity = '';
            this.state = returnState;
          }

          continue;
        }

        default:
          throw new Error('Unknown state: ' + this.state);
      }
    }

    if (this.position >= this.bufferCheckPosition) {
      this.checkBufferLength();
    }
    return this;
  }

  /** Calls `this[event](data)` if `event` is an own property of this instance. */
  protected emit(event: string, data?: Error | {}): void {
    if (this.hasOwnProperty(event)) this[event](data);
  }

  /** Resets every field named in `BUFFERS` to the empty string. */
  protected clearBuffers() {
    for (let i = 0, l = this.BUFFERS.length; i < l; i++) {
      this[this.BUFFERS[i]] = '';
    }
  }

  /** Emits any pending text, cdata and script content and empties those buffers. */
  protected flushBuffers() {
    this.closeText();
    if (this.cdata !== '') {
      this.emitNode('oncdata', this.cdata);
      this.cdata = '';
    }
    if (this.script !== '') {
      this.emitNode('onscript', this.script);
      this.script = '';
    }
  }

  /**
   * Finishes the parse: reports an unclosed root or an unexpected end,
   * flushes pending text, marks the parser closed and emits `onend`.
   *
   * @returns A new `SAXParser` created with this parser's `strict` and `opt`.
   */
  protected end() {
    if (this.sawRoot && !this.closedRoot) this.strictFail('Unclosed root tag');
    if (
      this.state !== this.S.BEGIN &&
      this.state !== this.S.BEGIN_WHITESPACE &&
      this.state !== this.S.TEXT
    ) {
      this.errorFunction('Unexpected end');
    }
    this.closeText();
    this.c = '';
    this.closed = true;
    this.emit('onend');
    return new SAXParser(this.strict, this.opt);
  }

  /**
   * Records an error on the parser and emits `onerror`. When position
   * tracking is on, line, column and current character are appended to the
   * message. While `this.error` is set, `write` throws it.
   */
  protected errorFunction(er: string) {
    this.closeText();
    if (this.trackPosition) {
      er +=
        '\nLine: ' +
        this.line +
        '\nColumn: ' +
        this.column +
        '\nChar: ' +
        this.c;
    }
    const error = new Error(er);
    this.error = error;
    this.emit('onerror', error);
    return this;
  }

  /**
   * Commits the attribute held in `attribName` / `attribValue` to the current
   * tag. A name that was already seen on this tag is dropped (first wins).
   * With `opt.xmlns` the attribute is queued in `attribList` until `openTag`
   * and `xmlns:*` declarations are bound on the tag's namespace map;
   * otherwise it is stored on the tag and `onattribute` is emitted now.
   */
  private attrib() {
    if (!this.strict) {
      this.attribName = this.attribName[this.looseCase]();
    }

    // attribList stores [name, value] pairs, so compare against pair[0].
    const name = this.attribName;
    const alreadyQueued = this.attribList.some(pair => pair[0] === name);
    if (alreadyQueued || this.tag.attributes.hasOwnProperty(name)) {
      this.attribName = this.attribValue = '';
      return;
    }

    if (this.opt.xmlns) {
      const qn = SAX.qname(this.attribName, true);
      const prefix = qn.prefix;
      const local = qn.local;

      if (prefix === 'xmlns') {
        if (local === 'xml' && this.attribValue !== this.XML_NAMESPACE) {
          this.strictFail(
            'xml: prefix must be bound to ' +
              this.XML_NAMESPACE +
              '\n' +
              'Actual: ' +
              this.attribValue
          );
        } else if (
          local === 'xmlns' &&
          this.attribValue !== this.XMLNS_NAMESPACE
        ) {
          this.strictFail(
            'xmlns: prefix must be bound to ' +
              this.XMLNS_NAMESPACE +
              '\n' +
              'Actual: ' +
              this.attribValue
          );
        } else {
          const tag = this.tag;
          const parent = this.tags[this.tags.length - 1] || this;
          // Give the tag its own namespace map (inheriting from the
          // parent's) the first time it declares a binding.
          if (tag.ns === parent.ns) {
            tag.ns = Object.create(parent.ns);
          }
          tag.ns[local] = this.attribValue;
        }
      }

      this.attribList.push([this.attribName, this.attribValue]);
    } else {
      this.tag.attributes[this.attribName] = this.attribValue;
      this.emitNode('onattribute', {
        name: this.attribName,
        value: this.attribValue,
      });
    }

    this.attribName = this.attribValue = '';
  }

  /** Creates the tag object for `tagName` and emits `onopentagstart`. */
  private newTag() {
    if (!this.strict) this.tagName = this.tagName[this.looseCase]();
    const parent = this.tags[this.tags.length - 1] || this;
    const tag: any = (this.tag = {name: this.tagName, attributes: {}});

    if (this.opt.xmlns) {
      tag.ns = parent.ns;
    }
    this.attribList.length = 0;
    this.emitNode('onopentagstart', tag);
  }

  /**
   * Resolves the text collected between `&` and `;`.
   *
   * Named entities are looked up in `ENTITIES` (exact case, then lower
   * case); `#N` / `#xN` forms are decoded as code points. Anything that
   * cannot be resolved is reported via `strictFail` and returned unchanged
   * as `&name;`.
   */
  private parseEntity() {
    let entity = this.entity;
    const entityLC = entity.toLowerCase();
    let num = NaN;
    let numStr = '';

    // Only accept string/number values: the table is prototype-chained, so a
    // name such as "constructor" would otherwise resolve to an inherited
    // Object.prototype member.
    for (const candidate of [entity, entityLC]) {
      const value = this.ENTITIES[candidate];
      if (value && (typeof value === 'string' || typeof value === 'number')) {
        return value;
      }
    }

    entity = entityLC;
    if (entity.charAt(0) === '#') {
      if (entity.charAt(1) === 'x') {
        entity = entity.slice(2);
        num = parseInt(entity, 16);
        numStr = num.toString(16);
      } else {
        entity = entity.slice(1);
        num = parseInt(entity, 10);
        numStr = num.toString(10);
      }
    }

    entity = entity.replace(/^0+/, '');
    // The round-trip comparison rejects trailing garbage; the range check
    // keeps String.fromCodePoint from throwing a RangeError.
    if (
      isNaN(num) ||
      numStr.toLowerCase() !== entity ||
      num < 0 ||
      num > 0x10ffff
    ) {
      this.strictFail('Invalid character entity');
      return '&' + this.entity + ';';
    }

    return String.fromCodePoint(num);
  }

  /** Handles one character while still before the first tag. */
  private beginWhiteSpace(c: string) {
    if (c === '<') {
      this.state = this.S.OPEN_WAKA;
      this.startTagPosition = this.position;
    } else if (!SAX.isWhitespace(c)) {
      this.strictFail('Non-whitespace before first tag.');
      this.textNode = c;
      this.state = this.S.TEXT;
    } else {
      // Leading whitespace is ignored.
    }
  }

  /**
   * Reports `message` as an error only when the parser is in strict mode.
   *
   * @throws Error if invoked on something that is not a `SAXParser`.
   */
  private strictFail(message: string) {
    if (typeof this !== 'object' || !(this instanceof SAXParser)) {
      throw new Error('bad call to strictFail');
    }
    if (this.strict) {
      this.errorFunction(message);
    }
  }

  /** Applies the `trim` and `normalize` options to `text`. */
  private textApplyOptions(text: string): string {
    if (this.opt.trim) text = text.trim();
    if (this.opt.normalize) text = text.replace(/\s+/g, ' ');
    return text;
  }

  /** Flushes pending text (so events stay in document order), then emits. */
  private emitNode(nodeType: string, data?: {}) {
    if (this.textNode) this.closeText();
    this.emit(nodeType, data);
  }

  /** Emits the pending text node, if non-empty, and clears it. */
  private closeText() {
    this.textNode = this.textApplyOptions(this.textNode);
    if (this.textNode) {
      this.emit('ontext', this.textNode);
    }
    this.textNode = '';
  }

  /**
   * Enforces `opt.MAX_BUFFER_LENGTH`: over-long text/cdata/script buffers are
   * emitted early, any other over-long buffer is an error. Also schedules
   * the position at which the next check should run.
   */
  private checkBufferLength() {
    const maxAllowed = Math.max(this.opt.MAX_BUFFER_LENGTH, 10);
    let maxActual = 0;
    for (let i = 0, l = this.BUFFERS.length; i < l; i++) {
      const name = this.BUFFERS[i];
      const value = this.hasOwnProperty(name) ? this[name] : undefined;
      // `doctype` is set to `true` once emitted; counting only strings keeps
      // the arithmetic below from turning into NaN.
      const len = typeof value === 'string' ? value.length : 0;
      if (len > maxAllowed) {
        switch (name) {
          case 'textNode':
            this.closeText();
            break;
          case 'cdata':
            this.emitNode('oncdata', this.cdata);
            this.cdata = '';
            break;
          case 'script':
            this.emitNode('onscript', this.script);
            this.script = '';
            break;
          default:
            this.errorFunction('Max buffer length exceeded: ' + name);
        }
      }
      maxActual = Math.max(maxActual, len);
    }
    const m = this.opt.MAX_BUFFER_LENGTH - maxActual;
    this.bufferCheckPosition = m + this.position;
  }

  /**
   * Completes the current opening tag: with `opt.xmlns`, resolves the tag's
   * and queued attributes' namespaces and emits namespace/attribute events;
   * then pushes the tag on the stack and emits `onopentag`.
   *
   * @param selfClosing True when the tag ended with `/>`.
   */
  private openTag(selfClosing?: boolean) {
    if (this.opt.xmlns) {
      const tag = this.tag;

      const qn = SAX.qname(this.tagName);
      tag.prefix = qn.prefix;
      tag.local = qn.local;
      tag.uri = tag.ns[qn.prefix] || '';

      if (tag.prefix && !tag.uri) {
        this.strictFail(
          'Unbound namespace prefix: ' + JSON.stringify(this.tagName)
        );
        tag.uri = qn.prefix;
      }

      const parent = this.tags[this.tags.length - 1] || this;
      // The tag got its own map only if it declared bindings (see attrib).
      if (tag.ns && parent.ns !== tag.ns) {
        Object.keys(tag.ns).forEach(p => {
          this.emitNode('onopennamespace', {
            prefix: p,
            uri: tag.ns[p],
          });
        });
      }

      for (let i = 0, l = this.attribList.length; i < l; i++) {
        const nv = this.attribList[i];
        const name = nv[0];
        const value = nv[1];
        const qualName = SAX.qname(name, true);
        const prefix = qualName.prefix;
        const local = qualName.local;
        const uri = prefix === '' ? '' : tag.ns[prefix] || '';
        const a = {
          name,
          value,
          prefix,
          local,
          uri,
        };

        if (prefix && prefix !== 'xmlns' && !uri) {
          this.strictFail(
            'Unbound namespace prefix: ' + JSON.stringify(prefix)
          );
          a.uri = prefix;
        }
        this.tag.attributes[name] = a;
        this.emitNode('onattribute', a);
      }
      this.attribList.length = 0;
    }

    this.tag.isSelfClosing = !!selfClosing;

    this.sawRoot = true;
    this.tags.push(this.tag);
    this.emitNode('onopentag', this.tag);
    if (!selfClosing) {
      if (!this.noscript && this.tagName.toLowerCase() === 'script') {
        this.state = this.S.SCRIPT;
      } else {
        this.state = this.S.TEXT;
      }
      this.tag = null;
      this.tagName = '';
    }
    this.attribName = this.attribValue = '';
    this.attribList.length = 0;
  }

  /**
   * Handles a closing tag named `tagName`: finds the nearest matching open
   * tag, then pops and emits `onclosetag` for it and for every tag opened
   * after it. An unmatched closing tag is appended to the text instead.
   */
  private closeTag() {
    if (!this.tagName) {
      this.strictFail('Weird empty close tag.');
      this.textNode += '</>';
      this.state = this.S.TEXT;
      return;
    }

    if (this.script) {
      if (this.tagName !== 'script') {
        this.script += '</' + this.tagName + '>';
        this.tagName = '';
        this.state = this.S.SCRIPT;
        return;
      }
      this.emitNode('onscript', this.script);
      this.script = '';
    }

    // Walk down the stack to the first tag whose name matches.
    let t = this.tags.length;
    let tagName = this.tagName;
    if (!this.strict) {
      tagName = tagName[this.looseCase]();
    }
    while (t--) {
      const close = this.tags[t];
      if (close.name !== tagName) {
        this.strictFail('Unexpected close tag');
      } else {
        break;
      }
    }

    if (t < 0) {
      this.strictFail('Unmatched closing tag: ' + this.tagName);
      this.textNode += '</' + this.tagName + '>';
      this.state = this.S.TEXT;
      return;
    }
    this.tagName = tagName;
    let s = this.tags.length;
    while (s-- > t) {
      const tag = (this.tag = this.tags.pop());
      this.tagName = this.tag.name;
      this.emitNode('onclosetag', this.tagName);

      const parent = this.tags[this.tags.length - 1] || this;
      if (this.opt.xmlns && tag.ns !== parent.ns) {
        Object.keys(tag.ns).forEach(p => {
          const n = tag.ns[p];
          this.emitNode('onclosenamespace', {prefix: p, uri: n});
        });
      }
    }
    if (t === 0) this.closedRoot = true;
    this.tagName = this.attribValue = this.attribName = '';
    this.attribList.length = 0;
    this.state = this.S.TEXT;
  }
}
/**
 * Concrete parser: sets up the per-parse configuration on top of `SAX` and
 * declares the `on<event>` handler slots, each defaulting to a no-op.
 */
export class SAXParser extends SAX {
  /**
   * @param strict Enables strict mode (stored as a boolean; also forces
   *     `noscript`).
   * @param opt Options bag; merged over a default `MAX_BUFFER_LENGTH` of
   *     64 KiB.
   */
  constructor(strict: boolean, opt: any) {
    super();

    if (!(this instanceof SAXParser)) {
      return new SAXParser(strict, opt);
    }

    this.clearBuffers();
    this.q = '';
    this.c = '';

    // Options, with the buffer limit defaulted.
    const options = {MAX_BUFFER_LENGTH: 64 * 1024, ...opt};
    this.opt = options;
    this.bufferCheckPosition = options.MAX_BUFFER_LENGTH;
    options.lowercase = options.lowercase || options.lowercasetags || false;
    if (options.lowercase) {
      this.looseCase = 'toLowerCase';
    } else {
      this.looseCase = 'toUpperCase';
    }

    // Fresh document state.
    this.tags = [];
    this.sawRoot = false;
    this.closedRoot = false;
    this.closed = false;
    this.error = null;
    this.tag = null;
    this.strict = !!strict;
    this.noscript = !!(strict || options.noscript);
    this.state = this.S.BEGIN;

    // The instance entity table inherits from either the XML-only set or
    // the full table, depending on `strictEntities`.
    this.strictEntities = options.strictEntities;
    const entitySource = this.strictEntities
      ? this.XML_ENTITIES
      : this.ENTITIES;
    this.ENTITIES = Object.create(entitySource);
    this.attribList = [];

    if (options.xmlns) {
      this.ns = Object.create(this.rootNS);
    }

    // Position tracking is on unless explicitly disabled with position: false.
    this.trackPosition = options.position !== false;
    if (this.trackPosition) {
      this.column = 0;
      this.line = 0;
      this.position = 0;
    }
    this.emit('onready');
  }

  // Event handler slots. Assign a function to receive the matching event.
  ontext: Function = () => {};
  onprocessinginstruction: Function = () => {};
  onsgmldeclaration: Function = () => {};
  ondoctype: Function = () => {};
  oncomment: Function = () => {};
  onopentagstart: Function = () => {};
  onattribute: Function = () => {};
  onopentag: Function = () => {};
  onclosetag: Function = () => {};
  onopencdata: Function = () => {};
  oncdata: Function = () => {};
  onclosecdata: Function = () => {};
  onerror: Function = () => {};
  onend: Function = () => {};
  onready: Function = () => {};
  onscript: Function = () => {};
  onopennamespace: Function = () => {};
  onclosenamespace: Function = () => {};

  /** Clears the stored error so that `write` can be called again. */
  resume() {
    this.error = null;
    return this;
  }

  /** Ends the parse; equivalent to `write(null)`. */
  close() {
    return this.write(null);
  }

  /** Emits any buffered text, cdata and script content immediately. */
  flush() {
    this.flushBuffers();
  }
}