import { BufReader } from "../io/buffer.ts";import { TextProtoReader } from "../textproto/mod.ts";import { StringReader } from "../io/readers.ts";import { assert } from "../_util/assert.ts";
export { NEWLINE, stringify, StringifyError } from "./csv_stringify.ts";
export type { Column, ColumnDetails, DataItem, StringifyOptions,} from "./csv_stringify.ts";
/** Values rejected by `chkOptions` when used as the separator or the comment marker. */
const INVALID_RUNE = ["\r", "\n", '"'];

/** Message used by `readRecord` when a `"` appears inside a field that did not start with a quote. */
export const ERR_BARE_QUOTE = 'bare " in non-quoted-field';
/** Message used by `readRecord` when a quoted field is closed incorrectly or never closed. */
export const ERR_QUOTE = 'extraneous or missing " in quoted-field';
/** Message of the `Error` thrown by `chkOptions` for an unusable separator/comment combination. */
export const ERR_INVALID_DELIM = "Invalid Delimiter";
/** Message used by `readMatrix` when a record does not have the expected number of fields. */
export const ERR_FIELD_COUNT = "wrong number of fields";
/**
 * Error raised while reading CSV input; it records where the failing record
 * began and where the problem was found, and builds a human-readable message
 * from those positions.
 */
export class ParseError extends Error {
  /** Line on which the failing record starts. */
  startLine: number;
  /** Line on which the problem was detected. */
  line: number;
  /** Column of the problem, or `null` when no column is available. */
  column: number | null;

  /**
   * @param start Line on which the record starts.
   * @param line Line on which the problem was detected.
   * @param column Column of the problem, or `null`.
   * @param message Short reason; `ERR_FIELD_COUNT` selects the record-level wording.
   */
  constructor(
    start: number,
    line: number,
    column: number | null,
    message: string,
  ) {
    super();
    this.startLine = start;
    this.column = column;
    this.line = line;

    // Wording shared by both positional variants of the message.
    const located = `parse error on line ${line}, column ${column}: ${message}`;

    if (message === ERR_FIELD_COUNT) {
      // A field-count mismatch concerns the whole record, so no column is shown.
      this.message = `record on line ${line}: ${message}`;
      return;
    }
    // Mention the record's first line only when it differs from the error line.
    this.message = start === line
      ? located
      : `record on line ${start}; ${located}`;
  }
}
/** Options controlling how CSV text is split into records and fields. */
export interface ReadOptions {
  /** String separating fields; `chkOptions` falls back to `","` when it is unset or empty. */
  separator?: string;
  /** Comment marker; a line that begins with it produces no record. */
  comment?: string;
  /** When true, leading whitespace is trimmed before each field is parsed. */
  trimLeadingSpace?: boolean;
  /** When true, stray or unterminated `"` are kept as data instead of raising a `ParseError`. */
  lazyQuotes?: boolean;
  /**
   * Number of fields every record must have. `0` takes the count from the
   * first record; when left undefined no count check is performed.
   */
  fieldsPerRecord?: number;
}
/**
 * Fills in defaults on `opt` (in place) and validates its delimiters.
 *
 * @param opt Options to normalise; `separator` and `trimLeadingSpace` are
 *   written back when they were falsy.
 * @throws {Error} With message `ERR_INVALID_DELIM` when the separator or the
 *   comment marker is one of `INVALID_RUNE`, or when both are equal.
 */
function chkOptions(opt: ReadOptions): void {
  if (!opt.separator) opt.separator = ",";
  if (!opt.trimLeadingSpace) opt.trimLeadingSpace = false;

  const isForbidden = (value: string): boolean => INVALID_RUNE.includes(value);

  const separatorOk = !isForbidden(opt.separator);
  // A missing comment marker is always acceptable.
  const commentOk = typeof opt.comment !== "string" ||
    !isForbidden(opt.comment);
  const distinct = opt.separator !== opt.comment;

  if (separatorOk && commentOk && distinct) return;
  throw new Error(ERR_INVALID_DELIM);
}
/**
 * Reads one CSV record (which may span several physical lines when a quoted
 * field contains line breaks) from `reader`.
 *
 * @param startLine Zero-based index of the line about to be read; error
 *   positions are reported as `startLine + 1`.
 * @param reader Buffered source of the CSV text.
 * @param opt Read options; `opt.separator` must already be set.
 * @returns The fields of the record, `[]` for an empty or comment line, or
 *   `null` when no more lines are available.
 * @throws {ParseError} With `ERR_BARE_QUOTE` or `ERR_QUOTE` when quoting is
 *   malformed and `opt.lazyQuotes` is not enabled.
 */
async function readRecord(
  startLine: number,
  reader: BufReader,
  opt: ReadOptions = { separator: ",", trimLeadingSpace: false },
): Promise<string[] | null> {
  const tp = new TextProtoReader(reader);
  let line = await readLine(tp);
  let lineIndex = startLine + 1;

  if (line === null) return null;
  if (line.length === 0) {
    return [];
  }
  // Match the whole marker: indexing with `line[0]` only compares a single
  // UTF-16 code unit, so markers longer than one unit could never match.
  if (opt.comment && line.startsWith(opt.comment)) {
    return [];
  }

  assert(opt.separator != null);

  // `fullLine` keeps the untouched current line so columns can be computed
  // from how much of it is still unparsed in `line`.
  let fullLine = line;
  let quoteError: ParseError | null = null;
  const quote = '"';
  const quoteLen = quote.length;
  const separatorLen = opt.separator.length;
  // All field contents are appended to one buffer; `fieldIndexes` stores the
  // end offset of each field so the buffer can be sliced at the end.
  let recordBuffer = "";
  const fieldIndexes = [] as number[];
  parseField:
  for (;;) {
    if (opt.trimLeadingSpace) {
      line = line.trimStart();
    }

    if (line.length === 0 || !line.startsWith(quote)) {
      // Non-quoted field: runs up to the next separator or the end of line.
      const i = line.indexOf(opt.separator);
      let field = line;
      if (i >= 0) {
        field = field.substring(0, i);
      }
      // A quote inside a non-quoted field is an error unless lazyQuotes is on.
      if (!opt.lazyQuotes) {
        const j = field.indexOf(quote);
        if (j >= 0) {
          const col = runeCount(
            fullLine.slice(0, fullLine.length - line.slice(j).length),
          );
          quoteError = new ParseError(
            startLine + 1,
            lineIndex,
            col,
            ERR_BARE_QUOTE,
          );
          break parseField;
        }
      }
      recordBuffer += field;
      fieldIndexes.push(recordBuffer.length);
      if (i >= 0) {
        line = line.substring(i + separatorLen);
        continue parseField;
      }
      break parseField;
    } else {
      // Quoted field: drop the opening quote, then scan for the closing one.
      line = line.substring(quoteLen);
      for (;;) {
        const i = line.indexOf(quote);
        if (i >= 0) {
          // Found a quote: copy what precedes it and inspect what follows.
          recordBuffer += line.substring(0, i);
          line = line.substring(i + quoteLen);
          if (line.startsWith(quote)) {
            // `""` is an escaped quote.
            recordBuffer += quote;
            line = line.substring(quoteLen);
          } else if (line.startsWith(opt.separator)) {
            // `",` ends the field; another field follows.
            line = line.substring(separatorLen);
            fieldIndexes.push(recordBuffer.length);
            continue parseField;
          } else if (0 === line.length) {
            // Closing quote at end of line ends the record.
            fieldIndexes.push(recordBuffer.length);
            break parseField;
          } else if (opt.lazyQuotes) {
            // Quote followed by ordinary text: keep it as data.
            recordBuffer += quote;
          } else {
            const col = runeCount(
              fullLine.slice(0, fullLine.length - line.length - quoteLen),
            );
            quoteError = new ParseError(
              startLine + 1,
              lineIndex,
              col,
              ERR_QUOTE,
            );
            break parseField;
          }
        } else if (line.length > 0 || !(await isEOF(tp))) {
          // No closing quote on this line: the field continues on the next one.
          recordBuffer += line;
          const r = await readLine(tp);
          lineIndex++;
          line = r ?? "";
          fullLine = line;
          if (r === null) {
            // Input ended inside the quoted field.
            if (!opt.lazyQuotes) {
              const col = runeCount(fullLine);
              quoteError = new ParseError(
                startLine + 1,
                lineIndex,
                col,
                ERR_QUOTE,
              );
              break parseField;
            }
            fieldIndexes.push(recordBuffer.length);
            break parseField;
          }
          // Restore the line break that separated the two physical lines.
          recordBuffer += "\n";
        } else {
          // Nothing left on this line and the input is exhausted.
          if (!opt.lazyQuotes) {
            const col = runeCount(fullLine);
            quoteError = new ParseError(
              startLine + 1,
              lineIndex,
              col,
              ERR_QUOTE,
            );
            break parseField;
          }
          fieldIndexes.push(recordBuffer.length);
          break parseField;
        }
      }
    }
  }
  if (quoteError) {
    throw quoteError;
  }
  // Cut the accumulated buffer back into individual fields.
  const result = [] as string[];
  let preIdx = 0;
  for (const i of fieldIndexes) {
    result.push(recordBuffer.slice(preIdx, i));
    preIdx = i;
  }
  return result;
}
/**
 * Reports whether the reader behind `tp` has nothing left to read.
 *
 * @param tp Text reader whose underlying reader `tp.r` is probed with `peek(0)`.
 * @returns `true` when the peek yields `null`.
 */
async function isEOF(tp: TextProtoReader): Promise<boolean> {
  const peeked = await tp.r.peek(0);
  return peeked === null;
}
/**
 * Counts the elements produced by iterating `s`, so a surrogate pair counts
 * once rather than as two UTF-16 code units.
 *
 * @param s Text to measure.
 * @returns Number of iteration steps over `s`.
 */
function runeCount(s: string): number {
  let total = 0;
  for (const _rune of s) {
    total++;
  }
  return total;
}
/**
 * Reads the next line from `tp` and normalises carriage returns at its end.
 *
 * @param tp Text reader to pull the line from.
 * @returns The line, or `null` when `tp.readLine()` yields `null`.
 */
async function readLine(tp: TextProtoReader): Promise<string | null> {
  const raw = await tp.readLine();
  if (raw === null) return null;

  let text = raw;

  // On the final line a dangling "\r" is dropped.
  const atEnd = await isEOF(tp);
  if (atEnd && text.endsWith("\r")) {
    text = text.slice(0, -1);
  }

  // A trailing "\r\n" is collapsed to "\n".
  if (text.endsWith("\r\n")) {
    text = text.slice(0, -2) + "\n";
  }

  return text;
}
/**
 * Reads every record from `reader` and returns them as rows of fields.
 * Empty records (blank lines and comment lines) are skipped.
 *
 * @param reader Buffered source of the CSV text.
 * @param opt Read options; they are validated and completed in place by
 *   `chkOptions`.
 * @returns All non-empty records, in input order.
 * @throws {Error} With `ERR_INVALID_DELIM` when the options are rejected.
 * @throws {ParseError} With `ERR_FIELD_COUNT` when a record's field count
 *   differs from the expected one, or with a quoting error from `readRecord`.
 */
export async function readMatrix(
  reader: BufReader,
  opt: ReadOptions = {
    separator: ",",
    trimLeadingSpace: false,
    lazyQuotes: false,
  },
): Promise<string[][]> {
  const result: string[][] = [];
  let expectedFields: number | undefined;
  let first = true;
  let lineIndex = 0;
  chkOptions(opt);

  for (;;) {
    const record = await readRecord(lineIndex, reader, opt);
    if (record === null) break;
    lineIndex++;

    // Blank and comment lines carry no fields and are never validated.
    if (record.length === 0) continue;

    // Latch the expected count on the first record that actually has fields.
    // Latching on a leading blank/comment line would store 0 for
    // `fieldsPerRecord === 0`, which silently disabled the check below.
    if (first) {
      first = false;
      if (opt.fieldsPerRecord !== undefined) {
        expectedFields = opt.fieldsPerRecord === 0
          ? record.length
          : opt.fieldsPerRecord;
      }
    }

    if (expectedFields && expectedFields !== record.length) {
      throw new ParseError(lineIndex, lineIndex, null, ERR_FIELD_COUNT);
    }
    result.push(record);
  }
  return result;
}
/** Describes one output column used by `parse` when rows are turned into objects. */
export interface ColumnOptions {
  /** Property name under which the column's value is stored in each row object. */
  name: string;
}
/** Options accepted by `parse`: the read options plus header handling. */
export interface ParseOptions extends ReadOptions {
  /**
   * When true, the first row is removed from the data and its values are used
   * as column names, unless `columns` is also given.
   */
  skipFirstRow?: boolean;

  /**
   * Column names, either as plain strings or as `ColumnOptions`. When given,
   * rows are returned as objects keyed by these names.
   */
  columns?: string[] | ColumnOptions[];
}
/**
 * Parses CSV text into rows.
 *
 * Without `skipFirstRow` or `columns` the result is an array of string
 * arrays. With either option each row becomes an object keyed by column
 * name; names come from `columns` when given, otherwise from the first row.
 *
 * @param input CSV text, or a `BufReader` supplying it.
 * @param opt Parse options; also forwarded to `readMatrix`.
 * @returns The parsed rows, as arrays or as objects depending on `opt`.
 * @throws {Error} When a row's field count differs from the number of column
 *   names (previously a bare string was thrown here, which carried no stack
 *   trace and was not an `Error` instance).
 */
export async function parse(
  input: string | BufReader,
): Promise<string[][]>;
export async function parse(
  input: string | BufReader,
  opt: Omit<ParseOptions, "columns" | "skipFirstRow">,
): Promise<string[][]>;
export async function parse(
  input: string | BufReader,
  opt: Omit<ParseOptions, "columns"> & {
    columns: string[] | ColumnOptions[];
  },
): Promise<Record<string, unknown>[]>;
export async function parse(
  input: string | BufReader,
  opt: Omit<ParseOptions, "skipFirstRow"> & {
    skipFirstRow: true;
  },
): Promise<Record<string, unknown>[]>;
export async function parse(
  input: string | BufReader,
  opt: ParseOptions,
): Promise<string[][] | Record<string, unknown>[]>;
export async function parse(
  input: string | BufReader,
  opt: ParseOptions = {
    skipFirstRow: false,
  },
): Promise<string[][] | Record<string, unknown>[]> {
  let r: string[][];
  if (input instanceof BufReader) {
    r = await readMatrix(input, opt);
  } else {
    r = await readMatrix(new BufReader(new StringReader(input)), opt);
  }

  if (opt.skipFirstRow || opt.columns) {
    const toColumn = (name: string): ColumnOptions => ({ name });
    let headers: ColumnOptions[] = [];
    // Row counter used only in the error message below.
    let i = 0;

    if (opt.skipFirstRow) {
      // The first row is consumed as the header row.
      const head = r.shift();
      assert(head != null);
      headers = head.map(toColumn);
      i++;
    }

    if (opt.columns) {
      // Explicit columns take precedence over names read from the first row.
      if (typeof opt.columns[0] !== "string") {
        headers = opt.columns as ColumnOptions[];
      } else {
        headers = (opt.columns as string[]).map(toColumn);
      }
    }

    return r.map((e) => {
      if (e.length !== headers.length) {
        throw new Error(`Error number of fields line:${i}`);
      }
      i++;
      const out: Record<string, unknown> = {};
      for (let j = 0; j < e.length; j++) {
        out[headers[j].name] = e[j];
      }
      return out;
    });
  }
  return r;
}