Skip to main content
Module

std/encoding/csv_test.ts

Deno standard library
Go to Latest
File
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653
// Test ported from Golang
// https://github.com/golang/go/blob/2cc15b1/src/encoding/csv/reader_test.go
// Copyright 2011 The Go Authors. All rights reserved. BSD license.
// https://github.com/golang/go/blob/master/LICENSE
// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license.
import { assertEquals, assertRejects } from "../testing/asserts.ts";import { ERR_BARE_QUOTE, ERR_FIELD_COUNT, ERR_INVALID_DELIM, ERR_QUOTE, parse, ParseError, readMatrix,} from "./csv.ts";import { StringReader } from "../io/readers.ts";import { BufReader } from "../io/bufio.ts";
// Test cases for `readMatrix()`const testCases = [ { Name: "Simple", Input: "a,b,c\n", Output: [["a", "b", "c"]], }, { Name: "CRLF", Input: "a,b\r\nc,d\r\n", Output: [ ["a", "b"], ["c", "d"], ], }, { Name: "BareCR", Input: "a,b\rc,d\r\n", Output: [["a", "b\rc", "d"]], }, { Name: "RFC4180test", Input: `#field1,field2,field3"aaa","bbb","ccc""a,a","bbb","ccc"zzz,yyy,xxx`, UseFieldsPerRecord: true, FieldsPerRecord: 0, Output: [ ["#field1", "field2", "field3"], ["aaa", "bbb", "ccc"], ["a,a", `bbb`, "ccc"], ["zzz", "yyy", "xxx"], ], }, { Name: "NoEOLTest", Input: "a,b,c", Output: [["a", "b", "c"]], }, { Name: "Semicolon", Input: "a;b;c\n", Output: [["a", "b", "c"]], Separator: ";", }, { Name: "MultiLine", Input: `"twoline","one line","threelinefield"`, Output: [["two\nline", "one line", "three\nline\nfield"]], }, { Name: "BlankLine", Input: "a,b,c\n\nd,e,f\n\n", Output: [ ["a", "b", "c"], ["d", "e", "f"], ], }, { Name: "BlankLineFieldCount", Input: "a,b,c\n\nd,e,f\n\n", Output: [ ["a", "b", "c"], ["d", "e", "f"], ], UseFieldsPerRecord: true, FieldsPerRecord: 0, }, { Name: "TrimSpace", Input: " a, b, c\n", Output: [["a", "b", "c"]], TrimLeadingSpace: true, }, { Name: "LeadingSpace", Input: " a, b, c\n", Output: [[" a", " b", " c"]], }, { Name: "Comment", Input: "#1,2,3\na,b,c\n#comment", Output: [["a", "b", "c"]], Comment: "#", }, { Name: "NoComment", Input: "#1,2,3\na,b,c", Output: [ ["#1", "2", "3"], ["a", "b", "c"], ], }, { Name: "LazyQuotes", Input: `a "word","1"2",a","b`, Output: [[`a "word"`, `1"2`, `a"`, `b`]], LazyQuotes: true, }, { Name: "BareQuotes", Input: `a "word","1"2",a"`, Output: [[`a "word"`, `1"2`, `a"`]], LazyQuotes: true, }, { Name: "BareDoubleQuotes", Input: `a""b,c`, Output: [[`a""b`, `c`]], LazyQuotes: true, }, { Name: "BadDoubleQuotes", Input: `a""b,c`, Error: new ParseError(1, 1, 1, ERR_BARE_QUOTE), }, { Name: "TrimQuote", Input: ` "a"," b",c`, Output: [["a", " b", "c"]], TrimLeadingSpace: true, }, { Name: "BadBareQuote", Input: `a "word","b"`, 
Error: new ParseError(1, 1, 2, ERR_BARE_QUOTE), }, { Name: "BadTrailingQuote", Input: `"a word",b"`, Error: new ParseError(1, 1, 10, ERR_BARE_QUOTE), }, { Name: "ExtraneousQuote", Input: `"a "word","b"`, Error: new ParseError(1, 1, 3, ERR_QUOTE), }, { Name: "BadFieldCount", Input: "a,b,c\nd,e", Error: new ParseError(2, 2, null, ERR_FIELD_COUNT), UseFieldsPerRecord: true, FieldsPerRecord: 0, }, { Name: "BadFieldCount1", Input: `a,b,c`, UseFieldsPerRecord: true, FieldsPerRecord: 2, Error: new ParseError(1, 1, null, ERR_FIELD_COUNT), }, { Name: "FieldCount", Input: "a,b,c\nd,e", Output: [ ["a", "b", "c"], ["d", "e"], ], }, { Name: "TrailingCommaEOF", Input: "a,b,c,", Output: [["a", "b", "c", ""]], }, { Name: "TrailingCommaEOL", Input: "a,b,c,\n", Output: [["a", "b", "c", ""]], }, { Name: "TrailingCommaSpaceEOF", Input: "a,b,c, ", Output: [["a", "b", "c", ""]], TrimLeadingSpace: true, }, { Name: "TrailingCommaSpaceEOL", Input: "a,b,c, \n", Output: [["a", "b", "c", ""]], TrimLeadingSpace: true, }, { Name: "TrailingCommaLine3", Input: "a,b,c\nd,e,f\ng,hi,", Output: [ ["a", "b", "c"], ["d", "e", "f"], ["g", "hi", ""], ], TrimLeadingSpace: true, }, { Name: "NotTrailingComma3", Input: "a,b,c, \n", Output: [["a", "b", "c", " "]], }, { Name: "CommaFieldTest", Input: `x,y,z,wx,y,z,x,y,,x,,,,,,"x","y","z","w""x","y","z","""x","y","","""x","","","""","","",""`, Output: [ ["x", "y", "z", "w"], ["x", "y", "z", ""], ["x", "y", "", ""], ["x", "", "", ""], ["", "", "", ""], ["x", "y", "z", "w"], ["x", "y", "z", ""], ["x", "y", "", ""], ["x", "", "", ""], ["", "", "", ""], ], }, { Name: "TrailingCommaIneffective1", Input: "a,b,\nc,d,e", Output: [ ["a", "b", ""], ["c", "d", "e"], ], TrimLeadingSpace: true, }, { Name: "ReadAllReuseRecord", Input: "a,b\nc,d", Output: [ ["a", "b"], ["c", "d"], ], ReuseRecord: true, }, { Name: "StartLine1", // Issue 19019 Input: 'a,"b\nc"d,e', Error: new ParseError(1, 2, 1, ERR_QUOTE), }, { Name: "StartLine2", Input: 'a,b\n"d\n\n,e', Error: new 
ParseError(2, 5, 0, ERR_QUOTE), }, { Name: "CRLFInQuotedField", // Issue 21201 Input: 'A,"Hello\r\nHi",B\r\n', Output: [["A", "Hello\nHi", "B"]], }, { Name: "BinaryBlobField", // Issue 19410 Input: "x09\x41\xb4\x1c,aktau", Output: [["x09A\xb4\x1c", "aktau"]], }, { Name: "TrailingCR", Input: "field1,field2\r", Output: [["field1", "field2"]], }, { Name: "QuotedTrailingCR", Input: '"field"\r', Output: [["field"]], }, { Name: "QuotedTrailingCRCR", Input: '"field"\r\r', Error: new ParseError(1, 1, 6, ERR_QUOTE), }, { Name: "FieldCR", Input: "field\rfield\r", Output: [["field\rfield"]], }, { Name: "FieldCRCR", Input: "field\r\rfield\r\r", Output: [["field\r\rfield\r"]], }, { Name: "FieldCRCRLF", Input: "field\r\r\nfield\r\r\n", Output: [["field\r"], ["field\r"]], }, { Name: "FieldCRCRLFCR", Input: "field\r\r\n\rfield\r\r\n\r", Output: [["field\r"], ["\rfield\r"]], }, { Name: "FieldCRCRLFCRCR", Input: "field\r\r\n\r\rfield\r\r\n\r\r", Output: [["field\r"], ["\r\rfield\r"], ["\r"]], }, { Name: "MultiFieldCRCRLFCRCR", Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,", Output: [ ["field1", "field2\r"], ["\r\rfield1", "field2\r"], ["\r\r", ""], ], }, { Name: "NonASCIICommaAndComment", Input: "a£b,c£ \td,e\n€ comment\n", Output: [["a", "b,c", "d,e"]], TrimLeadingSpace: true, Separator: "£", Comment: "€", }, { Name: "NonASCIICommaAndCommentWithQuotes", Input: 'a€" b,"€ c\nλ comment\n', Output: [["a", " b,", " c"]], Separator: "€", Comment: "λ", }, { // λ and θ start with the same byte. // This tests that the parser doesn't confuse such characters. 
Name: "NonASCIICommaConfusion", Input: '"abθcd"λefθgh', Output: [["abθcd", "efθgh"]], Separator: "λ", Comment: "€", }, { Name: "NonASCIICommentConfusion", Input: "λ\nλ\nθ\nλ\n", Output: [["λ"], ["λ"], ["λ"]], Comment: "θ", }, { Name: "QuotedFieldMultipleLF", Input: '"\n\n\n\n"', Output: [["\n\n\n\n"]], }, { Name: "MultipleCRLF", Input: "\r\n\r\n\r\n\r\n", Output: [], }, /** * The implementation may read each line in several chunks if * it doesn't fit entirely. * in the read buffer, so we should test the code to handle that condition. */ /* TODO(kt3k): Enable this test case { Name: "HugeLines", Input: "#ignore\n".repeat(10000) + "@".repeat(5000) + "," + "*".repeat(5000), Output: [["@".repeat(5000), "*".repeat(5000)]], Comment: "#", }, */ { Name: "QuoteWithTrailingCRLF", Input: '"foo"bar"\r\n', Error: new ParseError(1, 1, 4, ERR_QUOTE), }, { Name: "LazyQuoteWithTrailingCRLF", Input: '"foo"bar"\r\n', Output: [[`foo"bar`]], LazyQuotes: true, }, { Name: "DoubleQuoteWithTrailingCRLF", Input: '"foo""bar"\r\n', Output: [[`foo"bar`]], }, { Name: "EvenQuotes", Input: `""""""""`, Output: [[`"""`]], }, { Name: "OddQuotes", Input: `"""""""`, Error: new ParseError(1, 1, 7, ERR_QUOTE), }, { Name: "LazyOddQuotes", Input: `"""""""`, Output: [[`"""`]], LazyQuotes: true, }, { Name: "BadComma1", Separator: "\n", Error: new Error(ERR_INVALID_DELIM), }, { Name: "BadComma2", Separator: "\r", Error: new Error(ERR_INVALID_DELIM), }, { Name: "BadComma3", Separator: '"', Error: new Error(ERR_INVALID_DELIM), }, { Name: "BadComment1", Comment: "\n", Error: new Error(ERR_INVALID_DELIM), }, { Name: "BadComment2", Comment: "\r", Error: new Error(ERR_INVALID_DELIM), }, { Name: "BadCommaComment", Separator: "X", Comment: "X", Error: new Error(ERR_INVALID_DELIM), },];for (const t of testCases) { Deno.test({ name: `[CSV] ${t.Name}`, async fn() { let separator = ","; let comment: string | undefined; let fieldsPerRec: number | undefined; let trim = false; let lazyquote = false; if (t.Separator) { 
separator = t.Separator; } if (t.Comment) { comment = t.Comment; } if (t.TrimLeadingSpace) { trim = true; } if (t.UseFieldsPerRecord) { fieldsPerRec = t.FieldsPerRecord; } if (t.LazyQuotes) { lazyquote = t.LazyQuotes; } let actual; if (t.Error) { await assertRejects(async () => { await readMatrix(new BufReader(new StringReader(t.Input ?? "")), { separator, comment: comment, trimLeadingSpace: trim, fieldsPerRecord: fieldsPerRec, lazyQuotes: lazyquote, }); }, (error: Error) => { assertEquals(error, t.Error); }); } else { actual = await readMatrix( new BufReader(new StringReader(t.Input ?? "")), { separator, comment: comment, trimLeadingSpace: trim, fieldsPerRecord: fieldsPerRec, lazyQuotes: lazyquote, }, ); const expected = t.Output; assertEquals(actual, expected); } }, });}
const parseTestCases = [ { name: "simple", in: "a,b,c", skipFirstRow: false, result: [["a", "b", "c"]], }, { name: "simple Bufreader", in: new BufReader(new StringReader("a,b,c")), skipFirstRow: false, result: [["a", "b", "c"]], }, { name: "multiline", in: "a,b,c\ne,f,g\n", skipFirstRow: false, result: [ ["a", "b", "c"], ["e", "f", "g"], ], }, { name: "header mapping boolean", in: "a,b,c\ne,f,g\n", skipFirstRow: true, result: [{ a: "e", b: "f", c: "g" }], }, { name: "header mapping array", in: "a,b,c\ne,f,g\n", columns: ["this", "is", "sparta"], result: [ { this: "a", is: "b", sparta: "c" }, { this: "e", is: "f", sparta: "g" }, ], }, { name: "header mapping object", in: "a,b,c\ne,f,g\n", columns: [{ name: "this" }, { name: "is" }, { name: "sparta" }], result: [ { this: "a", is: "b", sparta: "c" }, { this: "e", is: "f", sparta: "g" }, ], }, { name: "header mapping parse entry", in: "a,b,c\ne,f,g\n", columns: [ { name: "this", parse: (e: string): string => { return `b${e}$$`; }, }, { name: "is", parse: (e: string): number => { return e.length; }, }, { name: "sparta", parse: (e: string): unknown => { return { bim: `boom-${e}` }; }, }, ], result: [ { this: "ba$$", is: 1, sparta: { bim: `boom-c` } }, { this: "be$$", is: 1, sparta: { bim: `boom-g` } }, ], }, { name: "multiline parse", in: "a,b,c\ne,f,g\n", parse: (e: string[]): unknown => { return { super: e[0], street: e[1], fighter: e[2] }; }, skipFirstRow: false, result: [ { super: "a", street: "b", fighter: "c" }, { super: "e", street: "f", fighter: "g" }, ], }, { name: "header mapping object parseline", in: "a,b,c\ne,f,g\n", columns: [{ name: "this" }, { name: "is" }, { name: "sparta" }], parse: (e: Record<string, unknown>): unknown => { return { super: e.this, street: e.is, fighter: e.sparta }; }, result: [ { super: "a", street: "b", fighter: "c" }, { super: "e", street: "f", fighter: "g" }, ], }, { name: "provides both opts.skipFirstRow and opts.columns", in: "a,b,1\nc,d,2\ne,f,3", skipFirstRow: true, columns: [ { 
name: "foo" }, { name: "bar" }, { name: "baz", parse: (e: string) => Number(e) }, ], result: [ { foo: "c", bar: "d", baz: 2 }, { foo: "e", bar: "f", baz: 3 }, ], },];
for (const testCase of parseTestCases) { Deno.test({ name: `[CSV] Parse ${testCase.name}`, async fn() { const r = await parse(testCase.in, { skipFirstRow: testCase.skipFirstRow, columns: testCase.columns, parse: testCase.parse as (input: unknown) => unknown, }); assertEquals(r, testCase.result); }, });}
Deno.test({ name: "[CSV] ParseError.message", fn(): void { assertEquals( new ParseError(2, 2, null, ERR_FIELD_COUNT).message, `record on line 2: ${ERR_FIELD_COUNT}`, );
assertEquals( new ParseError(1, 2, 1, ERR_QUOTE).message, `record on line 1; parse error on line 2, column 1: ${ERR_QUOTE}`, );
assertEquals( new ParseError(1, 1, 7, ERR_QUOTE).message, `parse error on line 1, column 7: ${ERR_QUOTE}`, ); },});