/encoding/csv_test.ts | std@0.108.0

Deno standard library
File
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653
// Test ported from Golang
// https://github.com/golang/go/blob/2cc15b1/src/encoding/csv/reader_test.go
// Copyright 2011 The Go Authors. All rights reserved. BSD license.
// https://github.com/golang/go/blob/master/LICENSE
// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license.
import { assertEquals, assertRejects } from "../testing/asserts.ts";import {  ERR_BARE_QUOTE,  ERR_FIELD_COUNT,  ERR_INVALID_DELIM,  ERR_QUOTE,  parse,  ParseError,  readMatrix,} from "./csv.ts";import { StringReader } from "../io/readers.ts";import { BufReader } from "../io/bufio.ts";
/**
 * Test cases for `readMatrix()`.
 *
 * Each case has a `Name` and exactly one expectation: `Output` (the parsed
 * rows) or `Error` (the value the call must reject with). The remaining fields
 * are optional and are translated into reader options by the loop below:
 * `Separator`, `Comment`, `TrimLeadingSpace`, `LazyQuotes`, and
 * `FieldsPerRecord` (only applied when `UseFieldsPerRecord` is set).
 *
 * Inputs that span several lines are written with explicit line breaks
 * (`"\n"` escapes or `join("\n")`) so record boundaries stay unambiguous.
 */
const testCases = [
  {
    Name: "Simple",
    Input: "a,b,c\n",
    Output: [["a", "b", "c"]],
  },
  {
    Name: "CRLF",
    Input: "a,b\r\nc,d\r\n",
    Output: [
      ["a", "b"],
      ["c", "d"],
    ],
  },
  {
    Name: "BareCR",
    Input: "a,b\rc,d\r\n",
    Output: [["a", "b\rc", "d"]],
  },
  {
    Name: "RFC4180test",
    Input: [
      "#field1,field2,field3",
      '"aaa","bbb","ccc"',
      '"a,a","bbb","ccc"',
      "zzz,yyy,xxx",
    ].join("\n"),
    UseFieldsPerRecord: true,
    FieldsPerRecord: 0,
    Output: [
      ["#field1", "field2", "field3"],
      ["aaa", "bbb", "ccc"],
      ["a,a", `bbb`, "ccc"],
      ["zzz", "yyy", "xxx"],
    ],
  },
  {
    Name: "NoEOLTest",
    Input: "a,b,c",
    Output: [["a", "b", "c"]],
  },
  {
    Name: "Semicolon",
    Input: "a;b;c\n",
    Output: [["a", "b", "c"]],
    Separator: ";",
  },
  {
    Name: "MultiLine",
    // Line breaks inside quoted fields are part of the field value.
    Input: '"two\nline","one line","three\nline\nfield"',
    Output: [["two\nline", "one line", "three\nline\nfield"]],
  },
  {
    Name: "BlankLine",
    Input: "a,b,c\n\nd,e,f\n\n",
    Output: [
      ["a", "b", "c"],
      ["d", "e", "f"],
    ],
  },
  {
    Name: "BlankLineFieldCount",
    Input: "a,b,c\n\nd,e,f\n\n",
    Output: [
      ["a", "b", "c"],
      ["d", "e", "f"],
    ],
    UseFieldsPerRecord: true,
    FieldsPerRecord: 0,
  },
  {
    Name: "TrimSpace",
    Input: " a,  b,   c\n",
    Output: [["a", "b", "c"]],
    TrimLeadingSpace: true,
  },
  {
    Name: "LeadingSpace",
    Input: " a,  b,   c\n",
    Output: [[" a", "  b", "   c"]],
  },
  {
    Name: "Comment",
    Input: "#1,2,3\na,b,c\n#comment",
    Output: [["a", "b", "c"]],
    Comment: "#",
  },
  {
    Name: "NoComment",
    Input: "#1,2,3\na,b,c",
    Output: [
      ["#1", "2", "3"],
      ["a", "b", "c"],
    ],
  },
  {
    Name: "LazyQuotes",
    Input: `a "word","1"2",a","b`,
    Output: [[`a "word"`, `1"2`, `a"`, `b`]],
    LazyQuotes: true,
  },
  {
    Name: "BareQuotes",
    Input: `a "word","1"2",a"`,
    Output: [[`a "word"`, `1"2`, `a"`]],
    LazyQuotes: true,
  },
  {
    Name: "BareDoubleQuotes",
    Input: `a""b,c`,
    Output: [[`a""b`, `c`]],
    LazyQuotes: true,
  },
  {
    Name: "BadDoubleQuotes",
    Input: `a""b,c`,
    Error: new ParseError(1, 1, 1, ERR_BARE_QUOTE),
  },
  {
    Name: "TrimQuote",
    Input: ` "a"," b",c`,
    Output: [["a", " b", "c"]],
    TrimLeadingSpace: true,
  },
  {
    Name: "BadBareQuote",
    Input: `a "word","b"`,
    Error: new ParseError(1, 1, 2, ERR_BARE_QUOTE),
  },
  {
    Name: "BadTrailingQuote",
    Input: `"a word",b"`,
    Error: new ParseError(1, 1, 10, ERR_BARE_QUOTE),
  },
  {
    Name: "ExtraneousQuote",
    Input: `"a "word","b"`,
    Error: new ParseError(1, 1, 3, ERR_QUOTE),
  },
  {
    Name: "BadFieldCount",
    Input: "a,b,c\nd,e",
    Error: new ParseError(2, 2, null, ERR_FIELD_COUNT),
    UseFieldsPerRecord: true,
    FieldsPerRecord: 0,
  },
  {
    Name: "BadFieldCount1",
    Input: `a,b,c`,
    UseFieldsPerRecord: true,
    FieldsPerRecord: 2,
    Error: new ParseError(1, 1, null, ERR_FIELD_COUNT),
  },
  {
    Name: "FieldCount",
    Input: "a,b,c\nd,e",
    Output: [
      ["a", "b", "c"],
      ["d", "e"],
    ],
  },
  {
    Name: "TrailingCommaEOF",
    Input: "a,b,c,",
    Output: [["a", "b", "c", ""]],
  },
  {
    Name: "TrailingCommaEOL",
    Input: "a,b,c,\n",
    Output: [["a", "b", "c", ""]],
  },
  {
    Name: "TrailingCommaSpaceEOF",
    Input: "a,b,c, ",
    Output: [["a", "b", "c", ""]],
    TrimLeadingSpace: true,
  },
  {
    Name: "TrailingCommaSpaceEOL",
    Input: "a,b,c, \n",
    Output: [["a", "b", "c", ""]],
    TrimLeadingSpace: true,
  },
  {
    Name: "TrailingCommaLine3",
    Input: "a,b,c\nd,e,f\ng,hi,",
    Output: [
      ["a", "b", "c"],
      ["d", "e", "f"],
      ["g", "hi", ""],
    ],
    TrimLeadingSpace: true,
  },
  {
    Name: "NotTrailingComma3",
    Input: "a,b,c, \n",
    Output: [["a", "b", "c", " "]],
  },
  {
    Name: "CommaFieldTest",
    Input: [
      "x,y,z,w",
      "x,y,z,",
      "x,y,,",
      "x,,,",
      ",,,",
      '"x","y","z","w"',
      '"x","y","z",""',
      '"x","y","",""',
      '"x","","",""',
      '"","","",""',
    ].join("\n"),
    Output: [
      ["x", "y", "z", "w"],
      ["x", "y", "z", ""],
      ["x", "y", "", ""],
      ["x", "", "", ""],
      ["", "", "", ""],
      ["x", "y", "z", "w"],
      ["x", "y", "z", ""],
      ["x", "y", "", ""],
      ["x", "", "", ""],
      ["", "", "", ""],
    ],
  },
  {
    Name: "TrailingCommaIneffective1",
    Input: "a,b,\nc,d,e",
    Output: [
      ["a", "b", ""],
      ["c", "d", "e"],
    ],
    TrimLeadingSpace: true,
  },
  {
    Name: "ReadAllReuseRecord",
    Input: "a,b\nc,d",
    Output: [
      ["a", "b"],
      ["c", "d"],
    ],
    // Carried over from the Go test table; the loop below does not read it.
    ReuseRecord: true,
  },
  {
    Name: "StartLine1", // Issue 19019
    Input: 'a,"b\nc"d,e',
    Error: new ParseError(1, 2, 1, ERR_QUOTE),
  },
  {
    Name: "StartLine2",
    Input: 'a,b\n"d\n\n,e',
    Error: new ParseError(2, 5, 0, ERR_QUOTE),
  },
  {
    Name: "CRLFInQuotedField", // Issue 21201
    Input: 'A,"Hello\r\nHi",B\r\n',
    Output: [["A", "Hello\nHi", "B"]],
  },
  {
    Name: "BinaryBlobField", // Issue 19410
    Input: "x09\x41\xb4\x1c,aktau",
    Output: [["x09A\xb4\x1c", "aktau"]],
  },
  {
    Name: "TrailingCR",
    Input: "field1,field2\r",
    Output: [["field1", "field2"]],
  },
  {
    Name: "QuotedTrailingCR",
    Input: '"field"\r',
    Output: [["field"]],
  },
  {
    Name: "QuotedTrailingCRCR",
    Input: '"field"\r\r',
    Error: new ParseError(1, 1, 6, ERR_QUOTE),
  },
  {
    Name: "FieldCR",
    Input: "field\rfield\r",
    Output: [["field\rfield"]],
  },
  {
    Name: "FieldCRCR",
    Input: "field\r\rfield\r\r",
    Output: [["field\r\rfield\r"]],
  },
  {
    Name: "FieldCRCRLF",
    Input: "field\r\r\nfield\r\r\n",
    Output: [["field\r"], ["field\r"]],
  },
  {
    Name: "FieldCRCRLFCR",
    Input: "field\r\r\n\rfield\r\r\n\r",
    Output: [["field\r"], ["\rfield\r"]],
  },
  {
    Name: "FieldCRCRLFCRCR",
    Input: "field\r\r\n\r\rfield\r\r\n\r\r",
    Output: [["field\r"], ["\r\rfield\r"], ["\r"]],
  },
  {
    Name: "MultiFieldCRCRLFCRCR",
    Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",
    Output: [
      ["field1", "field2\r"],
      ["\r\rfield1", "field2\r"],
      ["\r\r", ""],
    ],
  },
  {
    Name: "NonASCIICommaAndComment",
    Input: "a£b,c£ \td,e\n€ comment\n",
    Output: [["a", "b,c", "d,e"]],
    TrimLeadingSpace: true,
    Separator: "£",
    Comment: "€",
  },
  {
    Name: "NonASCIICommaAndCommentWithQuotes",
    Input: 'a€"  b,"€ c\nλ comment\n',
    Output: [["a", "  b,", " c"]],
    Separator: "€",
    Comment: "λ",
  },
  {
    // λ and θ start with the same byte.
    // This tests that the parser doesn't confuse such characters.
    Name: "NonASCIICommaConfusion",
    Input: '"abθcd"λefθgh',
    Output: [["abθcd", "efθgh"]],
    Separator: "λ",
    Comment: "€",
  },
  {
    Name: "NonASCIICommentConfusion",
    Input: "λ\nλ\nθ\nλ\n",
    Output: [["λ"], ["λ"], ["λ"]],
    Comment: "θ",
  },
  {
    Name: "QuotedFieldMultipleLF",
    Input: '"\n\n\n\n"',
    Output: [["\n\n\n\n"]],
  },
  {
    Name: "MultipleCRLF",
    Input: "\r\n\r\n\r\n\r\n",
    Output: [],
  },
  /**
   * The implementation may read each line in several chunks if it doesn't
   * fit entirely in the read buffer, so we should test the code to handle
   * that condition.
   */
  /* TODO(kt3k): Enable this test case
  {
    Name: "HugeLines",
    Input: "#ignore\n".repeat(10000) + "@".repeat(5000) + "," +
      "*".repeat(5000),
    Output: [["@".repeat(5000), "*".repeat(5000)]],
    Comment: "#",
  },
  */
  {
    Name: "QuoteWithTrailingCRLF",
    Input: '"foo"bar"\r\n',
    Error: new ParseError(1, 1, 4, ERR_QUOTE),
  },
  {
    Name: "LazyQuoteWithTrailingCRLF",
    Input: '"foo"bar"\r\n',
    Output: [[`foo"bar`]],
    LazyQuotes: true,
  },
  {
    Name: "DoubleQuoteWithTrailingCRLF",
    Input: '"foo""bar"\r\n',
    Output: [[`foo"bar`]],
  },
  {
    Name: "EvenQuotes",
    Input: `""""""""`,
    Output: [[`"""`]],
  },
  {
    Name: "OddQuotes",
    Input: `"""""""`,
    Error: new ParseError(1, 1, 7, ERR_QUOTE),
  },
  {
    Name: "LazyOddQuotes",
    Input: `"""""""`,
    Output: [[`"""`]],
    LazyQuotes: true,
  },
  {
    Name: "BadComma1",
    Separator: "\n",
    Error: new Error(ERR_INVALID_DELIM),
  },
  {
    Name: "BadComma2",
    Separator: "\r",
    Error: new Error(ERR_INVALID_DELIM),
  },
  {
    Name: "BadComma3",
    Separator: '"',
    Error: new Error(ERR_INVALID_DELIM),
  },
  {
    Name: "BadComment1",
    Comment: "\n",
    Error: new Error(ERR_INVALID_DELIM),
  },
  {
    Name: "BadComment2",
    Comment: "\r",
    Error: new Error(ERR_INVALID_DELIM),
  },
  {
    Name: "BadCommaComment",
    Separator: "X",
    Comment: "X",
    Error: new Error(ERR_INVALID_DELIM),
  },
];

// Register one Deno test per `readMatrix()` case above.
for (const t of testCases) {
  Deno.test({
    name: `[CSV] ${t.Name}`,
    async fn() {
      // Absent (or falsy) case fields fall back to "," / undefined / false.
      // `FieldsPerRecord` is only forwarded when `UseFieldsPerRecord` is set.
      const options = {
        separator: t.Separator || ",",
        comment: t.Comment || undefined,
        trimLeadingSpace: Boolean(t.TrimLeadingSpace),
        fieldsPerRecord: t.UseFieldsPerRecord ? t.FieldsPerRecord : undefined,
        lazyQuotes: Boolean(t.LazyQuotes),
      };
      // A fresh reader is built for every call; cases without `Input`
      // (the invalid-delimiter ones) read from an empty string.
      const read = () =>
        readMatrix(new BufReader(new StringReader(t.Input ?? "")), options);

      if (t.Error) {
        // The rejection value must be deeply equal to the expected error.
        await assertRejects(async () => {
          await read();
        }, (error: Error) => {
          assertEquals(error, t.Error);
        });
      } else {
        assertEquals(await read(), t.Output);
      }
    },
  });
}
// Scenarios for `parse()`. Each entry carries the input (`in`), the options
// that the registration loop forwards to `parse()` (`skipFirstRow`, `columns`,
// `parse`), and the expected `result`.
const parseTestCases = [
  {
    name: "simple",
    in: "a,b,c",
    skipFirstRow: false,
    result: [["a", "b", "c"]],
  },
  {
    // Same data as "simple", but supplied through a BufReader.
    name: "simple Bufreader",
    in: new BufReader(new StringReader("a,b,c")),
    skipFirstRow: false,
    result: [["a", "b", "c"]],
  },
  {
    name: "multiline",
    in: "a,b,c\ne,f,g\n",
    skipFirstRow: false,
    result: [
      ["a", "b", "c"],
      ["e", "f", "g"],
    ],
  },
  {
    // With `skipFirstRow`, the first row supplies the object keys.
    name: "header mapping boolean",
    in: "a,b,c\ne,f,g\n",
    skipFirstRow: true,
    result: [{ a: "e", b: "f", c: "g" }],
  },
  {
    // Column names given as plain strings.
    name: "header mapping array",
    in: "a,b,c\ne,f,g\n",
    columns: ["this", "is", "sparta"],
    result: [
      { this: "a", is: "b", sparta: "c" },
      { this: "e", is: "f", sparta: "g" },
    ],
  },
  {
    // Column names given as `{ name }` objects.
    name: "header mapping object",
    in: "a,b,c\ne,f,g\n",
    columns: [{ name: "this" }, { name: "is" }, { name: "sparta" }],
    result: [
      { this: "a", is: "b", sparta: "c" },
      { this: "e", is: "f", sparta: "g" },
    ],
  },
  {
    // Each column carries its own cell-level `parse` callback.
    name: "header mapping parse entry",
    in: "a,b,c\ne,f,g\n",
    columns: [
      { name: "this", parse: (e: string): string => `b${e}$$` },
      { name: "is", parse: (e: string): number => e.length },
      { name: "sparta", parse: (e: string): unknown => ({ bim: `boom-${e}` }) },
    ],
    result: [
      { this: "ba$$", is: 1, sparta: { bim: `boom-c` } },
      { this: "be$$", is: 1, sparta: { bim: `boom-g` } },
    ],
  },
  {
    // Row-level `parse` callback applied to raw string arrays.
    name: "multiline parse",
    in: "a,b,c\ne,f,g\n",
    parse: (e: string[]): unknown => ({
      super: e[0],
      street: e[1],
      fighter: e[2],
    }),
    skipFirstRow: false,
    result: [
      { super: "a", street: "b", fighter: "c" },
      { super: "e", street: "f", fighter: "g" },
    ],
  },
  {
    // Row-level `parse` callback applied after column mapping.
    name: "header mapping object parseline",
    in: "a,b,c\ne,f,g\n",
    columns: [{ name: "this" }, { name: "is" }, { name: "sparta" }],
    parse: (e: Record<string, unknown>): unknown => ({
      super: e.this,
      street: e.is,
      fighter: e.sparta,
    }),
    result: [
      { super: "a", street: "b", fighter: "c" },
      { super: "e", street: "f", fighter: "g" },
    ],
  },
  {
    name: "provides both opts.skipFirstRow and opts.columns",
    in: "a,b,1\nc,d,2\ne,f,3",
    skipFirstRow: true,
    columns: [
      { name: "foo" },
      { name: "bar" },
      { name: "baz", parse: (e: string) => Number(e) },
    ],
    result: [
      { foo: "c", bar: "d", baz: 2 },
      { foo: "e", bar: "f", baz: 3 },
    ],
  },
];
// Register one Deno test per `parse()` scenario: run `parse()` on the
// scenario's input with its options and compare against the expected result.
for (
  const { name, in: input, skipFirstRow, columns, parse: parseRow, result }
    of parseTestCases
) {
  Deno.test({
    name: `[CSV] Parse ${name}`,
    fn: async () => {
      const parsed = await parse(input, {
        skipFirstRow,
        columns,
        // The scenarios use differently-typed row callbacks; cast them to
        // the single callback shape passed to `parse()`.
        parse: parseRow as (input: unknown) => unknown,
      });
      assertEquals(parsed, result);
    },
  });
}
// Checks the `message` text that `ParseError` produces for three argument
// combinations used elsewhere in this file.
Deno.test({
  name: "[CSV] ParseError.message",
  fn(): void {
    // Pairs of [error under test, expected `message`].
    const expectations: Array<[ParseError, string]> = [
      [
        new ParseError(2, 2, null, ERR_FIELD_COUNT),
        `record on line 2: ${ERR_FIELD_COUNT}`,
      ],
      [
        new ParseError(1, 2, 1, ERR_QUOTE),
        `record on line 1; parse error on line 2, column 1: ${ERR_QUOTE}`,
      ],
      [
        new ParseError(1, 1, 7, ERR_QUOTE),
        `parse error on line 1, column 7: ${ERR_QUOTE}`,
      ],
    ];

    for (const [error, expectedMessage] of expectations) {
      assertEquals(error.message, expectedMessage);
    }
  },
});