/csv/stream_test.ts | std@0.180.0

Deno standard library
File
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.import { CsvStream } from "./stream.ts";import type { CsvStreamOptions } from "./stream.ts";import { ERR_QUOTE, ParseError } from "./_io.ts";import { readableStreamFromIterable } from "../streams/readable_stream_from_iterable.ts";import { readableStreamFromReader } from "../streams/readable_stream_from_reader.ts";import {  assert,  assertEquals,  assertRejects,  assertStringIncludes,} from "../testing/asserts.ts";import type { AssertTrue, Has } from "../testing/types.ts";import { fromFileUrl, join } from "../path/mod.ts";import { StringReader } from "../io/string_reader.ts";
const testdataDir = join(fromFileUrl(import.meta.url), "../testdata");const encoder = new TextEncoder();
Deno.test({  name: "[csv/stream] CsvStream should work with Deno.File",  permissions: {    read: [testdataDir],  },  fn: async () => {    const file = await Deno.open(join(testdataDir, "simple.csv"));    const readable = file.readable      .pipeThrough(new TextDecoderStream())      .pipeThrough(new CsvStream());    const records = [] as Array<Array<string>>;    for await (const record of readable) {      records.push(record);    }    assertEquals(records, [      ["id", "name"],      ["1", "foobar"],      ["2", "barbaz"],    ]);  },});
Deno.test({  name: "[csv/stream] CsvStream with invalid csv",  fn: async () => {    const readable = readableStreamFromIterable([      encoder.encode("id,name\n"),      encoder.encode("\n"),      encoder.encode("1,foo\n"),      encoder.encode('2,"baz\n'),    ]).pipeThrough(new TextDecoderStream()).pipeThrough(      new CsvStream(),    );    const reader = readable.getReader();    assertEquals(await reader.read(), { done: false, value: ["id", "name"] });    assertEquals(await reader.read(), { done: false, value: ["1", "foo"] });    const error = await assertRejects(() => reader.read());    assert(error instanceof ParseError);    assertEquals(error.startLine, 4);    assertEquals(error.line, 5);    assertEquals(error.column, 0);    assertStringIncludes(error.message, ERR_QUOTE);  },});
Deno.test({  name: "[csv/stream] CsvStream with various inputs",  permissions: "none",  fn: async (t) => {    // These test cases were originally ported from Go:    // https://github.com/golang/go/blob/go1.12.5/src/encoding/csv/    // Copyright 2011 The Go Authors. All rights reserved. BSD license.    // https://github.com/golang/go/blob/master/LICENSE    const testCases = [      {        name: "CRLF",        input: "a,b\r\nc,d\r\n",        output: [["a", "b"], ["c", "d"]],      },      {        name: "BareCR",        input: "a,b\rc,d\r\n",        output: [["a", "b\rc", "d"]],      },      {        name: "NoEOLTest",        input: "a,b,c",        output: [["a", "b", "c"]],      },      {        name: "Semicolon",        input: "a;b;c\n",        output: [["a", "b", "c"]],        separator: ";",      },      {        name: "MultiLine",        input: `"twoline","one line","threelinefield"`,        output: [["two\nline", "one line", "three\nline\nfield"]],      },      {        name: "BlankLine",        input: "a,b,c\n\nd,e,f\n\n",        output: [          ["a", "b", "c"],          ["d", "e", "f"],        ],      },      {        name: "LeadingSpace",        input: " a,  b,   c\n",        output: [[" a", "  b", "   c"]],      },      {        name: "Comment",        input: "#1,2,3\na,b,c\n#comment",        output: [["a", "b", "c"]],        comment: "#",      },      {        name: "NoComment",        input: "#1,2,3\na,b,c",        output: [          ["#1", "2", "3"],          ["a", "b", "c"],        ],      },      {        name: "FieldCount",        input: "a,b,c\nd,e",        output: [          ["a", "b", "c"],          ["d", "e"],        ],      },      {        name: "TrailingCommaEOF",        input: "a,b,c,",        output: [["a", "b", "c", ""]],      },      {        name: "TrailingCommaEOL",        input: "a,b,c,\n",        output: [["a", "b", "c", ""]],      },      {        name: "NotTrailingComma3",        input: "a,b,c, \n",        output: [["a", "b", "c", " "]],      },      {        name: "CommaFieldTest",        input: `x,y,z,wx,y,z,x,y,,x,,,,,,"x","y","z","w""x","y","z","""x","y","","""x","","","""","","",""`,        output: [          ["x", "y", "z", "w"],          ["x", "y", "z", ""],          ["x", "y", "", ""],          ["x", "", "", ""],          ["", "", "", ""],          ["x", "y", "z", "w"],          ["x", "y", "z", ""],          ["x", "y", "", ""],          ["x", "", "", ""],          ["", "", "", ""],        ],      },      {        name: "CRLFInQuotedField", // Issue 21201        input: 'A,"Hello\r\nHi",B\r\n',        output: [["A", "Hello\nHi", "B"]],      },      {        name: "BinaryBlobField", // Issue 19410        input: "x09\x41\xb4\x1c,aktau",        output: [["x09A\xb4\x1c", "aktau"]],      },      {        name: "TrailingCR",        input: "field1,field2\r",        output: [["field1", "field2"]],      },      {        name: "QuotedTrailingCR",        input: '"field"\r',        output: [["field"]],      },      {        name: "FieldCR",        input: "field\rfield\r",        output: [["field\rfield"]],      },      {        name: "FieldCRCR",        input: "field\r\rfield\r\r",        output: [["field\r\rfield\r"]],      },      {        name: "FieldCRCRLF",        input: "field\r\r\nfield\r\r\n",        output: [["field\r"], ["field\r"]],      },      {        name: "FieldCRCRLFCR",        input: "field\r\r\n\rfield\r\r\n\r",        output: [["field\r"], ["\rfield\r"]],      },      {        name: "MultiFieldCRCRLFCRCR",        input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",        output: [          ["field1", "field2\r"],          ["\r\rfield1", "field2\r"],          ["\r\r", ""],        ],      },      {        name: "NonASCIICommaAndCommentWithQuotes",        input: 'a€"  b,"€ c\nλ comment\n',        output: [["a", "  b,", " c"]],        separator: "€",        comment: "λ",      },      {        // λ and θ start with the same byte.        // This tests that the parser doesn't confuse such characters.        name: "NonASCIICommaConfusion",        input: '"abθcd"λefθgh',        output: [["abθcd", "efθgh"]],        separator: "λ",        comment: "€",      },      {        name: "NonASCIICommentConfusion",        input: "λ\nλ\nθ\nλ\n",        output: [["λ"], ["λ"], ["λ"]],        comment: "θ",      },      {        name: "QuotedFieldMultipleLF",        input: '"\n\n\n\n"',        output: [["\n\n\n\n"]],      },      {        name: "MultipleCRLF",        input: "\r\n\r\n\r\n\r\n",        output: [],      },      {        name: "DoubleQuoteWithTrailingCRLF",        input: '"foo""bar"\r\n',        output: [[`foo"bar`]],      },      {        name: "EvenQuotes",        input: `""""""""`,        output: [[`"""`]],      },      {        name: "simple",        input: "a,b,c",        output: [["a", "b", "c"]],        skipFirstRow: false,      },      {        name: "multiline",        input: "a,b,c\ne,f,g\n",        output: [          ["a", "b", "c"],          ["e", "f", "g"],        ],        skipFirstRow: false,      },      {        name: "header mapping boolean",        input: "a,b,c\ne,f,g\n",        output: [{ a: "e", b: "f", c: "g" }],        skipFirstRow: true,      },      {        name: "header mapping array",        input: "a,b,c\ne,f,g\n",        output: [          { this: "a", is: "b", sparta: "c" },          { this: "e", is: "f", sparta: "g" },        ],        columns: ["this", "is", "sparta"],      },      {        name: "provides both opts.skipFirstRow and opts.columns",        input: "a,b,1\nc,d,2\ne,f,3",        output: [          { foo: "c", bar: "d", baz: "2" },          { foo: "e", bar: "f", baz: "3" },        ],        skipFirstRow: true,        columns: ["foo", "bar", "baz"],      },      {        name: "mismatching number of headers and fields",        input: "a,b,c\nd,e",        skipFirstRow: true,        columns: ["foo", "bar", "baz"],        errorMessage:          "Error number of fields line: 1\nNumber of fields found: 3\nExpected number of fields: 2",      },    ];    for (const testCase of testCases) {      await t.step(testCase.name, async () => {        const options: CsvStreamOptions = {};        if (testCase.separator) {          options.separator = testCase.separator;        }        if (testCase.comment) {          options.comment = testCase.comment;        }        if (testCase.skipFirstRow) {          options.skipFirstRow = testCase.skipFirstRow;        }        if (testCase.columns) {          options.columns = testCase.columns;        }        const readable = createReadableStreamFromString(testCase.input)          .pipeThrough(new CsvStream(options));
        if (testCase.output) {          const actual = [] as Array<Array<string>>;          for await (const record of readable) {            actual.push(record);          }          assertEquals(actual, testCase.output);        } else {          await assertRejects(async () => {            for await (const _ of readable);          }, testCase.errorMessage);        }      });    }  },});
function createReadableStreamFromString(s: string): ReadableStream<string> {  return readableStreamFromReader(new StringReader(s)).pipeThrough(    new TextDecoderStream(),  );}
// Work around resource leak error with TextDecoderStream://   https://github.com/denoland/deno/issues/13142export const MyTextDecoderStream = () => {  const textDecoder = new TextDecoder();  return new TransformStream({    transform(chunk: Uint8Array, controller: TransformStreamDefaultController) {      controller.enqueue(textDecoder.decode(chunk));    },    flush(controller: TransformStreamDefaultController) {      controller.enqueue(textDecoder.decode());    },  });};
Deno.test({  name: "[csv/stream] cancel CsvStream during iteration does not leak file",  permissions: { read: [testdataDir] },  // TODO(kt3k): Enable this test on windows.  // See https://github.com/denoland/deno_std/issues/3160  ignore: Deno.build.os === "windows",  fn: async () => {    const file = await Deno.open(join(testdataDir, "large.csv"));    const readable = file.readable.pipeThrough(MyTextDecoderStream())      .pipeThrough(new CsvStream());    for await (const _record of readable) {      break;    }  },});
Deno.test({  name: "[csv/stream] correct typing",  fn() {    {      const { readable } = new CsvStream();      type _ = AssertTrue<Has<typeof readable, ReadableStream<string[]>>>;    }    {      const { readable } = new CsvStream({});      type _ = AssertTrue<Has<typeof readable, ReadableStream<string[]>>>;    }    {      const { readable } = new CsvStream({ skipFirstRow: undefined });      type _ = AssertTrue<Has<typeof readable, ReadableStream<string[]>>>;    }    {      const { readable } = new CsvStream({ skipFirstRow: false });      type _ = AssertTrue<Has<typeof readable, ReadableStream<string[]>>>;    }    {      const { readable } = new CsvStream({ skipFirstRow: true });      type _ = AssertTrue<        Has<typeof readable, ReadableStream<Record<string, unknown>>>      >;    }    {      const { readable } = new CsvStream({ columns: undefined });      type _ = AssertTrue<Has<typeof readable, ReadableStream<string[]>>>;    }    {      const { readable } = new CsvStream({ columns: ["aaa"] });      type _ = AssertTrue<        Has<typeof readable, ReadableStream<Record<string, unknown>>>      >;    }    {      const { readable } = new CsvStream({        skipFirstRow: false,        columns: undefined,      });      type _ = AssertTrue<Has<typeof readable, ReadableStream<string[]>>>;    }    {      const { readable } = new CsvStream({        skipFirstRow: true,        columns: undefined,      });      type _ = AssertTrue<        Has<typeof readable, ReadableStream<Record<string, unknown>>>      >;    }    {      const { readable } = new CsvStream({        skipFirstRow: false,        columns: ["aaa"],      });      type _ = AssertTrue<        Has<typeof readable, ReadableStream<Record<string, unknown>>>      >;    }    {      const { readable } = new CsvStream({        skipFirstRow: true,        columns: ["aaa"],      });      type _ = AssertTrue<        Has<typeof readable, ReadableStream<Record<string, unknown>>>      >;    }  },});