137 lines
4.1 KiB
TypeScript
137 lines
4.1 KiB
TypeScript
/**
|
|
* Tests for CSV data source parsing and batch reading.
|
|
*/
|
|
|
|
import { describe, it, expect } from 'vitest';
|
|
import { parseCsv, CsvDataSource } from '../core/piece/arpeggio/csv-data-source.js';
|
|
import { writeFileSync, mkdirSync } from 'node:fs';
|
|
import { join } from 'node:path';
|
|
import { tmpdir } from 'node:os';
|
|
import { randomUUID } from 'node:crypto';
|
|
|
|
describe('parseCsv', () => {
|
|
it('should parse simple CSV content', () => {
|
|
const csv = 'name,age\nAlice,30\nBob,25';
|
|
const result = parseCsv(csv);
|
|
expect(result).toEqual([
|
|
['name', 'age'],
|
|
['Alice', '30'],
|
|
['Bob', '25'],
|
|
]);
|
|
});
|
|
|
|
it('should handle quoted fields', () => {
|
|
const csv = 'name,description\nAlice,"Hello, World"\nBob,"Line1"';
|
|
const result = parseCsv(csv);
|
|
expect(result).toEqual([
|
|
['name', 'description'],
|
|
['Alice', 'Hello, World'],
|
|
['Bob', 'Line1'],
|
|
]);
|
|
});
|
|
|
|
it('should handle escaped quotes (double quotes)', () => {
|
|
const csv = 'name,value\nAlice,"He said ""hello"""\nBob,simple';
|
|
const result = parseCsv(csv);
|
|
expect(result).toEqual([
|
|
['name', 'value'],
|
|
['Alice', 'He said "hello"'],
|
|
['Bob', 'simple'],
|
|
]);
|
|
});
|
|
|
|
it('should handle CRLF line endings', () => {
|
|
const csv = 'name,age\r\nAlice,30\r\nBob,25';
|
|
const result = parseCsv(csv);
|
|
expect(result).toEqual([
|
|
['name', 'age'],
|
|
['Alice', '30'],
|
|
['Bob', '25'],
|
|
]);
|
|
});
|
|
|
|
it('should handle bare CR line endings', () => {
|
|
const csv = 'name,age\rAlice,30\rBob,25';
|
|
const result = parseCsv(csv);
|
|
expect(result).toEqual([
|
|
['name', 'age'],
|
|
['Alice', '30'],
|
|
['Bob', '25'],
|
|
]);
|
|
});
|
|
|
|
it('should handle empty fields', () => {
|
|
const csv = 'a,b,c\n1,,3\n,,';
|
|
const result = parseCsv(csv);
|
|
expect(result).toEqual([
|
|
['a', 'b', 'c'],
|
|
['1', '', '3'],
|
|
['', '', ''],
|
|
]);
|
|
});
|
|
|
|
it('should handle newlines within quoted fields', () => {
|
|
const csv = 'name,bio\nAlice,"Line1\nLine2"\nBob,simple';
|
|
const result = parseCsv(csv);
|
|
expect(result).toEqual([
|
|
['name', 'bio'],
|
|
['Alice', 'Line1\nLine2'],
|
|
['Bob', 'simple'],
|
|
]);
|
|
});
|
|
});
|
|
|
|
describe('CsvDataSource', () => {
|
|
function createTempCsv(content: string): string {
|
|
const dir = join(tmpdir(), `takt-csv-test-${randomUUID()}`);
|
|
mkdirSync(dir, { recursive: true });
|
|
const filePath = join(dir, 'test.csv');
|
|
writeFileSync(filePath, content, 'utf-8');
|
|
return filePath;
|
|
}
|
|
|
|
it('should read batches with batch_size 1', async () => {
|
|
const filePath = createTempCsv('name,age\nAlice,30\nBob,25\nCharlie,35');
|
|
const source = new CsvDataSource(filePath);
|
|
const batches = await source.readBatches(1);
|
|
|
|
expect(batches).toHaveLength(3);
|
|
expect(batches[0]!.rows).toEqual([{ name: 'Alice', age: '30' }]);
|
|
expect(batches[0]!.batchIndex).toBe(0);
|
|
expect(batches[0]!.totalBatches).toBe(3);
|
|
expect(batches[1]!.rows).toEqual([{ name: 'Bob', age: '25' }]);
|
|
expect(batches[2]!.rows).toEqual([{ name: 'Charlie', age: '35' }]);
|
|
});
|
|
|
|
it('should read batches with batch_size 2', async () => {
|
|
const filePath = createTempCsv('name,age\nAlice,30\nBob,25\nCharlie,35');
|
|
const source = new CsvDataSource(filePath);
|
|
const batches = await source.readBatches(2);
|
|
|
|
expect(batches).toHaveLength(2);
|
|
expect(batches[0]!.rows).toEqual([
|
|
{ name: 'Alice', age: '30' },
|
|
{ name: 'Bob', age: '25' },
|
|
]);
|
|
expect(batches[0]!.totalBatches).toBe(2);
|
|
expect(batches[1]!.rows).toEqual([
|
|
{ name: 'Charlie', age: '35' },
|
|
]);
|
|
});
|
|
|
|
it('should throw when CSV has no data rows', async () => {
|
|
const filePath = createTempCsv('name,age');
|
|
const source = new CsvDataSource(filePath);
|
|
await expect(source.readBatches(1)).rejects.toThrow('CSV file has no data rows');
|
|
});
|
|
|
|
it('should handle missing columns by returning empty string', async () => {
|
|
const filePath = createTempCsv('a,b,c\n1,2\n3');
|
|
const source = new CsvDataSource(filePath);
|
|
const batches = await source.readBatches(1);
|
|
|
|
expect(batches[0]!.rows).toEqual([{ a: '1', b: '2', c: '' }]);
|
|
expect(batches[1]!.rows).toEqual([{ a: '3', b: '', c: '' }]);
|
|
});
|
|
});
|