takt/src/__tests__/arpeggio-csv.test.ts
nrs 7e15691ba2
github-issue-200-arpeggio (#203)
* fix: stable release時にnext dist-tagを自動同期

* takt: github-issue-200-arpeggio
2026-02-10 13:37:15 +09:00

137 lines
4.1 KiB
TypeScript

/**
* Tests for CSV data source parsing and batch reading.
*/
import { describe, it, expect } from 'vitest';
import { parseCsv, CsvDataSource } from '../core/piece/arpeggio/csv-data-source.js';
import { writeFileSync, mkdirSync, rmSync } from 'node:fs';
import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { randomUUID } from 'node:crypto';
describe('parseCsv', () => {
  /** One parsing scenario: the test title, the raw CSV text, and the rows expected back. */
  interface ParseScenario {
    readonly title: string;
    readonly input: string;
    readonly rows: string[][];
  }

  // Every scenario feeds `input` to parseCsv and compares the result to `rows`.
  // The header line is returned as the first row, like any other record.
  const scenarios: readonly ParseScenario[] = [
    {
      title: 'should parse simple CSV content',
      input: 'name,age\nAlice,30\nBob,25',
      rows: [
        ['name', 'age'],
        ['Alice', '30'],
        ['Bob', '25'],
      ],
    },
    {
      // A comma inside a quoted field must not split the field.
      title: 'should handle quoted fields',
      input: 'name,description\nAlice,"Hello, World"\nBob,"Line1"',
      rows: [
        ['name', 'description'],
        ['Alice', 'Hello, World'],
        ['Bob', 'Line1'],
      ],
    },
    {
      // A doubled quote ("") inside a quoted field stands for one literal quote.
      title: 'should handle escaped quotes (double quotes)',
      input: 'name,value\nAlice,"He said ""hello"""\nBob,simple',
      rows: [
        ['name', 'value'],
        ['Alice', 'He said "hello"'],
        ['Bob', 'simple'],
      ],
    },
    {
      title: 'should handle CRLF line endings',
      input: 'name,age\r\nAlice,30\r\nBob,25',
      rows: [
        ['name', 'age'],
        ['Alice', '30'],
        ['Bob', '25'],
      ],
    },
    {
      title: 'should handle bare CR line endings',
      input: 'name,age\rAlice,30\rBob,25',
      rows: [
        ['name', 'age'],
        ['Alice', '30'],
        ['Bob', '25'],
      ],
    },
    {
      // Adjacent delimiters and a delimiters-only line yield empty strings.
      title: 'should handle empty fields',
      input: 'a,b,c\n1,,3\n,,',
      rows: [
        ['a', 'b', 'c'],
        ['1', '', '3'],
        ['', '', ''],
      ],
    },
    {
      // A line break inside quotes belongs to the field, not the record boundary.
      title: 'should handle newlines within quoted fields',
      input: 'name,bio\nAlice,"Line1\nLine2"\nBob,simple',
      rows: [
        ['name', 'bio'],
        ['Alice', 'Line1\nLine2'],
        ['Bob', 'simple'],
      ],
    },
  ];

  // Register one test per scenario, in declaration order, under its original title.
  for (const { title, input, rows } of scenarios) {
    it(title, () => {
      expect(parseCsv(input)).toEqual(rows);
    });
  }
});
describe('CsvDataSource', () => {
  /**
   * Runs `run` against a throwaway CSV file and cleans up afterwards.
   *
   * Writes `content` to `test.csv` inside a freshly created, uniquely named
   * directory under the OS temp dir, hands the file path to `run`, and removes
   * the directory once `run` settles — whether it resolves or rejects — so
   * repeated test runs do not accumulate files in the temp dir.
   *
   * @param content - Raw CSV text to write to the file.
   * @param run - Test body; receives the absolute path of the written file.
   */
  async function withTempCsv(
    content: string,
    run: (filePath: string) => Promise<void>,
  ): Promise<void> {
    const dir = join(tmpdir(), `takt-csv-test-${randomUUID()}`);
    mkdirSync(dir, { recursive: true });
    try {
      const filePath = join(dir, 'test.csv');
      writeFileSync(filePath, content, 'utf-8');
      await run(filePath);
    } finally {
      // force: true — do not throw if the directory is already gone, so
      // cleanup never replaces the test's own failure with an ENOENT.
      rmSync(dir, { recursive: true, force: true });
    }
  }

  it('should read batches with batch_size 1', async () => {
    await withTempCsv('name,age\nAlice,30\nBob,25\nCharlie,35', async (filePath) => {
      const source = new CsvDataSource(filePath);
      const batches = await source.readBatches(1);

      expect(batches).toHaveLength(3);
      expect(batches[0]!.rows).toEqual([{ name: 'Alice', age: '30' }]);
      expect(batches[0]!.batchIndex).toBe(0);
      expect(batches[0]!.totalBatches).toBe(3);
      expect(batches[1]!.rows).toEqual([{ name: 'Bob', age: '25' }]);
      expect(batches[2]!.rows).toEqual([{ name: 'Charlie', age: '35' }]);
    });
  });

  it('should read batches with batch_size 2', async () => {
    await withTempCsv('name,age\nAlice,30\nBob,25\nCharlie,35', async (filePath) => {
      const source = new CsvDataSource(filePath);
      const batches = await source.readBatches(2);

      // Three data rows at two per batch: one full batch plus a remainder of one.
      expect(batches).toHaveLength(2);
      expect(batches[0]!.rows).toEqual([
        { name: 'Alice', age: '30' },
        { name: 'Bob', age: '25' },
      ]);
      expect(batches[0]!.totalBatches).toBe(2);
      expect(batches[1]!.rows).toEqual([
        { name: 'Charlie', age: '35' },
      ]);
    });
  });

  it('should throw when CSV has no data rows', async () => {
    // Header line only — there is nothing to batch.
    await withTempCsv('name,age', async (filePath) => {
      const source = new CsvDataSource(filePath);
      await expect(source.readBatches(1)).rejects.toThrow('CSV file has no data rows');
    });
  });

  it('should handle missing columns by returning empty string', async () => {
    // Data rows are shorter than the three-column header.
    await withTempCsv('a,b,c\n1,2\n3', async (filePath) => {
      const source = new CsvDataSource(filePath);
      const batches = await source.readBatches(1);

      // Check the count first so a short result fails here, not on an undefined index.
      expect(batches).toHaveLength(2);
      expect(batches[0]!.rows).toEqual([{ a: '1', b: '2', c: '' }]);
      expect(batches[1]!.rows).toEqual([{ a: '3', b: '', c: '' }]);
    });
  });
});