Skip to main content

Reading CSV Files

Learning Focus

Use this lesson to query CSV files directly in DuckDB without any import step — one of DuckDB's most powerful features.

Quick Start

-- Auto-detect everything: delimiter, header, types
SELECT * FROM read_csv_auto('employees.csv') LIMIT 10;

-- Use the file path directly (DuckDB shorthand)
SELECT * FROM 'employees.csv' LIMIT 10;

read_csv_auto() Options

SELECT * FROM read_csv_auto(
'employees.csv',
header = true, -- first row is header
delim = ',', -- delimiter (',', ';', '\t', '|')
quote = '"', -- quoting character
nullstr = 'NULL', -- treat this string as NULL
skip = 1, -- skip N rows before header
dateformat = '%d/%m/%Y', -- date format if auto-detect fails
types = {'salary': 'DOUBLE', 'id': 'INTEGER'} -- force types
);

Reading Multiple Files (Glob Pattern)

-- Read all CSV files in a directory
SELECT * FROM read_csv_auto('data/*.csv');

-- Read multiple specific files
SELECT * FROM read_csv_auto(['jan.csv', 'feb.csv', 'mar.csv']);

-- Include the filename in results
SELECT filename, * FROM read_csv_auto('data/*.csv', filename = true);

Schema Inspection

-- Check inferred schema before querying
DESCRIBE SELECT * FROM read_csv_auto('employees.csv');

Query and Aggregate Without Import

-- Aggregate directly from file — no table needed
SELECT
department,
COUNT(*) AS headcount,
AVG(salary) AS avg_salary,
SUM(salary) AS total_salary
FROM read_csv_auto('employees.csv')
GROUP BY department
ORDER BY total_salary DESC;

Create Table from CSV

-- Persist to a table for repeated querying
CREATE TABLE employees AS
SELECT * FROM read_csv_auto('employees.csv');

Error and Type Handling

-- Ignore bad rows instead of failing
SELECT * FROM read_csv_auto('data.csv', ignore_errors = true);

-- Force column types to avoid inference errors
SELECT * FROM read_csv_auto('data.csv',
types = {'date_col': 'DATE', 'amount': 'DECIMAL(18,2)'}
);

Common Pitfalls

PitfallConsequencePrevention
Wrong delimiter auto-detectedColumns merged into oneSpecify delim = ';' explicitly
Date format mismatchNULL dates or parse errorSet dateformat = '%d/%m/%Y'
Large CSV queried repeatedlySlow (re-parsed each time)Use CREATE TABLE AS to persist

Quick Reference

SELECT * FROM 'file.csv' LIMIT 10;
SELECT * FROM read_csv_auto('file.csv', header=true, delim=',');
SELECT * FROM read_csv_auto('dir/*.csv', filename=true);
CREATE TABLE t AS SELECT * FROM read_csv_auto('file.csv');
DESCRIBE SELECT * FROM read_csv_auto('file.csv');

What's Next