Skip to content
%%capture
%pip install duckdb --upgrade
Getting Started
import duckdb
con = duckdb.connect("datacamp.duckdb")
con.execute("""
CREATE TABLE IF NOT EXISTS bank AS
SELECT * FROM read_csv('bank-marketing.csv')
""")
con.execute("SHOW ALL TABLES").fetchdf()
con.execute("SELECT * FROM bank LIMIT 5").fetchdf()
con.execute("SELECT * FROM bank WHERE duration < 100 LIMIT 5").fetchdf()
Relations
bank_duck = duckdb.read_csv("bank-marketing.csv",sep=";")
bank_duck.filter("duration < 100").limit(3).df()
rel = con.table("bank")
rel.columns
rel.filter("duration < 100").limit(3).df()
rel.filter("duration < 100").project("job,education,loan").order("job").limit(3).df()
Query Function
res = duckdb.query("""SELECT
job,
COUNT(*) AS total_clients_contacted,
AVG(duration) AS avg_campaign_duration,
FROM
'bank-marketing.csv'
WHERE
age > 30
GROUP BY
job
ORDER BY
total_clients_contacted DESC;""")
res.df()
con.commit()
con.close()