Skip to content
%%capture
%pip install duckdb --upgrade

Getting Started

import duckdb
con = duckdb.connect("datacamp.duckdb")
con.execute("""
    CREATE TABLE IF NOT EXISTS bank AS 
    SELECT * FROM read_csv('bank-marketing.csv')
""")
con.execute("SHOW ALL TABLES").fetchdf()
con.execute("SELECT * FROM bank LIMIT 5").fetchdf()
con.execute("SELECT * FROM bank WHERE duration < 100 LIMIT 5").fetchdf()

Relations

bank_duck = duckdb.read_csv("bank-marketing.csv",sep=";")
bank_duck.filter("duration < 100").limit(3).df()
rel = con.table("bank")
rel.columns
rel.filter("duration < 100").limit(3).df()
rel.filter("duration < 100").project("job,education,loan").order("job").limit(3).df()

Query Function

res = duckdb.query("""SELECT 
                            job,
                            COUNT(*) AS total_clients_contacted,
                            AVG(duration) AS avg_campaign_duration,
                        FROM 
                            'bank-marketing.csv'
                        WHERE 
                            age > 30
                        GROUP BY 
                            job
                        ORDER BY 
                            total_clients_contacted DESC;""")
res.df()
con.commit()
con.close()