Skip to content
Test dbt install
  • AI Chat
  • Code
  • Report
  • # Start coding here...
    !pip install dbt-duckdb
    !dbt --version
    import duckdb
    
    df = duckdb.read_parquet('https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet')
    
    df.head()
    Unknown integration
    DataFrameavailable as
    df
    variable
    SELECT * FROM 'yellow_tripdata_2023-01.parquet'
    Unknown integration
    DataFrameavailable as
    df1
    variable
    select 
        '2023' as year,
    	datepart('month', tpep_pickup_datetime) as month,
        VendorID, 
    	avg(passenger_count) as average_passengers
    from 'yellow_tripdata_2023-01.parquet'
    where datepart('year', tpep_pickup_datetime) = 2023
    group by year, month, VendorID
    order by year, month
    Unknown integration
    DataFrameavailable as
    df2
    variable
    Run cancelled
    select 
        '2023' as year,
    	datepart('month', tpep_pickup_datetime) as month,
        VendorID, 
    	count(*) as total_rides,
    	avg(passenger_count) as average_passengers
    from 'yellow_tripdata_2023-01.parquet'
    where datepart('year', tpep_pickup_datetime) = 2023
    group by year, month, VendorID
    
    Unknown integration
    DataFrameavailable as
    df3
    variable
    select
      extract (month from cal.month) as month,
      extract( days from ((cal.month + interval '1 month') - cal.month)) as length
    from
      (select unnest(range(timestamp '2012-01-01', timestamp '2012-12-01', interval '1 month')) as month
      ) cal
    order by month;
    df3.dtypes
    Unknown integration
    DataFrameavailable as
    df4
    variable
    select
      regexp_replace ('abc', '(b|c)', 'X') AS aXc, -- aXc 
      regexp_replace ('abc', '(b|c)', '\\1\\1\\1\\1') AS abbbc, -- abbbc
      regexp_replace ('abc', '(.*)c', '\\1e') AS abe, -- abe
      regexp_replace ('abc', '(a)(b)', '\\2\\1') AS bac -- bac
    import pandas as pd
     
    # create pandas DataFrame with one column with five
    # datetime values through a dictionary
    df = pd.DataFrame({'DateTime': ['2021-01-15 20:02:11',
                                    '1989-05-24 20:34:11',
                                    '2020-01-18 14:43:24',
                                    '2021-01-15 20:02:10',
                                    '1999-04-04 20:34:11']})
     
    df2 = pd.DataFrame({'DateTime': [20080827,
                                     20080907,
                                     20080910,
                                     20081119,
                                     20081212]})
    print("Original data")
    print(df)
    print(df2)
    
    # convert datetime column to just date
    df2['Date'] = pd.to_datetime(df2['DateTime'].astype(str), format='%Y%m%d').dt.date.astype(str)
    df2['DateMix'] = df2['DateTime'].apply(lambda x: str(x)[0:4] + '-' + str(x)[4:6] + '-' + str(x)[6:8])
    df2['Date3'] = df2['DateTime'].apply(lambda x: f'{str(x)[0:4]}-{str(x)[4:6]}-{str(x)[6:8]}')
    #df2['Date'] = pd.to_datetime(str(df2['DateTime']), format='%Y%m%d') #.dt.date
     
    # display
    print("Only date")
    print(df2)
    #df2['Date'] = df2['Date'].astype(str)
    df2
    import pandas as pd
    df = pd.DataFrame({'Content': [
                                   "Filename$",
                                   "Filename%",
                                   "File$%name",
                                   "Filename"]})
    
    print(df)
    
    df.replace(to_replace=['\$', "%"], value="", inplace=True, regex=True)
    
    print(df)
    Unknown integration
    DataFrameavailable as
    df5
    variable
    create table t as select * from read_parquet('yellow_tripdata_2023-01.parquet')
    Unknown integration
    DataFrameavailable as
    df6
    variable
    Unknown integration
    DataFrameavailable as
    movies
    variable
    create table m as SELECT * FROM 'movies.csv'
    Unknown integration
    DataFrameavailable as
    df8
    variable
    select