Skip to content
sqlalchemy notes
  • AI Chat
  • Code
  • Report
  • Spinner
    # Start coding here... 
    
    # Run a previously built statement and materialize every row.
    # NOTE(review): `connection`, `statement`, `df`, `engine`, and `pd` are
    # assumed to exist from earlier work in the session -- none are defined here.
    results = connection.execute(statement).fetchall()
    
    # Write the DataFrame to a SQL table, dropping any existing table first.
    df.to_sql("v1.0.5_table", engine, if_exists='replace') #if_exists='append', index=False)
    
    # Read the whole table back into a fresh DataFrame.
    dff = pd.read_sql("v1.0.5_table", engine)
    
    # Example sqlalchemy: count employees per manager via a self-join.
    # Alias employees so the same table can play the "manager" role: managers
    managers = employees.alias()
    
    # Manager names paired with a count of employee ids: stmt
    stmt = select([managers.c.name, func.count(employees.c.id)])
    
    # Self-join condition: the manager's id equals the employee's mgr column
    stmt_matched = stmt.where(managers.c.id == employees.c.mgr)
    
    # Collapse to one output row per manager name
    stmt_grouped = stmt_matched.group_by(managers.c.name)
    
    # Run the grouped query and pull every row: results
    results = connection.execute(stmt_grouped).fetchall()
    
    # Print each manager row fetched above.
    for record in results:
        print(record)
    
    # Paginated fetch: pull 50 rows at a time and tally rows per state.
    # NOTE(review): assumes `more_results` starts True, `state_count` is an
    # empty dict, and `results_proxy` is an open ResultProxy -- confirm upstream.
    # (In the original paste this loop was mis-indented inside the print loop
    # above, and the `else:` was nested under its own `if` -- a SyntaxError.)
    while more_results:
        # Fetch the next 50 results from the ResultProxy: partial_results
        partial_results = results_proxy.fetchmany(50)
    
        # An empty batch means the cursor is exhausted
        if partial_results == []:
            more_results = False
    
        # Loop over the fetched records and increment the count for the state
        for row in partial_results:
            if row.state in state_count:
                state_count[row.state] += 1
            else:
                state_count[row.state] = 1
    
    # Close the ResultProxy, and thus the connection
    results_proxy.close()
    
    # Print the count by state
    print(state_count)
    
    # Case study: define and create the census table in a SQLite database.
    #import create_engine, MetaData
    from sqlalchemy import create_engine, MetaData
    #Import Table, Column, String, and Integer
    from sqlalchemy import Table, Column, String, Integer
    
    # Engine pointing at the chapter5 SQLite file: engine
    engine = create_engine('sqlite:///chapter5.sqlite')
    
    # Fresh MetaData object to register the schema on: metadata
    metadata = MetaData()
    
    # census table: state/sex text columns plus age and two population counts
    census = Table(
        'census', metadata,
        Column('state', String(30)),
        Column('sex', String(1)),
        Column('age', Integer()),
        Column('pop2000', Integer()),
        Column('pop2008', Integer()),
    )
    
    # Emit CREATE TABLE for everything registered on metadata
    metadata.create_all(engine)
    #Import insert
    from sqlalchemy import insert
    
    # Build insert statement: stmt
    # An INSERT against the census table; column values are supplied per-row
    # at execution time.
    stmt = insert(census)
    
    # Use values_list to insert data: results
    # NOTE(review): `values_list` is presumably a list of dicts keyed by column
    # name, and `connection` an open Connection -- neither is defined here.
    results_proxy = connection.execute(stmt, values_list) # values_list = [{}]
    
    # Print rowcount
    # Number of rows the executemany-style INSERT affected
    print(results_proxy.rowcount)
    
    #Import select and func
    from sqlalchemy import select, func
    
    # Average age per sex, weighted by the 2000 population column, then
    # grouped so each sex yields one row.
    stmt = select([
        (func.sum(census.columns.pop2000 * census.columns.age) /
         func.sum(census.columns.pop2000)).label('average_age'),
        census.columns.sex,
    ]).group_by(census.columns.sex)
    
    # Execute the query and fetch all the results
    results = connection.execute(stmt).fetchall()
    
    # Show each sex next to its weighted average age
    for result in results:
        print(result['sex'], result['average_age'])
    
    #import case, cast and Float from sqlalchemy
    from sqlalchemy import case, cast, Float
    
    # Percentage of women in 2000, state by state.
    # case(): sum pop2000 only on rows where sex == 'F';
    # cast to Float so the division is not integer division.
    female_pop = func.sum(
        case([(census.columns.sex == 'F', census.columns.pop2000)], else_=0))
    total_pop = cast(func.sum(census.columns.pop2000), Float)
    
    stmt = select([
        census.columns.state,
        (female_pop / total_pop * 100).label('percent_female'),
    ]).group_by(census.columns.state)
    
    # Execute the query and store the results: results
    results = connection.execute(stmt).fetchall()
    
    # Print the percentage
    for result in results:
        print(result.state, result.percent_female)
    
    #Build query to return state name and population difference from 2008 to 2000
    # BUG FIX: `desc` is used below but was never imported anywhere in these
    # notes, so the order_by line raised NameError.
    from sqlalchemy import desc
    
    stmt = select([
        census.columns.state,
        (census.columns.pop2008 - census.columns.pop2000).label('pop_change'),
    ])
    
    # Group by State
    stmt = stmt.group_by(census.columns.state)
    
    # Order by Population Change, largest first
    stmt = stmt.order_by(desc('pop_change'))
    
    # Limit to top 10
    stmt = stmt.limit(10)
    
    # Use connection to execute the statement and fetch all results
    results = connection.execute(stmt).fetchall()
    
    # Print the state and population change for each record
    for result in results:
        print('{}:{}'.format(result.state, result.pop_change))
    import time
    import pandas as pd
    from sqlalchemy import create_engine
    import psycopg2
    
    
    
    #INPUT YOUR OWN CONNECTION STRING HERE
    # NOTE(review): SQLAlchemy 1.4+ rejects the 'postgres://' scheme -- it must
    # be 'postgresql://' there (psycopg2 itself accepts either form).
    conn_string = 'postgres://user:password@host/database'
    
    #Import .csv file
    df = pd.read_csv('upload_test_data.csv')
    
    
    
    #perform to_sql test and print result
    db = create_engine(conn_string)
    conn = db.connect()
    
    start_time = time.time()
    df.to_sql('to_sql_test', con=conn, if_exists='replace', index=False)
    print("to_sql duration: {} seconds".format(time.time() - start_time))
    
    
    
    #perform COPY test and print result
    # Server-side COPY reads the file directly, so the path below must be
    # visible to the Postgres server process, not just this client.
    sql = '''
    COPY copy_test
    FROM 'PATH_TO_FILE.csv' --input full file path here. see line 46
    DELIMITER ',' CSV;
    '''
    
    table_create_sql = '''
    CREATE TABLE IF NOT EXISTS copy_test (id                bigint,
                                          quantity          int,
                                          cost              double precision,
                                          total_revenue     double precision)
    '''
    
    pg_conn = psycopg2.connect(conn_string)
    cur = pg_conn.cursor()
    cur.execute(table_create_sql)
    cur.execute('TRUNCATE TABLE copy_test') #Truncate the table in case you've already run the script before
    
    start_time = time.time()
    # Dump the DataFrame without header/index so COPY's CSV parser lines up
    # with the four columns created above.
    df.to_csv('upload_test_data_from_copy.csv', index=False, header=False) #Name the .csv file reference in line 29 here
    cur.execute(sql)
    pg_conn.commit()
    cur.close()
    print("COPY duration: {} seconds".format(time.time() - start_time))
    
    
    
    #close connections
    # BUG FIX: the psycopg2 connection was never closed, leaking a server
    # connection after the script finished; close it alongside the
    # SQLAlchemy connection.
    pg_conn.close()
    conn.close()