Skip to content
sqlalchemy notes
  • AI Chat
  • Code
  • Report
  • Spinner
    # Start coding here... 
    
    # Run a previously built statement and materialize every row.
    # NOTE(review): `connection`, `statement`, `df`, `engine`, and `pd` are
    # assumed to exist from earlier work in the session -- none are defined here.
    results = connection.execute(statement).fetchall()
    
    # Write the DataFrame to a SQL table, dropping any existing table first.
    df.to_sql("v1.0.5_table", engine, if_exists='replace') #if_exists='append', index=False)
    
    # Read the whole table back into a fresh DataFrame.
    dff = pd.read_sql("v1.0.5_table", engine)
    
    # Example sqlalchemy: count employees per manager via a self-join.
    # Alias employees so the same table can play the "manager" role: managers
    managers = employees.alias()
    
    # Manager names paired with a count of employee ids: stmt
    stmt = select([managers.c.name, func.count(employees.c.id)])
    
    # Self-join condition: the manager's id equals the employee's mgr column
    stmt_matched = stmt.where(managers.c.id == employees.c.mgr)
    
    # Collapse to one output row per manager name
    stmt_grouped = stmt_matched.group_by(managers.c.name)
    
    # Run the grouped query and pull every row: results
    results = connection.execute(stmt_grouped).fetchall()
    
    # Print each manager row fetched above.
    for record in results:
        print(record)
    
    # Paginated fetch: pull 50 rows at a time and tally rows per state.
    # NOTE(review): assumes `more_results` starts True, `state_count` is an
    # empty dict, and `results_proxy` is an open ResultProxy -- confirm upstream.
    # (In the original paste this loop was mis-indented inside the print loop
    # above, and the `else:` was nested under its own `if` -- a SyntaxError.)
    while more_results:
        # Fetch the next 50 results from the ResultProxy: partial_results
        partial_results = results_proxy.fetchmany(50)
    
        # An empty batch means the cursor is exhausted
        if partial_results == []:
            more_results = False
    
        # Loop over the fetched records and increment the count for the state
        for row in partial_results:
            if row.state in state_count:
                state_count[row.state] += 1
            else:
                state_count[row.state] = 1
    
    # Close the ResultProxy, and thus the connection
    results_proxy.close()
    
    # Print the count by state
    print(state_count)
    
    # Case study: define and create the census table in a SQLite database.
    #import create_engine, MetaData
    from sqlalchemy import create_engine, MetaData
    #Import Table, Column, String, and Integer
    from sqlalchemy import Table, Column, String, Integer
    
    # Engine pointing at the chapter5 SQLite file: engine
    engine = create_engine('sqlite:///chapter5.sqlite')
    
    # Fresh MetaData object to register the schema on: metadata
    metadata = MetaData()
    
    # census table: state/sex text columns plus age and two population counts
    census = Table(
        'census', metadata,
        Column('state', String(30)),
        Column('sex', String(1)),
        Column('age', Integer()),
        Column('pop2000', Integer()),
        Column('pop2008', Integer()),
    )
    
    # Emit CREATE TABLE for everything registered on metadata
    metadata.create_all(engine)
    #Import insert
    from sqlalchemy import insert
    
    # Build insert statement: stmt
    # An INSERT against the census table; column values are supplied per-row
    # at execution time.
    stmt = insert(census)
    
    # Use values_list to insert data: results
    # NOTE(review): `values_list` is presumably a list of dicts keyed by column
    # name, and `connection` an open Connection -- neither is defined here.
    results_proxy = connection.execute(stmt, values_list) # values_list = [{}]
    
    # Print rowcount
    # Number of rows the executemany-style INSERT affected
    print(results_proxy.rowcount)
    
    #Import select and func
    from sqlalchemy import select, func
    
    # Average age per sex, weighted by the 2000 population column, then
    # grouped so each sex yields one row.
    stmt = select([
        (func.sum(census.columns.pop2000 * census.columns.age) /
         func.sum(census.columns.pop2000)).label('average_age'),
        census.columns.sex,
    ]).group_by(census.columns.sex)
    
    # Execute the query and fetch all the results
    results = connection.execute(stmt).fetchall()
    
    # Show each sex next to its weighted average age
    for result in results:
        print(result['sex'], result['average_age'])
    
    #import case, cast and Float from sqlalchemy
    from sqlalchemy import case, cast, Float
    
    # Percentage of women in 2000, state by state.
    # case(): sum pop2000 only on rows where sex == 'F';
    # cast to Float so the division is not integer division.
    female_pop = func.sum(
        case([(census.columns.sex == 'F', census.columns.pop2000)], else_=0))
    total_pop = cast(func.sum(census.columns.pop2000), Float)
    
    stmt = select([
        census.columns.state,
        (female_pop / total_pop * 100).label('percent_female'),
    ]).group_by(census.columns.state)
    
    # Execute the query and store the results: results
    results = connection.execute(stmt).fetchall()
    
    # Print the percentage
    for result in results:
        print(result.state, result.percent_female)
    
    #Build query to return state name and population difference from 2008 to 2000
    # BUG FIX: `desc` is used below but was never imported anywhere in these
    # notes, so the order_by line raised NameError.
    from sqlalchemy import desc
    
    stmt = select([
        census.columns.state,
        (census.columns.pop2008 - census.columns.pop2000).label('pop_change'),
    ])
    
    # Group by State
    stmt = stmt.group_by(census.columns.state)
    
    # Order by Population Change, largest first
    stmt = stmt.order_by(desc('pop_change'))
    
    # Limit to top 10
    stmt = stmt.limit(10)
    
    # Use connection to execute the statement and fetch all results
    results = connection.execute(stmt).fetchall()
    
    # Print the state and population change for each record
    for result in results:
        print('{}:{}'.format(result.state, result.pop_change))
    import time
    import pandas as pd
    from sqlalchemy import create_engine
    import psycopg2
    
    
    
    #INPUT YOUR OWN CONNECTION STRING HERE
    # NOTE(review): SQLAlchemy 1.4+ rejects the 'postgres://' scheme -- it must
    # be 'postgresql://' there (psycopg2 itself accepts either form).
    conn_string = 'postgres://user:password@host/database'
    
    #Import .csv file
    df = pd.read_csv('upload_test_data.csv')
    
    
    
    #perform to_sql test and print result
    db = create_engine(conn_string)
    conn = db.connect()
    
    start_time = time.time()
    df.to_sql('to_sql_test', con=conn, if_exists='replace', index=False)
    print("to_sql duration: {} seconds".format(time.time() - start_time))
    
    
    
    #perform COPY test and print result
    # Server-side COPY reads the file directly, so the path below must be
    # visible to the Postgres server process, not just this client.
    sql = '''
    COPY copy_test
    FROM 'PATH_TO_FILE.csv' --input full file path here. see line 46
    DELIMITER ',' CSV;
    '''
    
    table_create_sql = '''
    CREATE TABLE IF NOT EXISTS copy_test (id                bigint,
                                          quantity          int,
                                          cost              double precision,
                                          total_revenue     double precision)
    '''
    
    pg_conn = psycopg2.connect(conn_string)
    cur = pg_conn.cursor()
    cur.execute(table_create_sql)
    cur.execute('TRUNCATE TABLE copy_test') #Truncate the table in case you've already run the script before
    
    start_time = time.time()
    # Dump the DataFrame without header/index so COPY's CSV parser lines up
    # with the four columns created above.
    df.to_csv('upload_test_data_from_copy.csv', index=False, header=False) #Name the .csv file reference in line 29 here
    cur.execute(sql)
    pg_conn.commit()
    cur.close()
    print("COPY duration: {} seconds".format(time.time() - start_time))
    
    
    
    #close connections
    # BUG FIX: the psycopg2 connection was never closed, leaking a server
    # connection after the script finished; close it alongside the
    # SQLAlchemy connection.
    pg_conn.close()
    conn.close()