Skip to content
1 hidden cell
Introduction to Importing Data in Python
Introduction to Importing Data in Python
Run the hidden code cell below to import the data used in this course.
1 hidden cell
Take Notes
Add notes about the concepts you've learned and code cells with code you want to keep.
Add your notes here
# Add your code snippets here
Explore Datasets
Try importing the remaining files to explore the data and practice your skills!
datasets/disarea.dta
datasets/ja_data2.mat
datasets/L-L1_LOSC_4_V1-1126259446-32.hdf5
datasets/mnist_kaggle_some_rows.csv
datasets/sales.sas7bdat
Intro and Flat files
# text files
# open(filename, mode = 'r'/'w')
# import flat files using NumPy
# np.loadtxt(filename, delimiter = ','/ '\t', skiprows = 1, uscols = [0, 2], dtype = str)
# np.genfromtxt(filename, delimiter = ',', names = True, dtype = None) # load mixed datatypes
# np.recfromcsv(filename, delimiter = ',', names = True) # similar to genfromtext, default dtype is None
# import flat files using Pandas
# pd.read_csv(filename, sep = '', comment = '', na_values = '')
Other file types
# Pickled files
import pickle
with open('pickled_fruit.pkl', 'rb') as file:
data = pickle.load(file)
print(data)
# excel
data = pd.ExcelFile(filename, sheet_name) # parse excel sheets to data frames
# data.sheet_names
# data.parse("sheetname") / data.parse(0) # can use either sheetname in string or in index
# data.parse(sheet, usecols=[0], skiprows = 1, names = [''])
# get the contents in the current directory
import os
wd = os.getcwd()
os.listdir(wd)
# import sas files
from sas7bdat import SAS7BDAT
with SAS7BDAT('') as file:
df_sas = file.to_data_frame()
# import stata files
import pandas as pd
data = pd.read_stata('urbanpop.dta')
# import hdf5 files
import h5py
data = h5py.File(filename, 'r')
for key in data.keys():
print(key)
for key in data['meta'].keys():
print(key)
# import MATLAB files
import scipy.io
mat = scipy.io.loadmat(filename)
Relational database
# database engine
from sqlalchemy import create_engine
enngine = create_engine('sqlite:///Northwind.sqlite')
# get table names
table_names = engine.table_names()
# query relational database
con = engine.connect()
rs = con.execute("SELECT * FROM Orders")
df = pd.DataFrame(rs.fetchall()) # save the query results as a dataframe
df.columns = rs.keys()
con.close()
# or
with engin.connect() as con:
rs = con.execute("SELECT OrderI, OrderDate, ShipName FROM Orders")
df = pd.DataFrame(rs.fetchmany(size = 5))
df.columns = rs.keys()
# query relational database with Pandas
df = pd.read_sql_query("SELECT * FROM Orders", engine)