Skip to content
1 hidden cell
Introduction to Importing Data in Python
Introduction to Importing Data in Python
Run the hidden code cell below to import the data used in this course.
1 hidden cell
Take Notes
Add notes about the concepts you've learned and code cells with code you want to keep.
! ls will display the contents of your current directory
Add your notes here
# Open a file: file
file = open('moby_dick.txt', mode = 'r')
# Print it
print(file.read())
# Check whether file is closed
print(file.closed)
# Close file
file.close()
# Check whether file is closed
print(file.closed)
# Read & print the first 3 lines
with open('moby_dick.txt') as file:
print(file.readline())
print(file.readline())
print(file.readline())
# Import pandas as pd
import pandas as pd
# Assign the filename: file
file = 'titanic.csv'
# Read the file into a DataFrame: df
df = pd.read_csv(file)
# View the head of the DataFrame
print(df.head())
# Assign the filename: file
file = 'digits.csv'
# Read the first 5 rows of the file into a DataFrame: data
data = pd.read_csv(file, nrows=5, header=None)
# Build a numpy array from the DataFrame: data_array
data_array = data.values
# Print the datatype of data_array to the shell
print(type(data_array))
# Import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
# Assign filename: file
file = 'titanic_corrupt.txt'
# Import file: data, all text following '#' are considered comments, 'Nothing' is considered NA's
data = pd.read_csv(file, sep='\t', comment='#', na_values='Nothing')
# Print the head of the DataFrame
print(data.head())
# Plot 'Age' variable in a histogram
pd.DataFrame.hist(data[['Age']])
plt.xlabel('Age (years)')
plt.ylabel('count')
plt.show()
Import excel spreadsheets
# Import pandas
import pandas as pd
# Assign spreadsheet filename: file
file = 'battledeath.xlsx'
# Load spreadsheet: xls
xls = pd.ExcelFile(file)
# Print sheet names
print(xls.sheet_names)
# Load a sheet into a DataFrame by name: df1
df1 = xls.parse('2004')
# Print the head of the DataFrame df1
print(df1.head())
# Load a sheet into a DataFrame by index: df2
df2 = xls.parse(0)
# Print the head of the DataFrame df2
print(df2.head())
# Parse the first sheet and rename the columns: df1
df1 = xls.parse(0, skiprows=[0], names=list(['Country','AAM due to War (2002)']))
# Print the head of the DataFrame df1
print(df1.head())
# Parse the first column of the second sheet and rename the column: df2
df2 = xls.parse(1, usecols=[0], skiprows=[0], names=['Country'])
# Print the head of the DataFrame df2
print(df2.head())
Import SAS files