Skip to content

Introduction to Data Science in Python

Run the hidden code cell below to import the data used in this course.

# Importing pandas and numpy
import numpy as np
import pandas as pd

# Importing the course datasets
frequencies = pd.read_csv("datasets/all_frequencies.csv")
records = pd.read_csv("datasets/cell_phone_records.csv")
credit = pd.read_csv("datasets/credit_records.csv")
ransom = pd.read_csv("datasets/ransom.csv")
gravel = pd.read_csv("datasets/shoe_gravel_sample.csv")

Take Notes

Add notes about the concepts you've learned and code cells with code you want to keep.

#import pandas and load csv file

# Add your code snippets here
# Import pandas under the alias pd
import pandas as pd

# Load the CSV "credit_records.csv"
credit_records = pd.read_csv("credit_records.csv")

# Display the first five rows of credit_records using the .head() method
print(credit_records.head())
# Use .info() to inspect the DataFrame credit_records
print(credit_records.info())
# From matplotlib, import pyplot under the alias plt
from matplotlib import pyplot as plt

# Plot Officer Deshaun's hours_worked vs. day_of_week
plt.plot(deshaun.day_of_week, deshaun.hours_worked)

# Display Deshaun's plot
plt.show()
# Lines
plt.plot(deshaun.day_of_week, deshaun.hours_worked, label='Deshaun')
plt.plot(aditya.day_of_week, aditya.hours_worked, label='Aditya')
plt.plot(mengfei.day_of_week, mengfei.hours_worked, label='Mengfei')

# Add a title
plt.title("day")

# Add y-axis label
plt.ylabel("hours worked")

# Legend
plt.legend()
# Display plot
plt.show()
# Create plot
plt.plot(six_months.month, six_months.hours_worked)

# Add annotation "Missing June data" at (2.5, 80)
plt.text(2.5, 80, "Missing June data")

# Display graph
plt.show()
# x should be ransom.letter and y should be ransom.frequency
plt.plot(ransom.letter, ransom.frequency,
         # Label should be "Ransom"
         label="Ransom",
         # Plot the ransom letter as a dotted gray line
         linestyle=':', color='gray')

# Display the plot
plt.show()