# Project: Investigating Netflix Movies
# (Exported from a notebook; two hidden cells are not included in this file.)
# Importing pandas and matplotlib
import pandas as pd
import matplotlib.pyplot as plt
# Load the raw Netflix catalogue from disk
netflix_df = pd.read_csv("netflix_data.csv")
netflix_df
# Keep only the rows whose type is "Movie"
is_movie = netflix_df["type"] == "Movie"
netflix_subset = netflix_df[is_movie]
netflix_subset
# Narrow the table down to the columns used in the analysis
columns_of_interest = ["title", "country", "genre", "release_year", "duration"]
netflix_movies = netflix_subset[columns_of_interest]
netflix_movies
# Movies whose duration is below 60 minutes
short_movies = netflix_movies[netflix_movies["duration"] < 60]
# Colour used for each highlighted genre; any other genre (including a
# missing value) falls back to "black" below.
genre_colors = {
    "Children": "red",
    "Documentaries": "blue",
    "Stand-Up": "green",
}
# Build one colour per row of netflix_movies, in row order, so the list lines
# up with the columns passed to the scatter plot.
colors = [
    genre_colors.get(genre, "black") for genre in netflix_movies["genre"]
]
# Inspect the first 10 values in your list
colors[:10]
# Initialize a new 12x8 figure and grab its single set of axes
fig = plt.figure(figsize=(12, 8))
axes = fig.add_subplot(111)
# Scatter duration against release year, one point per movie, coloured by genre
axes.scatter(netflix_movies["release_year"], netflix_movies["duration"], c=colors)
# Title and axis labels
axes.set_title("Movie Duration by Year of Release")
axes.set_xlabel("Release year")
axes.set_ylabel("Duration (min)")
# Render the figure
plt.show()
# Conclusion recorded for the question "are we certain that movies are
# getting shorter?"
answer = "no"