# Project: Investigating Netflix Movies
# (Exported from a notebook; two hidden cells are not included in this file.)
# Importing pandas and matplotlib
import pandas as pd
import matplotlib.pyplot as plt
# Read the Netflix CSV file from disk into netflix_df.
netflix_df = pd.read_csv("netflix_data.csv")

# Drop every row whose type is "TV Show"; whatever remains is netflix_subset.
netflix_subset = netflix_df.loc[netflix_df["type"] != "TV Show"]

# Columns kept for the movie analysis.
selected_netflixmovie = ["title", "country", "genre", "release_year", "duration"]

# Rows typed exactly "Movie", narrowed to the selected columns in one .loc step.
netflix_movies = netflix_df.loc[netflix_df["type"] == "Movie", selected_netflixmovie]

# Movies strictly shorter than 60 minutes, kept aside so the result can be
# inspected for possible contributing factors.
short_movies = netflix_movies.loc[netflix_movies["duration"] < 60]
# Possible factor: the short movies are documentaries, children's movies, or talk shows.
# Using a for loop and if/elif statements, iterate through the rows of netflix_movies and assign colors of your choice to four genre groups ("Children", "Documentaries", "Stand-Up", and "Other" for everything else). Save the results in a colors list. Initialize a matplotlib figure object called fig and create a scatter plot for movie duration by release year using the colors list to color the points and using the labels "Release year" for the x-axis, "Duration (min)" for the y-axis, and the title "Movie Duration by Year of Release".
# Assign one plot colour per movie, in row order, based on its genre group.
# The loop walks the "genre" column directly: it yields the same values as
# reading row["genre"] from iterrows(), without building a Series per row.
colors = []
for genre in netflix_movies["genre"]:
    if genre == "Children":
        colors.append("blue")
    elif genre == "Documentaries":
        colors.append("green")
    elif genre == "Stand-Up":
        colors.append("orange")
    else:
        # "Other": every genre that is not one of the three groups above.
        colors.append("gray")
# Initialize a new figure (figsize 12 x 8) and keep a handle to it in fig.
fig = plt.figure(figsize=(12, 8))

# Scatter plot of duration versus release_year. colors holds one entry per
# row of netflix_movies, so each point is drawn in its genre-group colour;
# alpha=0.5 keeps overlapping points distinguishable.
plt.scatter(
    netflix_movies["release_year"],
    netflix_movies["duration"],
    c=colors,
    alpha=0.5,
)

# Set labels and title
plt.title("Movie Duration by Year of Release")
plt.xlabel("Release year")
plt.ylabel("Duration (min)")

# Show the plot
plt.show()

# Are we certain that movies are getting shorter?
answer = "No"