# Plot Netflix movie durations against release year, colored by genre.
# Import the necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
# Load the Netflix catalogue from the CSV file
netflix_data = pd.read_csv("netflix_data.csv")

# Keep only the rows whose 'type' column equals 'Movie'
netflix_subset = netflix_data[netflix_data["type"].eq("Movie")]

# Narrow the movie rows down to the columns used in this analysis
netflix_movies = netflix_subset.loc[
    :, ["title", "country", "genre", "release_year", "duration"]
]

# Movies running for less than 60 minutes
short_movies = netflix_movies[netflix_movies["duration"].lt(60)]
# Marker color for each highlighted genre; any other genre value
# (including a missing one) falls back to 'Blue'.
genre_colors = {
    'Children': 'Orange',
    'Documentaries': 'Black',
    'Stand-Up': 'Green',
}
# Build one color per row of 'netflix_movies', in row order, by reading the
# 'genre' column directly instead of walking every row with iterrows().
colors = [genre_colors.get(genre, 'Blue') for genre in netflix_movies['genre']]
# Open a 12x8 figure to draw on
fig = plt.figure(figsize=(12, 8))
# Release year goes on the horizontal axis, duration on the vertical one
x = netflix_movies['release_year']
y = netflix_movies['duration']
# Draw one point per movie, colored by the matching entry in 'colors'
plt.scatter(x=x, y=y, c=colors)
# Caption the plot, then the two axes
plt.title('Movie Duration by Year of Release')
plt.xlabel("Release year")
plt.ylabel("Duration (min)")
# Render the finished figure
plt.show()
# Record the conclusion, then print it right after the question it answers
answer = 'maybe'
print('Are we certain that movies are getting shorter?', answer, sep=' ')