Skip to content
Project: Investigating Netflix Movies
  • AI Chat
  • Code
  • Report
  • # Import the necessary libraries
    import pandas as pd
    import matplotlib.pyplot as plt
    # Load the raw Netflix catalogue from the CSV file in the working directory.
    netflix_data = pd.read_csv("netflix_data.csv")
    # Keep only the rows whose 'type' column equals 'Movie'; all other types are dropped.
    netflix_subset = netflix_data[netflix_data["type"].eq("Movie")]
    # Narrow the movie rows down to the five columns of interest.
    netflix_movies = netflix_subset.loc[
        :, ["title", "country", "genre", "release_year", "duration"]
    ]
    # Movies whose 'duration' value is strictly below 60.
    short_movies = netflix_movies[netflix_movies["duration"].lt(60)]
    # Colour for each highlighted genre; any other (or missing) genre falls back to 'Blue'.
    genre_colors = {
        'Children': 'Orange',
        'Documentaries': 'Black',
        'Stand-Up': 'Green',
    }
    # Build one colour per row of 'netflix_movies', in row order, so the list
    # lines up element-for-element with columns taken from that DataFrame.
    # Iterating the 'genre' column directly avoids the per-row Series that
    # DataFrame.iterrows() would construct just to read a single field.
    colors = [genre_colors.get(genre, 'Blue') for genre in netflix_movies['genre']]
    # Open a 12x8 figure holding a single set of axes
    fig, ax = plt.subplots(figsize=(12, 8))
    # Release year goes on the horizontal axis, duration on the vertical one
    x = netflix_movies['release_year']
    y = netflix_movies['duration']
    # One point per movie, coloured by the matching entry of 'colors'
    ax.scatter(x, y, c=colors)
    # Axis labels
    ax.set_xlabel("Release year")
    ax.set_ylabel("Duration (min)")
    # Plot title
    ax.set_title('Movie Duration by Year of Release')
    # Render the figure
    plt.show()
    # Record the conclusion and print it right after the question
    answer = 'maybe'
    print(f'Are we certain that movies are getting shorter? {answer}')