Skip to content
Investigating Netflix Movies and Guest Stars in The Office
  • AI Chat
  • Code
  • Report
  • # Create a list of years from 2011 to 2020 and a list of durations
    # Years covered by the sample data: 2011 through 2020 inclusive.
    # range() excludes its stop value, hence the 2021.
    years = list(range(2011, 2021))

    # One duration value per entry in `years`, in the same order.
    durations = [103, 101, 99, 100, 100, 95, 95, 96, 93, 90]

    # Column name -> column values. Both lists hold ten entries, so they
    # line up row-for-row once converted to a DataFrame.
    movie_dict = {'years': years, 'durations': durations}

    print(movie_dict)
    import pandas as pd

    # Create a DataFrame from the dictionary (one column per key)
    durations_df = pd.DataFrame(movie_dict)

    print(durations_df)
    # Bring in pyplot under its conventional alias and open a new figure
    import matplotlib.pyplot as plt
    fig = plt.figure()

    # Line plot with the 'years' column on x and the 'durations' column on y
    year_values = durations_df['years']
    duration_values = durations_df['durations']
    plt.plot(year_values, duration_values)

    # Give the chart a title
    plt.title('Netflix Movie Durations 2011-2020')

    # Render the figure
    plt.show()
    # Load the CSV file into a DataFrame
    netflix_df = pd.read_csv('datasets/netflix_data.csv')

    # Preview the first five rows of the raw data
    print(netflix_df.head(5))

    # Keep only the rows whose 'type' column equals "Movie"
    is_movie = netflix_df['type'] == 'Movie'
    netflix_df_movies_only = netflix_df.loc[is_movie]

    # Narrow the movie rows down to the columns of interest
    columns_of_interest = ['title', 'country', 'genre', 'release_year', 'duration']
    netflix_movies_col_subset = netflix_df_movies_only[columns_of_interest]

    # Preview the first five rows of the reduced DataFrame
    print(netflix_movies_col_subset.head(5))
    # Open a larger (12x8) figure for the scatter plot
    fig = plt.figure(figsize=(12, 8))

    # One point per movie: release year on x, duration on y
    x_release_year = netflix_movies_col_subset['release_year']
    y_duration = netflix_movies_col_subset['duration']
    plt.scatter(x_release_year, y_duration)

    plt.title('Movie Duration by Year of Release')

    # Render the figure
    plt.show()
    # Keep only the rows whose duration is below 60
    is_short = netflix_movies_col_subset['duration'] < 60
    short_movies = netflix_movies_col_subset[is_short]

    # Show the first 20 matching rows
    print(short_movies.head(20))
    # Colour assigned to each highlighted genre. Any other genre value
    # (including a missing one) falls back to black.
    genre_colors = {
        'Children': 'red',
        'Documentaries': 'blue',
        'Stand-Up': 'green',
    }

    # Build one colour per row, in row order. Only the 'genre' column is
    # needed, so iterate it directly instead of materialising every row
    # with iterrows().
    colors = [
        genre_colors.get(genre, 'black')
        for genre in netflix_movies_col_subset['genre']
    ]

    # Inspect the first 10 values in the list
    print(colors[:10])
    # Switch to the 'fivethirtyeight' style and initialize a new 12x8 figure
    plt.style.use('fivethirtyeight')
    fig = plt.figure(figsize=(12, 8))

    # Scatter duration against release year, coloured per point via `colors`
    x_release_year = netflix_movies_col_subset['release_year']
    y_duration = netflix_movies_col_subset['duration']
    plt.scatter(x_release_year, y_duration, color=colors)

    # Title and axis labels
    plt.title('Movie duration by year of release')
    plt.xlabel('Release year')
    plt.ylabel('Duration (min) ')

    # Render the figure
    plt.show()
    # Recorded answer to the question: are movies definitely getting shorter?
    are_movies_getting_shorter = "We can not tell"
    Hidden output