# Skip to content
# Start coding here... 
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import iqr
# Load the coffee-shop data set; the code below reads its 'Place type' and
# 'Reviews' columns.
coffee = pd.read_csv('coffee.csv')

# Number of rows per place type as a two-column frame: 'Place type', 'cuenta'.
df = coffee.groupby('Place type').size().reset_index(name='cuenta')

plt.bar(df['Place type'], df['cuenta'])
# Show the bar chart on its own figure; without this the histogram below is
# drawn on top of the bars on the same axes.
plt.show()

# Reviews strictly below the maximum value. Rows with a missing review count
# also drop out here, because NaN fails the comparison.
reviews_below_max = coffee.loc[coffee['Reviews'] < coffee['Reviews'].max(), 'Reviews']

plt.hist(reviews_below_max, bins=10)
plt.show()
plt.boxplot(reviews_below_max)
plt.show()

# Median (missing values counted as 0) and summary statistics. They are
# printed because a bare expression's value is discarded when run as a script.
print(np.quantile(coffee['Reviews'].fillna(0), .5))
print(coffee['Reviews'].describe())
def outlier_detector(df, column, *, whisker=1.5):
    """Return the rows of ``df`` whose ``column`` value is not an IQR outlier.

    The quartiles are computed with missing values counted as 0.0. A row is
    dropped when its value lies below ``Q1 - whisker * IQR`` or above
    ``Q3 + whisker * IQR``. Rows whose value is missing are kept, because
    NaN fails both comparisons.

    Args:
        df: DataFrame to filter; it is not modified.
        column: Label of the numeric column to screen.
        whisker: Multiple of the IQR that sets the fences (default 1.5).

    Returns:
        A new DataFrame holding the non-outlier rows in their original order.
    """
    values = df[column]
    if values.empty:
        # No data means no quartiles to compute and nothing to drop.
        return df.copy()

    # One call for both quartiles; NaN is replaced by 0.0 only for this step.
    q1, q3 = np.quantile(values.fillna(0.0), [0.25, 0.75])
    spread = q3 - q1
    is_outlier = (values < q1 - whisker * spread) | (values > q3 + whisker * spread)

    # A boolean mask (rather than looking rows up by index label) is linear
    # time and does not duplicate rows when index labels repeat.
    return df[~is_outlier]

# Filter the review counts through outlier_detector, then plot a histogram
# of the 'Reviews' values that remain.
x = outlier_detector(df=coffee, column='Reviews')
x.loc[:, 'Reviews'].hist()