Skip to content
test
# Start coding here...
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import iqr
# Load the coffee dataset; every step below reads from this frame.
coffee = pd.read_csv('coffee.csv')

# Row count per 'Place type' as a two-column frame: 'Place type', 'cuenta'.
df = coffee.groupby('Place type').size().reset_index(name='cuenta')

plt.bar(df['Place type'], df['cuenta'])
# Show the bar chart on its own figure so the histogram below is not drawn
# on top of it.
plt.show()

# 'Reviews' values strictly below the column maximum. Rows with a missing
# 'Reviews' are excluded as well, because NaN < max evaluates to False.
reviews_below_max = coffee.loc[
    coffee['Reviews'] < coffee['Reviews'].max(), 'Reviews'
]

plt.hist(reviews_below_max, bins=10)
plt.show()

plt.boxplot(reviews_below_max)
plt.show()

# Median of 'Reviews' with missing values counted as 0. The result is not
# stored; it is only visible in an interactive session.
np.quantile(coffee['Reviews'].fillna(0), .5)
# Summary statistics of 'Reviews'. The result is not stored; it is only
# visible in an interactive session.
coffee['Reviews'].describe()


def outlier_detector(df, column, whisker=1.5):
    """Return the rows of ``df`` whose ``column`` value is not an IQR outlier.

    A value is an outlier when it lies below ``Q1 - whisker * IQR`` or above
    ``Q3 + whisker * IQR``, where Q1 and Q3 are the 0.25 and 0.75 quantiles
    of the column and ``IQR = Q3 - Q1``.

    Missing values are replaced by 0.0 when the quartiles are computed, but
    the comparison itself runs on the raw column. Rows with a missing value
    therefore never compare as outliers and are kept in the result.

    Args:
        df: DataFrame to filter. It is not modified.
        column: Label of the numeric column to test.
        whisker: Multiplier applied to the IQR to build the fences.

    Returns:
        A new DataFrame with the outlier rows removed, in the original row
        order.
    """
    if df.empty:
        # No rows means no quartiles to compute and nothing to remove.
        return df.copy()

    values = df[column]
    lower, higher = np.quantile(values.fillna(0.0), [0.25, 0.75])
    spread = higher - lower

    # A row-wise boolean mask drops exactly the offending rows, even when
    # index labels repeat, and avoids a per-row membership scan.
    is_outlier = (values < lower - whisker * spread) | (
        values > higher + whisker * spread
    )
    return df[~is_outlier]
# Histogram of 'Reviews' using only the rows outlier_detector returns.
x = outlier_detector(coffee, 'Reviews')
x['Reviews'].hist()
# Render the figure, as done after every other plot in this script.
plt.show()