Skip to content
Python Exercise: Exploring Airbnb and Wine Data
### Python Exercise Exploring data
Exploring Airbnb data
Links to check: https://matplotlib.org/2.0.2/gallery.html and https://seaborn.pydata.org/examples/index.html. These websites show example visualisations together with the code that produces them.
# Read data
import pandas as pd
#visualisations
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
#configuring
Read data
# Load the two source datasets; both paths are resolved relative to the
# current working directory.
airbnb_csv = 'AB_NYC_2019.csv'
wine_csv = 'wine.csv'
listings = pd.read_csv(airbnb_csv)
wine = pd.read_csv(wine_csv)
Explore Data
# Count the non-null listing ids in each neighbourhood. as_index=False keeps
# 'neighbourhood' as an ordinary column, so the 'id' column holds the count.
ids_per_neighbourhood = listings.groupby("neighbourhood", as_index=False)['id']
by_neighbourhood = ids_per_neighbourhood.count()

# Rank neighbourhoods by listing count (highest first) and keep the first ten.
ranked_neighbourhoods = by_neighbourhood.sort_values(by='id', ascending=False)
top10_neighbourhoods = ranked_neighbourhoods.head(10)
#print(top10_neighbourhoods.dtypes)

# Interactive bar chart of the ten rows; it is built here but only rendered
# when the fig.show() line below is enabled.
fig = px.bar(data_frame=top10_neighbourhoods, x='neighbourhood', y='id')
#fig.show()
# Static seaborn alternative (horizontal bars), currently disabled:
#sns.barplot(data=top10_neighbourhoods, y='neighbourhood', x='id').set_title('Top 10 Neighbourhoods')
import plotly.express as px

# Keep only the listings priced at 500 or more.
# NOTE(review): the variable name says "min50" while the threshold is 500 --
# confirm which of the two is intended.
is_high_priced = listings["price"] >= 500
listings_min50 = listings[is_high_priced]

# One violin per neighbourhood group showing the price distribution of the
# filtered listings; the same column drives the colour.
fig_price = px.violin(
    data_frame=listings_min50,
    x='neighbourhood_group',
    y='price',
    color='neighbourhood_group',
)
# Optional rendering tweaks, currently disabled:
#fig_price.update_layout(yaxis_range=[0, 4000])
#fig_price.show(config={"displayModeBar": False})
# Pairwise correlation of the numeric columns only.
# numeric_only=True is passed explicitly: since pandas 2.0 the default is
# False, so a bare listings.corr() raises as soon as the frame contains a
# non-numeric column (presumably the case here, e.g. 'neighbourhood' -- confirm).
# With the flag set, non-numeric columns are skipped, matching the result
# older pandas versions produced by default.
corr_matrix = listings.corr(numeric_only=True)
# Optional annotated heatmap of the matrix, currently disabled:
#sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
import matplotlib.pyplot as plt

# Number of null cells in each column of the wine dataframe.
missing_values = wine.isnull().sum()

# Turn the counts into a percentage of all rows, order them from the highest
# share to the lowest, and drop the columns that have nothing missing.
total_rows = len(wine)
percent_missing = (
    (missing_values / total_rows * 100)
    .sort_values(ascending=False)
    .loc[lambda share: share > 0]
)

# Horizontal bar chart with one bar per remaining column.
plt.figure(figsize=(10, 6))
plt.barh(y=percent_missing.index, width=percent_missing.values)
axes = plt.gca()
# barh places the first entry at the bottom; flipping the axis puts the
# column with the largest share of missing values at the top.
axes.invert_yaxis()
axes.set_xlabel('Percentage of missing values')
axes.set_title('Missing values per attribute in wine dataframe')
plt.show()
# Reload the wine data so this cell starts from the columns as stored in the CSV.
wine = pd.read_csv('wine.csv')
# Rename the third column (selected by position) to a fixed name.
wine.rename(columns={wine.columns[2]: 'Vine Area'}, inplace=True)
# display is not imported in this file; presumably the IPython/Jupyter
# builtin -- confirm before running this outside a notebook.
display(wine.head(10))
#wine.dtypes
display(wine.describe())

# Order countries by production, largest first, so the bars descend from
# left to right.
wine = wine.sort_values(by='Wine produced (ML)', ascending=False)

# The figure must be created BEFORE drawing. Calling plt.figure() after
# plt.bar() would open a second, empty figure and leave the chart on a
# default-sized one, so the requested figsize would never apply to it.
plt.figure(figsize=(5, 6))
plt.bar(wine['Country'], wine['Wine produced (ML)'])
# Labels name the plotted columns instead of the template placeholders.
plt.title('Wine produced per country')
plt.xlabel('Country')
plt.ylabel('Wine produced (ML)')
plt.show()