Skip to content

### Python Exercise: Exploring data

Exploring Airbnb data

Links to check: https://matplotlib.org/2.0.2/gallery.html and https://seaborn.pydata.org/examples/index.html — these websites show example visuals together with the code that produces them.

# Data loading and manipulation
import pandas as pd

#visualisations
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns

#configuring


Read data

# Load the two CSV datasets used in this exercise from the working directory.
listings = pd.read_csv(filepath_or_buffer='AB_NYC_2019.csv')
wine = pd.read_csv(filepath_or_buffer='wine.csv')

Explore Data

# Number of listings per neighbourhood: counts the non-null `id` values in
# each group and keeps `neighbourhood` as a regular column (as_index=False).
by_neighbourhood = (
    listings
    .groupby("neighbourhood", as_index=False)['id']
    .count()
)

# The ten neighbourhoods with the most listings, largest first.
top10_neighbourhoods = (
    by_neighbourhood
    .sort_values(by='id', ascending=False)
    .head(10)
)
#print(top10_neighbourhoods.dtypes)

# Interactive bar chart; the figure is built but only rendered if the
# fig.show() line below is uncommented.
fig = px.bar(
    data_frame=top10_neighbourhoods,
    x='neighbourhood',
    y='id',
)
#fig.show()

# Alternative static version of the same chart with seaborn:
#sns.barplot(data=top10_neighbourhoods, y='neighbourhood', x='id').set_title('Top 10 Neighbourhoods')
# Keep only the listings priced at 500 or more.
# NOTE(review): the variable name says "min50" but the threshold is 500 —
# confirm which of the two is intended.
listings_min50 = listings.loc[listings["price"].ge(500)]
import plotly.express as px

# Violin plot of the price distribution, one violin (and colour) per
# neighbourhood group. The figure is built but only rendered if the
# fig_price.show(...) line below is uncommented.
fig_price = px.violin(
    data_frame=listings_min50,
    x='neighbourhood_group',
    y='price',
    color='neighbourhood_group',
)
#fig_price.update_layout(yaxis_range=[0, 4000])
#fig_price.show(config={"displayModeBar": False})
# Pairwise correlation between the numeric columns of the listings.
# numeric_only=True is passed explicitly: since pandas 2.0 corr() no longer
# drops non-numeric columns silently and raises when the frame contains text
# columns (this frame has at least the neighbourhood names).
corr_matrix = listings.corr(numeric_only=True)
#sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
import matplotlib.pyplot as plt

# Number of missing (null) entries in each column of the wine dataframe.
missing_values = wine.isna().sum()

# Turn the counts into percentages of the row count, order them from the
# most to the least incomplete column, then drop columns that have no
# missing values at all.
percent_missing = (
    missing_values
    .div(len(wine))
    .mul(100)
    .sort_values(ascending=False)
    .loc[lambda pct: pct > 0]
)

# Horizontal bar chart, one bar per incomplete column. The y axis is
# inverted so the first (largest) entry is drawn at the top.
plt.figure(figsize=(10, 6))
plt.barh(y=percent_missing.index, width=percent_missing.values)
plt.gca().invert_yaxis()
plt.xlabel('Percentage of missing values')
plt.title('Missing values per attribute in wine dataframe')
plt.show()
# Reload the wine data and give the third column (position 2) the name
# 'Vine Area'.
wine = pd.read_csv(filepath_or_buffer='wine.csv')
third_column = wine.columns[2]
wine = wine.rename(columns={third_column: 'Vine Area'})

# Show the first ten rows followed by summary statistics.
# NOTE(review): `display` is not imported anywhere in this file; presumably
# this relies on the builtin that Jupyter/IPython provides — it will fail
# with NameError in a plain Python script.
display(wine.head(10))
#wine.dtypes
display(wine.describe())
# Order the rows from the largest to the smallest producer so the bars
# descend from left to right.
wine = wine.sort_values(by='Wine produced (ML)', ascending=False)

# Create the figure BEFORE drawing. Calling plt.figure() after plt.bar()
# opens a second, empty figure, so the requested size never applied to the
# bar chart and an extra blank figure was shown.
plt.figure(figsize=(5, 6))
plt.bar(wine['Country'], wine['Wine produced (ML)'])

# Labels taken from the plotted columns (replacing the template
# placeholders 'title name' / 'x_axis name' / 'y_axis name').
plt.title('Wine produced per country')
plt.xlabel('Country')
plt.ylabel('Wine produced (ML)')
plt.show()




Open the video in a new tab