# NOTE: "Skip to content" was page-navigation text captured by the export; kept as a comment so the file parses.
#importing libraries...
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the holiday records from disk.
holiday = pd.read_csv('holiday.csv')
holiday.head()

# Bar chart of the number of rows per holiday_type (labelled as days).
chart = sns.countplot(x='holiday_type', data=holiday)
chart.set_xlabel('Holiday Type')
chart.set_ylabel('Number of Days')
chart.set_title('Holidays type and count')
plt.show()
# Parse the holiday dates and derive the calendar month number of each one.
holiday['holiday_date'] = pd.to_datetime(holiday['holiday_date'])
holiday['month'] = holiday['holiday_date'].dt.month
holiday.head()

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old names, so use whichever spelling this install provides.
colorblind_style = 'seaborn-v0_8-colorblind'
if colorblind_style not in plt.style.available:
    colorblind_style = 'seaborn-colorblind'
# Apply the style before creating the figure so the new axes pick it up.
plt.style.use(colorblind_style)

# Count plot of holidays per month, split by holiday type. The plot is bound
# to `ax` explicitly instead of relying on the implicit "current axes".
fig, ax = plt.subplots()
ax.set_title('Number of holidays taken per month and their type')
sns.countplot(data=holiday, x='month', hue='holiday_type', ax=ax)
plt.show()
# Read the raw work/collection records.
uncleaned_work = pd.read_csv('fy22_23.csv')

uncleaned_work.info()
# Drop rows that have no 'Date Completed'. The explicit .copy() makes `work`
# an independent frame, so the column assignments below cannot raise pandas'
# SettingWithCopyWarning or be mistaken for writes into `uncleaned_work`.
work = uncleaned_work.dropna(subset=['Date Completed']).copy()
work.info()
# Parse the completion dates and derive the calendar month of each record.
work['Date Completed'] = pd.to_datetime(work['Date Completed'])
work['month'] = work['Date Completed'].dt.month
work.head()
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old names, so use whichever spelling this install provides.
colorblind_style = 'seaborn-v0_8-colorblind'
if colorblind_style not in plt.style.available:
    colorblind_style = 'seaborn-colorblind'
# Apply the style before creating the figure so the new axes pick it up.
plt.style.use(colorblind_style)

# Two side-by-side count plots of work records per month:
# left = all records, right = the same counts split by REGION.
fig, (ax0, ax1) = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))
sns.countplot(data=work, x='month', ax=ax0)
ax0.set_ylabel('Number of Collection')
sns.countplot(data=work, x='month', hue='REGION', ax=ax1)
ax1.set_ylabel('Number of Collection')
fig.suptitle('Total Number of Collection v/s Region')

plt.show()
# Number of holiday rows for every (month, holiday_type) pair, returned as a
# flat frame with columns: month, holiday_type, holiday_count.
holiday_count = (
    holiday.groupby(['month', 'holiday_type'])['holiday_date']
    .count()
    .reset_index(name='holiday_count')
)

# The busiest month must be judged on the total across all holiday types.
# Taking the max of the per-type counts would instead report the month that
# holds the largest single (month, type) cell.
monthly_total = holiday_count.groupby('month', as_index=False)['holiday_count'].sum()
max_holiday = monthly_total[monthly_total['holiday_count'] == monthly_total['holiday_count'].max()]
# On a tie, the first (lowest-numbered) month is reported.
month_with_max_holiday = max_holiday['month'].values[0]
print(f"The month with the highest holiday is {month_with_max_holiday}.")
# (notebook cell status captured by the export: "Run cancelled")
# Number of completed work records for every (month, REGION) pair, as a flat
# frame with columns: month, REGION, 'Collection Count'.
work_count = (
    work.groupby(['month', 'REGION'])['Date Completed']
    .count()
    .reset_index(name='Collection Count')
)
work_count
# (notebook cell status captured by the export: "Run cancelled")
# Collapse the per-type holiday counts to one total per month first. Merging
# the per-(month, holiday_type) table directly would repeat every
# (month, REGION) collection row once per holiday type in that month.
monthly_holidays = holiday_count.groupby('month', as_index=False)['holiday_count'].sum()

# Outer join keeps months that appear in only one of the two tables.
# validate='many_to_one' makes pandas raise if the right-hand month keys are
# not unique, guarding against the row duplication described above.
work_holiday = pd.merge(
    work_count,
    monthly_holidays,
    how='outer',
    on='month',
    validate='many_to_one',
)
# Months with collections but no recorded holidays get a holiday count of 0.
work_holiday['holiday_count'] = work_holiday['holiday_count'].fillna(0)
work_holiday
# (notebook cell status captured by the export: "Run cancelled")
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old names, so use whichever spelling this install provides.
darkgrid_style = 'seaborn-v0_8-darkgrid'
if darkgrid_style not in plt.style.available:
    darkgrid_style = 'seaborn-darkgrid'
# Apply the style before creating the figure so the new axes pick it up.
plt.style.use(darkgrid_style)

# Scatter of collection count per month: colour encodes the holiday count,
# marker shape encodes the REGION.
fig, ax = plt.subplots(figsize=(8, 8))
palette = sns.color_palette("rocket_r", as_cmap=True)
sns.scatterplot(
    data=work_holiday,
    x='month',
    y='Collection Count',
    hue='holiday_count',
    style='REGION',
    palette=palette,
    ax=ax,
)
fig.suptitle('Work Vs Holiday Vs Region')
plt.show()