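# NOTE: "Skip to content", "New Workbook" and "Sign up" are web-page chrome captured with this notebook export.
# DEAD_PEOPLE (presumably the notebook title)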
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import zipfile
import urllib.request
import os
%matplotlib inline
# Source archive with the weekly death statistics workbooks.
# Previous edition of the archive, kept for reference:
# url ='https://stat.gov.pl/download/gfx/portalinformacyjny/pl/defaultaktualnosci/5468/39/24/1/zgony_wedlug_tygodni_w_polsce_2023_2.zip'
url = "https://stat.gov.pl/download/gfx/portalinformacyjny/pl/defaultaktualnosci/5468/39/25/1/zgony_wedlug_tygodni_w_polsce_2024.zip"

# Fetch the archive and store it in the working directory as file.zip.
urllib.request.urlretrieve(url, 'file.zip')

# Unpack every member of the archive into the working directory.
with zipfile.ZipFile('file.zip', 'r') as archive:
    archive.extractall()

# Folder holding the extracted workbooks.
# NOTE(review): the 'wedêug' spelling presumably mirrors how the archive's
# entry name is decoded on extraction - check the folder on disk before
# "correcting" it.
path = "./Zgony wedêug tygodni w Polsce 2024/"

# One row per directory entry, ordered by name (column 'nazwa').
dir_list = pd.DataFrame(os.listdir(path), columns=['nazwa']).sort_values(by="nazwa")
dir_list

# Prefix that is joined with an entry of dir_list.nazwa to address a workbook.
file = "Zgony wedêug tygodni w Polsce 2024/"
def get_data(file, region='Polska'):
    """Read one workbook and return the rows of a single region, transposed.

    The sheet is loaded with row index 6 as the header row. Rows whose
    'Unnamed: 2' column equals ``region`` are kept, the sheet's second and
    third columns are discarded, and 'Unnamed: 0' becomes the row index
    before the frame is transposed.

    Args:
        file: Anything ``pandas.read_excel`` accepts as its first argument.
        region: Value to match in the 'Unnamed: 2' column.

    Returns:
        A DataFrame with one column per 'Unnamed: 0' value of the matching
        rows and one row per remaining sheet column.
    """
    sheet = pd.read_excel(file, header=6)

    # The columns to discard are identified by position in the full sheet.
    discarded = [sheet.columns[1], sheet.columns[2], 'index']

    matching = sheet.loc[sheet['Unnamed: 2'] == region]
    return (
        matching
        .reset_index()
        .drop(columns=discarded)
        .set_index('Unnamed: 0')
        .T
    )
# Overlay the weekly 'Ogółem' series of every workbook on a single chart,
# one line per workbook, labelled with the workbook's file name.
# To inspect a single workbook: get_data(file + dir_list.nazwa.iloc[-1])['Ogółem']

# The style sheet has to be active before the axes are created: matplotlib
# binds the colour cycle to an axes at creation time, so switching the style
# afterwards would leave this figure on the previously active colours.
plt.style.use("seaborn-v0_8-pastel")
fig, ax = plt.subplots(figsize=(25, 13), layout='constrained')

for workbook_name in dir_list.nazwa:
    ax.plot(get_data(file + workbook_name)['Ogółem'], label=workbook_name)

ax.legend()
plt.show()
# Build `fd`: one column per workbook holding its weekly 'Ogółem' series,
# then chart the per-column sums as a bar plot.

# Not used by the code visible here; kept so that any later cell that
# references it still finds it.
data_full = pd.DataFrame()

weekly_totals = {
    workbook_name: get_data(file + workbook_name)['Ogółem']
    for workbook_name in dir_list.nazwa
}
# Concatenating aligns on the union of the row labels. Assigning the columns
# one by one into an empty frame would instead pin the index to the first
# workbook and silently drop any label that workbook does not have.
fd = pd.concat(weekly_totals, axis=1)

# Rename each column to characters [-9:-5] of its file name
# (the four characters in front of a 5-character suffix such as '.xlsx').
columns_z = {name: str(name[-9:-5]) for name in fd.columns}
fd = fd.rename(columns=columns_z)

fig, ax = plt.subplots(figsize=(25, 13))
fd_sum = fd.sum()
fd_sum.plot(kind='bar', ax=ax)

ax.set_xticklabels(fd.columns, rotation=90)

# Write each bar's value just above the bar.
for position, total in enumerate(fd_sum):
    ax.text(position, total + 0.1, str(total), ha='center', va='bottom')

plt.show()
# Box plot with one box per row label of `fd`, summarising that row over all
# columns, with selected columns drawn on top as coloured lines.
fig, ax = plt.subplots(figsize=(25, 13))
# NOTE(review): the style is switched after the figure exists, exactly as in
# the original cell, so settings read at figure/axes creation still come from
# the previously active style. Left in place to avoid changing the layout -
# confirm whether it should move above plt.subplots().
plt.style.use('_mpl-gallery')

# NOTE(review): matplotlib documents `meanline` as effective only together
# with showmeans=True, so no mean marker is drawn here - confirm the intent.
ax = fd.T.boxplot(ax=ax, meanline=True)
ax.set_xlabel("week number")
ax.set_ylabel('QTY DEAD PEOPLE')
ax.set_title('Zgony w latach 2000-2024 uśrednione w tygodniach')
ax.set_ylim(5000, 17500)
ax.set_xlim(0, 53)

# The boxes are placed at x = 1..N. Plot the highlighted columns at those same
# positions; Series.plot would draw a string-indexed series at 0..N-1 and
# relabel the ticks, shifting the lines one step against the boxes.
box_positions = list(range(1, len(fd.index) + 1))
highlighted = {
    '2020': 'r',
    '2021': 'g',
    '2022': 'b',
    '2023': 'brown',
    '2024': 'black',
}
for column, colour in highlighted.items():
    # Skip columns that were not loaded instead of failing with KeyError.
    if column not in fd.columns:
        continue
    ax.plot(box_positions, fd[column].to_numpy(), color=colour, label=column)
ax.legend()

plt.show()
fd
# NOTE: "Run cancelled" is a notebook UI status message captured with this export, not code.
# import pandas as pd
# import matplotlib.pyplot as plt
# rok= [x+2000 for x in range(24)]
# for dat in rok:
#     df = pd.read_excel('zgony_wedlug_tygodni_2023/Zgony wedêug tygodni w Polsce_'+str(dat)+'.xlsx',header=6)

#     df = df[df['Unnamed: 2']=='Polska']
#     df = df.set_index('Unnamed: 0')
#     df = df.drop(columns=['Unnamed: 1','Unnamed: 2'])
#     df = df.T

#     plt.style.use("seaborn-v0_8-pastel")
#     fig, ax = plt.subplots(figsize=(25, 13))
#     ax.set_ylim(0,17500)
#     ax.set_xlim(0,53)
#     ax.set_xlabel("week number")
#     ax.set_ylabel('QTY DEAD PEOPLE')
#     ax.set_title(str(dat))
#     categories = df.columns
#     categories = categories[1:]
#     # stack plot of the successive age groups
#     ax = plt.stackplot(df.index, *[df[cat] for cat in categories], labels=categories)
#     plt.legend()
#     plt.savefig('dane'+str(dat))

# #plt.show()