# Skip to content  (web-page navigation text left over from the export; not code)
import pandas as pd

# Remote parquet file to load (hosted in the dosm-public-economy bucket).
URL_DATA = 'https://storage.googleapis.com/dosm-public-economy/salaries_age_sex_xs.parquet'

# Read the parquet file at URL_DATA into a DataFrame.
df = pd.read_parquet(URL_DATA)

# Convert the 'date' column to datetimes, but only when the file has one.
if 'date' in df.columns:
    df['date'] = pd.to_datetime(df['date'])

# Print the frame using pandas' default repr, then the 0:20 row slice.
print(df)
print(df[0:20])
# Import matplotlib.pyplot under its usual alias and create a figure
import matplotlib.pyplot as plt
fig = plt.figure()

# Draw a scatter plot of the 'mean' column against the 'variable' column
plt.scatter(df['variable'], df['mean'])

# Label the axes using the same wording as the title ("Mean Wage vs Age Group")
plt.xlabel("Age Group")
plt.ylabel("Mean Wage")

# Create a title
plt.title("Mean Wage vs Age Group")

# Show the plot
plt.show()

# 1 hidden cell  (notebook UI marker left over from the export; not code)
import numpy as np
import pandas as pd
import seaborn as sns

# Pivot the 'mean' column into a table with one row per (year, sex) pair and
# one column per distinct 'variable' value. Cells are combined with
# pivot_table's default aggregation.
test = df.pivot_table(
    values='mean',
    index=['year', 'sex'],
    columns='variable',
)

print(test)