Skip to content

Course Notes

Use this workspace to take notes, store code snippets, or build your own interactive cheatsheet! The datasets used in this course are available in the datasets folder.

# Import any packages you want to use here
import pandas as pd
# Show up to 15 columns when pandas prints a wide frame.
pd.set_option('display.max_columns', 15)

# Load the adult dataset and list the dtype pandas inferred for each column.
df = pd.read_csv('datasets/adult.csv')
print(df.dtypes)

# Remove leading/trailing spaces from the Education labels.
df["Education"] = df["Education"].str.strip(" ")

# Print the distinct values found in each column of interest.
want_to_know = ["Workclass", "Education", "Sex", "Country"]
for column_name in want_to_know:
    print(column_name, df[column_name].unique())
# Ordered list (lowest to highest) of the levels in the categorical Education
# column. set_categories() below turns every value that is NOT in this list
# into NaN, so each level has to be spelled exactly as it appears in the data:
# '7th-8th' and '9th' used to be missing and 'Doctorate' was misspelled as
# 'Dectorate', which silently blanked out those rows.
# NOTE(review): assumes the column holds the 16 standard Adult-census
# education labels -- confirm against df["Education"].unique().
ed_list = [
    'Preschool',
    '1st-4th',
    '5th-6th',
    '7th-8th',
    '9th',
    '10th',
    '11th',
    '12th',
    'HS-grad',
    'Some-college',
    'Assoc-voc',
    'Assoc-acdm',
    'Bachelors',
    'Masters',
    'Prof-school',
    'Doctorate',
]
# Defensive clean-up: drop any stray surrounding whitespace so the labels
# compare equal to the stripped values in the column.
ed_list = [level.strip() for level in ed_list]

# Convert to an ordered categorical so the levels can be compared and sorted.
df["Education"] = df["Education"].astype("category")
df["Education"] = df["Education"].cat.set_categories(new_categories=ed_list, ordered=True)

Find the rows that relate to me

# Boolean masks, one per personal attribute.
my_hours = df["Hours/Week"].eq(40)
my_age = df["Age"].eq(34)
my_ed = df["Education"].eq("Assoc-acdm")

# Keep the rows matching both the hours and the education mask.
# NOTE(review): my_age is built above but is not part of this filter --
# confirm that leaving it out is intended.
like_me = df.loc[my_hours & my_ed]
import seaborn as sns

# Pairwise-relationship plot for the adult data frame.
sns.pairplot(data=df)

# Load the Las Vegas TripAdvisor data, then describe and plot it.
vegas = pd.read_csv('datasets/lasvegas_tripadvisor.csv')
vegas.describe()
sns.pairplot(data=vegas)

# List the available columns and count the rows per traveler type.
print(vegas.columns)
print(vegas["Traveler type"].value_counts())

# Box plot of Score for each "Member years" value.
sns.catplot(data=vegas, kind="box", x="Member years", y="Score")

# Theme settings; note they are applied only after the plots above.
sns.set(font_scale=1.4)
sns.set_style("darkgrid")