Skip to content

Course Notes

Use this workspace to take notes, store code snippets, or build your own interactive cheatsheet! For courses that use data, the datasets will be available in the datasets folder.

# Import any packages you want to use here
import pandas as pd 
from sklearn.cluster import KMeans 
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
import zipfile

Take Notes

Add notes here about the concepts you've learned and code cells with code you want to keep.

Add your notes here

# Load the grains table: unpack the Grains archive into the datasets folder,
# then read the seeds CSV it contains.
with zipfile.ZipFile('datasets/Grains.zip') as archive:
    archive.extractall('datasets')

# header=None: the first row of the file is treated as data, and the columns
# receive integer labels instead of names.
samples = pd.read_csv('./datasets/Grains/seeds.csv', header=None)

# `display` is not imported in this file; presumably it is the notebook's
# built-in rich display helper.
display(samples.head())
# Elbow analysis: fit k-means once for every k in 1..9 and record the inertia
# of each fitted model so the values can be compared on a single curve.
inertias = []
ks = range(1, 10)
for k in ks:
    # A fixed random_state makes the centroid initialisation, and therefore
    # the whole inertia curve, reproducible from run to run. n_init is pinned
    # explicitly because its default differs between scikit-learn releases.
    model = KMeans(n_clusters=k, n_init=10, random_state=42)
    model.fit(samples)
    inertias.append(model.inertia_)
    # model.score(samples) is printed next to inertia_ for comparison.
    print(f'{k = }, {model.n_clusters = } {model.inertia_ = :,.2f}, {model.score(samples) = :,.2f} ')

# Plot inertia against k with a marker per fitted model and labelled axes, so
# the "elbow" can be read off the figure directly.
plt.plot(ks, inertias, '-o')
plt.xlabel('number of clusters, k')
plt.ylabel('inertia')
plt.xticks(list(ks))
plt.show()
# Cross-validated grid search over the number of clusters (k = 1..29).
#
# No `scoring` argument is passed, so GridSearchCV falls back to the
# estimator's own score method (KMeans.score) evaluated on each held-out fold.
# NOTE(review): that score is the negated k-means objective, which usually
# keeps improving as k grows, so best_params_ may simply land near the top of
# the grid -- cross-check the result against the elbow plot before trusting it.
#
# A fixed random_state keeps the search reproducible between runs; n_init is
# pinned explicitly because its default differs between scikit-learn releases.
kmeans = KMeans(n_init=10, random_state=42)

# This rebinds the module-level `ks` (range(1, 10) in the elbow loop) to a
# wider range for the search.
ks = range(1, 30)
param_grid = {'n_clusters': ks}

kmeans_cv = GridSearchCV(kmeans, param_grid)
kmeans_cv.fit(samples)

# Notebook-style trailing expression: shows the selected parameters, their
# mean cross-validated score, and the shape of the input table.
kmeans_cv.best_params_, kmeans_cv.best_score_, samples.shape