Skip to content
1 hidden cell
Machine Learning with Python
# Import the course datasets
# pandas is imported here so this cell runs on its own; the notebook
# otherwise first imports it in a later cell.
import pandas as pd

grains = pd.read_csv('datasets/grains.csv')
# header=None: the file's first row is read as data, not as column names
fish = pd.read_csv('datasets/fish.csv', header=None)
wine = pd.read_csv('datasets/wine.csv')
eurovision = pd.read_csv('datasets/eurovision-2016.csv')
# index_col=0: the first column becomes the row index
stocks = pd.read_csv('datasets/company-stock-movements-2010-2015-incl.csv', index_col=0)
digits = pd.read_csv('datasets/lcd-digits.csv', header=None)
Supervised Learning with scikit-learn
Run the hidden code cell below to import the data used in this course.
1 hidden cell
# Importing pandas
import pandas as pd
# Load the cleaned CSV files for this course into DataFrames
advertising = pd.read_csv('datasets/advertising_and_sales_clean.csv')
music = pd.read_csv('datasets/music_clean.csv')
diabetes = pd.read_csv('datasets/diabetes_clean.csv')
telecom = pd.read_csv("datasets/telecom_churn_clean.csv")
KNN Classifier - KNeighbors Classifier
Classifica novos dados de acordo com a localização e proximidade com outros dados já classificados.
- Create a classifier with n neighbors, fit it to the data (X and y), then predict on new data.
# Import KNeighborsClassifier
from sklearn.neighbors import KNeighborsClassifier

# Pull the two feature columns and the target column out as NumPy arrays
X = churn_df[["account_length", "customer_service_calls"]].values
y = churn_df['churn'].values

# Build a 6-neighbor KNN classifier and fit it in one step
# (fit() returns the fitted estimator itself)
knn = KNeighborsClassifier(n_neighbors=6).fit(X, y)

# Label the observations in X_new with the fitted classifier
y_pred = knn.predict(X_new)

# Print the predictions
print("Predictions: {}".format(y_pred))
- Measure model performance with accuracy: the number of correct predictions divided by the total number of observations.
# Import the module
from sklearn.model_selection import train_test_split

# Target is the churn column; every remaining column is used as a feature
y = churn_df["churn"].values
X = churn_df.drop("churn", axis=1).values

# Hold out 20% of the rows for testing. stratify=y keeps the class
# proportions similar in both splits; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Fit a 5-neighbor classifier on the training split only
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Report accuracy on the held-out test split
print(knn.score(X_test, y_test))
# Create neighbors
# NumPy is imported here so this cell runs top-to-bottom; the notebook
# otherwise imports it only in a later cell.
import numpy as np

# Candidate neighbor counts: 1 through 12
neighbors = np.arange(1, 13)

# Accuracy on each split, keyed by the neighbor count that produced it
train_accuracies = {}
test_accuracies = {}

for neighbor in neighbors:
    # Set up a KNN Classifier
    knn = KNeighborsClassifier(n_neighbors=neighbor)

    # Fit the model
    knn.fit(X_train, y_train)

    # Compute accuracy on the data the model saw and on the held-out data
    train_accuracies[neighbor] = knn.score(X_train, y_train)
    test_accuracies[neighbor] = knn.score(X_test, y_test)
print(neighbors, '\n', train_accuracies, '\n', test_accuracies)
- Graph showing training and testing model accuracies
# matplotlib is imported here so this cell runs top-to-bottom; the notebook
# otherwise imports it only in a later cell.
import matplotlib.pyplot as plt

# Add a title
plt.title("KNN: Varying Number of Neighbors")

# Plot training accuracies.
# The dict view is materialized with list(): plt.plot expects array-like
# data, and a bare dict_values object is not reliably treated as a sequence
# across matplotlib versions (it can be seen as a single object, producing
# an x/y length mismatch). Dicts preserve insertion order, so the values
# line up with `neighbors`.
plt.plot(neighbors, list(train_accuracies.values()), label="Training Accuracy")

# Plot test accuracies
plt.plot(neighbors, list(test_accuracies.values()), label="Testing Accuracy")

plt.legend()
plt.xlabel("Number of Neighbors")
plt.ylabel("Accuracy")

# Display the plot
plt.show()
Introduction to Regression
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Feature matrix: the radio column's values reshaped into a single-column
# 2-D array, the (n_samples, n_features) layout scikit-learn estimators expect
X = sales_df['radio'].values.reshape(-1, 1)

# Target vector: the sales column's values
y = sales_df['sales'].values

# Check the shape of the features and targets
print(X.shape, y.shape)
# Import LinearRegression
from sklearn.linear_model import LinearRegression

# Create the model and fit it to the data in one step
# (fit() returns the fitted estimator itself)
reg = LinearRegression().fit(X, y)

# Predict on the same X the model was fitted on and show the first five values
predictions = reg.predict(X)
print(predictions[:5])