Skip to content

Supervised Learning with scikit-learn

Run the hidden code cell below to import the data used in this course.

# Importing pandas
import pandas as pd

# Load the cleaned course datasets, one DataFrame per CSV file, reading the
# files in the order listed.
diabetes, music, advertising, telecom = (
    pd.read_csv(csv_path)
    for csv_path in (
        'datasets/diabetes_clean.csv',
        'datasets/music_clean.csv',
        'datasets/advertising_and_sales_clean.csv',
        "datasets/telecom_churn_clean.csv",
    )
)

# Preview the first rows of the diabetes data.
diabetes.head()
Hidden output
# Features: the 'bmi' and 'age' columns as a 2-D array.
# Target: the 'diabetes' column as a 1-D array.
X = diabetes.loc[:, ['bmi', 'age']].values
y = diabetes.loc[:, 'diabetes'].values

# Show both shapes to confirm they have the same number of rows.
print(f"{X.shape} {y.shape}")

Take Notes

Add notes about the concepts you've learned and code cells with code you want to keep.

Add your notes here

# Add your code snippets here
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Hold out 30% of the samples for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit a logistic regression classifier with default settings on the training
# portion (fit() returns the estimator itself, so it can be chained).
logreg = LogisticRegression().fit(X_train, y_train)

# Predict class labels for the held-out samples and print them.
y_pred = logreg.predict(X_test)
print(y_pred)
Hidden output
# Per-class probability estimates for the held-out samples, one column per class.
class_probabilities = logreg.predict_proba(X_test)

# Keep only the second column (index 1) and print it.
y_pred_probs = class_probabilities[:, 1]
print(y_pred_probs)
Hidden output
from sklearn.metrics import roc_curve
# Compute the ROC curve points from the true test labels and the predicted
# probabilities.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_probs)

# Tabulate the curve: one row per threshold.
db_roc = pd.DataFrame({'fpr': fpr, 'tpr': tpr, 'thresholds': thresholds})

# Ratio of true-positive rate to false-positive rate for each row.
# NOTE(review): the column key 'raio' looks like a typo for 'ratio'; it is
# kept unchanged because other code may look the column up by this name.
db_roc['raio'] = db_roc['tpr'].div(db_roc['fpr'])

# Discard rows that contain NaN in any column (e.g. a 0/0 ratio).
# NOTE(review): a row with fpr == 0 and tpr > 0 yields inf, which dropna()
# does not remove -- confirm whether such rows should be dropped as well.
db_roc.dropna(inplace=True)

# Preview the first rows of the table.
db_roc.head()
import matplotlib.pyplot as plt

# Dashed black diagonal from (0, 0) to (1, 1): the reference line of a
# classifier that guesses at random.
plt.plot([0, 1], [0, 1], 'k--')

# ROC curve of the model: true-positive rate against false-positive rate.
plt.plot(fpr, tpr)

# Call show() -- the original referenced `plt.show` without parentheses,
# which only evaluates the function object and never displays the figure.
plt.show()
from sklearn.metrics import roc_auc_score
# Area under the ROC curve for the predicted probabilities on the test set.
auc_score = roc_auc_score(y_test, y_pred_probs)
print(auc_score)