Skip to content
Supervised Learning with scikit-learn
Supervised Learning with scikit-learn
Run the hidden code cell below to import the data used in this course.
# Load the course datasets into pandas DataFrames.
import pandas as pd

# Paths are relative, so they resolve against the current working directory.
diabetes = pd.read_csv('datasets/diabetes_clean.csv')
music = pd.read_csv('datasets/music_clean.csv')
advertising = pd.read_csv('datasets/advertising_and_sales_clean.csv')
telecom = pd.read_csv("datasets/telecom_churn_clean.csv")

# Preview the first rows of the diabetes data (the notebook hides this output).
diabetes.head()
# Feature matrix: the 'bmi' and 'age' columns, converted to a NumPy array.
X = diabetes[['bmi', 'age']].values
# Target vector: the 'diabetes' column, converted to a NumPy array.
y = diabetes['diabetes'].values
# Print both shapes so a row-count mismatch between X and y is visible.
print(X.shape, y.shape)

# Take Notes
Add notes about the concepts you've learned and code cells with code you want to keep.
Add your notes here
# Add your code snippets here
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression on a 70/30 train/test split of X and y.
logreg = LogisticRegression()
# random_state is fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
logreg.fit(X_train, y_train)

# Predicted class labels for the held-out rows (the notebook hides this output).
y_pred = logreg.predict(X_test)
print(y_pred)

# Column 1 of predict_proba is the probability of the second entry in
# logreg.classes_ (the notebook hides this output).
y_pred_probs = logreg.predict_proba(X_test)[:, 1]
print(y_pred_probs)
from sklearn.metrics import roc_curve

# Compute the ROC curve points from the test labels and predicted probabilities.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_probs)

# Tabulate the curve, one row per threshold.
db_roc = pd.DataFrame({
    'fpr': fpr,
    'tpr': tpr,
    'thresholds': thresholds,
})

# Ratio of true-positive rate to false-positive rate at each threshold.
# A 0/0 row evaluates to NaN and is removed by dropna(); a row with fpr == 0
# and tpr > 0 evaluates to inf, which dropna() does not remove.
# NOTE(review): the column name 'raio' looks like a typo for 'ratio'; it is
# kept unchanged so any code that reads this column keeps working.
db_roc['raio'] = db_roc['tpr'] / db_roc['fpr']
db_roc.dropna(inplace=True)
db_roc.head()

# Needed by the plotting statements that follow.
import matplotlib.pyplot as plt
# Dashed black diagonal from (0, 0) to (1, 1) as a visual reference line.
plt.plot([0, 1], [0, 1], 'k--')
# ROC curve of the fitted model: true-positive rate against false-positive rate.
plt.plot(fpr, tpr)
# show must be called; a bare `plt.show` only references the function.
plt.show()

from sklearn.metrics import roc_auc_score

# Area under the ROC curve for the same test labels and probabilities.
print(roc_auc_score(y_test, y_pred_probs))