Skip to content
# Machine Learning Testing using Deepchecks
%pip install deepchecks --upgrade -q
# Data Integrity Suite with Deepchecks
import pandas as pd
# Read the loan dataset from "loan_data.csv"; the path is relative to the
# current working directory.
loan_data = pd.read_csv("loan_data.csv")
# Preview the first rows. As a bare expression this only renders output in a
# notebook/REPL cell; in a plain script it has no visible effect.
loan_data.head()
from sklearn.model_selection import train_test_split
from deepchecks.tabular import Dataset
# Column of the loan data that is used as the prediction target.
label_col = 'not.fully.paid'

# Wrap the raw frame in a deepchecks Dataset, declaring which column is the
# label and that "purpose" is a categorical feature.
deep_loan_data = Dataset(
    loan_data,
    label=label_col,
    cat_features=["purpose"],
)
from deepchecks.tabular.suites import data_integrity
# Build the data-integrity suite and run it on the wrapped loan dataset.
integ_suite = data_integrity()
suite_result = integ_suite.run(deep_loan_data)
# Render the suite report through each of the three output routes used here:
# inline display, display inside an iframe, and export to an HTML file.
suite_result.show()
suite_result.show_in_iframe()
# No file name is passed, so the library's default output location is used.
suite_result.save_as_html()
from deepchecks.tabular.checks import IsSingleValue, DataDuplicates
# Run the IsSingleValue check on its own (outside of a suite) and look at the
# raw value it produced.
single_value_check = IsSingleValue()
result = single_value_check.run(deep_loan_data)
result.value

# Same pattern for the DataDuplicates check; `result` is rebound to the new
# outcome, so the previous check's result is no longer reachable through it.
duplicates_check = DataDuplicates()
result = duplicates_check.run(deep_loan_data)
result.value
# Machine Learning Testing with Deepchecks
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder
# Train/test split, stratified on the label so both splits keep the same
# class balance. random_state is fixed for reproducibility.
df_train, df_test = train_test_split(
    loan_data,
    stratify=loan_data[label_col],
    random_state=0,
)
# Work on independent copies so the column assignments below neither touch
# `loan_data` nor trigger pandas' SettingWithCopyWarning.
df_train = df_train.copy()
df_test = df_test.copy()

# Encode the 'purpose' column.
# The encoder is fitted on the training split ONLY and then re-used for the
# test split, so a given category always maps to the same integer in both.
# (Re-fitting on the test split would silently produce a different mapping
# whenever the two splits do not contain exactly the same set of categories.)
# `transform` raises ValueError if the test split contains a category that was
# never seen during fitting, which surfaces the problem instead of hiding it.
label_encoder = LabelEncoder()
df_train['purpose'] = label_encoder.fit_transform(df_train['purpose'])
df_test['purpose'] = label_encoder.transform(df_test['purpose'])
# Define the three base models that make up the ensemble.
# random_state is pinned where the estimator accepts one so runs are repeatable.
model_1 = LogisticRegression(random_state=1, max_iter=10000)
model_2 = RandomForestClassifier(n_estimators=50, random_state=1)
model_3 = GaussianNB()

# Create the VotingClassifier with soft voting over the three base models.
# Each estimator is keyed by a short name that matches its type; the Gaussian
# naive Bayes model is registered as 'gnb' (it was previously mislabelled
# 'svc', although no support-vector classifier is part of this ensemble).
clf_model = VotingClassifier(
    estimators=[('lr', model_1), ('rf', model_2), ('gnb', model_3)],
    voting='soft',
)

# Train the model on every column except the label.
clf_model.fit(df_train.drop(label_col, axis=1), df_train[label_col])
from deepchecks.tabular.suites import model_evaluation
# Wrap both splits as deepchecks Datasets.
# 'purpose' has been integer-encoded, but it is still a categorical feature, so
# it is declared as such here -- consistent with how the raw data is declared
# for the data-integrity suite. Passing an empty list would tell deepchecks
# to treat the encoded codes as an ordinary numeric column.
deep_train = Dataset(df_train, label=label_col, cat_features=["purpose"])
deep_test = Dataset(df_test, label=label_col, cat_features=["purpose"])

# Run the model-evaluation suite on the train/test pair with the fitted
# ensemble, then render the report inside an iframe.
evaluation_suite = model_evaluation()
suite_result = evaluation_suite.run(deep_train, deep_test, clf_model)
suite_result.show_in_iframe()
# suite_result.to_json()
from deepchecks.tabular.checks import LabelDrift
# Run the LabelDrift check on the training split against the test split.
check = LabelDrift()
result = check.run(
    train_dataset=deep_train,
    test_dataset=deep_test,
)
# Raw value computed by the check (displayed only in a notebook/REPL cell).
result.value