Skip to content
%pip install deepchecks --upgrade -q

# Data Integrity Suite with Deepchecks

import pandas as pd

# Read the loan dataset from a CSV file; the relative path is resolved against
# the current working directory.
loan_data = pd.read_csv("loan_data.csv")
# Preview the first rows (displayed when this is the last expression of a
# notebook cell).
loan_data.head()
from sklearn.model_selection import train_test_split
from deepchecks.tabular import Dataset

# Column of loan_data that is used as the label throughout this script.
label_col = 'not.fully.paid'

# Wrap the raw frame in a Deepchecks Dataset, declaring the label column and
# marking "purpose" as a categorical feature.
deep_loan_data = Dataset(loan_data, label=label_col, cat_features=["purpose"])

from deepchecks.tabular.suites import data_integrity

# Build the built-in data-integrity suite and run it on the wrapped dataset.
integ_suite = data_integrity()
suite_result = integ_suite.run(deep_loan_data)
# Display the suite report inline.
suite_result.show()

# Display the same result via the iframe-based renderer.
suite_result.show_in_iframe()
# Export the report as HTML; no file name is passed, so the library's default
# output target is used.
suite_result.save_as_html()
from deepchecks.tabular.checks import IsSingleValue, DataDuplicates

# Run two individual checks on their own instead of through the full suite.
# `result` is deliberately reused, so after this block it holds the
# DataDuplicates outcome.

# IsSingleValue check.
single_value_check = IsSingleValue()
result = single_value_check.run(deep_loan_data)
result.value

# DataDuplicates check.
duplicates_check = DataDuplicates()
result = duplicates_check.run(deep_loan_data)
result.value

# Machine Learning Testing with Deepchecks

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder

# Train test split
# Stratify on the label so both splits keep the label distribution of the full
# data; the fixed random_state makes the split reproducible.
df_train, df_test = train_test_split(loan_data, stratify=loan_data[label_col], random_state=0)

# Work on explicit copies so the column assignments below modify independent
# frames rather than slices derived from loan_data (this also avoids pandas'
# SettingWithCopyWarning on pandas versions that emit it).
df_train = df_train.copy()
df_test = df_test.copy()

# Encode the 'purpose' column
# The encoder is fitted on the training split only and that same mapping is
# then applied to the test split. Re-fitting on the test split would derive a
# second, independent mapping, so identical category names are not guaranteed
# to receive identical integer codes. `transform` raises ValueError if the test
# split contains a category that was never seen during fitting, which is
# preferable to silently mis-encoding it.
label_encoder = LabelEncoder()
df_train['purpose'] = label_encoder.fit_transform(df_train['purpose'])
df_test['purpose'] = label_encoder.transform(df_test['purpose'])

# Define models
model_1 = LogisticRegression(random_state=1, max_iter=10000)
model_2 = RandomForestClassifier(n_estimators=50, random_state=1)
model_3 = GaussianNB()

# Create the VotingClassifier
# voting='soft' combines the estimators through their predicted class
# probabilities rather than through a majority vote on predicted labels.
# NOTE: the third estimator is registered under the name 'svc' although it is a
# GaussianNB; the name is kept unchanged so lookups by estimator name keep
# working.
clf_model = VotingClassifier(
    estimators=[('lr', model_1), ('rf', model_2), ('svc', model_3)],
    voting='soft'
)

# Train the model
# Every column except the label is used as a feature.
clf_model.fit(df_train.drop(label_col, axis=1), df_train[label_col])
from deepchecks.tabular.suites import model_evaluation

# Wrap the encoded train/test frames for Deepchecks. An empty cat_features
# list is passed explicitly, so no column is declared categorical here.
# NOTE(review): 'purpose' was label-encoded to integers above but is still a
# categorical variable — confirm that treating it as numeric is intended.
deep_train = Dataset(df_train, label=label_col, cat_features=[])
deep_test =  Dataset(df_test,  label=label_col, cat_features=[])

# Run the built-in model-evaluation suite on the train/test datasets together
# with the fitted voting classifier, then display the result.
evaluation_suite = model_evaluation()
suite_result = evaluation_suite.run(deep_train, deep_test, clf_model)
suite_result.show_in_iframe()
# Optional alternative output, currently disabled:
# suite_result.to_json()
from deepchecks.tabular.checks import LabelDrift
# Run the LabelDrift check on its own, passing the train and test datasets.
check = LabelDrift()
result = check.run(deep_train, deep_test)
# Value produced by the check (displayed when this is the last expression of a
# notebook cell).
result.value