Skip to content

Bank Marketing

This dataset consists of direct marketing campaigns by a Portuguese banking institution using phone calls. The campaigns aimed to sell subscriptions to a bank term deposit (see variable y).

Not sure where to begin? Scroll to the bottom to find challenges!

import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.feature_selection import RFE

import plotly.express as px
from sklearn.preprocessing import LabelEncoder
# Load the campaign data; the file is read with ';' as the field separator.
df = pd.read_csv("bank-marketing.csv", sep=";")
# Bare expression: shows the frame when run as a notebook cell.
df
# Rows whose job is recorded as the literal string 'unknown'.
df.loc[df["job"] == "unknown"]
# Print the category frequencies of each listed column, every report followed
# by a ruler line.  Columns flagged True additionally get their non-null
# count printed right after the frequencies.
_RULER = "\n==========================\n"
_COLUMNS_TO_SUMMARISE = (
    ("job", True),
    ("marital", False),
    ("education", True),
    ("default", False),
    ("housing", False),
    ("loan", False),
    ("contact", False),
    ("month", False),
    ("day_of_week", False),
    ("y", False),
    ("poutcome", False),
)
for _column, _show_count in _COLUMNS_TO_SUMMARISE:
    print(df[_column].value_counts(), end=_RULER)
    if _show_count:
        print(df[_column].count(), end=_RULER)
# Copy of the data in which every 'unknown' value is swapped for an empty
# string; the original frame `df` is left untouched.
df3 = df.replace("unknown", "")
# Job frequencies after the replacement.
print(df3["job"].value_counts(), end="\n==========================\n")
# One-hot encode the categorical predictors.  Each listed column is replaced
# by one indicator column per category (e.g. 'job_unknown').
_categorical_columns = [
    'education', 'job', 'marital', 'default', 'housing', 'loan', 'contact',
    'month', 'day_of_week', 'poutcome',
]
df2 = pd.get_dummies(data=df, columns=_categorical_columns)

# The target 'y' is not part of the encoding above, but the following cells
# compute `.corr()["y"]`, which only works on a numeric column.  If 'y' still
# holds labels (presumably 'no'/'yes' -- confirm against the value counts
# printed earlier), convert it to integer codes in sorted label order, i.e.
# 'no' -> 0 and 'yes' -> 1.  An already-numeric target is left as is.
if not pd.api.types.is_numeric_dtype(df2["y"]):
    df2["y"] = pd.factorize(df2["y"], sort=True)[0]

# Bare expression: shows the encoded frame when run as a notebook cell.
df2
# Correlation between the target 'y' and each '<column>_unknown' indicator
# produced by the one-hot encoding.
_unknown_indicators = [
    'job_unknown',
    'education_unknown',
    'marital_unknown',
    'default_unknown',
    'loan_unknown',
    'housing_unknown',
]
Relation = df2[['y'] + _unknown_indicators]
# Keep only the 'y' column of the correlation matrix and drop y's
# self-correlation so the chart shows the indicators alone.
plot_ = Relation.corr()["y"].drop('y')
plot_.plot(kind='barh')
# `new_df` is another name for the same object as `df2` (no copy is made).
new_df = df2
new_df

# Correlation of every column with the target, without the target's own
# entry, ordered from the most negative to the most positive value.
corr = new_df.corr()["y"].drop("y").sort_values()

# Optional styling, currently disabled: sns.set_style("darkgrid")

corr.plot(kind="barh", figsize=(10, 10), colormap="Pastel1")
plt.xlabel("amount of correlation to y column")
from functools import partial

# Split the encoded frame into the target and the predictor matrix.
y = new_df["y"]
x = new_df.drop(["y"], axis=1)

# Keep the 25 predictors with the highest mutual information with the target.
# mutual_info_classif is a randomised estimator (it perturbs continuous
# features with small noise), so without a fixed seed the scores -- and
# therefore the selected feature set -- can differ from run to run.  Pinning
# random_state makes the selection reproducible.
sc = SelectKBest(partial(mutual_info_classif, random_state=0), k=25)
new_x = sc.fit_transform(x, y)

# Names of the columns that survived the selection.
features = sc.get_feature_names_out()

# Re-use the correlations computed earlier, restricted to the selected
# features, and plot them in ascending order.
corr2 = corr[features].sort_values(ascending=True)
corr2.plot(kind="barh", figsize=(10, 10), colormap="Pastel1", title="25 Features")
plt.xlabel("amount of correlation to y column")

# Bare expression: shows the selected names when run as a notebook cell.
features