Interactive Prediction Dashboard

Concrete Strength Prediction Dashboard - Using ipywidgets

💪 Challenge

I have built an application that will enable the students to test out samples by adjusting the mixture of concrete ingredients and instantly seeing the predicted concrete strength.

"The project leader asked you to find a simple way to estimate strength so that students can predict how a particular sample is expected to perform."

I trained, tested and validated five different models; the best model by far was the Random Forest Regressor.

The items below have been provided within the notebook:

The average strength of the concrete samples at 1, 7, 14, and 28 days of age.
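The coefficients $\beta_{0}, \beta_{1}, \ldots, \beta_{8}$ to use in the following formula:

$$\text{Concrete Strength} = \beta_{0} + \beta_{1}\cdot\text{cement} + \beta_{2}\cdot\text{slag} + \beta_{3}\cdot\text{fly ash} + \beta_{4}\cdot\text{water} + \beta_{5}\cdot\text{superplasticizer} + \beta_{6}\cdot\text{coarse aggregate} + \beta_{7}\cdot\text{fine aggregate} + \beta_{8}\cdot\text{age}$$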

#%%capture
#!pip install bqplot
#!pip install pandas_profiling

import pandas as pd
import numpy as np
# calculations
import numpy as np
import math
from scipy import stats

# visualisation libraries:
import matplotlib.pyplot as plt
import seaborn as sns
import pylab 

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import bqplot.pyplot as bplt

# Load the raw concrete dataset.
df = pd.read_csv('data/concrete_data.csv')

# Target column labels, listed in the same positional order as the CSV columns.
req_col_names = [
    "Cement", "BlastFurnaceSlag", "FlyAsh", "Water", "Superplasticizer",
    "CoarseAggregate", "FineAggregate", "Age", "CC_Strength",
]
curr_col_names = df.columns.tolist()

# Pair every existing label with the required label at the same position.
mapper = {
    current: req_col_names[position]
    for position, current in enumerate(curr_col_names)
}

# `data` is a renamed copy; `df` keeps its original labels.
data = df.rename(columns=mapper)

from ipywidgets.widgets import FloatSlider,IntSlider,Layout,HBox,VBox
from IPython.display import display
import ipywidgets as widgets

# One integer slider per model feature. continuous_update=False means the
# outputs are recomputed when the handle is released, not while it is dragged.
#
# Every start value must lie inside [min, max]. A start value outside the range
# is capped/floored by ipywidgets to the nearest bound, so the bound is written
# out explicitly where that applied (fly-ash, water, coarse and fine aggregate).
cement = widgets.IntSlider(description="Cement", min=102, max=540, value=250, continuous_update=False)
slag = widgets.IntSlider(description="Slag", min=0, max=359, value=250, continuous_update=False)
flyash = widgets.IntSlider(description="Fly-Ash", min=0, max=200, value=200, continuous_update=False)
water = widgets.IntSlider(description="Water", min=121, max=247, value=247, continuous_update=False)
superplasticizer = widgets.IntSlider(description="Superplasticizer", min=0, max=32, value=15, continuous_update=False)
coarse_aggregate = widgets.IntSlider(description="Coarse_Aggregate", min=801, max=1145, value=801, continuous_update=False)
fine_aggregate = widgets.IntSlider(description="Fine Aggregate", min=594, max=993, value=594, continuous_update=False)
age = widgets.IntSlider(description="Age", min=1, max=365, value=250, continuous_update=False)

# Features are every column except the last; the last column is the target.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Single 80/20 split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

# Keep handles on the unscaled frames: X_train / X_test are rebound to scaled
# arrays below, and the prediction callback still needs the original columns.
X_train2, X_test2 = X_train, X_test

# Fit the scaler on the training part only, then apply it to both parts.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score 
# Seeded 100-tree random forest, fitted on the scaled training features
# (fit() returns the estimator itself, so construction and fitting are chained).
rfr = RandomForestRegressor(random_state=111, n_estimators=100).fit(X_train, y_train)

# Predictions for the held-out test features.
y_pred_rfr = rfr.predict(X_test)

def f(cement, slag, flyash,water,superplasticizer,coarse_aggregate,fine_aggregate,age):
    """Print the model's strength prediction for one concrete mix.

    Callback for ``widgets.interactive_output``: each argument is the current
    value of the slider with the same name.

    Relies on module-level objects created earlier in the notebook:
    ``X_train2`` (unscaled training features), ``sc`` (the ``StandardScaler``
    fitted on that same training split) and ``rfr`` (the fitted regressor).
    """
    # One-row frame carrying the training column labels, so the scaler receives
    # the layout it was fitted on.
    # Assumes the dataset columns are ordered cement, slag, fly ash, water,
    # superplasticizer, coarse aggregate, fine aggregate, age.
    sample = pd.DataFrame(
        [[cement, slag, flyash, water, superplasticizer,
          coarse_aggregate, fine_aggregate, age]],
        columns=X_train2.columns,
    )
    # Reuse the scaler that was already fitted on the training split instead of
    # fitting a new one on every slider change.
    scaled_sample = sc.transform(sample)
    # predict() returns a length-1 array; take the element before int(), since
    # converting a 1-d array directly to a Python scalar is deprecated in NumPy.
    predicted_strength = rfr.predict(scaled_sample)[0]
    print("-"*100)
    print("Prediction for concrete strength is ", int(predicted_strength))
    print("The minimum strength in dataset is: 2 and the maximum is 83")
    


def n(cement, slag, flyash,water,superplasticizer,coarse_aggregate,fine_aggregate,age):
    """Print the total mix weight and the share of each ingredient.

    Callback for ``widgets.interactive_output``: each argument is the current
    value of the slider with the same name. ``age`` is accepted so the
    signature matches the slider set; it does not enter any calculation.

    Every percentage is truncated to a whole number with ``int``.
    """
    def _pct(fraction):
        # Whole-number percentage, truncated (not rounded).
        return int(fraction * 100)

    # The aggregate is coarse plus fine; both count toward the mix weight.
    aggregate_and_cement = coarse_aggregate + fine_aggregate + cement
    total_weight = water + aggregate_and_cement + slag + flyash + superplasticizer

    water_rate = water / (water + cement)
    cement_to_aggregate = cement / aggregate_and_cement

    print("-"*100)
    print("The total mix weight is:", int(total_weight)," - The minimum weight in dataset is: 2182 and the maximum is 2969")
    print("-"*100)
    print("Insights")
    print(" "*100)
    print("Proportion of water-to-cement is:", _pct(water_rate),"% - Lower water-to-cement ratio, increases strength but can make it difficult to work with. The minimum range in dataset is: 21% and the maximum is 65%")
    print("-"*100)
    print("Proportion of cement-to-aggregate is:", _pct(cement_to_aggregate),"% - Higher cement-aggregate ratio, increases strength. The minimum range in dataset is: 5% and the maximum is 24%")
    print("-"*100)
    print("Concrete Mix Proportions")
    print(" "*100)
    print("Proportion of slag is:", _pct(slag / total_weight),"% - The minimum range in dataset is: 0% and the maximum is 11%")
    print(" "*100)
    print("Proportion of fly-Ash is:", _pct(flyash / total_weight),"% - The minimum range in dataset is: 0% and the maximum is 8%")
    print(" "*100)
    print("Proportion of superplasticizer is:", _pct(superplasticizer / total_weight),"% - The minimum range in dataset is: 0% and the maximum is 1%")
    print(" "*100)
    print("Proportion of water is:", _pct(water / total_weight),"% - The minimum range in dataset is: 5% and the maximum is 11%")
    print(" "*100)
    print("Proportion of cement is:", _pct(cement / total_weight),"% - The minimum range in dataset is: 4% and the maximum is 23%")
    print(" "*100)
    print("Proportion of coarse_aggregate is:", _pct(coarse_aggregate / total_weight),"% - The minimum range in dataset is: 34% and the maximum is 47%")
    print(" "*100)
    print("Proportion of fine_aggregate is:", _pct(fine_aggregate / total_weight),"% - The minimum range in dataset is: 24% and the maximum is 41%")
    
    
# Both output areas listen to the same eight sliders, so the mapping from
# callback argument name to slider widget is defined once.
controls = {'cement': cement, 'slag': slag, 'flyash': flyash, 'water': water,
            'superplasticizer': superplasticizer, 'coarse_aggregate': coarse_aggregate,
            'fine_aggregate': fine_aggregate, 'age': age}
out = widgets.interactive_output(f, controls)   # printed strength prediction
sump = widgets.interactive_output(n, controls)  # printed mix weight / proportions

ui = widgets.VBox([cement, slag, flyash,water,superplasticizer,coarse_aggregate,fine_aggregate, age])
#display(ui,out)

# Static captions; every opened tag is closed and no unmatched closing tags remain.
title_01 = widgets.HTML(value='<p><font face="courier" size="+1" color="grey"><b>CONCRETE STRENGTH PREDICTION - CALCULATION DASHBOARD</b></font></p>')
text_01 = widgets.HTML(value='<p><font face="arial" size="2" color="grey">Use the scales to change the values, the proportional values to the right will show you the sample mix :</font></p>',description='')
text_02 = widgets.HTML(value='<p><font face="arial" size="2" color="grey">INSIGHTS <b>:</b></font></p>',description=' ')
text_test = widgets.HTML(value='<p><font face="arial" size="2" color="red">ESTIMATED CONCRETE PREDICTION :</font></p>',description=' ')

combo_02 = widgets.VBox([title_01,text_01,cement, water, slag, flyash,superplasticizer,coarse_aggregate,fine_aggregate,age])
combo_01= widgets.VBox([out])
# The Layout trait is spelled `align_items`.
# NOTE(review): 'flex-shrink' and 'direction' do not look like valid CSS values
# for flex / flex-flow, and box_layout is not attached to any widget in the
# visible code — confirm the intent before using it.
box_layout =  Layout(flex='flex-shrink',flex_flow ='direction',justify_content='space-between',align_items='center')

from ipywidgets import GridspecLayout
# 4x3 grid: left column holds captions, prediction and sliders; the two right
# columns of the first three rows hold the insights output.
grid = GridspecLayout(4, 3, height='600px')
grid[:3, 1:] = widgets.VBox([text_02,sump])
grid[:, 0] = widgets.VBox([title_01,text_01,text_test,out,ui])

grid

Concrete Strength at days 1, 7, 14 & 28

# Keep only the samples tested at 1, 7, 14 or 28 days, then average the
# strength per age (one row per age, 'age' kept as a regular column).
store_df_filtered = df[df['age'].isin([1, 7, 14, 28])]
store_df_filtered = store_df_filtered.groupby('age', as_index=False)[['strength']].mean()
# Summary statistics over the per-age mean table.
store_df_filtered.describe()

# Bar chart of the mean strength at each of the four ages.
sns.barplot(data = store_df_filtered
            ,x = 'age'
            ,y = 'strength'
            ).set(title='Strength at days 1, 7, 14, 28')

Exploratory Data Analysis

from pandas_profiling import ProfileReport 
#profile = ProfileReport(df, 
                        #title='Pandas Profiling Report', 
                        #html={'style':{'full_width':True}}) 
# Build a profiling report titled 'Pandas Profiling Report' for the raw
# dataset `df` and show it through the report's to_widgets() interface.
profile = ProfileReport(df, 
                        title='Pandas Profiling Report') 
profile.to_widgets()

#report = pp.ProfileReport(df)
#profile.to_file('profile_report.html')

‌
‌
‌

Interactive Prediction Dashboard - Predicting Concrete Strength