Skip to content
Salary Prediction Model
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn.utils.validation import check_is_fitted

# Load the salary dataset and drop every row containing a missing value so
# the counting, plotting and fitting steps only see complete records.
df = pd.read_csv("Salary Data.csv")
df.dropna(inplace=True)

# Tally how many rows carry each job title (title -> number of rows).
jt = list(df["Job Title"])
job_dict = {}
for job in jt:
    # dict.get supplies 0 the first time a title is encountered.
    job_dict[job] = job_dict.get(job, 0) + 1

# Notebook-style inspection of the rows titled "Director of Marketing".
# As a bare expression it only renders output when run as a notebook cell.
df[df["Job Title"] == "Director of Marketing"]

# Scatter plot of salary against employee age.
plt.scatter(x=df["Age"], y=df["Salary"])
plt.xlabel("Employee Age")
plt.ylabel("Salary")
# The trailing semicolon keeps a notebook from echoing the returned object.
plt.title("Salary vs. Age");

# Splitting Data
# Training Data and Testing Data
# NOTE: no rows are held out here; the whole frame is used as training data.
# 1. Feature Matrix
features = ["Age"]
X_train = df[features]  # list selector keeps this a 2-D DataFrame
X_train.shape  # notebook display expression; no effect in a plain script

# 2. Target Vector
target = "Salary"
y_train = df[target]  # string selector yields a 1-D Series
y_train.shape  # notebook display expression; no effect in a plain script

# Getting a baseline for our model
# The baseline "model" predicts the mean training salary for every row.
y_mean = y_train.mean()
y_mean  # notebook display expression; no effect in a plain script

# One copy of the mean per training row, so it lines up with y_train.
y_prediction_baseline = [y_mean] * len(y_train)
y_prediction_baseline[:2]  # notebook display expression

# Draw the constant baseline over the raw salary-vs-age scatter.
plt.plot(
    X_train["Age"],
    y_prediction_baseline,
    color="orange",
    label="Model Baseline",
)
plt.scatter(x=df["Age"], y=df["Salary"])
plt.xlabel("Employee Age")
plt.ylabel("Salary")
# Without a legend the label passed to plt.plot above is never shown.
plt.legend()
# The trailing semicolon keeps a notebook from echoing the returned object.
plt.title("Salary vs. Age");

# Getting the mean absolute error
# Mean absolute error of the constant baseline against the training targets.
mae = mean_absolute_error(
    y_true=y_train,
    y_pred=y_prediction_baseline,
)
mae  # notebook display expression; no effect in a plain script

# Creating the model
# Instantiate an unfitted linear regression estimator.
model = LinearRegression()
# Notebook-style sanity check; as a bare expression its result is discarded
# when this runs as a plain script.
isinstance(model, LinearRegression)

# Fitting/training the model
model.fit(X_train, y_train)