# Salary Prediction Model

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn.utils.validation import check_is_fitted

# Load the salary dataset (path is relative to the working directory) and
# discard every row that has at least one missing value.
df = pd.read_csv("Salary Data.csv").dropna()

# Tally how many rows carry each job title.
jt = list(df["Job Title"])
job_dict = {}

for job in jt:
    # dict.get supplies 0 the first time a title is encountered, so no
    # separate "already seen?" branch is needed.
    job_dict[job] = job_dict.get(job, 0) + 1

# Inspect the rows for one specific title. As a bare expression this is only
# displayed in a notebook / REPL; in a plain script it has no visible effect.
df[df["Job Title"] == "Director of Marketing"]

# Scatter plot of salary against age to eyeball the relationship
# before fitting anything.
plt.scatter(df["Age"], df["Salary"])
plt.title("Salary vs. Age")
plt.xlabel("Employee Age")
plt.ylabel("Salary")

# Preparing the model inputs
# NOTE(review): every row of df is used for training here; no held-out
# test set is created in this section.

# 1. Feature matrix -- selecting with a list keeps the result a
#    one-column DataFrame rather than a Series.
features = ["Age"]
X_train = df.loc[:, features]
X_train.shape

# 2. Target vector -- the column the model will learn to predict.
target = "Salary"
y_train = df.loc[:, target]
y_train.shape

# Getting a baseline for our model: always predict the mean training salary.
y_mean = y_train.mean()
y_mean

# One copy of the mean per training row, so it lines up with y_train.
y_prediction_baseline = [y_mean] * len(y_train)
y_prediction_baseline[:2]

# Overlay the constant baseline prediction on the raw data.
plt.plot(X_train["Age"], y_prediction_baseline, color="orange", label="Model Baseline")
plt.scatter(x=df["Age"], y=df["Salary"])
plt.xlabel("Employee Age")
plt.ylabel("Salary")
plt.title("Salary vs. Age")
# Without a legend the "Model Baseline" label set above is never drawn.
plt.legend()

# Getting the mean absolute error of the baseline; a trained model should
# score lower than this to be worth using.
mae = mean_absolute_error(y_train, y_prediction_baseline)
mae

# Creating the model (scikit-learn linear regression, default settings)
model = LinearRegression()

# Sanity check on the estimator type; as a bare expression the result is
# only displayed in a notebook / REPL.
isinstance(model, LinearRegression)

# Fitting/training the model on the Age feature against Salary
model.fit(X_train, y_train)