Skip to content
New Workbook
Sign up
INFC Example: Predicting GPA, Supervised Learning ML Multivariate Linear Regression
# Machine Learning Project Sample for INFC: Predicting Student GPA with Multivariate Linear Regression

# This project demonstrates how to use multivariate linear regression to predict student GPA based on several predictors such as study hours, attendance rate, previous GPA, and participation in extracurricular activities.

## Import Necessary Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

# Sample data, modified for privacy purposes.
# One list per column; every list holds one value per student (10 students).
data = dict(
    Study_Hours=[2, 4, 6, 8, 5, 7, 9, 11, 3, 10],
    Attendance_Rate=[90, 95, 85, 80, 75, 70, 95, 90, 85, 100],
    Previous_GPA=[3.0, 3.2, 2.8, 3.5, 2.5, 3.0, 3.8, 3.6, 2.9, 3.9],
    Participation=[1, 1, 0, 1, 0, 0, 1, 1, 0, 1],
    Target_GPA=[3.1, 3.5, 2.9, 3.6, 2.7, 3.0, 3.9, 4.0, 2.8, 3.8],
)

# Tabular view of the sample: dict keys become the column labels
df = pd.DataFrame.from_dict(data)

# Scatter each predictor against the target GPA, with a fitted regression line
sns.pairplot(
    df,
    x_vars=['Study_Hours', 'Attendance_Rate', 'Previous_GPA', 'Participation'],
    y_vars='Target_GPA',
    kind='reg',
)
plt.show()

# Annotated heatmap of the pairwise correlations between all columns
correlations = df.corr()
plt.figure(figsize=(8, 6))
sns.heatmap(correlations, annot=True, fmt=".2f")
plt.show()


# Feature matrix: the four predictor columns
X = df.loc[:, ['Study_Hours', 'Attendance_Rate', 'Previous_GPA', 'Participation']]

# Target vector: the GPA the model should predict
y = df.loc[:, 'Target_GPA']

# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# Create the linear regression model and fit it on the training split
# (fit() returns the estimator itself, so the calls can be chained)
model = LinearRegression().fit(X_train, y_train)

# Predict the held-out rows and score the predictions against the true values
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Predict GPA for a new student.
# Values follow the training column order:
# Study_Hours, Attendance_Rate, Previous_GPA, Participation.
new_student = np.array([[7, 85, 3.3, 1]])  # Example features for a new student

# The model was fitted on a DataFrame, so wrap the row in a DataFrame carrying
# the same column labels. Passing the bare array makes scikit-learn warn that
# "X does not have valid feature names". The array is already 2-D (1 row x 4
# features), so no reshape is needed.
new_student_df = pd.DataFrame(new_student, columns=X.columns)
predicted_gpa = model.predict(new_student_df)
print(f'Predicted GPA: {predicted_gpa[0]}')