Skip to content

Traffic data fluctuates constantly and is strongly affected by time. Predicting it can be challenging, but this task will help sharpen your time-series skills. With deep learning, you can exploit abstract patterns in the data to improve prediction accuracy.

Your task is to build a system that predicts traffic volume, i.e. the number of vehicles passing a specific point at a specific time. Determining this can help reduce road congestion, support new designs for roads or intersections, improve safety, and more! Or, you can use it to help plan your commute to avoid traffic!

The dataset provided contains the hourly traffic volume on an interstate highway in Minnesota, USA. It also includes weather features and holidays, which often impact traffic volume.

Time to predict some traffic!

The data:

The dataset is collected and maintained by the UCI Machine Learning Repository. The target variable is traffic_volume. The data has already been normalized and saved into the training and test sets listed below, which contain the following columns:

train_scaled.csv, test_scaled.csv

| Column | Type | Description |
| --- | --- | --- |
| temp | Numeric | Average temp in kelvin |
| rain_1h | Numeric | Amount in mm of rain that occurred in the hour |
| snow_1h | Numeric | Amount in mm of snow that occurred in the hour |
| clouds_all | Numeric | Percentage of cloud cover |
| date_time | DateTime | Hour of the data collected in local CST time |
| holiday_ (11 columns) | Categorical | US National holidays plus regional holiday, Minnesota State Fair |
| weather_main_ (11 columns) | Categorical | Short textual description of the current weather |
| weather_description_ (35 columns) | Categorical | Longer textual description of the current weather |
| hour_of_day | Numeric | The hour of the day |
| day_of_week | Numeric | The day of the week (0=Monday, 6=Sunday) |
| day_of_month | Numeric | The day of the month |
| month | Numeric | The number of the month |
| traffic_volume | Numeric | Hourly I-94 ATR 301 reported westbound traffic volume |
# Import the relevant libraries
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
# Load the training split and expose it both as a DataFrame and as a plain
# NumPy array (the array form is what the sequence builder consumes)
train_scaled_df = pd.read_csv('train_scaled.csv')
train_scaled = train_scaled_df.to_numpy()

# Same treatment for the test split
test_scaled_df = pd.read_csv('test_scaled.csv')
test_scaled = test_scaled_df.to_numpy()
# Start coding here
# Use as many cells as you like!
def create_sequences(data, seq_length, y_col_idx):
    """Build sliding-window samples and next-step targets from a 2-D array.

    Window ``i`` is rows ``i`` .. ``i + seq_length - 1`` of ``data`` (all
    columns); its target is column ``y_col_idx`` of row ``i + seq_length``,
    i.e. the row immediately following the window.

    Args:
        data: 2-D array-like, one row per time step and one column per
            feature.
        seq_length: Number of consecutive rows in each window (>= 0).
        y_col_idx: Column index of the target value (negative indices
            count from the end, as usual).

    Returns:
        A tuple ``(xs, ys)`` where ``xs`` has shape
        ``(n_windows, seq_length, n_columns)`` and ``ys`` has shape
        ``(n_windows,)``, with ``n_windows = max(len(data) - seq_length, 0)``.
        When there are too few rows for a single window both arrays are
        empty but keep their full dimensionality.

    Raises:
        ValueError: If ``seq_length`` is negative.
    """
    if seq_length < 0:
        raise ValueError(f"seq_length must be non-negative, got {seq_length}")

    data = np.asarray(data)
    n_windows = max(len(data) - seq_length, 0)

    # Row-index matrix of shape (n_windows, seq_length): row i holds
    # i, i+1, ..., i+seq_length-1. Indexing with it gathers every window at
    # once and yields a correctly shaped (0, seq_length, n_columns) array
    # when n_windows is zero.
    window_idx = np.arange(n_windows)[:, None] + np.arange(seq_length)
    xs = data[window_idx]

    # The target for window i lives in row i + seq_length; copy so the result
    # does not alias the caller's array.
    ys = data[seq_length:, y_col_idx].copy()
    return xs, ys

# Window both splits: every sample is 12 consecutive rows, and its label is
# the last column (index -1) of the row that follows the window
X_train, y_train = create_sequences(train_scaled, seq_length=12, y_col_idx=-1)
X_test, y_test = create_sequences(test_scaled, seq_length=12, y_col_idx=-1)


def _to_float_dataset(features, targets):
    """Wrap a pair of NumPy arrays as a float32 ``TensorDataset``."""
    return TensorDataset(
        torch.tensor(features.astype(np.float32)),
        torch.tensor(targets.astype(np.float32)),
    )


# PyTorch-compatible datasets for the training and test splits
dataset_train = _to_float_dataset(X_train, y_train)
dataset_test = _to_float_dataset(X_test, y_test)

# Serve both splits in mini-batches of 64 (a common trade-off between
# training speed and memory usage). Only the training loader shuffles, to
# reduce the risk of overfitting; the test loader keeps the original order,
# following best practices.
dataloader_train = DataLoader(dataset_train, batch_size=64, shuffle=True)
dataloader_test = DataLoader(dataset_test, batch_size=64, shuffle=False)

# Define an LSTM network
class TrafficVolume(nn.Module):
    """LSTM regressor mapping a window of feature rows to a single value.

    The network is a stacked LSTM followed by one linear layer and a
    LeakyReLU. Only the final hidden state of the top LSTM layer is fed to
    the linear head.

    Args:
        input_size: Number of features per time step. Defaults to 66, the
            value previously hard-coded for this dataset.
        hidden_size: Width of the LSTM hidden state. Defaults to 64.
        num_layers: Number of stacked LSTM layers. Defaults to 2.
    """

    def __init__(self, input_size: int = 66, hidden_size: int = 64,
                 num_layers: int = 2):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, seq_len, features)
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        # NOTE: the attribute keeps its historical name ``relu`` for
        # compatibility, but the activation is a LeakyReLU.
        self.relu = nn.LeakyReLU()

        # Linear head: hidden state -> one regression output
        self.fc1 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return predictions of shape ``(batch, 1)`` for input ``x``.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``.
        """
        # nn.LSTM returns (output, (h_n, c_n)); h_n stacks the final hidden
        # state of every layer along dim 0.
        _, (h_n, _) = self.lstm(x)
        # Keep only the top layer's final hidden state: (batch, hidden_size)
        last_layer_hidden = h_n[-1]
        # Linear projection followed by LeakyReLU
        return self.relu(self.fc1(last_layer_hidden))
    
# Set-up for training
# NOTE(review): these three values mirror the sizes used inside TrafficVolume
# but are not referenced by the visible code; kept in case later cells use them.
n_features = 66
hidden_size = 64
num_layers = 2

# Initialize the model, saving it to traffic_model
traffic_model = TrafficVolume()

# Define the loss function and optimizer
# MSE is commonly used for regression tasks
criterion = nn.MSELoss()
optimizer = optim.Adam(traffic_model.parameters(), lr=0.0001)

# Make sure the model is in training mode before optimizing
traffic_model.train()

# Train the model with 2 epochs
final_training_loss = 0
for epoch in range(2):
    for batch_x, batch_y in dataloader_train:
        # Clear gradients left over from the previous step
        optimizer.zero_grad()
        # The model emits (batch, 1) while the targets are (batch,). Drop the
        # trailing dimension so MSELoss compares them element-wise instead of
        # broadcasting both to (batch, batch) and optimizing the wrong loss.
        outputs = traffic_model(batch_x).squeeze(-1)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
    # Reports the loss of the last mini-batch of the epoch
    print("Epoch: %d, train loss: %1.5f" % (epoch+1, loss.item()))
    # Detach so the stored value does not keep the autograd graph alive
    final_training_loss = loss.detach()

# Set the model to evaluation mode
traffic_model.eval()

# Initialize variables to store outputs and labels
all_predictions = []
all_labels = []

# Disable gradient calculation during inference
with torch.no_grad():
    for seqs, labels in dataloader_test:
        # squeeze(-1) rather than squeeze(): a final batch holding a single
        # sample would otherwise collapse to a 0-d tensor, which torch.cat
        # refuses to concatenate.
        outputs = traffic_model(seqs).squeeze(-1)
        all_predictions.append(outputs)
        all_labels.append(labels)

# Concatenate all predictions and labels as PyTorch tensors
all_predictions = torch.cat(all_predictions)
all_labels = torch.cat(all_labels)

# Calculate MSE directly with PyTorch
test_mse = F.mse_loss(all_predictions, all_labels)

print(f'Test MSE: {test_mse.item()}')