Skip to content

Fashion Forward is a new AI-based e-commerce clothing retailer. They want to use image classification to automatically categorize new product listings, making it easier for customers to find what they're looking for. It will also assist in inventory management by quickly sorting items.

As a data scientist tasked with implementing a garment classifier, my primary objective is to develop a machine learning model capable of accurately categorizing images of clothing items into distinct garment types such as shirts, trousers, shoes, etc.

I will do this by using the FashionMNIST dataset class from the torchvision.datasets module. It contains images of fashion items (like shoes, shirts, etc.) used for training machine learning models. It consists of:

60,000 training images

10,000 test images

Each image is a 28x28 grayscale image of a fashion item from one of 10 categories, such as sneakers, boots, or T-shirts.

# Run the cells below first
!pip install torchmetrics
!pip install torchvision
Hidden output
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
import torchvision.transforms as transforms
from torchmetrics import Accuracy, Precision, Recall

# Creating transformations that include Data Augmentation
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
])

# Loading the data
train_data = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_data = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())

# Getting class information and setting important parameters
classes = train_data.classes
num_classes = len(classes)
num_input_channels = 1
num_output_channels = 16
image_size = train_data[0][0].shape[1]

# Defining the CNN with Batch Normalization
class MultiClassImageClassifier(nn.Module):
    def __init__(self, num_classes):
        super(MultiClassImageClassifier, self).__init__()
        self.conv1 = nn.Conv2d(num_input_channels, num_output_channels, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(num_output_channels)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(num_output_channels * (image_size // 2) ** 2, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x

# DataLoader for training and test data
dataloader_train = DataLoader(train_data, batch_size=10, shuffle=True)
dataloader_test = DataLoader(test_data, batch_size=10, shuffle=False)

# Training function with early stopping mechanism 
def train_model(optimizer, net, num_epochs, patience):
    criterion = nn.CrossEntropyLoss()
    best_loss = float('inf')
    epochs_no_improve = 0
    for epoch in range(num_epochs):
        net.train()
        running_loss = 0
        num_processed = 0
        for features, labels in dataloader_train:
            optimizer.zero_grad()
            outputs = net(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_processed += len(labels)
        train_loss = running_loss / num_processed
        print(f'epoch {epoch}, loss: {train_loss}')

        # Early stopping
        net.eval()
        val_loss = 0
        for features, labels in dataloader_test:
            outputs = net(features)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
        val_loss /= len(dataloader_test)

        if val_loss < best_loss:
            best_loss = val_loss
            epochs_no_improve = 0
            best_model = net.state_dict()
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print('Early stopping!')
                net.load_state_dict(best_model)
                break

# Train the model with early stopping
net = MultiClassImageClassifier(num_classes)
optimizer = optim.Adam(net.parameters(), lr=0.001)
train_model(optimizer=optimizer, net=net, num_epochs=50, patience=5)

# Define metrics
accuracy_metric = Accuracy(task='multiclass', num_classes=num_classes)
precision_metric = Precision(task='multiclass', num_classes=num_classes, average=None)
recall_metric = Recall(task='multiclass', num_classes=num_classes, average=None)

# Evaluating the model
net.eval()
predictions = []
for i, (features, labels) in enumerate(dataloader_test):
    output = net(features)
    cat = torch.argmax(output, dim=-1)
    predictions.extend(cat.tolist())
    accuracy_metric(cat, labels)
    precision_metric(cat, labels)
    recall_metric(cat, labels)

# Computing metrics
accuracy = accuracy_metric.compute().item()
precision = precision_metric.compute().tolist()
recall = recall_metric.compute().tolist()
print('Accuracy:', accuracy)
print('Precision (per class):', precision)
print('Recall (per class):', recall)

The model is trained on the FashionMNIST dataset class. It is a Convolutional Neural Network that I initially trained to run for 50 epochs, but because of an early stopping function I applied, the model was able to achieve its best value at 15 epochs and didn't need to run all the way to 50. This early stopping mechanism monitors validation loss and stops training if it doesn’t improve for a certain number of epochs (the "patience" argument, which I set to 5). This helps optimize the number of training epochs and prevent overfitting. The model achieved an accuracy of apprx 90%.