Introduction to Machine Learning in Geosciences Summer School at the University of Pisa
Final Project: Deep Learning for Garbage Classification using Images
Author: Alina Cherkas
This notebook develops a Convolutional Neural Network for garbage classification using the Recycling Dataset from Portland State University.
Libraries
# install specific package versions
!pip install -q -r requirements.txt
!pip show tensorflow
import os
import numpy as np
import pandas as pd
from PIL import Image
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import image_dataset_from_directory
from sklearn.metrics import classification_report, confusion_matrix
from tqdm import tqdm
import utils
# from dataset documentation
LABELS = ['boxes', 'glass_bottles', 'soda_cans', 'crushed_soda_cans', 'water_bottles']
1. Data Preparation
# download and unpack archived dataset
!curl http://web.cecs.pdx.edu/~singh/rcyc-web/recycle_data_shuffled.tar.gz --output recycle_data_shuffled.tar.gz
!tar -xvf recycle_data_shuffled.tar.gz
!rm recycle_data_shuffled.tar.gz
# load dataset as a numpy array
data = np.load('recycle_data_shuffled.npz')
list(data.keys())
print('Train:', data['x_train'].shape)
print('Test:', data['x_test'].shape)
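As a quick sanity check (not part of the original notebook), we can confirm that the label arrays only contain the five expected class indices before mapping them to strings:
# added sanity check: every label should be an integer index into LABELS
for split in ('train', 'test'):
    assert set(np.unique(data[f'y_{split}'])) <= set(range(len(LABELS)))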
title = 'Figure 1. Actual Labels for Images in the Train Set'
images_sample, labels_sample = utils.sample_images_and_labels(
    images=data['x_train'],
    labels=[LABELS[y] for y in data['y_train'].flatten()]
)
fig = utils.display_images_with_labels(images_sample, labels_sample)
fig.update_layout(height=500, width=1000, title=title).show(renderer='svg')
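The utils module is not shown in this notebook, so the exact implementation of the two helpers is unknown; a minimal hypothetical sketch, assuming NumPy sampling and a Plotly image grid (the renderer='svg' call above suggests Plotly figures), might look like this:
# hypothetical sketch of the helpers in utils.py (names and defaults assumed)
import numpy as np
import plotly.express as px

def sample_images_and_labels(images, labels, n=5, seed=42):
    # draw n random image/label pairs without replacement
    rng = np.random.default_rng(seed)
    idx = rng.choice(len(images), size=n, replace=False)
    return images[idx], [labels[i] for i in idx]

def display_images_with_labels(images, labels):
    # show the sampled images side by side, one facet per image,
    # and use the label strings as the facet titles
    fig = px.imshow(np.asarray(images), facet_col=0)
    for annotation, label in zip(fig.layout.annotations, labels):
        annotation.text = label
    return fig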
# create folders and subfolders to store raw images
for split in ('train', 'test'):
    for label in LABELS:
        folder_path = os.path.join('data', 'images', split, label)
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)
# save images in their respective subfolders
for split in ('train', 'test'):
    for i, (x, y) in tqdm(enumerate(zip(data[f'x_{split}'], data[f'y_{split}']))):
        label = LABELS[y.item()]
        image = Image.fromarray(x)
        path = os.path.join('data', 'images', split, label, f'image_{i}.jpeg')
        image.save(path)
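A quick check (not part of the original notebook) can confirm that the number of files written per split matches the size of the corresponding array:
# added sanity check: compare saved file counts with the source arrays
for split in ('train', 'test'):
    n_files = sum(
        len(os.listdir(os.path.join('data', 'images', split, label)))
        for label in LABELS
    )
    print(split, n_files, data[f'x_{split}'].shape[0])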
# clean up the workspace
!rm recycle_data_shuffled.npz
2. Model Training
# raw image size
image_size = (128, 128)
# create training and validation data loaders using an 80/20 split
dataset_train, dataset_valid = image_dataset_from_directory(
    directory=str(os.path.join('data', 'images', 'train')),
    labels='inferred',
    label_mode='int',
    class_names=LABELS,
    color_mode='rgb',
    batch_size=32,
    image_size=image_size,
    shuffle=True,
    seed=42,
    validation_split=.2,
    subset='both',
    interpolation='bilinear',
    follow_links=False,
    crop_to_aspect_ratio=False,
)
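As an optional tweak that is not part of the original pipeline, the two datasets can be cached and prefetched so that data loading overlaps with training; this is a standard tf.data optimization:
# optional: cache decoded batches and prefetch the next one during training
AUTOTUNE = tf.data.AUTOTUNE
dataset_train = dataset_train.cache().prefetch(AUTOTUNE)
dataset_valid = dataset_valid.cache().prefetch(AUTOTUNE)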