Skip to content
import pandas as pd
from tqdm import tqdm
# Load the training table from covid_data/train.csv and preview its first rows.
train_set = pd.read_csv("covid_data/train.csv")
train_set.head()
import seaborn as sns
import matplotlib.pyplot as plt
# Bar chart of how many rows of train_set fall under each value of "label".
sns.countplot(x="label", data=train_set)
plt.show()
from pathlib import Path
from skimage import io
# Read every entry found directly under the data directory into one table:
# "image_id" holds the file name, "image" the array returned by skimage.
# The glob order is kept as-is because later cells address rows by index label.
img_path = Path("covid_data/data")
images = pd.DataFrame(
    [
        {"image_id": entry.name, "image": io.imread(str(entry))}
        for entry in tqdm(img_path.glob("*"))
    ]
)
images.head()
def abnormal_image(image_table=None, expected_shape=(256, 256, 3)):
    """Find the images whose array shape differs from ``expected_shape``.

    When at least one such image exists, a summary of how often each
    unexpected shape occurs is printed.

    Args:
        image_table: DataFrame with an ``"image"`` column whose values expose
            a ``shape`` attribute. Defaults to the module-level ``images``
            frame, which is what the original zero-argument call inspected.
        expected_shape: Shape every image is supposed to have. Any sequence
            is accepted; it is compared as a tuple.

    Returns:
        dict mapping the index label of each deviating image to its shape.
        Empty when every image already has ``expected_shape``.
    """
    from collections import Counter

    if image_table is None:
        image_table = images
    expected_shape = tuple(expected_shape)

    # Walk the column directly instead of iterrows(), which would build a
    # full Series per row only to read a single field from it.
    unexpected_shapes = {
        index: image.shape
        for index, image in zip(image_table.index, image_table["image"])
        if image.shape != expected_shape
    }
    if unexpected_shapes:
        print(f"Abnormal shape: {Counter(unexpected_shapes.values())}")
    return unexpected_shapes
# Index label -> shape for every loaded image that is not (256, 256, 3).
incorrect_image = abnormal_image()
import numpy as np
def compare_images(*images: dict):
    """Show the given images side by side in one row of subplots.

    Args:
        *images: One dict per panel. ``"img"`` is the array to draw (it must
            support ``astype``) and ``"title"`` is the text placed above it.

    Raises:
        ValueError: If called without any image.
    """
    n = len(images)
    if n == 0:
        raise ValueError("compare_images() needs at least one image")

    # squeeze=False keeps `axes` two-dimensional for any n. With the default,
    # a single panel comes back as a bare Axes object and indexing it fails.
    _fig, axes = plt.subplots(1, n, figsize=(10 * n, 5), squeeze=False)
    for ax, image in zip(axes[0], images):
        ax.imshow(image["img"].astype("float"), cmap="gray")
        ax.set_title(f"{image['title']}")
    plt.show()
from skimage import transform
# Shape every image should end up with; reused by the resize loop further down.
target_size = (256, 256, 3)
# Take the image stored under index label 2641 and divide its values by 255
# (presumably 8-bit pixel data being scaled to [0, 1] — TODO confirm the dtype).
img = images.loc[2641, "image"] / 255
# Candidate fix 1: keep only the first three entries along the last axis.
img_remove = img[:, :, :3]
# Candidate fix 2: let skimage resize the whole array to target_size.
img_resize = transform.resize(img, target_size)
# Show the untouched image next to both candidates.
compare_images(
{"img": img, "title": "original image"},
{"img": img_remove, "title": "removed image"},
{"img": img_resize, "title": "resized image"},
)
Run cancelled
from tqdm import tqdm
# Bring every image flagged in `incorrect_image` to the common target shape.
for index in tqdm(incorrect_image):
    image = images.at[index, "image"]
    # `.at` addresses exactly one cell. Assigning an ndarray through `.loc`
    # is treated as a list-like value and may raise or be broadcast instead
    # of being stored as a single object.
    # NOTE(review): skimage's resize returns floating-point data (rescaled to
    # [0, 1] unless preserve_range=True), so the resized entries may no longer
    # share dtype/value range with the untouched images — confirm that the
    # downstream normalisation accounts for this.
    images.at[index, "image"] = transform.resize(image, target_size)