Skip to content
New Workbook
Sign up
SMS Spam Collection
from PIL import Image
import numpy as np

# Read the picture from disk and collapse it to a single 8-bit grayscale
# channel (PIL mode 'L') so it can be handled as a plain 2-D matrix.
with Image.open('panda.png') as f:
    img = f.convert('L')

# Pixel intensities as a NumPy matrix.
A = np.array(img)

# Quick sanity report: largest intensity, matrix dimensions, numerical rank.
print('max value: ', np.max(A))
print('shape: ', np.shape(A))
print('rank: ', np.linalg.matrix_rank(A))
import requests
link = ""
A

# Full SVD of the image matrix: A = U @ Sigma @ V.
# np.linalg.svd returns the right singular vectors already transposed, so the
# array bound to `V` here is V^H: its ROWS (not its columns) are the right
# singular vectors. That is why the check below multiplies by V directly.
U, S, V = np.linalg.svd(A, full_matrices=True)
print('Shape U: ', U.shape)
print('Shape S: ', S.shape)
print('Shape V: ', V.shape)

# Embed the singular values in a rectangular matrix with A's shape.
# Sizing it from A.shape (rather than hard-coded 350 / 634) keeps this correct
# for tall, wide and square images alike.
Sigma = np.zeros(A.shape, dtype=S.dtype)
Sigma[:S.size, :S.size] = np.diag(S)
print(Sigma.shape)
np.allclose(A, U @ Sigma @ V)

# Shape check for a single right singular vector laid out as a row vector.
# `k` was previously read without ever being assigned (NameError); inspect the
# leading singular vector, and take it as a row of V for the reason given above.
k = 0
V[k, :].reshape(1, -1).shape
def get_latent_component(j):
    """Return the j-th rank-one term of the SVD stored in ``U``, ``S``, ``V``.

    The term is ``S[j] * outer(U[:, j], V[j, :])``. ``V`` is expected to be
    the third value returned by ``np.linalg.svd`` (i.e. V^H), whose *rows* are
    the right singular vectors; indexing its columns instead yields terms that
    do not add up to the decomposed matrix.

    Args:
        j: Index of the singular value / vector pair. Negative values follow
            normal NumPy indexing.

    Returns:
        A 2-D array of shape ``(U.shape[0], V.shape[1])``.

    Raises:
        IndexError: If ``j`` is not smaller than the number of singular values.
    """
    # Explicit check instead of `assert`: asserts vanish under `python -O`,
    # and the previous `j <= 350` bound let the invalid index 350 through.
    if j >= S.shape[0]:
        raise IndexError(
            f"component index {j} out of range for {S.shape[0]} singular values"
        )
    # Shapes come from the arrays themselves, so any matrix size works.
    return S[j] * np.outer(U[:, j], V[j, :])

def truncated_svd(k):
    """Sum the first ``k`` latent components into one matrix.

    Starts from a float64 zero matrix shaped like ``A`` and adds
    ``get_latent_component(0)`` through ``get_latent_component(k - 1)`` in
    index order. When ``range(k)`` is empty the zero matrix is returned as is.
    """
    total = np.zeros_like(A, dtype='float64')
    terms = (get_latent_component(idx) for idx in range(k))
    for term in terms:
        # In-place accumulation, equivalent to `total += term`.
        np.add(total, term, out=total)
    return total
# Rank-1 approximation: only the first latent component.
approximation = truncated_svd(1)
print(approximation.shape)
# Clip to the valid 8-bit range before casting. A low-rank approximation can
# dip below 0 or overshoot 255, and a bare astype('uint8') would wrap those
# values around instead of saturating them, leaving speckles in the image.
Image.fromarray(
    np.clip(approximation.round(0), 0, 255).astype('uint8')
).save('1.png')

# Sum of every latent component. There are min(A.shape) singular values, so
# derive the count from A rather than hard-coding 350.
full = truncated_svd(min(A.shape))
Image.fromarray(
    np.clip(full.round(0), 0, 255).astype('uint8')
).save('full.png')
A

SMS Spam Collection

This is a text corpus of over 5,500 English SMS messages with ~13% labeled as spam. The text file contains one message per line with two columns: the label ("ham" or "spam") and the raw text of the message. Messages labeled as "ham" are non-spam messages that can be considered legitimate.

Not sure where to begin? Scroll to the bottom to find challenges!

import numpy as np
# Diagonal matrix with the entries 2, 2 and -1 on its main diagonal.
D = np.diag([2, 2, -1])

# P and its inverse; the last expression forms the product P · D · P^-1.
P = np.array([
    [1, 0, 1],
    [0, 1, -1],
    [1, 0, 2],
])
Inv = np.linalg.inv(P)
Inv
np.matmul(np.matmul(P, D), Inv)