# Skip to content
#
# Let's start with PyTorch: creating the first layers and dealing with common problems

import torch
import torch.nn as nn

# Build a batch holding a single sample with three features.
input_tensor = torch.tensor([[0.3471, 0.4547, -0.2356]])

# A fully connected layer mapping 3 input features to 2 outputs. Its weight
# and bias are randomly initialized, so the printed values differ per run.
linear_layer = nn.Linear(3, 2)

# Forward pass: calling the layer applies it to the input.
output = linear_layer(input_tensor)
print(output)

# Example network: three linear layers chained 10 -> 18 -> 20 -> 5.
# Each layer's input size has to equal the previous layer's output size.
model = nn.Sequential(
    nn.Linear(in_features=10, out_features=18),
    nn.Linear(in_features=18, out_features=20),
    nn.Linear(in_features=20, out_features=5),
)
#non linear models
#sigmoid example - binary classification
import torch
import torch.nn as nn

# Sigmoid maps a real-valued score to the (0, 1) range.
sigmoid = nn.Sigmoid()
input_tensor = torch.tensor([[6.0]])
output = sigmoid(input_tensor)

# Two linear layers followed by a sigmoid activation: a binary classifier
# (akin to logistic regression).
model = nn.Sequential(
    nn.Linear(in_features=6, out_features=4),
    nn.Linear(in_features=4, out_features=1),
    nn.Sigmoid(),
)

# Softmax example: normalize the six scores along the last dimension so
# they form a probability distribution.
input_tensor = torch.tensor([[1.0, -6.0, 2.5, -0.3, 1.2, 0.8]])
softmax = nn.Softmax(dim=-1)
probabilities = softmax(input_tensor)
print(probabilities)
# One sample with eleven features, stored in the default float dtype.
input_tensor = torch.tensor(
    [[3.0, 4.0, 6.0, 7.0, 10.0, 12.0, 2.0, 3.0, 6.0, 8.0, 9.0]]
)

# Regression model: exactly four linear layers ending in a single output.
model = nn.Sequential(
    nn.Linear(in_features=input_tensor.shape[1], out_features=10),
    nn.Linear(in_features=10, out_features=10),
    nn.Linear(in_features=10, out_features=5),
    nn.Linear(in_features=5, out_features=1),
)

output = model(input_tensor)
print(output)

# Multiclass classifier: same stack, but with four outputs turned into
# class probabilities by a softmax over the last dimension.
model2 = nn.Sequential(
    nn.Linear(in_features=input_tensor.shape[1], out_features=10),
    nn.Linear(in_features=10, out_features=10),
    nn.Linear(in_features=10, out_features=5),
    nn.Linear(in_features=5, out_features=4),
    nn.Softmax(dim=-1),
)

output2 = model2(input_tensor)
print(output2)
import torch
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss

# One-hot encoding in PyTorch: class index 0 out of 3 classes becomes a
# vector with a 1 in the first position (similar to OHE elsewhere).
F.one_hot(torch.tensor(0), num_classes=3)

# Example of use: one sample whose true class is index 2, with raw scores
# for each of the four classes.
y = [2]
scores = torch.tensor([[0.1, 6.0, -2.0, 3.2]])

# One-hot encode the label so it has the same shape as the scores.
one_hot_label = F.one_hot(torch.tensor(y), num_classes=4)

# Cross-entropy loss between the raw scores and the one-hot target; both
# are cast to float64 because the one-hot tensor is integer-typed.
criterion = CrossEntropyLoss()
loss = criterion(scores.to(torch.float64), one_hot_label.to(torch.float64))
print(loss)
import torch
import torch.nn as nn

# Using derivatives to update model parameters.
weight = torch.tensor([[-1.5982, -0.8238, -0.4322, -0.5797,  0.7036,  1.3835, -0.9542,  0.1416, 2.4718]], requires_grad=True)

bias = torch.tensor([-1.1248,  0.5473], requires_grad=True)

preds = torch.tensor([[-0.9453, -0.2827]], requires_grad=True)

target = torch.tensor([[1., 0.]])

criterion = nn.CrossEntropyLoss()

# Calculate the loss
loss = criterion(preds, target)

# Compute the gradients of the loss
loss.backward()

# NOTE: `preds` is a hard-coded leaf tensor here; it is not computed from
# `weight` and `bias`. Neither of them takes part in the loss, so their
# `.grad` attributes are still None after backward() and both prints below
# show None. To obtain real gradients for them, `preds` would have to be
# produced from `weight` and `bias` (e.g. by a linear layer's forward pass).
print(weight.grad)
print(bias.grad)

# The only tensor that actually receives a gradient from this loss.
print(preds.grad)
import torch
import torch.nn as nn
import torch.optim as optim


target = torch.tensor([[1., 0.]])

model = nn.Sequential(nn.Linear(16, 8),
                      nn.Sigmoid(),
                      nn.Linear(8, 2))

# Access the weight of the first linear layer
weight_0 = model[0].weight

# Access the bias of the second linear layer
bias_1 = model[2].bias

print("weight of first linear layer:", weight_0, "\n bias of 2nd linear layer:", bias_1)

# Create the optimizer
optimizer = optim.SGD(model.parameters(), lr=0.001)

criterion = nn.MSELoss()

# The prediction must come from a forward pass through the model; otherwise
# the loss is not connected to the model's parameters, backward() leaves
# their gradients as None, and optimizer.step() silently changes nothing.
# A random sample with 16 features stands in for real input data.
sample = torch.randn(1, 16)
pred = model(sample)

# Clear any stale gradients, then backpropagate the loss.
optimizer.zero_grad()
loss = criterion(pred, target)
loss.backward()

# Update the model's parameters using the optimizer
optimizer.step()
def show_results(model, dataloader):
    """Print ground-truth and predicted values for up to three batches.

    The model is switched to evaluation mode and predictions are computed
    without gradient tracking. If ``dataloader`` yields fewer than three
    batches, only the available ones are shown instead of raising
    ``StopIteration``.

    Args:
        model: Callable module producing one prediction per sample.
        dataloader: Iterable yielding ``(feature, target)`` batches.
    """
    from itertools import islice

    model.eval()
    # Inference only: no need to build the autograd graph.
    with torch.no_grad():
        for feature, target in islice(dataloader, 3):
            preds = model(feature)
            for p, t in zip(preds, target):
                print(f'Ground truth salary: {t.item():.3f}. Predicted salary: {p.item():.3f}.')
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
# Load the salary data and one-hot encode its categorical columns.
ds_salary = pd.get_dummies(pd.read_csv("ds_salaries.csv"), dtype="int64")

# Split into the regression target (cast to float) and the remaining
# columns, which serve as features.
target = ds_salary['salary_in_usd'].values.astype(float)
features = ds_salary.drop(columns=['salary_in_usd']).values

# Wrap both arrays as float32 tensors and serve them in shuffled
# mini-batches of four samples.
dataset = TensorDataset(
    torch.tensor(features).float(),
    torch.tensor(target).float(),
)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# Four stacked linear layers ending in a single regression output.
model = nn.Sequential(
    nn.Linear(in_features=features.shape[1], out_features=100),
    nn.Linear(in_features=100, out_features=100),
    nn.Linear(in_features=100, out_features=10),
    nn.Linear(in_features=10, out_features=1),
)

num_epochs = 15

# Prediction and ground truth as 1-D arrays of the same shape. A 0-d
# prediction against a 1-d target makes nn.MSELoss warn about mismatched
# sizes and rely on broadcasting.
y_hat = np.array([10.0])
y = np.array([1.0])

# Calculate the MSELoss using NumPy
mse_numpy = np.mean((y_hat - y) ** 2)

# Create the MSELoss function
criterion = nn.MSELoss()

# Calculate the MSELoss using the created loss function. The call
# convention is criterion(input, target): prediction first, truth second.
mse_pytorch = criterion(torch.tensor(y_hat).float(), torch.tensor(y).float())
print(mse_pytorch)

# The optimizer must be bound to the parameters of the model being trained
# here. Reusing an optimizer created earlier for a different model would
# leave this model's weights untouched by optimizer.step().
# NOTE(review): features and target appear to be used unscaled — confirm
# that this learning rate is appropriate for the data.
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# Loop over the number of epochs and the dataloader
for epoch in range(num_epochs):
    for feature, target in dataloader:
        # Set the gradients to zero
        optimizer.zero_grad()
        # Run a forward pass
        prediction = model(feature)
        # Calculate the loss. The model outputs shape (batch, 1) while the
        # target batch is (batch,); reshape the target so MSELoss compares
        # element-wise instead of broadcasting to (batch, batch).
        loss = criterion(prediction, target.view(-1, 1))
        # Compute the gradients
        loss.backward()
        # Update the model's parameters
        optimizer.step()

show_results(model, dataloader)
# ReLU and LeakyReLU

# ReLU module from PyTorch.
relu_pytorch = nn.ReLU()

# Evaluate ReLU at a negative input and backpropagate to obtain its
# gradient at that point.
x = torch.tensor(-1.0, requires_grad=True)
y = relu_pytorch(x)
y.backward()

# Gradient of ReLU with respect to x.
gradient = x.grad
print(gradient)

# LeakyReLU keeps a small slope (0.05) for negative inputs.
leaky_relu_pytorch = nn.LeakyReLU(0.05)
x = torch.tensor(-3.0)
leaky_relu_pytorch(x)
# Counting parameters
model = nn.Sequential(
    nn.Linear(16, 4),
    nn.Linear(4, 2),
    nn.Linear(2, 1),
)

# Sum the element counts of every weight and bias tensor in the model.
total = sum(parameter.numel() for parameter in model.parameters())

print(total)
#optimize and plot function
import matplotlib.pyplot as plt

def function(x):
    """Evaluate the polynomial x**4 + x**3 - 5*x**2 at ``x``."""
    quartic_term = x**4
    cubic_term = x**3
    quadratic_term = 5*x**2
    return quartic_term + cubic_term - quadratic_term


def optimize_and_plot(lr=0.01, momentum=0.0, num_steps=10):
    """Run gradient descent on ``function`` from x = 2.0 and plot the path.

    Each step computes the gradient of ``function`` at the current ``x``,
    optionally accumulates it in a momentum buffer, and moves ``x`` against
    it scaled by ``lr``. The visited points are then drawn on top of the
    curve of ``function`` over [-3, 2).

    Args:
        lr: Learning rate (step size) applied to each update.
        momentum: Momentum factor; 0 disables the momentum buffer.
        num_steps: Number of optimization steps to run and label.
    """
    x = torch.tensor(2.0, requires_grad=True)
    buffer = torch.zeros_like(x)
    values = []
    for _ in range(num_steps):
        y = function(x)
        # Record plain floats so the history carries no autograd graph.
        values.append((x.item(), y.item()))
        y.backward()

        # The parameter update itself must not be tracked by autograd.
        with torch.no_grad():
            d_p = x.grad
            if momentum != 0:
                # buffer <- momentum * buffer + gradient
                buffer.mul_(momentum).add_(d_p)
                d_p = buffer
            x.add_(d_p, alpha=-lr)
        x.grad.zero_()

    # Sample the curve itself for the background plot.
    curve_x = np.arange(-3, 2, 0.001)
    curve_y = function(curve_x)

    plt.figure(figsize=(8, 4))
    plt.plot([v[0] for v in values], [v[1] for v in values], 'r-X',
             linewidth=2, markersize=7)
    for i, (x_val, y_val) in enumerate(values):
        plt.text(x_val + 0.1, y_val, f'step {i}', fontdict={'color': 'r'})
    plt.plot(curve_x, curve_y, linewidth=2)
    plt.grid()
    plt.tick_params(axis='both', which='major', labelsize=12)
    # The plotted curve is a quartic, not a square function.
    plt.legend(['Optimizer steps', 'Objective function'])
    plt.show()
# Learning rate and momentum

# Compare a small and a larger learning rate (momentum left at its default).
lr0 = 0.001
lr2 = 0.09
for rate in (lr0, lr2):
    optimize_and_plot(lr=rate)

# First momentum value: intended to get stuck in the first minimum.
# Second momentum value: intended to reach the global optimum.
mom0 = 0.005
mom1 = 0.98
for factor in (mom0, mom1):
    optimize_and_plot(momentum=factor)
# Freeze the weight and bias of the first two layers so that they no
# longer require gradients.
frozen_names = {'0.weight', '0.bias', '1.weight', '1.bias'}
for name, param in model.named_parameters():
    if name in frozen_names:
        param.requires_grad = False
        
        
layer0 = nn.Linear(in_features=16, out_features=32)
layer1 = nn.Linear(in_features=32, out_features=64)

# Re-initialize both weight matrices in place with nn.init.uniform_,
# using its default bounds.
for layer in (layer0, layer1):
    nn.init.uniform_(layer.weight)

model = nn.Sequential(layer0, layer1)
# Random feature matrix (12 samples x 8 features) and target column.
np_features = np.random.rand(12, 8)
np_target = np.random.rand(12, 1)

# Convert the NumPy arrays to PyTorch tensors (the dtype is carried over).
torch_features, torch_target = torch.tensor(np_features), torch.tensor(np_target)

# Pair features and targets sample by sample.
dataset = TensorDataset(torch_features, torch_target)

# Show the last (features, target) pair of the dataset.
print(dataset[-1])