Skip to content
ppo.py
  • AI Chat
  • Code
  • Report
  • Install the tools

    !pip install torch numpy matplotlib gym==0.25.2
    Hidden output

    Import the packages in Python

    import torch
    import torch.nn as nn
    import torch.optim as optim
    import torch.nn.functional as f
    import torch.distributions as distributions
    from torch.utils.data import DataLoader, TensorDataset
    import matplotlib.pyplot as plt
    import numpy as np
    import gym
    

    Create two environments: one for training and one for testing

    # Two independent instances of the same CartPole-v1 task, kept as
    # module-level globals: one named for training and one for testing.
    # `create_agent` reads `env_train` to size the networks.
    env_train = gym.make('CartPole-v1')
    env_test = gym.make('CartPole-v1')
    
    Hidden output

    Create the backbone network

    class BackboneNetwork(nn.Module):
        """Three-layer fully connected network with ReLU and dropout.

        The first two linear layers are each followed by a ReLU and by the
        shared dropout module; the third linear layer produces the output
        with no activation applied.

        Args:
            in_features: size of each input sample.
            hidden_dimensions: width of both hidden layers.
            out_features: size of each output sample.
            dropout: probability passed to ``nn.Dropout``.
        """

        def __init__(self, in_features, hidden_dimensions, out_features, dropout):
            super().__init__()

            # Creation order is kept (layer1, layer2, layer3) so parameter
            # initialisation consumes the RNG in the same sequence.
            self.layer1 = nn.Linear(in_features, hidden_dimensions)
            self.layer2 = nn.Linear(hidden_dimensions, hidden_dimensions)
            self.layer3 = nn.Linear(hidden_dimensions, out_features)
            self.dropout = nn.Dropout(dropout)

        def forward(self, x):
            """Run ``x`` through both hidden stages and the output layer."""
            hidden = self.dropout(f.relu(self.layer1(x)))
            hidden = self.dropout(f.relu(self.layer2(hidden)))
            return self.layer3(hidden)
    

    Define the actor-critic model

    class ActorCritic(nn.Module):
        """Pair an actor module and a critic module behind one forward call.

        Args:
            actor: module applied to the state to produce the action output.
            critic: module applied to the state to produce the value output.
        """

        def __init__(self, actor, critic):
            super().__init__()
            self.actor, self.critic = actor, critic

        def forward(self, state):
            """Return ``(actor(state), critic(state))`` for the same ``state``."""
            # The actor is evaluated before the critic, as in the tuple order.
            return self.actor(state), self.critic(state)
    

    Create an agent

    def create_agent(hidden_dimensions, dropout):
        """Build an ``ActorCritic`` sized from the global ``env_train``.

        The input width is ``env_train.observation_space.shape[0]``. The actor
        has ``env_train.action_space.n`` outputs and the critic has one.

        Args:
            hidden_dimensions: hidden-layer width used by both networks.
            dropout: dropout probability used by both networks.

        Returns:
            An ``ActorCritic`` wrapping two separate ``BackboneNetwork``s.
        """
        n_inputs = env_train.observation_space.shape[0]
        n_actions = env_train.action_space.n

        def build_network(n_outputs):
            # Actor and critic share everything except the output width.
            return BackboneNetwork(n_inputs, hidden_dimensions, n_outputs, dropout)

        # Arguments are evaluated left to right: actor first, then critic.
        return ActorCritic(build_network(n_actions), build_network(1))
    

    Define a function to calculate the discounted, normalized returns from the rewards

    def calculate_returns(rewards, discount_factor):
        """Compute normalized discounted returns for one sequence of rewards.

        For each step ``t`` the return is
        ``rewards[t] + discount_factor * return[t + 1]``, accumulated from the
        last step backwards. The returns are then shifted to zero mean and,
        when possible, scaled to unit standard deviation.

        Args:
            rewards: reversible sequence of per-step rewards.
            discount_factor: multiplier applied to the running return at
                each step.

        Returns:
            A 1-D floating-point tensor with one entry per reward. If the
            standard deviation is undefined (fewer than two entries) or is
            not positive, the returns are only mean-centred instead of
            being turned into NaN. An empty input gives an empty tensor.
        """
        returns = []
        cumulative_reward = 0
        for r in reversed(rewards):
            cumulative_reward = r + cumulative_reward * discount_factor
            # Append now and reverse once afterwards: insert(0, ...) would
            # shift the whole list on every step (quadratic time).
            returns.append(cumulative_reward)
        returns.reverse()

        returns = torch.tensor(returns)
        # All-integer inputs produce an integer tensor, on which mean()
        # raises; promote only in that case so float inputs keep their dtype.
        if not returns.is_floating_point():
            returns = returns.float()

        if returns.numel() == 0:
            return returns

        # normalize the return
        centered = returns - returns.mean()
        if returns.numel() > 1:
            std = returns.std()
            # A zero (or NaN) std would make the division yield NaN/inf.
            if std > 0:
                return centered / std
        return centered
    

    Define a function to calculate the advantages