'RuntimeError: Found dtype Double but expected Float - Pytorch RL
I am trying to get an actor critic variant of the pendulum running, but I seem to be running into a particular problem.
RuntimeError: Found dtype Double but expected Float
I saw this had come up multiple times before so I have been through those and have attempted to change the data types of my loss (kept in comments) but it is still not working. Could anyone point out how to resolve this so that I can learn from it?
Full code below
import gym, os
import numpy as np
from itertools import count
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.distributions import Normal
from collections import namedtuple
SavedAction = namedtuple('SavedAction', ['log_prob', 'value'])
LOG_SIG_MAX = 2
LOG_SIG_MIN = -20
class ActorCritic(nn.Module):
"""
Implementing both heads of the actor critic model
"""
def __init__(self, state_space, action_space):
super(ActorCritic, self).__init__()
self.state_space = state_space
self.action_space = action_space
# HL 1
self.linear1 = nn.Linear(self.state_space, 128)
# HL 2
self.linear2 = nn.Linear(128, 256)
# Outputs
self.critic_head = nn.Linear(256, 1)
self.action_mean = nn.Linear(256, self.action_space)
self.action_std = nn.Linear(256, self.action_space)
# Saving
self.saved_actions = []
self.rewards = []
# Optimizer
self.optimizer = optim.Adam(self.parameters(), lr = 1e-3)
self.eps = np.finfo(np.float32).eps.item()
def forward(self, state):
"""
Forward pass for both actor and critic
"""
# State to Layer 1
l1_output = F.relu(self.linear1(state))
# Layer 1 to Layer 2
l2_output = F.relu(self.linear2(l1_output))
# Layer 2 to Action
mean = self.action_mean(l2_output)
std = self.action_std(l2_output)
std = torch.clamp(std, min=LOG_SIG_MIN, max = LOG_SIG_MAX)
std = std.exp()
# Layer 2 to Value
value_est = self.critic_head(l2_output)
return value_est, mean, std
def select_action(self,state):
state = torch.from_numpy(state).float().unsqueeze(0)
value_est, mean, std = self.forward(state)
value_est = value_est.reshape(-1)
# Make prob Normal dist
dist = Normal(mean, std)
action = dist.sample()
action = torch.tanh(action)
ln_prob = dist.log_prob(action)
ln_prob = ln_prob.sum()
self.saved_actions.append(SavedAction(ln_prob, value_est))
action = action.numpy()
return action[0]
def compute_returns(self, gamma): # This is the error causing code
"""
Calculate losses and do backprop
"""
R = 0
saved_actions = self.saved_actions
policy_losses = []
value_losses = []
returns = []
for r in self.rewards[::-1]:
# Discount value
R = r + gamma*R
returns.insert(0,R)
returns = torch.tensor(returns)
returns = (returns - returns.mean())/(returns.std()+self.eps)
for (log_prob, value), R in zip(saved_actions, returns):
advantage = R - value.item()
advantage = advantage.type(torch.FloatTensor)
policy_losses.append(-log_prob*advantage)
value_losses.append(F.mse_loss(value, torch.tensor([R])))
self.optimizer.zero_grad()
loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum()
loss = loss.type(torch.FloatTensor)
loss.backward()
self.optimizer.step()
del self.rewards[:]
del self.saved_actions[:]
env = gym.make("Pendulum-v0")
state_space = env.observation_space.shape[0]
action_space = env.action_space.shape[0]
# Train Expert AC
model = ActorCritic(state_space, action_space)
train = True
if train == True:
# Main loop
window = 50
reward_history = []
for ep in count():
state = env.reset()
ep_reward = 0
for t in range(1,1000):
if ep%50 == 0:
env.render()
action = model.select_action(state)
state, reward, done, _ = env.step(action)
model.rewards.append(reward)
ep_reward += reward
if done:
break
print(reward)
model.compute_returns(0.99) # Error begins here
reward_history.append(ep_reward)
# Result information
if ep % 50 == 0:
mean = np.mean(reward_history[-window:])
print(f"Episode: {ep} Last Reward: {ep_reward} Rolling Mean: {mean}")
if np.mean(reward_history[-100:])>199:
print(f"Environment solved at episode {ep}, average run length > 200")
break
Complete error log, some elements redacted for privacy. Originally the loop actor critic and main loop were in separate files. Comments added to appropriate error causing lines.
Traceback (most recent call last):
File "pendulum.py", line 59, in <module>
model.compute_returns(0.99)
File "/home/x/Software/git/x/x/solvers/actorcritic_cont.py", line 121, in compute_returns
loss.backward()
File "/home/x/.local/lib/python3.8/site-packages/torch/_tensor.py", line 255, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "/home/x/.local/lib/python3.8/site-packages/torch/autograd/__init__.py", line 147, in backward
Variable._execution_engine.run_backward(
RuntimeError: Found dtype Double but expected Float
Solution 1:[1]
Answering here in case anyone has similar issues in the future. The output of the reward in OpenAI Gym Pendulum-v0 is a double, so when you compute the return over the episode you need to change that to a float tensor.
I did this just by:
returns = torch.tensor(returns)
returns = (returns - returns.mean())/(returns.std()+self.eps)
returns = returns.type(torch.FloatTensor)
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | EmptySet |
