######## Bug

```python
import torch
import torch.nn as nn
from torch.distributions import Categorical
from tensordict.nn import TensorDictModule
from torchrl.envs import GymEnv, ParallelEnv
from torchrl.envs.utils import check_env_specs
from torchrl.modules import ProbabilisticActor

# Device assumed for this repro (cuda if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def make_env():
    base_env = GymEnv('CartPole-v1', device=device)
    return base_env

parallel_env = ParallelEnv(num_workers=7, create_env_fn=lambda: make_env())
check_env_specs(parallel_env)

# Get observation and action dimensions from the environment specs
observation_dim = parallel_env.observation_spec["observation"].shape[-1]
action_dim = parallel_env.action_spec.shape[-1]
# Define the actor network using nn.Sequential (outputs logits for a categorical distribution)
class ActorNet(nn.Module):
    def __init__(self, observation_dim, action_dim):
        super(ActorNet, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(observation_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, action_dim)  # Outputs logits for each action in the discrete space
        )

    def forward(self, x):
        print("actor net input shape", x.shape)
        logits = self.net(x)
        print("actor net output shape", logits.shape)
        return logits
# Instantiate the actor network
actor_net = ActorNet(observation_dim, action_dim).to(device)
# Wrap actor network in TensorDictModule to work with TorchRL's TensorDict system
tensordict_module = TensorDictModule(actor_net, in_keys=["observation"], out_keys=["logits"])
actor_module = ProbabilisticActor(
    module=tensordict_module,
    spec=parallel_env.action_spec,
    in_keys=["logits"],
    out_keys=["action"],
    distribution_class=Categorical,
    return_log_prob=True  # Return log probability of sampled actions (required for PPO loss)
)
# error on rollout
parallel_env.rollout(1000, actor_module)
```
The issue: the rollout creates `tensordict['action']` with shape `torch.Size([7])`, but it should have shape `torch.Size([7, 1])` to match the action spec.
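
For context, a minimal standalone sketch of where the missing dimension comes from: `torch.distributions.Categorical.sample()` returns a tensor with only the batch shape, so the trailing singleton dimension the spec expects is never produced (shapes here assume the 7-worker CartPole setup above):

```python
import torch
from torch.distributions import Categorical

# Logits for 7 parallel envs, 2 discrete actions each (CartPole)
logits = torch.randn(7, 2)
action = Categorical(logits=logits).sample()

print(action.shape)                # torch.Size([7])    -- what the rollout writes
print(action.unsqueeze(-1).shape)  # torch.Size([7, 1]) -- what the spec expects
```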
########### Fix

```python
# Subclass ProbabilisticActor so sampled actions are reshaped to match the spec
class CustomProbabilisticActor(ProbabilisticActor):
    def forward(self, tensordict):
        # Call the parent class's forward method to get actions and log probabilities
        tensordict = super().forward(tensordict)
        # Reshape the action tensor from [batch_size] to [batch_size, 1]
        tensordict.set("action", tensordict.get("action").unsqueeze(-1))
        return tensordict
# Instantiate the custom actor; it still samples from a Categorical distribution over the logits
actor_module = CustomProbabilisticActor(
    module=tensordict_module,
    spec=parallel_env.action_spec,
    in_keys=["logits"],
    out_keys=["action"],
    distribution_class=Categorical,
    return_log_prob=True  # Return log probability of sampled actions (required for PPO loss)
)
# rollout works now
parallel_env.rollout(1000, actor_module)
```
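
An alternative that avoids subclassing: keep the stock `ProbabilisticActor` and append a reshape step with `TensorDictSequential`. This is only a sketch of a workaround, not a fix confirmed by the TorchRL maintainers; `UnsqueezeAction`, `reshape_action`, and `patched_actor` are names made up for illustration:

```python
import torch.nn as nn
from tensordict.nn import TensorDictModule, TensorDictSequential

# Tiny module that restores the trailing singleton dimension on "action"
class UnsqueezeAction(nn.Module):
    def forward(self, action):
        return action.unsqueeze(-1)

reshape_action = TensorDictModule(UnsqueezeAction(), in_keys=["action"], out_keys=["action"])

# actor_module here is the original (unsubclassed) ProbabilisticActor from the bug section
patched_actor = TensorDictSequential(actor_module, reshape_action)
parallel_env.rollout(1000, patched_actor)
```

Either way, the reshape is applied after sampling; it works around the shape mismatch rather than changing where the Categorical sample loses its trailing dimension.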
Version:
torch 2.5.0 pypi_0 pypi
torchrl 0.5.0 pypi_0 pypi
tensordict 0.5.0 pypi_0 pypi