diff --git a/Aimbot-PPO-Python/Pytorch/aimemory.py b/Aimbot-PPO-Python/Pytorch/aimemory.py index 89aad78..8aa3ee3 100644 --- a/Aimbot-PPO-Python/Pytorch/aimemory.py +++ b/Aimbot-PPO-Python/Pytorch/aimemory.py @@ -107,12 +107,12 @@ class PPOMem: next_done=torch.Tensor([next_done[i]]).to(self.device), ) # send memories to training datasets - self.obs[roundTargetType] = torch.cat((self.obs[roundTargetType], torch.tensor(self.ob_bf[i]).to(self.device)), 0) - self.actions[roundTargetType] = torch.cat((self.actions[roundTargetType], torch.tensor(self.act_bf[i]).to(self.device)), 0) - self.dis_logprobs[roundTargetType] = torch.cat((self.dis_logprobs[roundTargetType], torch.tensor(self.dis_logprobs_bf[i]).to(self.device)), 0) - self.con_logprobs[roundTargetType] = torch.cat((self.con_logprobs[roundTargetType], torch.tensor(self.con_logprobs_bf[i]).to(self.device)), 0) + self.obs[roundTargetType] = torch.cat((self.obs[roundTargetType], torch.tensor(np.array(self.ob_bf[i])).to(self.device)), 0) + self.actions[roundTargetType] = torch.cat((self.actions[roundTargetType], torch.tensor(np.array(self.act_bf[i])).to(self.device)), 0) + self.dis_logprobs[roundTargetType] = torch.cat((self.dis_logprobs[roundTargetType], torch.tensor(np.array(self.dis_logprobs_bf[i])).to(self.device)), 0) + self.con_logprobs[roundTargetType] = torch.cat((self.con_logprobs[roundTargetType], torch.tensor(np.array(self.con_logprobs_bf[i])).to(self.device)), 0) self.rewards[roundTargetType] = torch.cat((self.rewards[roundTargetType], thisRewardsTensor), 0) - self.values[roundTargetType] = torch.cat((self.values[roundTargetType], torch.tensor(self.values_bf[i]).to(self.device)), 0) + self.values[roundTargetType] = torch.cat((self.values[roundTargetType], torch.tensor(np.array(self.values_bf[i])).to(self.device)), 0) self.advantages[roundTargetType] = torch.cat((self.advantages[roundTargetType], adv), 0) self.returns[roundTargetType] = torch.cat((self.returns[roundTargetType], rt), 0)