From 2741d6d51a896c716bc1bea36fc1df9fedb56bc0 Mon Sep 17 00:00:00 2001
From: Koha9
Date: Tue, 8 Aug 2023 20:47:56 +0900
Subject: [PATCH] Change Tensor to tensor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the Tensor vs. tensor inconsistency and standardize on torch.tensor.
---
 Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py |  2 +-
 Aimbot-PPO-Python/Pytorch/aimemory.py    | 10 +++++-----
 Aimbot-PPO-Python/Pytorch/arguments.py   |  4 ++--
 Aimbot-PPO-Python/Pytorch/ppoagent.py    |  4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
index fe87df8..f32a1bd 100644
--- a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
+++ b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
@@ -111,7 +111,7 @@ if __name__ == "__main__":
         with torch.no_grad():
             # predict actions
             action, dis_logprob, _, con_logprob, _, value = agent.get_actions_value(
-                torch.Tensor(state).to(device)
+                torch.tensor(state,dtype=torch.float32).to(device)
             )
             value = value.flatten()
 
diff --git a/Aimbot-PPO-Python/Pytorch/aimemory.py b/Aimbot-PPO-Python/Pytorch/aimemory.py
index 8aa3ee3..4ef2dbb 100644
--- a/Aimbot-PPO-Python/Pytorch/aimemory.py
+++ b/Aimbot-PPO-Python/Pytorch/aimemory.py
@@ -61,7 +61,7 @@ class PPOMem:
             thisRewardBF = (np.asarray(thisRewardBF) + (remainTime * self.result_broadcast_ratio)).tolist()
         else:
             print("!!!!!DIDNT GET RESULT REWARD!!!!!!", rewardBF[-1])
-        return torch.Tensor(thisRewardBF).to(self.device)
+        return torch.tensor(thisRewardBF,dtype=torch.float32).to(self.device)
 
     def save_memories(
         self,
@@ -101,10 +101,10 @@ class PPOMem:
                 thisRewardsTensor = self.broad_cast_end_reward(self.rewards_bf[i], remainTime)
                 adv, rt = agent.gae(
                     rewards=thisRewardsTensor,
-                    dones=torch.Tensor(self.dones_bf[i]).to(self.device),
+                    dones=torch.tensor(self.dones_bf[i],dtype=torch.float32).to(self.device),
                     values=torch.tensor(self.values_bf[i]).to(self.device),
                     next_obs=torch.tensor(next_state[i]).to(self.device).unsqueeze(0),
-                    next_done=torch.Tensor([next_done[i]]).to(self.device),
+                    next_done=torch.tensor([next_done[i]],dtype=torch.float32).to(self.device),
                 )
                 # send memories to training datasets
                 self.obs[roundTargetType] = torch.cat((self.obs[roundTargetType], torch.tensor(np.array(self.ob_bf[i])).to(self.device)), 0)
@@ -119,7 +119,7 @@ class PPOMem:
                 # clear buffers
                 self.clear_buffers(i)
                 print(f"train dataset {Targets(roundTargetType).name} added:{self.obs[roundTargetType].size()[0]}/{self.data_set_size}")
-    
+
     def clear_buffers(self,ind:int):
         # clear buffers
         self.ob_bf[ind] = []
@@ -129,7 +129,7 @@ class PPOMem:
         self.rewards_bf[ind] = []
         self.dones_bf[ind] = []
         self.values_bf[ind] = []
-    
+
     def clear_training_datasets(self,ind:int):
         # clear training datasets
         self.obs[ind] = torch.tensor([]).to(self.device)
diff --git a/Aimbot-PPO-Python/Pytorch/arguments.py b/Aimbot-PPO-Python/Pytorch/arguments.py
index 600f897..a7b78cb 100644
--- a/Aimbot-PPO-Python/Pytorch/arguments.py
+++ b/Aimbot-PPO-Python/Pytorch/arguments.py
@@ -35,8 +35,8 @@ ANNEAL_LEARNING_RATE = True
 CLIP_VLOSS = True
 NORM_ADV = False
 TRAIN = True
-SAVE_MODEL = True
-WANDB_TACK = True
+SAVE_MODEL = False
+WANDB_TACK = False
 LOAD_DIR = None
 #LOAD_DIR = "../PPO-Model/PList_Go_LeakyReLU_9331_1677965178_bestGoto/PList_Go_LeakyReLU_9331_1677965178_10.709002.pt"
 
diff --git a/Aimbot-PPO-Python/Pytorch/ppoagent.py b/Aimbot-PPO-Python/Pytorch/ppoagent.py
index 93b5a83..aa71166 100644
--- a/Aimbot-PPO-Python/Pytorch/ppoagent.py
+++ b/Aimbot-PPO-Python/Pytorch/ppoagent.py
@@ -261,8 +261,8 @@ class PPOAgent(nn.Module):
         self,
         rewards: torch.Tensor,
         dones: torch.Tensor,
-        values: torch.tensor,
-        next_obs: torch.tensor,
+        values: torch.Tensor,
+        next_obs: torch.Tensor,
         next_done: torch.Tensor,
     ) -> tuple:
         # GAE
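
Note on the change: torch.Tensor() is the legacy constructor and always returns a
float32 tensor, while torch.tensor() infers its dtype from the input data (Python
ints become int64). Passing dtype=torch.float32 explicitly, as the added lines do,
keeps the previous float32 behaviour for integer-valued inputs such as done flags.
A minimal illustrative sketch of the difference (not part of the patch):

    import torch

    dones = [0, 1, 0]                             # Python ints, e.g. done flags
    a = torch.Tensor(dones)                       # legacy constructor -> torch.float32
    b = torch.tensor(dones)                       # dtype inferred     -> torch.int64
    c = torch.tensor(dones, dtype=torch.float32)  # explicit dtype     -> torch.float32
    print(a.dtype, b.dtype, c.dtype)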