From 1787872e821c1274445ff72df9cd22c09f7de42e Mon Sep 17 00:00:00 2001
From: Koha9
Date: Sun, 4 Dec 2022 09:20:05 +0900
Subject: [PATCH] Fix wrong remain time

Fix the wrong remain time passed to broadCastEndReward, what a stupid
mistake... Also remove the doubled WANDB value_loss writer.
---
 Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
index 32a6e5e..7202ca6 100644
--- a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
+++ b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
@@ -65,6 +65,7 @@ class Targets(Enum):
     Attack = 2
     Defence = 3
     Num = 4
+STATE_REMAINTIME_POSITION = 6
 BASE_WINREWARD = 999
 BASE_LOSEREWARD = -999
 TARGETNUM= 4
@@ -417,7 +418,7 @@ if __name__ == "__main__":
             value_cpu = value.cpu().numpy()
             # Environment step
             next_state, reward, next_done = env.step(action_cpu)
-
+            remainTime = state[i,STATE_REMAINTIME_POSITION]
             # save memories
             for i in range(env.unity_agent_num):
                 # save memories to buffers
@@ -433,7 +434,7 @@ if __name__ == "__main__":
                     # compute advantage and discounted reward
                     #print(i,"over")
                     roundTargetType = int(state[i,0])
-                    thisRewardsTensor = broadCastEndReward(rewards_bf[i],roundTargetType)
+                    thisRewardsTensor = broadCastEndReward(rewards_bf[i],remainTime)
                     adv, rt = GAE(
                         agent,
                         args,
@@ -646,7 +647,6 @@ if __name__ == "__main__":
 
                 # record rewards for plotting purposes
                 writer.add_scalar(f"Target{targetName}/value_loss", v_loss.item(), target_steps[thisT])
-                writer.add_scalar(f"Target{targetName}/value_loss", v_loss.item(), target_steps[thisT])
                 writer.add_scalar(f"Target{targetName}/dis_policy_loss", dis_pg_loss.item(), target_steps[thisT])
                 writer.add_scalar(f"Target{targetName}/con_policy_loss", con_pg_loss.item(), target_steps[thisT])
                 writer.add_scalar(f"Target{targetName}/total_loss", loss.item(), target_steps[thisT])
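
Note on the reward change: the patch swaps the second argument of
broadCastEndReward from the round's target type to the remaining round time
read from the agent state. The sketch below illustrates one plausible reading
of that signature: a time-scaled end-of-round reward broadcast. Only
STATE_REMAINTIME_POSITION, BASE_WINREWARD, and the call shape come from the
patch; broadcast_end_reward_sketch, MAX_ROUND_TIME, and the scaling rule are
illustrative assumptions, not the repo's actual implementation.

    import numpy as np
    import torch

    STATE_REMAINTIME_POSITION = 6  # per the patch: remain time sits in state column 6
    BASE_WINREWARD = 999           # per the patch
    MAX_ROUND_TIME = 100.0         # assumed; not in the patch


    def broadcast_end_reward_sketch(rewards: np.ndarray, remain_time: float) -> torch.Tensor:
        """Hypothetical stand-in for the repo's broadCastEndReward.

        Replaces the final step reward of a finished round with a win bonus
        scaled by how much time was left; the scaling rule is assumed.
        """
        this_reward = torch.tensor(rewards, dtype=torch.float32)
        if rewards[-1] > 0:  # round ended in a win
            this_reward[-1] = BASE_WINREWARD * (remain_time / MAX_ROUND_TIME)
        return this_reward


    # Usage mirroring the patched call site: state is (agent_num, state_size),
    # one row per Unity agent; rewards_bf buffers each agent's step rewards.
    state = np.zeros((8, 10), dtype=np.float32)
    state[:, STATE_REMAINTIME_POSITION] = 42.0
    rewards_bf = [np.array([0.0, 0.0, 1.0], dtype=np.float32) for _ in range(8)]

    i = 0
    remainTime = float(state[i, STATE_REMAINTIME_POSITION])
    thisRewardsTensor = broadcast_end_reward_sketch(rewards_bf[i], remainTime)
    print(thisRewardsTensor)  # tensor([  0.0000,   0.0000, 419.5800])

Under this reading, ending a round faster (more time remaining) yields a
larger terminal reward, which would explain why passing roundTargetType in
place of remainTime produced wrong reward magnitudes.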