Side Channel added

Add a side channel to record the target win ratio.
Fix some bugs.
Koha9 2022-11-30 06:45:07 +09:00
parent 32d398dbef
commit 5631569b31
3 changed files with 95 additions and 30 deletions
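For orientation, the core of this change is a string protocol between the Unity build and the Python trainer: the game reports round results over an ML-Agents side channel, and the trainer tallies them per game mode so the training loop can log win ratios. Below is a minimal sketch of that counting logic; the "result|<mode>|<outcome>" format is inferred from the parser added in this commit and is an assumption, not a documented protocol.

# Minimal sketch of the win-ratio bookkeeping this commit introduces.
# The "result|<mode>|<outcome>" message format is an assumption inferred from
# on_message_received(); the dict names match the ones added in the training script.
TotalRounds = {"Go": 0, "Attack": 0, "Free": 0}
WinRounds = {"Go": 0, "Attack": 0, "Free": 0}

def handle_result_message(message: str) -> None:
    # Unity is expected to send e.g. "result|Go|Win" or "result|Attack|Lose"
    parts = message.split("|")
    if parts[0] == "result":
        TotalRounds[parts[1]] += 1
        if parts[2] == "Win":
            WinRounds[parts[1]] += 1

# The training loop then logs, for example:
# writer.add_scalar("charts/GoWinRatio", WinRounds["Go"] / TotalRounds["Go"], global_step)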

View File

@@ -12,12 +12,13 @@ class Aimbot(gym.Env):
         envPath: str,
         workerID: int = 1,
         basePort: int = 100,
+        side_channels: list = []
     ):
         super(Aimbot, self).__init__()
         self.env = UnityEnvironment(
             file_name=envPath,
             seed=1,
-            side_channels=[],
+            side_channels=side_channels,
             worker_id=workerID,
             base_port=basePort,
         )
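A quick usage sketch of the new side_channels parameter, assuming the AimbotSideChannel class and SIDE_CHANNEL_UUID defined later in this commit:

# Register a side channel when constructing the wrapped Unity environment.
# AimbotSideChannel and SIDE_CHANNEL_UUID come from the training script below;
# the worker id and port values are illustrative.
side_channel = AimbotSideChannel(SIDE_CHANNEL_UUID)
env = Aimbot(
    envPath="../Build/Build-ParallelEnv-Target-OffPolicy-SingleStack-SideChannel/Aimbot-ParallelEnv",
    workerID=1,
    basePort=1000,
    side_channels=[side_channel],
)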

View File

@@ -3,6 +3,7 @@ import wandb
 import time
 import numpy as np
 import random
+import uuid
 import torch
 import torch.nn as nn
 import torch.optim as optim
@@ -13,22 +14,31 @@ from torch.distributions.normal import Normal
 from torch.distributions.categorical import Categorical
 from distutils.util import strtobool
 from torch.utils.tensorboard import SummaryWriter
+from mlagents_envs.environment import UnityEnvironment
+from mlagents_envs.side_channel.side_channel import (
+    SideChannel,
+    IncomingMessage,
+    OutgoingMessage,
+)
+from typing import List
 
 bestReward = 0
 
 DEFAULT_SEED = 9331
-ENV_PATH = "../Build/Build-ParallelEnv-BigArea-6Enemy-EndBonus/Aimbot-ParallelEnv"
+ENV_PATH = "../Build/Build-ParallelEnv-Target-OffPolicy-SingleStack-SideChannel/Aimbot-ParallelEnv"
+SIDE_CHANNEL_UUID = uuid.UUID("8bbfb62a-99b4-457c-879d-b78b69066b5e")
 WAND_ENTITY = "koha9"
 WORKER_ID = 1
 BASE_PORT = 1000
-# max round steps per agent is 2500, 25 seconds
+# max round steps per agent is 2500/Decision_period, 25 seconds
+# !!!check every parameters before run!!!
 TOTAL_STEPS = 4000000
 BATCH_SIZE = 512
 MAX_TRAINNING_DATASETS = 8000
-DECISION_PERIOD = 2
-LEARNING_RATE = 7e-4
+DECISION_PERIOD = 1
+LEARNING_RATE = 1e-3
 GAMMA = 0.99
 GAE_LAMBDA = 0.95
 EPOCHS = 4
@@ -37,14 +47,19 @@ POLICY_COEF = 1.0
 ENTROPY_COEF = 0.01
 CRITIC_COEF = 0.5
-ANNEAL_LEARNING_RATE = False
+ANNEAL_LEARNING_RATE = True
 CLIP_VLOSS = True
 NORM_ADV = True
-TRAIN = False
-WANDB_TACK = False
+TRAIN = True
+WANDB_TACK = True
 LOAD_DIR = None
-LOAD_DIR = "../PPO-Model/bigArea-4.pt"
+LOAD_DIR = "../PPO-Model/Aimbot-target-last.pt"
+
+# public data
+TotalRounds = {"Go":0,"Attack":0,"Free":0}
+WinRounds = {"Go":0,"Attack":0,"Free":0}
 
 def parse_args():
     # fmt: off
@@ -127,9 +142,11 @@ class PPOAgent(nn.Module):
         self.continuous_size = env.unity_continuous_size
         self.network = nn.Sequential(
-            layer_init(nn.Linear(np.array(env.unity_observation_shape).prod(), 384)),
+            layer_init(nn.Linear(np.array(env.unity_observation_shape).prod(), 700)),
             nn.ReLU(),
-            layer_init(nn.Linear(384, 256)),
+            layer_init(nn.Linear(700, 500)),
+            nn.ReLU(),
+            layer_init(nn.Linear(500, 256)),
             nn.ReLU(),
         )
         self.actor_dis = layer_init(nn.Linear(256, self.discrete_size), std=0.01)
@@ -213,6 +230,52 @@ def GAE(agent, args, rewards, dones, values, next_obs, next_done):
     advantages = returns - values
     return advantages, returns
 
+
+class AimbotSideChannel(SideChannel):
+    def __init__(self, channel_id: uuid.UUID) -> None:
+        super().__init__(channel_id)
+
+    def on_message_received(self, msg: IncomingMessage) -> None:
+        """
+        Note: We must implement this method of the SideChannel interface to
+        receive messages from Unity
+        """
+        thisMessage = msg.read_string()
+        print(thisMessage)
+        thisResult = thisMessage.split("|")
+        if(thisResult[0] == "result"):
+            TotalRounds[thisResult[1]]+=1
+            if(thisResult[2] == "Win"):
+                WinRounds[thisResult[1]]+=1
+            print(TotalRounds)
+            print(WinRounds)
+        elif(thisResult[0] == "Error"):
+            print(thisMessage)
+
+    # send functions
+    def send_string(self, data: str) -> None:
+        """Send a string to C#."""
+        msg = OutgoingMessage()
+        msg.write_string(data)
+        super().queue_message_to_send(msg)
+
+    def send_bool(self, data: bool) -> None:
+        msg = OutgoingMessage()
+        msg.write_bool(data)
+        super().queue_message_to_send(msg)
+
+    def send_int(self, data: int) -> None:
+        msg = OutgoingMessage()
+        msg.write_int32(data)
+        super().queue_message_to_send(msg)
+
+    def send_float(self, data: float) -> None:
+        msg = OutgoingMessage()
+        msg.write_float32(data)
+        super().queue_message_to_send(msg)
+
+    def send_float_list(self, data: List[float]) -> None:
+        msg = OutgoingMessage()
+        msg.write_float32_list(data)
+        super().queue_message_to_send(msg)
+
+
 if __name__ == "__main__":
     args = parse_args()
@@ -223,7 +286,8 @@ if __name__ == "__main__":
     device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")
     # Initialize environment anget optimizer
-    env = Aimbot(envPath=args.path, workerID=args.workerID, basePort=args.baseport)
+    aimBotsideChannel = AimbotSideChannel(SIDE_CHANNEL_UUID);
+    env = Aimbot(envPath=args.path, workerID=args.workerID, basePort=args.baseport,side_channels=[aimBotsideChannel])
     if args.load_dir is None:
         agent = PPOAgent(env).to(device)
     else:
@@ -234,8 +298,9 @@ if __name__ == "__main__":
     optimizer = optim.Adam(agent.parameters(), lr=args.lr, eps=1e-5)
 
     # Tensorboard and WandB Recorder
-    game_name = "Aimbot-BigArea-6Enemy-EndBonus"
-    run_name = f"{game_name}_{args.seed}_{int(time.time())}"
+    game_name = "Aimbot_Target"
+    game_type = "OffPolicy"
+    run_name = f"{game_name}_{game_type}_{args.seed}_{int(time.time())}"
     if args.wandb_track:
         wandb.init(
             project=game_name,
@@ -326,6 +391,7 @@ if __name__ == "__main__":
                 if next_done[i] == True:
                     # finished a round, send finished memories to training datasets
                     # compute advantage and discounted reward
+                    print(i,"over")
                     adv, rt = GAE(
                         agent,
                         args,
@@ -357,7 +423,7 @@ if __name__ == "__main__":
                     rewards_bf[i] = []
                     dones_bf[i] = []
                     values_bf[i] = []
-                    print(f"train dataset:{obs.size()[0]}/{args.datasetSize}")
+                    print(f"train dataset added:{obs.size()[0]}/{args.datasetSize}")
                 if obs.size()[0] >= args.datasetSize:
                     # start train NN
@@ -365,10 +431,11 @@ if __name__ == "__main__":
                 state, done = next_state, next_done
             else:
                 # skip this step use last predict action
-                next_obs, reward, done = env.step(action_cpu)
+                next_obs, reward, next_done = env.step(action_cpu)
                 # save memories
                 for i in range(env.unity_agent_num):
                     if next_done[i] == True:
+                        print(i,"over???")
                         # save last memories to buffers
                         ob_bf[i].append(state[i])
                         act_bf[i].append(action_cpu[i])
@@ -410,7 +477,7 @@ if __name__ == "__main__":
                         rewards_bf[i] = []
                         dones_bf[i] = []
                         values_bf[i] = []
-                        print(f"train dataset:{obs.size()[0]}/{args.datasetSize}")
+                        print(f"train dataset added:{obs.size()[0]}/{args.datasetSize}")
                 state, done = next_state, next_done
             i += 1
@@ -530,9 +597,12 @@ if __name__ == "__main__":
                 "charts/SPS", int(global_step / (time.time() - start_time)), global_step
             )
             writer.add_scalar("charts/Reward", rewardsMean, global_step)
+            writer.add_scalar("charts/GoWinRatio", WinRounds["Go"]/TotalRounds["Go"], global_step)
+            writer.add_scalar("charts/AttackWinRatio", WinRounds["Attack"]/TotalRounds["Attack"], global_step)
+            writer.add_scalar("charts/FreeWinRatio", WinRounds["Free"]/TotalRounds["Free"], global_step)
             if rewardsMean > bestReward:
                 bestReward = rewardsMean
-                saveDir = "../PPO-Model/bigArea-384-128-hybrid-" + str(rewardsMean) + ".pt"
+                saveDir = "../PPO-Model/Target-700-500-256-hybrid-" + str(rewardsMean) + ".pt"
                 torch.save(agent, saveDir)
 
     env.close()
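Note that the send_* helpers on AimbotSideChannel are not called anywhere in this commit. If the trainer ever needs to push data back to the C# side, usage would look roughly like the following; the payloads are purely illustrative:

# Hypothetical outbound messages (nothing in this commit sends these).
aimBotsideChannel.send_string("hello from python")      # arbitrary text, assumption
aimBotsideChannel.send_float_list([0.5, 0.25, 0.75])    # e.g. per-mode win ratios, assumption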

View File

@@ -525,28 +525,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "start 0\n",
-      "end 3\n",
-      "start 3\n",
-      "end 6\n",
-      "start 6\n",
-      "end 9\n",
-      "start 9\n",
-      "end 12\n"
+      "{'Go': 1, 'Attack': 0, 'Free': 0}\n"
      ]
     }
    ],
    "source": [
-    "for i in range(0,10,3):\n",
-    "    print(\"start\",i)\n",
-    "    print('end',i+3)"
+    "Total = {\"Go\":0,\"Attack\":0,\"Free\":0}\n",
+    "\n",
+    "Total[\"Go\"] +=1\n",
+    "print(Total)"
    ]
   }
  ],