Side Channel added
Add a side channel to report the per-target win ratio. Fix some bugs.
This commit is contained in:
parent 32d398dbef
commit 5631569b31
@@ -12,12 +12,13 @@ class Aimbot(gym.Env):
         envPath: str,
         workerID: int = 1,
         basePort: int = 100,
+        side_channels: list = []
     ):
         super(Aimbot, self).__init__()
         self.env = UnityEnvironment(
             file_name=envPath,
             seed=1,
-            side_channels=[],
+            side_channels=side_channels,
             worker_id=workerID,
             base_port=basePort,
         )
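The hunk above adds a side_channels parameter to the Aimbot wrapper and forwards it to the underlying UnityEnvironment. A minimal usage sketch follows; the aimbotEnv import path and the choice of EngineConfigurationChannel are illustrative assumptions, any SideChannel subclass can be passed the same way.

# Sketch only: `aimbotEnv` as the module name of the Aimbot wrapper is an assumption.
from aimbotEnv import Aimbot
from mlagents_envs.side_channel.engine_configuration_channel import (
    EngineConfigurationChannel,
)

# Any SideChannel instance can now be handed to the wrapper; it is forwarded
# to UnityEnvironment(side_channels=...).
engine_channel = EngineConfigurationChannel()
env = Aimbot(
    envPath="../Build/Aimbot-ParallelEnv",   # illustrative build path
    workerID=1,
    basePort=100,
    side_channels=[engine_channel],          # new parameter from this commit
)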
@@ -3,6 +3,7 @@ import wandb
 import time
 import numpy as np
 import random
+import uuid
 import torch
 import torch.nn as nn
 import torch.optim as optim
@@ -13,22 +14,31 @@ from torch.distributions.normal import Normal
 from torch.distributions.categorical import Categorical
 from distutils.util import strtobool
 from torch.utils.tensorboard import SummaryWriter
+from mlagents_envs.environment import UnityEnvironment
+from mlagents_envs.side_channel.side_channel import (
+    SideChannel,
+    IncomingMessage,
+    OutgoingMessage,
+)
+from typing import List
 
 bestReward = 0
 
 DEFAULT_SEED = 9331
-ENV_PATH = "../Build/Build-ParallelEnv-BigArea-6Enemy-EndBonus/Aimbot-ParallelEnv"
+ENV_PATH = "../Build/Build-ParallelEnv-Target-OffPolicy-SingleStack-SideChannel/Aimbot-ParallelEnv"
+SIDE_CHANNEL_UUID = uuid.UUID("8bbfb62a-99b4-457c-879d-b78b69066b5e")
 WAND_ENTITY = "koha9"
 WORKER_ID = 1
 BASE_PORT = 1000
 
-# max round steps per agent is 2500, 25 seconds
+# max round steps per agent is 2500/Decision_period, 25 seconds
+# !!!check every parameters before run!!!
 
 TOTAL_STEPS = 4000000
 BATCH_SIZE = 512
 MAX_TRAINNING_DATASETS = 8000
-DECISION_PERIOD = 2
-LEARNING_RATE = 7e-4
+DECISION_PERIOD = 1
+LEARNING_RATE = 1e-3
 GAMMA = 0.99
 GAE_LAMBDA = 0.95
 EPOCHS = 4
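The reworded comment ties round length to the decision period: a round lasts at most 2500 engine steps (about 25 seconds), so each agent gets 2500/DECISION_PERIOD decisions per round. A small sketch of that arithmetic; MAX_ROUND_ENGINE_STEPS is an illustrative name, only the 2500-step figure and DECISION_PERIOD come from the diff.

MAX_ROUND_ENGINE_STEPS = 2500   # ~25 seconds of simulation per round (from the comment)
DECISION_PERIOD = 1             # value set by this commit (was 2)

decisions_per_round = MAX_ROUND_ENGINE_STEPS // DECISION_PERIOD
print(decisions_per_round)      # 2500 with period 1, 1250 with the old period 2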
@@ -37,14 +47,19 @@ POLICY_COEF = 1.0
 ENTROPY_COEF = 0.01
 CRITIC_COEF = 0.5
 
-ANNEAL_LEARNING_RATE = False
+ANNEAL_LEARNING_RATE = True
 CLIP_VLOSS = True
 NORM_ADV = True
-TRAIN = False
+TRAIN = True
 
-WANDB_TACK = False
+WANDB_TACK = True
 LOAD_DIR = None
-LOAD_DIR = "../PPO-Model/bigArea-4.pt"
+LOAD_DIR = "../PPO-Model/Aimbot-target-last.pt"
 
+# public data
+TotalRounds = {"Go":0,"Attack":0,"Free":0}
+WinRounds = {"Go":0,"Attack":0,"Free":0}
+
+
 def parse_args():
     # fmt: off
@@ -127,9 +142,11 @@ class PPOAgent(nn.Module):
         self.continuous_size = env.unity_continuous_size
 
         self.network = nn.Sequential(
-            layer_init(nn.Linear(np.array(env.unity_observation_shape).prod(), 384)),
+            layer_init(nn.Linear(np.array(env.unity_observation_shape).prod(), 700)),
             nn.ReLU(),
-            layer_init(nn.Linear(384, 256)),
+            layer_init(nn.Linear(700, 500)),
+            nn.ReLU(),
+            layer_init(nn.Linear(500, 256)),
             nn.ReLU(),
         )
         self.actor_dis = layer_init(nn.Linear(256, self.discrete_size), std=0.01)
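The shared trunk grows from two layers (obs -> 384 -> 256) to three (obs -> 700 -> 500 -> 256). A rough sketch of the parameter-count impact; the observation dimension used below is illustrative.

def mlp_params(sizes):
    # weights + biases of a plain fully connected stack, ignoring activations
    return sum(i * o + o for i, o in zip(sizes[:-1], sizes[1:]))

obs_dim = 100  # illustrative; the real value is np.array(env.unity_observation_shape).prod()
print(mlp_params([obs_dim, 384, 256]))       # old trunk: 137,344 parameters at obs_dim=100
print(mlp_params([obs_dim, 700, 500, 256]))  # new trunk: 549,456 parameters, roughly 4x larger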
@@ -213,6 +230,52 @@ def GAE(agent, args, rewards, dones, values, next_obs, next_done):
     advantages = returns - values
     return advantages, returns
 
+
+class AimbotSideChannel(SideChannel):
+    def __init__(self, channel_id: uuid.UUID) -> None:
+        super().__init__(channel_id)
+    def on_message_received(self, msg: IncomingMessage) -> None:
+        """
+        Note: We must implement this method of the SideChannel interface to
+        receive messages from Unity
+        """
+        thisMessage = msg.read_string()
+        print(thisMessage)
+        thisResult = thisMessage.split("|")
+        if(thisResult[0] == "result"):
+            TotalRounds[thisResult[1]]+=1
+            if(thisResult[2] == "Win"):
+                WinRounds[thisResult[1]]+=1
+            print(TotalRounds)
+            print(WinRounds)
+        elif(thisResult[0] == "Error"):
+            print(thisMessage)
+    # send functions
+    def send_string(self, data: str) -> None:
+        """Send a string to the C# side"""
+        msg = OutgoingMessage()
+        msg.write_string(data)
+        super().queue_message_to_send(msg)
+
+    def send_bool(self, data: bool) -> None:
+        msg = OutgoingMessage()
+        msg.write_bool(data)
+        super().queue_message_to_send(msg)
+
+    def send_int(self, data: int) -> None:
+        msg = OutgoingMessage()
+        msg.write_int32(data)
+        super().queue_message_to_send(msg)
+
+    def send_float(self, data: float) -> None:
+        msg = OutgoingMessage()
+        msg.write_float32(data)
+        super().queue_message_to_send(msg)
+
+    def send_float_list(self, data: List[float]) -> None:
+        msg = OutgoingMessage()
+        msg.write_float32_list(data)
+        super().queue_message_to_send(msg)
 
 if __name__ == "__main__":
     args = parse_args()
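on_message_received expects pipe-separated strings from Unity such as "result|Go|Win": the first field selects the branch, the second is the game-mode key into TotalRounds/WinRounds, and the third marks a win. Below is a standalone sketch of that bookkeeping, handy for testing without a running Unity instance; the non-win outcome string ("Lose") is an assumption, only "Win" is checked in the diff.

TotalRounds = {"Go": 0, "Attack": 0, "Free": 0}
WinRounds = {"Go": 0, "Attack": 0, "Free": 0}

def handle_result_message(message: str) -> None:
    # mirrors AimbotSideChannel.on_message_received without the SideChannel plumbing
    fields = message.split("|")
    if fields[0] == "result":
        TotalRounds[fields[1]] += 1
        if fields[2] == "Win":
            WinRounds[fields[1]] += 1
    elif fields[0] == "Error":
        print(message)

for sample in ("result|Go|Win", "result|Attack|Lose", "result|Go|Lose"):
    handle_result_message(sample)
print(TotalRounds)  # {'Go': 2, 'Attack': 1, 'Free': 0}
print(WinRounds)    # {'Go': 1, 'Attack': 0, 'Free': 0}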
@@ -223,7 +286,8 @@ if __name__ == "__main__":
     device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")
 
     # Initialize environment anget optimizer
-    env = Aimbot(envPath=args.path, workerID=args.workerID, basePort=args.baseport)
+    aimBotsideChannel = AimbotSideChannel(SIDE_CHANNEL_UUID);
+    env = Aimbot(envPath=args.path, workerID=args.workerID, basePort=args.baseport,side_channels=[aimBotsideChannel])
     if args.load_dir is None:
         agent = PPOAgent(env).to(device)
     else:
@@ -234,8 +298,9 @@ if __name__ == "__main__":
     optimizer = optim.Adam(agent.parameters(), lr=args.lr, eps=1e-5)
 
     # Tensorboard and WandB Recorder
-    game_name = "Aimbot-BigArea-6Enemy-EndBonus"
-    run_name = f"{game_name}_{args.seed}_{int(time.time())}"
+    game_name = "Aimbot_Target"
+    game_type = "OffPolicy"
+    run_name = f"{game_name}_{game_type}_{args.seed}_{int(time.time())}"
     if args.wandb_track:
         wandb.init(
             project=game_name,
@@ -326,6 +391,7 @@ if __name__ == "__main__":
                 if next_done[i] == True:
                     # finished a round, send finished memories to training datasets
                     # compute advantage and discounted reward
+                    print(i,"over")
                     adv, rt = GAE(
                         agent,
                         args,
@@ -357,7 +423,7 @@ if __name__ == "__main__":
                     rewards_bf[i] = []
                     dones_bf[i] = []
                     values_bf[i] = []
-                    print(f"train dataset:{obs.size()[0]}/{args.datasetSize}")
+                    print(f"train dataset added:{obs.size()[0]}/{args.datasetSize}")
 
             if obs.size()[0] >= args.datasetSize:
                 # start train NN
@@ -365,10 +431,11 @@ if __name__ == "__main__":
            state, done = next_state, next_done
        else:
            # skip this step use last predict action
-           next_obs, reward, done = env.step(action_cpu)
+           next_obs, reward, next_done = env.step(action_cpu)
            # save memories
            for i in range(env.unity_agent_num):
                if next_done[i] == True:
+                   print(i,"over???")
                    # save last memories to buffers
                    ob_bf[i].append(state[i])
                    act_bf[i].append(action_cpu[i])
@@ -410,7 +477,7 @@ if __name__ == "__main__":
                    rewards_bf[i] = []
                    dones_bf[i] = []
                    values_bf[i] = []
-                   print(f"train dataset:{obs.size()[0]}/{args.datasetSize}")
+                   print(f"train dataset added:{obs.size()[0]}/{args.datasetSize}")
            state, done = next_state, next_done
        i += 1
 
@@ -530,9 +597,12 @@ if __name__ == "__main__":
                 "charts/SPS", int(global_step / (time.time() - start_time)), global_step
             )
             writer.add_scalar("charts/Reward", rewardsMean, global_step)
+            writer.add_scalar("charts/GoWinRatio", WinRounds["Go"]/TotalRounds["Go"], global_step)
+            writer.add_scalar("charts/AttackWinRatio", WinRounds["Attack"]/TotalRounds["Attack"], global_step)
+            writer.add_scalar("charts/FreeWinRatio", WinRounds["Free"]/TotalRounds["Free"], global_step)
             if rewardsMean > bestReward:
                 bestReward = rewardsMean
-                saveDir = "../PPO-Model/bigArea-384-128-hybrid-" + str(rewardsMean) + ".pt"
+                saveDir = "../PPO-Model/Target-700-500-256-hybrid-" + str(rewardsMean) + ".pt"
                 torch.save(agent, saveDir)
 
     env.close()
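Note that the new win-ratio scalars divide by TotalRounds directly, so they raise ZeroDivisionError until at least one round of every mode has finished. A guarded variant, not part of this commit, could look like this:

def win_ratio(mode: str) -> float:
    # report 0.0 until the first round of this mode has been played
    total = TotalRounds[mode]
    return WinRounds[mode] / total if total > 0 else 0.0

for mode in ("Go", "Attack", "Free"):
    writer.add_scalar(f"charts/{mode}WinRatio", win_ratio(mode), global_step)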
@@ -525,28 +525,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "start 0\n",
-      "end 3\n",
-      "start 3\n",
-      "end 6\n",
-      "start 6\n",
-      "end 9\n",
-      "start 9\n",
-      "end 12\n"
+      "{'Go': 1, 'Attack': 0, 'Free': 0}\n"
      ]
     }
    ],
    "source": [
-    "for i in range(0,10,3):\n",
-    "    print(\"start\",i)\n",
-    "    print('end',i+3)"
+    "Total = {\"Go\":0,\"Attack\":0,\"Free\":0}\n",
+    "\n",
+    "Total[\"Go\"] +=1\n",
+    "print(Total)"
   ]
  }
 ],