Add SideChannel support for Save_in_next_Trainning
commit 3bc5c30fd3 (parent 2741d6d51a)
@@ -11,6 +11,7 @@ from mlagents_envs.side_channel.side_channel import (
     IncomingMessage,
     OutgoingMessage,
 )
+from arguments import set_save_model


 class Aimbot(gym.Env):
@@ -176,18 +177,21 @@ class AimbotSideChannel(SideChannel):
         "Warning|Message1|Message2|Message3" or
         "Error|Message1|Message2|Message3"
         """
-        this_message = msg.read_string()
-        this_result = this_message.split("|")
-        print(this_result)
-        if this_result[0] == "Warning":
-            if this_result[1] == "Result":
-                airecorder.total_rounds[this_result[2]] += 1
-                if this_result[3] == "Win":
-                    airecorder.win_rounds[this_result[2]] += 1
+        this_message_Original = msg.read_string()
+        this_message = this_message_Original.split("|")
+        print(this_message)
+        if this_message[0] == "Warning":
+            if this_message[1] == "Result":
+                airecorder.total_rounds[this_message[2]] += 1
+                if this_message[3] == "Win":
+                    airecorder.win_rounds[this_message[2]] += 1
             # print(TotalRounds)
             # print(WinRounds)
-        elif this_result[0] == "Error":
-            print(this_message)
+            if this_message[1] == "Command":
+                set_save_model(True)
+                print("Command: " + this_message_Original)
+        elif this_message[0] == "Error":
+            print(this_message_Original)
         # # while Message type is Warning
         # if(thisResult[0] == "Warning"):
         # # while Message1 is result means one game is over
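The handler above dispatches on pipe-delimited strings such as "Warning|Result|..." and now "Warning|Command|...", the latter requesting a model save via set_save_model(True). Below is a minimal sketch of exercising that kind of dispatch without a running Unity build, assuming only the standard mlagents_envs side-channel classes already imported above; the channel class, UUID, and payload are illustrative and are not the project's AimbotSideChannel.

import uuid
from mlagents_envs.side_channel.side_channel import (
    SideChannel,
    IncomingMessage,
    OutgoingMessage,
)

class EchoChannel(SideChannel):
    """Toy stand-in that parses 'Type|Field1|Field2|...' strings like the handler above."""

    def __init__(self) -> None:
        # illustrative UUID, not the one registered by the project
        super().__init__(uuid.uuid4())
        self.save_requested = False

    def on_message_received(self, msg: IncomingMessage) -> None:
        fields = msg.read_string().split("|")
        if fields[0] == "Warning" and fields[1] == "Command":
            self.save_requested = True  # analogous to calling set_save_model(True)

# Round-trip a payload through the message classes to simulate what Unity would send.
channel = EchoChannel()
out = OutgoingMessage()
out.write_string("Warning|Command|Save")
channel.on_message_received(IncomingMessage(bytes(out.buffer)))
print(channel.save_requested)  # True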
@@ -181,30 +181,84 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "array([[0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.]])"
+       "3"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "import numpy as np\n",
-    "np.zeros((8, 4))"
+    "y=\"a;b;c\"\n",
+    "len(y.split(\";\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2]\n"
+     ]
+    }
+   ],
+   "source": [
+    "a = np.array([1,2,3,4])\n",
+    "print(a[[False,True,False,False]])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1, 2, 3, 4}"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a = {1,2,3}\n",
+    "a.add(4)\n",
+    "a"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([3, 4])"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a = np.array([[1,3],[2,4]])\n",
+    "a.max(axis=1)\n"
    ]
   }
  ],
@@ -4,6 +4,7 @@ import random
 import uuid
 import torch
 import atexit
+import os

 from aimbotEnv import Aimbot
 from aimbotEnv import AimbotSideChannel
@@ -12,13 +13,14 @@ from airecorder import WandbRecorder
 from aimemory import PPOMem
 from aimemory import Targets
 from arguments import parse_args
+from arguments import set_save_model, is_save_model
 import torch.optim as optim

 # side channel uuid
 SIDE_CHANNEL_UUID = uuid.UUID("8bbfb62a-99b4-457c-879d-b78b69066b5e")
 # tensorboard names
-GAME_NAME = "Aimbot_Hybrid_V3"
-GAME_TYPE = "Mix_Verification"
+GAME_NAME = "Aimbot_Hybrid_Full_MNN_MultiLevel"
+GAME_TYPE = "GotoOnly-Level2345"

 if __name__ == "__main__":
     args = parse_args()
@@ -57,16 +59,6 @@ if __name__ == "__main__":
     run_name = f"{GAME_TYPE}_{args.seed}_{int(time.time())}"
     wdb_recorder = WandbRecorder(GAME_NAME, GAME_TYPE, run_name, args)

-    @atexit.register
-    def save_model():
-        # close env
-        env.close()
-        if args.save_model:
-            # save model while exit
-            save_dir = "../PPO-Model/" + run_name + "_last.pt"
-            torch.save(agent, save_dir)
-            print("save model to " + save_dir)
-
     # start the game
     total_update_step = args.target_num * args.total_timesteps // args.datasetSize
     target_steps = [0 for i in range(args.target_num)]
@@ -222,11 +214,16 @@ if __name__ == "__main__":
            )
            # print cost time as seconds
            print("cost time:", time.time() - start_time)
-           # New Record!
-           if TotalRewardMean > best_reward and args.save_model:
-               best_reward = target_reward_mean
-               saveDir = "../PPO-Model/" + run_name + "_" + str(TotalRewardMean) + ".pt"
-               torch.save(agent, saveDir)
+           # New Record! or save model
+           if ((is_save_model() or TotalRewardMean > best_reward) and args.save_model):
+               # check saveDir is exist
+               saveDir = "../PPO-Model/" + run_name + "/"
+               if not os.path.isdir(saveDir):
+                   os.mkdir(saveDir)
+               best_reward = TotalRewardMean
+               torch.save(agent, saveDir + str(TotalRewardMean) + ".pt")
+               print("Model Saved!")
+               set_save_model(False)
        else:
            # train mode off
            mean_reward_list = []  # for WANDB
@@ -249,7 +246,10 @@ if __name__ == "__main__":
    TotalRewardMean = np.mean(mean_reward_list)
    wdb_recorder.writer.add_scalar("GlobalCharts/TotalRewardMean", TotalRewardMean, total_steps)

-   saveDir = "../PPO-Model/" + run_name + "_last.pt"
-   torch.save(agent, saveDir)
+   saveDir = "../PPO-Model/" + run_name + "/"
+   if not os.path.isdir(saveDir):
+       os.mkdir(saveDir)
+   best_reward = target_reward_mean
+   torch.save(agent, saveDir + "_last.pt")
    env.close()
    wdb_recorder.writer.close()
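Both save blocks above build saveDir as "../PPO-Model/<run_name>/" and create the folder with os.path.isdir / os.mkdir before calling torch.save. A compact equivalent is sketched below using os.makedirs(exist_ok=True), which also tolerates missing parent directories; the helper and its arguments are illustrative only and are not part of the repository.

import os
import torch

def save_checkpoint(agent, run_name: str, tag: str) -> str:
    """Save `agent` under ../PPO-Model/<run_name>/<tag>.pt, creating the folder if needed."""
    save_dir = os.path.join("../PPO-Model", run_name)
    os.makedirs(save_dir, exist_ok=True)  # no error if the directory already exists
    save_path = os.path.join(save_dir, f"{tag}.pt")
    torch.save(agent, save_path)
    return save_path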
@@ -4,21 +4,19 @@ import uuid
 from distutils.util import strtobool

 DEFAULT_SEED = 9331
-ENV_PATH = "../Build/3.1.6/Aimbot-ParallelEnv"
+ENV_PATH = "../Build/3.4/Aimbot-ParallelEnv"
 WAND_ENTITY = "koha9"
 WORKER_ID = 1
 BASE_PORT = 1000

 # tensorboard names
-GAME_NAME = "Aimbot_Target_Hybrid_Full_MNN_V1"
-GAME_TYPE = "Mix_Train"

 # max round steps per agent is 2500/Decision_period, 25 seconds
 TOTAL_STEPS = 3150000
 BATCH_SIZE = 512
 MAX_TRAINNING_DATASETS = 6000
 DECISION_PERIOD = 1
-LEARNING_RATE = 6.5e-4
+LEARNING_RATE = 1.5e-4
 GAMMA = 0.99
 GAE_LAMBDA = 0.95
 EPOCHS = 3
@@ -27,18 +25,17 @@ LOSS_COEF = [1.0, 1.0, 1.0, 1.0] # free go attack defence
 POLICY_COEF = [1.0, 1.0, 1.0, 1.0]
 ENTROPY_COEF = [0.05, 0.05, 0.05, 0.05]
 CRITIC_COEF = [0.5, 0.5, 0.5, 0.5]
-TARGET_LEARNING_RATE = 1e-5
+TARGET_LEARNING_RATE = 1e-6

 FREEZE_VIEW_NETWORK = False
-BROADCASTREWARD = False
 ANNEAL_LEARNING_RATE = True
 CLIP_VLOSS = True
 NORM_ADV = False
 TRAIN = True
-SAVE_MODEL = False
-WANDB_TACK = False
+SAVE_MODEL = True
+WANDB_TACK = True
 LOAD_DIR = None
-#LOAD_DIR = "../PPO-Model/PList_Go_LeakyReLU_9331_1677965178_bestGoto/PList_Go_LeakyReLU_9331_1677965178_10.709002.pt"
+LOAD_DIR = "../PPO-Model/GotoOnly-Level1234_9331_1697122986/8.853553.pt"

 # Unity Environment Parameters
 TARGET_STATE_SIZE = 6
@@ -53,6 +50,16 @@ TARGETNUM= 4
 ENV_TIMELIMIT = 30
 RESULT_BROADCAST_RATIO = 1/ENV_TIMELIMIT

+save_model_this_episode = False
+
+def is_save_model():
+    global save_model_this_episode
+    return save_model_this_episode
+def set_save_model(save_model:bool):
+    print("set save model to ",save_model)
+    global save_model_this_episode
+    save_model_this_episode = save_model
+
 def parse_args():
     # fmt: off
     # pytorch and environment parameters
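The two helpers above expose a module-level flag: the side channel sets it when a Command message arrives, and the training loop polls it at the next save check and clears it once the model has been written. A minimal sketch of that hand-off follows; the flag functions mirror arguments.py, while maybe_save and its arguments are illustrative only.

# module-level save flag, as in arguments.py
save_model_this_episode = False

def is_save_model() -> bool:
    return save_model_this_episode

def set_save_model(save_model: bool) -> None:
    global save_model_this_episode
    save_model_this_episode = save_model

# illustrative consumer, mirroring the training loop's save condition
def maybe_save(reward_mean: float, best_reward: float, save_enabled: bool = True) -> bool:
    if (is_save_model() or reward_mean > best_reward) and save_enabled:
        set_save_model(False)  # clear the request once it has been honoured
        return True
    return False

set_save_model(True)
print(maybe_save(reward_mean=1.0, best_reward=2.0))  # True: saved because a command was received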
@@ -97,12 +104,10 @@ def parse_args():
        help="the number of steps to run in each environment per policy rollout")
    parser.add_argument("--result-broadcast-ratio", type=float, default=RESULT_BROADCAST_RATIO,
        help="broadcast result when win round is reached,r=result-broadcast-ratio*remainTime")
-   parser.add_argument("--broadCastEndReward", type=lambda x: bool(strtobool(x)), default=BROADCASTREWARD, nargs="?", const=True,
-       help="save model or not")
    # target_learning_rate
    parser.add_argument("--target-lr", type=float, default=TARGET_LEARNING_RATE,
        help="target value of downscaling the learning rate")

    # POLICY_COEF ENTROPY_COEF CRITIC_COEF LOSS_COEF
    parser.add_argument("--policy-coef", type=float, default=POLICY_COEF,
        help="coefficient of the policy loss")
@@ -49,9 +49,9 @@ class PPOAgent(nn.Module):
         self.hidden_networks = nn.ModuleList(
             [
                 nn.Sequential(
-                    layer_init(nn.Linear(self.state_size, 128)),
+                    layer_init(nn.Linear(self.state_size, 256)),
                     nn.LeakyReLU(),
-                    layer_init(nn.Linear(128, 64)),
+                    layer_init(nn.Linear(256, 128)),
                     nn.LeakyReLU(),
                 )
                 for i in range(self.target_num)
@@ -59,16 +59,16 @@ class PPOAgent(nn.Module):
         )

         self.actor_dis = nn.ModuleList(
-            [layer_init(nn.Linear(64, self.discrete_size), std=0.5) for i in range(self.target_num)]
+            [layer_init(nn.Linear(128, self.discrete_size), std=0.5) for i in range(self.target_num)]
         )
         self.actor_mean = nn.ModuleList(
-            [layer_init(nn.Linear(64, self.continuous_size), std=0.5) for i in range(self.target_num)]
+            [layer_init(nn.Linear(128, self.continuous_size), std=0) for i in range(self.target_num)]
         )
         self.actor_logstd = nn.ParameterList(
             [nn.Parameter(torch.zeros(1, self.continuous_size)) for i in range(self.target_num)]
         )
         self.critic = nn.ModuleList(
-            [layer_init(nn.Linear(64, 1), std=1) for i in range(self.target_num)]
+            [layer_init(nn.Linear(128, 1), std=0) for i in range(self.target_num)]
         )

     def get_value(self, state: torch.Tensor):
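The widened layers above (hidden trunk now 256 -> 128 units) feed one head per target type, each held in an nn.ModuleList. A sketch of how such per-target heads are typically indexed in a forward pass is shown below; the sizes (STATE_SIZE, DISCRETE_SIZE, and so on) and the indexing logic are assumptions for illustration, not the repository's actual get_value or forward implementation.

import torch
import torch.nn as nn

STATE_SIZE, DISCRETE_SIZE, TARGET_NUM = 30, 9, 4  # illustrative dimensions

# one hidden trunk and one set of heads per target type, mirroring the ModuleLists above
hidden_networks = nn.ModuleList(
    [nn.Sequential(nn.Linear(STATE_SIZE, 256), nn.LeakyReLU(),
                   nn.Linear(256, 128), nn.LeakyReLU())
     for _ in range(TARGET_NUM)]
)
actor_dis = nn.ModuleList([nn.Linear(128, DISCRETE_SIZE) for _ in range(TARGET_NUM)])
critic = nn.ModuleList([nn.Linear(128, 1) for _ in range(TARGET_NUM)])

state = torch.randn(8, STATE_SIZE)  # a batch of observations
target_idx = 2                      # target type these observations belong to
hidden = hidden_networks[target_idx](state)
dis_logits = actor_dis[target_idx](hidden)
value = critic[target_idx](hidden)
print(dis_logits.shape, value.shape)  # torch.Size([8, 9]) torch.Size([8, 1])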