Add SideChannel support for Save_in_next_Trainning

Koha9 2023-10-15 06:05:59 +09:00
parent 2741d6d51a
commit 3bc5c30fd3
5 changed files with 121 additions and 58 deletions

View File

@@ -11,6 +11,7 @@ from mlagents_envs.side_channel.side_channel import (
    IncomingMessage,
    OutgoingMessage,
)
+from arguments import set_save_model

class Aimbot(gym.Env):
@@ -176,18 +177,21 @@ class AimbotSideChannel(SideChannel):
        "Warning|Message1|Message2|Message3" or
        "Error|Message1|Message2|Message3"
        """
-        this_message = msg.read_string()
-        this_result = this_message.split("|")
-        print(this_result)
-        if this_result[0] == "Warning":
-            if this_result[1] == "Result":
-                airecorder.total_rounds[this_result[2]] += 1
-                if this_result[3] == "Win":
-                    airecorder.win_rounds[this_result[2]] += 1
+        this_message_Original = msg.read_string()
+        this_message = this_message_Original.split("|")
+        print(this_message)
+        if this_message[0] == "Warning":
+            if this_message[1] == "Result":
+                airecorder.total_rounds[this_message[2]] += 1
+                if this_message[3] == "Win":
+                    airecorder.win_rounds[this_message[2]] += 1
            # print(TotalRounds)
            # print(WinRounds)
-        elif this_result[0] == "Error":
-            print(this_message)
+            if this_message[1] == "Command":
+                set_save_model(True)
+                print("Command: " + this_message_Original)
+        elif this_message[0] == "Error":
+            print(this_message_Original)
        # # while Message type is Warning
        # if(thisResult[0] == "Warning"):
        # # while Message1 is result means one game is over
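Note: with this change, a side-channel string of the form "Warning|Command|..." asks the trainer to save the model at its next checkpoint by calling set_save_model(True); "Warning|Result|..." keeps updating the round/win counters as before, and "Error|..." is only printed. The sketch below only illustrates that string protocol with the mlagents_envs message objects imported above; the Unity game is the real sender, and building the IncomingMessage locally here is just a stand-in for it.

# Illustration only: mimic what the Unity side channel sends and what
# on_message_received() parses. Not part of the commit.
from mlagents_envs.side_channel.side_channel import IncomingMessage, OutgoingMessage

def fake_command_message(command: str = "Save") -> IncomingMessage:
    out = OutgoingMessage()
    out.write_string(f"Warning|Command|{command}")   # "Type|Message1|Message2"
    return IncomingMessage(bytes(out.buffer))        # what the receiver would read

parts = fake_command_message().read_string().split("|")
if parts[0] == "Warning" and parts[1] == "Command":
    # the real channel calls set_save_model(True) here,
    # flagging the training loop to save at its next checkpoint
    print("would save model on next training update")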

View File

@@ -181,30 +181,84 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "array([[0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.]])"
+       "3"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "import numpy as np\n",
-    "np.zeros((8, 4))"
+    "y=\"a;b;c\"\n",
+    "len(y.split(\";\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2]\n"
+     ]
+    }
+   ],
+   "source": [
+    "a = np.array([1,2,3,4])\n",
+    "print(a[[False,True,False,False]])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1, 2, 3, 4}"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a = {1,2,3}\n",
+    "a.add(4)\n",
+    "a"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([3, 4])"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a = np.array([[1,3],[2,4]])\n",
+    "a.max(axis=1)\n"
    ]
   }
  ],

View File

@@ -4,6 +4,7 @@ import random
import uuid
import torch
import atexit
+import os

from aimbotEnv import Aimbot
from aimbotEnv import AimbotSideChannel
@@ -12,13 +13,14 @@ from airecorder import WandbRecorder
from aimemory import PPOMem
from aimemory import Targets
from arguments import parse_args
+from arguments import set_save_model, is_save_model
import torch.optim as optim

# side channel uuid
SIDE_CHANNEL_UUID = uuid.UUID("8bbfb62a-99b4-457c-879d-b78b69066b5e")
# tensorboard names
-GAME_NAME = "Aimbot_Hybrid_V3"
-GAME_TYPE = "Mix_Verification"
+GAME_NAME = "Aimbot_Hybrid_Full_MNN_MultiLevel"
+GAME_TYPE = "GotoOnly-Level2345"

if __name__ == "__main__":
    args = parse_args()
@@ -57,16 +59,6 @@ if __name__ == "__main__":
    run_name = f"{GAME_TYPE}_{args.seed}_{int(time.time())}"
    wdb_recorder = WandbRecorder(GAME_NAME, GAME_TYPE, run_name, args)

-    @atexit.register
-    def save_model():
-        # close env
-        env.close()
-        if args.save_model:
-            # save model while exit
-            save_dir = "../PPO-Model/" + run_name + "_last.pt"
-            torch.save(agent, save_dir)
-            print("save model to " + save_dir)

    # start the game
    total_update_step = args.target_num * args.total_timesteps // args.datasetSize
    target_steps = [0 for i in range(args.target_num)]
@@ -222,11 +214,16 @@
            )
            # print cost time as seconds
            print("cost time:", time.time() - start_time)
-            # New Record!
-            if TotalRewardMean > best_reward and args.save_model:
-                best_reward = target_reward_mean
-                saveDir = "../PPO-Model/" + run_name + "_" + str(TotalRewardMean) + ".pt"
-                torch.save(agent, saveDir)
+            # New Record! or save model
+            if ((is_save_model() or TotalRewardMean > best_reward) and args.save_model):
+                # check saveDir is exist
+                saveDir = "../PPO-Model/" + run_name + "/"
+                if not os.path.isdir(saveDir):
+                    os.mkdir(saveDir)
+                best_reward = TotalRewardMean
+                torch.save(agent, saveDir + str(TotalRewardMean) + ".pt")
+                print("Model Saved!")
+                set_save_model(False)
        else:
            # train mode off
            mean_reward_list = []  # for WANDB
@@ -249,7 +246,10 @@ if __name__ == "__main__":
        TotalRewardMean = np.mean(mean_reward_list)
        wdb_recorder.writer.add_scalar("GlobalCharts/TotalRewardMean", TotalRewardMean, total_steps)
-    saveDir = "../PPO-Model/" + run_name + "_last.pt"
-    torch.save(agent, saveDir)
+    saveDir = "../PPO-Model/" + run_name + "/"
+    if not os.path.isdir(saveDir):
+        os.mkdir(saveDir)
+    best_reward = target_reward_mean
+    torch.save(agent, saveDir + "_last.pt")
    env.close()
    wdb_recorder.writer.close()
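Note: both save sites now write into a per-run folder, "../PPO-Model/<run_name>/", creating it on first use, instead of the old flat "<run_name>_last.pt" files and the removed atexit hook. Below is a minimal sketch of the same idea, using os.makedirs (which tolerates existing and nested directories) rather than the isdir/mkdir pair; save_checkpoint and tag are illustrative names, while run_name and agent are the script's own variables.

import os
import torch

def save_checkpoint(agent, run_name: str, tag: str, base_dir: str = "../PPO-Model") -> str:
    # per-run folder: ../PPO-Model/<run_name>/<tag>.pt
    save_dir = os.path.join(base_dir, run_name)
    os.makedirs(save_dir, exist_ok=True)   # no-op if the folder already exists
    save_path = os.path.join(save_dir, tag + ".pt")
    torch.save(agent, save_path)           # the script pickles the whole agent object
    return save_path

# e.g. save_checkpoint(agent, run_name, str(TotalRewardMean)) on a new best reward,
#      save_checkpoint(agent, run_name, "last") at shutdown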

View File

@@ -4,21 +4,19 @@ import uuid
from distutils.util import strtobool

DEFAULT_SEED = 9331
-ENV_PATH = "../Build/3.1.6/Aimbot-ParallelEnv"
+ENV_PATH = "../Build/3.4/Aimbot-ParallelEnv"
WAND_ENTITY = "koha9"
WORKER_ID = 1
BASE_PORT = 1000

# tensorboard names
-GAME_NAME = "Aimbot_Target_Hybrid_Full_MNN_V1"
-GAME_TYPE = "Mix_Train"

# max round steps per agent is 2500/Decision_period, 25 seconds
TOTAL_STEPS = 3150000
BATCH_SIZE = 512
MAX_TRAINNING_DATASETS = 6000
DECISION_PERIOD = 1
-LEARNING_RATE = 6.5e-4
+LEARNING_RATE = 1.5e-4
GAMMA = 0.99
GAE_LAMBDA = 0.95
EPOCHS = 3
@@ -27,18 +25,17 @@ LOSS_COEF = [1.0, 1.0, 1.0, 1.0] # free go attack defence
POLICY_COEF = [1.0, 1.0, 1.0, 1.0]
ENTROPY_COEF = [0.05, 0.05, 0.05, 0.05]
CRITIC_COEF = [0.5, 0.5, 0.5, 0.5]
-TARGET_LEARNING_RATE = 1e-5
+TARGET_LEARNING_RATE = 1e-6

FREEZE_VIEW_NETWORK = False
-BROADCASTREWARD = False
ANNEAL_LEARNING_RATE = True
CLIP_VLOSS = True
NORM_ADV = False
TRAIN = True
-SAVE_MODEL = False
-WANDB_TACK = False
+SAVE_MODEL = True
+WANDB_TACK = True
LOAD_DIR = None
-#LOAD_DIR = "../PPO-Model/PList_Go_LeakyReLU_9331_1677965178_bestGoto/PList_Go_LeakyReLU_9331_1677965178_10.709002.pt"
+LOAD_DIR = "../PPO-Model/GotoOnly-Level1234_9331_1697122986/8.853553.pt"

# Unity Environment Parameters
TARGET_STATE_SIZE = 6
@@ -53,6 +50,16 @@ TARGETNUM= 4
ENV_TIMELIMIT = 30
RESULT_BROADCAST_RATIO = 1/ENV_TIMELIMIT

+save_model_this_episode = False
+
+def is_save_model():
+    global save_model_this_episode
+    return save_model_this_episode
+
+def set_save_model(save_model:bool):
+    print("set save model to ",save_model)
+    global save_model_this_episode
+    save_model_this_episode = save_model

def parse_args():
    # fmt: off
    # pytorch and environment parameters
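Note: is_save_model()/set_save_model() implement a simple module-level flag: the side channel raises it when a "Command" message arrives, and the training loop reads it at the next checkpoint and lowers it again after saving. A minimal sketch of that handshake, with maybe_save as an illustrative helper name:

from arguments import is_save_model, set_save_model

def maybe_save(agent, save_fn) -> None:
    # called once per checkpoint in the training loop
    if is_save_model():
        save_fn(agent)          # e.g. torch.save(...) into the run folder
        set_save_model(False)   # lower the flag until the next side-channel command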
@@ -97,8 +104,6 @@ def parse_args():
        help="the number of steps to run in each environment per policy rollout")
    parser.add_argument("--result-broadcast-ratio", type=float, default=RESULT_BROADCAST_RATIO,
        help="broadcast result when win round is reached,r=result-broadcast-ratio*remainTime")
-    parser.add_argument("--broadCastEndReward", type=lambda x: bool(strtobool(x)), default=BROADCASTREWARD, nargs="?", const=True,
-        help="save model or not")
    # target_learning_rate
    parser.add_argument("--target-lr", type=float, default=TARGET_LEARNING_RATE,
        help="target value of downscaling the learning rate")

View File

@@ -49,9 +49,9 @@ class PPOAgent(nn.Module):
        self.hidden_networks = nn.ModuleList(
            [
                nn.Sequential(
-                    layer_init(nn.Linear(self.state_size, 128)),
+                    layer_init(nn.Linear(self.state_size, 256)),
                    nn.LeakyReLU(),
-                    layer_init(nn.Linear(128, 64)),
+                    layer_init(nn.Linear(256, 128)),
                    nn.LeakyReLU(),
                )
                for i in range(self.target_num)
@@ -59,16 +59,16 @@ class PPOAgent(nn.Module):
        )

        self.actor_dis = nn.ModuleList(
-            [layer_init(nn.Linear(64, self.discrete_size), std=0.5) for i in range(self.target_num)]
+            [layer_init(nn.Linear(128, self.discrete_size), std=0.5) for i in range(self.target_num)]
        )
        self.actor_mean = nn.ModuleList(
-            [layer_init(nn.Linear(64, self.continuous_size), std=0.5) for i in range(self.target_num)]
+            [layer_init(nn.Linear(128, self.continuous_size), std=0) for i in range(self.target_num)]
        )
        self.actor_logstd = nn.ParameterList(
            [nn.Parameter(torch.zeros(1, self.continuous_size)) for i in range(self.target_num)]
        )
        self.critic = nn.ModuleList(
-            [layer_init(nn.Linear(64, 1), std=1) for i in range(self.target_num)]
+            [layer_init(nn.Linear(128, 1), std=0) for i in range(self.target_num)]
        )

    def get_value(self, state: torch.Tensor):
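Note: each per-target trunk is widened from state_size→128→64 to state_size→256→128, so every head (discrete actor, continuous mean, critic) now consumes 128 features. A quick shape check of the new sizes, with layer_init omitted and the state/action sizes made up for illustration:

import torch
import torch.nn as nn

state_size, discrete_size, continuous_size = 30, 5, 2   # illustrative sizes only

trunk = nn.Sequential(
    nn.Linear(state_size, 256), nn.LeakyReLU(),
    nn.Linear(256, 128), nn.LeakyReLU(),
)
actor_dis = nn.Linear(128, discrete_size)
actor_mean = nn.Linear(128, continuous_size)
critic = nn.Linear(128, 1)

h = trunk(torch.randn(4, state_size))                   # -> (4, 128)
print(actor_dis(h).shape, actor_mean(h).shape, critic(h).shape)
# torch.Size([4, 5]) torch.Size([4, 2]) torch.Size([4, 1])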