Aimbot-PPO/Aimbot-PPO-Python/aimBotEnv.py
Koha9 ae8a1ba8e2 Update PPO class, add python human control
Python:
Update PPO class
add python human control 
Unity: 
add FP/TP choose button
2022-10-11 06:40:15 +09:00


from mlagents_envs.base_env import ActionTuple
from mlagents_envs.environment import UnityEnvironment
import numpy as np
from numpy import ndarray
class makeEnv(object):
    def __init__(self, envPath, workerID, basePort):
        self.env = UnityEnvironment(
            file_name=envPath,
            seed=1,
            side_channels=[],
            worker_id=workerID,
            base_port=basePort,
        )
        self.env.reset()
        # get environment specs
        self.LOAD_DIR_SIZE_IN_STATE = 3
        self.TRACKED_AGENT = -1
        self.BEHA_SPECS = self.env.behavior_specs
        self.BEHA_NAME = list(self.BEHA_SPECS)[0]
        self.SPEC = self.BEHA_SPECS[self.BEHA_NAME]
        self.OBSERVATION_SPECS = self.SPEC.observation_specs[0]  # observation spec
        self.ACTION_SPEC = self.SPEC.action_spec  # action spec
        self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size  # number of discrete action branches
        self.DISCRETE_SHAPE = list(self.ACTION_SPEC.discrete_branches)
        self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size  # number of continuous actions
        self.STATE_SIZE = self.OBSERVATION_SPECS.shape[0] - self.LOAD_DIR_SIZE_IN_STATE  # number of environment observations
        print("√√√√√Environment Initialized Success√√√√√")
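
    # NOTE: each raw observation vector coming from Unity is laid out as
    # [ state (STATE_SIZE values) | loadDir (2 values) | saveNow (1 flag) ];
    # getSteps() below strips the last LOAD_DIR_SIZE_IN_STATE (3) values off
    # the state and returns them separately as loadDir and saveNow.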
    def step(
        self,
        actions: list,
        behaviorName: str = None,
        trackedAgent: int = None,
    ):
        """Convert the actions list to an ActionTuple and send it to the environment.

        Args:
            actions (list): action list produced by PPO chooseAction
            behaviorName (str, optional): behavior name. Defaults to None.
            trackedAgent (int, optional): tracked agent ID. Defaults to None.

        Returns:
            ndarray: nextState, reward, done, loadDir, saveNow
        """
        # send the action to the environment,
        # then return nextState, reward, done
        if self.DISCRETE_SIZE == 0:
            # create an empty discrete action
            discreteActions = np.asarray([[0]])
        else:
            # build the discrete action from the actions list
            discreteActions = np.asanyarray([actions[0:self.DISCRETE_SIZE]])
        if self.CONTINUOUS_SIZE == 0:
            # create an empty continuous action
            continuousActions = np.asanyarray([[0.0]])
        else:
            # build the continuous action from the actions list
            # (wrapped in an outer list so the array has shape (1, CONTINUOUS_SIZE),
            # matching the single tracked agent, like the discrete branch above)
            continuousActions = np.asanyarray([actions[self.DISCRETE_SIZE:]])
        if behaviorName is None:
            behaviorName = self.BEHA_NAME
        if trackedAgent is None:
            trackedAgent = self.TRACKED_AGENT
        # create the ActionTuple
        thisActionTuple = ActionTuple(continuous=continuousActions, discrete=discreteActions)
        # apply the action to the environment
        self.env.set_actions(behavior_name=behaviorName, action=thisActionTuple)
        self.env.step()
        # get nextState, reward and done after this action
        nextState, reward, done, loadDir, saveNow = self.getSteps(behaviorName, trackedAgent)
        return nextState, reward, done, loadDir, saveNow
    def getSteps(self, behaviorName=None, trackedAgent=None):
        """Get the environment's current observations.

        Includes State, Reward, Done, LoadDir, SaveNow.

        Args:
            behaviorName (str, optional): behavior name. Defaults to None.
            trackedAgent (int, optional): tracked agent ID. Defaults to None.

        Returns:
            ndarray: nextState, reward, done, loadDir, saveNow
        """
        # get nextState, reward and done
        if behaviorName is None:
            behaviorName = self.BEHA_NAME
        decisionSteps, terminalSteps = self.env.get_steps(behaviorName)
        if self.TRACKED_AGENT == -1 and len(decisionSteps) >= 1:
            self.TRACKED_AGENT = decisionSteps.agent_id[0]
        if trackedAgent is None:
            trackedAgent = self.TRACKED_AGENT
        if trackedAgent in decisionSteps:
            # while the episode has not ended, the environment state is stored in decision_steps
            nextState = decisionSteps[trackedAgent].obs[0]
            nextState = np.reshape(nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE])
            saveNow = nextState[0][-1]
            loadDir = nextState[0][-3:-1]
            nextState = nextState[0][:-3]
            reward = decisionSteps[trackedAgent].reward
            done = False
        if trackedAgent in terminalSteps:
            # when the episode has ended, the environment state is stored in terminal_steps
            nextState = terminalSteps[trackedAgent].obs[0]
            nextState = np.reshape(nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE])
            saveNow = nextState[0][-1]
            loadDir = nextState[0][-3:-1]
            nextState = nextState[0][:-3]
            reward = terminalSteps[trackedAgent].reward
            done = True
        return nextState, reward, done, loadDir, saveNow
    def reset(self):
        """Reset the environment and get the initial observations.

        Returns:
            ndarray: nextState, reward, done, loadDir, saveNow
        """
        self.env.reset()
        nextState, reward, done, loadDir, saveNow = self.getSteps()
        return nextState, reward, done, loadDir, saveNow
    def render(self):
        """Render the environment."""
        self.env.render()
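

# --- Minimal usage sketch (not part of the original module) ---
# Assumes a built Unity player at the hypothetical path "./AimbotEnv/Aimbot"
# and uses random actions in place of a real PPO policy; shown only to
# illustrate the reset() / step() return convention of makeEnv.
if __name__ == "__main__":
    env = makeEnv(envPath="./AimbotEnv/Aimbot", workerID=1, basePort=9000)
    state, reward, done, loadDir, saveNow = env.reset()
    for _ in range(100):
        # build a flat action list: one choice per discrete branch first,
        # then the continuous values, matching the slicing done in step()
        actions = [np.random.randint(n) for n in env.DISCRETE_SHAPE]
        actions += list(np.random.uniform(-1.0, 1.0, env.CONTINUOUS_SIZE))
        state, reward, done, loadDir, saveNow = env.step(actions)
        if done:
            state, reward, done, loadDir, saveNow = env.reset()
    env.env.close()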