Aimbot-PPO/Aimbot-PPO-Python/main.ipynb
Koha9 de066f3a65 Delete the near-enemy detect feature. Use a different-density sensor.
Unity:
No longer reports closest-enemy info. Added a different-density sensor so the agent gets more state information at the center of its view.
Adjusted the Start Scene UI manager. Added in-game visible raycasts and the information those raycasts detect.
Python:
Started using mypy, black, and flake8 to type-check, format, and lint the Python code.
2022-09-30 22:36:47 +09:00

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import aimBotEnv\n",
"import PPO\n",
"import buffer\n",
"import numpy as np\n",
"\n",
"import tensorflow as tf\n",
"import time\n",
"import datetime\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Attempts to allocate only the GPU memory needed for allocation\n",
"physical_devices = tf.config.list_physical_devices('GPU')\n",
"tf.config.experimental.set_memory_growth(physical_devices[0], True)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Env\n",
"ENV_PATH = \"./Build-CloseEnemyCut/Aimbot-PPO\"\n",
"WORKER_ID = 1\n",
"BASE_PORT = 200\n",
"\n",
"MAX_EP = 1000\n",
"EP_LENGTH = 100000\n",
"GAMMA = 0.99 # discount future reward (UP?)\n",
"EPSILON = 0.2 # clip Ratio range[1-EPSILON,1+EPSILON]\n",
"ACTOR_LR = 1e-5 # LR\n",
"CRITIC_LR = 2e-5 # LR\n",
"BATCH = 256 # learning step\n",
"ACTOR_EPOCH = 15 # epoch\n",
"CRITIC_EPOCH = 15 # epoch\n",
"ENTROPY_WHEIGHT = 0.001 # sigma's entropy in Actor loss\n",
"ACTION_INTERVAL = 1 # take action every ACTION_INTERVAL steps\n",
"\n",
"\n",
"TRAIN = True\n",
"SAVE_DIR = \"PPO-Model/\" + datetime.datetime.now().strftime(\"%m%d%H%M\") + \"/\"\n",
"LOAD_DIR = None\n",
"\n",
"CTN_ACTION_RANGE = 10\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"√√√√√Enviroment Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n",
"No loadDir specified,Create a New Model\n",
"CONTINUOUS_SIZE 1\n",
"DISCRETE_SIZE 5\n",
"STATE_SIZE 30\n"
]
}
],
"source": [
"# initialize enviroment & buffer class\n",
"env = aimBotEnv.makeEnv(\n",
" envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT\n",
")\n",
"epBuffer = buffer.buffer()\n",
"\n",
"STATE_SIZE = env.STATE_SIZE\n",
"CONTINUOUS_SIZE = env.CONTINUOUS_SIZE\n",
"DISCRETE_SIZE = env.DISCRETE_SIZE\n",
"_, _, _, loadDir, _ = env.getSteps()\n",
"\n",
"# check load model or not\n",
"if np.any(loadDir == 0):\n",
" # create a new model\n",
" print(\"No loadDir specified,Create a New Model\")\n",
" LOAD_DIR = None\n",
"else:\n",
" # load model\n",
" loadDirDateSTR = str(int(loadDir[0]))\n",
" loadDirTimeSTR = str(int(loadDir[1]))\n",
" if len(loadDirDateSTR) != 8:\n",
" # fill lost 0 while converse float to string\n",
" for _ in range(8 - len(loadDirDateSTR)):\n",
" loadDirDateSTR = \"0\" + loadDirDateSTR\n",
" if len(loadDirTimeSTR) != 6:\n",
" # fill lost 0 while converse float to string\n",
" for _ in range(6 - len(loadDirTimeSTR)):\n",
" loadDirTimeSTR = \"0\" + loadDirTimeSTR\n",
" LOAD_DIR = \"PPO-Model/\" + loadDirDateSTR + \"/\" + loadDirTimeSTR\n",
" print(\"Load Model:\")\n",
" print(LOAD_DIR)\n",
"\n",
"print(\"CONTINUOUS_SIZE\", CONTINUOUS_SIZE)\n",
"print(\"DISCRETE_SIZE\", DISCRETE_SIZE)\n",
"print(\"STATE_SIZE\", STATE_SIZE)\n",
"\n",
"disActShape = [3, 3, 2]\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def actToKey(disAct1,disAct2,disAct3,conAct):\n",
" kW = 0\n",
" kS = 0\n",
" kA = 0\n",
" kD = 0\n",
" mouseShoot = 0\n",
" if disAct1 == 0:\n",
" kW = 0\n",
" kS = 1\n",
" elif disAct1 == 1:\n",
" kW = 0\n",
" kS = 0\n",
" elif disAct1 == 2:\n",
" kW = 1\n",
" kS = 0\n",
" if disAct2 == 0:\n",
" kA = 0\n",
" kD = 1\n",
" elif disAct2 == 1:\n",
" kA = 0\n",
" kD = 0\n",
" elif disAct2 == 2:\n",
" kA = 1\n",
" kD = 0\n",
" mouseShoot = disAct3\n",
" return kW,kS,kA,kD,mouseShoot,conAct"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"EP 0 START\n",
"√√√√√Buffer Initialized Success√√√√√\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\core\\fromnumeric.py:3474: RuntimeWarning: Mean of empty slice.\n",
" return _methods._mean(a, axis=axis, dtype=dtype,\n",
"c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\core\\_methods.py:189: RuntimeWarning: invalid value encountered in double_scalars\n",
" ret = ret.dtype.type(ret / rcount)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"A_Loss: 0.4477495511372884 C_Loss: 3.155759557088216\n",
"A_Loss: 0.14549287557601928 C_Loss: 0.5123071213563283\n",
"A_Loss: 0.055241942902406055 C_Loss: 0.13002794484297434\n",
"A_Loss: 0.057325509190559384 C_Loss: 0.11068039039770762\n",
"A_Loss: 0.04376962607105573 C_Loss: 0.03923700377345085\n"
]
}
],
"source": [
"bestScore = 200.0\n",
"stopTrainCounter = 0\n",
"\n",
"totalRewardHis = []\n",
"totalActorLossHis = []\n",
"totalCriticLossHis = []\n",
"epHis = []\n",
"maxTotalReward = -99999999999\n",
"\n",
"for ep in range(MAX_EP):\n",
" print(\"EP \", ep, \" START\")\n",
" # first time run game\n",
" s, _, _, _, _ = env.reset()\n",
" if ep == 0:\n",
" epBuffer = buffer.buffer()\n",
" s = s.reshape([STATE_SIZE])\n",
" agent = PPO.PPO(\n",
" stateSize=STATE_SIZE,\n",
" disActShape=disActShape,\n",
" conActSize=1,\n",
" conActRange=CTN_ACTION_RANGE,\n",
" criticLR=CRITIC_LR,\n",
" actorLR=ACTOR_LR,\n",
" gamma=GAMMA,\n",
" epsilon=EPSILON,\n",
" entropyWeight=ENTROPY_WHEIGHT,\n",
" saveDir=SAVE_DIR,\n",
" loadModelDir=LOAD_DIR,\n",
" )\n",
" step = 0\n",
" done = False\n",
" stopTrainCounter -= 1\n",
" epHis.append(ep)\n",
"\n",
" # reset total reward\n",
" epTotalReward = 0\n",
"\n",
" # Recorder list\n",
" epStepHis = []\n",
" epRewardHis = []\n",
" epActorLossHis = []\n",
" epCriticLossHis = []\n",
"\n",
" # save weight immediately?\n",
" saveNow = 0\n",
"\n",
" while not done:\n",
" step += 1\n",
" if (\n",
" step % ACTION_INTERVAL == 0\n",
" ): # take action every ACTION_INTERVAL steps\n",
" epStepHis.append(step)\n",
" (\n",
" disAct1,\n",
" disAct2,\n",
" disAct3,\n",
" conAct,\n",
" predictResult,\n",
" ) = agent.chooseAction(s)\n",
" kW, kS, kA, kD, mouseShoot, mouseMove = actToKey(\n",
" disAct1, disAct2, disAct3, conAct\n",
" )\n",
"\n",
" nextState, thisReward, done, _, saveNow = env.step(\n",
" discreteActions=np.array([[kW, kS, kA, kD, mouseShoot]]),\n",
" continuousActions=np.array([[mouseMove]]),\n",
" )\n",
"\n",
" epTotalReward += thisReward\n",
" epBuffer.saveBuffers(\n",
" s, [disAct1, disAct2, disAct3, conAct], thisReward\n",
" )\n",
" else:\n",
" disActs = np.array([[0, 0, 0, 0, 0]])\n",
" conActs = np.array([[0]])\n",
"\n",
" nextState, thisReward, done, _, saveNow = env.step(\n",
" discreteActions=disActs, continuousActions=conActs\n",
" )\n",
" epTotalReward += thisReward\n",
" nextState = nextState.reshape([STATE_SIZE])\n",
" s = nextState\n",
"\n",
" if done:\n",
" print(\"EP OVER!\")\n",
" if saveNow != 0:\n",
" print(\"SAVENOW!\")\n",
" saveNow = 0\n",
" agent.saveWeights()\n",
" # update PPO after Batch step or GameOver\n",
" if (step + 1) % BATCH == 0 or done:\n",
" bs = epBuffer.getStates()\n",
" ba = epBuffer.getActions()\n",
" br = epBuffer.getRewards()\n",
" epBuffer.clearBuffer()\n",
" if TRAIN:\n",
" epActorLoss, epCriticLoss = agent.trainCritcActor(\n",
" bs, ba, br, s, CRITIC_EPOCH, ACTOR_EPOCH\n",
" )\n",
" epActorLossHis.append(epActorLoss)\n",
" epCriticLossHis.append(epCriticLoss)\n",
" # update History Recorder\n",
" totalActorLossHis.append(np.mean(epActorLossHis))\n",
" totalCriticLossHis.append(np.mean(epCriticLossHis))\n",
" totalRewardHis.append(epTotalReward)\n",
"\n",
" if epTotalReward > maxTotalReward and epTotalReward != 0:\n",
" maxTotalReward = epTotalReward\n",
" agent.saveWeights(epTotalReward)\n",
" print(\"New Record! Save NN\", epTotalReward)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"aaa = 0\n",
"aaa = 1\n",
"aaa = 2\n",
"aaa = 3\n",
"aaa = 4\n",
"aaa = 5\n",
"aaa = 6\n",
"aaa = 7\n",
"aaa = 8\n",
"aaa = 9\n"
]
}
],
"source": [
"aaa = 0\n",
"while aaa<10:\n",
" print(\"aaa = \",aaa)\n",
" aaa+=1"
]
}
],
"metadata": {
"interpreter": {
"hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32"
},
"kernelspec": {
"display_name": "Python 3.9.7 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}