{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import aimBotEnv\n", "import PPO\n", "import buffer\n", "import numpy as np\n", "\n", "import tensorflow as tf\n", "import time\n", "import datetime\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Attempts to allocate only the GPU memory needed for allocation\n", "physical_devices = tf.config.list_physical_devices('GPU')\n", "tf.config.experimental.set_memory_growth(physical_devices[0], True)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Env\n", "ENV_PATH = \"./Build-CloseEnemyCut/Aimbot-PPO\"\n", "WORKER_ID = 1\n", "BASE_PORT = 200\n", "\n", "MAX_EP = 1000\n", "EP_LENGTH = 100000\n", "GAMMA = 0.99 # discount future reward (UP?)\n", "EPSILON = 0.2 # clip Ratio range[1-EPSILON,1+EPSILON]\n", "ACTOR_LR = 1e-5 # LR\n", "CRITIC_LR = 2e-5 # LR\n", "BATCH = 256 # learning step\n", "ACTOR_EPOCH = 15 # epoch\n", "CRITIC_EPOCH = 15 # epoch\n", "ENTROPY_WHEIGHT = 0.001 # sigma's entropy in Actor loss\n", "ACTION_INTERVAL = 1 # take action every ACTION_INTERVAL steps\n", "\n", "\n", "TRAIN = True\n", "SAVE_DIR = \"PPO-Model/\" + datetime.datetime.now().strftime(\"%m%d%H%M\") + \"/\"\n", "LOAD_DIR = None\n", "\n", "CTN_ACTION_RANGE = 10\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "√√√√√Enviroment Initialized Success√√√√√\n", "√√√√√Buffer Initialized Success√√√√√\n", "No loadDir specified,Create a New Model\n", "CONTINUOUS_SIZE 1\n", "DISCRETE_SIZE 5\n", "STATE_SIZE 30\n" ] } ], "source": [ "# initialize enviroment & buffer class\n", "env = aimBotEnv.makeEnv(\n", " envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT\n", ")\n", "epBuffer = buffer.buffer()\n", "\n", "STATE_SIZE = env.STATE_SIZE\n", "CONTINUOUS_SIZE = env.CONTINUOUS_SIZE\n", "DISCRETE_SIZE = env.DISCRETE_SIZE\n", "_, _, _, loadDir, _ = env.getSteps()\n", "\n", "# check load model or not\n", "if np.any(loadDir == 0):\n", " # create a new model\n", " print(\"No loadDir specified,Create a New Model\")\n", " LOAD_DIR = None\n", "else:\n", " # load model\n", " loadDirDateSTR = str(int(loadDir[0]))\n", " loadDirTimeSTR = str(int(loadDir[1]))\n", " if len(loadDirDateSTR) != 8:\n", " # fill lost 0 while converse float to string\n", " for _ in range(8 - len(loadDirDateSTR)):\n", " loadDirDateSTR = \"0\" + loadDirDateSTR\n", " if len(loadDirTimeSTR) != 6:\n", " # fill lost 0 while converse float to string\n", " for _ in range(6 - len(loadDirTimeSTR)):\n", " loadDirTimeSTR = \"0\" + loadDirTimeSTR\n", " LOAD_DIR = \"PPO-Model/\" + loadDirDateSTR + \"/\" + loadDirTimeSTR\n", " print(\"Load Model:\")\n", " print(LOAD_DIR)\n", "\n", "print(\"CONTINUOUS_SIZE\", CONTINUOUS_SIZE)\n", "print(\"DISCRETE_SIZE\", DISCRETE_SIZE)\n", "print(\"STATE_SIZE\", STATE_SIZE)\n", "\n", "disActShape = [3, 3, 2]\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "def actToKey(disAct1,disAct2,disAct3,conAct):\n", " kW = 0\n", " kS = 0\n", " kA = 0\n", " kD = 0\n", " mouseShoot = 0\n", " if disAct1 == 0:\n", " kW = 0\n", " kS = 1\n", " elif disAct1 == 1:\n", " kW = 0\n", " kS = 0\n", " elif disAct1 == 2:\n", " kW = 1\n", " kS = 0\n", " if disAct2 == 0:\n", " kA = 0\n", " kD = 1\n", " elif disAct2 == 1:\n", " kA = 0\n", " kD = 0\n", " elif disAct2 == 2:\n", " kA = 1\n", " kD = 0\n", " mouseShoot = disAct3\n", " return kW,kS,kA,kD,mouseShoot,conAct" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "EP 0 START\n", "√√√√√Buffer Initialized Success√√√√√\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\core\\fromnumeric.py:3474: RuntimeWarning: Mean of empty slice.\n", " return _methods._mean(a, axis=axis, dtype=dtype,\n", "c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\core\\_methods.py:189: RuntimeWarning: invalid value encountered in double_scalars\n", " ret = ret.dtype.type(ret / rcount)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "A_Loss: 0.4477495511372884 C_Loss: 3.155759557088216\n", "A_Loss: 0.14549287557601928 C_Loss: 0.5123071213563283\n", "A_Loss: 0.055241942902406055 C_Loss: 0.13002794484297434\n", "A_Loss: 0.057325509190559384 C_Loss: 0.11068039039770762\n", "A_Loss: 0.04376962607105573 C_Loss: 0.03923700377345085\n" ] } ], "source": [ "bestScore = 200.0\n", "stopTrainCounter = 0\n", "\n", "totalRewardHis = []\n", "totalActorLossHis = []\n", "totalCriticLossHis = []\n", "epHis = []\n", "maxTotalReward = -99999999999\n", "\n", "for ep in range(MAX_EP):\n", " print(\"EP \", ep, \" START\")\n", " # first time run game\n", " s, _, _, _, _ = env.reset()\n", " if ep == 0:\n", " epBuffer = buffer.buffer()\n", " s = s.reshape([STATE_SIZE])\n", " agent = PPO.PPO(\n", " stateSize=STATE_SIZE,\n", " disActShape=disActShape,\n", " conActSize=1,\n", " conActRange=CTN_ACTION_RANGE,\n", " criticLR=CRITIC_LR,\n", " actorLR=ACTOR_LR,\n", " gamma=GAMMA,\n", " epsilon=EPSILON,\n", " entropyWeight=ENTROPY_WHEIGHT,\n", " saveDir=SAVE_DIR,\n", " loadModelDir=LOAD_DIR,\n", " )\n", " step = 0\n", " done = False\n", " stopTrainCounter -= 1\n", " epHis.append(ep)\n", "\n", " # reset total reward\n", " epTotalReward = 0\n", "\n", " # Recorder list\n", " epStepHis = []\n", " epRewardHis = []\n", " epActorLossHis = []\n", " epCriticLossHis = []\n", "\n", " # save weight immediately?\n", " saveNow = 0\n", "\n", " while not done:\n", " step += 1\n", " if (\n", " step % ACTION_INTERVAL == 0\n", " ): # take action every ACTION_INTERVAL steps\n", " epStepHis.append(step)\n", " (\n", " disAct1,\n", " disAct2,\n", " disAct3,\n", " conAct,\n", " predictResult,\n", " ) = agent.chooseAction(s)\n", " kW, kS, kA, kD, mouseShoot, mouseMove = actToKey(\n", " disAct1, disAct2, disAct3, conAct\n", " )\n", "\n", " nextState, thisReward, done, _, saveNow = env.step(\n", " discreteActions=np.array([[kW, kS, kA, kD, mouseShoot]]),\n", " continuousActions=np.array([[mouseMove]]),\n", " )\n", "\n", " epTotalReward += thisReward\n", " epBuffer.saveBuffers(\n", " s, [disAct1, disAct2, disAct3, conAct], thisReward\n", " )\n", " else:\n", " disActs = np.array([[0, 0, 0, 0, 0]])\n", " conActs = np.array([[0]])\n", "\n", " nextState, thisReward, done, _, saveNow = env.step(\n", " discreteActions=disActs, continuousActions=conActs\n", " )\n", " epTotalReward += thisReward\n", " nextState = nextState.reshape([STATE_SIZE])\n", " s = nextState\n", "\n", " if done:\n", " print(\"EP OVER!\")\n", " if saveNow != 0:\n", " print(\"SAVENOW!\")\n", " saveNow = 0\n", " agent.saveWeights()\n", " # update PPO after Batch step or GameOver\n", " if (step + 1) % BATCH == 0 or done:\n", " bs = epBuffer.getStates()\n", " ba = epBuffer.getActions()\n", " br = epBuffer.getRewards()\n", " epBuffer.clearBuffer()\n", " if TRAIN:\n", " epActorLoss, epCriticLoss = agent.trainCritcActor(\n", " bs, ba, br, s, CRITIC_EPOCH, ACTOR_EPOCH\n", " )\n", " epActorLossHis.append(epActorLoss)\n", " epCriticLossHis.append(epCriticLoss)\n", " # update History Recorder\n", " totalActorLossHis.append(np.mean(epActorLossHis))\n", " totalCriticLossHis.append(np.mean(epCriticLossHis))\n", " totalRewardHis.append(epTotalReward)\n", "\n", " if epTotalReward > maxTotalReward and epTotalReward != 0:\n", " maxTotalReward = epTotalReward\n", " agent.saveWeights(epTotalReward)\n", " print(\"New Record! Save NN\", epTotalReward)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "aaa = 0\n", "aaa = 1\n", "aaa = 2\n", "aaa = 3\n", "aaa = 4\n", "aaa = 5\n", "aaa = 6\n", "aaa = 7\n", "aaa = 8\n", "aaa = 9\n" ] } ], "source": [ "aaa = 0\n", "while aaa<10:\n", " print(\"aaa = \",aaa)\n", " aaa+=1" ] } ], "metadata": { "interpreter": { "hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32" }, "kernelspec": { "display_name": "Python 3.9.7 64-bit", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }