diff --git a/.gitignore b/.gitignore index a83550d..ec13f9a 100644 --- a/.gitignore +++ b/.gitignore @@ -74,8 +74,10 @@ crashlytics-build.properties # Python Folder /Aimbot-PPO-Python/.vscode/ +/Aimbot-PPO-Python/.mypy_cache/ /Aimbot-PPO-Python/__pycache__/ /Aimbot-PPO-Python/Backup/ /Aimbot-PPO-Python/Build-MultiScene-WithLoad/ /Aimbot-PPO-Python/Build-CloseEnemyCut/ -/Aimbot-PPO-Python/PPO-Model/ \ No newline at end of file +/Aimbot-PPO-Python/PPO-Model/ +/Aimbot-PPO-Python/GAIL-Expert-Data/ \ No newline at end of file diff --git a/Aimbot-PPO-Python/DemoRecorder.ipynb b/Aimbot-PPO-Python/DemoRecorder.ipynb index 3ac169a..a2fce9c 100644 --- a/Aimbot-PPO-Python/DemoRecorder.ipynb +++ b/Aimbot-PPO-Python/DemoRecorder.ipynb @@ -9,13 +9,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "√√√√√Enviroment Initialized Success√√√√√\n" + "√√√√√Enviroment Initialized Success√√√√√\n", + "√√√√√Buffer Initialized Success√√√√√\n" ] } ], "source": [ "import time\n", "import aimBotEnv\n", + "from GAILMem import GAILMem\n", "from HumanAction import HumanActions\n", "\n", "# Env\n", @@ -23,10 +25,21 @@ "WORKER_ID = 1\n", "BASE_PORT = 200\n", "\n", - "MOUSEDISCOUNT = 8.0\n", + "# ENV Para\n", + "MOUSEDISCOUNT = 20.0\n", "MAX_EP = 10000000\n", + "STACKSTATESIZE = 3\n", + "STACKINTERCE = 29\n", "\n", - "env = aimBotEnv.makeEnv(envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT)\n" + "env = aimBotEnv.makeEnv(\n", + " envPath=ENV_PATH,\n", + " workerID=WORKER_ID,\n", + " basePort=BASE_PORT,\n", + " stackSize=STACKSTATESIZE,\n", + " stackIntercal=STACKINTERCE,\n", + ")\n", + "demoMem = GAILMem()\n", + "demoAct = HumanActions(mouseDiscount=MOUSEDISCOUNT)\n" ] }, { @@ -34,6 +47,513 @@ "execution_count": 2, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "EP Start\n", + "EP Start\n", + "EP Start\n", + "nowMemNum 743\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\GAILMem.py:33: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. 
If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n", + " actionsNP = np.asarray(self.actions)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "nowMemNum 993\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 1199\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 1426\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 1671\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 1890\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 2097\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 2307\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 2510\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 2710\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 2889\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 3079\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 3263\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 3506\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 3764\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 3982\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 4155\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 4338\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 4530\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 4749\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 4979\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 5159\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 5358\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 5641\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 5887\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 6085\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 6312\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 6471\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 6691\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 6885\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 7086\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 7248\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 7437\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 7608\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 7788\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 8020\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 8193\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 8447\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 8675\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 8869\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 9046\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 9260\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP 
Start\n", + "nowMemNum 9469\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 9633\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 9802\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 10019\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 10205\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 10387\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 10657\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 10834\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 11071\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 11284\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 11516\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 11735\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 11948\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 12157\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 12330\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 12565\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 12768\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 12944\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 13129\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 13292\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 13590\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 13765\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 13921\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 14083\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 14254\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 14445\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 14662\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 14833\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 15056\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 15258\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 15425\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 15590\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 15829\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 16057\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 16237\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 16411\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 16612\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 16812\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 17001\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 17173\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 17342\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 17515\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 17715\n", + "lastMemCheckPoint 1\n", + 
"mem_saved\n", + "EP Start\n", + "nowMemNum 17890\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 18072\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 18261\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 18489\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 18701\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 18886\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 19100\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 19318\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 19487\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 19670\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 19881\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 20041\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 20279\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 20491\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 20679\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 20877\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 21070\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 21305\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 21519\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 21760\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 21936\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 22135\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 22304\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 22512\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 22706\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 22882\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 23123\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 23290\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 23453\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 23707\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 23942\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 24153\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 24346\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 24573\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 24757\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 24957\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n" + ] + }, { "ename": "UnityCommunicatorStoppedException", "evalue": "Communicator has exited.", @@ -41,8 +561,8 @@ "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mUnityCommunicatorStoppedException\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_37248/645561173.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mnot\u001b[0m 
\u001b[0mdone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mactions\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdemoAct\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetHumanActions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;32mc:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\aimBotEnv.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self, actions, behaviorName, trackedAgent)\u001b[0m\n\u001b[0;32m 72\u001b[0m \u001b[1;31m# take action to env\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 73\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset_actions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbehavior_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mbehaviorName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maction\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mthisActionTuple\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 74\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 75\u001b[0m \u001b[1;31m# get nextState & reward & done after this action\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 76\u001b[0m \u001b[0mnextState\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mloadDir\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msaveNow\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetSteps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbehaviorName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrackedAgent\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_19308/2258777724.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0mactions\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdemoAct\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetHumanActions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 9\u001b[1;33m \u001b[0mnextState\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 10\u001b[0m \u001b[0mdemoMem\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msaveMems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstate\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mstate\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mactorProb\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maction\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[0mstate\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnextState\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mc:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\aimBotEnv.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self, actions, behaviorName, trackedAgent)\u001b[0m\n\u001b[0;32m 86\u001b[0m \u001b[1;31m# take action to env\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 87\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset_actions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbehavior_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mbehaviorName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maction\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mthisActionTuple\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 88\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 89\u001b[0m \u001b[1;31m# get nextState & reward & done after this action\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 90\u001b[0m \u001b[0mnextState\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mloadDir\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msaveNow\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetSteps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbehaviorName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrackedAgent\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\mlagents_envs\\timers.py\u001b[0m in \u001b[0;36mwrapped\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 303\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapped\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 304\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mhierarchical_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__qualname__\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 305\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 306\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 307\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mwrapped\u001b[0m \u001b[1;31m# type: ignore\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\mlagents_envs\\environment.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 333\u001b[0m \u001b[0moutputs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_communicator\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexchange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstep_input\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_poll_process\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 334\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0moutputs\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 335\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mUnityCommunicatorStoppedException\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Communicator has exited.\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 336\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_update_behavior_specs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 337\u001b[0m \u001b[0mrl_output\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0moutputs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrl_output\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mUnityCommunicatorStoppedException\u001b[0m: Communicator has exited." @@ -50,13 +570,26 @@ } ], "source": [ - "done = False\n", - "env.reset()\n", - "demoAct = HumanActions(mouseDiscount=MOUSEDISCOUNT)\n", + "gailExpertDataDir = \"GAIL-Expert-Data/\"\n", + "state, _, _, _, _ = env.reset()\n", + "\n", "for ep in range(MAX_EP):\n", + " print(\"EP Start\")\n", + " done = False\n", " while not done:\n", " actions = demoAct.getHumanActions()\n", - " env.step(actions=actions)6\n" + " nextState, _, done, _, _ = env.step(actions=actions)\n", + " demoMem.saveMems(state=state, actorProb=None, action=actions, reward=None, done=None)\n", + " state = nextState\n", + " nowMemNum = demoMem.memNum\n", + " saveSteps = 500\n", + " lastMemCheckPoint = 0\n", + " if nowMemNum / saveSteps >= lastMemCheckPoint + 1:\n", + " lastMemCheckPoint +=1\n", + " print(\"nowMemNum\", nowMemNum)\n", + " print(\"lastMemCheckPoint\", lastMemCheckPoint)\n", + " demoMem.saveMemtoFile(gailExpertDataDir)\n", + " print(\"mem_saved\")\n" ] } ], diff --git a/Aimbot-PPO-Python/GAIL.py b/Aimbot-PPO-Python/GAIL.py new file mode 100644 index 0000000..0bb51aa --- /dev/null +++ b/Aimbot-PPO-Python/GAIL.py @@ -0,0 +1,170 @@ +import tensorflow as tf +import numpy as np +from numpy import ndarray + +from PPO import PPO +from tensorflow import keras +from tensorflow.keras import layers +from tensorflow.keras import optimizers + +from GAILConfig import GAILConfig + +EPS = 1e-8 + + +class GAIL(object): + def __init__( + self, + stateSize: int, + disActShape: list, + conActSize: int, + conActRange: float, + gailConfig: GAILConfig, + ): + self.stateSize = stateSize + self.disActShape = disActShape + self.disActSize = len(disActShape) + self.conActSize = conActSize + self.conActRange = conActRange + + self.totalActSize = self.disActSize + conActSize + self.discrimInputSize = stateSize + self.totalActSize + self.discriminatorNNShape = gailConfig.discrimNNShape + self.discrimLR = gailConfig.discrimLR + self.discrimTrainEpochs = gailConfig.discrimTrainEpochs + 
self.ppoConfig = gailConfig.ppoConfig + + self.ppo = PPO(stateSize, disActShape, conActSize, conActRange, self.ppoConfig) + self.discriminator = self.buildDiscriminatorNet(True) + + def buildDiscriminatorNet(self, compileModel: bool): + # -----------Input Layers----------- + stateInput = layers.Input(shape=(self.discrimInputSize,), name="stateInput") + + # -------Intermediate layers-------- + interLayers = [] + interLayersIndex = 0 + for neuralUnit in self.discriminatorNNShape: + thisLayerName = "dense" + str(interLayersIndex) + if interLayersIndex == 0: + interLayers.append( + layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(stateInput) + ) + else: + interLayers.append( + layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(interLayers[-1]) + ) + interLayersIndex += 1 + + # ----------Output Layers----------- + output = layers.Dense(1, activation="sigmoid")(interLayers[-1]) + + # ----------Model Compile----------- + model = keras.Model(inputs=stateInput, outputs=output) + if compileModel: + criticOPT = optimizers.Adam(learning_rate=self.discrimLR) + model.compile(optimizer=criticOPT, loss=self.discrimLoss()) + return model + + def discrimLoss(self): + def loss(y_true, y_pred): + """discriminator loss function + + Args: + y_true (tf.constant): demo trajectory + y_pred (tf.constant): agent trajectory predict value + + Returns: + _type_: _description_ + """ + demoP = self.discriminator(y_true) + agentLoss = tf.negative(tf.reduce_mean(tf.math.log(1.0 - y_pred + EPS))) + demoLoss = tf.negative(tf.reduce_mean(tf.math.log(demoP + EPS))) + loss = agentLoss + demoLoss + return loss + + return loss + + def inference(self, states: ndarray, actions: ndarray): + """discriminator predict result + + Args: + states (ndarray): states + actions (ndarray): actions + + Returns: + tf.constant: discrim predict result + """ + # check dimention + if states.ndim != 2: + stateNum = int(len(states) / self.stateSize) + states = states.reshape([stateNum, self.stateSize]) + if actions.ndim != 2: + actionsNum = int(len(actions) / self.totalActSize) + actions = actions.reshape([actionsNum, self.totalActSize]) + + thisTrajectory = tf.concat([states, actions], axis=1) + discrimPredict = self.discriminator(thisTrajectory) + return discrimPredict + + def discriminatorACC( + self, demoStates: ndarray, demoActions: ndarray, agentStates: ndarray, agentActions: ndarray + ): + demoAcc = np.mean(self.inference(demoStates, demoActions)) + agentAcc = np.mean(self.inference(agentStates, agentActions)) + return demoAcc, agentAcc + + def trainDiscriminator( + self, + demoStates: ndarray, + demoActions: ndarray, + agentStates: ndarray, + agentActions: ndarray, + epochs: int = None, + ): + """train Discriminator + + Args: + demoStates (ndarray): expert states + demoActions (ndarray): expert actions + agentStates (ndarray): agentPPO generated states + agentActions (ndarray): agentPPO generated actions + epoch (int): epoch times + + Returns: + tf.constant: all losses array + """ + if epochs == None: + epochs = self.discrimTrainEpochs + demoTrajectory = tf.concat([demoStates, demoActions], axis=1) + agentTrajectory = tf.concat([agentStates, agentActions], axis=1) + his = self.discriminator.fit(x=agentTrajectory, y=demoTrajectory, epochs=epochs, verbose=0) + + demoAcc = np.mean(self.inference(demoStates, demoActions)) + agentAcc = np.mean(self.inference(agentStates, agentActions)) + return his.history["loss"], demoAcc, 1 - agentAcc + + def getActions(self, state: ndarray): + actions, predictResult = 
self.ppo.chooseAction(state) + return actions, predictResult + + def trainPPO( + self, + states: ndarray, + oldActorResult: ndarray, + actions: ndarray, + newRewards: ndarray, + dones: ndarray, + nextState: ndarray, + epochs: int = None, + ): + criticV = self.ppo.getCriticV(states) + discountedR = self.ppo.discountReward(nextState, criticV, dones, newRewards) + advantage = self.ppo.getGAE(discountedR, criticV) + criticLosses = self.ppo.trainCritic(states, discountedR, epochs) + actorLosses = self.ppo.trainActor(states, oldActorResult, actions, advantage, epochs) + return actorLosses, criticLosses + + def generateAction(self, states: ndarray): + act, actorP = self.ppo.chooseAction(states) + return act, actorP diff --git a/Aimbot-PPO-Python/GAILConfig.py b/Aimbot-PPO-Python/GAILConfig.py new file mode 100644 index 0000000..489aaf1 --- /dev/null +++ b/Aimbot-PPO-Python/GAILConfig.py @@ -0,0 +1,24 @@ +import datetime +from typing import NamedTuple + +from PPOConfig import PPOConfig + + +class GAILConfig(NamedTuple): + discrimNNShape: list = [128, 64] + discrimLR: float = 1e-3 + discrimTrainEpochs: int = 8 + discrimSaveDir: str = "GAIL-Model/" + datetime.datetime.now().strftime("%m%d-%H%M") + "/" + + ppoConfig: PPOConfig = PPOConfig( + NNShape=[128, 64], + actorLR=2e-3, + criticLR=2e-3, + gamma=0.99, + lmbda=0.95, + clipRange=0.20, + entropyWeight=1e-2, + trainEpochs=8, + saveDir="GAIL-Model/" + datetime.datetime.now().strftime("%m%d-%H%M") + "/", + loadModelDir=None, + ) diff --git a/Aimbot-PPO-Python/GAILMem.py b/Aimbot-PPO-Python/GAILMem.py new file mode 100644 index 0000000..f730273 --- /dev/null +++ b/Aimbot-PPO-Python/GAILMem.py @@ -0,0 +1,175 @@ +import os +import random +import numpy as np + + +class GAILMem(object): + def __init__(self): + self.states = [] + self.actorProbs = [] + self.actions = [] + self.rewards = [] + self.dones = [] + self.memNum = 0 + print("√√√√√Buffer Initialized Success√√√√√") + + def clearMem(self): + """clearMemories""" + self.states = [] + self.actorProbs = [] + self.actions = [] + self.rewards = [] + self.dones = [] + self.memNum = 0 + + def saveMemtoFile(self, dir: str): + """save memories ndarray to npz file + + Args: + dir (str): save direction,like"GAIL-Expert-Data/",end with "/" + """ + statesNP = np.asarray(self.states) + actorProbsNP = np.asarray(self.actorProbs) + actionsNP = np.asarray(self.actions) + rewardsNP = np.asarray(self.rewards) + donesNP = np.asarray(self.dones) + thisSaveDir = dir + "pack-" + str(self.memNum) + try: + np.savez( + thisSaveDir, + states=statesNP, + actorProbs=actorProbsNP, + actions=actionsNP, + rewards=rewardsNP, + dones=donesNP, + ) + except FileNotFoundError: + os.mkdir(dir) + np.savez( + thisSaveDir, + states=statesNP, + actorProbs=actorProbsNP, + actions=actionsNP, + rewards=rewardsNP, + dones=donesNP, + ) + + def loadMemFile(self, dir: str): + """load memories from mpz file + + Args: + dir (str): file direction + """ + self.clearMem() + memFile = np.load(dir) + self.states = memFile["states"].tolist() + self.actorProbs = memFile["actorProbs"].tolist() + self.actions = memFile["actions"].tolist() + self.rewards = memFile["rewards"].tolist() + self.dones = memFile["dones"].tolist() + self.memNum = len(self.states) + + def getRandomSample(self, sampleNum: int = 0): + """get random unique sample set. + + Args: + sampleNum (int, optional): sample number, while 0 return all samples. Defaults to 0. 
+ + Returns: + tuple: (states,actorProbs,actions,rewards,dones) + """ + if sampleNum == 0: + return ( + self.getStates(), + self.getActorProbs(), + self.getActions(), + self.getRewards(), + self.getDones(), + ) + else: + randIndex = random.sample(range(0, self.memNum), sampleNum) + return ( + self.standDims(np.asarray(self.states)[randIndex]), + self.standDims(np.asarray(self.actorProbs)[randIndex]), + self.standDims(np.asarray(self.actions)[randIndex]), + self.standDims(np.asarray(self.rewards)[randIndex]), + self.standDims(np.asarray(self.dones)[randIndex]), + ) + + def getStates(self): + """get all States data as ndarray + + Returns: + ndarray: ndarray type State data + """ + return self.standDims(np.asarray(self.states)) + + def getActorProbs(self): + """get all ActorProbs data as ndarray + + Returns: + ndarray: ndarray type ActorProbs data + """ + + return self.standDims(np.asarray(self.actorProbs)) + + def getActions(self): + """get all Actions data as ndarray + + Returns: + ndarray: ndarray type Actions data + """ + + return self.standDims(np.asarray(self.actions)) + + def getRewards(self): + """get all Rewards data as ndarray + + Returns: + ndarray: ndarray type Rewards data + """ + + return self.standDims(np.asarray(self.rewards)) + + def getDones(self): + """get all Dones data as ndarray + + Returns: + ndarray: ndarray type Dones data + """ + + return self.standDims(np.asarray(self.dones)) + + def standDims(self, data): + """standalize data's dimension + + Args: + data (list): data list + + Returns: + ndarray: ndarra type data + """ + # standarlize data's dimension + if np.ndim(data) > 2: + return np.squeeze(data, axis=1) + elif np.ndim(data) < 2: + return np.expand_dims(data, axis=1) + else: + return np.asarray(data) + + def saveMems(self, state, actorProb, action, reward, done): + """save memories + + Args: + state (_type_): sates + actorProb (_type_): actor predict result + action (_type_): actor choosed action + reward (_type_): reward + done (function): done + """ + self.states.append(state) + self.actorProbs.append(actorProb) + self.actions.append(action) + self.rewards.append(reward) + self.dones.append(done) + self.memNum += 1 diff --git a/Aimbot-PPO-Python/HumanAction.py b/Aimbot-PPO-Python/HumanAction.py index cd29dd4..cfc88ce 100644 --- a/Aimbot-PPO-Python/HumanAction.py +++ b/Aimbot-PPO-Python/HumanAction.py @@ -1,5 +1,6 @@ import keyboard import mouse +import math class HumanActions: @@ -14,11 +15,13 @@ class HumanActions: self.screenW = screenW self.screenH = screenH self.MOUSEDISCOUNT = mouseDiscount + self.mouseSmooth = 5 + self.mouseMax = 10 def getHumanActions(self): x, _ = mouse.get_position() xMovement = (x - self.screenW / 2) / self.MOUSEDISCOUNT - + xMovement = self.smoothMouseMovement(xMovement) ws = 0 ad = 0 click = 0 @@ -42,10 +45,14 @@ class HumanActions: elif keyboard.is_pressed("s+a"): ws = 2 ad = 2 - if mouse.is_pressed(button="left"): + if keyboard.is_pressed("0"): click = 1 actions = [ws, ad, click, [xMovement]] mouse.move(self.screenW / 2, self.screenH / 2) return actions + + def smoothMouseMovement(self, x: float): + out = (1 / (1 + math.exp(-x / self.mouseSmooth)) - 1 / 2) * self.mouseMax * 2 + return out diff --git a/Aimbot-PPO-Python/aimBotEnv.py b/Aimbot-PPO-Python/aimBotEnv.py index e5e1f71..b55ec7e 100644 --- a/Aimbot-PPO-Python/aimBotEnv.py +++ b/Aimbot-PPO-Python/aimBotEnv.py @@ -6,7 +6,14 @@ from numpy import ndarray class makeEnv(object): - def __init__(self, envPath, workerID, basePort): + def __init__( + self, + envPath: str, + 
workerID: int = 1, + basePort: int = 100, + stackSize: int = 1, + stackIntercal: int = 0, + ): self.env = UnityEnvironment( file_name=envPath, seed=1, @@ -25,24 +32,31 @@ class makeEnv(object): self.OBSERVATION_SPECS = self.SPEC.observation_specs[0] # observation spec self.ACTION_SPEC = self.SPEC.action_spec # action specs - self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size #  連続的な動作のSize + self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size self.DISCRETE_SHAPE = list(self.ACTION_SPEC.discrete_branches) - self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size #  離散的な動作のSize - self.STATE_SIZE = self.OBSERVATION_SPECS.shape[0] - self.LOAD_DIR_SIZE_IN_STATE # 環境観測データ数 + self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size + self.SINGLE_STATE_SIZE = self.OBSERVATION_SPECS.shape[0] - self.LOAD_DIR_SIZE_IN_STATE + self.STATE_SIZE = self.SINGLE_STATE_SIZE * stackSize + + # stacked State + self.STACK_SIZE = stackSize + self.STATE_BUFFER_SIZE = stackSize + ((stackSize - 1) * stackIntercal) + self.STACK_INDEX = list(range(0, self.STATE_BUFFER_SIZE, stackIntercal + 1)) + self.statesBuffer = np.array([[0.0] * self.SINGLE_STATE_SIZE] * self.STATE_BUFFER_SIZE) print("√√√√√Enviroment Initialized Success√√√√√") def step( self, actions: list, behaviorName: ndarray = None, - trackedAgent: ndarray = None, + trackedAgent: int = None, ): """change ations list to ActionTuple then send it to enviroment Args: actions (list): PPO chooseAction output action list behaviorName (ndarray, optional): behaviorName. Defaults to None. - trackedAgent (ndarray, optional): trackedAgentID. Defaults to None. + trackedAgent (int, optional): trackedAgentID. Defaults to None. Returns: ndarray: nextState, reward, done, loadDir, saveNow @@ -54,13 +68,13 @@ class makeEnv(object): discreteActions = np.asarray([[0]]) else: # create discrete action from actions list - discreteActions = np.asanyarray([actions[0:self.DISCRETE_SIZE]]) + discreteActions = np.asanyarray([actions[0 : self.DISCRETE_SIZE]]) if self.CONTINUOUS_SIZE == 0: # create empty continuous action continuousActions = np.asanyarray([[0.0]]) else: # create continuous actions from actions list - continuousActions = np.asanyarray(actions[self.DISCRETE_SIZE:]) + continuousActions = np.asanyarray(actions[self.DISCRETE_SIZE :]) if behaviorName is None: behaviorName = self.BEHA_NAME @@ -98,21 +112,28 @@ class makeEnv(object): if trackedAgent in decisionSteps: # ゲーム終了していない場合、環境状態がdecision_stepsに保存される nextState = decisionSteps[trackedAgent].obs[0] - nextState = np.reshape(nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE]) - saveNow = nextState[0][-1] - loadDir = nextState[0][-3:-1] - nextState = nextState[0][:-3] + nextState = np.reshape( + nextState, [self.SINGLE_STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE] + ) + saveNow = nextState[-1] + loadDir = nextState[-3:-1] + nextState = nextState[:-3] reward = decisionSteps[trackedAgent].reward done = False if trackedAgent in terminalSteps: # ゲーム終了した場合、環境状態がterminal_stepsに保存される nextState = terminalSteps[trackedAgent].obs[0] - nextState = np.reshape(nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE]) - saveNow = nextState[0][-1] - loadDir = nextState[0][-3:-1] - nextState = nextState[0][:-3] + nextState = np.reshape( + nextState, [self.SINGLE_STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE] + ) + saveNow = nextState[-1] + loadDir = nextState[-3:-1] + nextState = nextState[:-3] reward = terminalSteps[trackedAgent].reward done = True - return nextState, reward, done, loadDir, saveNow + + # stack state + stackedStates = 
self.stackStates(nextState) + return stackedStates, reward, done, loadDir, saveNow def reset(self): """reset enviroment and get observations @@ -120,11 +141,21 @@ class makeEnv(object): Returns: ndarray: nextState, reward, done, loadDir, saveNow """ + # reset buffer + self.statesBuffer = np.array([[0.0] * self.SINGLE_STATE_SIZE] * self.STATE_BUFFER_SIZE) + # reset env self.env.reset() nextState, reward, done, loadDir, saveNow = self.getSteps() return nextState, reward, done, loadDir, saveNow + def stackStates(self, state): + # save buffer + self.statesBuffer[0:-1] = self.statesBuffer[1:] + self.statesBuffer[-1] = state + + # return stacked states + return self.statesBuffer[self.STACK_INDEX] + def render(self): - """render enviroment - """ + """render enviroment""" self.env.render() diff --git a/Aimbot-PPO-Python/testarea.ipynb b/Aimbot-PPO-Python/testarea.ipynb index 2d71fe7..ba25b81 100644 --- a/Aimbot-PPO-Python/testarea.ipynb +++ b/Aimbot-PPO-Python/testarea.ipynb @@ -361,17 +361,102 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3\n", + "deque([[0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1]], maxlen=3)\n", + "3\n", + "deque([[0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1], [2, 2, 2, 2, 2]], maxlen=3)\n", + "3\n", + "deque([[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]], maxlen=3)\n", + "3\n", + "deque([[2, 2, 2, 2, 2], [3, 3, 3, 3, 3], [4, 4, 4, 4, 4]], maxlen=3)\n", + "3\n", + "deque([[3, 3, 3, 3, 3], [4, 4, 4, 4, 4], [5, 5, 5, 5, 5]], maxlen=3)\n", + "3\n", + "deque([[4, 4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6]], maxlen=3)\n", + "3\n", + "deque([[5, 5, 5, 5, 5], [6, 6, 6, 6, 6], [7, 7, 7, 7, 7]], maxlen=3)\n", + "3\n", + "deque([[6, 6, 6, 6, 6], [7, 7, 7, 7, 7], [8, 8, 8, 8, 8]], maxlen=3)\n", + "3\n", + "deque([[7, 7, 7, 7, 7], [8, 8, 8, 8, 8], [9, 9, 9, 9, 9]], maxlen=3)\n" + ] + } + ], "source": [ - "import keyboard\n", + "from collections import deque\n", + "import numpy as np\n", "\n", - "while True:\n", - " if keyboard.is_pressed(\"w\"):\n", - " print(\"w\")\n", - " elif keyboard.is_pressed(\"s\"):\n", - " print(\"s\")" + "maxBuffer = 3\n", + "stateSize = 5\n", + "\n", + "aa = deque([[0.0]*stateSize],maxlen=maxBuffer)\n", + "\n", + "def ss(s):\n", + " aa.append(s)\n", + " if len(aa) < maxBuffer:\n", + " for i in range(maxBuffer - len(aa)):\n", + " aa.appendleft([0.0] * stateSize)\n", + "\n", + "for i in range(1,10):\n", + " ss([i,i,i,i,i])\n", + " print(len(aa))\n", + " print(aa)\n", + "'''\n", + "3\n", + "deque([[0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1]], maxlen=3)\n", + "3\n", + "deque([[0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1], [2, 2, 2, 2, 2]], maxlen=3)\n", + "3\n", + "deque([[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]], maxlen=3)\n", + "3\n", + "deque([[2, 2, 2, 2, 2], [3, 3, 3, 3, 3], [4, 4, 4, 4, 4]], maxlen=3)\n", + "3\n", + "deque([[3, 3, 3, 3, 3], [4, 4, 4, 4, 4], [5, 5, 5, 5, 5]], maxlen=3)\n", + "3\n", + "deque([[4, 4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6]], maxlen=3)\n", + "3\n", + "deque([[5, 5, 5, 5, 5], [6, 6, 6, 6, 6], [7, 7, 7, 7, 7]], maxlen=3)\n", + "3\n", + "deque([[6, 6, 6, 6, 6], [7, 7, 7, 7, 7], [8, 8, 8, 8, 8]], maxlen=3)\n", + "3\n", + "deque([[7, 7, 7, 7, 7], [8, 8, 8, 8, 8], [9, 9, 9, 9, 9]], maxlen=3)'''" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "1\n", + "[0]\n" + ] + } + ], + "source": [ + "from collections import deque\n", + "import numpy as np\n", + "\n", + "ss = 1\n", + "si = 0\n", + "buffersize = ss + ((ss-1)*si)\n", + "print(buffersize)\n", + "stackedStates = deque([[0.0] * 10]*6, maxlen=3)\n", + "stackedStates.append([1.0]*10)\n", + "ssnp = stackedStates\n", + "\n", + "aa = list(range(0,buffersize,si+1))\n", + "print(aa)" ] } ],