diff --git a/.gitignore b/.gitignore index a83550d..ec13f9a 100644 --- a/.gitignore +++ b/.gitignore @@ -74,8 +74,10 @@ crashlytics-build.properties # Python Folder /Aimbot-PPO-Python/.vscode/ +/Aimbot-PPO-Python/.mypy_cache/ /Aimbot-PPO-Python/__pycache__/ /Aimbot-PPO-Python/Backup/ /Aimbot-PPO-Python/Build-MultiScene-WithLoad/ /Aimbot-PPO-Python/Build-CloseEnemyCut/ -/Aimbot-PPO-Python/PPO-Model/ \ No newline at end of file +/Aimbot-PPO-Python/PPO-Model/ +/Aimbot-PPO-Python/GAIL-Expert-Data/ \ No newline at end of file diff --git a/Aimbot-PPO-Python/DemoRecorder.ipynb b/Aimbot-PPO-Python/DemoRecorder.ipynb index 3ac169a..a2fce9c 100644 --- a/Aimbot-PPO-Python/DemoRecorder.ipynb +++ b/Aimbot-PPO-Python/DemoRecorder.ipynb @@ -9,13 +9,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "√√√√√Enviroment Initialized Success√√√√√\n" + "√√√√√Enviroment Initialized Success√√√√√\n", + "√√√√√Buffer Initialized Success√√√√√\n" ] } ], "source": [ "import time\n", "import aimBotEnv\n", + "from GAILMem import GAILMem\n", "from HumanAction import HumanActions\n", "\n", "# Env\n", @@ -23,10 +25,21 @@ "WORKER_ID = 1\n", "BASE_PORT = 200\n", "\n", - "MOUSEDISCOUNT = 8.0\n", + "# ENV Para\n", + "MOUSEDISCOUNT = 20.0\n", "MAX_EP = 10000000\n", + "STACKSTATESIZE = 3\n", + "STACKINTERCE = 29\n", "\n", - "env = aimBotEnv.makeEnv(envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT)\n" + "env = aimBotEnv.makeEnv(\n", + " envPath=ENV_PATH,\n", + " workerID=WORKER_ID,\n", + " basePort=BASE_PORT,\n", + " stackSize=STACKSTATESIZE,\n", + " stackIntercal=STACKINTERCE,\n", + ")\n", + "demoMem = GAILMem()\n", + "demoAct = HumanActions(mouseDiscount=MOUSEDISCOUNT)\n" ] }, { @@ -34,6 +47,513 @@ "execution_count": 2, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "EP Start\n", + "EP Start\n", + "EP Start\n", + "nowMemNum 743\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\GAILMem.py:33: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. 
If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n", + " actionsNP = np.asarray(self.actions)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "nowMemNum 993\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 1199\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 1426\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 1671\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 1890\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 2097\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 2307\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 2510\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 2710\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 2889\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 3079\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 3263\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 3506\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 3764\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 3982\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 4155\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 4338\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 4530\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 4749\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 4979\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 5159\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 5358\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 5641\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 5887\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 6085\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 6312\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 6471\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 6691\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 6885\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 7086\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 7248\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 7437\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 7608\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 7788\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 8020\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 8193\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 8447\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 8675\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 8869\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 9046\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 9260\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP 
Start\n", + "nowMemNum 9469\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 9633\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 9802\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 10019\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 10205\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 10387\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 10657\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 10834\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 11071\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 11284\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 11516\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 11735\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 11948\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 12157\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 12330\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 12565\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 12768\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 12944\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 13129\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 13292\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 13590\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 13765\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 13921\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 14083\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 14254\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 14445\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 14662\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 14833\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 15056\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 15258\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 15425\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 15590\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 15829\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 16057\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 16237\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 16411\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 16612\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 16812\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 17001\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 17173\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 17342\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 17515\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 17715\n", + "lastMemCheckPoint 1\n", + 
"mem_saved\n", + "EP Start\n", + "nowMemNum 17890\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 18072\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 18261\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 18489\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 18701\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 18886\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 19100\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 19318\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 19487\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 19670\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 19881\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 20041\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 20279\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 20491\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 20679\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 20877\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 21070\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 21305\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 21519\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 21760\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 21936\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 22135\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 22304\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 22512\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 22706\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 22882\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 23123\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 23290\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 23453\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 23707\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 23942\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 24153\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 24346\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 24573\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 24757\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n", + "nowMemNum 24957\n", + "lastMemCheckPoint 1\n", + "mem_saved\n", + "EP Start\n" + ] + }, { "ename": "UnityCommunicatorStoppedException", "evalue": "Communicator has exited.", @@ -41,8 +561,8 @@ "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mUnityCommunicatorStoppedException\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_37248/645561173.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mnot\u001b[0m 
\u001b[0mdone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mactions\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdemoAct\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetHumanActions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;32mc:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\aimBotEnv.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self, actions, behaviorName, trackedAgent)\u001b[0m\n\u001b[0;32m 72\u001b[0m \u001b[1;31m# take action to env\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 73\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset_actions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbehavior_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mbehaviorName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maction\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mthisActionTuple\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 74\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 75\u001b[0m \u001b[1;31m# get nextState & reward & done after this action\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 76\u001b[0m \u001b[0mnextState\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mloadDir\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msaveNow\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetSteps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbehaviorName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrackedAgent\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_19308/2258777724.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0mactions\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdemoAct\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetHumanActions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 9\u001b[1;33m \u001b[0mnextState\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 10\u001b[0m \u001b[0mdemoMem\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msaveMems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstate\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mstate\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mactorProb\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maction\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[0mstate\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnextState\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mc:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\aimBotEnv.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self, actions, behaviorName, trackedAgent)\u001b[0m\n\u001b[0;32m 86\u001b[0m \u001b[1;31m# take action to env\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 87\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset_actions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbehavior_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mbehaviorName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maction\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mthisActionTuple\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 88\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 89\u001b[0m \u001b[1;31m# get nextState & reward & done after this action\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 90\u001b[0m \u001b[0mnextState\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mloadDir\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msaveNow\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetSteps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbehaviorName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrackedAgent\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\mlagents_envs\\timers.py\u001b[0m in \u001b[0;36mwrapped\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 303\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapped\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 304\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mhierarchical_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__qualname__\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 305\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 306\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 307\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mwrapped\u001b[0m \u001b[1;31m# type: ignore\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\mlagents_envs\\environment.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 333\u001b[0m \u001b[0moutputs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_communicator\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexchange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstep_input\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_poll_process\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 334\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0moutputs\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 335\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mUnityCommunicatorStoppedException\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Communicator has exited.\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 336\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_update_behavior_specs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 337\u001b[0m \u001b[0mrl_output\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0moutputs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrl_output\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mUnityCommunicatorStoppedException\u001b[0m: Communicator has exited." @@ -50,13 +570,26 @@ } ], "source": [ - "done = False\n", - "env.reset()\n", - "demoAct = HumanActions(mouseDiscount=MOUSEDISCOUNT)\n", + "gailExpertDataDir = \"GAIL-Expert-Data/\"\n", + "state, _, _, _, _ = env.reset()\n", + "\n", "for ep in range(MAX_EP):\n", + " print(\"EP Start\")\n", + " done = False\n", " while not done:\n", " actions = demoAct.getHumanActions()\n", - " env.step(actions=actions)6\n" + " nextState, _, done, _, _ = env.step(actions=actions)\n", + " demoMem.saveMems(state=state, actorProb=None, action=actions, reward=None, done=None)\n", + " state = nextState\n", + " nowMemNum = demoMem.memNum\n", + " saveSteps = 500\n", + " lastMemCheckPoint = 0\n", + " if nowMemNum / saveSteps >= lastMemCheckPoint + 1:\n", + " lastMemCheckPoint +=1\n", + " print(\"nowMemNum\", nowMemNum)\n", + " print(\"lastMemCheckPoint\", lastMemCheckPoint)\n", + " demoMem.saveMemtoFile(gailExpertDataDir)\n", + " print(\"mem_saved\")\n" ] } ], diff --git a/Aimbot-PPO-Python/GAIL.py b/Aimbot-PPO-Python/GAIL.py new file mode 100644 index 0000000..0bb51aa --- /dev/null +++ b/Aimbot-PPO-Python/GAIL.py @@ -0,0 +1,170 @@ +import tensorflow as tf +import numpy as np +from numpy import ndarray + +from PPO import PPO +from tensorflow import keras +from tensorflow.keras import layers +from tensorflow.keras import optimizers + +from GAILConfig import GAILConfig + +EPS = 1e-8 + + +class GAIL(object): + def __init__( + self, + stateSize: int, + disActShape: list, + conActSize: int, + conActRange: float, + gailConfig: GAILConfig, + ): + self.stateSize = stateSize + self.disActShape = disActShape + self.disActSize = len(disActShape) + self.conActSize = conActSize + self.conActRange = conActRange + + self.totalActSize = self.disActSize + conActSize + self.discrimInputSize = stateSize + self.totalActSize + self.discriminatorNNShape = gailConfig.discrimNNShape + self.discrimLR = gailConfig.discrimLR + self.discrimTrainEpochs = gailConfig.discrimTrainEpochs + 
self.ppoConfig = gailConfig.ppoConfig + + self.ppo = PPO(stateSize, disActShape, conActSize, conActRange, self.ppoConfig) + self.discriminator = self.buildDiscriminatorNet(True) + + def buildDiscriminatorNet(self, compileModel: bool): + # -----------Input Layers----------- + stateInput = layers.Input(shape=(self.discrimInputSize,), name="stateInput") + + # -------Intermediate layers-------- + interLayers = [] + interLayersIndex = 0 + for neuralUnit in self.discriminatorNNShape: + thisLayerName = "dense" + str(interLayersIndex) + if interLayersIndex == 0: + interLayers.append( + layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(stateInput) + ) + else: + interLayers.append( + layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(interLayers[-1]) + ) + interLayersIndex += 1 + + # ----------Output Layers----------- + output = layers.Dense(1, activation="sigmoid")(interLayers[-1]) + + # ----------Model Compile----------- + model = keras.Model(inputs=stateInput, outputs=output) + if compileModel: + criticOPT = optimizers.Adam(learning_rate=self.discrimLR) + model.compile(optimizer=criticOPT, loss=self.discrimLoss()) + return model + + def discrimLoss(self): + def loss(y_true, y_pred): + """discriminator loss function + + Args: + y_true (tf.constant): demo trajectory + y_pred (tf.constant): agent trajectory predict value + + Returns: + _type_: _description_ + """ + demoP = self.discriminator(y_true) + agentLoss = tf.negative(tf.reduce_mean(tf.math.log(1.0 - y_pred + EPS))) + demoLoss = tf.negative(tf.reduce_mean(tf.math.log(demoP + EPS))) + loss = agentLoss + demoLoss + return loss + + return loss + + def inference(self, states: ndarray, actions: ndarray): + """discriminator predict result + + Args: + states (ndarray): states + actions (ndarray): actions + + Returns: + tf.constant: discrim predict result + """ + # check dimention + if states.ndim != 2: + stateNum = int(len(states) / self.stateSize) + states = states.reshape([stateNum, self.stateSize]) + if actions.ndim != 2: + actionsNum = int(len(actions) / self.totalActSize) + actions = actions.reshape([actionsNum, self.totalActSize]) + + thisTrajectory = tf.concat([states, actions], axis=1) + discrimPredict = self.discriminator(thisTrajectory) + return discrimPredict + + def discriminatorACC( + self, demoStates: ndarray, demoActions: ndarray, agentStates: ndarray, agentActions: ndarray + ): + demoAcc = np.mean(self.inference(demoStates, demoActions)) + agentAcc = np.mean(self.inference(agentStates, agentActions)) + return demoAcc, agentAcc + + def trainDiscriminator( + self, + demoStates: ndarray, + demoActions: ndarray, + agentStates: ndarray, + agentActions: ndarray, + epochs: int = None, + ): + """train Discriminator + + Args: + demoStates (ndarray): expert states + demoActions (ndarray): expert actions + agentStates (ndarray): agentPPO generated states + agentActions (ndarray): agentPPO generated actions + epoch (int): epoch times + + Returns: + tf.constant: all losses array + """ + if epochs == None: + epochs = self.discrimTrainEpochs + demoTrajectory = tf.concat([demoStates, demoActions], axis=1) + agentTrajectory = tf.concat([agentStates, agentActions], axis=1) + his = self.discriminator.fit(x=agentTrajectory, y=demoTrajectory, epochs=epochs, verbose=0) + + demoAcc = np.mean(self.inference(demoStates, demoActions)) + agentAcc = np.mean(self.inference(agentStates, agentActions)) + return his.history["loss"], demoAcc, 1 - agentAcc + + def getActions(self, state: ndarray): + actions, predictResult = 
self.ppo.chooseAction(state) + return actions, predictResult + + def trainPPO( + self, + states: ndarray, + oldActorResult: ndarray, + actions: ndarray, + newRewards: ndarray, + dones: ndarray, + nextState: ndarray, + epochs: int = None, + ): + criticV = self.ppo.getCriticV(states) + discountedR = self.ppo.discountReward(nextState, criticV, dones, newRewards) + advantage = self.ppo.getGAE(discountedR, criticV) + criticLosses = self.ppo.trainCritic(states, discountedR, epochs) + actorLosses = self.ppo.trainActor(states, oldActorResult, actions, advantage, epochs) + return actorLosses, criticLosses + + def generateAction(self, states: ndarray): + act, actorP = self.ppo.chooseAction(states) + return act, actorP diff --git a/Aimbot-PPO-Python/GAILConfig.py b/Aimbot-PPO-Python/GAILConfig.py new file mode 100644 index 0000000..489aaf1 --- /dev/null +++ b/Aimbot-PPO-Python/GAILConfig.py @@ -0,0 +1,24 @@ +import datetime +from typing import NamedTuple + +from PPOConfig import PPOConfig + + +class GAILConfig(NamedTuple): + discrimNNShape: list = [128, 64] + discrimLR: float = 1e-3 + discrimTrainEpochs: int = 8 + discrimSaveDir: str = "GAIL-Model/" + datetime.datetime.now().strftime("%m%d-%H%M") + "/" + + ppoConfig: PPOConfig = PPOConfig( + NNShape=[128, 64], + actorLR=2e-3, + criticLR=2e-3, + gamma=0.99, + lmbda=0.95, + clipRange=0.20, + entropyWeight=1e-2, + trainEpochs=8, + saveDir="GAIL-Model/" + datetime.datetime.now().strftime("%m%d-%H%M") + "/", + loadModelDir=None, + ) diff --git a/Aimbot-PPO-Python/GAILMem.py b/Aimbot-PPO-Python/GAILMem.py new file mode 100644 index 0000000..f730273 --- /dev/null +++ b/Aimbot-PPO-Python/GAILMem.py @@ -0,0 +1,175 @@ +import os +import random +import numpy as np + + +class GAILMem(object): + def __init__(self): + self.states = [] + self.actorProbs = [] + self.actions = [] + self.rewards = [] + self.dones = [] + self.memNum = 0 + print("√√√√√Buffer Initialized Success√√√√√") + + def clearMem(self): + """clearMemories""" + self.states = [] + self.actorProbs = [] + self.actions = [] + self.rewards = [] + self.dones = [] + self.memNum = 0 + + def saveMemtoFile(self, dir: str): + """save memories ndarray to npz file + + Args: + dir (str): save direction,like"GAIL-Expert-Data/",end with "/" + """ + statesNP = np.asarray(self.states) + actorProbsNP = np.asarray(self.actorProbs) + actionsNP = np.asarray(self.actions) + rewardsNP = np.asarray(self.rewards) + donesNP = np.asarray(self.dones) + thisSaveDir = dir + "pack-" + str(self.memNum) + try: + np.savez( + thisSaveDir, + states=statesNP, + actorProbs=actorProbsNP, + actions=actionsNP, + rewards=rewardsNP, + dones=donesNP, + ) + except FileNotFoundError: + os.mkdir(dir) + np.savez( + thisSaveDir, + states=statesNP, + actorProbs=actorProbsNP, + actions=actionsNP, + rewards=rewardsNP, + dones=donesNP, + ) + + def loadMemFile(self, dir: str): + """load memories from mpz file + + Args: + dir (str): file direction + """ + self.clearMem() + memFile = np.load(dir) + self.states = memFile["states"].tolist() + self.actorProbs = memFile["actorProbs"].tolist() + self.actions = memFile["actions"].tolist() + self.rewards = memFile["rewards"].tolist() + self.dones = memFile["dones"].tolist() + self.memNum = len(self.states) + + def getRandomSample(self, sampleNum: int = 0): + """get random unique sample set. + + Args: + sampleNum (int, optional): sample number, while 0 return all samples. Defaults to 0. 
+ + Returns: + tuple: (states,actorProbs,actions,rewards,dones) + """ + if sampleNum == 0: + return ( + self.getStates(), + self.getActorProbs(), + self.getActions(), + self.getRewards(), + self.getDones(), + ) + else: + randIndex = random.sample(range(0, self.memNum), sampleNum) + return ( + self.standDims(np.asarray(self.states)[randIndex]), + self.standDims(np.asarray(self.actorProbs)[randIndex]), + self.standDims(np.asarray(self.actions)[randIndex]), + self.standDims(np.asarray(self.rewards)[randIndex]), + self.standDims(np.asarray(self.dones)[randIndex]), + ) + + def getStates(self): + """get all States data as ndarray + + Returns: + ndarray: ndarray type State data + """ + return self.standDims(np.asarray(self.states)) + + def getActorProbs(self): + """get all ActorProbs data as ndarray + + Returns: + ndarray: ndarray type ActorProbs data + """ + + return self.standDims(np.asarray(self.actorProbs)) + + def getActions(self): + """get all Actions data as ndarray + + Returns: + ndarray: ndarray type Actions data + """ + + return self.standDims(np.asarray(self.actions)) + + def getRewards(self): + """get all Rewards data as ndarray + + Returns: + ndarray: ndarray type Rewards data + """ + + return self.standDims(np.asarray(self.rewards)) + + def getDones(self): + """get all Dones data as ndarray + + Returns: + ndarray: ndarray type Dones data + """ + + return self.standDims(np.asarray(self.dones)) + + def standDims(self, data): + """standalize data's dimension + + Args: + data (list): data list + + Returns: + ndarray: ndarra type data + """ + # standarlize data's dimension + if np.ndim(data) > 2: + return np.squeeze(data, axis=1) + elif np.ndim(data) < 2: + return np.expand_dims(data, axis=1) + else: + return np.asarray(data) + + def saveMems(self, state, actorProb, action, reward, done): + """save memories + + Args: + state (_type_): sates + actorProb (_type_): actor predict result + action (_type_): actor choosed action + reward (_type_): reward + done (function): done + """ + self.states.append(state) + self.actorProbs.append(actorProb) + self.actions.append(action) + self.rewards.append(reward) + self.dones.append(done) + self.memNum += 1 diff --git a/Aimbot-PPO-Python/HumanAction.py b/Aimbot-PPO-Python/HumanAction.py index cd29dd4..cfc88ce 100644 --- a/Aimbot-PPO-Python/HumanAction.py +++ b/Aimbot-PPO-Python/HumanAction.py @@ -1,5 +1,6 @@ import keyboard import mouse +import math class HumanActions: @@ -14,11 +15,13 @@ class HumanActions: self.screenW = screenW self.screenH = screenH self.MOUSEDISCOUNT = mouseDiscount + self.mouseSmooth = 5 + self.mouseMax = 10 def getHumanActions(self): x, _ = mouse.get_position() xMovement = (x - self.screenW / 2) / self.MOUSEDISCOUNT - + xMovement = self.smoothMouseMovement(xMovement) ws = 0 ad = 0 click = 0 @@ -42,10 +45,14 @@ class HumanActions: elif keyboard.is_pressed("s+a"): ws = 2 ad = 2 - if mouse.is_pressed(button="left"): + if keyboard.is_pressed("0"): click = 1 actions = [ws, ad, click, [xMovement]] mouse.move(self.screenW / 2, self.screenH / 2) return actions + + def smoothMouseMovement(self, x: float): + out = (1 / (1 + math.exp(-x / self.mouseSmooth)) - 1 / 2) * self.mouseMax * 2 + return out diff --git a/Aimbot-PPO-Python/aimBotEnv.py b/Aimbot-PPO-Python/aimBotEnv.py index e5e1f71..b55ec7e 100644 --- a/Aimbot-PPO-Python/aimBotEnv.py +++ b/Aimbot-PPO-Python/aimBotEnv.py @@ -6,7 +6,14 @@ from numpy import ndarray class makeEnv(object): - def __init__(self, envPath, workerID, basePort): + def __init__( + self, + envPath: str, + 
workerID: int = 1, + basePort: int = 100, + stackSize: int = 1, + stackIntercal: int = 0, + ): self.env = UnityEnvironment( file_name=envPath, seed=1, @@ -25,24 +32,31 @@ class makeEnv(object): self.OBSERVATION_SPECS = self.SPEC.observation_specs[0] # observation spec self.ACTION_SPEC = self.SPEC.action_spec # action specs - self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size #  連続的な動作のSize + self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size self.DISCRETE_SHAPE = list(self.ACTION_SPEC.discrete_branches) - self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size #  離散的な動作のSize - self.STATE_SIZE = self.OBSERVATION_SPECS.shape[0] - self.LOAD_DIR_SIZE_IN_STATE # 環境観測データ数 + self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size + self.SINGLE_STATE_SIZE = self.OBSERVATION_SPECS.shape[0] - self.LOAD_DIR_SIZE_IN_STATE + self.STATE_SIZE = self.SINGLE_STATE_SIZE * stackSize + + # stacked State + self.STACK_SIZE = stackSize + self.STATE_BUFFER_SIZE = stackSize + ((stackSize - 1) * stackIntercal) + self.STACK_INDEX = list(range(0, self.STATE_BUFFER_SIZE, stackIntercal + 1)) + self.statesBuffer = np.array([[0.0] * self.SINGLE_STATE_SIZE] * self.STATE_BUFFER_SIZE) print("√√√√√Enviroment Initialized Success√√√√√") def step( self, actions: list, behaviorName: ndarray = None, - trackedAgent: ndarray = None, + trackedAgent: int = None, ): """change ations list to ActionTuple then send it to enviroment Args: actions (list): PPO chooseAction output action list behaviorName (ndarray, optional): behaviorName. Defaults to None. - trackedAgent (ndarray, optional): trackedAgentID. Defaults to None. + trackedAgent (int, optional): trackedAgentID. Defaults to None. Returns: ndarray: nextState, reward, done, loadDir, saveNow @@ -54,13 +68,13 @@ class makeEnv(object): discreteActions = np.asarray([[0]]) else: # create discrete action from actions list - discreteActions = np.asanyarray([actions[0:self.DISCRETE_SIZE]]) + discreteActions = np.asanyarray([actions[0 : self.DISCRETE_SIZE]]) if self.CONTINUOUS_SIZE == 0: # create empty continuous action continuousActions = np.asanyarray([[0.0]]) else: # create continuous actions from actions list - continuousActions = np.asanyarray(actions[self.DISCRETE_SIZE:]) + continuousActions = np.asanyarray(actions[self.DISCRETE_SIZE :]) if behaviorName is None: behaviorName = self.BEHA_NAME @@ -98,21 +112,28 @@ class makeEnv(object): if trackedAgent in decisionSteps: # ゲーム終了していない場合、環境状態がdecision_stepsに保存される nextState = decisionSteps[trackedAgent].obs[0] - nextState = np.reshape(nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE]) - saveNow = nextState[0][-1] - loadDir = nextState[0][-3:-1] - nextState = nextState[0][:-3] + nextState = np.reshape( + nextState, [self.SINGLE_STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE] + ) + saveNow = nextState[-1] + loadDir = nextState[-3:-1] + nextState = nextState[:-3] reward = decisionSteps[trackedAgent].reward done = False if trackedAgent in terminalSteps: # ゲーム終了した場合、環境状態がterminal_stepsに保存される nextState = terminalSteps[trackedAgent].obs[0] - nextState = np.reshape(nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE]) - saveNow = nextState[0][-1] - loadDir = nextState[0][-3:-1] - nextState = nextState[0][:-3] + nextState = np.reshape( + nextState, [self.SINGLE_STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE] + ) + saveNow = nextState[-1] + loadDir = nextState[-3:-1] + nextState = nextState[:-3] reward = terminalSteps[trackedAgent].reward done = True - return nextState, reward, done, loadDir, saveNow + + # stack state + stackedStates = 
self.stackStates(nextState) + return stackedStates, reward, done, loadDir, saveNow def reset(self): """reset enviroment and get observations @@ -120,11 +141,21 @@ class makeEnv(object): Returns: ndarray: nextState, reward, done, loadDir, saveNow """ + # reset buffer + self.statesBuffer = np.array([[0.0] * self.SINGLE_STATE_SIZE] * self.STATE_BUFFER_SIZE) + # reset env self.env.reset() nextState, reward, done, loadDir, saveNow = self.getSteps() return nextState, reward, done, loadDir, saveNow + def stackStates(self, state): + # save buffer + self.statesBuffer[0:-1] = self.statesBuffer[1:] + self.statesBuffer[-1] = state + + # return stacked states + return self.statesBuffer[self.STACK_INDEX] + def render(self): - """render enviroment - """ + """render enviroment""" self.env.render() diff --git a/Aimbot-PPO-Python/testarea.ipynb b/Aimbot-PPO-Python/testarea.ipynb index 2d71fe7..ba25b81 100644 --- a/Aimbot-PPO-Python/testarea.ipynb +++ b/Aimbot-PPO-Python/testarea.ipynb @@ -361,17 +361,102 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3\n", + "deque([[0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1]], maxlen=3)\n", + "3\n", + "deque([[0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1], [2, 2, 2, 2, 2]], maxlen=3)\n", + "3\n", + "deque([[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]], maxlen=3)\n", + "3\n", + "deque([[2, 2, 2, 2, 2], [3, 3, 3, 3, 3], [4, 4, 4, 4, 4]], maxlen=3)\n", + "3\n", + "deque([[3, 3, 3, 3, 3], [4, 4, 4, 4, 4], [5, 5, 5, 5, 5]], maxlen=3)\n", + "3\n", + "deque([[4, 4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6]], maxlen=3)\n", + "3\n", + "deque([[5, 5, 5, 5, 5], [6, 6, 6, 6, 6], [7, 7, 7, 7, 7]], maxlen=3)\n", + "3\n", + "deque([[6, 6, 6, 6, 6], [7, 7, 7, 7, 7], [8, 8, 8, 8, 8]], maxlen=3)\n", + "3\n", + "deque([[7, 7, 7, 7, 7], [8, 8, 8, 8, 8], [9, 9, 9, 9, 9]], maxlen=3)\n" + ] + } + ], "source": [ - "import keyboard\n", + "from collections import deque\n", + "import numpy as np\n", "\n", - "while True:\n", - " if keyboard.is_pressed(\"w\"):\n", - " print(\"w\")\n", - " elif keyboard.is_pressed(\"s\"):\n", - " print(\"s\")" + "maxBuffer = 3\n", + "stateSize = 5\n", + "\n", + "aa = deque([[0.0]*stateSize],maxlen=maxBuffer)\n", + "\n", + "def ss(s):\n", + " aa.append(s)\n", + " if len(aa) < maxBuffer:\n", + " for i in range(maxBuffer - len(aa)):\n", + " aa.appendleft([0.0] * stateSize)\n", + "\n", + "for i in range(1,10):\n", + " ss([i,i,i,i,i])\n", + " print(len(aa))\n", + " print(aa)\n", + "'''\n", + "3\n", + "deque([[0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1]], maxlen=3)\n", + "3\n", + "deque([[0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1], [2, 2, 2, 2, 2]], maxlen=3)\n", + "3\n", + "deque([[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]], maxlen=3)\n", + "3\n", + "deque([[2, 2, 2, 2, 2], [3, 3, 3, 3, 3], [4, 4, 4, 4, 4]], maxlen=3)\n", + "3\n", + "deque([[3, 3, 3, 3, 3], [4, 4, 4, 4, 4], [5, 5, 5, 5, 5]], maxlen=3)\n", + "3\n", + "deque([[4, 4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6]], maxlen=3)\n", + "3\n", + "deque([[5, 5, 5, 5, 5], [6, 6, 6, 6, 6], [7, 7, 7, 7, 7]], maxlen=3)\n", + "3\n", + "deque([[6, 6, 6, 6, 6], [7, 7, 7, 7, 7], [8, 8, 8, 8, 8]], maxlen=3)\n", + "3\n", + "deque([[7, 7, 7, 7, 7], [8, 8, 8, 8, 8], [9, 9, 9, 9, 9]], maxlen=3)'''" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "1\n", + "[0]\n" + ] + } + ], + "source": [ + "from collections import deque\n", + "import numpy as np\n", + "\n", + "ss = 1\n", + "si = 0\n", + "buffersize = ss + ((ss-1)*si)\n", + "print(buffersize)\n", + "stackedStates = deque([[0.0] * 10]*6, maxlen=3)\n", + "stackedStates.append([1.0]*10)\n", + "ssnp = stackedStates\n", + "\n", + "aa = list(range(0,buffersize,si+1))\n", + "print(aa)" ] } ],