{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import aimBotEnv\n",
    "import PPO\n",
    "import buffer\n",
    "import numpy as np\n",
    "\n",
    "import tensorflow as tf\n",
    "import time\n",
    "import datetime\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attempts to allocate only the GPU memory needed for allocation\n",
    "physical_devices = tf.config.list_physical_devices('GPU')\n",
    "tf.config.experimental.set_memory_growth(physical_devices[0], True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Env\n",
    "ENV_PATH = \"./Build-CloseEnemyCut/Aimbot-PPO\"\n",
    "WORKER_ID = 1\n",
    "BASE_PORT = 200\n",
    "\n",
    "MAX_EP = 1000\n",
    "EP_LENGTH = 100000\n",
    "GAMMA = 0.99  # discount future reward (UP?)\n",
    "EPSILON = 0.2  # clip Ratio range[1-EPSILON,1+EPSILON]\n",
    "ACTOR_LR = 1e-5  # LR\n",
    "CRITIC_LR = 2e-5  # LR\n",
    "BATCH = 256  # learning step\n",
    "ACTOR_EPOCH = 15  # epoch\n",
    "CRITIC_EPOCH = 15  # epoch\n",
    "ENTROPY_WHEIGHT = 0.001  # sigma's entropy in Actor loss\n",
    "ACTION_INTERVAL = 1  # take action every ACTION_INTERVAL steps\n",
    "\n",
    "\n",
    "TRAIN = True\n",
    "SAVE_DIR = \"PPO-Model/\" + datetime.datetime.now().strftime(\"%m%d%H%M\") + \"/\"\n",
    "LOAD_DIR = None\n",
    "\n",
    "CTN_ACTION_RANGE = 10\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "√√√√√Enviroment Initialized Success√√√√√\n",
      "√√√√√Buffer Initialized Success√√√√√\n",
      "No loadDir specified,Create a New Model\n",
      "CONTINUOUS_SIZE 1\n",
      "DISCRETE_SIZE 5\n",
      "STATE_SIZE 30\n"
     ]
    }
   ],
   "source": [
    "# initialize enviroment & buffer class\n",
    "env = aimBotEnv.makeEnv(\n",
    "    envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT\n",
    ")\n",
    "epBuffer = buffer.buffer()\n",
    "\n",
    "STATE_SIZE = env.STATE_SIZE\n",
    "CONTINUOUS_SIZE = env.CONTINUOUS_SIZE\n",
    "DISCRETE_SIZE = env.DISCRETE_SIZE\n",
    "_, _, _, loadDir, _ = env.getSteps()\n",
    "\n",
    "# check load model or not\n",
    "if np.any(loadDir == 0):\n",
    "    # create a new model\n",
    "    print(\"No loadDir specified,Create a New Model\")\n",
    "    LOAD_DIR = None\n",
    "else:\n",
    "    # load model\n",
    "    loadDirDateSTR = str(int(loadDir[0]))\n",
    "    loadDirTimeSTR = str(int(loadDir[1]))\n",
    "    if len(loadDirDateSTR) != 8:\n",
    "        # fill lost 0 while converse float to string\n",
    "        for _ in range(8 - len(loadDirDateSTR)):\n",
    "            loadDirDateSTR = \"0\" + loadDirDateSTR\n",
    "    if len(loadDirTimeSTR) != 6:\n",
    "        # fill lost 0 while converse float to string\n",
    "        for _ in range(6 - len(loadDirTimeSTR)):\n",
    "            loadDirTimeSTR = \"0\" + loadDirTimeSTR\n",
    "    LOAD_DIR = \"PPO-Model/\" + loadDirDateSTR + \"/\" + loadDirTimeSTR\n",
    "    print(\"Load Model:\")\n",
    "    print(LOAD_DIR)\n",
    "\n",
    "print(\"CONTINUOUS_SIZE\", CONTINUOUS_SIZE)\n",
    "print(\"DISCRETE_SIZE\", DISCRETE_SIZE)\n",
    "print(\"STATE_SIZE\", STATE_SIZE)\n",
    "\n",
    "disActShape = [3, 3, 2]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def actToKey(disAct1,disAct2,disAct3,conAct):\n",
    "    kW = 0\n",
    "    kS = 0\n",
    "    kA = 0\n",
    "    kD = 0\n",
    "    mouseShoot = 0\n",
    "    if disAct1 == 0:\n",
    "        kW = 0\n",
    "        kS = 1\n",
    "    elif disAct1 == 1:\n",
    "        kW = 0\n",
    "        kS = 0\n",
    "    elif disAct1 == 2:\n",
    "        kW = 1\n",
    "        kS = 0\n",
    "    if disAct2 == 0:\n",
    "        kA = 0\n",
    "        kD = 1\n",
    "    elif disAct2 == 1:\n",
    "        kA = 0\n",
    "        kD = 0\n",
    "    elif disAct2 == 2:\n",
    "        kA = 1\n",
    "        kD = 0\n",
    "    mouseShoot = disAct3\n",
    "    return kW,kS,kA,kD,mouseShoot,conAct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EP  0  START\n",
      "√√√√√Buffer Initialized Success√√√√√\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\core\\fromnumeric.py:3474: RuntimeWarning: Mean of empty slice.\n",
      "  return _methods._mean(a, axis=axis, dtype=dtype,\n",
      "c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\core\\_methods.py:189: RuntimeWarning: invalid value encountered in double_scalars\n",
      "  ret = ret.dtype.type(ret / rcount)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "A_Loss: 0.4477495511372884 C_Loss: 3.155759557088216\n",
      "A_Loss: 0.14549287557601928 C_Loss: 0.5123071213563283\n",
      "A_Loss: 0.055241942902406055 C_Loss: 0.13002794484297434\n",
      "A_Loss: 0.057325509190559384 C_Loss: 0.11068039039770762\n",
      "A_Loss: 0.04376962607105573 C_Loss: 0.03923700377345085\n"
     ]
    }
   ],
   "source": [
    "bestScore = 200.0\n",
    "stopTrainCounter = 0\n",
    "\n",
    "totalRewardHis = []\n",
    "totalActorLossHis = []\n",
    "totalCriticLossHis = []\n",
    "epHis = []\n",
    "maxTotalReward = -99999999999\n",
    "\n",
    "for ep in range(MAX_EP):\n",
    "    print(\"EP \", ep, \" START\")\n",
    "    # first time run game\n",
    "    s, _, _, _, _ = env.reset()\n",
    "    if ep == 0:\n",
    "        epBuffer = buffer.buffer()\n",
    "        s = s.reshape([STATE_SIZE])\n",
    "        agent = PPO.PPO(\n",
    "            stateSize=STATE_SIZE,\n",
    "            disActShape=disActShape,\n",
    "            conActSize=1,\n",
    "            conActRange=CTN_ACTION_RANGE,\n",
    "            criticLR=CRITIC_LR,\n",
    "            actorLR=ACTOR_LR,\n",
    "            gamma=GAMMA,\n",
    "            epsilon=EPSILON,\n",
    "            entropyWeight=ENTROPY_WHEIGHT,\n",
    "            saveDir=SAVE_DIR,\n",
    "            loadModelDir=LOAD_DIR,\n",
    "        )\n",
    "    step = 0\n",
    "    done = False\n",
    "    stopTrainCounter -= 1\n",
    "    epHis.append(ep)\n",
    "\n",
    "    # reset total reward\n",
    "    epTotalReward = 0\n",
    "\n",
    "    # Recorder list\n",
    "    epStepHis = []\n",
    "    epRewardHis = []\n",
    "    epActorLossHis = []\n",
    "    epCriticLossHis = []\n",
    "\n",
    "    # save weight immediately?\n",
    "    saveNow = 0\n",
    "\n",
    "    while not done:\n",
    "        step += 1\n",
    "        if (\n",
    "            step % ACTION_INTERVAL == 0\n",
    "        ):  # take action every ACTION_INTERVAL steps\n",
    "            epStepHis.append(step)\n",
    "            (\n",
    "                disAct1,\n",
    "                disAct2,\n",
    "                disAct3,\n",
    "                conAct,\n",
    "                predictResult,\n",
    "            ) = agent.chooseAction(s)\n",
    "            kW, kS, kA, kD, mouseShoot, mouseMove = actToKey(\n",
    "                disAct1, disAct2, disAct3, conAct\n",
    "            )\n",
    "\n",
    "            nextState, thisReward, done, _, saveNow = env.step(\n",
    "                discreteActions=np.array([[kW, kS, kA, kD, mouseShoot]]),\n",
    "                continuousActions=np.array([[mouseMove]]),\n",
    "            )\n",
    "\n",
    "            epTotalReward += thisReward\n",
    "            epBuffer.saveBuffers(\n",
    "                s, [disAct1, disAct2, disAct3, conAct], thisReward\n",
    "            )\n",
    "        else:\n",
    "            disActs = np.array([[0, 0, 0, 0, 0]])\n",
    "            conActs = np.array([[0]])\n",
    "\n",
    "            nextState, thisReward, done, _, saveNow = env.step(\n",
    "                discreteActions=disActs, continuousActions=conActs\n",
    "            )\n",
    "            epTotalReward += thisReward\n",
    "        nextState = nextState.reshape([STATE_SIZE])\n",
    "        s = nextState\n",
    "\n",
    "        if done:\n",
    "            print(\"EP OVER!\")\n",
    "        if saveNow != 0:\n",
    "            print(\"SAVENOW!\")\n",
    "            saveNow = 0\n",
    "            agent.saveWeights()\n",
    "        # update PPO after Batch step or GameOver\n",
    "        if (step + 1) % BATCH == 0 or done:\n",
    "            bs = epBuffer.getStates()\n",
    "            ba = epBuffer.getActions()\n",
    "            br = epBuffer.getRewards()\n",
    "            epBuffer.clearBuffer()\n",
    "            if TRAIN:\n",
    "                epActorLoss, epCriticLoss = agent.trainCritcActor(\n",
    "                    bs, ba, br, s, CRITIC_EPOCH, ACTOR_EPOCH\n",
    "                )\n",
    "                epActorLossHis.append(epActorLoss)\n",
    "                epCriticLossHis.append(epCriticLoss)\n",
    "        # update History Recorder\n",
    "        totalActorLossHis.append(np.mean(epActorLossHis))\n",
    "        totalCriticLossHis.append(np.mean(epCriticLossHis))\n",
    "        totalRewardHis.append(epTotalReward)\n",
    "\n",
    "    if epTotalReward > maxTotalReward and epTotalReward != 0:\n",
    "        maxTotalReward = epTotalReward\n",
    "        agent.saveWeights(epTotalReward)\n",
    "        print(\"New Record! Save NN\", epTotalReward)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "aaa =  0\n",
      "aaa =  1\n",
      "aaa =  2\n",
      "aaa =  3\n",
      "aaa =  4\n",
      "aaa =  5\n",
      "aaa =  6\n",
      "aaa =  7\n",
      "aaa =  8\n",
      "aaa =  9\n"
     ]
    }
   ],
   "source": [
    "aaa = 0\n",
    "while aaa<10:\n",
    "    print(\"aaa = \",aaa)\n",
    "    aaa+=1"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}