Add a save button in Unity that sends a "saveNow" toggle to Python. Delete the unused "saveModel" and "loadModel" methods; save and load the network weights instead.
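For context on this change: the Unity save button reaches the Python side as a "saveNow" value returned by env.step(), and persistence now goes through per-network weight checkpoints rather than whole-model files. Below is a minimal sketch of that pattern, assuming the PPO agent wraps two tf.keras models (an actor and a critic); the helper names and directory layout are illustrative, not the repository's exact API.

    import os
    import tensorflow as tf

    # Hypothetical helpers sketching the saveWeights/loadWeights approach;
    # names, signatures and paths are illustrative, not the repository's exact API.
    def save_weights(actor: tf.keras.Model, critic: tf.keras.Model, base_dir: str) -> None:
        # make sure the target folders exist before writing
        os.makedirs(os.path.join(base_dir, "actor"), exist_ok=True)
        os.makedirs(os.path.join(base_dir, "critic"), exist_ok=True)
        # tf.keras writes TensorFlow checkpoint files holding only the layer weights
        actor.save_weights(os.path.join(base_dir, "actor", "actor.ckpt"))
        critic.save_weights(os.path.join(base_dir, "critic", "critic.ckpt"))

    def load_weights(actor: tf.keras.Model, critic: tf.keras.Model, base_dir: str) -> None:
        # the networks must be rebuilt with the same architecture before loading
        actor.load_weights(os.path.join(base_dir, "actor", "actor.ckpt"))
        critic.load_weights(os.path.join(base_dir, "critic", "critic.ckpt"))

    # In the training loop below, a non-zero saveNow returned by env.step() triggers a save:
    #     nextState, thisReward, done, _, saveNow = env.step(...)
    #     if saveNow != 0:
    #         agent.saveWeights()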
350 lines
14 KiB
Plaintext
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import aimBotEnv\n",
"import PPO\n",
"import buffer\n",
"import numpy as np\n",
"\n",
"import tensorflow as tf\n",
"import time\n",
"import datetime"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# allocate GPU memory on demand instead of reserving it all up front\n",
"physical_devices = tf.config.list_physical_devices('GPU')\n",
"tf.config.experimental.set_memory_growth(physical_devices[0], True)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"ENV_PATH = './Build-MultiScene-WithLoad/Aimbot-PPO'\n",
"WORKER_ID = 1\n",
"BASE_PORT = 200\n",
"\n",
"MAX_EP = 1000\n",
"EP_LENGTH = 100000\n",
"GAMMA = 0.99 # discount factor for future rewards (increase?)\n",
"EPSILON = 0.2 # clip ratio range [1-EPSILON, 1+EPSILON]\n",
"ACTOR_LR = 1e-5 # actor learning rate\n",
"CRITIC_LR = 2e-5 # critic learning rate\n",
"BATCH = 512 # update the networks every BATCH steps\n",
"ACTOR_EPOCH = 15 # actor epochs per update\n",
"CRITIC_EPOCH = 15 # critic epochs per update\n",
"ENTROPY_WHEIGHT = 0.01 # weight of sigma's entropy term in the actor loss\n",
"ACTION_INTERVAL = 1 # take action every ACTION_INTERVAL steps\n",
"\n",
"\n",
"TRAIN = True\n",
"SAVE_DIR = \"PPO-Model/\"+datetime.datetime.now().strftime(\"%m%d%H%M\")+\"/\"\n",
"LOAD_DIR = None\n",
"\n",
"CTN_ACTION_RANGE = 10"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"√√√√√Enviroment Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n",
"No loadDir specified,Create a New Model\n",
"CONTINUOUS_SIZE 1\n",
"DISCRETE_SIZE 5\n",
"STATE_SIZE 29\n"
]
}
],
"source": [
"# initialize environment & buffer classes\n",
"env = aimBotEnv.makeEnv(envPath = ENV_PATH,\n",
"                        workerID = WORKER_ID,\n",
"                        basePort = BASE_PORT)\n",
"epBuffer = buffer.buffer()\n",
"\n",
"STATE_SIZE = env.STATE_SIZE\n",
"CONTINUOUS_SIZE = env.CONTINUOUS_SIZE\n",
"DISCRETE_SIZE = env.DISCRETE_SIZE\n",
"_,_,_,loadDir,_ = env.getSteps()\n",
"\n",
"# check whether Unity passed a model directory to load\n",
"if(np.any(loadDir == 0)):\n",
"    # create a new model\n",
"    print(\"No loadDir specified,Create a New Model\")\n",
"    LOAD_DIR = None\n",
"else:\n",
"    # load an existing model\n",
"    loadDirDateSTR = str(int(loadDir[0]))\n",
"    loadDirTimeSTR = str(int(loadDir[1]))\n",
"    if len(loadDirDateSTR)!=8:\n",
"        # pad leading zeros lost when the date was passed as a float\n",
"        for _ in range(8 - len(loadDirDateSTR)):\n",
"            loadDirDateSTR = \"0\" + loadDirDateSTR\n",
"    if len(loadDirTimeSTR)!=6:\n",
"        # pad leading zeros lost when the time was passed as a float\n",
"        for _ in range(6 - len(loadDirTimeSTR)):\n",
"            loadDirTimeSTR = \"0\" + loadDirTimeSTR\n",
"    LOAD_DIR = \"PPO-Model/\"+loadDirDateSTR+\"/\"+loadDirTimeSTR\n",
"    print(\"Load Model:\")\n",
"    print(LOAD_DIR)\n",
"\n",
"print(\"CONTINUOUS_SIZE\",CONTINUOUS_SIZE)\n",
"print(\"DISCRETE_SIZE\",DISCRETE_SIZE)\n",
"print(\"STATE_SIZE\",STATE_SIZE)\n",
"\n",
"disActShape = [3,3,2]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def actToKey(disAct1,disAct2,disAct3,conAct):\n",
"    kW = 0\n",
"    kS = 0\n",
"    kA = 0\n",
"    kD = 0\n",
"    mouseShoot = 0\n",
"    if disAct1 == 0:\n",
"        kW = 0\n",
"        kS = 1\n",
"    elif disAct1 == 1:\n",
"        kW = 0\n",
"        kS = 0\n",
"    elif disAct1 == 2:\n",
"        kW = 1\n",
"        kS = 0\n",
"    if disAct2 == 0:\n",
"        kA = 0\n",
"        kD = 1\n",
"    elif disAct2 == 1:\n",
"        kA = 0\n",
"        kD = 0\n",
"    elif disAct2 == 2:\n",
"        kA = 1\n",
"        kD = 0\n",
"    mouseShoot = disAct3\n",
"    return kW,kS,kA,kD,mouseShoot,conAct"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"EP 0 START\n",
"√√√√√Buffer Initialized Success√√√√√\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\core\\fromnumeric.py:3474: RuntimeWarning: Mean of empty slice.\n",
"  return _methods._mean(a, axis=axis, dtype=dtype,\n",
"c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\numpy\\core\\_methods.py:189: RuntimeWarning: invalid value encountered in double_scalars\n",
"  ret = ret.dtype.type(ret / rcount)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"SAVENOW!\n",
"Model's Weights Saved\n",
"A_Loss: 9210259745450.666 C_Loss: 7842064320569890.0\n",
"SAVENOW!\n",
"Model's Weights Saved\n",
"EP OVER!\n",
"A_Loss: 4103941.316666667 C_Loss: 410607418692949.3\n"
]
},
{
"ename": "FileNotFoundError",
"evalue": "[Errno 2] No such file or directory: 'PPO-Model/09052116/211645/-53'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_15440/420232317.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 86\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mepTotalReward\u001b[0m \u001b[1;33m>\u001b[0m \u001b[0mmaxTotalReward\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mepTotalReward\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 87\u001b[0m \u001b[0mmaxTotalReward\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mepTotalReward\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 88\u001b[1;33m \u001b[0magent\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msaveWeights\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mepTotalReward\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 89\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"New Record! Save NN\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mepTotalReward\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 90\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\PPO.py\u001b[0m in \u001b[0;36msaveWeights\u001b[1;34m(self, score)\u001b[0m\n\u001b[0;32m 403\u001b[0m \u001b[1;31m# create an empty file named as score to recored score\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 404\u001b[0m \u001b[0mscore_dir\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msaveDir\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnow\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstrftime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"%H%M%S\"\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;34m\"/\"\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mscore\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 405\u001b[1;33m \u001b[0mscorefile\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mscore_dir\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'w'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 406\u001b[0m \u001b[0mscorefile\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 407\u001b[0m \u001b[0mactor_save_dir\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msaveDir\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnow\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstrftime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"%H%M%S\"\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;34m\"/actor/\"\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;34m\"actor.ckpt\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'PPO-Model/09052116/211645/-53'"
]
}
],
"source": [
"bestScore = 200.\n",
"stopTrainCounter = 0\n",
"\n",
"totalRewardHis = []\n",
"totalActorLossHis = []\n",
"totalCriticLossHis = []\n",
"epHis = []\n",
"maxTotalReward = -99999999999\n",
"\n",
"for ep in range(MAX_EP):\n",
"    print(\"EP \",ep,\" START\")\n",
"    # reset the environment at the start of each episode\n",
"    s,_,_,_,_ = env.reset()\n",
"    if (ep == 0):\n",
"        epBuffer = buffer.buffer()\n",
"        s = s.reshape([STATE_SIZE])\n",
"        agent = PPO.PPO(stateSize=STATE_SIZE,\n",
"                        disActShape=disActShape,\n",
"                        conActSize=1,\n",
"                        conActRange=CTN_ACTION_RANGE,\n",
"                        criticLR=CRITIC_LR,\n",
"                        actorLR=ACTOR_LR,\n",
"                        gamma=GAMMA,\n",
"                        epsilon=EPSILON,\n",
"                        entropyWeight=ENTROPY_WHEIGHT,\n",
"                        saveDir=SAVE_DIR,\n",
"                        loadModelDir=LOAD_DIR)\n",
"    step = 0\n",
"    done = False\n",
"    stopTrainCounter -= 1\n",
"    epHis.append(ep)\n",
"\n",
"    # reset total reward\n",
"    epTotalReward = 0\n",
"\n",
"    # recorder lists\n",
"    epStepHis = []\n",
"    epRewardHis = []\n",
"    epActorLossHis = []\n",
"    epCriticLossHis = []\n",
"\n",
"    # flag from Unity's save button: save weights immediately when non-zero\n",
"    saveNow = 0\n",
"\n",
" while not done:\n",
|
|
" step += 1\n",
|
|
" if step % ACTION_INTERVAL == 0: # take action every ACTION_INTERVAL steps\n",
|
|
" epStepHis.append(step)\n",
|
|
" disAct1,disAct2,disAct3,conAct,predictResult = agent.chooseAction(s)\n",
|
|
" kW, kS, kA, kD, mouseShoot, mouseMove = actToKey(disAct1,disAct2,disAct3,conAct)\n",
|
|
" \n",
|
|
" nextState,thisReward,done,_,saveNow = env.step(discreteActions=np.array([[kW, kS, kA, kD, mouseShoot]]),continuousActions=np.array([[mouseMove]]))\n",
|
|
"\n",
|
|
" epTotalReward += thisReward\n",
|
|
" epBuffer.saveBuffers(s,[disAct1,disAct2,disAct3,conAct],thisReward)\n",
|
|
" else:\n",
|
|
" disActs = np.array([[0,0,0,0,0]])\n",
|
|
" conActs = np.array([[0]])\n",
|
|
"\n",
|
|
" nextState,thisReward,done,_,saveNow = env.step(discreteActions=disActs,continuousActions=conActs)\n",
|
|
" epTotalReward += thisReward\n",
|
|
" nextState = nextState.reshape([STATE_SIZE])\n",
|
|
" s = nextState\n",
|
|
" \n",
|
|
" if done:\n",
|
|
" print(\"EP OVER!\")\n",
|
|
" if saveNow != 0:\n",
|
|
" print(\"SAVENOW!\")\n",
|
|
" saveNow = 0\n",
|
|
" agent.saveWeights()\n",
|
|
" # update PPO after Batch step or GameOver\n",
|
|
" if (step+1)%BATCH == 0 or done:\n",
|
|
" bs = epBuffer.getStates()\n",
|
|
" ba = epBuffer.getActions()\n",
|
|
" br = epBuffer.getRewards()\n",
|
|
" epBuffer.clearBuffer()\n",
|
|
" if TRAIN:\n",
|
|
" epActorLoss,epCriticLoss = agent.trainCritcActor(bs,ba,br,s,CRITIC_EPOCH,ACTOR_EPOCH)\n",
|
|
" epActorLossHis.append(epActorLoss)\n",
|
|
" epCriticLossHis.append(epCriticLoss)\n",
|
|
" # update History Recorder\n",
|
|
" totalActorLossHis.append(np.mean(epActorLossHis))\n",
|
|
" totalCriticLossHis.append(np.mean(epCriticLossHis))\n",
|
|
" totalRewardHis.append(epTotalReward)\n",
|
|
" \n",
|
|
" if (epTotalReward > maxTotalReward and epTotalReward != 0):\n",
|
|
" maxTotalReward = epTotalReward\n",
|
|
" agent.saveWeights(epTotalReward)\n",
|
|
" print(\"New Record! Save NN\",epTotalReward)\n",
|
|
" "
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"aaa = 0\n",
"aaa = 1\n",
"aaa = 2\n",
"aaa = 3\n",
"aaa = 4\n",
"aaa = 5\n",
"aaa = 6\n",
"aaa = 7\n",
"aaa = 8\n",
"aaa = 9\n"
]
}
],
"source": [
"aaa = 0\n",
"while aaa<10:\n",
"    print(\"aaa = \",aaa)\n",
"    aaa+=1"
]
}
],
"metadata": {
"interpreter": {
"hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32"
},
"kernelspec": {
"display_name": "Python 3.9.7 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
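One note on the FileNotFoundError captured in the training cell above: saveWeights opens an empty score marker file under a freshly time-stamped sub-directory ('PPO-Model/09052116/211645/-53') before that directory exists. A hedged sketch of the usual guard, with the path built the same way the traceback shows (the helper name here is illustrative):

    import datetime
    import os

    def write_score_marker(save_dir: str, score: float) -> str:
        # e.g. save_dir = "PPO-Model/09052116/"; the layout mirrors the traceback above
        score_path = save_dir + datetime.datetime.now().strftime("%H%M%S") + "/" + str(round(score))
        os.makedirs(os.path.dirname(score_path), exist_ok=True)  # create the folder before opening the file
        open(score_path, "w").close()  # empty file whose name records the rounded score
        return score_path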