V2.1 Add spin penalty and adjust rewards

Build-ParallelEnv-Target-OffPolicy-SingleStack-SideChannel-EndReward-Easy-V2.1: add a spin penalty, so an agent that keeps spinning receives a penalty reward. Lower the in-area reward for the Go target.
2022-12-04 08:40:23 +09:00
parent 0d60c857b7
commit fa90ac3bb6
6 changed files with 185 additions and 134 deletions
@@ -38,6 +38,7 @@ public class AgentWithGun : Agent
    public float yRotation = 0.1f;//定义一个浮点类型的量，记录‘围绕’X轴旋转的角度

    [Header("Env")]
+    private List<float> spinRecord = new List<float>();
    private bool lockMouse;
    private float Damage;
    private float fireRate;
@@ -302,7 +303,7 @@ public class AgentWithGun : Agent

    // ------------Reward--------------
    // rewardCalculate 计算本动作的Reward
-    public float rewardCalculate(float sceneReward)
+    public float rewardCalculate(float sceneReward,float mouseX)
    {
        float epreward = 0f;
        // 击杀reward判断
@@ -321,6 +322,16 @@ public class AgentWithGun : Agent
        }
        // 射击动作reward判断
        epreward += ballistic() + sceneReward;
+        spinRecord.Add(mouseX);
+        if (spinRecord.Count >= paramContainer.spinRecordMax)
+        {
+            spinRecord.RemoveAt(0);
+        }
+        float spinPenaltyReward = Math.Abs(spinRecord.ToArray().Sum() * paramContainer.spinPenalty);
+        if(spinPenaltyReward >= paramContainer.spinPenaltyThreshold)
+        {
+            epreward -= spinPenaltyReward;
+        }
        return epreward;
    }

@@ -362,6 +373,7 @@ public class AgentWithGun : Agent
        //sensor.AddObservation(allEnemyNum); // 敌人数量 int
        sensor.AddObservation(targetStates);// targettype, target x,y,z, firebasesAreaDiameter
        sensor.AddObservation(remainTime);
+        sensor.AddObservation(targetCon.getInAreaState());
        sensor.AddObservation(myObserve); // 自机位置xyz+朝向 float[](4,1)
        sensor.AddObservation(rayTagResult); // 探测用RayTag结果 float[](raySensorNum,1)
        sensor.AddObservation(rayDisResult); // 探测用RayDis结果 float[](raySensorNum,1)
@@ -393,7 +405,7 @@ public class AgentWithGun : Agent
        float sceneReward = 0f;
        float endReward = 0f;
        (finishedState, sceneReward, endReward) = targetCon.checkOverAndRewards();
-        float thisRoundReward = rewardCalculate(sceneReward+ endReward);
+        float thisRoundReward = rewardCalculate(sceneReward+ endReward,Mouse_X);
        if (paramContainer.chartOn)
        {
            EnvUICon.updateChart(thisRoundReward);
@@ -22,6 +22,8 @@ public class ParameterContainer : MonoBehaviour
    public bool lockCameraX = false;
    public bool lockCameraY = true;
    public bool chartOn = false;
+    public int spinRecordMax = 20;
+    public float spinPenaltyThreshold = 50;


    [Header("Dynamic Defaut Rewards")]
@@ -70,9 +72,9 @@ public class ParameterContainer : MonoBehaviour
    public float killBonusReward = 0.0f;
    [Header("Penalty Rewards")]
    [Tooltip("Speed Penalty Reward")]
-    public float speedPanalty = 0f;
+    public float speedPenalty = 0f;
    [Tooltip("view Panalty Reward")]
-    public float viewPanalty = 0f;
+    public float spinPenalty = 0f;

    [Header("Dynamic Rewards")]
    [Tooltip("Free mode Hit Enemy reward")]
@@ -1,6 +1,7 @@
 using System;
 using System.Collections;
 using System.Collections.Generic;
+using System.Xml.Serialization;
 using UnityEngine;
 using Random = UnityEngine.Random;

@@ -46,6 +47,7 @@ public class TargetController : MonoBehaviour
    [System.NonSerialized] public int targetNum = 0;
    private Dictionary<int, float[]> oneHotRarget = new Dictionary<int, float[]>();

+    private int inArea = 0;
    private float freeProb;
    private float sceneSize;
    private float lastDistance;
@@ -238,7 +240,6 @@ public class TargetController : MonoBehaviour
        int endTypeInt = 0;
        float thisReward = 0;
        float endReward = 0;
-        int inArea = 0;
        float nowDistance = 0f;
        switch (targetTypeInt)
        {
@@ -246,7 +247,7 @@ public class TargetController : MonoBehaviour
                // goto
                (nowDistance, inArea) = blockCont.getAgentTargetDistanceAndInside(AgentObj.transform.position);
                envUICon.updateTargetGauge(blockCont.thisBlock.firebasesBelong, blockCont.thisBlock.belongMaxPoint);
-                float areaTargetReward = (paramCon.inAreaReward * inArea) + getDistanceReward(nowDistance);
+                float areaTargetReward = getDistanceReward(nowDistance, inArea);
                if (blockCont.thisBlock.firebasesBelong >= blockCont.thisBlock.belongMaxPoint)
                {
                    // win
@@ -363,7 +364,7 @@ public class TargetController : MonoBehaviour
    }

    // caulculate sceneReward if close to target then get great reward
-    public float getDistanceReward(float nowDistance)
+    public float getDistanceReward(float nowDistance,int inarea)
    {
        if (firstRewardFlag)
        {
@@ -371,7 +372,14 @@ public class TargetController : MonoBehaviour
            firstRewardFlag = false;
        }
        float thisSceneReward = 0f;
-        thisSceneReward = paramCon.distanceReward * (lastDistance - nowDistance);
+        if (inarea != 0)
+        {
+            thisSceneReward = paramCon.inAreaReward;
+        }
+        else
+        {
+            thisSceneReward = paramCon.distanceReward * (lastDistance - nowDistance);
+        }
        lastDistance = nowDistance;
        return thisSceneReward;
    }
@@ -438,4 +446,17 @@ public class TargetController : MonoBehaviour
        }
        return thisHitReward;
    }
+
+    // Returns the stored inArea flag while the current target type is Go; returns 0 for every other target type.
+    public int getInAreaState()
+    {
+        if(targetTypeInt == (int)Targets.Go)
+        {
+            return inArea;
+        }
+        else
+        {
+            return 0;
+        }
+    }
 }