V2.1 Add spin penalty and adjust rewards

Build-ParallelEnv-Target-OffPolicy-SingleStack-SideChannel-EndReward-Easy-V2.1
Add a spin penalty: an agent that keeps spinning receives a penalty reward.
Lower the in-area reward for the Go target.
This commit is contained in:
2022-12-04 08:40:23 +09:00
parent 0d60c857b7
commit fa90ac3bb6
6 changed files with 185 additions and 134 deletions
+14 -2
View File
@@ -38,6 +38,7 @@ public class AgentWithGun : Agent
public float yRotation = 0.1f;//定义一个浮点类型的量,记录‘围绕’X轴旋转的角度
[Header("Env")]
private List<float> spinRecord = new List<float>();
private bool lockMouse;
private float Damage;
private float fireRate;
@@ -302,7 +303,7 @@ public class AgentWithGun : Agent
// ------------Reward--------------
// rewardCalculate 计算本动作的Reward
public float rewardCalculate(float sceneReward)
public float rewardCalculate(float sceneReward,float mouseX)
{
float epreward = 0f;
// 击杀reward判断
@@ -321,6 +322,16 @@ public class AgentWithGun : Agent
}
// 射击动作reward判断
epreward += ballistic() + sceneReward;
spinRecord.Add(mouseX);
if (spinRecord.Count >= paramContainer.spinRecordMax)
{
spinRecord.RemoveAt(0);
}
float spinPenaltyReward = Math.Abs(spinRecord.ToArray().Sum() * paramContainer.spinPenalty);
if(spinPenaltyReward >= paramContainer.spinPenaltyThreshold)
{
epreward -= spinPenaltyReward;
}
return epreward;
}
@@ -362,6 +373,7 @@ public class AgentWithGun : Agent
//sensor.AddObservation(allEnemyNum); // 敌人数量 int
sensor.AddObservation(targetStates);// targettype, target x,y,z, firebasesAreaDiameter
sensor.AddObservation(remainTime);
sensor.AddObservation(targetCon.getInAreaState());
sensor.AddObservation(myObserve); // 自机位置xyz+朝向 float[](4,1)
sensor.AddObservation(rayTagResult); // 探测用RayTag结果 float[](raySensorNum,1)
sensor.AddObservation(rayDisResult); // 探测用RayDis结果 float[](raySensorNum,1)
@@ -393,7 +405,7 @@ public class AgentWithGun : Agent
float sceneReward = 0f;
float endReward = 0f;
(finishedState, sceneReward, endReward) = targetCon.checkOverAndRewards();
float thisRoundReward = rewardCalculate(sceneReward+ endReward);
float thisRoundReward = rewardCalculate(sceneReward+ endReward,Mouse_X);
if (paramContainer.chartOn)
{
EnvUICon.updateChart(thisRoundReward);
+4 -2
View File
@@ -22,6 +22,8 @@ public class ParameterContainer : MonoBehaviour
public bool lockCameraX = false;
public bool lockCameraY = true;
public bool chartOn = false;
public int spinRecordMax = 20;
public float spinPenaltyThreshold = 50;
[Header("Dynamic Defaut Rewards")]
@@ -70,9 +72,9 @@ public class ParameterContainer : MonoBehaviour
public float killBonusReward = 0.0f;
[Header("Penalty Rewards")]
[Tooltip("Speed Penalty Reward")]
public float speedPanalty = 0f;
public float speedPenalty = 0f;
[Tooltip("view Panalty Reward")]
public float viewPanalty = 0f;
public float spinPenalty = 0f;
[Header("Dynamic Rewards")]
[Tooltip("Free mode Hit Enemy reward")]
+25 -4
View File
@@ -1,6 +1,7 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Xml.Serialization;
using UnityEngine;
using Random = UnityEngine.Random;
@@ -46,6 +47,7 @@ public class TargetController : MonoBehaviour
[System.NonSerialized] public int targetNum = 0;
private Dictionary<int, float[]> oneHotRarget = new Dictionary<int, float[]>();
private int inArea = 0;
private float freeProb;
private float sceneSize;
private float lastDistance;
@@ -238,7 +240,6 @@ public class TargetController : MonoBehaviour
int endTypeInt = 0;
float thisReward = 0;
float endReward = 0;
int inArea = 0;
float nowDistance = 0f;
switch (targetTypeInt)
{
@@ -246,7 +247,7 @@ public class TargetController : MonoBehaviour
// goto
(nowDistance, inArea) = blockCont.getAgentTargetDistanceAndInside(AgentObj.transform.position);
envUICon.updateTargetGauge(blockCont.thisBlock.firebasesBelong, blockCont.thisBlock.belongMaxPoint);
float areaTargetReward = (paramCon.inAreaReward * inArea) + getDistanceReward(nowDistance);
float areaTargetReward = getDistanceReward(nowDistance, inArea);
if (blockCont.thisBlock.firebasesBelong >= blockCont.thisBlock.belongMaxPoint)
{
// win
@@ -363,7 +364,7 @@ public class TargetController : MonoBehaviour
}
// calculate sceneReward: the closer the agent gets to the target, the greater the reward
public float getDistanceReward(float nowDistance)
public float getDistanceReward(float nowDistance,int inarea)
{
if (firstRewardFlag)
{
@@ -371,7 +372,14 @@ public class TargetController : MonoBehaviour
firstRewardFlag = false;
}
float thisSceneReward = 0f;
thisSceneReward = paramCon.distanceReward * (lastDistance - nowDistance);
if (inarea != 0)
{
thisSceneReward = paramCon.inAreaReward;
}
else
{
thisSceneReward = paramCon.distanceReward * (lastDistance - nowDistance);
}
lastDistance = nowDistance;
return thisSceneReward;
}
@@ -438,4 +446,17 @@ public class TargetController : MonoBehaviour
}
return thisHitReward;
}
// Get the in-area state.
// Returns the stored inArea value when the current target type is Go;
// returns 0 for every other target type.
public int getInAreaState()
{
    bool isGoTarget = targetTypeInt == (int)Targets.Go;
    return isGoTarget ? inArea : 0;
}
}