V2.1 Add spin penalty and adjust rewards
Build-ParallelEnv-Target-OffPolicy-SingleStack-SideChannel-EndReward-Easy-V2.1: add a spin penalty, so an agent that keeps spinning receives a penalty reward. Lower the in-area reward for the Go target.
This commit is contained in:
@@ -38,6 +38,7 @@ public class AgentWithGun : Agent
|
||||
public float yRotation = 0.1f;//定义一个浮点类型的量,记录‘围绕’X轴旋转的角度
|
||||
|
||||
[Header("Env")]
|
||||
private List<float> spinRecord = new List<float>();
|
||||
private bool lockMouse;
|
||||
private float Damage;
|
||||
private float fireRate;
|
||||
@@ -302,7 +303,7 @@ public class AgentWithGun : Agent
|
||||
|
||||
// ------------Reward--------------
|
||||
// rewardCalculate 计算本动作的Reward
|
||||
public float rewardCalculate(float sceneReward)
|
||||
public float rewardCalculate(float sceneReward,float mouseX)
|
||||
{
|
||||
float epreward = 0f;
|
||||
// 击杀reward判断
|
||||
@@ -321,6 +322,16 @@ public class AgentWithGun : Agent
|
||||
}
|
||||
// 射击动作reward判断
|
||||
epreward += ballistic() + sceneReward;
|
||||
spinRecord.Add(mouseX);
|
||||
if (spinRecord.Count >= paramContainer.spinRecordMax)
|
||||
{
|
||||
spinRecord.RemoveAt(0);
|
||||
}
|
||||
float spinPenaltyReward = Math.Abs(spinRecord.ToArray().Sum() * paramContainer.spinPenalty);
|
||||
if(spinPenaltyReward >= paramContainer.spinPenaltyThreshold)
|
||||
{
|
||||
epreward -= spinPenaltyReward;
|
||||
}
|
||||
return epreward;
|
||||
}
|
||||
|
||||
@@ -362,6 +373,7 @@ public class AgentWithGun : Agent
|
||||
//sensor.AddObservation(allEnemyNum); // 敌人数量 int
|
||||
sensor.AddObservation(targetStates);// targettype, target x,y,z, firebasesAreaDiameter
|
||||
sensor.AddObservation(remainTime);
|
||||
sensor.AddObservation(targetCon.getInAreaState());
|
||||
sensor.AddObservation(myObserve); // 自机位置xyz+朝向 float[](4,1)
|
||||
sensor.AddObservation(rayTagResult); // 探测用RayTag结果 float[](raySensorNum,1)
|
||||
sensor.AddObservation(rayDisResult); // 探测用RayDis结果 float[](raySensorNum,1)
|
||||
@@ -393,7 +405,7 @@ public class AgentWithGun : Agent
|
||||
float sceneReward = 0f;
|
||||
float endReward = 0f;
|
||||
(finishedState, sceneReward, endReward) = targetCon.checkOverAndRewards();
|
||||
float thisRoundReward = rewardCalculate(sceneReward+ endReward);
|
||||
float thisRoundReward = rewardCalculate(sceneReward+ endReward,Mouse_X);
|
||||
if (paramContainer.chartOn)
|
||||
{
|
||||
EnvUICon.updateChart(thisRoundReward);
|
||||
|
||||
@@ -22,6 +22,8 @@ public class ParameterContainer : MonoBehaviour
|
||||
public bool lockCameraX = false;
|
||||
public bool lockCameraY = true;
|
||||
public bool chartOn = false;
|
||||
public int spinRecordMax = 20;
|
||||
public float spinPenaltyThreshold = 50;
|
||||
|
||||
|
||||
[Header("Dynamic Defaut Rewards")]
|
||||
@@ -70,9 +72,9 @@ public class ParameterContainer : MonoBehaviour
|
||||
public float killBonusReward = 0.0f;
|
||||
[Header("Penalty Rewards")]
|
||||
[Tooltip("Speed Penalty Reward")]
|
||||
public float speedPanalty = 0f;
|
||||
public float speedPenalty = 0f;
|
||||
[Tooltip("view Panalty Reward")]
|
||||
public float viewPanalty = 0f;
|
||||
public float spinPenalty = 0f;
|
||||
|
||||
[Header("Dynamic Rewards")]
|
||||
[Tooltip("Free mode Hit Enemy reward")]
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Xml.Serialization;
|
||||
using UnityEngine;
|
||||
using Random = UnityEngine.Random;
|
||||
|
||||
@@ -46,6 +47,7 @@ public class TargetController : MonoBehaviour
|
||||
[System.NonSerialized] public int targetNum = 0;
|
||||
private Dictionary<int, float[]> oneHotRarget = new Dictionary<int, float[]>();
|
||||
|
||||
private int inArea = 0;
|
||||
private float freeProb;
|
||||
private float sceneSize;
|
||||
private float lastDistance;
|
||||
@@ -238,7 +240,6 @@ public class TargetController : MonoBehaviour
|
||||
int endTypeInt = 0;
|
||||
float thisReward = 0;
|
||||
float endReward = 0;
|
||||
int inArea = 0;
|
||||
float nowDistance = 0f;
|
||||
switch (targetTypeInt)
|
||||
{
|
||||
@@ -246,7 +247,7 @@ public class TargetController : MonoBehaviour
|
||||
// goto
|
||||
(nowDistance, inArea) = blockCont.getAgentTargetDistanceAndInside(AgentObj.transform.position);
|
||||
envUICon.updateTargetGauge(blockCont.thisBlock.firebasesBelong, blockCont.thisBlock.belongMaxPoint);
|
||||
float areaTargetReward = (paramCon.inAreaReward * inArea) + getDistanceReward(nowDistance);
|
||||
float areaTargetReward = getDistanceReward(nowDistance, inArea);
|
||||
if (blockCont.thisBlock.firebasesBelong >= blockCont.thisBlock.belongMaxPoint)
|
||||
{
|
||||
// win
|
||||
@@ -363,7 +364,7 @@ public class TargetController : MonoBehaviour
|
||||
}
|
||||
|
||||
// calculate sceneReward: the closer the agent gets to the target, the greater the reward
|
||||
public float getDistanceReward(float nowDistance)
|
||||
public float getDistanceReward(float nowDistance,int inarea)
|
||||
{
|
||||
if (firstRewardFlag)
|
||||
{
|
||||
@@ -371,7 +372,14 @@ public class TargetController : MonoBehaviour
|
||||
firstRewardFlag = false;
|
||||
}
|
||||
float thisSceneReward = 0f;
|
||||
thisSceneReward = paramCon.distanceReward * (lastDistance - nowDistance);
|
||||
if (inarea != 0)
|
||||
{
|
||||
thisSceneReward = paramCon.inAreaReward;
|
||||
}
|
||||
else
|
||||
{
|
||||
thisSceneReward = paramCon.distanceReward * (lastDistance - nowDistance);
|
||||
}
|
||||
lastDistance = nowDistance;
|
||||
return thisSceneReward;
|
||||
}
|
||||
@@ -438,4 +446,17 @@ public class TargetController : MonoBehaviour
|
||||
}
|
||||
return thisHitReward;
|
||||
}
|
||||
|
||||
// Get the in-area state.
// Returns the stored inArea value while the current target type is
// Targets.Go; for every other target type the result is always 0.
public int getInAreaState()
{
    bool isGoTarget = targetTypeInt == (int)Targets.Go;
    return isGoTarget ? inArea : 0;
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user