V2.4 Add new reward function

Add a new reward function in attack mode:
calculate the distance between the closest enemy and the facing center line.
Allow the agent to spawn anywhere in the whole map area.
Add a penalty while mouseX is moving.
This commit is contained in:
2022-12-08 06:24:51 +09:00
parent b2f80287d5
commit f9b806de02
7 changed files with 30521 additions and 30276 deletions
+30 -1
View File
@@ -12,6 +12,7 @@ using Unity.MLAgents.Actuators;
using System.Linq;
using System.Drawing;
using Color = UnityEngine.Color;
using static TargetController;
/*TODO:
√tag 攻击排他
@@ -289,6 +290,7 @@ public class AgentWithGun : Agent
float facingReward()
{
float thisReward = 0;
bool isFacingtoEnemy = false;
Ray ray = thisCam.ScreenPointToRay(new Vector3(thisCam.pixelWidth / 2, thisCam.pixelHeight / 2, 0));
if (targetCon.targetTypeInt == (int)TargetController.Targets.Free)
{
@@ -296,14 +298,37 @@ public class AgentWithGun : Agent
RaycastHit hit;
if (Physics.Raycast(ray, out hit, 100))
{
// facing to an enemy
if (hit.collider.tag != myTag && hit.collider.tag != "Wall")
{
thisReward = paramContainer.facingReward;
isFacingtoEnemy = true;
}
}
if (rayScript.inViewEnemies.Count > 0 && !isFacingtoEnemy) {
// at least one enemy is in view, but the center ray is not pointing at an enemy
List<float> projectionDis = new List<float>();
foreach (GameObject thisEnemy in rayScript.inViewEnemies)
{
// for each enemy in view
Vector3 projection = Vector3.Project(thisEnemy.transform.position - transform.position, (ray.direction * 10));
Vector3 verticalToRay = transform.position + projection - thisEnemy.transform.position;
projectionDis.Add(verticalToRay.magnitude);
// Debug.Log("enemy!" + verticalToRay.magnitude);
// Debug.DrawRay(transform.position, (ray.direction * 100), Color.cyan);
// Debug.DrawRay(transform.position, thisEnemy.transform.position - transform.position, Color.yellow);
// Debug.DrawRay(transform.position, projection, Color.blue);
// Debug.DrawRay(thisEnemy.transform.position, verticalToRay, Color.magenta);
}
// enemy in view Reward
thisReward = 1 / MathF.Sqrt(paramContainer.facingInviewEnemyDisCOEF* projectionDis.Min()+0.00001f);
if (thisReward >= paramContainer.facingReward) thisReward = paramContainer.facingReward; // limit
Debug.Log("ninimum = " + thisReward);
}
}
else if(targetCon.targetTypeInt == (int)TargetController.Targets.Attack)
{
// attack mode
float targetDis = Vector3.Distance(blockContainer.thisBlock.transform.position, transform.position);
if(targetDis <= rayScript.viewDistance)
{
@@ -388,6 +413,10 @@ public class AgentWithGun : Agent
{
epreward -= spinPenaltyReward;
}
else
{
epreward -= Math.Abs(mouseX) * paramContainer.mousePenalty;
}
return epreward;
}
@@ -418,7 +447,6 @@ public class AgentWithGun : Agent
//List<float> enemyLDisList = RaySensors.enemyLDisList;// All Enemy Lside Distances
//List<float> enemyRDisList = RaySensors.enemyRDisList;// All Enemy Rside Distances
rayScript.updateRayInfo();
float[] myObserve = { transform.localPosition.x, transform.localPosition.y, transform.localPosition.z, transform.eulerAngles.y };
float[] rayTagResult = rayScript.rayTagResult;// 探测用RayTag结果 float[](raySensorNum,1)
float[] rayDisResult = rayScript.rayDisResult; // 探测用RayDis结果 float[](raySensorNum,1)
@@ -456,6 +484,7 @@ public class AgentWithGun : Agent
shoot = mouseShoot;
cameraControl(Mouse_X, 0);
moveAgent(vertical, horizontal);
rayScript.updateRayInfo(); // update raycast
//判断结束
float sceneReward = 0f;
+5 -1
View File
@@ -22,8 +22,10 @@ public class ParameterContainer : MonoBehaviour
public bool lockCameraX = false;
public bool lockCameraY = true;
public bool chartOn = false;
public bool spawnAgentInAllMap = false;
public int spinRecordMax = 20;
public float spinPenaltyThreshold = 50;
public float facingInviewEnemyDisCOEF = 0.5f;
[Header("Dynamic Defaut Rewards")]
@@ -78,8 +80,10 @@ public class ParameterContainer : MonoBehaviour
[Header("Penalty Rewards")]
[Tooltip("Speed Penalty Reward")]
public float speedPenalty = 0f;
[Tooltip("view Panalty Reward")]
[Tooltip("spiiiiiiin Panalty Reward")]
public float spinPenalty = 0f;
[Tooltip("while move mouse a little bit's penalty")]
public float mousePenalty = 0f;
[Header("Dynamic Rewards")]
[Tooltip("Free mode Hit Enemy reward")]
+3 -1
View File
@@ -36,6 +36,7 @@ public class RaySensors : MonoBehaviour
GameObject[] rayInfoOBJ;
LineRenderer[] lineRenderers;
rayInfoUI[] rayInfoUIs;
public List<GameObject> inViewEnemies = new List<GameObject>();
private void Start()
@@ -102,6 +103,7 @@ public class RaySensors : MonoBehaviour
break;
case 2: // Enemy
rayColor = Color.red;
inViewEnemies.Add(thisHit.transform.gameObject);
break;
case -1: // Hit Nothing
rayColor = Color.gray;
@@ -162,7 +164,7 @@ public class RaySensors : MonoBehaviour
float focusLEdge = agentCam.pixelWidth * (1 - focusRange) / 2;
float focusREdge = agentCam.pixelWidth * (1 + focusRange) / 2;
float thisCamPixelHeight = agentCam.pixelHeight;
inViewEnemies.Clear();
for (int i = 0; i < halfOuterRayNum; i++) // create left outside rays; 0 ~ focusLeftEdge
{
Vector3 point = new Vector3(i * focusLEdge / (halfOuterRayNum - 1), thisCamPixelHeight / 2, 0);
+14 -2
View File
@@ -215,8 +215,19 @@ public class TargetController : MonoBehaviour
// move Agent into Agent Spawn Area
public void moveAgentToSpwanArea()
{
float randX = UnityEngine.Random.Range(minAgentAreaX, maxAgentAreaX);
float randZ = UnityEngine.Random.Range(minAgentAreaZ, maxAgentAreaZ);
float randX = UnityEngine.Random.Range(minAgentAreaX, maxAgentAreaX); ;
float randZ = 0f;
if (paramCon.spawnAgentInAllMap)
{
// spawn the agent anywhere on the map (Z range extends up to the enemy area)
randZ = UnityEngine.Random.Range(minAgentAreaZ, maxEnemyAreaZ);
}
else
{
// spawn agent in only agent spawn area
randZ = UnityEngine.Random.Range(minAgentAreaZ, maxAgentAreaZ);
}
int Y = 1;
Vector3 initAgentLoc = new Vector3(randX, Y, randZ);
moveAgentTo(initAgentLoc);
@@ -369,6 +380,7 @@ public class TargetController : MonoBehaviour
{
if (firstRewardFlag)
{
// first distance record
(lastDistance, _) = blockCont.getAgentTargetDistanceAndInside(AgentObj.transform.position);
firstRewardFlag = false;
}