Side channel Added

Add a side channel so the Python side knows which target type was won or lost.
Fix the update-time bug that could cause a double game-over check (an extra "lose" was registered right after the game was reset).
This commit is contained in:
2022-11-30 06:39:56 +09:00
parent 25eac00c53
commit 9585845ba2
13 changed files with 42376 additions and 113 deletions
+14 -1
View File
@@ -247,7 +247,7 @@ public class AgentWithGun : Agent
GameObject gotHitObj = hit.transform.gameObject;//获取受到Ray撞击的对象
gotHitObj.GetComponent<states>().ReactToHit(Damage, gameObject);
shoot = 0;
return paramContainer.hitReward;
return targetCon.hitReward(gotHitObj.transform.position);
}
}
shoot = 0;
@@ -402,6 +402,19 @@ public class AgentWithGun : Agent
// Win or lose Finished
Debug.Log("Finish reward = " + thisRoundReward);
EP += 1;
string targetString = Enum.GetName(typeof(TargetController.Targets), targetCon.targetTypeInt);
switch (finishedState)
{
case (int)TargetController.EndType.Win:
Debug.LogWarning(targetString+"|Win");
break;
case (int)TargetController.EndType.Lose:
Debug.LogWarning(targetString+"|Lose");
break;
default:
Debug.LogWarning("TypeError");
break;
}
EndEpisode();
}
else
+41
View File
@@ -0,0 +1,41 @@
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.SideChannels;
using System;
/// <summary>
/// ML-Agents side channel that logs strings received from Python and forwards
/// Unity warning/error log lines to Python as tagged strings.
/// </summary>
public class AimbotSideChannel : SideChannel
{
    /// <summary>
    /// Creates the channel and sets its fixed channel id.
    /// NOTE(review): the Python side presumably has to open a channel with the same GUID — confirm.
    /// </summary>
    public AimbotSideChannel()
    {
        ChannelId = new Guid("8bbfb62a-99b4-457c-879d-b78b69066b5e");
    }

    /// <summary>
    /// Reads one string from the incoming message and writes it to the Unity log.
    /// </summary>
    /// <param name="msg">Incoming side-channel message; one string is read from it.</param>
    protected override void OnMessageReceived(IncomingMessage msg)
    {
        Debug.Log("From Python : " + msg.ReadString());
    }

    /// <summary>
    /// Queues a message for Python when <paramref name="type"/> is a warning or an error.
    /// Warnings are prefixed with "result|" and errors with "Error|"; every other
    /// log type is ignored.
    /// NOTE(review): the signature matches Unity's log callback shape, so this is
    /// presumably subscribed to the application log event elsewhere — confirm.
    /// </summary>
    /// <param name="logString">Log text that is appended after the prefix.</param>
    /// <param name="stackTrace">Not used by this method.</param>
    /// <param name="type">Log severity that selects the prefix.</param>
    public void SendDebugStatementToPython(string logString, string stackTrace, LogType type)
    {
        switch (type)
        {
            case LogType.Warning:
                SendTagged("result|", logString);
                break;
            case LogType.Error:
                SendTagged("Error|", logString);
                break;
        }
    }

    // Writes tag + payload as a single string into an outgoing message and queues it.
    private void SendTagged(string tag, string payload)
    {
        using (var outgoing = new OutgoingMessage())
        {
            outgoing.WriteString(tag + payload);
            QueueMessageToSend(outgoing);
        }
    }
}
@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: 6836d727b536dd54e893311318779b9a
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:
@@ -13,6 +13,7 @@ public class EnvironmentUIControl : MonoBehaviour
public GameObject GroundCanvasObj;
public GameObject chartObj;
public TextMeshProUGUI remainTimeText;
public TextMeshProUGUI targetTypeText;
public TextMeshProUGUI winLoseText;
public float resultTimeout = 1f;
public GameObject gaugeImgObj;
@@ -111,4 +112,31 @@ public class EnvironmentUIControl : MonoBehaviour
gaugeImg.fillAmount = -firebasesBelong / belongMaxPoint;
}
}
/// <summary>
/// Updates <c>targetTypeText</c> with a label and colour for the given target type.
/// Values that match no handled <c>TargetController.Targets</c> member show "TYPE ERROR" in red.
/// </summary>
/// <param name="targetInt">Integer value of a <c>TargetController.Targets</c> member.</param>
public void updateTargetType(int targetInt)
{
    string label;
    Color tint;

    // Pick label and colour first; the UI text is assigned once below.
    switch ((TargetController.Targets)targetInt)
    {
        case TargetController.Targets.Go:
            label = "GOTO";
            tint = Color.blue;
            break;
        case TargetController.Targets.Attack:
            label = "Attack!";
            tint = Color.red;
            break;
        case TargetController.Targets.Defence:
            label = "Defence";
            tint = Color.green;
            break;
        case TargetController.Targets.Free:
            label = "Free";
            tint = Color.yellow;
            break;
        default:
            label = "TYPE ERROR";
            tint = Color.red;
            break;
    }

    targetTypeText.text = label;
    targetTypeText.color = tint;
}
}
+35 -13
View File
@@ -26,15 +26,15 @@ public class ParameterContainer : MonoBehaviour
[Header("Dynamic Defaut Rewards")]
[Tooltip("Hit Enemy reward")]
public float hitRewardDefault = 30.0f;
[Tooltip("Episode Win reward")]
public float winRewardDefault = 50.0f;
public float hitRewardDefault = 60.0f;
[Tooltip("Free mode Hit Enemy reward")]
public float hitTargetRewardDefault = 60.0f;
[Tooltip("Enemy down reward")]
public float killRewardDefault = 40.0f;
public float killRewardDefault = 60.0f;
[Tooltip("Enemy down in area Reward")]
public float killInAreaEnemyRewardDefault = 80.0f;
public float killTargetEnemyRewardDefault = 100.0f;
[Tooltip("stay in firebasesArea reward")]
public float inAreaRewardDefault = 1.0f;
public float inAreaRewardDefault = 10.0f;
[Tooltip("free left time bonus reward. ALLR + leftTime * r")]
public float freeTimeBonusPerSec = 1.0f;
[Tooltip("target left time bonus reward. ALLR + leftTime * r")]
@@ -42,20 +42,37 @@ public class ParameterContainer : MonoBehaviour
[Tooltip("in area left time bonus reward. ALLR + leftTime * r")]
public float areaTimeBonusPerSec = 1.0f;
[Tooltip("distance reward reward = r*(1-(nowDis/startDis))")]
public float distanceReward = 1.0f;
public float distanceReward = 20.0f;
[Space(10)]
[Tooltip("Goto Win reward")]
public float goWinRewardDefault = 100.0f;
[Tooltip("Attack Win reward")]
public float attackWinRewardDefault = 100.0f;
[Tooltip("Defence Win reward")]
public float defenceWinRewardDefault = 100.0f;
[Tooltip("free Win reward")]
public float freeWinRewardDefault = 100.0f;
[Header("Dynamic Rewards")]
[Tooltip("Free mode Hit Enemy reward")]
public float hitTargetReward = 60.0f;
[Tooltip("Hit Enemy reward")]
public float hitReward = 30.0f;
[Tooltip("Enemy down reward")]
public float killReward = 40.0f;
[Tooltip("Episode Win reward")]
public float winReward = 50.0f;
[Tooltip("Enemy down in area Reward")]
public float killInAreaEnemyReward = 80.0f;
public float killTargetEnemyReward = 80.0f;
[Tooltip("stay in firebasesArea reward")]
public float inAreaReward = 1.0f;
[Space(10)]
[Tooltip("go Win reward")]
public float goWinReward = 50.0f;
[Tooltip("attack Win reward")]
public float attackWinReward = 50.0f;
[Tooltip("defence Win reward")]
public float defenceWinReward = 50.0f;
[Tooltip("free Win reward")]
public float freeWinReward = 50.0f;
[Header("Static Rewards")]
@@ -115,10 +132,15 @@ public class ParameterContainer : MonoBehaviour
}
hitReward = hitRewardDefault + freeTimeBonus;
hitTargetReward = hitTargetRewardDefault + freeTimeBonus;
killReward = killRewardDefault + freeTimeBonus;
winReward = winRewardDefault + targetTimeBonus;
killInAreaEnemyReward = killInAreaEnemyRewardDefault + targetTimeBonus;
killTargetEnemyReward = killTargetEnemyRewardDefault + targetTimeBonus;
inAreaReward = inAreaRewardDefault + areaTimeBonus;
goWinReward = goWinRewardDefault + targetTimeBonus;
attackWinReward = attackWinRewardDefault + targetTimeBonus;
defenceWinReward = defenceWinRewardDefault + targetTimeBonus;
freeWinReward = freeWinRewardDefault + targetTimeBonus;
}
public void resetTimeBonusReward()
+48 -9
View File
@@ -105,6 +105,7 @@ public class TargetController : MonoBehaviour
public void rollNewScene()
{
startTime = Time.time;// Reset StartTime as now time
leftTime = paramCon.timeLimit - Time.time + startTime;
float randTargetType = UnityEngine.Random.Range(0f, 1f);
if (randTargetType <= gotoProb)
{
@@ -181,6 +182,7 @@ public class TargetController : MonoBehaviour
moveAgentToSpwanArea();
blockCont.destroyBlock();
}
envUICon.updateTargetType(targetTypeInt);
}
// get target observation states
@@ -253,7 +255,7 @@ public class TargetController : MonoBehaviour
{
// win
// let the area belongs to me
thisReward = paramCon.winReward;
thisReward = paramCon.goWinReward;
//thisReward = (paramCon.inAreaReward * inArea) + getSceneReward(nowDistance);
endTypeInt = (int)EndType.Win;
}
@@ -280,7 +282,7 @@ public class TargetController : MonoBehaviour
{
// win
// let the area belongs to me and kill every enmy in this area.
thisReward = paramCon.winReward;
thisReward = paramCon.attackWinReward;
//thisReward = (paramCon.inAreaReward * inArea) + getSceneReward(nowDistance);
endTypeInt = (int)EndType.Win;
}
@@ -306,7 +308,7 @@ public class TargetController : MonoBehaviour
{
// win
// time over and the area still mine
thisReward = paramCon.winReward;
thisReward = paramCon.defenceWinReward;
//thisReward = (paramCon.inAreaReward * inArea) + getSceneReward(nowDistance);
endTypeInt = (int)EndType.Win;
}
@@ -330,14 +332,14 @@ public class TargetController : MonoBehaviour
{
// win
//thisReward = paramCon.winReward + (paramCon.timeBonusPerSecReward * leftTime);
thisReward = 0f;
thisReward = paramCon.freeWinReward;
endTypeInt = (int)EndType.Win;
}
else if (Time.time - startTime >= paramCon.timeLimit)
{
// lose
//thisReward = paramCon.loseReward;
thisReward = 0f;
thisReward = paramCon.loseReward;
endTypeInt = (int)EndType.Lose;
}
else
@@ -358,22 +360,59 @@ public class TargetController : MonoBehaviour
float thisKillReward = 0f;
if (targetTypeInt == (int)Targets.Attack)
{
// attack
// attack mode
(_, int isInArea) = blockCont.thisBlock.getDist_inArea(enemyPosition);
if (isInArea == 1)
{
thisKillReward = paramCon.killInAreaEnemyReward;
// kill in area enemy
thisKillReward = paramCon.killTargetEnemyReward;
}
else
{
thisKillReward = paramCon.killReward;
}
}
else
else if(targetTypeInt == (int)Targets.Free)
{
// goto & defence & free
// free mode kill
thisKillReward = paramCon.killTargetEnemyReward;
}else
{
// goto & defence
thisKillReward = paramCon.killReward;
}
return thisKillReward;
}
/// <summary>
/// Chooses the hit reward from the current target mode and, in Attack mode,
/// from whether the hit enemy is inside the current block's area.
/// </summary>
/// <param name="enemyPosition">Position of the enemy that was hit.</param>
/// <returns>
/// <c>paramCon.hitTargetReward</c> in Free mode, or in Attack mode when the enemy
/// is reported as inside the area; <c>paramCon.hitReward</c> in every other case.
/// </returns>
public float hitReward(Vector3 enemyPosition)
{
    // Free mode: every hit earns the target-hit reward.
    if (targetTypeInt == (int)Targets.Free)
    {
        return paramCon.hitTargetReward;
    }

    // Goto & defence: plain hit reward, no area lookup.
    if (targetTypeInt != (int)Targets.Attack)
    {
        return paramCon.hitReward;
    }

    // Attack mode: the reward depends on the in-area flag (1 = inside) for the enemy position.
    (_, int insideFlag) = blockCont.thisBlock.getDist_inArea(enemyPosition);
    return insideFlag == 1 ? paramCon.hitTargetReward : paramCon.hitReward;
}
}