Each reward #10

Open · wants to merge 9 commits into main
evaluate.py (7 changes: 4 additions & 3 deletions)
@@ -1,8 +1,8 @@
import argparse
from env_utils import LocalEnv
from parl.utils import logger, tensorboard
# from torch_base import TorchModel, TorchSAC, TorchAgent # Choose base wrt which deep-learning framework you are using
from paddle_base import PaddleModel, PaddleSAC, PaddleAgent
from torch_base import TorchModel, TorchSAC, TorchAgent # Choose base wrt which deep-learning framework you are using
# from paddle_base import PaddleModel, PaddleSAC, PaddleAgent
from env_config import EnvConfig

EVAL_EPISODES = 3
@@ -66,7 +66,8 @@ def main():
parser.add_argument("--env", default="carla-v0")
parser.add_argument(
'--framework',
default='paddle',
# default='paddle',
default='torch',
help='deep learning framework: torch or paddle')
parser.add_argument(
"--eval_episodes",
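Note: the torch/paddle switch in this diff is done by commenting one import out, uncommenting the other, and flipping the --framework default. A lighter-touch alternative (not part of this PR, just a sketch that assumes both torch_base and paddle_base remain importable on demand) is to defer the import until the flag has been parsed:

# Sketch: pick the backend from --framework instead of editing the imports.
import argparse

def load_backend(framework):
    # Import lazily so only the selected framework needs to be installed.
    if framework == 'torch':
        from torch_base import TorchModel, TorchSAC, TorchAgent
        return TorchModel, TorchSAC, TorchAgent
    elif framework == 'paddle':
        from paddle_base import PaddleModel, PaddleSAC, PaddleAgent
        return PaddleModel, PaddleSAC, PaddleAgent
    raise ValueError('unknown framework: {}'.format(framework))

parser = argparse.ArgumentParser()
parser.add_argument('--framework', default='torch',
                    help='deep learning framework: torch or paddle')
args = parser.parse_args()
CarlaModel, SAC, CarlaAgent = load_backend(args.framework)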
gym_carla/gym_carla/envs/carla_env.py (40 changes: 22 additions & 18 deletions)
@@ -336,19 +336,19 @@ def close(self):
def _terminal(self):
"""Calculate whether to terminate the current episode."""
# Get ego state
# ego_x, ego_y = self._get_ego_pos()
ego_x, ego_y = self._get_ego_pos()

# # If at destination
# dest = self.dest
# if np.sqrt((ego_x-dest[0])**2+(ego_y-dest[1])**2) < 2.0:
# # print("Get destination! Episode Done.")
# self.logger.debug('Get destination! Episode cost %d steps in route %d.' % (self.time_step, self.route_id))
# # self.isSuccess = True
# return True
dest = self.dest
if np.sqrt((ego_x-dest[0])**2+(ego_y-dest[1])**2) < 2.0:
print("Cool! Get destination! Episode Done.")
self.logger.debug('Cool! Get destination! Episode cost %d steps in route %d.' % (self.time_step, self.route_id))
self.isSuccess = True
return True

# If collides
if len(self.collision_hist) > 0:
# print("Collision happened! Episode Done.")
print("Collision happened! Episode Done.")
self.logger.debug(
'Collision happened! Episode cost %d steps in route %d.' %
(self.time_step, self.route_id))
@@ -357,7 +357,7 @@ def _terminal(self):

# If reach maximum timestep
if self.time_step >= self.max_time_episode:
# print("Time out! Episode Done.")
print("Time out! Episode Done.")
self.logger.debug('Time out! Episode cost %d steps in route %d.' %
(self.time_step, self.route_id))
self.isTimeOut = True
@@ -366,7 +366,7 @@ def _terminal(self):
# If out of lane
# if len(self.lane_invasion_hist) > 0:
if abs(self.state_info['lateral_dist_t']) > 1.2:
# print("lane invasion happened! Episode Done.")
print("lane invasion happened! Episode Done.")
if self.state_info['lateral_dist_t'] > 0:
self.logger.debug(
'Left Lane invasion! Episode cost %d steps in route %d.' %
velocity = self.ego.get_velocity()
v_norm = np.linalg.norm(np.array((velocity.x, velocity.y)))
if v_norm < 4:
print("Speed too slow!")
self.logger.debug(
'Speed too slow! Episode cost %d steps in route %d.' %
(self.time_step, self.route_id))
self.isSpecialSpeed = True
return True
elif v_norm > (1.5 * self.desired_speed):
print("Speed too fast!")
self.logger.debug(
'Speed too fast! Episode cost %d steps in route %d.' %
(self.time_step, self.route_id))
@@ -499,22 +501,22 @@ def _get_reward(self, action):
if self.isCollided or self.isOutOfLane or self.isSpecialSpeed:
r_done = -500.0
return r_done
# if self.isSuccess:
# r_done = 300.0
# return r_done
if self.isSuccess:
r_done = 300.0
return r_done

# reward for speed
v = self.ego.get_velocity()
ego_velocity = np.array([v.x, v.y])
speed_norm = np.linalg.norm(ego_velocity)
delta_speed = speed_norm - self.desired_speed
r_speed = -delta_speed**2 / 5.0
# print("r_speed:", speed_norm)
print("r_speed:", speed_norm)

# reward for steer
delta_yaw, _, _ = self._get_delta_yaw()
r_steer = -100 * (delta_yaw * np.pi / 180)**2
# print("r_steer:", delta_yaw, '------>', r_steer)
print("r_steer:", delta_yaw, '------>', r_steer)

# reward for action smoothness
r_action_regularized = -5 * np.linalg.norm(action)**2
@@ -523,9 +525,10 @@ def _get_reward(self, action):
# reward for lateral distance to the center of road
lateral_dist = self.state_info['lateral_dist_t']
r_lateral = -10.0 * lateral_dist**2
# print("r_lateral:", lateral_dist, '-------->', r_lateral)
print("r_lateral:", lateral_dist, '-------->', r_lateral)

return r_speed + r_steer + r_action_regularized + r_lateral + r_step
# print("all rewards:", r_speed + r_steer + r_action_regularized + r_lateral + r_step)

def _make_carla_client(self, host, port):
while True:
@@ -544,8 +547,9 @@ def _make_carla_client(self, host, port):
self.world = self.client.load_world('Town01')
# self.world = self.client.load_world('Town02')
elif self.task_mode == 'Lane':
# self.world = self.client.load_world('Town01')
self.world = self.client.load_world('Town05')
# self.world = self.client.load_world('Town01')
# self.world = self.client.load_world('Town05')
self.world = self.client.load_world('Town02')
elif self.task_mode == 'U_curve':
self.world = self.client.load_world('Town03')
elif self.task_mode == 'Lane_test':
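Note: with this change, reaching the destination now returns r_done = 300.0 from _get_reward, while collision, lane departure, and abnormal speed still return -500.0; otherwise the reward is the sum of the shaped terms shown in the hunk above. For a feel of the magnitudes, a standalone worked example with made-up numbers (r_step here is a hypothetical per-step constant; the real one is defined in an unshown part of the function):

import numpy as np

# Illustrative values only: 2 m/s below desired speed, 5 deg heading error,
# a moderate action, 0.3 m off the lane center.
desired_speed = 8.0
speed_norm = 6.0
delta_yaw = 5.0
action = np.array([0.2, 0.3])
lateral_dist = 0.3
r_step = -1.0  # assumed per-step penalty, for illustration only

r_speed = -(speed_norm - desired_speed) ** 2 / 5.0         # -0.8
r_steer = -100 * (delta_yaw * np.pi / 180) ** 2             # about -0.76
r_action = -5 * np.linalg.norm(action) ** 2                 # -0.65
r_lateral = -10.0 * lateral_dist ** 2                       # -0.9
total = r_speed + r_steer + r_action + r_lateral + r_step   # about -4.1
print(total)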
process.py (11 changes: 11 additions & 0 deletions)
@@ -0,0 +1,11 @@
import subprocess

count = 0
print('carla start')
while True:
    count += 1
    # Launch the CARLA server; this call blocks until the simulator exits.
    returncode = subprocess.call(['./CarlaUE4.sh', '-windowed', '-carla-port=2031', '-Renderoffscreen'])

    # A non-zero return code means the simulator crashed or was killed; loop to restart it.
    if returncode != 0:
        print(f"Carla simulation interrupted, attempt #{count}")
        continue
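process.py acts as a simple watchdog that relaunches CarlaUE4.sh whenever the simulator process exits. If the simulator dies immediately (for example while the previous process still holds the port), the loop respins rapidly; one possible refinement, sketched below as an assumption rather than part of this PR, is to pause briefly between attempts:

import subprocess
import time

count = 0
while True:
    count += 1
    returncode = subprocess.call(
        ['./CarlaUE4.sh', '-windowed', '-carla-port=2031', '-Renderoffscreen'])
    if returncode != 0:
        print(f"Carla simulation interrupted, attempt #{count}")
    # Give the previous process time to release the port before relaunching.
    time.sleep(5)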
train.py (7 changes: 4 additions & 3 deletions)
@@ -2,8 +2,8 @@
import numpy as np
from parl.utils import logger, tensorboard, ReplayMemory
from env_utils import ParallelEnv, LocalEnv
# from torch_base import TorchModel, TorchSAC, TorchAgent # Choose base wrt which deep-learning framework you are using
from paddle_base import PaddleModel, PaddleSAC, PaddleAgent
from torch_base import TorchModel, TorchSAC, TorchAgent # Choose base wrt which deep-learning framework you are using
# from paddle_base import PaddleModel, PaddleSAC, PaddleAgent
from env_config import EnvConfig

WARMUP_STEPS = 2e3
@@ -125,7 +125,8 @@ def main():
parser.add_argument("--env", default="carla-v0")
parser.add_argument(
'--framework',
default='paddle',
# default='paddle',
default='torch',
help='choose deep learning framework: torch or paddle')
parser.add_argument(
"--train_total_steps",
train02.py (180 changes: 180 additions & 0 deletions)
@@ -0,0 +1,180 @@
import argparse
import datetime
import numpy as np
from parl.utils import logger, tensorboard, ReplayMemory
# from parl.utils import logger, ReplayMemory

from env_utils import ParallelEnv, LocalEnv
from torch_base import TorchModel, TorchSAC, TorchAgent # Choose base wrt which deep-learning framework you are using
# from paddle_base import PaddleModel, PaddleSAC, PaddleAgent
from env_config import EnvConfig

WARMUP_STEPS = 2e3
EVAL_EPISODES = 3
MEMORY_SIZE = int(1e4)
BATCH_SIZE = 256
GAMMA = 0.99
TAU = 0.005
ALPHA = 0.2 # determines the relative importance of entropy term against the reward
ACTOR_LR = 3e-4
CRITIC_LR = 3e-4


# Runs policy for 3 episodes by default and returns average reward
def run_evaluate_episodes(agent, env, eval_episodes):
    avg_reward = 0.
    for k in range(eval_episodes):
        obs = env.reset()
        done = False
        steps = 0
        while not done and steps < env._max_episode_steps:
            steps += 1
            action = agent.predict(obs)
            obs, reward, done, _ = env.step(action)
            avg_reward += reward
    avg_reward /= eval_episodes
    return avg_reward


def main():
    logger.info("-----------------Carla_SAC-------------------")
    logger.set_dir('./{}_train'.format(args.env))

    # Parallel environments for training
    train_envs_params = EnvConfig['train_envs_params']
    env_num = EnvConfig['env_num']
    env_list = ParallelEnv(args.env, args.xparl_addr, train_envs_params)

    # env for eval
    eval_env_params = EnvConfig['eval_env_params']
    eval_env = LocalEnv(args.env, eval_env_params)

    obs_dim = eval_env.obs_dim
    action_dim = eval_env.action_dim

    # Initialize model, algorithm, agent, replay_memory
    if args.framework == 'torch':
        CarlaModel, SAC, CarlaAgent = TorchModel, TorchSAC, TorchAgent
    elif args.framework == 'paddle':
        CarlaModel, SAC, CarlaAgent = PaddleModel, PaddleSAC, PaddleAgent
    model = CarlaModel(obs_dim, action_dim)
    algorithm = SAC(
        model,
        gamma=GAMMA,
        tau=TAU,
        alpha=ALPHA,
        actor_lr=ACTOR_LR,
        critic_lr=CRITIC_LR)
    agent = CarlaAgent(algorithm)
    rpm = ReplayMemory(
        max_size=MEMORY_SIZE, obs_dim=obs_dim, act_dim=action_dim)

    total_steps = 0
    last_save_steps = 0
    test_flag = 0
    best_reward = 0

    obs_list = env_list.reset()
    logger.info("----------------env-reset------------------")

    while total_steps < args.train_total_steps:
        # Train episode
        logger.info("-----------------Train episode-------------------")
        if rpm.size() < WARMUP_STEPS:
            action_list = [
                np.random.uniform(-1, 1, size=action_dim)
                for _ in range(env_num)
            ]
        else:
            action_list = [agent.sample(obs) for obs in obs_list]
        next_obs_list, reward_list, done_list, info_list = env_list.step(
            action_list)

        # Store data in replay memory
        for i in range(env_num):
            rpm.append(obs_list[i], action_list[i], reward_list[i],
                       next_obs_list[i], done_list[i])

        obs_list = env_list.get_obs()
        total_steps = env_list.total_steps
        # Train agent after collecting sufficient data
        logger.info("-----------------Train agent after collecting sufficient data-------------------")
        if rpm.size() >= WARMUP_STEPS:
            batch_obs, batch_action, batch_reward, batch_next_obs, batch_terminal = rpm.sample_batch(
                BATCH_SIZE)
            agent.learn(batch_obs, batch_action, batch_reward, batch_next_obs,
                        batch_terminal)

        # Save agent
        logger.info("-----------------save agent-------------------")
        if total_steps > int(1e5) and total_steps > last_save_steps + int(1e4):
            agent.save('./{}_model/step_{}_model.ckpt'.format(  # model save path
                args.framework, total_steps))
            print('model saved')
            last_save_steps = total_steps
            # print('last_save_steps: ', last_save_steps)
            # add current time
            # print('current time: ', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
            now = datetime.datetime.now()
            print('last_save_steps: ', last_save_steps, ' (', now.strftime('%Y-%m-%d %H:%M:%S'), ')')

        # Evaluate episode
        logger.info("-----------------evaluate-------------------")
        if (total_steps + 1) // args.test_every_steps >= test_flag:
            while (total_steps + 1) // args.test_every_steps >= test_flag:
                test_flag += 1
            avg_reward = run_evaluate_episodes(agent, eval_env, EVAL_EPISODES)

            tensorboard.add_scalar('eval/episode_reward', avg_reward,
                                   total_steps)
            logger.info(
                'Total steps {}, Evaluation over {} episodes, Average reward: {}'
                .format(total_steps, EVAL_EPISODES, avg_reward))
            if avg_reward > best_reward:
                best_reward = avg_reward
                best_model_path = './{}_model/{}_best.ckpt'.format(args.framework, args.env)
                agent.save(best_model_path)
                print('best model saved')
                logger.info('Saved best model to {}'.format(best_model_path))

    # avg_reward = run_evaluate_episodes(agent, eval_env, EVAL_EPISODES)
    # if avg_reward > best_reward:
    #     best_reward = avg_reward
    #     best_model_path = './model_dir/{}_best'.format(args.env)
    #     agent.save(best_model_path)
    #     logger.info('Saved best model to {}'.format(best_model_path))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--xparl_addr",
        default='localhost:8080',
        help='xparl address for parallel training')
    parser.add_argument("--env", default="carla-v2")
    parser.add_argument(
        '--framework',
        # default='paddle',
        default='torch',
        help='choose deep learning framework: torch or paddle')
    parser.add_argument(
        "--train_total_steps",
        default=5e5,
        type=int,
        help='max time steps to run environment')
    parser.add_argument(
        "--test_every_steps",
        default=1e3,
        type=int,
        help='the step interval between two consecutive evaluations')
    args = parser.parse_args()

    main()
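Note: with the default flags, train02.py writes periodic checkpoints and a best-reward checkpoint under ./torch_model/. A minimal sketch of loading the best checkpoint for offline evaluation, assuming the repo's TorchAgent inherits PARL's restore() and reusing run_evaluate_episodes from train02.py:

from env_config import EnvConfig
from env_utils import LocalEnv
from torch_base import TorchModel, TorchSAC, TorchAgent
from train02 import run_evaluate_episodes

eval_env = LocalEnv('carla-v2', EnvConfig['eval_env_params'])
model = TorchModel(eval_env.obs_dim, eval_env.action_dim)
algorithm = TorchSAC(model, gamma=0.99, tau=0.005, alpha=0.2,
                     actor_lr=3e-4, critic_lr=3e-4)
agent = TorchAgent(algorithm)

# Path follows train02.py's naming scheme: './{framework}_model/{env}_best.ckpt'
agent.restore('./torch_model/carla-v2_best.ckpt')
avg_reward = run_evaluate_episodes(agent, eval_env, eval_episodes=3)
print('average reward over 3 episodes:', avg_reward)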