evaluation.py
import numpy as np
import torch
import copy

from core import utils
from core.envs import make_vec_envs
from core.agents.heuristic.load_balance import LeastWorkAgent, \
    ShortestProcessingTimeAgent, RandomAllocateAgent, EarliestCompletionTimeAgent

# Number of completed episodes collected per agent during evaluation.
NUM_EVAL_EPISODES = 64


def evaluate(actor_critic, env_name, seed, num_processes, eval_log_dir,
             device, env_args=None):
    """
    Evaluate a trained actor-critic policy and compare it against heuristic
    baselines, returning per-agent episode rewards.
    """
    # With a fixed job sequence every process would see the same trace, so a
    # single environment suffices and the seed is left unchanged; otherwise
    # offset the seed by the number of processes.
    seed = seed if env_args.fix_job_sequence else seed + num_processes
    num_processes = 1 if env_args.fix_job_sequence else num_processes
    # Benchmark the heuristic baselines first.
    returns = benchmark_heuristic([LeastWorkAgent(),
                                   RandomAllocateAgent(),
                                   EarliestCompletionTimeAgent(
                                       env_args.load_balance_service_rates)],
                                  env_name=env_name,
                                  seed=seed,
                                  num_processes=num_processes,
                                  log_dir=eval_log_dir,
                                  device=device,
                                  args=env_args)

    # Evaluate the learned RL policy on a fresh set of environments.
    eval_envs = make_vec_envs(env_name=env_name,
                              seed=seed,
                              num_processes=num_processes,
                              log_dir=eval_log_dir,
                              device=device,
                              allow_early_resets=True,
                              train=False,
                              args=env_args)

    eval_episode_rewards = []
    obs = eval_envs.reset()
    # Recurrent state and masks support policies with an RNN core; masks are
    # zeroed at episode boundaries so the hidden state is reset.
    eval_recurrent_hidden_states = torch.zeros(
        num_processes, actor_critic.recurrent_hidden_state_size, device=device)
    eval_masks = torch.zeros(num_processes, 1, device=device)
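
    # Roll out the policy deterministically until NUM_EVAL_EPISODES episodes
    # have completed across all evaluation environments.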
while len(eval_episode_rewards) < NUM_EVAL_EPISODES:
with torch.no_grad():
_, action, _, eval_recurrent_hidden_states = actor_critic.act(
obs,
eval_recurrent_hidden_states,
eval_masks,
deterministic=True)
        # Observe reward and next obs.
        # FIXME: debug why actions must be moved to the CPU here.
obs, _, done, infos = eval_envs.step(action.cpu())
eval_masks = torch.tensor(
[[0.0] if done_ else [1.0] for done_ in done],
dtype=torch.float32,
device=device)
for info in infos:
if 'episode' in info.keys():
eval_episode_rewards.append(info['episode']['r'])
    eval_envs.close()
    returns['RLAgent'] = eval_episode_rewards

    # Print a per-agent summary of the mean episode reward.
    for k, v in returns.items():
        print(" => Evaluate {} using {} episodes: mean reward {:.5f}".format(
            k, len(v), np.mean(v)))

    return returns


def benchmark_single_heuristic(agent, eval_envs):
    """
    Compute episode returns of a single heuristic agent.
    """
obs = eval_envs.reset()
eval_episode_rewards = []
while len(eval_episode_rewards) < NUM_EVAL_EPISODES:
action = agent.act(obs)
        # Observe reward and next obs.
        obs, _, done, infos = eval_envs.step(action.cpu())
for info in infos:
if 'episode' in info.keys():
eval_episode_rewards.append(info['episode']['r'])
eval_envs.close()
return eval_episode_rewards


def benchmark_heuristic(agents, **kwargs):
    """
    Compute episode returns of every heuristic agent in `agents`.
    """
    ret = {}
for agent in agents:
envs = make_vec_envs(env_name=kwargs['env_name'],
seed=kwargs['seed'],
num_processes=kwargs['num_processes'],
log_dir=kwargs['log_dir'],
device=kwargs['device'],
allow_early_resets=True,
train=False,
args=kwargs['args'])
eval_episode_rewards = benchmark_single_heuristic(agent, envs)
        # Store the result keyed by the agent's class name.
        ret[agent.__class__.__name__] = eval_episode_rewards
return ret
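

# --- Usage sketch (illustrative only) --------------------------------------
# A minimal sketch of how `evaluate` might be driven from a training script.
# The argument values below (environment id, seed, log directory, and the
# `env_args` namespace fields) are assumptions for illustration, not values
# taken from this repository's configuration, and `trained_actor_critic`
# stands in for a policy produced elsewhere.
#
# if __name__ == "__main__":
#     from argparse import Namespace
#
#     env_args = Namespace(fix_job_sequence=False,
#                          load_balance_service_rates=[1.0, 2.0, 4.0])
#     returns = evaluate(actor_critic=trained_actor_critic,
#                        env_name="LoadBalance-v0",   # hypothetical env id
#                        seed=1,
#                        num_processes=4,
#                        eval_log_dir="/tmp/eval_logs",
#                        device=torch.device("cpu"),
#                        env_args=env_args)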