-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathEnvironment.py
102 lines (78 loc) · 2.81 KB
/
Environment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from collections import deque

import gym
import numpy as np
import PIL
import PIL.Image  # bare "import PIL" does not make PIL.Image available

from Config import Config
class Environment(object):
    """Wrapper around an OpenAI Gym Atari environment.

    Keeps a rolling buffer of the last ``FRAME_PER_ROW`` preprocessed
    (grayscale, resized) frames, as used by DQN-style agents, plus simple
    per-episode bookkeeping (step counts, last reward, done flag).

    NOTE(review): written against the legacy gym API — ``env.reset()``
    returns the observation and ``env.step()`` returns a 4-tuple.
    """

    def __init__(self, game="MsPacman-v0"):
        # Preprocessing geometry comes from the project-wide Config.
        self.screen_h = Config.SCREEN_H
        self.screen_w = Config.SCREEN_W
        self.screen_shape = Config.SCREEN_SHAPE
        self.frame_per_row = Config.FRAME_PER_ROW
        self.frame_buffer = None  # lazily created in init_frame_buffer()
        # NOTE(review): hard-coded to MsPacman-v0's 9 discrete actions;
        # use self.env.action_space.n if other games must be supported.
        self.action_space = 9
        # meta / bookkeeping
        self.total_episode_run = 0
        self.steps_in_episode = 0
        self.max_steps_in_episode = 0
        self.env = gym.make(game)
        self.reset()

    def init_frame_buffer(self):
        """(Re)fill the frame buffer with copies of the current screen.

        The buffer is always kept full (frame_per_row entries).
        """
        if self.frame_buffer is None:
            # maxlen makes the deque evict the oldest frame automatically
            # on append, so step() needs no explicit popleft().
            self.frame_buffer = deque(maxlen=self.frame_per_row)
        else:
            # Reuse the existing deque rather than testing truthiness:
            # an empty-but-allocated buffer must not be thrown away.
            self.frame_buffer.clear()
        for _ in range(self.frame_per_row):
            self.frame_buffer.append(self.get_screen(reduced=True))  # always full

    def reset(self):
        """Start a new episode and reset per-episode state/statistics."""
        self.max_steps_in_episode = max(self.max_steps_in_episode, self.steps_in_episode)
        # current_screen always holds the raw, unprocessed observation.
        self.current_screen = self.env.reset()
        self.init_frame_buffer()
        self.current_reward = 0
        self.done_flag = False
        self.info = None
        self.total_episode_run += 1
        self.steps_in_episode = 0

    def step(self, action):
        """Advance the environment one step and push the new frame."""
        self.current_screen, r, self.done_flag, self.info = self.env.step(action)
        self.current_reward = r
        # deque(maxlen=...) drops the oldest frame on append.
        self.frame_buffer.append(self.get_screen(reduced=True))
        self.steps_in_episode += 1

    def render(self):
        """Render the underlying gym environment."""
        self.env.render()

    ### GETs ###
    def get_environment(self):
        """Return the wrapped gym environment."""
        return self.env

    def get_screen(self, reduced=True):
        """Return the current screen.

        With ``reduced=True`` (default) the raw RGB frame is converted to
        grayscale and resized to ``screen_shape``; otherwise the raw
        observation is returned unchanged.
        """
        if reduced:
            grayscale = self.rgb2gray(self.current_screen)
            return self.resizeScreen(grayscale, self.screen_shape)
        else:
            return self.current_screen

    def get_reward(self):
        """Return the reward from the most recent step."""
        return self.current_reward

    def get_done_flag(self):
        """Return True once the current episode has terminated."""
        return self.done_flag

    def get_info(self):
        """Return the info dict from the most recent step."""
        return self.info

    def get_action_space(self):
        """Return the number of discrete actions."""
        return self.action_space

    def get_frame_buffer(self):
        """Return the deque of preprocessed frames (oldest first)."""
        return self.frame_buffer

    def get_history(self):
        """Return the stacked frame buffer in (H, W, Channel) shape."""
        return np.transpose(self.frame_buffer, (1, 2, 0))

    def get_max_steps(self):
        """Return the longest episode length seen so far."""
        return self.max_steps_in_episode

    ### Utilities ###
    def rgb2gray(self, rgb):
        """Convert an (H, W, 3) RGB array to grayscale (ITU-R 601 weights)."""
        r, g, b = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
        gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
        return gray

    def resizeScreen(self, state, shape):
        """Resize a grayscale frame to ``shape`` via PIL Lanczos resampling.

        NOTE(review): PIL.Image.resize expects (width, height) while the
        final reshape treats ``shape`` as (rows, cols); this is only
        consistent when SCREEN_SHAPE is square — confirm against Config.
        NOTE(review): Image.LANCZOS was removed in Pillow 10 in favour of
        Image.Resampling.LANCZOS — verify the pinned Pillow version.
        """
        img = PIL.Image.fromarray(state, mode=None)
        img = img.resize(shape, PIL.Image.LANCZOS)
        arr = list(img.getdata())
        return np.reshape(arr, shape)