# data_processor.py
import numpy as np
import glob
import os
import pickle
from tqdm import tqdm
from scipy import spatial
'''
# Baidu ApolloScape data format:
frame_id, object_id, object_type, position_x, position_y, position_z, object_length, object_width, object_height, heading

Read data from each file in $file_path_list and split it into clips of $total_frames frames.
feature: (T, V, C)
C is the feature dimension: x, y and the other selected columns, plus the x/y velocity appended later.
T is the temporal length of a clip: history_frames + future_frames.
V is the maximum number of objects; clips with fewer objects are zero-padded.
'''
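# A hypothetical example row, to make the column layout above concrete (values are
# illustrative, not taken from the dataset):
#   "5 102 2 21.3 -4.7 0.0 4.5 1.8 1.6 0.31"
# parses to frame_id=5, object_id=102, object_type=2, position_x=21.3,
# position_y=-4.7, position_z=0.0, object_length=4.5, object_width=1.8,
# object_height=1.6, heading=0.31.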
# Please change this to your location
data_root = './data/ApolloScape/'
# 3 seconds * 2 frames/second
history_frames = 6
# 3 seconds * 2 frames/second
future_frames = 6
total_frames = history_frames + future_frames
frame_step = 1
feature_id = [3, 4, 2, 9, 6, 7]
max_object_nums = 115
neighbor_distance = 15
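# feature_id picks columns from each row in this order: position_x (3), position_y (4),
# object_type (2), heading (9), object_length (6), object_width (7). Two velocity
# channels (delta x, delta y between consecutive frames) are appended below, so every
# object carries len(feature_id) + 2 = 8 channels per frame.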
def GenerateData(file_path_list, data_root, is_train=True):
    all_data = []
    # max_object=[]
    for file_path_idx in tqdm(file_path_list):
        # print(file_path_idx)
        with open(file_path_idx, 'r') as reader:
            content = np.array([x.strip().split(' ') for x in reader.readlines()]).astype(float)
        scene_frames = content[:, 0].astype(np.int64)
        unique_frames = sorted(np.unique(scene_frames).tolist())
        if is_train:
            start_frame_ids = unique_frames[:-total_frames + 1]
        else:
            start_frame_ids = unique_frames[::history_frames]
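        # Training clips: each frame (up to the last total_frames - 1) starts a window of
        # total_frames consecutive frames. Test clips: windows start every history_frames
        # frames, giving non-overlapping observed histories.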
        data_list = []
        for start_index in start_frame_ids:
            if is_train:
                sample_frames = np.arange(start_index, start_index + total_frames)
            else:
                sample_frames = np.arange(start_index, start_index + history_frames)
            sample_mask = np.any(scene_frames.reshape(-1, 1) == sample_frames.reshape(1, -1), axis=1)
            # sample_object_ids = np.sort(np.unique(content[sample_mask, 1].astype(np.int64)))
            sample_object_ids = np.unique(content[sample_mask, 1].astype(np.int64))
            # print(start_index, sample_object_ids)
            # le = len(sample_object_ids)
            # max_object.append(le)
            xy_coordinate = content[sample_mask, 3:5].astype(float)
            mean_xy = np.mean(xy_coordinate, axis=0)
            # print('mean_xy', mean_xy)
            if is_train:
                neighbor_mask = np.zeros((total_frames, max_object_nums, max_object_nums), dtype=bool)
                sample_object_input = np.zeros((total_frames, max_object_nums, len(feature_id) + 2), dtype=np.float32)
                sample_object_mask = np.zeros((total_frames, max_object_nums), dtype=bool)
            else:
                neighbor_mask = np.zeros((history_frames, max_object_nums, max_object_nums), dtype=bool)
                sample_object_input = np.zeros((history_frames, max_object_nums, len(feature_id) + 2), dtype=np.float32)
                sample_object_mask = np.zeros((history_frames, max_object_nums), dtype=bool)
                sample_object_origin = np.zeros((history_frames, max_object_nums, 3), dtype=np.int64)
            # for every frame
            for frame_idx, frame in enumerate(sample_frames):
                exist_object_idx = []
                for object_idx, object_id in enumerate(sample_object_ids):
                    # rows matching both this frame and this object
                    matched_obj = content[np.logical_and(content[:, 0] == frame, content[:, 1] == object_id)]
                    if 0 == len(matched_obj):
                        continue
                    obj_feature = matched_obj[0, feature_id]
                    obj_feature[:2] = obj_feature[:2] - mean_xy
                    sample_object_input[frame_idx, object_idx, :-2] = obj_feature
                    # mark that this obstacle is present at this frame of the time window
                    sample_object_mask[frame_idx, object_idx] = True
                    exist_object_idx.append(object_idx)
                    if not is_train:
                        sample_object_origin[frame_idx, object_idx, :3] = matched_obj[0, :3]
                        # print(frame_idx, object_idx, matched_obj[0, :3])
                # print(len(exist_object_idx))
                for obj_id_i in exist_object_idx:
                    xy_1 = sample_object_input[frame_idx, obj_id_i, :2]
                    for obj_id_j in exist_object_idx:
                        xy_2 = sample_object_input[frame_idx, obj_id_j, :2]
                        relative_cord = xy_1 - xy_2
                        neighbor_mask[frame_idx, obj_id_i, obj_id_j] = (
                            abs(relative_cord[0]) > neighbor_distance) | (
                            abs(relative_cord[1]) > neighbor_distance)
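            # As written, neighbor_mask[t, i, j] is True when objects i and j are farther
            # apart than neighbor_distance along either axis at frame t; close pairs
            # (including the diagonal, i == j) stay False.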
            # add speed x, y in the last two feature dims (indices 6 and 7)
            new_mask = (sample_object_input[1:, :, :2] != 0) * (sample_object_input[:-1, :, :2] != 0).astype(float)
            sample_object_input[1:, :, -2:] = (
                sample_object_input[1:, :, :2] - sample_object_input[:-1, :, :2]).astype(float) * new_mask
            sample_object_input[0, :, -2:] = 0.
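            # new_mask keeps a velocity only when both consecutive positions are non-zero
            # (i.e. not padding). Illustrative example: x = 1.0 at frame t-1 and 1.5 at
            # frame t gives a velocity of 0.5 at frame t; frame 0 has no predecessor, so
            # its velocity channels are set to 0.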
            sample_object_mask = np.expand_dims(sample_object_mask, axis=-1)
            # refine the future masks:
            # data['masks'][:history_frames].sum(axis=0) == history_frames would mean the
            # object is present in every past frame; here the masks are simply AND-ed with
            # the mask of the last history frame, so only objects visible at that frame
            # are kept for the future frames.
            if is_train:
                data = dict(
                    features=sample_object_input, masks=sample_object_mask, mean=mean_xy,
                    neighbors=neighbor_mask)
                # data['masks'][history_frames-1:] = np.repeat(
                #     np.expand_dims(data['masks'][:history_frames].sum(axis=0) == history_frames, axis=0),
                #     history_frames+1, axis=0) & data['masks'][history_frames-1]
                data['masks'] = data['masks'] & data['masks'][history_frames - 1]
            else:
                data = dict(
                    features=sample_object_input, masks=sample_object_mask, mean=mean_xy,
                    origin=sample_object_origin, neighbors=neighbor_mask)
                # data['masks'][history_frames-1] = np.expand_dims(
                #     data['masks'][:history_frames].sum(axis=0) == history_frames, axis=0) & data['masks'][history_frames-1]
                data['masks'] = data['masks'] & data['masks'][history_frames - 1]
            data_list.append(data)
        all_data.extend(data_list)
    all_data = np.array(all_data)  # Train 5010 Test 415
    print(np.shape(all_data))
    # save training_data and training_adjacency into a file.
    if is_train:
        save_path = os.path.join(data_root, 'train_data.pkl')
    else:
        save_path = os.path.join(data_root, 'test_data.pkl')
    with open(save_path, 'wb') as writer:
        pickle.dump([all_data], writer)
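# A minimal sketch (not part of the original script) showing how the pickle written
# above can be read back; the helper name is illustrative.
def load_generated_data(pkl_path):
    """Return the array of clip dicts saved by GenerateData.

    Each dict holds:
      features:  (T, max_object_nums, len(feature_id) + 2) float32
      masks:     (T, max_object_nums, 1) bool
      neighbors: (T, max_object_nums, max_object_nums) bool
      mean:      (2,) mean x/y used to center the clip
      origin:    (history_frames, max_object_nums, 3), test data only
    where T is total_frames for training clips and history_frames for test clips.
    """
    with open(pkl_path, 'rb') as reader:
        [clips] = pickle.load(reader)
    return clips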
if __name__ == '__main__':
    train_file_path_list = sorted(
        glob.glob(os.path.join(data_root, 'prediction_train/*.txt')))
    test_file_path_list = sorted(
        glob.glob(os.path.join(data_root, 'prediction_test/*.txt')))
    print('Generating Training Data.')
    GenerateData(train_file_path_list, data_root, is_train=True)
    print('Generating Testing Data.')
    GenerateData(test_file_path_list, data_root, is_train=False)
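# Example invocation (assuming the ApolloScape prediction_train/ and prediction_test/
# folders sit under data_root):
#   python data_processor.py
# This writes train_data.pkl and test_data.pkl into data_root.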