convert_libero_data_to_lerobot.py
"""
Minimal example script for converting a dataset to LeRobot format.
We use the Libero dataset (stored in RLDS) for this example, but it can be easily
modified for any other data you have saved in a custom format.
Usage:
uv run examples/libero/convert_libero_data_to_lerobot.py --data_dir /path/to/your/data
If you want to push your dataset to the Hugging Face Hub, you can use the following command:
uv run examples/libero/convert_libero_data_to_lerobot.py --data_dir /path/to/your/data --push_to_hub
Note: to run the script, you need to install tensorflow_datasets:
`uv pip install tensorflow tensorflow_datasets`
You can download the raw Libero datasets from https://huggingface.co/datasets/openvla/modified_libero_rlds
The resulting dataset will get saved to the $LEROBOT_HOME directory.
Running this conversion script will take approximately 30 minutes.
"""
import shutil

from lerobot.common.datasets.lerobot_dataset import LEROBOT_HOME
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
import tensorflow_datasets as tfds
import tyro

REPO_NAME = "your_hf_username/libero"  # Name of the output dataset, also used for the Hugging Face Hub
RAW_DATASET_NAMES = [
    "libero_10_no_noops",
    "libero_goal_no_noops",
    "libero_object_no_noops",
    "libero_spatial_no_noops",
]  # For simplicity we will combine multiple Libero datasets into one training dataset
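# These correspond to the four Libero task suites (Libero-10, -Goal, -Object,
# -Spatial); the "_no_noops" variants from the OpenVLA release have idle
# (no-op) action steps filtered out.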

def main(data_dir: str, *, push_to_hub: bool = False):
    # Clean up any existing dataset in the output directory
    output_path = LEROBOT_HOME / REPO_NAME
    if output_path.exists():
        shutil.rmtree(output_path)

    # Create the LeRobot dataset and define the features to store
    # OpenPi assumes that proprio is stored in `state` and actions in `actions`
    # LeRobot requires image features to use the `image` dtype
    dataset = LeRobotDataset.create(
        repo_id=REPO_NAME,
        robot_type="panda",
        fps=10,
        features={
            "image": {
                "dtype": "image",
                "shape": (256, 256, 3),
                "names": ["height", "width", "channel"],
            },
            "wrist_image": {
                "dtype": "image",
                "shape": (256, 256, 3),
                "names": ["height", "width", "channel"],
            },
            "state": {
                "dtype": "float32",
                "shape": (8,),
                "names": ["state"],
            },
            "actions": {
                "dtype": "float32",
                "shape": (7,),
                "names": ["actions"],
            },
        },
        image_writer_threads=10,
        image_writer_processes=5,
    )
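    # In the Libero RLDS data, `state` is the 8-D proprioceptive vector
    # (end-effector pose plus gripper joints in the OpenVLA release) and
    # `actions` is the 7-D control command, matching the shapes declared above.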

    # Loop over raw Libero datasets and write episodes to the LeRobot dataset
    # You can modify this for your own data format
    for raw_dataset_name in RAW_DATASET_NAMES:
        raw_dataset = tfds.load(raw_dataset_name, data_dir=data_dir, split="train")
        for episode in raw_dataset:
            for step in episode["steps"].as_numpy_iterator():
                dataset.add_frame(
                    {
                        "image": step["observation"]["image"],
                        "wrist_image": step["observation"]["wrist_image"],
                        "state": step["observation"]["state"],
                        "actions": step["action"],
                    }
                )
            dataset.save_episode(task=step["language_instruction"].decode())
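            # Note: `language_instruction` is constant within an episode, so
            # reading it from the final step (above) labels the whole episode.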

    # Consolidate the dataset, skip computing stats since we will do that later
    dataset.consolidate(run_compute_stats=False)

    # Optionally push to the Hugging Face Hub
    if push_to_hub:
        dataset.push_to_hub(
            tags=["libero", "panda", "rlds"],
            private=False,
            push_videos=True,
            license="apache-2.0",
        )


if __name__ == "__main__":
    tyro.cli(main)
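
# Optional sanity check after conversion (a minimal sketch; it assumes the same
# lerobot version used above, where `num_episodes`/`num_frames` are exposed on
# LeRobotDataset):
#
#   from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
#   ds = LeRobotDataset(REPO_NAME)  # loads from $LEROBOT_HOME
#   print(ds.num_episodes, ds.num_frames)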