Commit

feat: add settings + examples
TheEimer committed Apr 7, 2024
1 parent f83744f commit 80dcf07
Showing 12 changed files with 462 additions and 72 deletions.
19 changes: 0 additions & 19 deletions .pre-commit-config.yaml

This file was deleted.

44 changes: 20 additions & 24 deletions Makefile
@@ -6,38 +6,34 @@ PACKAGE_NAME := adrl

DIR := "${CURDIR}"
SOURCE_DIR := ${PACKAGE_NAME}
.PHONY: help install-dev check format pre-commit clean help
.PHONY: help install-dev install check format pre-commit
@echo "Makefile ${NAME}"
@echo "* install-dev to install all dev requirements and install pre-commit"
@echo "* clean to clean any doc or build files"
@echo "* check to check the source code for issues"
@echo "* format to format the code with black and isort"
@echo "* pre-commit to run the pre-commit check"
PYTHON ?= python
PIP ?= python -m pip
MAKE ?= make
BLACK ?= black
PRECOMMIT ?= pre-commit
FLAKE8 ?= flake8
RUFF ?= python -m ruff

install-dev:
$(PIP) install -e ".[dev]"
pre-commit install

check-black:
$(BLACK) ${SOURCE_DIR} --check || :
check-flake8:
$(FLAKE8) ${SOURCE_DIR} || :
$(FLAKE8) ${TESTS_DIR} || :

check: check-black check-flake8

pre-commit:
$(PRECOMMIT) run --all-files

format-black:
$(BLACK) ${SOURCE_DIR}
format: format-black

# Clean up any builds in ./dist as well as doc, if present
clean:
$(PIP) install minari
$(PIP) install dacbench
$(PIP) install gymnasium==0.29.1
$(PIP) install gymnasium-robotics>=1.2.3

install:
$(PIP) install -e .
$(PIP) install minari
$(PIP) install dacbench
$(PIP) install gymnasium==0.29.1
$(PIP) install gymnasium-robotics>=1.2.3

check:
$(RUFF) check --exit-zero $(SOURCE_DIR)

format:
$(RUFF) check --silent --exit-zero --no-cache --fix $(SOURCE_DIR)
$(RUFF) format $(SOURCE_DIR)
28 changes: 12 additions & 16 deletions README.md
@@ -1,30 +1,26 @@
# adrl
# Advanced Topics in Deep RL

This repository contains the lecture materials. There are two main directories:
- adrl contains the constructors for our three main settings as well as small examples
- lecture contains the lecture PDFs; you will also add your seminar contributions there via PR


Install the repository as described below to run experiments in our settings. You can interact with the continual environment like any other env. For the multi-agent interface, see [Petting Zoo](https://pettingzoo.farama.org/); for offline RL, see [Minari](https://minari.farama.org/main/).

## Installation

Ideally, follow these instructions to create a fresh conda environment and then install the package for usage. That should let you run the examples and use the constructor functions for all three settings.
The dev option additionally enables code formatting, in case you want to use it.

```
git clone https://github.com/automl/adrl.git
cd adrl
conda create -n adrl python=3.8
conda create -n adrl python=3.10
conda activate adrl
# Install for usage
pip install .
make install
# Install for development
make install-dev
```

## Minimal Example

```
# Your code here
```
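
A possible sketch of what could go here, assuming the `make_continual_rl_env` constructor exported by the package (the multi-agent and offline settings work analogously through the Petting Zoo and Minari interfaces linked above):

```
# Minimal sketch: random actions in the continual RL setting.
from adrl import make_continual_rl_env

env = make_continual_rl_env()
obs, info = env.reset()
for _ in range(1000):
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    if terminated or truncated:
        obs, info = env.reset()
env.close()
```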

TODOs:
- comprehension traffic light (cf. fast.ai)
- prioritize intuition
- large visualization of all topics
- example project
- make clear that the components can be combined freely
5 changes: 5 additions & 0 deletions adrl/__init__.py
@@ -1,4 +1,7 @@
import datetime
from .continual_learning import make_continual_rl_env
from .multi_agent_learning import make_multi_agent_env
from .offline_rl import make_offline_rl_dataset

name = "advanced-topics-in-deep-rl"
package_name = "adrl"
@@ -11,3 +14,5 @@
}
copyright = f"Copyright {datetime.date.today().strftime('%Y')}, TheEimer"
version = "0.0.1"

__all__ = ["make_continual_rl_env", "make_multi_agent_env", "make_offline_rl_dataset"]
49 changes: 49 additions & 0 deletions adrl/continual_learning.py
@@ -0,0 +1,49 @@
import numpy as np
from gymnasium import Wrapper
from carl.envs import CARLLunarLander


class GravityChangeWrapper(Wrapper):
    """Changes the gravity of the wrapped CARL env every 10000 steps."""

    def __init__(self, env):
        super().__init__(env)
        self.n_steps = 0
        self.n_switches = 0

    def step(self, action):
        self.n_steps += 1
        state, reward, terminated, truncated, info = self.env.step(action)
        # Force a reset at every 10000-step boundary so the gravity switch
        # in reset() can take effect.
        if self.n_steps % 10000 == 0:
            truncated = True
        return state, reward, terminated, truncated, info

    def reset(self, **kwargs):
        # Switch gravity once per completed 10000-step phase.
        if self.n_steps // 10000 > self.n_switches:
            change_kind = np.random.choice(["flip", "random"])
            if change_kind == "flip":
                gravity = -self.env.context["GRAVITY_Y"]
            else:
                gravity = np.random.uniform(-20, 0)
            self.env.contexts[0] = {"GRAVITY_Y": gravity}
            self.env.context["GRAVITY_Y"] = gravity
            self.n_switches += 1
        return self.env.reset(**kwargs)


def make_continual_rl_env():
    contexts = {0: {"GRAVITY_Y": -10}}
    env = CARLLunarLander(contexts=contexts)
    env = GravityChangeWrapper(env)
    return env


if __name__ == "__main__":
    env = make_continual_rl_env()
    env.reset()
    for i in range(50000):
        _, _, te, tr, _ = env.step(env.action_space.sample())
        if te or tr:
            env.reset()
        if env.n_steps % 10000 == 0:
            print(f"Gravity is {env.env.context['GRAVITY_Y']}")
    env.close()
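
For context, a small usage sketch (random policy) that tracks the mean reward per 10000-step gravity phase, making the non-stationarity visible; it only relies on the wrapper attributes defined above:

```
# Sketch: mean reward per gravity phase under a random policy.
from adrl.continual_learning import make_continual_rl_env

env = make_continual_rl_env()
env.reset()
phase_rewards = []
current = []
while env.n_steps < 30000:
    _, reward, te, tr, _ = env.step(env.action_space.sample())
    current.append(reward)
    if te or tr:
        env.reset()
    if env.n_steps % 10000 == 0:
        phase_rewards.append(sum(current) / len(current))
        current = []
print(f"Mean reward per phase: {phase_rewards}")
env.close()
```
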
72 changes: 72 additions & 0 deletions adrl/multi_agent_learning.py
@@ -0,0 +1,72 @@
import csv
import pathlib

from dacbench.benchmarks import SigmoidBenchmark


def _read_instance_csv(path):
    """Read a sigmoid instance CSV into {instance_id: [features]}."""
    instances = {}
    with open(path, "r") as csv_file:
        for row in csv.reader(csv_file):
            features = []
            inst_id = None
            for i, entry in enumerate(row):
                if i == 0:
                    try:
                        inst_id = int(entry)
                    except Exception:
                        continue
                else:
                    try:
                        features.append(float(entry))
                    except Exception:
                        continue
            if features:
                instances[inst_id] = features
    return instances


def make_multi_agent_env():
    bench = SigmoidBenchmark()
    base_path = pathlib.Path(__file__).parent.resolve()
    bench.config.instance_set = _read_instance_csv(base_path / "sigmoid_train.csv")
    bench.config.test_set = _read_instance_csv(base_path / "sigmoid_test.csv")
    bench.config["multi_agent"] = True
    env = bench.get_environment()
    return env


if __name__ == "__main__":
    env = make_multi_agent_env()

    # Add one agent per action dimension
    env.register_agent(agent_id=0)
    env.register_agent(agent_id=1)

    env.reset()
    total_reward = 0
    terminated, truncated = False, False
    while not (terminated or truncated):
        for agent in [0, 1]:
            observation, reward, terminated, truncated, info = env.last()
            action = env.action_spaces[agent].sample()
            env.step(action)
        observation, reward, terminated, truncated, info = env.last()
        total_reward += reward

    print(f"The final reward was {total_reward}.")
    env.close()
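
A small variation on the loop above, as a sketch that uses only the calls already shown: tracking the reward each agent observes separately, e.g. to check that both action dimensions contribute. How `env.last()` attributes rewards per agent depends on the DACBench multi-agent interface, so treat this as illustrative:

```
# Sketch: per-agent reward bookkeeping with random actions.
from adrl.multi_agent_learning import make_multi_agent_env

env = make_multi_agent_env()
env.register_agent(agent_id=0)
env.register_agent(agent_id=1)
env.reset()

agent_rewards = {0: 0.0, 1: 0.0}
terminated, truncated = False, False
while not (terminated or truncated):
    for agent in [0, 1]:
        observation, reward, terminated, truncated, info = env.last()
        agent_rewards[agent] += reward
        env.step(env.action_spaces[agent].sample())

print(f"Rewards observed per agent: {agent_rewards}")
env.close()
```
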
85 changes: 85 additions & 0 deletions adrl/offline_rl.py
@@ -0,0 +1,85 @@
import numpy as np
import minari
import torch
import torch.nn as nn
from torch.utils.data import DataLoader


def collate_fn(batch):
    # Pad the variable-length episodes in the batch to a common length.
    return {
        "id": torch.Tensor([x.id for x in batch]),
        "seed": torch.Tensor([x.seed for x in batch]),
        "total_steps": torch.Tensor([x.total_timesteps for x in batch]),
        "observations": torch.nn.utils.rnn.pad_sequence(
            [torch.as_tensor(x.observations["observation"]) for x in batch],
            batch_first=True,
        ),
        "actions": torch.nn.utils.rnn.pad_sequence(
            [torch.as_tensor(x.actions) for x in batch], batch_first=True
        ),
        "rewards": torch.nn.utils.rnn.pad_sequence(
            [torch.as_tensor(x.rewards) for x in batch], batch_first=True
        ),
        "terminations": torch.nn.utils.rnn.pad_sequence(
            [torch.as_tensor(x.terminations) for x in batch], batch_first=True
        ),
        "truncations": torch.nn.utils.rnn.pad_sequence(
            [torch.as_tensor(x.truncations) for x in batch], batch_first=True
        ),
    }


class PolicyNetwork(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, output_dim)

    def forward(self, x):
        x = torch.as_tensor(x).float()
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x


def make_offline_rl_dataset():
    dataset = minari.load_dataset("antmaze-umaze-v0", download=True)
    dataloader = DataLoader(
        dataset, batch_size=256, shuffle=True, collate_fn=collate_fn
    )
    env = dataset.recover_environment()
    return dataloader, env


if __name__ == "__main__":
    num_epochs = 3
    dataloader, env = make_offline_rl_dataset()

    observation_space = env.observation_space["observation"]
    action_space = env.action_space
    policy_net = PolicyNetwork(np.prod(observation_space.shape), action_space.shape[0])
    optimizer = torch.optim.Adam(policy_net.parameters())
    # Simple behavior cloning: regress the dataset actions with an MSE loss,
    # since the action space is continuous.
    loss_fn = nn.MSELoss()

    for epoch in range(num_epochs):
        for batch in dataloader:
            # Observations have one more entry than actions, so drop the last one.
            a_pred = policy_net(batch["observations"][:, :-1])
            loss = loss_fn(a_pred, batch["actions"].float())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f"Epoch: {epoch}/{num_epochs}, Loss: {loss.item()}")

    # Online evaluation of the cloned policy.
    state = env.reset()[0]
    te, tr = False, False
    r = 0
    while not (te or tr):
        action = policy_net(state["observation"])
        state, reward, te, tr, _ = env.step(action.detach().numpy())
        r += reward
    print(f"Total online evaluation reward: {r}")
    env.close()
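
As a point of comparison for the online evaluation reward, a short sketch that reuses the dataloader above to estimate the average episodic return in the dataset; it relies only on the fact that the collate function pads with zeros:

```
# Sketch: average episodic return of the offline dataset.
from adrl.offline_rl import make_offline_rl_dataset

dataloader, _ = make_offline_rl_dataset()
total_return, n_episodes = 0.0, 0
for batch in dataloader:
    # Padded positions are zero, so summing over time gives per-episode returns.
    total_return += batch["rewards"].sum().item()
    n_episodes += batch["rewards"].shape[0]
print(f"Average dataset return: {total_return / n_episodes:.2f}")
```
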
Empty file removed adrl/run_mighty.sh
Empty file.
