9 changes: 7 additions & 2 deletions ding/model/template/q_learning.py
@@ -37,6 +37,7 @@ def __init__(
norm_type: Optional[str] = None,
dropout: Optional[float] = None,
init_bias: Optional[float] = None,
noise: bool = False,
) -> None:
"""
Overview:
@@ -57,6 +58,8 @@ def __init__(
- dropout (:obj:`Optional[float]`): The dropout rate of the dropout layer. \
If ``None``, the dropout layer is disabled by default.
- init_bias (:obj:`Optional[float]`): The initial value of the last layer bias in the head network.
- noise (:obj:`bool`): Whether to use ``NoiseLinearLayer`` as ``layer_fn`` in the Q network's MLP. \
Default is ``False``.
"""
super(DQN, self).__init__()
# Squeeze data from tuple, list or dict to single object. For example, from (4, ) to 4
@@ -90,7 +93,8 @@ def __init__(
layer_num=head_layer_num,
activation=activation,
norm_type=norm_type,
dropout=dropout
dropout=dropout,
noise=noise,
)
else:
self.head = head_cls(
@@ -99,7 +103,8 @@
head_layer_num,
activation=activation,
norm_type=norm_type,
dropout=dropout
dropout=dropout,
noise=noise,
)
if init_bias is not None and head_cls == DuelingHead:
# Zero the last layer bias of advantage head
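As a quick illustration of the new flag, here is a minimal sketch of constructing a noisy DQN model. It assumes ``DQN`` is exported from ``ding.model`` and reuses the observation/action shapes from the DemonAttack config added later in this PR.

```python
import torch
from ding.model import DQN

# noise=True makes the head MLP use NoiseLinearLayer instead of nn.Linear,
# as wired through the ``noise=noise`` argument in this diff.
model = DQN(
    obs_shape=[4, 84, 84],
    action_shape=6,
    encoder_hidden_size_list=[128, 128, 512],
    noise=True,
)

obs = torch.randn(2, 4, 84, 84)
out = model(obs)
print(out['logit'].shape)  # torch.Size([2, 6])
```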
10 changes: 10 additions & 0 deletions ding/policy/common_utils.py
@@ -1,10 +1,20 @@
from typing import List, Any, Dict, Callable
import torch
import torch.nn as nn
import numpy as np
import treetensor.torch as ttorch
from ding.utils.data import default_collate
from ding.torch_utils import to_tensor, to_ndarray, unsqueeze, squeeze
from ding.torch_utils import NoiseLinearLayer

def set_noise_mode(module: nn.Module, noise_enabled: bool) -> None:
"""
Overview:
Recursively set the ``force_noise`` flag on all ``NoiseLinearLayer`` modules within the given module.
Arguments:
- module (:obj:`nn.Module`): The root module whose submodules are traversed.
- noise_enabled (:obj:`bool`): Whether the noisy layers should inject noise regardless of train/eval mode.
"""
for m in module.modules():
if isinstance(m, NoiseLinearLayer):
m.force_noise = noise_enabled

def default_preprocess_learn(
data: List[Any],
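A small usage sketch of the ``set_noise_mode`` helper added above: it walks the module tree and flips ``force_noise`` on every ``NoiseLinearLayer`` it finds. The toy network here is only for illustration.

```python
import torch.nn as nn
from ding.torch_utils import NoiseLinearLayer
from ding.policy.common_utils import set_noise_mode

net = nn.Sequential(NoiseLinearLayer(8, 16), nn.ReLU(), NoiseLinearLayer(16, 4))

set_noise_mode(net, True)   # noise is injected even if net.eval() is active
set_noise_mode(net, False)  # noise is suppressed even if net.train() is active
print([m.force_noise for m in net.modules() if isinstance(m, NoiseLinearLayer)])
# [False, False]
```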
12 changes: 11 additions & 1 deletion ding/policy/dqn.py
@@ -10,7 +10,7 @@
from ding.utils.data import default_collate, default_decollate

from .base_policy import Policy
from .common_utils import default_preprocess_learn
from .common_utils import default_preprocess_learn, set_noise_mode


@POLICY_REGISTRY.register('dqn')
@@ -248,6 +248,8 @@ def _forward_learn(self, data: List[Dict[str, Any]]) -> Dict[str, Any]:
.. note::
For more detailed examples, please refer to our unittest for DQNPolicy: ``ding.policy.tests.test_dqn``.
"""
set_noise_mode(self._learn_model, True)
Review comment (Member): use ``noisy_net`` to control this line.

Another question: how should ``target_model`` be handled with a noisy net?

# Data preprocessing operations, such as stack data, cpu to cuda device
data = default_preprocess_learn(
data,
@@ -384,6 +386,12 @@ def _forward_collect(self, data: Dict[int, Any], eps: float) -> Dict[int, Any]:
data = default_collate(list(data.values()))
if self._cuda:
data = to_device(data, self._device)
# Noise mode during collection follows ``collect.add_noise``; default to True when it is not set.
set_noise_mode(self._collect_model, self._cfg.collect.get("add_noise", True))
self._collect_model.eval()
with torch.no_grad():
output = self._collect_model.forward(data, eps=eps)
@@ -476,6 +484,8 @@ def _forward_eval(self, data: Dict[int, Any]) -> Dict[int, Any]:
data = default_collate(list(data.values()))
if self._cuda:
data = to_device(data, self._device)
# Noise is always disabled in evaluation mode so that evaluation is deterministic.
set_noise_mode(self._eval_model, False)
self._eval_model.eval()
with torch.no_grad():
output = self._eval_model.forward(data)
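Taken together, the changes in this file establish a simple convention: ``_forward_learn`` always forces noise on, ``_forward_collect`` follows ``collect.add_noise`` (defaulting to True), and ``_forward_eval`` always forces noise off; the target model (``self._target_model``) is not touched, which is the open question raised in the review comment above. The sketch below restates that convention with a bare noisy network standing in for the wrapped learn/collect/eval models (an assumption made for brevity).

```python
import torch.nn as nn
from ding.torch_utils import NoiseLinearLayer
from ding.policy.common_utils import set_noise_mode

model = nn.Sequential(NoiseLinearLayer(4, 8), nn.ReLU(), NoiseLinearLayer(8, 2))
cfg_collect = {'add_noise': False}  # stand-in for self._cfg.collect, mirroring the DemonAttack config below

set_noise_mode(model, True)                                # _forward_learn: noisy training updates
set_noise_mode(model, cfg_collect.get('add_noise', True))  # _forward_collect: config-controlled
set_noise_mode(model, False)                               # _forward_eval: deterministic evaluation
```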
11 changes: 9 additions & 2 deletions ding/torch_utils/network/nn_module.py
@@ -637,7 +637,10 @@ class NoiseLinearLayer(nn.Module):
def __init__(self, in_channels: int, out_channels: int, sigma0: float = 0.4) -> None:
"""
Overview:
Initialize the NoiseLinearLayer class.
Initialize the NoiseLinearLayer class. The ``force_noise`` attribute enables external control over whether noise is applied:
- If ``force_noise`` is ``True``, the layer adds noise even if the module is in evaluation mode.
- If ``force_noise`` is ``False``, no noise is added regardless of ``self.training``.
- If ``force_noise`` is ``None`` (default), the layer uses its standard behavior, controlled by ``self.training``.
Arguments:
- in_channels (:obj:`int`): Number of channels in the input tensor.
- out_channels (:obj:`int`): Number of channels in the output tensor.
@@ -654,6 +657,7 @@ def __init__(self, in_channels: int, out_channels: int, sigma0: float = 0.4) -> None:
self.register_buffer("weight_eps", torch.empty(out_channels, in_channels))
self.register_buffer("bias_eps", torch.empty(out_channels))
self.sigma0 = sigma0
self.force_noise = None
self.reset_parameters()
self.reset_noise()

@@ -703,7 +707,10 @@ def forward(self, x: torch.Tensor):
Returns:
- output (:obj:`torch.Tensor`): The output tensor with noise.
"""
if self.training:
# Determine whether to add noise:
# If force_noise is not None, use it; otherwise, default to self.training.
noise_enabled = self.force_noise if self.force_noise is not None else self.training
if noise_enabled:
return F.linear(
x,
self.weight_mu + self.weight_sigma * self.weight_eps,
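A minimal sketch of the tri-state ``force_noise`` behavior documented above: with the flag unset, the layer follows ``self.training``; setting it overrides train/eval mode in either direction.

```python
import torch
from ding.torch_utils import NoiseLinearLayer

layer = NoiseLinearLayer(4, 2)
x = torch.randn(1, 4)

layer.eval()
y_plain = layer(x)        # force_noise is None -> falls back to self.training (False): no noise

layer.force_noise = True  # noise injected even in eval mode (collector use case)
y_noisy = layer(x)

layer.train()
layer.force_noise = False  # noise suppressed even in train mode (evaluator use case)
y_clean = layer(x)

print(torch.allclose(y_plain, y_clean))  # True: both no-noise paths use only the mu parameters
```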
60 changes: 60 additions & 0 deletions dizoo/atari/config/serial/demon_attack/demon_attack_dqn_config.py
@@ -0,0 +1,60 @@
from easydict import EasyDict

demon_attack_dqn_config = dict(
exp_name='DemonAttack_dqn_collect-not-noise_seed0',
env=dict(
collector_env_num=8,
evaluator_env_num=8,
n_evaluator_episode=8,
stop_value=1e6,
env_id='DemonAttackNoFrameskip-v4',
frame_stack=4,
),
policy=dict(
cuda=True,
priority=False,
model=dict(
obs_shape=[4, 84, 84],
action_shape=6,
encoder_hidden_size_list=[128, 128, 512],
noise=True,
),
nstep=3,
discount_factor=0.99,
learn=dict(
update_per_collect=10,
batch_size=32,
learning_rate=0.0001,
target_update_freq=500,
),
# collect=dict(n_sample=96, add_noise=True),
collect=dict(n_sample=96, add_noise=False),
eval=dict(evaluator=dict(eval_freq=4000, )),
other=dict(
eps=dict(
type='exp',
start=1.,
end=0.05,
decay=250000,
),
replay_buffer=dict(replay_buffer_size=100000, ),
),
),
)
demon_attack_dqn_config = EasyDict(demon_attack_dqn_config)
main_config = demon_attack_dqn_config
demon_attack_dqn_create_config = dict(
env=dict(
type='atari',
import_names=['dizoo.atari.envs.atari_env'],
),
env_manager=dict(type='subprocess'),
policy=dict(type='dqn'),
)
demon_attack_dqn_create_config = EasyDict(demon_attack_dqn_create_config)
create_config = demon_attack_dqn_create_config

if __name__ == '__main__':
# or you can enter `ding -m serial -c demon_attack_dqn_config.py -s 0`
from ding.entry import serial_pipeline
serial_pipeline((main_config, create_config), seed=0, max_env_step=int(10e6))
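To compare against the noisy-collection variant hinted at by the commented-out ``collect`` line, the same config can be tweaked before launching. A sketch, assuming this config module is importable from its dizoo path; the experiment name is a hypothetical label.

```python
from copy import deepcopy
from ding.entry import serial_pipeline
from dizoo.atari.config.serial.demon_attack.demon_attack_dqn_config import create_config, main_config

noisy_config = deepcopy(main_config)
noisy_config.exp_name = 'DemonAttack_dqn_collect-noise_seed0'  # hypothetical experiment name
noisy_config.policy.collect.add_noise = True  # equivalent to the commented-out collect line above
serial_pipeline((noisy_config, create_config), seed=0, max_env_step=int(10e6))
```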