Skip to content

fix(pu): fix noise layer's usage #866

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions ding/model/template/q_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def __init__(
norm_type: Optional[str] = None,
dropout: Optional[float] = None,
init_bias: Optional[float] = None,
noise: bool = False,
) -> None:
"""
Overview:
Expand All @@ -57,6 +58,8 @@ def __init__(
- dropout (:obj:`Optional[float]`): The dropout rate of the dropout layer. \
if ``None`` then default disable dropout layer.
- init_bias (:obj:`Optional[float]`): The initial value of the last layer bias in the head network. \
- noise (:obj:`bool`): Whether to use ``NoiseLinearLayer`` as ``layer_fn`` in Q networks' MLP. \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • to use
  • use NoiseLinearLayer to boost exploration

Default to ``False``.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Default to

"""
super(DQN, self).__init__()
# Squeeze data from tuple, list or dict to single object. For example, from (4, ) to 4
Expand Down Expand Up @@ -90,7 +93,8 @@ def __init__(
layer_num=head_layer_num,
activation=activation,
norm_type=norm_type,
dropout=dropout
dropout=dropout,
noise=noise,
)
else:
self.head = head_cls(
Expand All @@ -99,7 +103,8 @@ def __init__(
head_layer_num,
activation=activation,
norm_type=norm_type,
dropout=dropout
dropout=dropout,
noise=noise,
)
if init_bias is not None and head_cls == DuelingHead:
# Zero the last layer bias of advantage head
Expand Down
10 changes: 10 additions & 0 deletions ding/policy/common_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
from typing import List, Any, Dict, Callable
import torch
import torch.nn as nn
import numpy as np
import treetensor.torch as ttorch
from ding.utils.data import default_collate
from ding.torch_utils import to_tensor, to_ndarray, unsqueeze, squeeze
from ding.torch_utils import NoiseLinearLayer

def set_noise_mode(module: nn.Module, noise_enabled: bool):
    """
    Overview:
        Write ``noise_enabled`` into the ``force_noise`` attribute of every ``NoiseLinearLayer``
        yielded by ``module.modules()``. Modules of any other type are left untouched, and the
        update happens in place (nothing is returned).
    Arguments:
        - module (:obj:`nn.Module`): The network whose ``NoiseLinearLayer`` sub-modules are updated.
        - noise_enabled (:obj:`bool`): The value assigned to ``force_noise`` on each matching layer.
    """
    # Lazily pick out only the noisy linear layers, then flip the flag on each of them.
    noisy_layers = (layer for layer in module.modules() if isinstance(layer, NoiseLinearLayer))
    for layer in noisy_layers:
        layer.force_noise = noise_enabled

def default_preprocess_learn(
data: List[Any],
Expand Down
12 changes: 11 additions & 1 deletion ding/policy/dqn.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ding.utils.data import default_collate, default_decollate

from .base_policy import Policy
from .common_utils import default_preprocess_learn
from .common_utils import default_preprocess_learn, set_noise_mode


@POLICY_REGISTRY.register('dqn')
Expand Down Expand Up @@ -248,6 +248,8 @@ def _forward_learn(self, data: List[Dict[str, Any]]) -> Dict[str, Any]:
.. note::
For more detailed examples, please refer to our unittest for DQNPolicy: ``ding.policy.tests.test_dqn``.
"""
set_noise_mode(self._learn_model, True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use noisy_net to control this line

Another question: how should target_model be handled in a noisy net?


# Data preprocessing operations, such as stack data, cpu to cuda device
data = default_preprocess_learn(
data,
Expand Down Expand Up @@ -384,6 +386,12 @@ def _forward_collect(self, data: Dict[int, Any], eps: float) -> Dict[int, Any]:
data = default_collate(list(data.values()))
if self._cuda:
data = to_device(data, self._device)
# Use the add_noise parameter to decide noise mode.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rename this to a noisy_net field and add it to the default config (at the policy level); don't use xxx.get.

# Default to True if the parameter is not provided.
if self._cfg.collect.get("add_noise", True):
set_noise_mode(self._collect_model, True)
else:
set_noise_mode(self._collect_model, False)
self._collect_model.eval()
with torch.no_grad():
output = self._collect_model.forward(data, eps=eps)
Expand Down Expand Up @@ -476,6 +484,8 @@ def _forward_eval(self, data: Dict[int, Any]) -> Dict[int, Any]:
data = default_collate(list(data.values()))
if self._cuda:
data = to_device(data, self._device)
# Ensure that in evaluation mode noise is disabled.
set_noise_mode(self._eval_model, False)
self._eval_model.eval()
with torch.no_grad():
output = self._eval_model.forward(data)
Expand Down
11 changes: 9 additions & 2 deletions ding/torch_utils/network/nn_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -637,7 +637,10 @@ class NoiseLinearLayer(nn.Module):
def __init__(self, in_channels: int, out_channels: int, sigma0: int = 0.4) -> None:
"""
Overview:
Initialize the NoiseLinearLayer class.
Initialize the NoiseLinearLayer class. The 'force_noise' attribute enables external control over whether noise is applied.
- If force_noise is True, the layer adds noise even if the module is in evaluation mode.
- If force_noise is False, no noise is added regardless of self.training.
- If force_noise is None (default), the layer uses its standard behavior (controlled by self.training).
Arguments:
- in_channels (:obj:`int`): Number of channels in the input tensor.
- out_channels (:obj:`int`): Number of channels in the output tensor.
Expand All @@ -654,6 +657,7 @@ def __init__(self, in_channels: int, out_channels: int, sigma0: int = 0.4) -> No
self.register_buffer("weight_eps", torch.empty(out_channels, in_channels))
self.register_buffer("bias_eps", torch.empty(out_channels))
self.sigma0 = sigma0
self.force_noise = None
self.reset_parameters()
self.reset_noise()

Expand Down Expand Up @@ -703,7 +707,10 @@ def forward(self, x: torch.Tensor):
Returns:
- output (:obj:`torch.Tensor`): The output tensor with noise.
"""
if self.training:
# Determine whether to add noise:
# If force_noise is not None, use it; otherwise, default to self.training.
noise_enabled = self.force_noise if self.force_noise is not None else self.training
if noise_enabled:
return F.linear(
x,
self.weight_mu + self.weight_sigma * self.weight_eps,
Expand Down
60 changes: 60 additions & 0 deletions dizoo/atari/config/serial/demon_attack/demon_attack_dqn_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from easydict import EasyDict

# Experiment config for DQN on Atari DemonAttack with noisy linear layers in the Q network.
demon_attack_dqn_config = EasyDict(
    {
        'exp_name': 'DemonAttack_dqn_collect-not-noise_seed0',
        'env': {
            'collector_env_num': 8,
            'evaluator_env_num': 8,
            'n_evaluator_episode': 8,
            'stop_value': 1e6,
            'env_id': 'DemonAttackNoFrameskip-v4',
            'frame_stack': 4,
        },
        'policy': {
            'cuda': True,
            'priority': False,
            'model': {
                'obs_shape': [4, 84, 84],
                'action_shape': 6,
                'encoder_hidden_size_list': [128, 128, 512],
                # Build the Q network's MLP with ``NoiseLinearLayer``.
                'noise': True,
            },
            'nstep': 3,
            'discount_factor': 0.99,
            'learn': {
                'update_per_collect': 10,
                'batch_size': 32,
                'learning_rate': 0.0001,
                'target_update_freq': 500,
            },
            # ``add_noise=False`` switches the layer noise off while collecting (hence the
            # "collect-not-noise" experiment name); set it to True to collect with noise.
            'collect': {
                'n_sample': 96,
                'add_noise': False,
            },
            'eval': {
                'evaluator': {
                    'eval_freq': 4000,
                },
            },
            'other': {
                'eps': {
                    'type': 'exp',
                    'start': 1.,
                    'end': 0.05,
                    'decay': 250000,
                },
                'replay_buffer': {
                    'replay_buffer_size': 100000,
                },
            },
        },
    }
)
main_config = demon_attack_dqn_config

# Creation config: which env, env manager and policy implementations to instantiate.
demon_attack_dqn_create_config = EasyDict(
    {
        'env': {
            'type': 'atari',
            'import_names': ['dizoo.atari.envs.atari_env'],
        },
        'env_manager': {
            'type': 'subprocess',
        },
        'policy': {
            'type': 'dqn',
        },
    }
)
create_config = demon_attack_dqn_create_config

if __name__ == '__main__':
    # Equivalent command line: `ding -m serial -c demon_attack_dqn_config.py -s 0`
    from ding.entry import serial_pipeline

    serial_pipeline((main_config, create_config), seed=0, max_env_step=int(10e6))
Loading