opendilab · Poet-LiBai · May 17, 2023
diff --git a/chapter6_marl/environment.yml b/chapter6_marl/environment.yml
@@ -0,0 +1,204 @@
+name: opendilab  # name of the conda environment this file describes
+channels:  # Tsinghua (TUNA) Anaconda mirror URLs, followed by conda's `defaults`
+  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
+  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r
+  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free
+  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge  # conda-forge served through the same mirror
+  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2
+  - defaults  # NOTE(review): presumably a fallback when the mirror lacks a package — confirm
+dependencies:  # conda packages, each pinned as name=version=build_string
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - ca-certificates=2022.10.11=h06a4308_0
+  - certifi=2022.12.7=py39h06a4308_0
+  - ld_impl_linux-64=2.38=h1181459_1  # package name is Linux x86-64 specific
+  - libffi=3.3=he6710b0_2
+  - libgcc-ng=11.2.0=h1234567_1
+  - libgomp=11.2.0=h1234567_1
+  - libstdcxx-ng=11.2.0=h1234567_1
+  - ncurses=6.3=h5eee18b_3
+  - openssl=1.1.1s=h7f8727e_0
+  - pip=22.3.1=py39h06a4308_0
+  - python=3.9.0=hdb3f193_2  # interpreter; the py39 tags in other build strings correspond to this 3.9 series
+  - readline=8.2=h5eee18b_0
+  - setuptools=65.6.3=py39h06a4308_0
+  - sqlite=3.40.1=h5082296_0
+  - tk=8.6.12=h1ccaba5_0
+  - tzdata=2022g=h04d1e81_0
+  - wheel=0.37.1=pyhd3eb1b0_0
+  - xz=5.2.8=h5eee18b_0
+  - zlib=1.2.13=h5eee18b_0
+  - pip:  # packages below are installed with pip, each pinned with ==
+    - absl-py==1.4.0
+    - ale-py==0.7.5
+    - appdirs==1.4.4
+    - async-timeout==4.0.2
+    - attrs==22.2.0
+    - autorom==0.6.1
+    - blinker==1.6.2
+    - box2d==2.3.2
+    - box2d-kengz==2.3.3
+    - box2d-py==2.3.5
+    - bsuite==0.3.5
+    - cffi==1.15.1
+    - charset-normalizer==3.0.1
+    - click==7.1.2
+    - cloudpickle==1.6.0
+    - cmake==3.26.3
+    - commonmark==0.9.1
+    - contourpy==1.0.7
+    - cycler==0.11.0
+    - cython==0.29.33
+    - decorator==5.1.1
+    - di-engine==0.4.7
+    - di-toolkit==0.1.0
+    - di-treetensor==0.4.0
+    - dill==0.3.6
+    - dm-env==1.6
+    - dm-tree==0.1.8
+    - docker-pycreds==0.4.0
+    - easydict==1.9
+    - enum-tools==0.9.0.post1
+    - evogym==1.0.0
+    - exceptiongroup==1.1.0
+    - fasteners==0.18
+    - filelock==3.9.0
+    - flake8==6.0.0
+    - flask==1.1.4
+    - fonttools==4.38.0
+    - frozendict==2.3.7
+    - gitdb==4.0.10
+    - gitpython==3.1.30
+    - glcontext==2.3.7
+    - glfw==2.5.6
+    - gpy==1.10.0
+    - gpyopt==1.2.6
+    - gym==0.25.1
+    - gym-notices==0.0.8
+    - gym-super-mario-bros==7.4.0
+    - gym3==0.3.3
+    - gymnasium==0.27.1
+    - gymnasium-notices==0.0.1
+    - h5py==3.7.0
+    - hanabi-learning-environment==0.0.1
+    - hbutils==0.8.0
+    - hickle==5.0.2
+    - idna==3.4
+    - imageio==2.25.1
+    - imageio-ffmpeg==0.3.0
+    - importlib-metadata==4.13.0
+    - importlib-resources==5.12.0
+    - inflect==6.0.2
+    - iniconfig==2.0.0
+    - itsdangerous==1.1.0
+    - jax-jumpy==0.2.0
+    - jinja2==2.11.3
+    - joblib==1.2.0
+    - kiwisolver==1.4.4
+    - lazy-loader==0.2
+    - llvmlite==0.39.1
+    - lxml==4.9.2
+    - lz4==4.3.2
+    - magent==0.1.14
+    - mamujoco==1.1.0
+    - markupsafe==2.0.1
+    - matplotlib==3.6.3
+    - mccabe==0.7.0
+    - metadrive-simulator==0.2.6.0
+    - minigrid==2.1.1
+    - mizani==0.8.1
+    - moderngl==5.7.4
+    - mpire==2.6.0
+    - mujoco==2.2.0
+    - mujoco-py==2.1.2.14
+    - multi-agent-ale-py==0.1.11
+    - neat-python==0.92
+    - nes-py==8.2.1
+    - networkx==3.1
+    - numba==0.56.4
+    - numpy==1.23.5
+    - nvidia-cublas-cu11==11.10.3.66  # NOTE(review): CUDA 11 runtime wheels (this and the next three); confirm they are still needed with torch 1.12.0
+    - nvidia-cuda-nvrtc-cu11==11.7.99
+    - nvidia-cuda-runtime-cu11==11.7.99
+    - nvidia-cudnn-cu11==8.5.0.96
+    - opencv-python==4.7.0.68
+    - opencv-python-headless==4.7.0.72
+    - packaging==23.0
+    - palettable==3.3.1
+    - panda3d==1.10.8
+    - panda3d-gltf==0.13
+    - panda3d-simplepbr==0.10
+    - pandas==1.5.2
+    - paramz==0.9.5
+    - pathtools==0.1.2
+    - patsy==0.5.3
+    - pettingzoo==1.12.0
+    - pillow==9.4.0
+    - plotnine==0.10.1
+    - pluggy==1.0.0
+    - procgen==0.10.7
+    - protobuf==3.20.1
+    - psutil==5.9.4
+    - py==1.11.0
+    - pybind11==2.10.3
+    - pycodestyle==2.10.0
+    - pycparser==2.21
+    - pydantic==1.10.4
+    - pyflakes==3.0.1
+    - pygame==2.0.0
+    - pygifsicle==1.0.7
+    - pyglet==1.5.21
+    - pygments==2.14.0
+    - pymunk==6.2.0
+    - pynng==0.7.2
+    - pyopengl==3.1.6
+    - pyopengl-accelerate==3.1.6
+    - pyparsing==3.0.9
+    - pytest==7.0.1
+    - python-chess==0.31.4
+    - python-dateutil==2.8.2
+    - python-graphviz==0.20.1
+    - pytz==2022.7.1
+    - pywavelets==1.4.1
+    - pyyaml==6.0
+    - readerwriterlock==1.0.9
+    - redis==4.4.2
+    - requests==2.28.2
+    - responses==0.12.1
+    - rich==13.1.0
+    - rlcard==1.0.4
+    - rocket-recycling==0.1
+    - scikit-image==0.20.0
+    - scikit-learn==1.2.0
+    - scipy==1.9.1
+    - seaborn==0.12.2
+    - sentry-sdk==1.13.0
+    - setproctitle==1.3.2
+    - six==1.16.0
+    - smmap==5.0.0
+    - sniffio==1.3.0
+    - stable-baselines3==1.7.0  # NOTE(review): verify this release's own gym requirement against gym==0.25.1 pinned above
+    - statsmodels==0.13.5
+    - support-developer==1.0.5
+    - swig==4.1.1
+    - tabulate==0.9.0
+    - tensorboardx==2.5.1
+    - termcolor==2.2.0
+    - threadpoolctl==3.1.0
+    - tifffile==2023.3.21
+    - tomli==2.0.1
+    - torch==1.12.0
+    - torchvision==0.13.0  # the torchvision release paired with torch 1.12.0 (0.14.1 requires torch 1.13.1)
+    - tqdm==4.64.1
+    - treevalue==1.4.3
+    - trueskill==0.4.5
+    - ttkbootstrap==1.10.1
+    - typing==3.7.4.3
+    - typing-extensions==4.4.0
+    - urllib3==1.26.14
+    - urlobject==2.4.3
+    - wandb==0.13.9
+    - werkzeug==1.0.1
+    - yapf==0.29.0
+    - zipp==3.11.0
+# No `prefix:` entry on purpose: an exporter-specific absolute path is not portable; the env is located via `name` (or -n/-p).
diff --git a/chapter6_marl/multi_mujoco_ant_2x4_ppo/ckpt/ckpt_best.pth.tar b/chapter6_marl/multi_mujoco_ant_2x4_ppo/ckpt/ckpt_best.pth.tar
diff --git a/chapter6_marl/multi_mujoco_ant_2x4_ppo/ckpt/iteration_0.pth.tar b/chapter6_marl/multi_mujoco_ant_2x4_ppo/ckpt/iteration_0.pth.tar
diff --git a/chapter6_marl/multi_mujoco_ant_2x4_ppo/ckpt/iteration_10000.pth.tar b/chapter6_marl/multi_mujoco_ant_2x4_ppo/ckpt/iteration_10000.pth.tar
diff --git a/chapter6_marl/multi_mujoco_ant_2x4_ppo/ckpt/iteration_20000.pth.tar b/chapter6_marl/multi_mujoco_ant_2x4_ppo/ckpt/iteration_20000.pth.tar
diff --git a/chapter6_marl/multi_mujoco_ant_2x4_ppo/ckpt/iteration_28668.pth.tar b/chapter6_marl/multi_mujoco_ant_2x4_ppo/ckpt/iteration_28668.pth.tar
diff --git a/chapter6_marl/multi_mujoco_ant_2x4_ppo/formatted_total_config.py b/chapter6_marl/multi_mujoco_ant_2x4_ppo/formatted_total_config.py
@@ -0,0 +1,154 @@
+from easydict import EasyDict
+
+main_config = dict(
+    exp_name='multi_mujoco_ant_2x4_ppo',
+    env=dict(
+        manager=dict(
+            episode_num=float('inf'),
+            max_retry=5,
+            step_timeout=None,
+            auto_reset=True,
+            reset_timeout=None,
+            retry_type='reset',
+            retry_waiting_time=0.1,
+            shared_memory=True,
+            copy_on_get=True,
+            context='fork',
+            wait_num=float('inf'),
+            step_wait_timeout=None,
+            connect_timeout=60,
+            reset_inplace=False,
+            cfg_type='SyncSubprocessEnvManagerDict',
+            type='subprocess',
+        ),
+        stop_value=6000,
+        n_evaluator_episode=8,
+        scenario='Ant-v2',
+        agent_conf='2x4d',
+        agent_obsk=2,
+        add_agent_id=False,
+        episode_limit=1000,
+        collector_env_num=8,
+        evaluator_env_num=8,
+    ),
+    policy=dict(
+        model=dict(
+            agent_num=2,
+            agent_obs_shape=54,
+            global_obs_shape=111,
+            action_shape=4,
+            action_space='continuous',
+        ),
+        learn=dict(
+            learner=dict(
+                train_iterations=1000000000,
+                dataloader=dict(
+                    num_workers=0,
+                ),
+                log_policy=True,
+                hook=dict(
+                    load_ckpt_before_run='',
+                    log_show_after_iter=100,
+                    save_ckpt_after_iter=10000,
+                    save_ckpt_after_run=True,
+                ),
+                cfg_type='BaseLearnerDict',
+            ),
+            epoch_per_collect=3,
+            batch_size=800,
+            learning_rate=0.0005,
+            value_weight=0.5,
+            entropy_weight=0.001,
+            clip_ratio=0.2,
+            adv_norm=True,
+            value_norm=True,
+            ppo_param_init=True,
+            grad_clip_type='clip_norm',
+            grad_clip_value=5,
+            ignore_done=False,
+        ),
+        collect=dict(
+            collector=dict(
+                deepcopy_obs=False,
+                transform_obs=False,
+                collect_print_freq=100,
+                cfg_type='SampleSerialCollectorDict',
+                type='sample',
+            ),
+            unroll_len=1,
+            discount_factor=0.99,
+            gae_lambda=0.95,
+            env_num=8,
+            n_sample=3200,
+        ),
+        eval=dict(
+            evaluator=dict(
+                eval_freq=1000,
+                render={'render_freq': -1, 'mode': 'train_iter'},
+                cfg_type='InteractionSerialEvaluatorDict',
+                stop_value=6000,
+                n_episode=8,
+            ),
+            env_num=8,
+        ),
+        other=dict(
+            replay_buffer=dict(
+                type='advanced',
+                replay_buffer_size=4096,
+                max_use=float('inf'),
+                max_staleness=float('inf'),
+                alpha=0.6,
+                beta=0.4,
+                anneal_step=100000,
+                enable_track_used_data=False,
+                deepcopy=False,
+                thruput_controller=dict(
+                    push_sample_rate_limit=dict(
+                        max=float('inf'),
+                        min=0,
+                    ),
+                    window_seconds=30,
+                    sample_min_limit_ratio=1,
+                ),
+                monitor=dict(
+                    sampled_data_attr=dict(
+                        average_range=5,
+                        print_freq=200,
+                    ),
+                    periodic_thruput=dict(
+                        seconds=60,
+                    ),
+                ),
+                cfg_type='AdvancedReplayBufferDict',
+            ),
+        ),
+        on_policy=True,
+        cuda=True,
+        multi_gpu=False,
+        bp_update_sync=True,
+        traj_len_inf=False,
+        priority=False,
+        priority_IS_weight=False,
+        recompute_adv=True,
+        action_space='continuous',
+        nstep_return=False,
+        multi_agent=True,
+        transition_with_policy_data=True,
+        cfg_type='PPOCommandModePolicyDict',
+    ),
+)
+main_config = EasyDict(main_config)
+main_config = main_config
+create_config = dict(
+    env=dict(
+        type='mujoco_multi',
+        import_names=['dizoo.multiagent_mujoco.envs.multi_mujoco_env'],
+    ),
+    env_manager=dict(
+        cfg_type='SyncSubprocessEnvManagerDict',
+        type='subprocess',
+    ),
+    policy=dict(type='ppo'),
+)
+create_config = EasyDict(create_config)
+create_config = create_config
diff --git a/chapter6_marl/multi_mujoco_ant_2x4_ppo/git_diff.txt b/chapter6_marl/multi_mujoco_ant_2x4_ppo/git_diff.txt