[ PROMPT_NODE_27432 ]
custom_environments
[ SKILL_DOCUMENTATION ]
# 为 Stable Baselines3 创建自定义环境
本指南提供了为 Stable Baselines3 创建兼容的自定义 Gymnasium 环境的全面信息。
## 环境结构
### 必需方法
每个自定义环境必须继承自 `gymnasium.Env`,并实现 `__init__`、`reset` 和 `step`(`render` 与 `close` 为可选):
python
import gymnasium as gym
from gymnasium import spaces
import numpy as np
class CustomEnv(gym.Env):
    """Minimal Gymnasium environment skeleton.

    The observations and reward produced here are placeholders (random
    samples from the observation space and a constant ``0.0``); replace
    them with real environment logic.
    """

    def __init__(self):
        """Initialize the environment and define action_space and observation_space."""
        super().__init__()
        # Four discrete actions.
        self.action_space = spaces.Discrete(4)
        # Four float32 values, each bounded by low=0 and high=1.
        self.observation_space = spaces.Box(low=0, high=1, shape=(4,), dtype=np.float32)

    def reset(self, seed=None, options=None):
        """Reset the environment to its initial state.

        Args:
            seed: Optional seed, forwarded to the base-class ``reset``.
            options: Optional reset options; not used by this skeleton.

        Returns:
            An ``(observation, info)`` tuple, where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        observation = self.observation_space.sample()
        info = {}
        return observation, info

    def step(self, action):
        """Run one timestep of the environment.

        Args:
            action: The action to apply; not used by this skeleton.

        Returns:
            An ``(observation, reward, terminated, truncated, info)`` tuple.
        """
        observation = self.observation_space.sample()
        reward = 0.0
        terminated = False  # the episode ended naturally
        truncated = False  # the episode ended because of a time limit
        info = {}
        return observation, reward, terminated, truncated, info

    def render(self):
        """Visualize the environment (optional)."""
        pass

    def close(self):
        """Clean up resources (optional)."""
        pass
### 方法详情
#### `__init__(self, ...)`
**目的:** 初始化环境并定义空间。
**要求:**
- 必须调用 `super().__init__()`
- 必须定义 `self.action_space`
- 必须定义 `self.observation_space`
**示例:**
python
def __init__(self, grid_size=10, max_steps=100):
    """Store the configuration and define the action and observation spaces.

    Args:
        grid_size: Stored as ``self.grid_size``; the observation space's
            upper bound is ``grid_size - 1``.
        max_steps: Stored as ``self.max_steps``. Presumably the per-episode
            step limit -- it is not used within this snippet; confirm
            against ``step``.
    """
    super().__init__()
    self.grid_size = grid_size
    self.max_steps = max_steps
    self.current_step = 0

    # Define the spaces
    self.action_space = spaces.Discrete(4)
    self.observation_space = spaces.Box(
        low=0, high=grid_size - 1, shape=(2,), dtype=np.float32
    )
#### `reset(self, seed=None, options=None)`
**目的:** 将环境重置为初始状态。
**要求:**
- 必须调用 `super().reset(seed=seed)`
- 必须返回 `(observation, info)` 元组
- 观测值必须符合 `observation_space`
- `info` 必须是一个字典(可以为空)
**示例:**
python
def reset(self, seed=None, options=None):
super().reset(seed=seed)
# 初始化状态
self.agent_pos = self.np_random.integers(0, self.grid_size, size=2)
self.go