Quick Start

Your First Environment

from pogema import pogema_v0, GridConfig

# An 8x8 grid with 4 agents, seeded with 42
grid_config = GridConfig(num_agents=4, size=8, seed=42)
env = pogema_v0(grid_config)

# reset() hands back the initial observations and info for each agent
obs, info = env.reset()

# Keep stepping with randomly sampled actions until the episode is over
episode_over = False
while not episode_over:
    obs, reward, terminated, truncated, info = env.step(env.sample_actions())
    # Finished once every agent has terminated or every agent has been truncated
    episode_over = all(terminated) or all(truncated)

Understanding the Output

Each call to step() returns five values, and each one is a list of length num_agents (one entry per agent):

Return	Type	Description
`obs`	list of arrays	Per-agent partial observations
`reward`	list of float	1.0 when agent reaches goal
`terminated`	list of bool	True when agent finishes
`truncated`	list of bool	True when the episode time limit is reached
`info`	list of dict	Metrics dict on final step

Recording an Animation

from pogema import pogema_v0, GridConfig

# 4 agents on an 8x8 grid
env = pogema_v0(GridConfig(num_agents=4, size=8, seed=42))
# Switch on animation recording. It is called before reset() — presumably so
# the whole episode is captured; confirm against the pogema API.
env.enable_animation()
obs, info = env.reset()

# Play one episode with randomly sampled actions
while True:
    obs, reward, terminated, truncated, info = env.step(env.sample_actions())
    # Stop once every agent has terminated or every agent has been truncated
    if all(terminated) or all(truncated):
        break

# NOTE(review): nothing in this example writes the recorded animation to a
# file — confirm whether a save step should follow the loop.

Using the A* Baseline

from pogema import pogema_v0, GridConfig, BatchAStarAgent

# Same grid parameters as the other examples, but with the 'POMAPF'
# observation type selected for the A* agent
env = pogema_v0(GridConfig(
    num_agents=4, size=8, seed=42,
    observation_type='POMAPF',  # A* needs xy coordinates
))
# Switch on animation recording before the episode starts.
# NOTE(review): no call in this example writes the animation out — confirm
# whether a save step is needed.
env.enable_animation()
agent = BatchAStarAgent()
obs, info = env.reset()

while True:
    # Let the A* baseline choose the next actions from the current observations
    actions = agent.act(obs)
    obs, reward, terminated, truncated, info = env.step(actions)
    # Stop once every agent has terminated or every agent has been truncated
    if all(terminated) or all(truncated):
        break

# Reset the agent's internal state after the episode — presumably so the same
# agent object can be reused for another episode; confirm.
agent.reset_states()

Terminal Rendering

from pogema import pogema_v0, GridConfig

# A small 6x6 grid with 2 agents, seeded with 1
grid_config = GridConfig(num_agents=2, size=6, seed=1)
env = pogema_v0(grid_config)
obs, info = env.reset()

# Print the grid as ASCII to the terminal
env.render()