rlox_core/env/mod.rs
1pub mod batch;
2pub mod builtins;
3pub mod mujoco;
4pub mod parallel;
5pub mod spaces;
6
7use std::collections::HashMap;
8
9pub use batch::BatchSteppable;
10pub use spaces::{Action, ActionSpace, ObsSpace, Observation};
11
12use crate::error::RloxError;
13
14/// A single environment transition returned by `step`.
15///
16/// ## Precision convention
17///
18/// Rewards are `f64` for numerical stability during advantage computation;
19/// observations and actions are `f32` throughout (see [`Observation`]).
20/// When storing into replay buffers (which use `f32` rewards), a narrowing
21/// cast occurs. This is intentional: environments compute in f64, buffers
22/// store in f32, and training reads f32.
23///
24/// The `info` field is `None` when the environment provides no extra
25/// metadata (the common case for CartPole, Pendulum, etc.), avoiding a
26/// `HashMap` allocation on every step.
27#[derive(Debug, Clone)]
28pub struct Transition {
29 pub obs: Observation,
30 pub reward: f64,
31 pub terminated: bool,
32 pub truncated: bool,
33 pub info: Option<HashMap<String, f64>>,
34}
35
36/// The core environment trait.
37///
38/// All built-in environments implement this. The `Send + Sync` bounds
39/// enable safe parallel stepping with Rayon.
40pub trait RLEnv: Send + Sync {
41 fn step(&mut self, action: &Action) -> Result<Transition, RloxError>;
42 fn reset(&mut self, seed: Option<u64>) -> Result<Observation, RloxError>;
43 fn action_space(&self) -> &ActionSpace;
44 fn obs_space(&self) -> &ObsSpace;
45 fn render(&self) -> Option<String> {
46 None
47 }
48}