rlox_core/
lib.rs

1//! rlox-core: Rust-accelerated reinforcement learning primitives.
2//!
3//! This crate provides the high-performance data plane for rlox:
4//!
5//! - **Buffers** ([`buffer`]): Ring buffer, prioritized replay (SumTree),
6//!   memory-mapped replay, offline dataset buffer, columnar storage.
7//! - **Environments** ([`env`]): Native CartPole, vectorized stepping (Rayon),
8//!   Gymnasium wrapper via PyO3.
9//! - **Training** ([`training`]): GAE (single + batched), V-trace, expectile loss.
10//! - **LLM ops** ([`llm`]): Token-level KL divergence, GRPO group advantages,
11//!   sequence packing, DPO pairs — all with f32/f64 variants and Rayon parallelism.
12//! - **Pipeline** ([`pipeline`]): Async rollout collector with crossbeam channels,
13//!   backpressure, and flat `RolloutBatch` format.
14//!
15//! All operations release the GIL when called from Python via PyO3.
16
17pub mod buffer; // Replay/storage buffers: ring, prioritized (SumTree), memory-mapped, offline dataset, columnar.
18pub mod env; // Environments: native CartPole, vectorized stepping (Rayon), Gymnasium wrapper via PyO3.
19pub mod error; // NOTE(review): not covered by the crate docs above; presumably crate-wide error types — confirm.
20pub mod llm; // LLM ops: token-level KL divergence, GRPO group advantages, sequence packing, DPO pairs.
21pub mod pipeline; // Async rollout collector (crossbeam channels, backpressure) and the flat `RolloutBatch` format.
22pub mod seed; // NOTE(review): not covered by the crate docs above; presumably RNG seeding helpers — confirm.
23pub mod training; // Training math: GAE (single + batched), V-trace, expectile loss.