pub struct CandleActorCritic {
pub varmap: VarMap,
/* private fields */
}
Fields§
§varmap: VarMap
Implementations§
Trait Implementations§
Source§impl ActorCritic for CandleActorCritic
impl ActorCritic for CandleActorCritic
Source§fn act(&self, obs: &TensorData) -> Result<ActionOutput, NNError>
fn act(&self, obs: &TensorData) -> Result<ActionOutput, NNError>
Sample actions from the policy (inference, no gradient tracking).
Source§fn value(&self, obs: &TensorData) -> Result<TensorData, NNError>
fn value(&self, obs: &TensorData) -> Result<TensorData, NNError>
Compute state values (inference, no gradient tracking).
Source§fn evaluate(
&self,
obs: &TensorData,
actions: &TensorData,
) -> Result<EvalOutput, NNError>
fn evaluate( &self, obs: &TensorData, actions: &TensorData, ) -> Result<EvalOutput, NNError>
Evaluate the policy on (obs, actions) pairs. Differentiable.
Source§fn ppo_step(
&mut self,
obs: &TensorData,
actions: &TensorData,
old_log_probs: &TensorData,
advantages: &TensorData,
returns: &TensorData,
old_values: &TensorData,
config: &PPOStepConfig,
) -> Result<TrainMetrics, NNError>
fn ppo_step( &mut self, obs: &TensorData, actions: &TensorData, old_log_probs: &TensorData, advantages: &TensorData, returns: &TensorData, old_values: &TensorData, config: &PPOStepConfig, ) -> Result<TrainMetrics, NNError>
Perform one PPO gradient step. Bundles forward→loss→backward→clip→step.
fn learning_rate(&self) -> f32
fn set_learning_rate(&mut self, lr: f32)
fn save(&self, path: &Path) -> Result<(), NNError>
fn load(&mut self, path: &Path) -> Result<(), NNError>
impl Send for CandleActorCritic
impl Sync for CandleActorCritic
Auto Trait Implementations§
impl !Freeze for CandleActorCritic
impl !RefUnwindSafe for CandleActorCritic
impl Unpin for CandleActorCritic
impl !UnwindSafe for CandleActorCritic
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
§impl<T> Instrument for T
impl<T> Instrument for T
§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more