pub trait DeterministicPolicy {
// Required methods
fn act(&self, obs: &TensorData) -> Result<TensorData, NNError>;
fn target_act(&self, obs: &TensorData) -> Result<TensorData, NNError>;
fn soft_update_target(&mut self, tau: f32);
fn learning_rate(&self) -> f32;
fn set_learning_rate(&mut self, lr: f32);
fn save(&self, path: &Path) -> Result<(), NNError>;
fn load(&mut self, path: &Path) -> Result<(), NNError>;
}

Expand description
Deterministic policy for TD3.
Training steps (td3_actor_step) are intentionally NOT on this trait because
they require autograd to flow through the critic's Q-network. Trait methods
convert tensors to TensorData (a plain Vec-backed representation), which detaches
them from the autograd graph; for training, use the concrete policy type's
td3_actor_step method instead.
Required Methods§
fn act(&self, obs: &TensorData) -> Result<TensorData, NNError>
Compute deterministic action. Returns [batch_size, act_dim].
fn target_act(&self, obs: &TensorData) -> Result<TensorData, NNError>
Compute target policy action (from target network).
fn soft_update_target(&mut self, tau: f32)
Polyak soft update of target network.