struct InferenceConfig
Source: inference.joule:26

fn default() -> Self
Source: inference.joule:52

struct KVCacheConfig
Source: inference.joule:71

fn default() -> Self
Source: inference.joule:85

struct SpeculativeConfig
Source: inference.joule:98

struct SamplingParams
Source: inference.joule:113

fn default() -> Self
Source: inference.joule:141
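
All three config structs ship a default() constructor, so a baseline setup needs no explicit fields. A minimal sketch using only the constructors documented above (no field overrides, since the fields are not shown in this index):

```
// Start from the documented defaults for engine, cache, and sampling.
let config = InferenceConfig::default();
let kv_config = KVCacheConfig::default();
let sampling = SamplingParams::default();
```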

fn greedy() -> Self
Greedy sampling (temperature = 0)
Source: inference.joule:161

fn creative() -> Self
Creative sampling (higher temperature)
Source: inference.joule:169

fn precise() -> Self
Precise/factual sampling
Source: inference.joule:178
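
The three presets trade determinism for diversity. A sketch of picking one per use case (only the constructors listed above are assumed):

```
// Deterministic decoding: temperature = 0, always the argmax token.
let exact = SamplingParams::greedy();

// Higher-temperature decoding for open-ended generation.
let brainstorm = SamplingParams::creative();

// Low-temperature decoding for factual answers.
let factual = SamplingParams::precise();
```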

struct RequestId(pub u64);
Source: inference.joule:193

struct InferenceRequest
Source: inference.joule:197

struct InferenceResponse
Source: inference.joule:214

enum FinishReason
Source: inference.joule:229

struct GenerationMetrics
Source: inference.joule:248

fn tokens_per_joule(&self) -> f64
Energy efficiency (tokens per joule)
Source: inference.joule:271
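
tokens_per_joule is a plain ratio: tokens generated divided by energy consumed. A sketch of the likely shape (the field names generated_tokens and energy_joules are assumptions; only the method signature is documented):

```
impl GenerationMetrics {
    // Hypothetical body: tokens divided by joules, guarding against
    // a zero or missing energy reading.
    fn tokens_per_joule(&self) -> f64 {
        if self.energy_joules <= 0.0 {
            return 0.0;
        }
        self.generated_tokens as f64 / self.energy_joules
    }
}
```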

struct TokenStream
Token stream for real-time output
Source: inference.joule:285

struct StreamToken
Source: inference.joule:292
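
TokenStream delivers StreamToken values to the caller as they are produced. A consumption sketch, assuming an iterator-style next() and a text field on StreamToken (neither is confirmed by this index):

```
// Hypothetical consumer loop: print each token as it arrives.
while let Some(token) = stream.next() {
    print!("{}", token.text);
}
```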

enum InferenceError
Source: inference.joule:356

fn from(e: FormatError) -> Self
Source: inference.joule:369

struct Engine
Inference engine state
Source: inference.joule:375

struct LoadedModel
Loaded model state
Source: inference.joule:395

struct ArchConfig
Architecture-specific configuration
Source: inference.joule:409

struct ActiveRequest
Active request state
Source: inference.joule:422

fn new(config: InferenceConfig) -> Self
Create new inference engine
Source: inference.joule:433

fn with_energy_tracker(mut self, tracker: Arc<EnergyTracker>) -> Self
Create engine with energy tracking
Source: inference.joule:447
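
Engine construction is a builder chain: new(config), then the optional with_energy_tracker (it takes mut self and returns Self). A sketch against the two documented signatures (EnergyTracker::new() is an assumption):

```
// Build an engine and attach a shared energy tracker.
let engine = Engine::new(InferenceConfig::default())
    .with_energy_tracker(Arc::new(EnergyTracker::new()));
```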

fn submit_request(...)
Submit request for processing
Source: inference.joule:592

fn stop(&self)
Stop the engine
Source: inference.joule:726

fn metrics(&self) -> EngineMetrics
Get current metrics
Source: inference.joule:731
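
A request lifecycle sketch: submit, read metrics, shut down. submit_request's parameter list is truncated in this index, so the request shape below is a guess; stop() and metrics() match the documented signatures:

```
// Hypothetical request fields; only RequestId and SamplingParams
// are documented types in this index.
let request = InferenceRequest {
    id: RequestId(1),
    prompt: "Explain paged attention.".to_string(),
    params: SamplingParams::precise(),
};
let response = engine.submit_request(request)?;

// Documented accessors.
let stats = engine.metrics();
engine.stop();
```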

fn select_best_device(&self, model_size: u64) -> Result<DeviceId, InferenceError>
Source: inference.joule:747

fn distribute_weights(...)
Source: inference.joule:763

fn load_tokenizer(&self, path: &std::path::Path) -> Result<Tokenizer, InferenceError>
Source: inference.joule:786

fn extract_arch_config(&self, loader: &ModelLoader) -> Result<ArchConfig, InferenceError>
Source: inference.joule:796

fn prepare_batch(...)
Source: inference.joule:830

fn forward(...)
Source: inference.joule:873

fn sample_batch(...)
Source: inference.joule:894

fn apply_top_p(&self, logits: &Tensor, top_p: f32) -> Tensor
Source: inference.joule:941

fn apply_top_k(&self, probs: &Tensor, top_k: u32) -> Tensor
Source: inference.joule:952

fn multinomial_sample(&self, probs: &Tensor, seed: Option<u64>) -> Result<Tensor, InferenceError>
Source: inference.joule:961
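
apply_top_p, apply_top_k, and multinomial_sample implement standard nucleus/top-k sampling over the engine's Tensor type. Since that type's API is not shown here, this sketch illustrates the top-p technique over a plain probability vector; it is not the engine's code:

```
// Nucleus (top-p) filtering: keep the smallest set of tokens whose
// cumulative probability mass reaches p, zero the rest, renormalize.
fn apply_top_p(probs: &[f32], top_p: f32) -> Vec<f32> {
    // Token indices ordered by descending probability.
    let mut order: Vec<usize> = (0..probs.len()).collect();
    order.sort_by(|&a, &b| probs[b].partial_cmp(&probs[a]).unwrap());

    // Keep tokens until the cumulative mass crosses top_p.
    let mut kept = vec![0.0; probs.len()];
    let mut cumulative = 0.0;
    for &i in &order {
        kept[i] = probs[i];
        cumulative += probs[i];
        if cumulative >= top_p {
            break;
        }
    }

    // Renormalize the surviving mass to a proper distribution.
    let total: f32 = kept.iter().sum();
    kept.iter().map(|p| p / total).collect()
}
```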

fn check_stop_conditions(...)
Source: inference.joule:1000

fn compute_metrics(&self, req: &ActiveRequest) -> GenerationMetrics
Source: inference.joule:1033

struct EngineMetrics
Source: inference.joule:1070

struct KVCacheManager
Paged KV cache manager
Source: inference.joule:1082

fn new(...)
Source: inference.joule:1102

fn allocate_pages(&self, count: usize) -> Result<Vec<usize>, InferenceError>
Source: inference.joule:1140

fn free_pages(&self, pages: &[usize])
Source: inference.joule:1151

fn utilization(&self) -> f64
Source: inference.joule:1156
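
allocate_pages, free_pages, and utilization give paged-attention-style accounting: a request borrows fixed-size cache pages and returns them on completion, while utilization reports the allocated fraction. A usage sketch against the documented signatures (the constructor's arguments are truncated in this index, so kv_manager is assumed to already exist):

```
// Reserve pages for one request's KV cache, check pressure, release.
let pages = kv_manager.allocate_pages(8)?;
if kv_manager.utilization() > 0.9 {
    // Cache nearly full: a scheduler would defer new requests here.
}
kv_manager.free_pages(&pages);
```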

struct Tokenizer
Simple tokenizer wrapper
Source: inference.joule:1167

fn from_file(path: &std::path::Path) -> Result<Self, String>
Load tokenizer from file
Source: inference.joule:1176

fn encode(&self, text: &str) -> Result<Vec<u32>, InferenceError>
Encode text to token IDs
Source: inference.joule:1186

fn decode(&self, tokens: &[u32]) -> Result<String, InferenceError>
Decode token IDs to text
Source: inference.joule:1235

fn decode_single(&self, token: u32) -> Result<String, InferenceError>
Decode single token
Source: inference.joule:1246
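
The Tokenizer wrapper round-trips text through token IDs. A sketch using only the documented signatures (the "tokenizer.json" path is illustrative; note from_file reports errors as String rather than InferenceError):

```
let tokenizer = Tokenizer::from_file(std::path::Path::new("tokenizer.json"))?;
let ids = tokenizer.encode("tokens per joule")?;   // text -> token IDs
let text = tokenizer.decode(&ids)?;                // token IDs -> text

// Streaming paths decode one token at a time.
let piece = tokenizer.decode_single(ids[0])?;
```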

fn engine() -> Engine
Create engine with defaults
Source: inference.joule:1258

fn engine_with_config(config: InferenceConfig) -> Engine
Create engine with config
Source: inference.joule:1263
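
These two free functions are the module's entry points, wrapping the Engine constructors above. Both calls below use only the documented signatures:

```
// Defaults everywhere.
let quick = engine();

// Explicit configuration.
let tuned = engine_with_config(InferenceConfig::default());
```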