← Back

Module inference

struct InferenceConfig

Source: inference.joule:26

fn default() -> Self

Source: inference.joule:52

struct KVCacheConfig

Source: inference.joule:71

fn default() -> Self

Source: inference.joule:85

struct SpeculativeConfig

Source: inference.joule:98

struct SamplingParams

Source: inference.joule:113

fn default() -> Self

Source: inference.joule:141

fn greedy() -> Self

Greedy sampling (temperature = 0)

Source: inference.joule:161

fn creative() -> Self

Creative sampling (higher temperature)

Source: inference.joule:169

fn precise() -> Self

Precise/factual sampling

Source: inference.joule:178

struct RequestId(pub u64);

Source: inference.joule:193

struct InferenceRequest

Source: inference.joule:197

struct InferenceResponse

Source: inference.joule:214

enum FinishReason

Source: inference.joule:229

struct GenerationMetrics

Source: inference.joule:248

fn tokens_per_joule(&self) -> f64

Energy efficiency (tokens per joule)

Source: inference.joule:271

struct TokenStream

Token stream for real-time output

Source: inference.joule:285

struct StreamToken

Source: inference.joule:292

enum InferenceError

Source: inference.joule:356

fn from(e: FormatError) -> Self

Source: inference.joule:369

struct Engine

Inference engine state

Source: inference.joule:375

struct LoadedModel

Loaded model state

Source: inference.joule:395

struct ArchConfig

Architecture-specific configuration

Source: inference.joule:409

struct ActiveRequest

Active request state

Source: inference.joule:422

fn new(config: InferenceConfig) -> Self

Create new inference engine

Source: inference.joule:433

fn with_energy_tracker(mut self, tracker: Arc<EnergyTracker>) -> Self

Create engine with energy tracking

Source: inference.joule:447

fn submit_request(…)

Submit request for processing

Source: inference.joule:592

fn stop(&self)

Stop the engine

Source: inference.joule:726

fn metrics(&self) -> EngineMetrics

Get current metrics

Source: inference.joule:731

fn select_best_device(&self, model_size: u64) -> Result<DeviceId, InferenceError>

Source: inference.joule:747

fn distribute_weights(…)

Source: inference.joule:763

fn load_tokenizer(&self, path: &std::path::Path) -> Result<Tokenizer, InferenceError>

Source: inference.joule:786

fn extract_arch_config(&self, loader: &ModelLoader) -> Result<ArchConfig, InferenceError>

Source: inference.joule:796

fn prepare_batch(…)

Source: inference.joule:830

fn forward(…)

Source: inference.joule:873

fn sample_batch(…)

Source: inference.joule:894

fn apply_top_p(&self, logits: &Tensor, top_p: f32) -> Tensor

Source: inference.joule:941

fn apply_top_k(&self, probs: &Tensor, top_k: u32) -> Tensor

Source: inference.joule:952

fn multinomial_sample(&self, probs: &Tensor, seed: Option<u64>) -> Result<Tensor, InferenceError>

Source: inference.joule:961

fn check_stop_conditions(…)

Source: inference.joule:1000

fn compute_metrics(&self, req: &ActiveRequest) -> GenerationMetrics

Source: inference.joule:1033

struct EngineMetrics

Source: inference.joule:1070

struct KVCacheManager

Paged KV cache manager

Source: inference.joule:1082

fn new(…)

Source: inference.joule:1102

fn allocate_pages(&self, count: usize) -> Result<Vec<usize>, InferenceError>

Source: inference.joule:1140

fn free_pages(&self, pages: &[usize])

Source: inference.joule:1151

fn utilization(&self) -> f64

Source: inference.joule:1156

struct Tokenizer

Simple tokenizer wrapper

Source: inference.joule:1167

fn from_file(path: &std::path::Path) -> Result<Self, String>

Load tokenizer from file

Source: inference.joule:1176

fn encode(&self, text: &str) -> Result<Vec<u32>, InferenceError>

Encode text to token IDs

Source: inference.joule:1186

fn decode(&self, tokens: &[u32]) -> Result<String, InferenceError>

Decode token IDs to text

Source: inference.joule:1235

fn decode_single(&self, token: u32) -> Result<String, InferenceError>

Decode single token

Source: inference.joule:1246

fn engine() -> Engine

Create engine with defaults

Source: inference.joule:1258

fn engine_with_config(config: InferenceConfig) -> Engine

Create engine with config

Source: inference.joule:1263