enum Engine
Source: heterogeneous.joule:18
enum Engine — Source: heterogeneous.joule:18
fn is_single(&self) -> bool — Is this a single engine or cooperative?
Source: heterogeneous.joule:37
fn components(&self) -> Vec<Engine> — Get component engines for cooperative modes
Source: heterogeneous.joule:42
enum ComputeDType — Source: heterogeneous.joule:55
fn bits(&self) -> usize — Size in bits
Source: heterogeneous.joule:71
fn bytes(&self) -> usize — Size in bytes (rounded up)
Source: heterogeneous.joule:84
fn is_float(&self) -> bool — Is this a floating point type?
Source: heterogeneous.joule:89
fn is_quantized(&self) -> bool — Is this a quantized/integer type?
Source: heterogeneous.joule:95
fn preferred_engine(&self) -> Engine — Preferred engine for this dtype
Source: heterogeneous.joule:101
enum OpClass — Source: heterogeneous.joule:121
fn classify(op: &str, m: usize, n: usize, k: usize, batch: usize) -> Self — Classify operation from name and dimensions
Source: heterogeneous.joule:154
fn preferred_engine(&self, dtype: ComputeDType) -> Engine — Get preferred engine for this operation class and dtype
Source: heterogeneous.joule:182
fn offload_threshold(&self) -> u64 — Minimum FLOPS to justify offload to accelerator
Source: heterogeneous.joule:231
struct EngineCapabilities — Source: heterogeneous.joule:249
fn peak_ops_per_sec(&self, dtype: ComputeDType) -> f64 — Estimate peak performance for a given dtype
Source: heterogeneous.joule:278
fn efficiency_tops_per_watt(&self, dtype: ComputeDType) -> f64 — Energy efficiency in TOPS/Watt
Source: heterogeneous.joule:291
fn arithmetic_intensity_threshold(&self, dtype: ComputeDType) -> f64 — Arithmetic intensity threshold (roofline model). Operations with AI below this are memory-bound.
Source: heterogeneous.joule:297
fn apple_m5_cpu() -> Self — Apple M5 CPU (estimated specs)
Source: heterogeneous.joule:307
fn apple_m5_gpu() -> Self — Apple M5 GPU (estimated specs)
Source: heterogeneous.joule:328
fn apple_m5_npu() -> Self — Apple M5 Neural Engine (estimated specs)
Source: heterogeneous.joule:349
fn apple_m5_all() -> Vec<Self> — Get all Apple M5 engines
Source: heterogeneous.joule:369
struct ScheduleOp — Source: heterogeneous.joule:384
fn new(…) — Create from operation parameters
Source: heterogeneous.joule:409
fn with_dep(mut self, dep_id: usize) -> Self — Add dependency
Source: heterogeneous.joule:437
fn with_deps(mut self, deps: &[usize]) -> Self — Add multiple dependencies
Source: heterogeneous.joule:443
fn arithmetic_intensity(&self) -> f64 — Arithmetic intensity (FLOPS / bytes)
Source: heterogeneous.joule:449
fn is_compute_bound(&self, caps: &EngineCapabilities) -> bool — Is this operation compute-bound on given engine?
Source: heterogeneous.joule:458
fn should_offload(&self) -> bool — Should this operation be offloaded from CPU?
Source: heterogeneous.joule:464
struct ScheduleDecision — Source: heterogeneous.joule:471
struct TransformerSchedule — Source: heterogeneous.joule:492
enum BufferLocation — Source: heterogeneous.joule:519
struct Buffer — Unified memory buffer that can be used by any engine. On unified memory systems (Apple Silicon), no copies are needed.
Source: heterogeneous.joule:532
fn zeros(shape: &[usize], dtype: ComputeDType) -> Self — Create new zero-initialized buffer
Source: heterogeneous.joule:542
fn from_vec(data: Vec<T>, shape: &[usize], dtype: ComputeDType) -> Self — Create from existing data
Source: heterogeneous.joule:556
fn compute_strides(shape: &[usize]) -> Vec<usize> — Compute row-major strides
Source: heterogeneous.joule:571
fn shape(&self) -> &[usize] — Get shape
Source: heterogeneous.joule:580
fn strides(&self) -> &[usize] — Get strides
Source: heterogeneous.joule:585
fn len(&self) -> usize — Get total element count
Source: heterogeneous.joule:590
fn size_bytes(&self) -> usize — Get size in bytes
Source: heterogeneous.joule:595
fn dtype(&self) -> ComputeDType — Get dtype
Source: heterogeneous.joule:600
fn location(&self) -> BufferLocation — Get current location
Source: heterogeneous.joule:605
fn ensure_on(&mut self, engine: Engine) — Ensure buffer is accessible on engine (no-op for unified memory)
Source: heterogeneous.joule:610
fn as_ptr(&self) -> *const T — Get raw data pointer
Source: heterogeneous.joule:616
fn as_mut_ptr(&mut self) -> *mut T — Get mutable raw data pointer
Source: heterogeneous.joule:621
fn as_slice(&self) -> &[T] — Get data slice
Source: heterogeneous.joule:626
fn as_mut_slice(&mut self) -> &mut [T] — Get mutable data slice
Source: heterogeneous.joule:631
struct Gemm — General Matrix Multiply: C = α * A @ B + β * C
Source: heterogeneous.joule:641
fn default() -> Self — Source: heterogeneous.joule:649
fn new() -> Self — Source: heterogeneous.joule:660
fn transposed_a(mut self) -> Self — Source: heterogeneous.joule:664
fn transposed_b(mut self) -> Self — Source: heterogeneous.joule:669
fn with_alpha(mut self, alpha: f32) -> Self — Source: heterogeneous.joule:674
fn with_beta(mut self, beta: f32) -> Self — Source: heterogeneous.joule:679
fn execute_f32(…) — Execute GEMM on the optimal engine
Source: heterogeneous.joule:685
fn execute_cpu_f32(&self, a: &Buffer<f32>, b: &Buffer<f32>, c: &mut Buffer<f32>, m: usize, n: usize, k: usize) — Source: heterogeneous.joule:718
fn execute_gpu_f32(&self, a: &Buffer<f32>, b: &Buffer<f32>, c: &mut Buffer<f32>, m: usize, n: usize, k: usize) — Source: heterogeneous.joule:745
fn execute_cpu_f32(&self, _a: &Buffer<f32>, _b: &Buffer<f32>, _c: &mut Buffer<f32>, _m: usize, _n: usize, _k: usize) — Source: heterogeneous.joule:752
fn execute_gpu_f32(&self, _a: &Buffer<f32>, _b: &Buffer<f32>, _c: &mut Buffer<f32>, _m: usize, _n: usize, _k: usize) — Source: heterogeneous.joule:757
struct GemmResult — Source: heterogeneous.joule:764
fn scheduler() -> &'static Scheduler — Get the global heterogeneous scheduler
Source: heterogeneous.joule:779
fn init() — Initialize the compute subsystem
Source: heterogeneous.joule:786