← Back

Module heterogeneous

enum Engine

Source: heterogeneous.joule:18

fn is_single(&self) -> bool

Returns true if this is a single engine rather than a cooperative mode.

Source: heterogeneous.joule:37

fn components(&self) -> Vec<Engine>

Get component engines for cooperative modes

Source: heterogeneous.joule:42

enum ComputeDType

Source: heterogeneous.joule:55

fn bits(&self) -> usize

Size in bits

Source: heterogeneous.joule:71

fn bytes(&self) -> usize

Size in bytes (rounded up)

Source: heterogeneous.joule:84

fn is_float(&self) -> bool

Is this a floating point type?

Source: heterogeneous.joule:89

fn is_quantized(&self) -> bool

Is this a quantized/integer type?

Source: heterogeneous.joule:95

fn preferred_engine(&self) -> Engine

Preferred engine for this dtype

Source: heterogeneous.joule:101

enum OpClass

Source: heterogeneous.joule:121

fn classify(op: &str, m: usize, n: usize, k: usize, batch: usize) -> Self

Classify operation from name and dimensions

Source: heterogeneous.joule:154

fn preferred_engine(&self, dtype: ComputeDType) -> Engine

Get preferred engine for this operation class and dtype

Source: heterogeneous.joule:182

fn offload_threshold(&self) -> u64

Minimum FLOPs to justify offloading to an accelerator

Source: heterogeneous.joule:231

struct EngineCapabilities

Source: heterogeneous.joule:249

fn peak_ops_per_sec(&self, dtype: ComputeDType) -> f64

Estimate peak performance for a given dtype

Source: heterogeneous.joule:278

fn efficiency_tops_per_watt(&self, dtype: ComputeDType) -> f64

Energy efficiency in TOPS/Watt

Source: heterogeneous.joule:291

fn arithmetic_intensity_threshold(&self, dtype: ComputeDType) -> f64

Arithmetic intensity threshold (roofline model). Operations with arithmetic intensity (AI) below this are memory-bound.

Source: heterogeneous.joule:297

fn apple_m5_cpu() -> Self

Apple M5 CPU (estimated specs)

Source: heterogeneous.joule:307

fn apple_m5_gpu() -> Self

Apple M5 GPU (estimated specs)

Source: heterogeneous.joule:328

fn apple_m5_npu() -> Self

Apple M5 Neural Engine (estimated specs)

Source: heterogeneous.joule:349

fn apple_m5_all() -> Vec<Self>

Get all Apple M5 engines

Source: heterogeneous.joule:369

struct ScheduleOp

Source: heterogeneous.joule:384

fn new(

Create from operation parameters

Source: heterogeneous.joule:409

fn with_dep(mut self, dep_id: usize) -> Self

Add dependency

Source: heterogeneous.joule:437

fn with_deps(mut self, deps: &[usize]) -> Self

Add multiple dependencies

Source: heterogeneous.joule:443

fn arithmetic_intensity(&self) -> f64

Arithmetic intensity (FLOPs per byte)

Source: heterogeneous.joule:449

fn is_compute_bound(&self, caps: &EngineCapabilities) -> bool

Is this operation compute-bound on given engine?

Source: heterogeneous.joule:458

fn should_offload(&self) -> bool

Should this operation be offloaded from CPU?

Source: heterogeneous.joule:464

struct ScheduleDecision

Source: heterogeneous.joule:471

struct TransformerSchedule

Source: heterogeneous.joule:492

enum BufferLocation

Source: heterogeneous.joule:519

struct Buffer

Unified memory buffer that can be used by any engine. On unified memory systems (Apple Silicon), no copies are needed.

Source: heterogeneous.joule:532

fn zeros(shape: &[usize], dtype: ComputeDType) -> Self

Create new zero-initialized buffer

Source: heterogeneous.joule:542

fn from_vec(data: Vec<T>, shape: &[usize], dtype: ComputeDType) -> Self

Create from existing data

Source: heterogeneous.joule:556

fn compute_strides(shape: &[usize]) -> Vec<usize>

Compute row-major strides

Source: heterogeneous.joule:571

fn shape(&self) -> &[usize]

Get shape

Source: heterogeneous.joule:580

fn strides(&self) -> &[usize]

Get strides

Source: heterogeneous.joule:585

fn len(&self) -> usize

Get total element count

Source: heterogeneous.joule:590

fn size_bytes(&self) -> usize

Get size in bytes

Source: heterogeneous.joule:595

fn dtype(&self) -> ComputeDType

Get dtype

Source: heterogeneous.joule:600

fn location(&self) -> BufferLocation

Get current location

Source: heterogeneous.joule:605

fn ensure_on(&mut self, engine: Engine)

Ensure buffer is accessible on engine (no-op for unified memory)

Source: heterogeneous.joule:610

fn as_ptr(&self) -> *const T

Get raw data pointer

Source: heterogeneous.joule:616

fn as_mut_ptr(&mut self) -> *mut T

Get mutable raw data pointer

Source: heterogeneous.joule:621

fn as_slice(&self) -> &[T]

Get data slice

Source: heterogeneous.joule:626

fn as_mut_slice(&mut self) -> &mut [T]

Get mutable data slice

Source: heterogeneous.joule:631

struct Gemm

General Matrix Multiply: C = α * A @ B + β * C

Source: heterogeneous.joule:641

fn default() -> Self

Source: heterogeneous.joule:649

fn new() -> Self

Source: heterogeneous.joule:660

fn transposed_a(mut self) -> Self

Source: heterogeneous.joule:664

fn transposed_b(mut self) -> Self

Source: heterogeneous.joule:669

fn with_alpha(mut self, alpha: f32) -> Self

Source: heterogeneous.joule:674

fn with_beta(mut self, beta: f32) -> Self

Source: heterogeneous.joule:679

fn execute_f32(

Execute GEMM on the optimal engine

Source: heterogeneous.joule:685

fn execute_cpu_f32(&self, a: &Buffer<f32>, b: &Buffer<f32>, c: &mut Buffer<f32>, m: usize, n: usize, k: usize)

Source: heterogeneous.joule:718

fn execute_gpu_f32(&self, a: &Buffer<f32>, b: &Buffer<f32>, c: &mut Buffer<f32>, m: usize, n: usize, k: usize)

Source: heterogeneous.joule:745

fn execute_cpu_f32(&self, _a: &Buffer<f32>, _b: &Buffer<f32>, _c: &mut Buffer<f32>, _m: usize, _n: usize, _k: usize)

Source: heterogeneous.joule:752

fn execute_gpu_f32(&self, _a: &Buffer<f32>, _b: &Buffer<f32>, _c: &mut Buffer<f32>, _m: usize, _n: usize, _k: usize)

Source: heterogeneous.joule:757

struct GemmResult

Source: heterogeneous.joule:764

fn scheduler() -> &'static Scheduler

Get the global heterogeneous scheduler

Source: heterogeneous.joule:779

fn init()

Initialize the compute subsystem

Source: heterogeneous.joule:786