← Back

Module quantize

enum QuantMethod

Source: quantize.joule:29

struct QuantConfig

Source: quantize.joule:68

fn default() -> Self

Source: quantize.joule:92

fn int8() -> Self

INT8 quantization preset

Source: quantize.joule:113

fn int4() -> Self

INT4 quantization preset

Source: quantize.joule:122

fn gptq() -> Self

GPTQ preset

Source: quantize.joule:132

fn awq() -> Self

AWQ preset

Source: quantize.joule:142

fn nf4() -> Self

NF4 preset (QLoRA-style)

Source: quantize.joule:152

fn fp8() -> Self

FP8 preset

Source: quantize.joule:162

fn int3() -> Self

INT3 preset

Source: quantize.joule:171

fn int2() -> Self

INT2 preset

Source: quantize.joule:181

fn ternary() -> Self

Ternary preset (-1, 0, +1) - effectively 1.58 bits

Source: quantize.joule:191

fn binary() -> Self

Binary preset (-1, +1) - 1 bit

Source: quantize.joule:202

fn bitnet() -> Self

BitNet b1.58 preset (ternary optimized for LLMs)

Source: quantize.joule:213

fn one_bit() -> Self

1-bit with learned scaling

Source: quantize.joule:225

fn sub_one_bit() -> Self

Sub-1-bit quantization (experimental)

Source: quantize.joule:235

struct CalibrationConfig

Source: quantize.joule:247

fn default() -> Self

Source: quantize.joule:263

struct QuantStats

Source: quantize.joule:281

fn new() -> Self

Create empty stats

Source: quantize.joule:300

fn update(&mut self, tensor: &Tensor)

Update stats with new tensor

Source: quantize.joule:313

fn compute_scale_zp(&self, bits: u8, symmetric: bool) -> (f32, i32)

Get scale and zero point for quantization

Source: quantize.joule:337

struct LayerQuantParams

Source: quantize.joule:355

enum QuantError

Source: quantize.joule:374

fn from(e: FormatError) -> Self

Source: quantize.joule:384

fn from(e: std::io::Error) -> Self

Source: quantize.joule:390

struct Quantizer

Main quantizer struct

Source: quantize.joule:396

fn new(config: QuantConfig) -> Self

Create new quantizer

Source: quantize.joule:405

fn calibrate<D: Dataset>(

Source: quantize.joule:417

fn quantize(

Source: quantize.joule:469

fn quantize_gptq_with_hessian<D: Dataset>(

Source: quantize.joule:530

fn quantize_dynamic(

Source: quantize.joule:573

fn quantize_static(

Source: quantize.joule:599

fn quantize_gptq(

Source: quantize.joule:627

fn quantize_awq(

Source: quantize.joule:680

fn quantize_llm_int8(

Source: quantize.joule:722

fn quantize_nf4(

Source: quantize.joule:766

fn quantize_fp8(

Source: quantize.joule:839

fn quantize_ternary(

Source: quantize.joule:878

fn quantize_binary(

Source: quantize.joule:936

fn quantize_bitnet(

Source: quantize.joule:998

fn quantize_sub_one_bit(

Source: quantize.joule:1054

fn pack_ternary(&self, values: &[i8]) -> Vec<u8>

Source: quantize.joule:1131

fn kmeans_init(

Source: quantize.joule:1151

fn should_skip_layer(&self, name: &str) -> bool

Source: quantize.joule:1208

fn compute_scale_zp(&self, min_val: f32, max_val: f32) -> (f32, i32)

Source: quantize.joule:1222

fn quantize_tensor(&self, tensor: &Tensor, scale: f32, zp: i32) -> Tensor

Source: quantize.joule:1238

fn quantize_per_channel(

Source: quantize.joule:1251

fn quantize_tensor_with_mask(

Source: quantize.joule:1285

fn quantize_scales(&self, scales: &Tensor) -> Result<Tensor, QuantError>

Source: quantize.joule:1317

fn compute_hessian<D: Dataset>(

Source: quantize.joule:1325

fn gptq_layer(&self, weight: &Tensor, hessian: &Tensor) -> Result<QuantizedTensor, QuantError>

Source: quantize.joule:1355

fn energy_scope(&self, op: &str) -> Option<EnergyScope>

Source: quantize.joule:1410

struct QuantizedTensor

Source: quantize.joule:1421

fn dequantize(&self) -> Tensor

Source: quantize.joule:1437

fn size_bytes(&self) -> usize

Size in bytes

Source: quantize.joule:1457

fn compression_ratio(&self) -> f32

Compression ratio vs FP16

Source: quantize.joule:1469

struct QuantizedModel

Source: quantize.joule:1477

fn size_bytes(&self) -> usize

Total size in bytes

Source: quantize.joule:1488

fn compression_ratio(&self) -> f32

Average compression ratio

Source: quantize.joule:1493

fn get(&self, name: &str) -> Option<&QuantizedTensor>

Get quantized weight

Source: quantize.joule:1506

fn dequantize_all(&self) -> HashMap<String, Tensor>

Source: quantize.joule:1512

fn save_gguf(&self, path: &Path) -> Result<(), QuantError>

Source: quantize.joule:1520

fn save_safetensors(&self, path: &Path) -> Result<(), QuantError>

Source: quantize.joule:1533

fn quantize_int8(model: HashMap<String, Tensor>) -> Result<QuantizedModel, QuantError>

Source: quantize.joule:1551

fn quantize_int4(model: HashMap<String, Tensor>) -> Result<QuantizedModel, QuantError>

Source: quantize.joule:1558

fn quantize_gptq<D: Dataset>(

Source: quantize.joule:1565

fn quantize_awq<D: Dataset>(

Source: quantize.joule:1576

fn quantize_int3(model: HashMap<String, Tensor>) -> Result<QuantizedModel, QuantError>

Source: quantize.joule:1587

fn quantize_int2(model: HashMap<String, Tensor>) -> Result<QuantizedModel, QuantError>

Source: quantize.joule:1594

fn quantize_ternary(model: HashMap<String, Tensor>) -> Result<QuantizedModel, QuantError>

Source: quantize.joule:1601

fn quantize_binary(model: HashMap<String, Tensor>) -> Result<QuantizedModel, QuantError>

Source: quantize.joule:1608

fn quantize_bitnet(model: HashMap<String, Tensor>) -> Result<QuantizedModel, QuantError>

Source: quantize.joule:1615

fn quantize_one_bit(model: HashMap<String, Tensor>) -> Result<QuantizedModel, QuantError>

Source: quantize.joule:1622

fn quantize_sub_one_bit(model: HashMap<String, Tensor>) -> Result<QuantizedModel, QuantError>

Source: quantize.joule:1629

fn estimate_energy_savings(

Estimate energy savings from quantization

Source: quantize.joule:1635

fn bits_per_weight(method: QuantMethod) -> f32

Calculate bits per weight for different quantization methods

Source: quantize.joule:1651

fn estimate_quantized_size(

Estimate model size after quantization

Source: quantize.joule:1666

fn recommend_quantization(

Recommend best quantization for memory constraint

Source: quantize.joule:1676

enum QuantComplexity

Source: quantize.joule:1711

struct QuantTimeEstimate

Source: quantize.joule:1726

fn quantization_complexity(method: QuantMethod) -> QuantComplexity

Get complexity class for a quantization method

Source: quantize.joule:1740

fn estimate_quantization_time(

Estimate the time required to quantize a model. Returns an estimated duration based on:
- Model size (parameters)
- Quantization method complexity
- Hardware (CPU vs GPU)
- Calibration requirements

Source: quantize.joule:1778

fn secs_to_duration(secs: f64) -> std::time::Duration

Source: quantize.joule:1896

fn format_duration(d: std::time::Duration) -> String

Format duration for display

Source: quantize.joule:1901

struct QuantTradeoff

Source: quantize.joule:1914

fn analyze_quantization_options(

Analyze all quantization options for a model

Source: quantize.joule:1924

fn print_quantization_analysis(

Print quantization options as a table

Source: quantize.joule:1960

fn recommend_quantization_balanced(

Recommend quantization balancing time, quality, and size

Source: quantize.joule:1988