enum Backend
Source: distributed.joule:18

struct ProcessGroup — Process group for distributed communication
Source: distributed.joule:30
fn init_from_env() -> Result<Self, DistributedError> — Initialize process group from environment
Source: distributed.joule:42
fn init_with(…) — Initialize with explicit parameters
Source: distributed.joule:86
fn init(&mut self) -> Result<(), DistributedError> — Initialize the process group
Source: distributed.joule:108
fn init_nccl(&self) -> Result<(), DistributedError>
Source: distributed.joule:120
fn ncclGetUniqueId(id: *mut u8) -> i32;
Source: distributed.joule:124
fn ncclCommInitRank(…)
Source: distributed.joule:125
fn init_gloo(&self) -> Result<(), DistributedError>
Source: distributed.joule:143
fn init_mpi(&self) -> Result<(), DistributedError>
Source: distributed.joule:148
fn MPI_Init(argc: *mut i32, argv: *mut *mut *mut i8) -> i32;
Source: distributed.joule:152
fn MPI_Comm_rank(comm: i32, rank: *mut i32) -> i32;
Source: distributed.joule:153
fn MPI_Comm_size(comm: i32, size: *mut i32) -> i32;
Source: distributed.joule:154
fn init_tcp(&self) -> Result<(), DistributedError>
Source: distributed.joule:167
fn rank(&self) -> usize — Get current rank
Source: distributed.joule:187
fn world_size(&self) -> usize — Get world size
Source: distributed.joule:192
fn local_rank(&self) -> usize — Get local rank (on this node)
Source: distributed.joule:197
fn is_master(&self) -> bool — Check if this is the master process
Source: distributed.joule:202
fn barrier(&self) -> Result<(), DistributedError>
Source: distributed.joule:208
fn MPI_Barrier(comm: i32) -> i32;
Source: distributed.joule:220
fn all_reduce(tensor: &mut Tensor, op: ReduceOp, group: &ProcessGroup) -> Result<(), DistributedError>
Source: distributed.joule:238
enum ReduceOp
Source: distributed.joule:249
fn all_reduce_nccl(tensor: &mut Tensor, op: ReduceOp) -> Result<(), DistributedError>
Source: distributed.joule:257
fn all_reduce_mpi(tensor: &mut Tensor, op: ReduceOp) -> Result<(), DistributedError>
Source: distributed.joule:265
fn all_reduce_ring(tensor: &mut Tensor, op: ReduceOp, group: &ProcessGroup) -> Result<(), DistributedError> — Ring all-reduce (pure Joule implementation)
Source: distributed.joule:274
fn apply_reduce_op(a: &Tensor, b: &Tensor, op: ReduceOp) -> Tensor
Source: distributed.joule:314
fn send_recv(…)
Source: distributed.joule:324
fn broadcast(tensor: &mut Tensor, src: usize, group: &ProcessGroup) -> Result<(), DistributedError>
Source: distributed.joule:346
fn broadcast_tree(tensor: &mut Tensor, src: usize, group: &ProcessGroup) -> Result<(), DistributedError>
Source: distributed.joule:373
fn gather(…)
Source: distributed.joule:388
fn scatter(…)
Source: distributed.joule:417
fn all_gather(tensor: &Tensor, group: &ProcessGroup) -> Result<Vec<Tensor>, DistributedError>
Source: distributed.joule:442
fn reduce_scatter(…)
Source: distributed.joule:458
struct DistributedDataParallel — Distributed Data Parallel wrapper
Source: distributed.joule:476
fn new(module: M, process_group: ProcessGroup) -> Self — Wrap module for distributed training
Source: distributed.joule:485
fn bucket_cap_mb(mut self, cap: f64) -> Self — Set bucket size for gradient bucketing
Source: distributed.joule:497
fn find_unused_parameters(mut self) -> Self — Enable detection of unused parameters
Source: distributed.joule:503
fn sync_parameters(&mut self) -> Result<(), DistributedError>
Source: distributed.joule:510
fn sync_gradients(&mut self) -> Result<(), DistributedError>
Source: distributed.joule:519
fn module(&self) -> &M — Get inner module reference
Source: distributed.joule:535
fn module_mut(&mut self) -> &mut M — Get mutable inner module reference
Source: distributed.joule:540
fn forward(&self, input: &Tensor) -> Tensor
Source: distributed.joule:546
fn parameters(&self) -> Vec<&Tensor>
Source: distributed.joule:550
fn parameters_mut(&mut self) -> Vec<&mut Tensor>
Source: distributed.joule:554
fn train(&mut self, mode: bool)
Source: distributed.joule:558
fn training(&self) -> bool
Source: distributed.joule:562
fn to(&mut self, device: Device)
Source: distributed.joule:566
struct PipelineStage — Pipeline parallelism stage
Source: distributed.joule:576
fn new(module: M, stage_id: usize, num_stages: usize, num_microbatches: usize) -> Self
Source: distributed.joule:584
fn forward_pipeline(&self, inputs: Vec<Tensor>, group: &ProcessGroup) -> Vec<Tensor>
Source: distributed.joule:595
struct TensorParallel — Tensor parallel for large layers
Source: distributed.joule:623
fn new(module: M, tp_rank: usize, tp_size: usize, split_dim: i64) -> Self
Source: distributed.joule:631
fn forward(&self, input: &Tensor, group: &ProcessGroup) -> Result<Tensor, DistributedError>
Source: distributed.joule:642
enum DistributedError
Source: distributed.joule:667
fn from(e: std::io::Error) -> Self
Source: distributed.joule:676
fn launch(…) — Launch distributed training
Source: distributed.joule:686
fn get_device(group: &ProcessGroup) -> Device — Get optimal device for this rank
Source: distributed.joule:715
struct DistributedContext — Distributed training context
Source: distributed.joule:720
fn init() -> Result<Self, DistributedError>
Source: distributed.joule:726
fn print(&self, msg: &str) — Print only from master
Source: distributed.joule:734
fn barrier(&self) -> Result<(), DistributedError> — Barrier synchronization
Source: distributed.joule:741