enum KernelArg
Source: gpu/kernel.joule:9
enum KernelArg
Source: gpu/kernel.joule:9
fn from_buffer<T: Copy>(buffer: &GpuBuffer<T>) -> Self
Source: gpu/kernel.joule:31
struct LaunchConfig
Source: gpu/kernel.joule:38
fn new_1d(grid_size: u32, block_size: u32) -> Self
Create 1D launch config
Source: gpu/kernel.joule:49
fn new_2d(grid: [u32; 2], block: [u32; 2]) -> Self
Create 2D launch config
Source: gpu/kernel.joule:58
fn new_3d(grid: [u32; 3], block: [u32; 3]) -> Self
Create 3D launch config
Source: gpu/kernel.joule:67
fn with_shared_mem(mut self, size: usize) -> Self
Set shared memory size
Source: gpu/kernel.joule:76
fn for_elements(total: usize, block_size: u32) -> Self
Calculate optimal config for total work items
Source: gpu/kernel.joule:82
fn for_image(width: u32, height: u32, block_x: u32, block_y: u32) -> Self
Calculate optimal 2D config for image dimensions
Source: gpu/kernel.joule:88
fn total_threads(&self) -> u64
Total number of threads
Source: gpu/kernel.joule:95
struct GpuKernel
Compiled GPU kernel
Source: gpu/kernel.joule:102
enum KernelHandle
Internal kernel handle (backend-specific)
Source: gpu/kernel.joule:109
fn from_source(device: &GpuDevice, source: &str, name: &str) -> Result<Self, GpuError>
Source: gpu/kernel.joule:136
fn from_binary(device: &GpuDevice, binary: &[u8], name: &str) -> Result<Self, GpuError>
Source: gpu/kernel.joule:165
fn device(&self) -> &GpuDevice
Get the device this kernel is compiled for
Source: gpu/kernel.joule:193
fn name(&self) -> &str
Get kernel name
Source: gpu/kernel.joule:198
fn launch(&self, config: &LaunchConfig, args: &[KernelArg]) -> Result<(), GpuError>
Source: gpu/kernel.joule:204
fn launch_async(…)
Source: gpu/kernel.joule:232
fn max_threads_per_block(&self) -> u32
Get maximum threads per block for this kernel
Source: gpu/kernel.joule:261
fn cuFuncGetAttribute(…)
Source: gpu/kernel.joule:266
fn shared_mem_usage(&self) -> usize
Get shared memory usage for this kernel
Source: gpu/kernel.joule:285
fn cuFuncGetAttribute(…)
Source: gpu/kernel.joule:290
fn register_usage(&self) -> u32
Get register usage for this kernel
Source: gpu/kernel.joule:309
fn cuFuncGetAttribute(…)
Source: gpu/kernel.joule:314
fn drop(&mut self)
Source: gpu/kernel.joule:334
fn cuModuleUnload(module: *mut std::ffi::c_void) -> i32;
Source: gpu/kernel.joule:339
fn hipModuleUnload(module: *mut std::ffi::c_void) -> i32;
Source: gpu/kernel.joule:352
fn compile_cuda_kernel(source: &str, name: &str) -> Result<KernelHandle, GpuError>
Source: gpu/kernel.joule:364
fn nvrtcCreateProgram(…)
Source: gpu/kernel.joule:366
fn nvrtcCompileProgram(…)
Source: gpu/kernel.joule:374
fn nvrtcGetPTXSize(prog: *mut std::ffi::c_void, size: *mut usize) -> i32;
Source: gpu/kernel.joule:379
fn nvrtcGetPTX(prog: *mut std::ffi::c_void, ptx: *mut i8) -> i32;
Source: gpu/kernel.joule:380
fn nvrtcDestroyProgram(prog: *mut *mut std::ffi::c_void) -> i32;
Source: gpu/kernel.joule:381
fn cuModuleLoadData(module: *mut *mut std::ffi::c_void, data: *const i8) -> i32;
Source: gpu/kernel.joule:382
fn cuModuleGetFunction(…)
Source: gpu/kernel.joule:383
fn load_cuda_kernel(binary: &[u8], name: &str) -> Result<KernelHandle, GpuError>
Source: gpu/kernel.joule:443
fn cuModuleLoadData(module: *mut *mut std::ffi::c_void, data: *const u8) -> i32;
Source: gpu/kernel.joule:445
fn cuModuleGetFunction(…)
Source: gpu/kernel.joule:446
fn cuda_launch(…)
Source: gpu/kernel.joule:471
fn cuLaunchKernel(…)
Source: gpu/kernel.joule:477
fn cuda_launch_async(…)
Source: gpu/kernel.joule:548
fn cuLaunchKernel(…)
Source: gpu/kernel.joule:555
fn compile_metal_kernel(…)
Source: gpu/kernel.joule:642
fn mtl_compile_library(…)
Source: gpu/kernel.joule:648
fn mtl_library_new_function(…)
Source: gpu/kernel.joule:655
fn mtl_device_new_compute_pipeline(…)
Source: gpu/kernel.joule:661
fn load_metal_kernel(…)
Source: gpu/kernel.joule:698
fn mtl_load_library(…)
Source: gpu/kernel.joule:704
fn mtl_library_new_function(…)
Source: gpu/kernel.joule:711
fn mtl_device_new_compute_pipeline(…)
Source: gpu/kernel.joule:717
fn metal_launch(…)
Source: gpu/kernel.joule:754
fn mtl_device_new_command_queue(…)
Source: gpu/kernel.joule:761
fn mtl_command_queue_command_buffer(…)
Source: gpu/kernel.joule:764
fn mtl_command_buffer_compute_command_encoder(…)
Source: gpu/kernel.joule:767
fn mtl_compute_encoder_set_pipeline(…)
Source: gpu/kernel.joule:770
fn mtl_compute_encoder_set_buffer(…)
Source: gpu/kernel.joule:774
fn mtl_compute_encoder_set_bytes(…)
Source: gpu/kernel.joule:780
fn mtl_compute_encoder_set_threadgroup_memory_length(…)
Source: gpu/kernel.joule:786
fn mtl_compute_encoder_dispatch_threadgroups(…)
Source: gpu/kernel.joule:791
fn mtl_compute_encoder_end_encoding(encoder: *mut std::ffi::c_void);
Source: gpu/kernel.joule:796
fn mtl_command_buffer_commit(buffer: *mut std::ffi::c_void);
Source: gpu/kernel.joule:797
fn mtl_command_buffer_wait_until_completed(buffer: *mut std::ffi::c_void);
Source: gpu/kernel.joule:798
fn metal_launch_async(…)
Source: gpu/kernel.joule:931
fn mtl_command_queue_command_buffer(…)
Source: gpu/kernel.joule:939
fn mtl_command_buffer_compute_command_encoder(…)
Source: gpu/kernel.joule:942
fn mtl_compute_encoder_set_pipeline(…)
Source: gpu/kernel.joule:945
fn mtl_compute_encoder_set_buffer(…)
Source: gpu/kernel.joule:949
fn mtl_compute_encoder_set_bytes(…)
Source: gpu/kernel.joule:955
fn mtl_compute_encoder_set_threadgroup_memory_length(…)
Source: gpu/kernel.joule:961
fn mtl_compute_encoder_dispatch_threadgroups(…)
Source: gpu/kernel.joule:966
fn mtl_compute_encoder_end_encoding(encoder: *mut std::ffi::c_void);
Source: gpu/kernel.joule:971
fn mtl_command_buffer_commit(buffer: *mut std::ffi::c_void);
Source: gpu/kernel.joule:972
fn compile_rocm_kernel(source: &str, name: &str) -> Result<KernelHandle, GpuError>
Source: gpu/kernel.joule:1099
fn hiprtcCreateProgram(…)
Source: gpu/kernel.joule:1101
fn hiprtcCompileProgram(…)
Source: gpu/kernel.joule:1109
fn hiprtcGetCodeSize(prog: *mut std::ffi::c_void, size: *mut usize) -> i32;
Source: gpu/kernel.joule:1114
fn hiprtcGetCode(prog: *mut std::ffi::c_void, code: *mut i8) -> i32;
Source: gpu/kernel.joule:1115
fn hiprtcDestroyProgram(prog: *mut *mut std::ffi::c_void) -> i32;
Source: gpu/kernel.joule:1116
fn hipModuleLoadData(module: *mut *mut std::ffi::c_void, data: *const i8) -> i32;
Source: gpu/kernel.joule:1117
fn hipModuleGetFunction(…)
Source: gpu/kernel.joule:1118
fn load_rocm_kernel(binary: &[u8], name: &str) -> Result<KernelHandle, GpuError>
Source: gpu/kernel.joule:1174
fn hipModuleLoadData(module: *mut *mut std::ffi::c_void, data: *const u8) -> i32;
Source: gpu/kernel.joule:1176
fn hipModuleGetFunction(…)
Source: gpu/kernel.joule:1177
fn rocm_launch(…)
Source: gpu/kernel.joule:1202
fn rocm_launch_async(…)
Source: gpu/kernel.joule:1211
fn rocm_launch_on_stream(…)
Source: gpu/kernel.joule:1222
fn hipModuleLaunchKernel(…)
Source: gpu/kernel.joule:1229
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result
Source: gpu/kernel.joule:1315