← Back

Module kernel

enum KernelArg

Source: gpu/kernel.joule:9

fn from_buffer<T: Copy>(buffer: &GpuBuffer<T>) -> Self

Source: gpu/kernel.joule:31

struct LaunchConfig

Source: gpu/kernel.joule:38

fn new_1d(grid_size: u32, block_size: u32) -> Self

Create a 1D launch configuration

Source: gpu/kernel.joule:49

fn new_2d(grid: [u32; 2], block: [u32; 2]) -> Self

Create a 2D launch configuration

Source: gpu/kernel.joule:58

fn new_3d(grid: [u32; 3], block: [u32; 3]) -> Self

Create a 3D launch configuration

Source: gpu/kernel.joule:67

fn with_shared_mem(mut self, size: usize) -> Self

Set the shared memory size

Source: gpu/kernel.joule:76

fn for_elements(total: usize, block_size: u32) -> Self

Calculate an optimal launch configuration for the given total number of work items

Source: gpu/kernel.joule:82

fn for_image(width: u32, height: u32, block_x: u32, block_y: u32) -> Self

Calculate an optimal 2D launch configuration for the given image dimensions

Source: gpu/kernel.joule:88

fn total_threads(&self) -> u64

Get the total number of threads

Source: gpu/kernel.joule:95

struct GpuKernel

Compiled GPU kernel

Source: gpu/kernel.joule:102

enum KernelHandle

Internal kernel handle (backend-specific)

Source: gpu/kernel.joule:109

fn from_source(device: &GpuDevice, source: &str, name: &str) -> Result<Self, GpuError>

Source: gpu/kernel.joule:136

fn from_binary(device: &GpuDevice, binary: &[u8], name: &str) -> Result<Self, GpuError>

Source: gpu/kernel.joule:165

fn device(&self) -> &GpuDevice

Get the device this kernel is compiled for

Source: gpu/kernel.joule:193

fn name(&self) -> &str

Get the kernel name

Source: gpu/kernel.joule:198

fn launch(&self, config: &LaunchConfig, args: &[KernelArg]) -> Result<(), GpuError>

Source: gpu/kernel.joule:204

fn launch_async(

Source: gpu/kernel.joule:232

fn max_threads_per_block(&self) -> u32

Get maximum threads per block for this kernel

Source: gpu/kernel.joule:261

fn cuFuncGetAttribute(

Source: gpu/kernel.joule:266

fn shared_mem_usage(&self) -> usize

Get shared memory usage for this kernel

Source: gpu/kernel.joule:285

fn cuFuncGetAttribute(

Source: gpu/kernel.joule:290

fn register_usage(&self) -> u32

Get register usage for this kernel

Source: gpu/kernel.joule:309

fn cuFuncGetAttribute(

Source: gpu/kernel.joule:314

fn drop(&mut self)

Source: gpu/kernel.joule:334

fn cuModuleUnload(module: *mut std::ffi::c_void) -> i32;

Source: gpu/kernel.joule:339

fn hipModuleUnload(module: *mut std::ffi::c_void) -> i32;

Source: gpu/kernel.joule:352

fn compile_cuda_kernel(source: &str, name: &str) -> Result<KernelHandle, GpuError>

Source: gpu/kernel.joule:364

fn nvrtcCreateProgram(

Source: gpu/kernel.joule:366

fn nvrtcCompileProgram(

Source: gpu/kernel.joule:374

fn nvrtcGetPTXSize(prog: *mut std::ffi::c_void, size: *mut usize) -> i32;

Source: gpu/kernel.joule:379

fn nvrtcGetPTX(prog: *mut std::ffi::c_void, ptx: *mut i8) -> i32;

Source: gpu/kernel.joule:380

fn nvrtcDestroyProgram(prog: *mut *mut std::ffi::c_void) -> i32;

Source: gpu/kernel.joule:381

fn cuModuleLoadData(module: *mut *mut std::ffi::c_void, data: *const i8) -> i32;

Source: gpu/kernel.joule:382

fn cuModuleGetFunction(

Source: gpu/kernel.joule:383

fn load_cuda_kernel(binary: &[u8], name: &str) -> Result<KernelHandle, GpuError>

Source: gpu/kernel.joule:443

fn cuModuleLoadData(module: *mut *mut std::ffi::c_void, data: *const u8) -> i32;

Source: gpu/kernel.joule:445

fn cuModuleGetFunction(

Source: gpu/kernel.joule:446

fn cuda_launch(

Source: gpu/kernel.joule:471

fn cuLaunchKernel(

Source: gpu/kernel.joule:477

fn cuda_launch_async(

Source: gpu/kernel.joule:548

fn cuLaunchKernel(

Source: gpu/kernel.joule:555

fn compile_metal_kernel(

Source: gpu/kernel.joule:642

fn mtl_compile_library(

Source: gpu/kernel.joule:648

fn mtl_library_new_function(

Source: gpu/kernel.joule:655

fn mtl_device_new_compute_pipeline(

Source: gpu/kernel.joule:661

fn load_metal_kernel(

Source: gpu/kernel.joule:698

fn mtl_load_library(

Source: gpu/kernel.joule:704

fn mtl_library_new_function(

Source: gpu/kernel.joule:711

fn mtl_device_new_compute_pipeline(

Source: gpu/kernel.joule:717

fn metal_launch(

Source: gpu/kernel.joule:754

fn mtl_device_new_command_queue(

Source: gpu/kernel.joule:761

fn mtl_command_queue_command_buffer(

Source: gpu/kernel.joule:764

fn mtl_command_buffer_compute_command_encoder(

Source: gpu/kernel.joule:767

fn mtl_compute_encoder_set_pipeline(

Source: gpu/kernel.joule:770

fn mtl_compute_encoder_set_buffer(

Source: gpu/kernel.joule:774

fn mtl_compute_encoder_set_bytes(

Source: gpu/kernel.joule:780

fn mtl_compute_encoder_set_threadgroup_memory_length(

Source: gpu/kernel.joule:786

fn mtl_compute_encoder_dispatch_threadgroups(

Source: gpu/kernel.joule:791

fn mtl_compute_encoder_end_encoding(encoder: *mut std::ffi::c_void);

Source: gpu/kernel.joule:796

fn mtl_command_buffer_commit(buffer: *mut std::ffi::c_void);

Source: gpu/kernel.joule:797

fn mtl_command_buffer_wait_until_completed(buffer: *mut std::ffi::c_void);

Source: gpu/kernel.joule:798

fn metal_launch_async(

Source: gpu/kernel.joule:931

fn mtl_command_queue_command_buffer(

Source: gpu/kernel.joule:939

fn mtl_command_buffer_compute_command_encoder(

Source: gpu/kernel.joule:942

fn mtl_compute_encoder_set_pipeline(

Source: gpu/kernel.joule:945

fn mtl_compute_encoder_set_buffer(

Source: gpu/kernel.joule:949

fn mtl_compute_encoder_set_bytes(

Source: gpu/kernel.joule:955

fn mtl_compute_encoder_set_threadgroup_memory_length(

Source: gpu/kernel.joule:961

fn mtl_compute_encoder_dispatch_threadgroups(

Source: gpu/kernel.joule:966

fn mtl_compute_encoder_end_encoding(encoder: *mut std::ffi::c_void);

Source: gpu/kernel.joule:971

fn mtl_command_buffer_commit(buffer: *mut std::ffi::c_void);

Source: gpu/kernel.joule:972

fn compile_rocm_kernel(source: &str, name: &str) -> Result<KernelHandle, GpuError>

Source: gpu/kernel.joule:1099

fn hiprtcCreateProgram(

Source: gpu/kernel.joule:1101

fn hiprtcCompileProgram(

Source: gpu/kernel.joule:1109

fn hiprtcGetCodeSize(prog: *mut std::ffi::c_void, size: *mut usize) -> i32;

Source: gpu/kernel.joule:1114

fn hiprtcGetCode(prog: *mut std::ffi::c_void, code: *mut i8) -> i32;

Source: gpu/kernel.joule:1115

fn hiprtcDestroyProgram(prog: *mut *mut std::ffi::c_void) -> i32;

Source: gpu/kernel.joule:1116

fn hipModuleLoadData(module: *mut *mut std::ffi::c_void, data: *const i8) -> i32;

Source: gpu/kernel.joule:1117

fn hipModuleGetFunction(

Source: gpu/kernel.joule:1118

fn load_rocm_kernel(binary: &[u8], name: &str) -> Result<KernelHandle, GpuError>

Source: gpu/kernel.joule:1174

fn hipModuleLoadData(module: *mut *mut std::ffi::c_void, data: *const u8) -> i32;

Source: gpu/kernel.joule:1176

fn hipModuleGetFunction(

Source: gpu/kernel.joule:1177

fn rocm_launch(

Source: gpu/kernel.joule:1202

fn rocm_launch_async(

Source: gpu/kernel.joule:1211

fn rocm_launch_on_stream(

Source: gpu/kernel.joule:1222

fn hipModuleLaunchKernel(

Source: gpu/kernel.joule:1229

fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result

Source: gpu/kernel.joule:1315