diff --git a/.github/workflows/rustdoc.yml b/.github/workflows/rustdoc.yml index 046a89cee..f62503604 100644 --- a/.github/workflows/rustdoc.yml +++ b/.github/workflows/rustdoc.yml @@ -37,8 +37,8 @@ jobs: --enable-index-page \ --extern-html-root-url const_type_layout=https://docs.rs/const-type-layout/0.3.2/ \ --extern-html-root-url final=https://docs.rs/final/0.1.1/ \ - --extern-html-root-url rustacuda=https://docs.rs/rustacuda/0.1.3/ \ - --extern-html-root-url rustacuda_core=https://docs.rs/rustacuda_core/0.1.2/ \ + --extern-html-root-url cust=https://docs.rs/cust/0.3.2/ \ + --extern-html-root-url cust_core=https://docs.rs/cust_core/0.1/ \ -Zunstable-options \ " cargo doc \ --all-features \ diff --git a/Cargo.toml b/Cargo.toml index f4d43727a..6ec09e455 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ edition = "2021" authors = ["Juniper Tyree "] repository = "https://github.com/juntyr/rust-cuda" license = "MIT OR Apache-2.0" -rust-version = "1.81" # nightly +rust-version = "1.84" # nightly [workspace.dependencies] # workspace-internal crates @@ -24,12 +24,12 @@ rust-cuda-derive = { version = "0.1", path = "rust-cuda-derive", default-feature rust-cuda-kernel = { version = "0.1", path = "rust-cuda-kernel", default-features = false } # third-party dependencies with unpublished patches -rustacuda = { git = "https://github.com/juntyr/RustaCUDA", rev = "c6ea7cc", default-features = false } -rustacuda_core = { git = "https://github.com/juntyr/RustaCUDA", rev = "c6ea7cc", default-features = false } +cust = { git = "https://github.com/juntyr/Rust-GPU-CUDA.git", rev = "5365c14", version = "0.3.2", default-features = false } +cust_core = { git = "https://github.com/juntyr/Rust-GPU-CUDA.git", rev = "5365c14", version = "0.1", default-features = false } # crates.io third-party dependencies cargo_metadata = { version = "0.19", default-features = false } -cargo-util = { version = "=0.2.16", default-features = false } # TODO: keep in sync with toolchain +cargo-util = { version = "=0.2.17", default-features = false } # TODO: keep in sync with toolchain colored = { version = "3.0", default-features = false } const-type-layout = { version = "0.3.2", default-features = false } final = { version = "0.1.1", default-features = false } @@ -92,16 +92,16 @@ default = [] derive = ["dep:rust-cuda-derive"] device = [] final = ["dep:final"] -host = ["dep:rustacuda", "dep:regex", "dep:oneshot", "dep:safer_owning_ref"] +host = ["dep:cust", "dep:regex", "dep:oneshot", "dep:safer_owning_ref"] kernel = ["dep:rust-cuda-kernel"] [dependencies] const-type-layout = { workspace = true, features = ["derive"] } +cust = { workspace = true, optional = true } +cust_core = { workspace = true } final = { workspace = true, optional = true } oneshot = { workspace = true, features = ["std", "async"], optional = true } regex = { workspace = true, optional = true } -rustacuda = { workspace = true, optional = true } -rustacuda_core = { workspace = true } rust-cuda-derive = { workspace = true, optional = true } rust-cuda-kernel = { workspace = true, optional = true } safer_owning_ref = { workspace = true, optional = true } diff --git a/README.md b/README.md index 4140f9e4b..ebc3a5a0b 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/rust-cuda/ci.yml?branch=main [workflow]: https://github.com/juntyr/rust-cuda/actions/workflows/ci.yml?query=branch%3Amain -[MSRV]: https://img.shields.io/badge/MSRV-1.81.0--nightly-orange +[MSRV]: 
https://img.shields.io/badge/MSRV-1.84.0--nightly-orange [repo]: https://github.com/juntyr/rust-cuda [Rust Doc]: https://img.shields.io/badge/docs-main-blue diff --git a/examples/lifetime/src/main.rs b/examples/lifetime/src/main.rs index 78cbe943d..212d5de45 100644 --- a/examples/lifetime/src/main.rs +++ b/examples/lifetime/src/main.rs @@ -2,30 +2,30 @@ use lifetime::{kernel, link}; -fn main() -> rust_cuda::deps::rustacuda::error::CudaResult<()> { +fn main() -> rust_cuda::deps::cust::error::CudaResult<()> { // Link the lifetime-only-generic CUDA kernel struct KernelPtx<'a, 'b>(core::marker::PhantomData<(&'a (), &'b ())>); link! { impl kernel<'a, 'b> for KernelPtx } // Initialize the CUDA API - rust_cuda::deps::rustacuda::init(rust_cuda::deps::rustacuda::CudaFlags::empty())?; + rust_cuda::deps::cust::init(rust_cuda::deps::cust::CudaFlags::empty())?; // Get the first CUDA GPU device - let device = rust_cuda::deps::rustacuda::device::Device::get_device(0)?; + let device = rust_cuda::deps::cust::device::Device::get_device(0)?; // Create a CUDA context associated to this device let _context = rust_cuda::host::CudaDropWrapper::from( - rust_cuda::deps::rustacuda::context::Context::create_and_push( - rust_cuda::deps::rustacuda::context::ContextFlags::MAP_HOST - | rust_cuda::deps::rustacuda::context::ContextFlags::SCHED_AUTO, + rust_cuda::deps::cust::context::legacy::Context::create_and_push( + rust_cuda::deps::cust::context::legacy::ContextFlags::MAP_HOST + | rust_cuda::deps::cust::context::legacy::ContextFlags::SCHED_AUTO, device, )?, ); // Create a new CUDA stream to submit kernels to let mut stream = - rust_cuda::host::CudaDropWrapper::from(rust_cuda::deps::rustacuda::stream::Stream::new( - rust_cuda::deps::rustacuda::stream::StreamFlags::NON_BLOCKING, + rust_cuda::host::CudaDropWrapper::from(rust_cuda::deps::cust::stream::Stream::new( + rust_cuda::deps::cust::stream::StreamFlags::NON_BLOCKING, None, )?); @@ -34,8 +34,8 @@ fn main() -> rust_cuda::deps::rustacuda::error::CudaResult<()> { // Create a new instance of the CUDA kernel and prepare the launch config let mut kernel = rust_cuda::kernel::TypedPtxKernel::::new::(None); let config = rust_cuda::kernel::LaunchConfig { - grid: rust_cuda::deps::rustacuda::function::GridSize::x(1), - block: rust_cuda::deps::rustacuda::function::BlockSize::x(4), + grid: rust_cuda::deps::cust::function::GridSize::x(1), + block: rust_cuda::deps::cust::function::BlockSize::x(4), ptx_jit: false, }; diff --git a/examples/print/src/main.rs b/examples/print/src/main.rs index 008b39f5b..e4ae250d0 100644 --- a/examples/print/src/main.rs +++ b/examples/print/src/main.rs @@ -2,38 +2,38 @@ use print::{kernel, link, Action}; -fn main() -> rust_cuda::deps::rustacuda::error::CudaResult<()> { +fn main() -> rust_cuda::deps::cust::error::CudaResult<()> { // Link the non-generic CUDA kernel struct KernelPtx; link! 
{ impl kernel for KernelPtx } // Initialize the CUDA API - rust_cuda::deps::rustacuda::init(rust_cuda::deps::rustacuda::CudaFlags::empty())?; + rust_cuda::deps::cust::init(rust_cuda::deps::cust::CudaFlags::empty())?; // Get the first CUDA GPU device - let device = rust_cuda::deps::rustacuda::device::Device::get_device(0)?; + let device = rust_cuda::deps::cust::device::Device::get_device(0)?; // Create a CUDA context associated to this device let _context = rust_cuda::host::CudaDropWrapper::from( - rust_cuda::deps::rustacuda::context::Context::create_and_push( - rust_cuda::deps::rustacuda::context::ContextFlags::MAP_HOST - | rust_cuda::deps::rustacuda::context::ContextFlags::SCHED_AUTO, + rust_cuda::deps::cust::context::legacy::Context::create_and_push( + rust_cuda::deps::cust::context::legacy::ContextFlags::MAP_HOST + | rust_cuda::deps::cust::context::legacy::ContextFlags::SCHED_AUTO, device, )?, ); // Create a new CUDA stream to submit kernels to let mut stream = - rust_cuda::host::CudaDropWrapper::from(rust_cuda::deps::rustacuda::stream::Stream::new( - rust_cuda::deps::rustacuda::stream::StreamFlags::NON_BLOCKING, + rust_cuda::host::CudaDropWrapper::from(rust_cuda::deps::cust::stream::Stream::new( + rust_cuda::deps::cust::stream::StreamFlags::NON_BLOCKING, None, )?); // Create a new instance of the CUDA kernel and prepare the launch config let mut kernel = rust_cuda::kernel::TypedPtxKernel::::new::(None); let config = rust_cuda::kernel::LaunchConfig { - grid: rust_cuda::deps::rustacuda::function::GridSize::x(1), - block: rust_cuda::deps::rustacuda::function::BlockSize::x(4), + grid: rust_cuda::deps::cust::function::GridSize::x(1), + block: rust_cuda::deps::cust::function::BlockSize::x(4), ptx_jit: false, }; diff --git a/rust-cuda-derive/src/lib.rs b/rust-cuda-derive/src/lib.rs index cc371f18c..2cfb62949 100644 --- a/rust-cuda-derive/src/lib.rs +++ b/rust-cuda-derive/src/lib.rs @@ -5,7 +5,7 @@ //! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/rust-cuda/ci.yml?branch=main //! [workflow]: https://github.com/juntyr/rust-cuda/actions/workflows/ci.yml?query=branch%3Amain //! -//! [MSRV]: https://img.shields.io/badge/MSRV-1.81.0--nightly-orange +//! [MSRV]: https://img.shields.io/badge/MSRV-1.84.0--nightly-orange //! [repo]: https://github.com/juntyr/rust-cuda //! //! 
[Rust Doc]: https://img.shields.io/badge/docs-main-blue diff --git a/rust-cuda-derive/src/rust_to_cuda/field_ty.rs b/rust-cuda-derive/src/rust_to_cuda/field_ty.rs index 4278d308c..8e167a626 100644 --- a/rust-cuda-derive/src/rust_to_cuda/field_ty.rs +++ b/rust-cuda-derive/src/rust_to_cuda/field_ty.rs @@ -1,6 +1,5 @@ use syn::{parse_quote, spanned::Spanned}; -#[expect(clippy::module_name_repetitions)] pub enum CudaReprFieldTy { SafeDeviceCopy, RustToCuda { diff --git a/rust-cuda-derive/src/rust_to_cuda/impl.rs b/rust-cuda-derive/src/rust_to_cuda/impl.rs index 55b0948d7..56dc3dcca 100644 --- a/rust-cuda-derive/src/rust_to_cuda/impl.rs +++ b/rust-cuda-derive/src/rust_to_cuda/impl.rs @@ -84,7 +84,7 @@ pub fn rust_to_cuda_trait( unsafe fn borrow( &self, alloc: CudaAllocType, - ) -> #crate_path::deps::rustacuda::error::CudaResult<( + ) -> #crate_path::deps::cust::error::CudaResult<( #crate_path::utils::ffi::DeviceAccessible, #crate_path::alloc::CombinedCudaAlloc )> { @@ -107,7 +107,7 @@ pub fn rust_to_cuda_trait( alloc: #crate_path::alloc::CombinedCudaAlloc< Self::CudaAllocation, CudaAllocType >, - ) -> #crate_path::deps::rustacuda::error::CudaResult { + ) -> #crate_path::deps::cust::error::CudaResult { let (alloc_front, alloc_tail) = alloc.split(); #(#r2c_field_destructors)* @@ -192,7 +192,7 @@ pub fn rust_to_cuda_async_trait( &self, alloc: CudaAllocType, stream: #crate_path::host::Stream<'stream>, - ) -> #crate_path::deps::rustacuda::error::CudaResult<( + ) -> #crate_path::deps::cust::error::CudaResult<( #crate_path::utils::r#async::Async< '_, 'stream, #crate_path::utils::ffi::DeviceAccessible, @@ -220,7 +220,7 @@ pub fn rust_to_cuda_async_trait( Self::CudaAllocationAsync, CudaAllocType >, stream: #crate_path::host::Stream<'stream>, - ) -> #crate_path::deps::rustacuda::error::CudaResult<( + ) -> #crate_path::deps::cust::error::CudaResult<( #crate_path::utils::r#async::Async< 'a, 'stream, #crate_path::deps::owning_ref::BoxRefMut<'a, CudaRestoreOwner, Self>, diff --git a/rust-cuda-derive/src/rust_to_cuda/mod.rs b/rust-cuda-derive/src/rust_to_cuda/mod.rs index 800c58fa7..099f97a1d 100644 --- a/rust-cuda-derive/src/rust_to_cuda/mod.rs +++ b/rust-cuda-derive/src/rust_to_cuda/mod.rs @@ -10,7 +10,7 @@ fn get_cuda_repr_ident(rust_repr_ident: &proc_macro2::Ident) -> proc_macro2::Ide format_ident!("{}CudaRepresentation", rust_repr_ident) } -#[expect(clippy::module_name_repetitions, clippy::too_many_lines)] +#[expect(clippy::too_many_lines)] pub fn impl_rust_to_cuda(ast: &syn::DeriveInput) -> proc_macro::TokenStream { let (mut struct_fields_cuda, struct_semi_cuda) = if let syn::Data::Struct(s) = &ast.data { (s.fields.clone(), s.semi_token) diff --git a/rust-cuda-kernel/build.rs b/rust-cuda-kernel/build.rs index ecd3b29cb..65b149df9 100644 --- a/rust-cuda-kernel/build.rs +++ b/rust-cuda-kernel/build.rs @@ -5,7 +5,7 @@ //! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/rust-cuda/ci.yml?branch=main //! [workflow]: https://github.com/juntyr/rust-cuda/actions/workflows/ci.yml?query=branch%3Amain //! -//! [MSRV]: https://img.shields.io/badge/MSRV-1.81.0--nightly-orange +//! [MSRV]: https://img.shields.io/badge/MSRV-1.84.0--nightly-orange //! [repo]: https://github.com/juntyr/rust-cuda //! //! 
[Rust Doc]: https://img.shields.io/badge/docs-main-blue diff --git a/rust-cuda-kernel/src/kernel/link/mod.rs b/rust-cuda-kernel/src/kernel/link/mod.rs index 49db5c264..f0a6bd154 100644 --- a/rust-cuda-kernel/src/kernel/link/mod.rs +++ b/rust-cuda-kernel/src/kernel/link/mod.rs @@ -321,7 +321,7 @@ fn check_kernel_ptx_and_report( Ok(Some(binary)) => { if ptx_lint_levels .get(&PtxLint::DumpAssembly) - .map_or(false, |level| *level > LintLevel::Allow) + .is_some_and(|level| *level > LintLevel::Allow) { const HEX: [char; 16] = [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', @@ -336,7 +336,7 @@ fn check_kernel_ptx_and_report( if ptx_lint_levels .get(&PtxLint::DumpAssembly) - .map_or(false, |level| *level > LintLevel::Warn) + .is_some_and(|level| *level > LintLevel::Warn) { emit_call_site_error!( "{} compiled binary:\n{}\n\n{}", @@ -459,31 +459,31 @@ fn check_kernel_ptx( if ptx_lint_levels .get(&PtxLint::Verbose) - .map_or(false, |level| *level > LintLevel::Warn) + .is_some_and(|level| *level > LintLevel::Warn) { options.push(c"--verbose"); } if ptx_lint_levels .get(&PtxLint::DoublePrecisionUse) - .map_or(false, |level| *level > LintLevel::Warn) + .is_some_and(|level| *level > LintLevel::Warn) { options.push(c"--warn-on-double-precision-use"); } if ptx_lint_levels .get(&PtxLint::LocalMemoryUse) - .map_or(false, |level| *level > LintLevel::Warn) + .is_some_and(|level| *level > LintLevel::Warn) { options.push(c"--warn-on-local-memory-usage"); } if ptx_lint_levels .get(&PtxLint::RegisterSpills) - .map_or(false, |level| *level > LintLevel::Warn) + .is_some_and(|level| *level > LintLevel::Warn) { options.push(c"--warn-on-spills"); } if ptx_lint_levels .get(&PtxLint::DynamicStackSize) - .map_or(true, |level| *level <= LintLevel::Warn) + .is_none_or(|level| *level <= LintLevel::Warn) { options.push(c"--suppress-stack-size-warning"); } @@ -505,31 +505,31 @@ fn check_kernel_ptx( if ptx_lint_levels .get(&PtxLint::Verbose) - .map_or(false, |level| *level > LintLevel::Allow) + .is_some_and(|level| *level > LintLevel::Allow) { options.push(c"--verbose"); } if ptx_lint_levels .get(&PtxLint::DoublePrecisionUse) - .map_or(false, |level| *level > LintLevel::Allow) + .is_some_and(|level| *level > LintLevel::Allow) { options.push(c"--warn-on-double-precision-use"); } if ptx_lint_levels .get(&PtxLint::LocalMemoryUse) - .map_or(false, |level| *level > LintLevel::Allow) + .is_some_and(|level| *level > LintLevel::Allow) { options.push(c"--warn-on-local-memory-usage"); } if ptx_lint_levels .get(&PtxLint::RegisterSpills) - .map_or(false, |level| *level > LintLevel::Allow) + .is_some_and(|level| *level > LintLevel::Allow) { options.push(c"--warn-on-spills"); } if ptx_lint_levels .get(&PtxLint::DynamicStackSize) - .map_or(true, |level| *level < LintLevel::Warn) + .is_none_or(|level| *level < LintLevel::Warn) { options.push(c"--suppress-stack-size-warning"); } diff --git a/rust-cuda-kernel/src/kernel/lints.rs b/rust-cuda-kernel/src/kernel/lints.rs index dd85a289f..c5d05704d 100644 --- a/rust-cuda-kernel/src/kernel/lints.rs +++ b/rust-cuda-kernel/src/kernel/lints.rs @@ -180,7 +180,7 @@ pub trait NestedMetaParser { ) -> syn::Result<()>; } -impl<'a> NestedMetaParser for syn::meta::ParseNestedMeta<'a> { +impl NestedMetaParser for syn::meta::ParseNestedMeta<'_> { fn path(&self) -> &syn::Path { &self.path } diff --git a/rust-cuda-kernel/src/lib.rs b/rust-cuda-kernel/src/lib.rs index 1714bddcd..e2b198153 100644 --- a/rust-cuda-kernel/src/lib.rs +++ b/rust-cuda-kernel/src/lib.rs @@ -5,7 +5,7 @@ //! 
[CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/rust-cuda/ci.yml?branch=main //! [workflow]: https://github.com/juntyr/rust-cuda/actions/workflows/ci.yml?query=branch%3Amain //! -//! [MSRV]: https://img.shields.io/badge/MSRV-1.81.0--nightly-orange +//! [MSRV]: https://img.shields.io/badge/MSRV-1.84.0--nightly-orange //! [repo]: https://github.com/juntyr/rust-cuda //! //! [Rust Doc]: https://img.shields.io/badge/docs-main-blue diff --git a/rust-toolchain b/rust-toolchain index 071c4ebfe..5b8ab7dea 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1,5 +1,5 @@ [toolchain] -# Pin to final 1.81.0 nightly -channel = "nightly-2024-07-21" +# Pin to final 1.84.0 nightly +channel = "nightly-2024-11-22" components = [ "cargo", "rustfmt", "clippy", "llvm-bitcode-linker", "llvm-tools" ] -targets = [ "x86_64-unknown-linux-gnu", "nvptx64-nvidia-cuda" ] +targets = [ "nvptx64-nvidia-cuda" ] diff --git a/src/deps.rs b/src/deps.rs index 50fd38f3f..8521ed267 100644 --- a/src/deps.rs +++ b/src/deps.rs @@ -7,6 +7,6 @@ pub extern crate const_type_layout; pub extern crate owning_ref; #[cfg(feature = "host")] -pub extern crate rustacuda; +pub extern crate cust; -pub extern crate rustacuda_core; +pub extern crate cust_core; diff --git a/src/host/mod.rs b/src/host/mod.rs index c2d0558c4..3ae4f0353 100644 --- a/src/host/mod.rs +++ b/src/host/mod.rs @@ -5,11 +5,11 @@ use std::{ }; use const_type_layout::TypeGraphLayout; -use rustacuda::{ +use cust::{ context::Context, error::CudaError, event::Event, - memory::{CopyDestination, DeviceBox, DeviceBuffer, LockedBox, LockedBuffer}, + memory::{CopyDestination, DeviceBox, DeviceBuffer, DeviceCopy, LockedBox, LockedBuffer}, module::Module, }; @@ -30,19 +30,19 @@ type InvariantLifetime<'brand> = PhantomData &'brand ()>; #[derive(Copy, Clone)] #[repr(transparent)] pub struct Stream<'stream> { - stream: &'stream rustacuda::stream::Stream, + stream: &'stream cust::stream::Stream, _brand: InvariantLifetime<'stream>, } -impl<'stream> Deref for Stream<'stream> { - type Target = rustacuda::stream::Stream; +impl Deref for Stream<'_> { + type Target = cust::stream::Stream; fn deref(&self) -> &Self::Target { self.stream } } -impl<'stream> Stream<'stream> { +impl Stream<'_> { /// Create a new uniquely branded [`Stream`], which can bind async /// operations to the [`Stream`] that they are computed on. 
/// @@ -65,7 +65,7 @@ impl<'stream> Stream<'stream> { /// } /// ``` pub fn with( - stream: &mut rustacuda::stream::Stream, + stream: &mut cust::stream::Stream, inner: impl for<'new_stream> FnOnce(Stream<'new_stream>) -> O, ) -> O { inner(Stream { @@ -77,7 +77,7 @@ impl<'stream> Stream<'stream> { pub trait CudaDroppable: Sized { #[expect(clippy::missing_errors_doc)] - fn drop(val: Self) -> Result<(), (rustacuda::error::CudaError, Self)>; + fn drop(val: Self) -> Result<(), (cust::error::CudaError, Self)>; } #[repr(transparent)] @@ -112,25 +112,27 @@ impl DerefMut for CudaDropWrapper { } } -impl CudaDroppable for DeviceBox { +impl CudaDroppable for DeviceBox { fn drop(val: Self) -> Result<(), (CudaError, Self)> { Self::drop(val) } } -impl CudaDroppable for DeviceBuffer { +impl CudaDroppable for DeviceBuffer { fn drop(val: Self) -> Result<(), (CudaError, Self)> { Self::drop(val) } } -impl CudaDroppable for LockedBox { +impl CudaDroppable for LockedBox { fn drop(val: Self) -> Result<(), (CudaError, Self)> { - Self::drop(val) + // FIXME: cust's LockedBox no longer has a fallible drop + std::mem::drop(val); + Ok(()) } } -impl CudaDroppable for LockedBuffer { +impl CudaDroppable for LockedBuffer { fn drop(val: Self) -> Result<(), (CudaError, Self)> { Self::drop(val) } @@ -147,8 +149,9 @@ macro_rules! impl_sealed_drop_value { } impl_sealed_drop_value!(Module); -impl_sealed_drop_value!(rustacuda::stream::Stream); +impl_sealed_drop_value!(cust::stream::Stream); impl_sealed_drop_value!(Context); +impl_sealed_drop_value!(cust::context::legacy::Context); impl_sealed_drop_value!(Event); #[expect(clippy::module_name_repetitions)] @@ -201,13 +204,14 @@ impl<'a, T: PortableBitSemantics + TypeGraphLayout> HostAndDeviceMutRef<'a, T> { } } + #[allow(clippy::needless_pass_by_ref_mut)] #[must_use] pub(crate) fn for_device<'b>(&'b mut self) -> DeviceMutRef<'a, T> where 'a: 'b, { DeviceMutRef { - pointer: DeviceMutPointer(self.device_box.as_device_ptr().as_raw_mut().cast()), + pointer: DeviceMutPointer(self.device_box.as_device_ptr().as_mut_ptr().cast()), reference: PhantomData, } } @@ -240,18 +244,15 @@ impl<'a, T: PortableBitSemantics + TypeGraphLayout> HostAndDeviceMutRef<'a, T> { } #[must_use] - pub fn into_mut<'b>(self) -> HostAndDeviceMutRef<'b, T> + pub const fn into_mut<'b>(self) -> HostAndDeviceMutRef<'b, T> where 'a: 'b, { - HostAndDeviceMutRef { - device_box: self.device_box, - host_ref: self.host_ref, - } + self } #[must_use] - pub fn into_async<'b, 'stream>( + pub const fn into_async<'b, 'stream>( self, stream: Stream<'stream>, ) -> Async<'b, 'stream, HostAndDeviceMutRef<'b, T>, NoCompletion> @@ -268,13 +269,13 @@ pub struct HostAndDeviceConstRef<'a, T: PortableBitSemantics + TypeGraphLayout> host_ref: &'a T, } -impl<'a, T: PortableBitSemantics + TypeGraphLayout> Clone for HostAndDeviceConstRef<'a, T> { +impl Clone for HostAndDeviceConstRef<'_, T> { fn clone(&self) -> Self { *self } } -impl<'a, T: PortableBitSemantics + TypeGraphLayout> Copy for HostAndDeviceConstRef<'a, T> {} +impl Copy for HostAndDeviceConstRef<'_, T> {} impl<'a, T: PortableBitSemantics + TypeGraphLayout> HostAndDeviceConstRef<'a, T> { /// # Errors @@ -322,10 +323,10 @@ impl<'a, T: PortableBitSemantics + TypeGraphLayout> HostAndDeviceConstRef<'a, T> where 'a: 'b, { - let mut hack = ManuallyDrop::new(unsafe { std::ptr::read(self.device_box) }); + let hack = ManuallyDrop::new(unsafe { std::ptr::read(self.device_box) }); DeviceConstRef { - pointer: DeviceConstPointer(hack.as_device_ptr().as_raw().cast()), + pointer: 
DeviceConstPointer(hack.as_device_ptr().as_ptr().cast()), reference: PhantomData, } } @@ -390,7 +391,7 @@ impl<'a, T: PortableBitSemantics + TypeGraphLayout> HostAndDeviceOwned<'a, T> { #[must_use] pub(crate) fn for_device(self) -> DeviceOwnedRef<'a, T> { DeviceOwnedRef { - pointer: DeviceOwnedPointer(self.device_box.as_device_ptr().as_raw_mut().cast()), + pointer: DeviceOwnedPointer(self.device_box.as_device_ptr().as_mut_ptr().cast()), marker: PhantomData::, reference: PhantomData::<&'a mut ()>, } diff --git a/src/kernel/mod.rs b/src/kernel/mod.rs index 44b4c6216..a6134af30 100644 --- a/src/kernel/mod.rs +++ b/src/kernel/mod.rs @@ -7,10 +7,10 @@ use std::{ }; #[cfg(feature = "host")] -use rustacuda::{ +use cust::{ error::{CudaError, CudaResult}, function::Function, - module::Module, + module::{Module, ModuleJitOption, OptLevel}, }; #[cfg(feature = "kernel")] @@ -42,12 +42,7 @@ mod sealed { #[cfg(all(feature = "host", not(doc)))] #[doc(hidden)] -pub trait WithNewAsync< - 'stream, - P: ?Sized + CudaKernelParameter, - O, - E: From, -> +pub trait WithNewAsync<'stream, P: ?Sized + CudaKernelParameter, O, E: From> { fn with<'b>(self, param: P::AsyncHostType<'stream, 'b>) -> Result where @@ -59,7 +54,7 @@ impl< 'stream, P: ?Sized + CudaKernelParameter, O, - E: From, + E: From, F: for<'b> FnOnce(P::AsyncHostType<'stream, 'b>) -> Result, > WithNewAsync<'stream, P, O, E> for F { @@ -109,7 +104,7 @@ pub trait CudaKernelParameter: sealed::Sealed { #[cfg(feature = "host")] #[expect(clippy::missing_errors_doc)] // FIXME - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl WithNewAsync<'stream, Self, O, E>, @@ -139,7 +134,7 @@ pub trait CudaKernelParameter: sealed::Sealed { #[doc(hidden)] #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, token: sealed::Token, ) -> Result, E> @@ -229,7 +224,7 @@ macro_rules! impl_launcher_launch { } #[cfg(feature = "host")] -impl<'stream, 'kernel, Kernel> Launcher<'stream, 'kernel, Kernel> { +impl<'stream, Kernel> Launcher<'stream, '_, Kernel> { impl_launcher_launch! { launch0() => with0_async => launch0_async } impl_launcher_launch! { launch1( @@ -286,8 +281,8 @@ impl<'stream, 'kernel, Kernel> Launcher<'stream, 'kernel, Kernel> { #[cfg(feature = "host")] #[derive(Clone, Debug, PartialEq, Eq)] pub struct LaunchConfig { - pub grid: rustacuda::function::GridSize, - pub block: rustacuda::function::BlockSize, + pub grid: cust::function::GridSize, + pub block: cust::function::BlockSize, pub ptx_jit: bool, } @@ -305,9 +300,15 @@ impl RawPtxKernel { /// Returns a [`CudaError`] if `ptx` is not a valid PTX source, or it does /// not contain an entry point named `entry_point`. 
pub fn new(ptx: &CStr, entry_point: &CStr) -> CudaResult { - let module: Box = Box::new(Module::load_from_string(ptx)?); - - let function = unsafe { &*std::ptr::from_ref(module.as_ref()) }.get_function(entry_point); + let module: Box = Box::new(Module::from_ptx_cstr( + ptx, + &[ModuleJitOption::OptLevel(OptLevel::O4)], + )?); + + // FIXME: cust's Module::get_function takes a str and turns it back into + // a CString immediately + let function = unsafe { &*std::ptr::from_ref(module.as_ref()) } + .get_function(unsafe { core::str::from_utf8_unchecked(entry_point.to_bytes()) }); let function = match function { Ok(function) => function, diff --git a/src/kernel/param.rs b/src/kernel/param.rs index 2ad1b0bf8..9ab27fa1b 100644 --- a/src/kernel/param.rs +++ b/src/kernel/param.rs @@ -88,7 +88,7 @@ impl< type SyncHostType = T; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, _stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -124,7 +124,7 @@ impl< } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, _token: sealed::Token, ) -> Result, E> @@ -157,6 +157,7 @@ impl< { } +#[cfg_attr(not(feature = "host"), expect(clippy::needless_lifetimes))] impl< 'a, T: Sync + crate::safety::StackOnly + crate::safety::PortableBitSemantics + TypeGraphLayout, @@ -180,7 +181,7 @@ impl< type SyncHostType = &'a T; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -219,7 +220,7 @@ impl< } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, _token: sealed::Token, ) -> Result, E> @@ -244,9 +245,8 @@ impl< } } impl< - 'a, T: Sync + crate::safety::StackOnly + crate::safety::PortableBitSemantics + TypeGraphLayout, - > sealed::Sealed for &'a PerThreadShallowCopy + > sealed::Sealed for &PerThreadShallowCopy { } @@ -273,7 +273,7 @@ impl< type SyncHostType = <&'a PerThreadShallowCopy as CudaKernelParameter>::SyncHostType; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -315,7 +315,7 @@ impl< } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, token: sealed::Token, ) -> Result, E> @@ -342,9 +342,8 @@ impl< } } impl< - 'a, T: Sync + crate::safety::StackOnly + crate::safety::PortableBitSemantics + TypeGraphLayout, - > sealed::Sealed for &'a PtxJit> + > sealed::Sealed for &PtxJit> { } @@ -374,6 +373,7 @@ impl< } } +#[cfg_attr(not(feature = "host"), expect(clippy::needless_lifetimes))] impl< 'a, T: Sync @@ -403,7 +403,7 @@ impl< type SyncHostType = &'a mut T; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -442,7 +442,7 @@ impl< } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: 
From>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, _token: sealed::Token, ) -> Result, E> @@ -467,13 +467,12 @@ impl< } } impl< - 'a, T: crate::safety::StackOnly + Sync + crate::safety::PortableBitSemantics + TypeGraphLayout + InteriorMutableSync, - > sealed::Sealed for &'a ShallowInteriorMutable + > sealed::Sealed for &ShallowInteriorMutable { } @@ -549,7 +548,7 @@ impl< type SyncHostType = T; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -585,7 +584,7 @@ impl< } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, _token: sealed::Token, ) -> Result, E> @@ -618,6 +617,7 @@ impl< { } +#[cfg_attr(not(feature = "host"), expect(clippy::needless_lifetimes))] impl<'a, T: Sync + RustToCuda> CudaKernelParameter for &'a DeepPerThreadBorrow { #[cfg(feature = "host")] type AsyncHostType<'stream, 'b> @@ -644,7 +644,7 @@ impl<'a, T: Sync + RustToCuda> CudaKernelParameter for &'a DeepPerThreadBorrow>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -683,7 +683,7 @@ impl<'a, T: Sync + RustToCuda> CudaKernelParameter for &'a DeepPerThreadBorrow>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, _token: sealed::Token, ) -> Result, E> @@ -707,8 +707,9 @@ impl<'a, T: Sync + RustToCuda> CudaKernelParameter for &'a DeepPerThreadBorrow sealed::Sealed for &'a DeepPerThreadBorrow {} +impl sealed::Sealed for &DeepPerThreadBorrow {} +#[cfg_attr(not(feature = "host"), expect(clippy::needless_lifetimes))] impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> CudaKernelParameter for &'a mut DeepPerThreadBorrow { @@ -737,7 +738,7 @@ impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> CudaKernelParameter type SyncHostType = &'a mut T; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -781,7 +782,7 @@ impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> CudaKernelParameter } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( mut param: Self::AsyncHostType<'stream, 'b>, _token: sealed::Token, ) -> Result, E> @@ -806,10 +807,7 @@ impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> CudaKernelParameter } } } -impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> sealed::Sealed - for &'a mut DeepPerThreadBorrow -{ -} +impl sealed::Sealed for &mut DeepPerThreadBorrow {} impl< T: Send @@ -835,7 +833,7 @@ impl< type SyncHostType = as CudaKernelParameter>::SyncHostType; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -862,7 +860,7 @@ impl< } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, token: sealed::Token, ) -> Result, E> @@ -926,7 +924,7 @@ impl<'a, T: Sync + 
RustToCuda> CudaKernelParameter for &'a PtxJit as CudaKernelParameter>::SyncHostType; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -968,7 +966,7 @@ impl<'a, T: Sync + RustToCuda> CudaKernelParameter for &'a PtxJit>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, token: sealed::Token, ) -> Result, E> @@ -994,7 +992,7 @@ impl<'a, T: Sync + RustToCuda> CudaKernelParameter for &'a PtxJit sealed::Sealed for &'a PtxJit> {} +impl sealed::Sealed for &PtxJit> {} impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> CudaKernelParameter for &'a mut PtxJit> @@ -1017,7 +1015,7 @@ impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> CudaKernelParameter type SyncHostType = <&'a mut DeepPerThreadBorrow as CudaKernelParameter>::SyncHostType; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -1064,7 +1062,7 @@ impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> CudaKernelParameter } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, token: sealed::Token, ) -> Result, E> @@ -1090,8 +1088,8 @@ impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> CudaKernelParameter } } } -impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> sealed::Sealed - for &'a mut PtxJit> +impl sealed::Sealed + for &mut PtxJit> { } @@ -1135,7 +1133,7 @@ mod private_shared { } } -impl<'a, T: 'static> CudaKernelParameter for &'a mut crate::utils::shared::ThreadBlockShared { +impl CudaKernelParameter for &mut crate::utils::shared::ThreadBlockShared { #[cfg(feature = "host")] type AsyncHostType<'stream, 'b> = &'b mut crate::utils::shared::ThreadBlockShared @@ -1154,7 +1152,7 @@ impl<'a, T: 'static> CudaKernelParameter for &'a mut crate::utils::shared::Threa type SyncHostType = Self; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, _stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -1190,7 +1188,7 @@ impl<'a, T: 'static> CudaKernelParameter for &'a mut crate::utils::shared::Threa } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( _param: Self::AsyncHostType<'stream, 'b>, _token: sealed::Token, ) -> Result, E> @@ -1218,10 +1216,10 @@ impl<'a, T: 'static> CudaKernelParameter for &'a mut crate::utils::shared::Threa inner.with(&mut param) } } -impl<'a, T: 'static> sealed::Sealed for &'a mut crate::utils::shared::ThreadBlockShared {} +impl sealed::Sealed for &mut crate::utils::shared::ThreadBlockShared {} -impl<'a, T: 'static + PortableBitSemantics + TypeGraphLayout> CudaKernelParameter - for &'a mut crate::utils::shared::ThreadBlockSharedSlice +impl CudaKernelParameter + for &mut crate::utils::shared::ThreadBlockSharedSlice { #[cfg(feature = "host")] type AsyncHostType<'stream, 'b> @@ -1241,7 +1239,7 @@ impl<'a, T: 'static + PortableBitSemantics + TypeGraphLayout> CudaKernelParamete type SyncHostType = Self; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn 
with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, _stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -1277,7 +1275,7 @@ impl<'a, T: 'static + PortableBitSemantics + TypeGraphLayout> CudaKernelParamete } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, _token: sealed::Token, ) -> Result, E> @@ -1307,7 +1305,7 @@ impl<'a, T: 'static + PortableBitSemantics + TypeGraphLayout> CudaKernelParamete } } } -impl<'a, T: 'static + PortableBitSemantics + TypeGraphLayout> sealed::Sealed - for &'a mut crate::utils::shared::ThreadBlockSharedSlice +impl sealed::Sealed + for &mut crate::utils::shared::ThreadBlockSharedSlice { } diff --git a/src/kernel/ptx_jit/regex.rs b/src/kernel/ptx_jit/regex.rs index f07f64fa5..d5237475e 100644 --- a/src/kernel/ptx_jit/regex.rs +++ b/src/kernel/ptx_jit/regex.rs @@ -2,7 +2,6 @@ use std::sync::OnceLock; use regex::bytes::Regex; -#[expect(clippy::module_name_repetitions)] pub fn const_marker_regex() -> &'static Regex { static CONST_MARKER_REGEX: OnceLock = OnceLock::new(); #[allow(clippy::unwrap_used)] @@ -12,7 +11,6 @@ pub fn const_marker_regex() -> &'static Regex { }) } -#[expect(clippy::module_name_repetitions)] pub fn const_base_register_regex() -> &'static Regex { static CONST_BASE_REGISTER_REGEX: OnceLock = OnceLock::new(); #[allow(clippy::unwrap_used)] @@ -22,7 +20,6 @@ pub fn const_base_register_regex() -> &'static Regex { }) } -#[expect(clippy::module_name_repetitions)] pub fn const_load_instruction_regex() -> &'static Regex { static CONST_LOAD_INSTRUCTION_REGEX: OnceLock = OnceLock::new(); #[allow(clippy::unwrap_used)] @@ -54,7 +51,6 @@ pub fn const_load_instruction_regex() -> &'static Regex { }) } -#[expect(clippy::module_name_repetitions)] pub fn register_regex() -> &'static Regex { static REGISTER_REGEX: OnceLock = OnceLock::new(); #[allow(clippy::unwrap_used)] diff --git a/src/lend/impls/arc.rs b/src/lend/impls/arc.rs index 4d59837ff..b08ba6342 100644 --- a/src/lend/impls/arc.rs +++ b/src/lend/impls/arc.rs @@ -5,7 +5,7 @@ use std::mem::ManuallyDrop; use const_type_layout::{TypeGraphLayout, TypeLayout}; #[cfg(feature = "host")] -use rustacuda::{error::CudaResult, memory::DeviceBox, memory::LockedBox}; +use cust::{error::CudaResult, memory::DeviceBox, memory::LockedBox}; use crate::{ deps::alloc::sync::Arc, @@ -30,7 +30,6 @@ use crate::{ #[doc(hidden)] #[repr(transparent)] #[derive(TypeLayout)] -#[expect(clippy::module_name_repetitions)] pub struct ArcCudaRepresentation( DeviceOwnedPointer<_ArcInner>, ); @@ -65,13 +64,13 @@ unsafe impl RustToCuda for Arc { let offset = std::mem::offset_of!(_ArcInner, data); let arc_ptr: *const _ArcInner = data_ptr.byte_sub(offset).cast(); - let mut device_box = CudaDropWrapper::from(DeviceBox::new( + let device_box = CudaDropWrapper::from(DeviceBox::new( DeviceCopyWithPortableBitSemantics::from_ref(&*arc_ptr), )?); Ok(( DeviceAccessible::from(ArcCudaRepresentation(DeviceOwnedPointer( - device_box.as_device_ptr().as_raw_mut().cast(), + device_box.as_device_ptr().as_mut_ptr().cast(), ))), CombinedCudaAlloc::new(device_box, alloc), )) @@ -101,25 +100,26 @@ unsafe impl RustToCudaAsync for Arc, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )> { - use rustacuda::memory::AsyncCopyDestination; + use cust::memory::AsyncCopyDestination; let locked_box = unsafe { + 
#[allow(clippy::used_underscore_items)] let inner = ManuallyDrop::new(_ArcInner { strong: AtomicUsize::new(1), weak: AtomicUsize::new(1), data: std::ptr::read(&**self), }); - let mut uninit = CudaDropWrapper::from(LockedBox::< + let uninit = CudaDropWrapper::from(LockedBox::< DeviceCopyWithPortableBitSemantics>>, >::uninitialized()?); std::ptr::copy_nonoverlapping( std::ptr::from_ref(DeviceCopyWithPortableBitSemantics::from_ref(&inner)), - uninit.as_mut_ptr(), + uninit.as_raw(), 1, ); @@ -129,12 +129,12 @@ unsafe impl RustToCudaAsync for Arc>>, >::uninitialized()?); - device_box.async_copy_from(&*locked_box, &stream)?; + device_box.async_copy_from(&**locked_box, &stream)?; Ok(( Async::pending( DeviceAccessible::from(ArcCudaRepresentation(DeviceOwnedPointer( - device_box.as_device_ptr().as_raw_mut().cast(), + device_box.as_device_ptr().as_mut_ptr().cast(), ))), stream, NoCompletion, diff --git a/src/lend/impls/arced_slice.rs b/src/lend/impls/arced_slice.rs index cce12b3cd..76d3f15cd 100644 --- a/src/lend/impls/arced_slice.rs +++ b/src/lend/impls/arced_slice.rs @@ -5,12 +5,11 @@ use std::mem::{ManuallyDrop, MaybeUninit}; use const_type_layout::{TypeGraphLayout, TypeLayout}; #[cfg(feature = "host")] -use rustacuda::{ +use cust::{ error::CudaResult, memory::LockedBuffer, - memory::{DeviceBox, DeviceBuffer}, + memory::{DeviceBox, DeviceBuffer, DeviceCopy}, }; -use rustacuda_core::DeviceCopy; use crate::{ deps::alloc::sync::Arc, @@ -33,7 +32,6 @@ use crate::{ }; #[doc(hidden)] -#[expect(clippy::module_name_repetitions)] #[derive(TypeLayout)] #[repr(C)] pub struct ArcedSliceCudaRepresentation { @@ -51,13 +49,20 @@ pub struct _ArcInner { data: T, } +#[cfg(feature = "host")] +#[derive(Copy, Clone, DeviceCopy)] #[repr(C)] struct _ArcInnerHeader { - strong: AtomicUsize, - weak: AtomicUsize, + strong: _AtomicUsize, + weak: _AtomicUsize, } -unsafe impl DeviceCopy for _ArcInnerHeader {} +#[cfg(feature = "host")] +#[derive(Copy, Clone, DeviceCopy)] +#[repr(C, align(8))] +struct _AtomicUsize { + v: usize, +} unsafe impl RustToCuda for Arc<[T]> { #[cfg(all(feature = "host", not(doc)))] @@ -74,38 +79,38 @@ unsafe impl RustToCuda for Arc<[T]> { DeviceAccessible, CombinedCudaAlloc, )> { - use rustacuda::memory::{CopyDestination, DeviceSlice}; - use rustacuda_core::DevicePointer; + use cust::memory::{CopyDestination, DeviceSlice}; let data_ptr: *const T = std::ptr::from_ref(&**self).as_ptr(); let offset = std::mem::offset_of!(_ArcInner<[T; 42]>, data); let arc_ptr: *const _ArcInner<[T; 42]> = data_ptr.byte_sub(offset).cast(); - let header_len = (offset + (std::mem::align_of::() - 1)) / std::mem::align_of::(); + let header_len = offset.div_ceil(std::mem::align_of::()); - let mut device_buffer = CudaDropWrapper::from(DeviceBuffer::< + let device_buffer = CudaDropWrapper::from(DeviceBuffer::< DeviceCopyWithPortableBitSemantics, >::uninitialized( header_len + self.len() )?); - let (header, buffer): (&mut DeviceSlice<_>, &mut DeviceSlice<_>) = - device_buffer.split_at_mut(header_len); + + let mut buffer: DeviceSlice<_> = device_buffer.index(header_len..); buffer.copy_from(std::slice::from_raw_parts(self.as_ptr().cast(), self.len()))?; + + let header: DeviceSlice<_> = device_buffer.index(..header_len); let header = DeviceSlice::from_raw_parts_mut( - DevicePointer::wrap(header.as_mut_ptr().cast::()), + header.as_device_ptr().cast::(), header.len() * std::mem::size_of::(), ); - let (_, header) = header.split_at_mut(header.len() - offset); - let (header, _) = header.split_at_mut(std::mem::size_of::<_ArcInnerHeader>()); 
- #[expect(clippy::cast_ptr_alignment)] + let header = header.index((header.len() - offset)..); + let header = header.index(..std::mem::size_of::<_ArcInnerHeader>()); let mut header: ManuallyDrop> = ManuallyDrop::new( - DeviceBox::from_raw(header.as_mut_ptr().cast::<_ArcInnerHeader>()), + DeviceBox::from_device(header.as_device_ptr().cast::<_ArcInnerHeader>()), ); header.copy_from(&*arc_ptr.cast::<_ArcInnerHeader>())?; Ok(( DeviceAccessible::from(ArcedSliceCudaRepresentation { - data: DeviceOwnedPointer(header.as_device_ptr().as_raw_mut().cast()), + data: DeviceOwnedPointer(header.as_device_ptr().as_mut_ptr().cast()), len: self.len(), }), CombinedCudaAlloc::new(device_buffer, alloc), @@ -136,17 +141,17 @@ unsafe impl RustToCudaAsync for Arc<[ &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )> { - use rustacuda::memory::AsyncCopyDestination; + use cust::memory::AsyncCopyDestination; let data_ptr: *const T = std::ptr::from_ref(&**self).as_ptr(); let offset = std::mem::offset_of!(_ArcInner<[T; 42]>, data); let arc_ptr: *const _ArcInner<[T; 42]> = data_ptr.byte_sub(offset).cast(); - let header_len = (offset + (std::mem::align_of::() - 1)) / std::mem::align_of::(); + let header_len = offset.div_ceil(std::mem::align_of::()); let locked_buffer = unsafe { let mut locked_buffer = @@ -187,7 +192,7 @@ unsafe impl RustToCudaAsync for Arc<[ data: DeviceOwnedPointer( device_buffer .as_device_ptr() - .as_raw_mut() + .as_mut_ptr() .byte_add(header_len * std::mem::size_of::() - offset) .cast(), ), diff --git a/src/lend/impls/box.rs b/src/lend/impls/box.rs index 305072a34..2bd7ec78c 100644 --- a/src/lend/impls/box.rs +++ b/src/lend/impls/box.rs @@ -4,7 +4,7 @@ use std::mem::ManuallyDrop; use const_type_layout::{TypeGraphLayout, TypeLayout}; #[cfg(feature = "host")] -use rustacuda::{error::CudaResult, memory::DeviceBox, memory::LockedBox}; +use cust::{error::CudaResult, memory::DeviceBox, memory::LockedBox}; use crate::{ deps::alloc::boxed::Box, @@ -29,7 +29,6 @@ use crate::{ #[doc(hidden)] #[repr(transparent)] #[derive(TypeLayout)] -#[expect(clippy::module_name_repetitions)] pub struct BoxCudaRepresentation(DeviceOwnedPointer); unsafe impl RustToCuda for Box { @@ -47,13 +46,13 @@ unsafe impl RustToCuda for Box { DeviceAccessible, CombinedCudaAlloc, )> { - let mut device_box = CudaDropWrapper::from(DeviceBox::new( + let device_box = CudaDropWrapper::from(DeviceBox::new( DeviceCopyWithPortableBitSemantics::from_ref(&**self), )?); Ok(( DeviceAccessible::from(BoxCudaRepresentation(DeviceOwnedPointer( - device_box.as_device_ptr().as_raw_mut().cast(), + device_box.as_device_ptr().as_mut_ptr().cast(), ))), CombinedCudaAlloc::new(device_box, alloc), )) @@ -64,7 +63,7 @@ unsafe impl RustToCuda for Box { &mut self, alloc: CombinedCudaAlloc, ) -> CudaResult { - use rustacuda::memory::CopyDestination; + use cust::memory::CopyDestination; let (alloc_front, alloc_tail) = alloc.split(); @@ -90,20 +89,20 @@ unsafe impl RustToCudaAsync for Box, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )> { - use rustacuda::memory::AsyncCopyDestination; + use cust::memory::AsyncCopyDestination; let locked_box = unsafe { - let mut uninit = CudaDropWrapper::from(LockedBox::< + let uninit = CudaDropWrapper::from(LockedBox::< DeviceCopyWithPortableBitSemantics>, >::uninitialized()?); std::ptr::copy_nonoverlapping( 
std::ptr::from_ref::(&**self) .cast::>>(), - uninit.as_mut_ptr(), + uninit.as_raw(), 1, ); uninit @@ -112,12 +111,12 @@ unsafe impl RustToCudaAsync for Box>, >::uninitialized()?); - device_box.async_copy_from(&*locked_box, &stream)?; + device_box.async_copy_from(&**locked_box, &stream)?; Ok(( Async::pending( DeviceAccessible::from(BoxCudaRepresentation(DeviceOwnedPointer( - device_box.as_device_ptr().as_raw_mut().cast(), + device_box.as_device_ptr().as_mut_ptr().cast(), ))), stream, NoCompletion, @@ -135,12 +134,12 @@ unsafe impl RustToCudaAsync for Box, CompletionFnMut<'a, Self>>, A, )> { - use rustacuda::memory::AsyncCopyDestination; + use cust::memory::AsyncCopyDestination; let (alloc_front, alloc_tail) = alloc.split(); let (mut locked_box, device_box) = alloc_front.split(); - device_box.async_copy_to(&mut *locked_box, &stream)?; + device_box.async_copy_to(&mut **locked_box, &stream)?; let r#async = crate::utils::r#async::Async::<_, CompletionFnMut<'a, Self>>::pending( this, @@ -151,7 +150,7 @@ unsafe impl RustToCudaAsync for Box> doesn't drop T unsafe { - std::ptr::copy_nonoverlapping(locked_box.as_ptr().cast::(), data, 1); + std::ptr::copy_nonoverlapping(locked_box.as_raw().cast::(), data, 1); } std::mem::drop(locked_box); Ok(()) diff --git a/src/lend/impls/boxed_slice.rs b/src/lend/impls/boxed_slice.rs index b2c22765c..8d00e49e8 100644 --- a/src/lend/impls/boxed_slice.rs +++ b/src/lend/impls/boxed_slice.rs @@ -7,7 +7,7 @@ use crate::{deps::alloc::boxed::Box, lend::RustToCudaAsync, utils::ffi::DeviceOw use const_type_layout::{TypeGraphLayout, TypeLayout}; #[cfg(feature = "host")] -use rustacuda::{error::CudaResult, memory::DeviceBuffer, memory::LockedBuffer}; +use cust::{error::CudaResult, memory::DeviceBuffer, memory::LockedBuffer}; use crate::{ lend::{CudaAsRust, RustToCuda}, @@ -26,7 +26,6 @@ use crate::{ }; #[doc(hidden)] -#[expect(clippy::module_name_repetitions)] #[derive(TypeLayout)] #[repr(C)] pub struct BoxedSliceCudaRepresentation { @@ -51,13 +50,13 @@ unsafe impl RustToCuda for Box<[T]> { DeviceAccessible, CombinedCudaAlloc, )> { - let mut device_buffer = CudaDropWrapper::from(DeviceBuffer::from_slice( + let device_buffer = CudaDropWrapper::from(DeviceBuffer::from_slice( DeviceCopyWithPortableBitSemantics::from_slice(self), )?); Ok(( DeviceAccessible::from(BoxedSliceCudaRepresentation { - data: DeviceOwnedPointer(device_buffer.as_mut_ptr().cast()), + data: DeviceOwnedPointer(device_buffer.as_device_ptr().as_mut_ptr().cast()), len: device_buffer.len(), _marker: PhantomData::, }), @@ -70,7 +69,7 @@ unsafe impl RustToCuda for Box<[T]> { &mut self, alloc: CombinedCudaAlloc, ) -> CudaResult { - use rustacuda::memory::CopyDestination; + use cust::memory::CopyDestination; let (alloc_front, alloc_tail) = alloc.split(); @@ -96,11 +95,11 @@ unsafe impl RustToCudaAsync for Box<[ &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )> { - use rustacuda::memory::AsyncCopyDestination; + use cust::memory::AsyncCopyDestination; let locked_buffer = unsafe { let mut uninit = CudaDropWrapper::from(LockedBuffer::< @@ -124,7 +123,7 @@ unsafe impl RustToCudaAsync for Box<[ Ok(( Async::pending( DeviceAccessible::from(BoxedSliceCudaRepresentation { - data: DeviceOwnedPointer(device_buffer.as_mut_ptr().cast()), + data: DeviceOwnedPointer(device_buffer.as_device_ptr().as_mut_ptr().cast()), len: device_buffer.len(), _marker: PhantomData::, }), @@ -144,7 +143,7 @@ unsafe 
impl RustToCudaAsync for Box<[ Async<'a, 'stream, owning_ref::BoxRefMut<'a, O, Self>, CompletionFnMut<'a, Self>>, A, )> { - use rustacuda::memory::AsyncCopyDestination; + use cust::memory::AsyncCopyDestination; let (alloc_front, alloc_tail) = alloc.split(); let (mut locked_buffer, device_buffer) = alloc_front.split(); diff --git a/src/lend/impls/final.rs b/src/lend/impls/final.rs index fa83de5a2..68569d7a4 100644 --- a/src/lend/impls/final.rs +++ b/src/lend/impls/final.rs @@ -6,7 +6,6 @@ use crate::{ }; #[doc(hidden)] -#[expect(clippy::module_name_repetitions)] #[derive(const_type_layout::TypeLayout)] #[repr(transparent)] pub struct FinalCudaRepresentation(DeviceAccessible); @@ -19,7 +18,7 @@ unsafe impl RustToCuda for Final { unsafe fn borrow( &self, alloc: A, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( DeviceAccessible, crate::alloc::CombinedCudaAlloc, )> { @@ -35,7 +34,7 @@ unsafe impl RustToCuda for Final { unsafe fn restore( &mut self, alloc: crate::alloc::CombinedCudaAlloc, - ) -> rustacuda::error::CudaResult { + ) -> cust::error::CudaResult { let (_alloc_front, alloc_tail) = alloc.split(); Ok(alloc_tail) } @@ -49,7 +48,7 @@ unsafe impl RustToCudaAsync for Final { &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async<'_, 'stream, DeviceAccessible>, crate::alloc::CombinedCudaAlloc, )> { @@ -76,7 +75,7 @@ unsafe impl RustToCudaAsync for Final { this: owning_ref::BoxRefMut<'a, O, Self>, alloc: crate::alloc::CombinedCudaAlloc, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async< 'a, 'stream, diff --git a/src/lend/impls/option.rs b/src/lend/impls/option.rs index 1997822a2..bca51faf3 100644 --- a/src/lend/impls/option.rs +++ b/src/lend/impls/option.rs @@ -3,7 +3,7 @@ use core::mem::MaybeUninit; use const_type_layout::{TypeGraphLayout, TypeLayout}; #[cfg(feature = "host")] -use rustacuda::error::CudaResult; +use cust::error::CudaResult; use crate::{ lend::{CudaAsRust, RustToCuda, RustToCudaAsync, RustToCudaProxy}, @@ -18,7 +18,6 @@ use crate::{ }; #[doc(hidden)] -#[expect(clippy::module_name_repetitions)] #[derive(TypeLayout)] #[repr(C)] pub struct OptionCudaRepresentation { diff --git a/src/lend/impls/ref.rs b/src/lend/impls/ref.rs index 43358c546..4224f51a5 100644 --- a/src/lend/impls/ref.rs +++ b/src/lend/impls/ref.rs @@ -5,7 +5,7 @@ use std::mem::ManuallyDrop; use const_type_layout::{TypeGraphLayout, TypeLayout}; #[cfg(feature = "host")] -use rustacuda::{error::CudaResult, memory::DeviceBox, memory::LockedBox}; +use cust::{error::CudaResult, memory::DeviceBox, memory::LockedBox}; use crate::{ lend::{CudaAsRust, RustToCuda, RustToCudaAsync}, @@ -27,7 +27,6 @@ use crate::{ #[doc(hidden)] #[repr(transparent)] #[derive(TypeLayout)] -#[expect(clippy::module_name_repetitions)] pub struct RefCudaRepresentation<'a, T: 'a + PortableBitSemantics + TypeGraphLayout> { data: DeviceConstPointer, _marker: PhantomData<&'a T>, @@ -48,13 +47,13 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCuda for &'a T DeviceAccessible, CombinedCudaAlloc, )> { - let mut device_box = CudaDropWrapper::from(DeviceBox::new( + let device_box = CudaDropWrapper::from(DeviceBox::new( DeviceCopyWithPortableBitSemantics::from_ref(&**self), )?); Ok(( DeviceAccessible::from(RefCudaRepresentation { - data: DeviceConstPointer(device_box.as_device_ptr().as_raw().cast()), + data: 
DeviceConstPointer(device_box.as_device_ptr().as_ptr().cast()), _marker: PhantomData::<&'a T>, }), CombinedCudaAlloc::new(device_box, alloc), @@ -71,7 +70,7 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCuda for &'a T } } -unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCudaAsync for &'a T { +unsafe impl RustToCudaAsync for &T { #[cfg(all(feature = "host", not(doc)))] type CudaAllocationAsync = CombinedCudaAlloc< CudaDropWrapper>>>, @@ -85,20 +84,20 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCudaAsync for & &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )> { - use rustacuda::memory::AsyncCopyDestination; + use cust::memory::AsyncCopyDestination; let locked_box = unsafe { - let mut uninit = CudaDropWrapper::from(LockedBox::< + let uninit = CudaDropWrapper::from(LockedBox::< DeviceCopyWithPortableBitSemantics>, >::uninitialized()?); std::ptr::copy_nonoverlapping( std::ptr::from_ref::(&**self) .cast::>>(), - uninit.as_mut_ptr(), + uninit.as_raw(), 1, ); uninit @@ -107,12 +106,12 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCudaAsync for & let mut device_box = CudaDropWrapper::from(DeviceBox::< DeviceCopyWithPortableBitSemantics>, >::uninitialized()?); - device_box.async_copy_from(&*locked_box, &stream)?; + device_box.async_copy_from(&**locked_box, &stream)?; Ok(( Async::pending( DeviceAccessible::from(RefCudaRepresentation { - data: DeviceConstPointer(device_box.as_device_ptr().as_raw().cast()), + data: DeviceConstPointer(device_box.as_device_ptr().as_ptr().cast()), _marker: PhantomData::<&T>, }), stream, diff --git a/src/lend/impls/ref_mut.rs b/src/lend/impls/ref_mut.rs index ca9830c75..3ade45276 100644 --- a/src/lend/impls/ref_mut.rs +++ b/src/lend/impls/ref_mut.rs @@ -3,7 +3,7 @@ use core::marker::PhantomData; use const_type_layout::{TypeGraphLayout, TypeLayout}; #[cfg(feature = "host")] -use rustacuda::{error::CudaResult, memory::DeviceBox}; +use cust::{error::CudaResult, memory::DeviceBox}; use crate::{ lend::{CudaAsRust, RustToCuda}, @@ -24,7 +24,6 @@ use crate::{ #[doc(hidden)] #[repr(transparent)] #[derive(TypeLayout)] -#[expect(clippy::module_name_repetitions)] pub struct RefMutCudaRepresentation<'a, T: 'a + PortableBitSemantics + TypeGraphLayout> { data: DeviceMutPointer, _marker: PhantomData<&'a mut T>, @@ -45,13 +44,13 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCuda for &'a mu DeviceAccessible, CombinedCudaAlloc, )> { - let mut device_box = CudaDropWrapper::from(DeviceBox::new( + let device_box = CudaDropWrapper::from(DeviceBox::new( DeviceCopyWithPortableBitSemantics::from_ref(&**self), )?); Ok(( DeviceAccessible::from(RefMutCudaRepresentation { - data: DeviceMutPointer(device_box.as_device_ptr().as_raw_mut().cast()), + data: DeviceMutPointer(device_box.as_device_ptr().as_mut_ptr().cast()), _marker: PhantomData::<&'a mut T>, }), CombinedCudaAlloc::new(device_box, alloc), @@ -63,7 +62,7 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCuda for &'a mu &mut self, alloc: CombinedCudaAlloc, ) -> CudaResult { - use rustacuda::memory::CopyDestination; + use cust::memory::CopyDestination; let (alloc_front, alloc_tail) = alloc.split(); diff --git a/src/lend/impls/slice_ref.rs b/src/lend/impls/slice_ref.rs index 0a97b673f..400ef0669 100644 --- a/src/lend/impls/slice_ref.rs +++ b/src/lend/impls/slice_ref.rs @@ -5,7 +5,7 @@ use 
std::mem::ManuallyDrop; use const_type_layout::{TypeGraphLayout, TypeLayout}; #[cfg(feature = "host")] -use rustacuda::{error::CudaResult, memory::DeviceBuffer, memory::LockedBuffer}; +use cust::{error::CudaResult, memory::DeviceBuffer, memory::LockedBuffer}; use crate::{ lend::{CudaAsRust, RustToCuda, RustToCudaAsync}, @@ -25,7 +25,6 @@ use crate::{ }; #[doc(hidden)] -#[expect(clippy::module_name_repetitions)] #[derive(TypeLayout)] #[repr(C)] pub struct SliceRefCudaRepresentation<'a, T: 'a + PortableBitSemantics + TypeGraphLayout> { @@ -56,7 +55,7 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCuda for &'a [T Ok(( DeviceAccessible::from(SliceRefCudaRepresentation { - data: DeviceConstPointer(device_buffer.as_ptr().cast()), + data: DeviceConstPointer(device_buffer.as_device_ptr().as_ptr().cast()), len: device_buffer.len(), _marker: PhantomData::<&'a [T]>, }), @@ -74,6 +73,7 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCuda for &'a [T } } +#[cfg_attr(not(feature = "host"), expect(clippy::needless_lifetimes))] unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCudaAsync for &'a [T] { #[cfg(all(feature = "host", not(doc)))] type CudaAllocationAsync = CombinedCudaAlloc< @@ -88,11 +88,11 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCudaAsync for & &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )> { - use rustacuda::memory::AsyncCopyDestination; + use cust::memory::AsyncCopyDestination; let locked_buffer = unsafe { let mut uninit = CudaDropWrapper::from(LockedBuffer::< @@ -116,7 +116,7 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCudaAsync for & Ok(( Async::pending( DeviceAccessible::from(SliceRefCudaRepresentation { - data: DeviceConstPointer(device_buffer.as_ptr().cast()), + data: DeviceConstPointer(device_buffer.as_device_ptr().as_ptr().cast()), len: device_buffer.len(), _marker: PhantomData::<&'a [T]>, }), diff --git a/src/lend/impls/slice_ref_mut.rs b/src/lend/impls/slice_ref_mut.rs index 0300735cd..c98ae3111 100644 --- a/src/lend/impls/slice_ref_mut.rs +++ b/src/lend/impls/slice_ref_mut.rs @@ -3,7 +3,7 @@ use core::marker::PhantomData; use const_type_layout::{TypeGraphLayout, TypeLayout}; #[cfg(feature = "host")] -use rustacuda::{error::CudaResult, memory::DeviceBuffer}; +use cust::{error::CudaResult, memory::DeviceBuffer}; use crate::{ lend::{CudaAsRust, RustToCuda}, @@ -22,7 +22,6 @@ use crate::{ }; #[doc(hidden)] -#[expect(clippy::module_name_repetitions)] #[derive(TypeLayout)] #[repr(C)] pub struct SliceRefMutCudaRepresentation<'a, T: 'a + PortableBitSemantics + TypeGraphLayout> { @@ -47,13 +46,13 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCuda for &'a mu DeviceAccessible, CombinedCudaAlloc, )> { - let mut device_buffer = CudaDropWrapper::from(DeviceBuffer::from_slice( + let device_buffer = CudaDropWrapper::from(DeviceBuffer::from_slice( DeviceCopyWithPortableBitSemantics::from_slice(self), )?); Ok(( DeviceAccessible::from(SliceRefMutCudaRepresentation { - data: DeviceMutPointer(device_buffer.as_mut_ptr().cast()), + data: DeviceMutPointer(device_buffer.as_device_ptr().as_mut_ptr().cast()), len: device_buffer.len(), _marker: PhantomData::<&'a mut [T]>, }), @@ -66,7 +65,7 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCuda for &'a mu &mut self, alloc: CombinedCudaAlloc, ) -> CudaResult { - use 
rustacuda::memory::CopyDestination; + use cust::memory::CopyDestination; let (alloc_front, alloc_tail) = alloc.split(); diff --git a/src/lend/mod.rs b/src/lend/mod.rs index 3bca11f75..7296473e9 100644 --- a/src/lend/mod.rs +++ b/src/lend/mod.rs @@ -1,6 +1,6 @@ use const_type_layout::TypeGraphLayout; #[cfg(feature = "host")] -use rustacuda::error::CudaError; +use cust::error::CudaError; #[cfg(feature = "derive")] #[expect(clippy::module_name_repetitions)] @@ -34,7 +34,7 @@ pub unsafe trait RustToCuda { #[cfg(feature = "host")] /// # Errors /// - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA /// /// # Safety @@ -46,7 +46,7 @@ pub unsafe trait RustToCuda { unsafe fn borrow( &self, alloc: A, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( DeviceAccessible, CombinedCudaAlloc, )>; @@ -55,7 +55,7 @@ pub unsafe trait RustToCuda { #[cfg(feature = "host")] /// # Errors /// - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA /// /// # Safety @@ -64,7 +64,7 @@ pub unsafe trait RustToCuda { unsafe fn restore( &mut self, alloc: CombinedCudaAlloc, - ) -> rustacuda::error::CudaResult; + ) -> cust::error::CudaResult; } /// # Safety @@ -78,7 +78,7 @@ pub unsafe trait RustToCudaAsync: RustToCuda { #[cfg(feature = "host")] /// # Errors /// - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA /// /// # Safety @@ -101,7 +101,7 @@ pub unsafe trait RustToCudaAsync: RustToCuda { &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )>; @@ -110,7 +110,7 @@ pub unsafe trait RustToCudaAsync: RustToCuda { #[cfg(feature = "host")] /// # Errors /// - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA /// /// # Safety @@ -127,7 +127,7 @@ pub unsafe trait RustToCudaAsync: RustToCuda { this: owning_ref::BoxRefMut<'a, O, Self>, alloc: CombinedCudaAlloc, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'a, 'stream, owning_ref::BoxRefMut<'a, O, Self>, CompletionFnMut<'a, Self>>, A, )>; @@ -187,7 +187,7 @@ pub trait LendToCuda: RustToCuda { /// /// # Errors /// - /// Returns a `rustacuda::errors::CudaError` iff an error occurs inside CUDA + /// Returns a `cust::errors::CudaError` iff an error occurs inside CUDA fn lend_to_cuda_mut< O, E: From, @@ -339,7 +339,7 @@ pub trait LendToCudaAsync: RustToCudaAsync { /// /// # Errors /// - /// Returns a `rustacuda::errors::CudaError` iff an error occurs inside CUDA + /// Returns a `cust::errors::CudaError` iff an error occurs inside CUDA fn lend_to_cuda_mut_async< 'a, 'stream, diff --git a/src/lib.rs b/src/lib.rs index 5605ad612..1a1bebd63 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,7 +5,7 @@ //! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/rust-cuda/ci.yml?branch=main //! [workflow]: https://github.com/juntyr/rust-cuda/actions/workflows/ci.yml?query=branch%3Amain //! -//! [MSRV]: https://img.shields.io/badge/MSRV-1.81.0--nightly-orange +//! [MSRV]: https://img.shields.io/badge/MSRV-1.84.0--nightly-orange //! 
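The src/lend/mod.rs hunks above only swap the error-type paths, but the borrow/restore contract they document is easiest to follow end to end. Below is a minimal host-side sketch of that contract, not part of the patch: the `lend_manually` helper, the exact module paths, and the kernel-launch placeholder are illustrative assumptions, and a CUDA context is assumed to already be current on the calling thread.

    fn lend_manually<T: rust_cuda::lend::RustToCuda>(
        value: &mut T,
    ) -> rust_cuda::deps::cust::error::CudaResult<()> {
        use rust_cuda::alloc::NoCudaAlloc;
        // Safety: the borrowed device-side copy is only used until `restore` runs below.
        let (device_repr, alloc) = unsafe { value.borrow(NoCudaAlloc) }?;
        // ... pass `device_repr` to a kernel launch here (placeholder) ...
        let _ = device_repr;
        // Safety: no kernel is still using the borrowed device copy at this point.
        let _: NoCudaAlloc = unsafe { value.restore(alloc) }?;
        Ok(())
    }

In practice the safe `LendToCuda::lend_to_cuda`/`lend_to_cuda_mut` wrappers documented above drive this same sequence and scope the device borrow to a closure.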
[repo]: https://github.com/juntyr/rust-cuda //! //! [Rust Doc]: https://img.shields.io/badge/docs-main-blue @@ -29,7 +29,7 @@ #![feature(negative_impls)] #![cfg_attr(all(feature = "device", not(doc)), feature(stdarch_nvptx))] #![cfg_attr(feature = "device", feature(asm_experimental_arch))] -#![cfg_attr(feature = "device", feature(asm_const))] #![feature(doc_auto_cfg)] #![feature(doc_cfg)] #![feature(marker_trait_attr)] @@ -48,7 +47,6 @@ #![feature(generic_const_exprs)] #![expect(internal_features)] #![feature(core_intrinsics)] -#![feature(const_intrinsic_compare_bytes)] #![doc(html_root_url = "https://juntyr.github.io/rust-cuda/")] #[cfg(all(feature = "host", feature = "device", not(doc)))] diff --git a/src/safety/aliasing.rs b/src/safety/aliasing.rs index 3a9cb8442..100dbbbd8 100644 --- a/src/safety/aliasing.rs +++ b/src/safety/aliasing.rs @@ -1,4 +1,3 @@ -#[expect(clippy::module_name_repetitions)] /// Types for which mutable references can be safely shared with each CUDA /// thread without breaking Rust's no-mutable-aliasing memory safety /// guarantees. @@ -38,23 +37,21 @@ pub unsafe trait SafeMutableAliasing {} unsafe impl< - 'a, T: crate::safety::StackOnly + crate::safety::PortableBitSemantics + const_type_layout::TypeGraphLayout, const STRIDE: usize, > SafeMutableAliasing - for crate::utils::aliasing::SplitSliceOverCudaThreadsConstStride<&'a mut [T], STRIDE> + for crate::utils::aliasing::SplitSliceOverCudaThreadsConstStride<&mut [T], STRIDE> { } unsafe impl< - 'a, T: crate::safety::StackOnly + crate::safety::PortableBitSemantics + const_type_layout::TypeGraphLayout, > SafeMutableAliasing - for crate::utils::aliasing::SplitSliceOverCudaThreadsDynamicStride<&'a mut [T]> + for crate::utils::aliasing::SplitSliceOverCudaThreadsDynamicStride<&mut [T]> { } diff --git a/src/safety/portable.rs b/src/safety/portable.rs index 6013b7d74..74e42c144 100644 --- a/src/safety/portable.rs +++ b/src/safety/portable.rs @@ -1,8 +1,11 @@ macro_rules! portable_bit_semantics_docs { ($item:item) => { - /// Types whose in-memory bit representation on the CPU host is safe to copy - /// to and read back on the GPU device while maintaining the same semantics, - /// iff the type layout on the CPU matches the type layout on the GPU. + /// Types with a CPU-GPU-compatible memory representation. + /// + /// More specifically, a type's in-memory bit representation on the CPU host + /// is safe to copy to and read back on the GPU device while maintaining + /// the same semantics, iff the type layout on the CPU matches the type + /// layout on the GPU. /// /// For a type to implement [`PortableBitSemantics`], it /// @@ -36,7 +39,6 @@ macro_rules! portable_bit_semantics_docs { #[cfg(not(doc))] portable_bit_semantics_docs!
{ - #[expect(clippy::module_name_repetitions)] pub trait PortableBitSemantics: sealed::PortableBitSemantics {} } #[cfg(doc)] diff --git a/src/utils/adapter.rs b/src/utils/adapter.rs index bc8bd161b..c8a533d80 100644 --- a/src/utils/adapter.rs +++ b/src/utils/adapter.rs @@ -124,7 +124,7 @@ unsafe impl RustToCuda unsafe fn borrow( &self, alloc: A, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( DeviceAccessible, CombinedCudaAlloc, )> { @@ -136,7 +136,7 @@ unsafe impl RustToCuda unsafe fn restore( &mut self, alloc: CombinedCudaAlloc, - ) -> rustacuda::error::CudaResult { + ) -> cust::error::CudaResult { let (_alloc_front, alloc_tail): (NoCudaAlloc, A) = alloc.split(); Ok(alloc_tail) @@ -153,7 +153,7 @@ unsafe impl RustToCudaAsync &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )> { @@ -169,7 +169,7 @@ unsafe impl RustToCudaAsync this: owning_ref::BoxRefMut<'a, O, Self>, alloc: CombinedCudaAlloc, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async< 'a, 'stream, @@ -312,7 +312,7 @@ unsafe impl RustToCuda unsafe fn borrow( &self, alloc: A, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( DeviceAccessible, CombinedCudaAlloc, )> { @@ -324,7 +324,7 @@ unsafe impl RustToCuda unsafe fn restore( &mut self, alloc: CombinedCudaAlloc, - ) -> rustacuda::error::CudaResult { + ) -> cust::error::CudaResult { let (_alloc_front, alloc_tail): (NoCudaAlloc, A) = alloc.split(); Ok(alloc_tail) @@ -341,7 +341,7 @@ unsafe impl RustToCudaAsync &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )> { @@ -357,7 +357,7 @@ unsafe impl RustToCudaAsync this: owning_ref::BoxRefMut<'a, O, Self>, alloc: CombinedCudaAlloc, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async< 'a, 'stream, @@ -394,7 +394,7 @@ unsafe impl CudaAsRust #[repr(transparent)] pub struct DeviceCopyWithPortableBitSemantics(T); -unsafe impl rustacuda_core::DeviceCopy +unsafe impl cust_core::DeviceCopy for DeviceCopyWithPortableBitSemantics { } diff --git a/src/utils/aliasing/const.rs b/src/utils/aliasing/const.rs index 4cd6eb228..624aa1ea5 100644 --- a/src/utils/aliasing/const.rs +++ b/src/utils/aliasing/const.rs @@ -193,7 +193,7 @@ unsafe impl RustToCuda unsafe fn borrow( &self, alloc: A, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( DeviceAccessible, crate::alloc::CombinedCudaAlloc, )> { @@ -209,7 +209,7 @@ unsafe impl RustToCuda unsafe fn restore( &mut self, alloc: crate::alloc::CombinedCudaAlloc, - ) -> rustacuda::error::CudaResult { + ) -> cust::error::CudaResult { self.0.restore(alloc) } } @@ -224,7 +224,7 @@ unsafe impl RustToCudaAsync &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async<'_, 'stream, DeviceAccessible>, crate::alloc::CombinedCudaAlloc, )> { @@ -252,7 +252,7 @@ unsafe impl RustToCudaAsync this: owning_ref::BoxRefMut<'a, O, Self>, alloc: crate::alloc::CombinedCudaAlloc, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( 
crate::utils::r#async::Async< 'a, 'stream, diff --git a/src/utils/aliasing/dynamic.rs b/src/utils/aliasing/dynamic.rs index 2e16bf42e..2fd8c3646 100644 --- a/src/utils/aliasing/dynamic.rs +++ b/src/utils/aliasing/dynamic.rs @@ -170,7 +170,7 @@ unsafe impl RustToCuda for SplitSliceOverCudaThreadsDynamicStride unsafe fn borrow( &self, alloc: A, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( DeviceAccessible, crate::alloc::CombinedCudaAlloc, )> { @@ -189,7 +189,7 @@ unsafe impl RustToCuda for SplitSliceOverCudaThreadsDynamicStride unsafe fn restore( &mut self, alloc: crate::alloc::CombinedCudaAlloc, - ) -> rustacuda::error::CudaResult { + ) -> cust::error::CudaResult { self.inner.restore(alloc) } } @@ -202,7 +202,7 @@ unsafe impl RustToCudaAsync for SplitSliceOverCudaThreadsDyn &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async<'_, 'stream, DeviceAccessible>, crate::alloc::CombinedCudaAlloc, )> { @@ -232,7 +232,7 @@ unsafe impl RustToCudaAsync for SplitSliceOverCudaThreadsDyn this: owning_ref::BoxRefMut<'a, O, Self>, alloc: crate::alloc::CombinedCudaAlloc, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async< 'a, 'stream, diff --git a/src/utils/async.rs b/src/utils/async.rs index 6221447a8..a6791b313 100644 --- a/src/utils/async.rs +++ b/src/utils/async.rs @@ -2,7 +2,7 @@ use std::{borrow::BorrowMut, future::Future, future::IntoFuture, marker::PhantomData, task::Poll}; #[cfg(feature = "host")] -use rustacuda::{ +use cust::{ error::CudaError, error::CudaResult, event::Event, event::EventFlags, stream::StreamWaitEventFlags, }; @@ -55,7 +55,7 @@ impl Completion for NoCompletion { impl sealed::Sealed for NoCompletion {} #[cfg(feature = "host")] -impl<'a, T: ?Sized + BorrowMut, B: ?Sized> Completion for CompletionFnMut<'a, B> { +impl, B: ?Sized> Completion for CompletionFnMut<'_, B> { type Completed = B; #[inline] @@ -74,7 +74,7 @@ impl<'a, T: ?Sized + BorrowMut, B: ?Sized> Completion for CompletionFnMut< } } #[cfg(feature = "host")] -impl<'a, T: ?Sized> sealed::Sealed for CompletionFnMut<'a, T> {} +impl sealed::Sealed for CompletionFnMut<'_, T> {} #[cfg(feature = "host")] impl, C: Completion> Completion for Option { @@ -87,7 +87,7 @@ impl, C: Completion> Completion for Op #[inline] fn synchronize_on_drop(&self) -> bool { - self.as_ref().map_or(false, Completion::synchronize_on_drop) + self.as_ref().is_some_and(Completion::synchronize_on_drop) } #[inline] @@ -136,7 +136,7 @@ impl<'a, 'stream, T: BorrowMut, C: Completion> Async<'a, 'strea /// such that its computation can be synchronised on. /// /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA. pub fn pending(value: T, stream: Stream<'stream>, completion: C) -> CudaResult { let (sender, receiver) = oneshot::channel(); @@ -160,11 +160,11 @@ impl<'a, 'stream, T: BorrowMut, C: Completion> Async<'a, 'strea /// operations. /// /// Calling `synchronize` after the computation has completed, e.g. after - /// calling [`rustacuda::stream::Stream::synchronize`], should be very + /// calling [`cust::stream::Stream::synchronize`], should be very /// cheap. /// /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA. 
pub fn synchronize(self) -> CudaResult { let (_stream, mut value, status) = self.destructure_into_parts(); @@ -198,7 +198,7 @@ impl<'a, 'stream, T: BorrowMut, C: Completion> Async<'a, 'strea /// used on the new one. /// /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA. pub fn move_to_stream<'stream_new>( self, @@ -407,7 +407,7 @@ where } #[cfg(feature = "host")] -impl<'a, 'stream, T: BorrowMut, C: Completion> Drop for Async<'a, 'stream, T, C> { +impl, C: Completion> Drop for Async<'_, '_, T, C> { fn drop(&mut self) { let AsyncStatus::Processing { receiver, @@ -434,9 +434,7 @@ struct AsyncFuture<'a, 'stream, T: BorrowMut, C: Completion> { } #[cfg(feature = "host")] -impl<'a, 'stream, T: BorrowMut, C: Completion> Future - for AsyncFuture<'a, 'stream, T, C> -{ +impl, C: Completion> Future for AsyncFuture<'_, '_, T, C> { type Output = CudaResult; fn poll( @@ -517,9 +515,7 @@ impl<'a, 'stream, T: BorrowMut, C: Completion> IntoFuture } #[cfg(feature = "host")] -impl<'a, 'stream, T: BorrowMut, C: Completion> Drop - for AsyncFuture<'a, 'stream, T, C> -{ +impl, C: Completion> Drop for AsyncFuture<'_, '_, T, C> { fn drop(&mut self) { let Some(mut value) = self.value.take() else { return; diff --git a/src/utils/exchange/buffer/device.rs b/src/utils/exchange/buffer/device.rs index 760fe4d35..047652186 100644 --- a/src/utils/exchange/buffer/device.rs +++ b/src/utils/exchange/buffer/device.rs @@ -9,7 +9,6 @@ use crate::{ use super::CudaExchangeItem; -#[expect(clippy::module_name_repetitions)] pub struct CudaExchangeBufferDevice< T: StackOnly + PortableBitSemantics + TypeGraphLayout, const M2D: bool, diff --git a/src/utils/exchange/buffer/host.rs b/src/utils/exchange/buffer/host.rs index f5a3e5308..05b8d1ead 100644 --- a/src/utils/exchange/buffer/host.rs +++ b/src/utils/exchange/buffer/host.rs @@ -4,7 +4,7 @@ use std::{ }; use const_type_layout::TypeGraphLayout; -use rustacuda::{ +use cust::{ error::CudaResult, memory::{DeviceBuffer, LockedBuffer}, }; @@ -22,7 +22,6 @@ use crate::{ use super::{common::CudaExchangeBufferCudaRepresentation, CudaExchangeItem}; -#[expect(clippy::module_name_repetitions)] pub struct CudaExchangeBufferHost< T: StackOnly + PortableBitSemantics + TypeGraphLayout, const M2D: bool, @@ -45,7 +44,7 @@ impl< > CudaExchangeBufferHost { /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA pub fn new(elem: &T, capacity: usize) -> CudaResult { // Safety: CudaExchangeItem is a `repr(transparent)` wrapper around T @@ -70,7 +69,7 @@ impl { /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA pub fn from_vec(vec: Vec) -> CudaResult { let host_buffer = unsafe { @@ -127,7 +126,7 @@ impl( &self, alloc: A, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( DeviceAccessible>, CombinedCudaAlloc, )> { @@ -138,7 +137,7 @@ impl( &mut self, alloc: CombinedCudaAlloc, - ) -> rustacuda::error::CudaResult { + ) -> cust::error::CudaResult { let (_alloc_front, alloc_tail) = alloc.split(); if M2H { // Only move the buffer contents back to the host if needed - rustacuda::memory::CopyDestination::copy_to( + cust::memory::CopyDestination::copy_to( &***self.device_buffer.get_mut(), self.host_buffer.as_mut_slice(), )?; @@ -180,7 +179,7 @@ impl, - ) -> 
rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'_, 'stream, DeviceAccessible>>, CombinedCudaAlloc, )> { @@ -191,7 +190,7 @@ impl, alloc: CombinedCudaAlloc, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'a, 'stream, owning_ref::BoxRefMut<'a, O, Self>, CompletionFnMut<'a, Self>>, A, )> { @@ -228,7 +227,7 @@ impl CudaExchangeBuffer { /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA - pub fn new(elem: &T, capacity: usize) -> rustacuda::error::CudaResult { + pub fn new(elem: &T, capacity: usize) -> cust::error::CudaResult { Ok(Self { inner: host::CudaExchangeBufferHost::new(elem, capacity)?, }) @@ -77,9 +77,9 @@ impl { /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA - pub fn from_vec(vec: Vec) -> rustacuda::error::CudaResult { + pub fn from_vec(vec: Vec) -> cust::error::CudaResult { Ok(Self { inner: host::CudaExchangeBufferHost::from_vec(vec)?, }) @@ -117,7 +117,7 @@ unsafe impl( &self, alloc: A, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( DeviceAccessible, CombinedCudaAlloc, )> { @@ -128,7 +128,7 @@ unsafe impl( &mut self, alloc: CombinedCudaAlloc, - ) -> rustacuda::error::CudaResult { + ) -> cust::error::CudaResult { self.inner.restore(alloc) } } @@ -144,7 +144,7 @@ unsafe impl, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )> { @@ -156,7 +156,7 @@ unsafe impl, alloc: CombinedCudaAlloc, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'a, 'stream, owning_ref::BoxRefMut<'a, O, Self>, CompletionFnMut<'a, Self>>, A, )> { diff --git a/src/utils/exchange/wrapper.rs b/src/utils/exchange/wrapper.rs index ed15c63de..3c56ebfc1 100644 --- a/src/utils/exchange/wrapper.rs +++ b/src/utils/exchange/wrapper.rs @@ -1,6 +1,6 @@ use std::ops::{Deref, DerefMut}; -use rustacuda::{ +use cust::{ error::CudaResult, memory::{AsyncCopyDestination, CopyDestination, DeviceBox, LockedBox}, }; @@ -55,7 +55,7 @@ pub struct ExchangeWrapperOnDevice impl> ExchangeWrapperOnHost { /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA pub fn new(value: T) -> CudaResult { // Safety: The uninitialised memory is never exposed @@ -65,13 +65,13 @@ impl> ExchangeWrapperOnHost { let (cuda_repr, _null_alloc) = unsafe { value.borrow(NoCudaAlloc) }?; let locked_cuda_repr = unsafe { - let mut uninit = CudaDropWrapper::from(LockedBox::< + let uninit = CudaDropWrapper::from(LockedBox::< DeviceCopyWithPortableBitSemantics< DeviceAccessible<::CudaRepresentation>, >, >::uninitialized()?); uninit - .as_mut_ptr() + .as_raw() .write(DeviceCopyWithPortableBitSemantics::from(cuda_repr)); uninit }; @@ -88,7 +88,7 @@ impl> ExchangeWrapperOnHost { /// via [`ExchangeWrapperOnDevice::as_mut_async`](Async::as_mut_async). 
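The `CudaExchangeBuffer::new` and `CudaExchangeBuffer::from_vec` constructors a few hunks above keep their shape and only change the error path. As a rough usage sketch, not part of the patch: the re-export path, the `<T, const M2D: bool, const M2H: bool>` parameter order, and the presence of a current CUDA context are assumptions taken from the surrounding code.

    fn make_buffers() -> rust_cuda::deps::cust::error::CudaResult<()> {
        use rust_cuda::utils::exchange::buffer::CudaExchangeBuffer;
        // 1024 elements, mirrored host -> device before a launch (M2D) and
        // device -> host afterwards (M2H).
        let _zeroed = CudaExchangeBuffer::<f32, true, true>::new(&0.0_f32, 1024)?;
        // Alternatively, take ownership of existing host data instead of
        // cloning a fill element.
        let _owned = CudaExchangeBuffer::<f32, true, true>::from_vec(vec![1.0_f32; 1024])?;
        Ok(())
    }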
/// /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA pub fn move_to_device(mut self) -> CudaResult> { let (cuda_repr, null_alloc) = unsafe { self.value.borrow(NoCudaAlloc) }?; @@ -113,7 +113,7 @@ impl( mut self, @@ -130,7 +130,7 @@ impl> ExchangeWrapperOnDevice { /// Moves the data synchronously back to the host CPU device. /// /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA pub fn move_to_host(mut self) -> CudaResult> { let null_alloc = NoCudaAlloc.into(); @@ -201,7 +201,7 @@ impl( self, @@ -251,15 +251,14 @@ impl, - > Async<'a, 'stream, ExchangeWrapperOnDevice, NoCompletion> + > Async<'_, 'stream, ExchangeWrapperOnDevice, NoCompletion> { /// Moves the data asynchronously back to the host CPU device. /// /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA pub fn move_to_host_async( self, diff --git a/src/utils/ffi.rs b/src/utils/ffi.rs index 9566a0c40..f94af17d8 100644 --- a/src/utils/ffi.rs +++ b/src/utils/ffi.rs @@ -66,16 +66,16 @@ pub struct DeviceConstRef<'r, T: PortableBitSemantics + 'r> { pub(crate) reference: PhantomData<&'r T>, } -impl<'r, T: PortableBitSemantics> Copy for DeviceConstRef<'r, T> {} +impl Copy for DeviceConstRef<'_, T> {} -impl<'r, T: PortableBitSemantics> Clone for DeviceConstRef<'r, T> { +impl Clone for DeviceConstRef<'_, T> { fn clone(&self) -> Self { *self } } #[cfg(feature = "device")] -impl<'r, T: PortableBitSemantics> AsRef for DeviceConstRef<'r, T> { +impl AsRef for DeviceConstRef<'_, T> { fn as_ref(&self) -> &T { unsafe { &*self.pointer.0 } } @@ -90,14 +90,14 @@ pub struct DeviceMutRef<'r, T: PortableBitSemantics + 'r> { } #[cfg(feature = "device")] -impl<'r, T: PortableBitSemantics> AsRef for DeviceMutRef<'r, T> { +impl AsRef for DeviceMutRef<'_, T> { fn as_ref(&self) -> &T { unsafe { &*self.pointer.0 } } } #[cfg(feature = "device")] -impl<'r, T: PortableBitSemantics> AsMut for DeviceMutRef<'r, T> { +impl AsMut for DeviceMutRef<'_, T> { fn as_mut(&mut self) -> &mut T { unsafe { &mut *self.pointer.0 } } @@ -113,14 +113,14 @@ pub struct DeviceOwnedRef<'r, T: PortableBitSemantics> { } #[cfg(feature = "device")] -impl<'r, T: PortableBitSemantics> AsRef for DeviceOwnedRef<'r, T> { +impl AsRef for DeviceOwnedRef<'_, T> { fn as_ref(&self) -> &T { unsafe { &*self.pointer.0 } } } #[cfg(feature = "device")] -impl<'r, T: PortableBitSemantics> AsMut for DeviceOwnedRef<'r, T> { +impl AsMut for DeviceOwnedRef<'_, T> { fn as_mut(&mut self) -> &mut T { unsafe { &mut *self.pointer.0 } } diff --git a/src/utils/shared/slice.rs b/src/utils/shared/slice.rs index a3df82d06..a691bd2ea 100644 --- a/src/utils/shared/slice.rs +++ b/src/utils/shared/slice.rs @@ -2,7 +2,6 @@ use core::alloc::Layout; use const_type_layout::TypeGraphLayout; -#[expect(clippy::module_name_repetitions)] #[repr(transparent)] pub struct ThreadBlockSharedSlice { shared: *mut [T], @@ -11,7 +10,7 @@ pub struct ThreadBlockSharedSlice { impl ThreadBlockSharedSlice { #[cfg(feature = "host")] #[must_use] - pub fn new_uninit_with_len(len: usize) -> Self { + pub const fn new_uninit_with_len(len: usize) -> Self { Self { shared: Self::dangling_slice_with_len(len), } @@ -19,7 +18,7 @@ impl ThreadBlockSharedSlice { #[cfg(feature = "host")] #[must_use] - pub fn with_len(mut 
self, len: usize) -> Self { + pub const fn with_len(mut self, len: usize) -> Self { self.shared = Self::dangling_slice_with_len(len); self } @@ -32,7 +31,7 @@ impl ThreadBlockSharedSlice { } #[cfg(feature = "host")] - fn dangling_slice_with_len(len: usize) -> *mut [T] { + const fn dangling_slice_with_len(len: usize) -> *mut [T] { core::ptr::slice_from_raw_parts_mut(core::ptr::NonNull::dangling().as_ptr(), len) }
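Marking `new_uninit_with_len`, `with_len`, and `dangling_slice_with_len` as `const fn` above lets host code describe the shared-slice shape in const contexts; at runtime the host-side slice stays dangling and only records the length until a launch provides the actual block-shared memory. A minimal sketch, assuming `ThreadBlockSharedSlice` is reachable under `rust_cuda::utils::shared` and that `f32` satisfies its layout bounds:

    fn block_scratch(threads_per_block: usize) -> rust_cuda::utils::shared::ThreadBlockSharedSlice<f32> {
        // One f32 slot per thread in the block; only the length is recorded here.
        rust_cuda::utils::shared::ThreadBlockSharedSlice::new_uninit_with_len(threads_per_block)
    }

The builder-style `with_len` shown above can equally be used to resize an existing descriptor before a launch.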