1mod backend;
4mod convert;
5mod ffi;
6mod kernel;
7
8pub use backend::{Backend, Cpu, Npu};
9pub use ffi::NpuDesc;
10pub use kernel::{Buffer, Kernel};
11
12#[cfg(furiosa_opt)]
14pub type CurrentBackend = Npu;
15
16#[cfg(not(furiosa_opt))]
17#[doc(hidden)]
18pub type CurrentBackend = Cpu;
19
/// Builds the path of a kernel binary: `{out_dir}/{pkg}/{stem}.bin`.
///
/// The `stem` is made of every `::`-separated segment of `module_path`
/// except the first one (presumably the crate name), followed by `fn_name`,
/// all joined with `__`.
///
/// For instance, `kernel_path("/out", "pkg", "my_crate::a::b", "f")` yields
/// `"/out/pkg/a__b__f.bin"`, and a `module_path` without any `::` yields
/// `"{out_dir}/{pkg}/{fn_name}.bin"`.
///
/// The components are concatenated with `/` as-is; no path normalization is
/// performed.
pub fn kernel_path(out_dir: &str, pkg: &str, module_path: &str, fn_name: &str) -> String {
    // `split` always yields at least one segment, so skipping one here can
    // never drop `fn_name`, which is appended afterwards.
    let stem = module_path
        .split("::")
        .skip(1)
        .chain(std::iter::once(fn_name))
        .collect::<Vec<_>>()
        .join("__");
    format!("{out_dir}/{pkg}/{stem}.bin")
}
33
/// Calls a callable with its arguments bundled into a single `Args` value.
///
/// In this file `Args` is either a shared or exclusive reference (for
/// callables taking one reference argument) or a tuple whose elements are
/// passed as the individual arguments.
pub trait TupleApply<Args> {
    /// The value returned by the call.
    type Output;

    /// Consumes `self` and invokes it with the arguments carried by `args`.
    fn apply(self, args: Args) -> Self::Output;
}
44
45impl<F, A, R> TupleApply<&mut A> for F
46where
47 F: FnOnce(&mut A) -> R,
48{
49 type Output = R;
50 fn apply(self, a: &mut A) -> R {
51 self(a)
52 }
53}
54
55impl<F, A, R> TupleApply<&A> for F
56where
57 F: FnOnce(&A) -> R,
58{
59 type Output = R;
60 fn apply(self, a: &A) -> R {
61 self(a)
62 }
63}
64
65macro_rules! impl_tuple_apply {
66 ($($T:ident),+) => {
67 #[expect(non_snake_case, reason = "type parameters A..Z used as destructuring variable names")]
68 impl<Func, $($T,)+ Ret> TupleApply<($($T,)+)> for Func
69 where
70 Func: FnOnce($($T,)+) -> Ret,
71 {
72 type Output = Ret;
73 fn apply(self, ($($T,)+): ($($T,)+)) -> Ret {
74 self($($T,)+)
75 }
76 }
77 };
78}
79
80impl_tuple_apply!(A, B);
81impl_tuple_apply!(A, B, C);
82impl_tuple_apply!(A, B, C, D);
83impl_tuple_apply!(A, B, C, D, E);
84impl_tuple_apply!(A, B, C, D, E, G);
85impl_tuple_apply!(A, B, C, D, E, G, H);
86impl_tuple_apply!(A, B, C, D, E, G, H, I);
87impl_tuple_apply!(A, B, C, D, E, G, H, I, J);
88impl_tuple_apply!(A, B, C, D, E, G, H, I, J, K);
89impl_tuple_apply!(A, B, C, D, E, G, H, I, J, K, L);
90impl_tuple_apply!(A, B, C, D, E, G, H, I, J, K, L, M);
91impl_tuple_apply!(A, B, C, D, E, G, H, I, J, K, L, M, N);
92impl_tuple_apply!(A, B, C, D, E, G, H, I, J, K, L, M, N, O);
93impl_tuple_apply!(A, B, C, D, E, G, H, I, J, K, L, M, N, O, P);
94impl_tuple_apply!(A, B, C, D, E, G, H, I, J, K, L, M, N, O, P, Q);
95impl_tuple_apply!(A, B, C, D, E, G, H, I, J, K, L, M, N, O, P, Q, R);
96impl_tuple_apply!(A, B, C, D, E, G, H, I, J, K, L, M, N, O, P, Q, R, S);
97impl_tuple_apply!(A, B, C, D, E, G, H, I, J, K, L, M, N, O, P, Q, R, S, T);
98impl_tuple_apply!(A, B, C, D, E, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U);
99impl_tuple_apply!(A, B, C, D, E, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V);
100impl_tuple_apply!(A, B, C, D, E, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W);
101
/// Marker trait required of the argument and output types of [`DeviceFn`]
/// and [`launch`].
///
/// It has no methods. It is `pub(crate)` on purpose: the trait is sealed, so
/// code outside this crate cannot add implementations.
pub(crate) trait DeviceSend {}
117
118impl DeviceSend for () {}
119impl DeviceSend for bool {}
120impl DeviceSend for i8 {}
121impl DeviceSend for i16 {}
122impl DeviceSend for i32 {}
123impl DeviceSend for i64 {}
124impl DeviceSend for isize {}
125impl DeviceSend for u8 {}
126impl DeviceSend for u16 {}
127impl DeviceSend for u32 {}
128impl DeviceSend for u64 {}
129impl DeviceSend for usize {}
130impl DeviceSend for f32 {}
131impl DeviceSend for f64 {}
132
133macro_rules! impl_device_send_tuple {
134 ($($T:ident),+) => {
135 impl<$($T: DeviceSend),+> DeviceSend for ($($T,)+) {}
136 };
137}
138
139impl_device_send_tuple!(A);
140impl_device_send_tuple!(A, B);
141impl_device_send_tuple!(A, B, C);
142impl_device_send_tuple!(A, B, C, D);
143impl_device_send_tuple!(A, B, C, D, E);
144impl_device_send_tuple!(A, B, C, D, E, F);
145impl_device_send_tuple!(A, B, C, D, E, F, G);
146impl_device_send_tuple!(A, B, C, D, E, F, G, H);
147impl_device_send_tuple!(A, B, C, D, E, F, G, H, I);
148impl_device_send_tuple!(A, B, C, D, E, F, G, H, I, J);
149impl_device_send_tuple!(A, B, C, D, E, F, G, H, I, J, K);
150impl_device_send_tuple!(A, B, C, D, E, F, G, H, I, J, K, L);
151impl_device_send_tuple!(A, B, C, D, E, F, G, H, I, J, K, L, M);
152impl_device_send_tuple!(A, B, C, D, E, F, G, H, I, J, K, L, M, N);
153impl_device_send_tuple!(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O);
154impl_device_send_tuple!(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
155impl_device_send_tuple!(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q);
156impl_device_send_tuple!(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R);
157impl_device_send_tuple!(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S);
158impl_device_send_tuple!(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T);
159impl_device_send_tuple!(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U);
160impl_device_send_tuple!(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V);
161impl_device_send_tuple!(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W);
162
163impl<T> DeviceSend for std::marker::PhantomData<T> {}
164
165#[expect(
170 private_bounds,
171 reason = "DeviceSend is intentionally sealed to prevent foreign impls"
172)]
173pub trait DeviceFn<Args: DeviceSend> {
174 type Output: DeviceSend;
176 fn execute(args: Args) -> impl std::future::Future<Output = Self::Output>;
178}
179
180#[expect(
184 private_bounds,
185 reason = "DeviceSend is intentionally sealed to prevent foreign impls"
186)]
187pub async fn launch<F, P>(_f: F, args: P) -> F::Output
188where
189 F: DeviceFn<P>,
190 P: DeviceSend,
191{
192 F::execute(args).await
193}