Skip to content
This repository was archived by the owner on Oct 31, 2025. It is now read-only.

Commit 1c2bd65

Browse files
committed
improve transition effect function
We now use a function with fewer branches, which leads to better auto-vectorization.
1 parent 5c5c7ff commit 1c2bd65

File tree

1 file changed

+76
-25
lines changed

1 file changed

+76
-25
lines changed

daemon/src/animations/transitions.rs

Lines changed: 76 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::{cell::RefCell, rc::Rc, time::Instant};
1+
use std::{cell::RefCell, num::NonZeroU8, rc::Rc, time::Instant};
22

33
use crate::{WaylandObject, wallpaper::Wallpaper};
44
use common::ipc::{PixelFormat, Transition, TransitionType};
@@ -26,14 +26,30 @@ fn bezier_seq(transition: &Transition, start: f32, end: f32) -> (AnimationSequen
2626
}
2727

2828
#[inline(always)]
29-
fn change_byte(step: u8, old: &mut u8, new: &u8) {
30-
if old.abs_diff(*new) < step {
31-
*old = *new;
32-
} else if *old > *new {
33-
*old -= step;
34-
} else {
35-
*old += step;
29+
/// This computes the following:
30+
/// ```ignore
31+
/// if old.abs_diff(*new) < step.get() {
32+
/// *old = *new;
33+
/// } else if *old > *new {
34+
/// *old -= step.get();
35+
/// } else {
36+
/// *old += step.get();
37+
/// }
38+
/// ```
39+
/// However, it does so with fewer branches, making it more amenable to being auto-vectorized.
40+
/// In my tests, this is almost twice as fast as the above code on x86_64 when compiling without
41+
/// any target features. It is only slightly (5%) slower when we compile with avx512. However,
42+
/// avx512 code is already pretty fast on its own, and thus benefits less from this.
43+
fn change_byte(step: NonZeroU8, old: &mut u8, new: &u8) {
44+
let min = (*old).min(*new);
45+
let max = (*old).max(*new);
46+
let diff = max - min;
47+
let mut to_add = step.get().min(diff);
48+
49+
if *old > *new {
50+
to_add = to_add.wrapping_neg();
3651
}
52+
*old = old.wrapping_add(to_add);
3753
}
3854

3955
struct None;
@@ -75,7 +91,7 @@ pub enum Effect {
7591
impl Effect {
7692
pub fn new(transition: &Transition, pixel_format: PixelFormat, dimensions: (u32, u32)) -> Self {
7793
match transition.transition_type {
78-
TransitionType::Simple => Self::Simple(Simple::new(transition.step.get())),
94+
TransitionType::Simple => Self::Simple(Simple::new(transition.step)),
7995
TransitionType::Fade => Self::Fade(Fade::new(transition)),
8096
TransitionType::Outer => Self::Outer(Outer::new(transition, pixel_format, dimensions)),
8197
TransitionType::Wipe => Self::Wipe(Wipe::new(transition, pixel_format, dimensions)),
@@ -104,26 +120,30 @@ impl Effect {
104120
};
105121
// we only finish for real if we are doing a None or a Simple transition
106122
if done {
123+
#[inline(always)]
124+
const fn new_nonzero(step: u8) -> NonZeroU8 {
125+
NonZeroU8::new(step / 4 + 4).unwrap()
126+
}
107127
*self = match self {
108128
Effect::None(_) | Effect::Simple(_) => return true,
109-
Effect::Fade(t) => Effect::Simple(Simple::new((t.step / 4 + 4) as u8)),
110-
Effect::Wave(t) => Effect::Simple(Simple::new(t.step / 4 + 4)),
111-
Effect::Wipe(t) => Effect::Simple(Simple::new(t.step / 4 + 4)),
112-
Effect::Grow(t) => Effect::Simple(Simple::new(t.step / 4 + 4)),
113-
Effect::Outer(t) => Effect::Simple(Simple::new(t.step / 4 + 4)),
129+
Effect::Fade(t) => Effect::Simple(Simple::new(new_nonzero(t.step as u8))),
130+
Effect::Wave(t) => Effect::Simple(Simple::new(new_nonzero(t.step.get()))),
131+
Effect::Wipe(t) => Effect::Simple(Simple::new(new_nonzero(t.step.get()))),
132+
Effect::Grow(t) => Effect::Simple(Simple::new(new_nonzero(t.step.get()))),
133+
Effect::Outer(t) => Effect::Simple(Simple::new(new_nonzero(t.step.get()))),
114134
};
115135
return false;
116-
}
136+
};
117137
done
118138
}
119139
}
120140

121141
struct Simple {
122-
step: u8,
142+
step: NonZeroU8,
123143
}
124144

125145
impl Simple {
126-
fn new(step: u8) -> Self {
146+
fn new(step: NonZeroU8) -> Self {
127147
Self { step }
128148
}
129149
fn run(
@@ -201,7 +221,7 @@ struct Wave {
201221
circle_radius: f64,
202222
a: f64,
203223
b: f64,
204-
step: u8,
224+
step: NonZeroU8,
205225
}
206226

207227
impl Wave {
@@ -225,7 +245,7 @@ impl Wave {
225245

226246
let (seq, start) = bezier_seq(transition, offset as f32, max_offset as f32);
227247

228-
let step = transition.step.get();
248+
let step = transition.step;
229249
let channels = pixel_format.channels() as usize;
230250
let stride = width * channels;
231251
Self {
@@ -349,7 +369,7 @@ struct Wipe {
349369
circle_radius: f64,
350370
a: f64,
351371
b: f64,
352-
step: u8,
372+
step: NonZeroU8,
353373
}
354374

355375
impl Wipe {
@@ -375,7 +395,7 @@ impl Wipe {
375395
let (width, height) = (width as usize, height as usize);
376396
let (seq, start) = bezier_seq(transition, offset as f32, max_offset as f32);
377397

378-
let step = transition.step.get();
398+
let step = transition.step;
379399
let channels = pixel_format.channels() as usize;
380400
let stride = width * channels;
381401
Self {
@@ -449,7 +469,7 @@ struct Grow {
449469
center_y: usize,
450470
stride: usize,
451471
dist_center: f32,
452-
step: u8,
472+
step: NonZeroU8,
453473
}
454474

455475
impl Grow {
@@ -472,7 +492,7 @@ impl Grow {
472492
let (width, height) = (width as usize, height as usize);
473493
let (center_x, center_y) = (center_x as usize, center_y as usize);
474494

475-
let step = transition.step.get();
495+
let step = transition.step;
476496
let channels = pixel_format.channels() as usize;
477497
let stride = width * channels;
478498
let (seq, start) = bezier_seq(transition, 0.0, dist_end);
@@ -545,7 +565,7 @@ struct Outer {
545565
center_y: usize,
546566
stride: usize,
547567
dist_center: f32,
548-
step: u8,
568+
step: NonZeroU8,
549569
}
550570

551571
impl Outer {
@@ -566,7 +586,7 @@ impl Outer {
566586
let (width, height) = (width as usize, height as usize);
567587
let (center_x, center_y) = (center_x as usize, center_y as usize);
568588

569-
let step = transition.step.get();
589+
let step = transition.step;
570590
let channels = pixel_format.channels() as usize;
571591
let stride = width * channels;
572592
let (seq, start) = bezier_seq(transition, dist_center, 0.0);
@@ -629,3 +649,34 @@ impl Outer {
629649
self.start.elapsed().as_secs_f64() > self.seq.duration()
630650
}
631651
}
652+
653+
#[cfg(test)]
654+
mod tests {
655+
use std::num::NonZeroU8;
656+
657+
#[test]
658+
fn change_byte() {
659+
fn expected(step: NonZeroU8, old: &mut u8, new: &u8) {
660+
if old.abs_diff(*new) < step.get() {
661+
*old = *new;
662+
} else if *old > *new {
663+
*old -= step.get();
664+
} else {
665+
*old += step.get();
666+
}
667+
}
668+
669+
for old in 0..=255 {
670+
for new in 0..=255 {
671+
for step in 1..=255 {
672+
let step = NonZeroU8::new(step).unwrap();
673+
let mut a = old;
674+
let mut b = old;
675+
expected(step, &mut a, &new);
676+
super::change_byte(step, &mut b, &new);
677+
assert_eq!(a, b, "old: {old}, new: {new}, step: {step}");
678+
}
679+
}
680+
}
681+
}
682+
}

0 commit comments

Comments
 (0)