Skip to content

Commit f600da7

Browse files
authored
Merge pull request #28 from codri/rust_native_simd
Issue #9: Switched to native SIMD instructions in Rust. Deleted build.rs and the C code.
2 parents 9a63ddf + b382105 commit f600da7

File tree

4 files changed

+36
-58
lines changed

4 files changed

+36
-58
lines changed

Cargo.toml

-4
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,6 @@ authors = [ "Raph Levien <[email protected]>" ]
66
keywords = ["font", "truetype", "ttf"]
77
description = "A font renderer written (mostly) in pure, safe Rust"
88
repository = "https://github.com/google/font-rs"
9-
build = "build.rs"
109

1110
[features]
1211
sse = []
13-
14-
[target.'cfg(target_feature = "sse")'.build-dependencies]
15-
gcc = "0.3"

build.rs

-11
This file was deleted.

src/accumulate.c

-37
This file was deleted.

src/accumulate.rs

+36-6
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,18 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
#[cfg(feature = "sse")]
16-
#[link(name = "accumulate")]
17-
extern "C" {
18-
fn accumulate_sse(src: *const f32, dst: *mut u8, n: u32);
15+
use std::mem;
16+
17+
#[cfg(target_arch = "x86_64")]
18+
use std::arch::x86_64::*;
19+
20+
#[cfg(target_arch = "x86")]
21+
use std::arch::x86::*;
22+
23+
// Packs four 2-bit lane selectors into a single 8-bit shuffle immediate:
// `$z` lands in bits 7..6, `$y` in bits 5..4, `$x` in bits 3..2 and `$w` in
// bits 1..0. This is the same bit layout as C's `_MM_SHUFFLE(z, y, x, w)`.
// Each argument is expected to be in 0..=3; the macro does not check this.
// Used below as the control argument of `_mm_shuffle_ps`, where
// `_mm_shuffle!(3, 3, 3, 3)` (= 0xFF) selects lane 3 for every output lane.
macro_rules! _mm_shuffle {
24+
($z:expr, $y:expr, $x:expr, $w:expr) => {
25+
($z << 6) | ($y << 4) | ($x << 2) | $w
26+
};
1927
}
2028

2129
#[cfg(feature = "sse")]
@@ -28,11 +36,33 @@ pub fn accumulate(src: &[f32]) -> Vec<u8> {
2836
// and so on
2937
let len = src.len();
3038
let n = (len + 3) & !3; // round len up to the next multiple of 4 (one 4-lane f32 vector per step)
31-
let mut dst: Vec<u8> = Vec::with_capacity(n);
39+
let mut dst: Vec<u8> = vec![0; n];
40+
3241
unsafe {
33-
accumulate_sse(src.as_ptr(), dst.as_mut_ptr(), n as u32);
42+
let mut offset = _mm_setzero_ps();
43+
let sign_mask = _mm_set1_ps(-0.);
44+
let mask = _mm_set1_epi32(0x0c080400);
45+
46+
for i in (0..n).step_by(4) {
47+
let mut x = _mm_loadu_ps(&src[i]);
48+
x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4)));
49+
x = _mm_add_ps(x, _mm_shuffle_ps(_mm_setzero_ps(), x, 0x40));
50+
x = _mm_add_ps(x, offset);
51+
52+
let mut y = _mm_andnot_ps(sign_mask, x); // fabs(x)
53+
y = _mm_min_ps(y, _mm_set1_ps(1.0));
54+
y = _mm_mul_ps(y, _mm_set1_ps(255.0));
55+
56+
let mut z = _mm_cvttps_epi32(y);
57+
z = _mm_shuffle_epi8(z, mask);
58+
59+
_mm_store_ss(mem::transmute(&dst[i]), _mm_castsi128_ps(z));
60+
offset = _mm_shuffle_ps(x, x, _mm_shuffle!(3, 3, 3, 3));
61+
}
62+
3463
dst.set_len(len); // we must return vec of the same length as src.len()
3564
}
65+
3666
dst
3767
}
3868

0 commit comments

Comments
 (0)