Skip to content

Issue #9 : Switched to native SIMD instructions in Rust. Deleted build.rs and the C code #28

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 18, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@ authors = [ "Raph Levien <[email protected]>" ]
keywords = ["font", "truetype", "ttf"]
description = "A font renderer written (mostly) in pure, safe Rust"
repository = "https://github.com/google/font-rs"
build = "build.rs"

[features]
sse = []

[target.'cfg(target_feature = "sse")'.build-dependencies]
gcc = "0.3"
11 changes: 0 additions & 11 deletions build.rs

This file was deleted.

37 changes: 0 additions & 37 deletions src/accumulate.c

This file was deleted.

42 changes: 36 additions & 6 deletions src/accumulate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#[cfg(feature = "sse")]
#[link(name = "accumulate")]
extern "C" {
fn accumulate_sse(src: *const f32, dst: *mut u8, n: u32);
use std::mem;

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

#[cfg(target_arch = "x86")]
use std::arch::x86::*;

/// Packs four lane selectors into a single shuffle-control immediate.
///
/// The arguments are given from the most significant selector to the least
/// significant one; each is placed in its own 2-bit field (bits 7-6, 5-4,
/// 3-2 and 1-0 respectively). This is the same layout that C's
/// `_MM_SHUFFLE(z, y, x, w)` helper produces.
///
/// The selectors are not masked, so each one is expected to be in `0..=3`;
/// a larger value would spill into the neighbouring field.
macro_rules! _mm_shuffle {
    ($sel3:expr, $sel2:expr, $sel1:expr, $sel0:expr) => {
        ($sel3 << 6) | ($sel2 << 4) | ($sel1 << 2) | $sel0
    };
}

#[cfg(feature = "sse")]
Expand All @@ -28,11 +36,33 @@ pub fn accumulate(src: &[f32]) -> Vec<u8> {
// and so on
let len = src.len();
let n = (len + 3) & !3; // align data
let mut dst: Vec<u8> = Vec::with_capacity(n);
let mut dst: Vec<u8> = vec![0; n];

unsafe {
accumulate_sse(src.as_ptr(), dst.as_mut_ptr(), n as u32);
let mut offset = _mm_setzero_ps();
let sign_mask = _mm_set1_ps(-0.);
let mask = _mm_set1_epi32(0x0c080400);

for i in (0..n).step_by(4) {
let mut x = _mm_loadu_ps(&src[i]);
x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4)));
x = _mm_add_ps(x, _mm_shuffle_ps(_mm_setzero_ps(), x, 0x40));
x = _mm_add_ps(x, offset);

let mut y = _mm_andnot_ps(sign_mask, x); // fabs(x)
y = _mm_min_ps(y, _mm_set1_ps(1.0));
y = _mm_mul_ps(y, _mm_set1_ps(255.0));

let mut z = _mm_cvttps_epi32(y);
z = _mm_shuffle_epi8(z, mask);

_mm_store_ss(mem::transmute(&dst[i]), _mm_castsi128_ps(z));
offset = _mm_shuffle_ps(x, x, _mm_shuffle!(3, 3, 3, 3));
}

dst.set_len(len); // we must return vec of the same length as src.len()
}

dst
}

Expand Down