Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ jobs:
sudo port install adwaita-icon-theme
sudo port install meson
sudo port install dbus
sudo port install simde

- name: Install LibRaw
run: |
Expand Down Expand Up @@ -195,6 +196,11 @@ jobs:
export PATH=/opt/local/libexec/gnubin:/opt/local/bin:/opt/local/sbin:$PATH
mkdir build
cd build
if [ "${{ matrix.arch }}" = "arm64" ]; then
ART_SIMDE=1
else
ART_SIMDE=0
fi
export PKG_CONFIG_PATH=/opt/art-deps/lib/pkgconfig
/opt/local/bin/cmake \
-DCMAKE_C_COMPILER=/opt/local/bin/clang-mp-17 \
Expand All @@ -206,6 +212,7 @@ jobs:
-DCMAKE_LIBRARY_PATH=/opt/art-deps/lib \
-DMACOS_LEGACY_BUNDLE=0 \
-DCMAKE_OSX_DEPLOYMENT_TARGET=$OS_VERSION \
-DENABLE_SIMDE=${ART_SIMDE} \
../repo
cd ..

Expand Down
14 changes: 14 additions & 0 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -134,15 +134,28 @@ jobs:
ninja install
cd ..

# Install SIMDE: runs only for the arm64 matrix entry.
# Downloads the Meson 1.10.0 and SIMDe v0.8.2 release tarballs, then uses that
# unpacked Meson copy (invoked through python) to configure a release build of
# SIMDe with its tests disabled and install it under
# c:/msys2/msys64/usr/local.
# NOTE(review): the downloads are not checksum-verified -- confirm whether that
# is acceptable for this CI job.
- if: ${{ matrix.arch == 'arm64' }}
name: Install SIMDE
run: |
wget https://github.com/mesonbuild/meson/releases/download/1.10.0/meson-1.10.0.tar.gz
tar xzf meson-1.10.0.tar.gz
wget https://github.com/simd-everywhere/simde/archive/refs/tags/v0.8.2.tar.gz
tar xzf v0.8.2.tar.gz
cd simde-0.8.2
python ../meson-1.10.0/meson.py setup build -Dbuildtype=release -Dtests=false -Dprefix=c:/msys2/msys64/usr/local
python ../meson-1.10.0/meson.py install -C build

- name: Configure build
run: |
mkdir build
cd build
export PKG_CONFIG_PATH=c:/msys2/msys64/usr/local/lib/pkgconfig
if [ "${{ matrix.msystem }}" = "UCRT64" ]; then
ART_CXX_FLAGS=
ART_SIMDE=0
else
ART_CXX_FLAGS="-Wno-ignored-attributes -Wno-unused-command-line-argument"
ART_SIMDE=1
fi
cmake \
-GNinja \
Expand All @@ -153,6 +166,7 @@ jobs:
-DCTL_INCLUDE_DIR=/usr/local/include/CTL \
-DCMAKE_LIBRARY_PATH=/usr/local/lib \
-DBUILD_BUNDLE=1 \
-DENABLE_SIMDE=${ART_SIMDE} \
-DCMAKE_CXX_FLAGS="${ART_CXX_FLAGS}" \
../repo
cd ..
Expand Down
28 changes: 27 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ option(ENABLE_MIMALLOC "Use the mimalloc library if available" ON)
option(ENABLE_LIBRAW "Use libraw for decoding" ON)
option(ENABLE_OCIO "Use OpenColorIOv2 for LUT application" ON)
option(ENABLE_CTL "Enable support for the ACES Color Transformation Language" OFF)
option(ENABLE_SIMDE "Build with SIMD Everywhere support" OFF)

option(MACOS_LEGACY_BUNDLE "Use legacy method for building a macOS bundle using the tools/osx/macos_bundle.sh script" OFF)

Expand Down Expand Up @@ -835,8 +836,9 @@ if(WIN32)
endif()
endif()

include(CheckSymbolExists)

if(APPLE)
include(CheckSymbolExists)
set(CMAKE_REQUIRED_INCLUDES ${GTK_INCLUDE_DIRS})
message(STATUS "Checking whether GDK supports specifying custom color spaces")
check_symbol_exists(GDK_QUARTZ_WINDOW_SUPPORTS_COLORSPACE "gdk/gdkquartz.h" HAVE_GDK_QUARTZ_COLORSPACE)
Expand All @@ -848,6 +850,30 @@ if(APPLE)
endif()
endif()

# SIMD configuration.
#
# With ENABLE_SIMDE=ON the SIMD Everywhere (SIMDe) headers are required (found
# through pkg-config) and the following definitions are added for all targets:
#   ART_USE_SIMDE                - build against SIMDe
#   SIMDE_ENABLE_NATIVE_ALIASES  - passed to SIMDe so the native intrinsic
#                                  names are available
#   ART_SIMD                     - enable the vectorised code paths
#   ART_SIMDE_GET_MM_FLUSH_ZERO_MODE
#                                - only if simde/x86/sse.h provides
#                                  _MM_GET_FLUSH_ZERO_MODE
#
# With ENABLE_SIMDE=OFF, ART_SIMD is defined only when the compiler already
# defines __SSE2__.
if(ENABLE_SIMDE)
    pkg_check_modules(SIMDE REQUIRED simde)

    # Definitions are given without a leading "-D": add_compile_definitions()
    # only strips that prefix starting with CMake 3.26, so older versions would
    # emit a malformed "-D-DART_USE_SIMDE" on the command line.
    add_compile_definitions(
        ART_USE_SIMDE
        SIMDE_ENABLE_NATIVE_ALIASES
        ART_SIMD
    )

    # Probe the SIMDe headers with a scoped check state so that the
    # CMAKE_REQUIRED_* settings used here do not leak into later checks or
    # into the subdirectories added below.
    include(CMakePushCheckState)
    cmake_push_check_state()
    set(CMAKE_REQUIRED_INCLUDES ${SIMDE_INCLUDE_DIRS})
    set(CMAKE_REQUIRED_DEFINITIONS -DSIMDE_ENABLE_NATIVE_ALIASES)
    check_symbol_exists(_MM_GET_FLUSH_ZERO_MODE "simde/x86/sse.h" HAVE_SIMDE_MM_GET_FLUSH_ZERO_MODE)
    cmake_pop_check_state()

    if(HAVE_SIMDE_MM_GET_FLUSH_ZERO_MODE)
        add_compile_definitions(ART_SIMDE_GET_MM_FLUSH_ZERO_MODE)
    endif()
else()
    # Make sure the check macro is available even if no earlier part of the
    # file included it.
    include(CheckCXXSourceCompiles)

    # Compiles only when the compiler defines __SSE2__.
    check_cxx_source_compiles("#if !defined(__SSE2__)
#error
#endif

int main() { return 0; }
" HAVE_SSE2)
    if(HAVE_SSE2)
        add_compile_definitions(ART_SIMD)
    endif()
endif()

add_subdirectory(rtengine)
add_subdirectory(rtgui)
add_subdirectory(rtdata)
40 changes: 20 additions & 20 deletions rtengine/CA_correct_RT.cc
Original file line number Diff line number Diff line change
Expand Up @@ -349,15 +349,15 @@ float *RawImageSource::CA_correct_RT(bool autoCA, size_t autoIterations,
// rgb values should be floating point numbers between 0
// and 1 after white balance multipliers are applied

#ifdef __SSE2__
#ifdef ART_SIMD
vfloat c65535v = F2V(65535.f);
#endif

for (int rr = rrmin; rr < rrmax; rr++) {
int row = rr + top;
int cc = ccmin;
int col = cc + left;
#ifdef __SSE2__
#ifdef ART_SIMD
int c0 = fc(cfa, rr, cc);
if (c0 == 1) {
rgb[c0][rr * ts + cc] =
Expand Down Expand Up @@ -488,7 +488,7 @@ float *RawImageSource::CA_correct_RT(bool autoCA, size_t autoIterations,
// end of border fill
// end of initialization

#ifdef __SSE2__
#ifdef ART_SIMD
vfloat onev = F2V(1.f);
vfloat epsv = F2V(eps);
#endif
Expand All @@ -497,7 +497,7 @@ float *RawImageSource::CA_correct_RT(bool autoCA, size_t autoIterations,
int cc = 3 + (fc(cfa, rr, 3) & 1);
int indx = rr * ts + cc;
int c = fc(cfa, rr, cc);
#ifdef __SSE2__
#ifdef ART_SIMD
for (; cc < cc1 - 9; cc += 8, indx += 8) {
// compute directional weights using image
// gradients
Expand Down Expand Up @@ -601,7 +601,7 @@ float *RawImageSource::CA_correct_RT(bool autoCA, size_t autoIterations,
int col = max(left + 3, 0) + offset;
int indx = rr * ts + 3 -
(left < 0 ? (left + 3) : 0) + offset;
#ifdef __SSE2__
#ifdef ART_SIMD
for (; col < min(cc1 + left - 3, width) - 7;
col += 8, indx += 8) {
STVFU(Gtmp[(row * width + col) >> 1],
Expand All @@ -616,14 +616,14 @@ float *RawImageSource::CA_correct_RT(bool autoCA, size_t autoIterations,
}
}

#ifdef __SSE2__
#ifdef ART_SIMD
vfloat zd25v = F2V(0.25f);
#endif
for (int rr = 4; rr < rr1 - 4; rr++) {
int cc = 4 + (fc(cfa, rr, 2) & 1);
int indx = rr * ts + cc;
int c = fc(cfa, rr, cc);
#ifdef __SSE2__
#ifdef ART_SIMD
for (; cc < cc1 - 10; cc += 8, indx += 8) {
vfloat rgb1v = LC2VFU(rgb[1][indx]);
vfloat rgbcv = LVFU(rgb[c][indx >> 1]);
Expand Down Expand Up @@ -757,7 +757,7 @@ float *RawImageSource::CA_correct_RT(bool autoCA, size_t autoIterations,
}
}

#ifdef __SSE2__
#ifdef ART_SIMD
vfloat zd3v = F2V(0.3f);
vfloat zd1v = F2V(0.1f);
vfloat zd5v = F2V(0.5f);
Expand All @@ -771,7 +771,7 @@ float *RawImageSource::CA_correct_RT(bool autoCA, size_t autoIterations,
int cc = 8 + (fc(cfa, rr, 2) & 1);
int indx = rr * ts + cc;
int c = fc(cfa, rr, cc);
#ifdef __SSE2__
#ifdef ART_SIMD
vfloat coeff00v = ZEROV;
vfloat coeff01v = ZEROV;
vfloat coeff02v = ZEROV;
Expand Down Expand Up @@ -1229,7 +1229,7 @@ float *RawImageSource::CA_correct_RT(bool autoCA, size_t autoIterations,
// rgb values should be floating point number between 0
// and 1 after white balance multipliers are applied

#ifdef __SSE2__
#ifdef ART_SIMD
vfloat c65535v = F2V(65535.f);
vmask gmask =
_mm_set_epi32(0, 0xffffffff, 0, 0xffffffff);
Expand All @@ -1240,7 +1240,7 @@ float *RawImageSource::CA_correct_RT(bool autoCA, size_t autoIterations,
int col = cc + left;
int indx = row * width + col;
int indx1 = rr * ts + cc;
#ifdef __SSE2__
#ifdef ART_SIMD
int c = fc(cfa, rr, cc);
if (c & 1) {
rgb[1][indx1] = rawData[row][col] / 65535.f;
Expand Down Expand Up @@ -1431,7 +1431,7 @@ float *RawImageSource::CA_correct_RT(bool autoCA, size_t autoIterations,
// end of border fill

if (!autoCA || fitParamsIn) {
#ifdef __SSE2__
#ifdef ART_SIMD
const vfloat onev = F2V(1.f);
const vfloat epsv = F2V(eps);
#endif
Expand All @@ -1440,7 +1440,7 @@ float *RawImageSource::CA_correct_RT(bool autoCA, size_t autoIterations,
for (int rr = 3; rr < rr1 - 3; rr++) {
int cc = 3 + fc(cfa, rr, 1),
c = fc(cfa, rr, cc), indx = rr * ts + cc;
#ifdef __SSE2__
#ifdef ART_SIMD
for (; cc < cc1 - 10; cc += 8, indx += 8) {
// compute directional weights using image
// gradients
Expand Down Expand Up @@ -1649,7 +1649,7 @@ float *RawImageSource::CA_correct_RT(bool autoCA, size_t autoIterations,
(rr + shiftvceil[c]) * ts + cc + shifthceil[c];
int indxcf =
(rr + shiftvceil[c]) * ts + cc + shifthfloor[c];
#ifdef __SSE2__
#ifdef ART_SIMD
vfloat shifthfracv = F2V(shifthfrac[c]);
vfloat shiftvfracv = F2V(shiftvfrac[c]);
for (; cc < cc1 - 10; cc += 8, indxfc += 8,
Expand Down Expand Up @@ -1708,7 +1708,7 @@ float *RawImageSource::CA_correct_RT(bool autoCA, size_t autoIterations,
shiftvfrac[0] /= 2.f;
shiftvfrac[2] /= 2.f;

#ifdef __SSE2__
#ifdef ART_SIMD
vfloat zd25v = F2V(0.25f);
vfloat onev = F2V(1.f);
vfloat zd5v = F2V(0.5f);
Expand All @@ -1719,7 +1719,7 @@ float *RawImageSource::CA_correct_RT(bool autoCA, size_t autoIterations,
int c = fc(cfa, rr, cc);
int GRBdir0 = GRBdir[0][c];
int GRBdir1 = GRBdir[1][c];
#ifdef __SSE2__
#ifdef ART_SIMD
vfloat shifthfracc = F2V(shifthfrac[c]);
vfloat shiftvfracc = F2V(shiftvfrac[c]);
for (int indx = rr * ts + cc; cc < cc1 - 14;
Expand Down Expand Up @@ -1911,7 +1911,7 @@ float *RawImageSource::CA_correct_RT(bool autoCA, size_t autoIterations,
int cc = border + (fc(cfa, rr, 2) & 1);
int indx = (row * width + cc + left) >> 1;
int indx1 = (rr * ts + cc) >> 1;
#ifdef __SSE2__
#ifdef ART_SIMD
for (; indx <
(row * width + cc1 - border - 7 + left) >> 1;
indx += 4, indx1 += 4) {
Expand Down Expand Up @@ -1951,7 +1951,7 @@ float *RawImageSource::CA_correct_RT(bool autoCA, size_t autoIterations,
for (int row = cb; row < height - cb; row++) {
int col = cb + (fc(cfa, row, 0) & 1);
int indx = (row * width + col) >> 1;
#ifdef __SSE2__
#ifdef ART_SIMD
for (; col < width - 7 - cb; col += 8, indx += 4) {
const vfloat val = vmaxf(LVFU(RawDataTmp[indx]), ZEROV);
STC2VFU(rawData[row][col], val);
Expand All @@ -1975,7 +1975,7 @@ float *RawImageSource::CA_correct_RT(bool autoCA, size_t autoIterations,
#pragma omp parallel
#endif
{
#ifdef __SSE2__
#ifdef ART_SIMD
const vfloat onev = F2V(1.f);
const vfloat twov = F2V(2.f);
const vfloat zd5v = F2V(0.5f);
Expand All @@ -1989,7 +1989,7 @@ float *RawImageSource::CA_correct_RT(bool autoCA, size_t autoIterations,
const array2D<float> *nonGreen =
colour == 0 ? redFactor : blueFactor;
int j = firstCol;
#ifdef __SSE2__
#ifdef ART_SIMD
for (; j < W - 7 - 2 * cb; j += 8) {
const vfloat newvals = LC2VFU(rawData[i + cb][j + cb]);
const vfloat oldvals = LVFU((*oldraw)[i][j / 2]);
Expand Down
4 changes: 4 additions & 0 deletions rtengine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ if(CTL_FOUND)
link_directories(${CTL_LIBRARY_DIRS})
endif()

# Add the SIMDe header location to this directory's include path, but only
# when SIMDE_INCLUDE_DIRS holds a value (the top-level CMakeLists.txt fills it
# via pkg_check_modules(SIMDE ...) when ENABLE_SIMDE is on).
if(SIMDE_INCLUDE_DIRS)
include_directories("${SIMDE_INCLUDE_DIRS}")
endif()

set(CAMCONSTSFILE "camconst.json")

set(RTENGINESOURCEFILES
Expand Down
Loading