alicevision · griwodz · Aug 12, 2024 · Jan 13, 2025 · Jul 30, 2024 · Jan 14, 2025
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -28,7 +28,6 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-$
 
 option(PopSift_BUILD_EXAMPLES "Build PopSift applications."  ON)
 option(PopSift_BUILD_DOCS "Build PopSift documentation."  OFF)
-option(PopSift_USE_NVTX_PROFILING     "Use CUDA NVTX for profiling." OFF)
 option(PopSift_ERRCHK_AFTER_KERNEL     "Synchronize and check CUDA error after every kernel." OFF)
 option(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." ON)
 option(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." ON)
@@ -99,10 +98,6 @@ find_package(CUDAToolkit)
 message(STATUS "CUDA Version is ${CUDAToolkit_VERSION}")
 set(CUDA_VERSION ${CUDAToolkit_VERSION})
 
-if(PopSift_USE_NVTX_PROFILING)
-  message(STATUS "PROFILING CPU CODE: NVTX is in use")
-endif()
-
 if(PopSift_ERRCHK_AFTER_KERNEL)
   message(STATUS "Synchronizing and checking errors after every kernel call")
   list(APPEND CUDA_NVCC_FLAGS "-DERRCHK_AFTER_KERNEL")
@@ -153,13 +148,6 @@ else()
   set(DISABLE_GRID_FILTER   0)
 endif()
 
-if(PopSift_USE_NVTX_PROFILING)
-  # library required for NVTX profiling of the CPU
-  set(PopSift_USE_NVTX 1)
-else()
-  set(PopSift_USE_NVTX 0)
-endif()
-
 add_subdirectory(src)
 
 if(PopSift_BUILD_DOCS)
@@ -197,7 +185,6 @@ message(STATUS "Build Shared libs: " ${BUILD_SHARED_LIBS})
 message(STATUS "Build examples: " ${PopSift_BUILD_EXAMPLES})
 message(STATUS "Build documentation: " ${PopSift_BUILD_DOCS})
 message(STATUS "Generate position independent code: " ${CMAKE_POSITION_INDEPENDENT_CODE})
-message(STATUS "Use CUDA NVTX for profiling: " ${PopSift_USE_NVTX_PROFILING})
 message(STATUS "Synchronize and check CUDA error after every kernel: " ${PopSift_ERRCHK_AFTER_KERNEL})
 message(STATUS "Grid filtering: " ${PopSift_USE_GRID_FILTER})
 message(STATUS "Additional warning for CUDA nvcc: " ${PopSift_NVCC_WARNINGS})

diff --git a/appveyor.yml b/appveyor.yml
@@ -44,7 +44,7 @@ install:
 before_build:
     - md build
     - cd build
-    - cmake -G "Visual Studio 17 2022" -A x64 -T v143,host=x64,cuda="%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" -DBUILD_SHARED_LIBS:BOOL=ON -DPopSift_USE_NVTX_PROFILING:BOOL=OFF -DPopSift_USE_GRID_FILTER:BOOL=OFF -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=ON -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake ..
+    - cmake -G "Visual Studio 17 2022" -A x64 -T v143,host=x64,cuda="%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" -DBUILD_SHARED_LIBS:BOOL=ON -DPopSift_USE_GRID_FILTER:BOOL=OFF -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=ON -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake ..
     - ls -l
 
 build:

diff --git a/cmake/sift_config.h.in b/cmake/sift_config.h.in
@@ -13,5 +13,4 @@
 
 #define POPSIFT_HAVE_SHFL_DOWN_SYNC()     @PopSift_HAVE_SHFL_DOWN_SYNC@
 #define POPSIFT_DISABLE_GRID_FILTER()     @DISABLE_GRID_FILTER@
-#define POPSIFT_USE_NVTX()                @PopSift_USE_NVTX@
 
diff --git a/cudaInstallAppveyor.cmd b/cudaInstallAppveyor.cmd
@@ -4,14 +4,12 @@ echo Downloading CUDA toolkit 12 for Windows 10
 
 appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.5.82-archive.zip -Filename cuda_nvcc.zip
 appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.5.82-archive.zip -Filename cuda_cudart.zip
-appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.5.82-archive.zip -Filename cuda_nvtx.zip
 appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.5.82-archive.zip -Filename vs_integration.zip
 dir
 
 echo Unzipping CUDA toolkit 12
 tar -xf cuda_nvcc.zip
 tar -xf cuda_cudart.zip
-tar -xf cuda_nvtx.zip
 tar -xf vs_integration.zip
 dir
 
@@ -22,7 +20,6 @@ mkdir "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\extras"
 echo Copying toolkit files to install dir(s)
 xcopy cuda_cudart-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y
 xcopy cuda_nvcc-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y
-xcopy cuda_nvtx-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y
 xcopy visual_studio_integration-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\extras" /s /e /i /y
 
 

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -10,10 +10,9 @@ add_library(popsift
         popsift/sift_pyramid.cu popsift/sift_pyramid.h
         popsift/sift_octave.cu popsift/sift_octave.h
         popsift/s_pyramid_build.cu
-        popsift/s_pyramid_build_aa.cu popsift/s_pyramid_build_aa.h
-        popsift/s_pyramid_build_ai.cu popsift/s_pyramid_build_ai.h
-        popsift/s_pyramid_build_ra.cu popsift/s_pyramid_build_ra.h
-        popsift/s_pyramid_fixed.cu
+        popsift/s_pyramid_build_aa.cu
+        popsift/s_pyramid_build_ai.cu
+        popsift/s_pyramid_build_ra.cu
         popsift/sift_extremum.h
         popsift/sift_extremum.cu popsift/s_extrema.cu
         popsift/s_orientation.cu
@@ -45,12 +44,6 @@ target_link_libraries(popsift
 		      CUDA::cudart
 		      Threads::Threads)
 
-if(PopSift_USE_NVTX_PROFILING)
-target_link_libraries(popsift
-                      PUBLIC
-		      CUDA::nvtx3)
-endif()
-
 set_target_properties(popsift PROPERTIES VERSION ${PROJECT_VERSION})
 set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d")
 set_target_properties(popsift PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt
@@ -39,6 +39,10 @@ endif()
 
 find_package(DevIL COMPONENTS IL ILU) # yields IL_FOUND, IL_LIBRARIES, IL_INCLUDE_DIR
 
+# CMake >= 3.30 removes the FindBoost module (policy CMP0167); NEW makes find_package(Boost) use the BoostConfig.cmake shipped with Boost 1.70 or newer
+if(POLICY CMP0167)
+  cmake_policy(SET CMP0167 NEW)
+endif()
 if(PopSift_BOOST_USE_STATIC_LIBS)
   set(Boost_USE_STATIC_LIBS ON)
 endif()

diff --git a/src/application/main.cpp b/src/application/main.cpp
@@ -71,28 +71,20 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& in
         ( "gauss-mode", value<std::string>()->notifier([&](const std::string& s) { config.setGaussMode(s); }),
           popsift::Config::getGaussModeUsage() )
         // "Choice of span (1-sided) for Gauss filters. Default is VLFeat-like computation depending on sigma. "
-        // "Options are: vlfeat, relative, relative-all, opencv, fixed9, fixed15"
+        // "Options are: vlfeat, relative, relative-all, opencv"
         ("desc-mode", value<std::string>()->notifier([&](const std::string& s) { config.setDescMode(s); }),
         "Choice of descriptor extraction modes:\n"
         "loop, iloop, grid, igrid, notile\n"
-	"Default is loop\n"
+        "Default is loop\n"
         "loop is OpenCV-like horizontal scanning, computing only valid points, grid extracts only useful points but rounds them, iloop uses linear texture and rotated gradiant fetching. igrid is grid with linear interpolation. notile is like igrid but avoids redundant gradiant fetching.")
-        ("popsift-mode", bool_switch()->notifier([&](bool b) { if(b) config.setMode(popsift::Config::PopSift); }),
+        ("popsift-mode", bool_switch()->notifier([&](bool b) { if(b) config.setMode(popsift::Config::RefineInOctave); }),
         "During the initial upscale, shift pixels by 1. In extrema refinement, steps up to 0.6, do not reject points when reaching max iterations, "
         "first contrast threshold is .8 * peak thresh. Shift feature coords octave 0 back to original pos.")
-        ("vlfeat-mode", bool_switch()->notifier([&](bool b) { if(b) config.setMode(popsift::Config::VLFeat); }),
+        ("vlfeat-mode", bool_switch()->notifier([&](bool b) { if(b) config.setMode(popsift::Config::RefineInLevel); }),
         "During the initial upscale, shift pixels by 1. That creates a sharper upscaled image. "
         "In extrema refinement, steps up to 0.6, levels remain unchanged, "
         "do not reject points when reaching max iterations, "
         "first contrast threshold is .8 * peak thresh.")
-        ("opencv-mode", bool_switch()->notifier([&](bool b) { if(b) config.setMode(popsift::Config::OpenCV); }),
-        "During the initial upscale, shift pixels by 0.5. "
-        "In extrema refinement, steps up to 0.5, "
-        "reject points when reaching max iterations, "
-        "first contrast threshold is floor(.5 * peak thresh). "
-        "Computed filter width are lower than VLFeat/PopSift")
-        ("direct-scaling", bool_switch()->notifier([&](bool b) { if(b) config.setScalingMode(popsift::Config::ScaleDirect); }),
-         "Direct each octave from upscaled orig instead of blurred level.")
         ("norm-multi", value<int>()->notifier([&](int i) {config.setNormalizationMultiplier(i); }), "Multiply the descriptor by pow(2,<int>).")
         ( "norm-mode", value<std::string>()->notifier([&](const std::string& s) { config.setNormMode(s); }),
           popsift::Config::getNormModeUsage() )

diff --git a/src/application/match.cpp b/src/application/match.cpp
@@ -70,28 +70,16 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& lF
     modes.add_options()
         ( "gauss-mode", value<std::string>()->notifier([&](const std::string& s) { config.setGaussMode(s); }),
           popsift::Config::getGaussModeUsage() )
-        ("desc-mode", value<std::string>()->notifier([&](const std::string& s) { config.setDescMode(s); }),
+        ( "desc-mode", value<std::string>()->notifier([&](const std::string& s) { config.setDescMode(s); }),
         "Choice of descriptor extraction modes:\n"
         "loop, iloop, grid, igrid, notile\n"
-	"Default is loop\n"
+        "Default is loop\n"
         "loop is OpenCV-like horizontal scanning, computing only valid points, grid extracts only useful points but rounds them, iloop uses linear texture and rotated gradiant fetching. igrid is grid with linear interpolation. notile is like igrid but avoids redundant gradiant fetching.")
-        ("popsift-mode", bool_switch()->notifier([&](bool b) { if(b) config.setMode(popsift::Config::PopSift); }),
-        "During the initial upscale, shift pixels by 1. In extrema refinement, steps up to 0.6, do not reject points when reaching max iterations, "
-        "first contrast threshold is .8 * peak thresh. Shift feature coords octave 0 back to original pos.")
-        ("vlfeat-mode", bool_switch()->notifier([&](bool b) { if(b) config.setMode(popsift::Config::VLFeat); }),
-        "During the initial upscale, shift pixels by 1. That creates a sharper upscaled image. "
-        "In extrema refinement, steps up to 0.6, levels remain unchanged, "
-        "do not reject points when reaching max iterations, "
-        "first contrast threshold is .8 * peak thresh.")
-        ("opencv-mode", bool_switch()->notifier([&](bool b) { if(b) config.setMode(popsift::Config::OpenCV); }),
-        "During the initial upscale, shift pixels by 0.5. "
-        "In extrema refinement, steps up to 0.5, "
-        "reject points when reaching max iterations, "
-        "first contrast threshold is floor(.5 * peak thresh). "
-        "Computed filter width are lower than VLFeat/PopSift")
-        ("direct-scaling", bool_switch()->notifier([&](bool b) { if(b) config.setScalingMode(popsift::Config::ScaleDirect); }),
-         "Direct each octave from upscaled orig instead of blurred level.")
-        ("norm-multi", value<int>()->notifier([&](int i) {config.setNormalizationMultiplier(i); }), "Multiply the descriptor by pow(2,<int>).")
+        ( "popsift-mode", bool_switch()->notifier([&](bool b) { if(b) config.setMode(popsift::Config::RefineInOctave); }),
+        "In extrema refinement, it is possible to move extrema within a level but also between the levels of an octave.")
+        ( "vlfeat-mode", bool_switch()->notifier([&](bool b) { if(b) config.setMode(popsift::Config::RefineInLevel); }),
+        "In extrema refinement, it is possible to move extrema within a level only.")
+        ( "norm-multi", value<int>()->notifier([&](int i) {config.setNormalizationMultiplier(i); }), "Multiply the descriptor by pow(2,<int>).")
         ( "norm-mode", value<std::string>()->notifier([&](const std::string& s) { config.setNormMode(s); }),
           popsift::Config::getNormModeUsage() )
         ( "root-sift", bool_switch()->notifier([&](bool b) { if(b) config.setNormMode(popsift::Config::RootSift); }),

diff --git a/src/popsift/gauss_filter.cu b/src/popsift/gauss_filter.cu
@@ -67,42 +67,7 @@ void print_gauss_filter_symbol( int columns )
 
     printf( "\n"
             "Gauss tables\n"
-            "      level span sigma : center value -> edge value\n"
-            "      absolute filters octave 0 (compute level 0, all other levels directly from level 0)\n");
-
-    for( int lvl=0; lvl<d_gauss.required_filter_stages; lvl++ ) {
-        int span = d_gauss.abs_o0.span[lvl] + d_gauss.abs_o0.span[lvl] - 1;
-
-        printf("      %d %d %2.6f: ", lvl, span, d_gauss.abs_o0.sigma[lvl] );
-        int m = min( d_gauss.abs_o0.span[lvl], columns );
-        for( int x=0; x<m; x++ ) {
-            printf("%0.8f ", d_gauss.abs_o0.filter[lvl*GAUSS_ALIGN+x] );
-        }
-        if( m < d_gauss.abs_o0.span[lvl] )
-            printf("...\n");
-        else
-            printf("\n");
-    }
-    printf( "\n"
-            "      absolute filters other octaves\n"
-            "      (level 0 via downscaling, all other levels directly from level 0)\n");
-
-    for( int lvl=0; lvl<d_gauss.required_filter_stages; lvl++ ) {
-        int span = d_gauss.abs_oN.span[lvl] + d_gauss.abs_oN.span[lvl] - 1;
-
-        printf("      %d %d %2.6f: ", lvl, span, d_gauss.abs_oN.sigma[lvl] );
-        int m = min( d_gauss.abs_oN.span[lvl], columns );
-        for( int x=0; x<m; x++ ) {
-            printf("%0.8f ", d_gauss.abs_oN.filter[lvl*GAUSS_ALIGN+x] );
-        }
-        if( m < d_gauss.abs_oN.span[lvl] )
-            printf("...\n");
-        else
-            printf("\n");
-    }
-    printf("\n");
-
-    printf("    level 0-filters for direct downscaling\n");
+            "    level 0-filters for direct downscaling\n");
 
     for( int lvl=0; lvl<MAX_OCTAVES; lvl++ ) {
         int span = d_gauss.dd.span[lvl] + d_gauss.dd.span[lvl] - 1;
@@ -187,33 +152,6 @@ void init_filter( const Config& conf,
 
     h_gauss.inc.computeBlurTable( &h_gauss );
 
-    /* abs_o0 :
-     * Gauss table to create octave 0 of the absolute filters directly from
-     * input images.
-     */
-    for( int lvl=0; lvl<h_gauss.required_filter_stages; lvl++ ) {
-        const float sigmaS = sigma0 * pow( 2.0f, (float)(lvl)/(float)levels );
-        h_gauss.abs_o0.sigma[lvl]  = sqrt( fabs( sigmaS * sigmaS - initial_blur * initial_blur ) );
-    }
-
-    h_gauss.abs_o0.computeBlurTable( &h_gauss );
-
-    /* abs_oN :
-     * Gauss tables to create levels 1 and above directly from level 0 of every
-     * octave. Could be used on octave 0, but abs_o0 is better.
-     * Level 0 must be created by other means (downscaling from previous octave,
-     * direct downscaling from input image, ...) before using abs_oN.
-     * 
-     */
-    h_gauss.abs_oN.sigma[0] = 0;
-    for( int lvl=1; lvl<h_gauss.required_filter_stages; lvl++ ) {
-        const float sigmaP = sigma0; // level 0 has already reached sigma0 blur
-        const float sigmaS = sigma0 * pow( 2.0f, (float)(lvl)/(float)levels );
-        h_gauss.abs_oN.sigma[lvl] = sqrt( sigmaS * sigmaS - sigmaP * sigmaP );
-    }
-
-    h_gauss.abs_oN.computeBlurTable( &h_gauss );
-
     /* dd :
      * The direct-downscaling kernels make use of the assumption that downscaling
      * from MAX_LEVEL-3 is identical to applying 2*sigma on the identical image
@@ -224,17 +162,13 @@ void init_filter( const Config& conf,
      * octaves, where it is also good for performance.
      * dd is only for creating level 0 of all octave directly from the input image.
      */
-    for( int oct=0; oct<MAX_OCTAVES; oct++ ) {
-        // sigma * 2^i
-        float oct_sigma = scalbnf( sigma0, oct );
 
-        // subtract initial blur
-        float b = sqrt( fabs( oct_sigma * oct_sigma - initial_blur * initial_blur ) );
+    // subtract initial blur
+    const float b = sqrt( fabs( sigma0 * sigma0 - initial_blur * initial_blur ) );
 
-        // sigma / 2^i
-        h_gauss.dd.sigma[oct] = scalbnf( b, -oct );
-        h_gauss.dd.computeBlurTable( &h_gauss );
-    }
+    // only one table entry is needed; it no longer depends on the octave
+    h_gauss.dd.sigma[0] = b;
+    h_gauss.dd.computeBlurTable( &h_gauss );
 
     cudaError_t err;
     err = cudaMemcpyToSymbol( d_gauss,
@@ -260,8 +194,6 @@ __host__
 void GaussInfo::clearTables( )
 {
     inc            .clearTables();
-    abs_o0         .clearTables();
-    abs_oN         .clearTables();
     dd             .clearTables();
 }
 
@@ -276,20 +208,10 @@ int GaussInfo::getSpan( float sigma ) const
 {
     switch( _span_mode )
     {
-    case Config::VLFeat_Relative_All :
-        // return GaussInfo::vlFeatRelativeSpan( sigma );
-        return GaussInfo::vlFeatSpan( sigma );
-
     case Config::VLFeat_Compute :
         return GaussInfo::vlFeatSpan( sigma );
     case Config::VLFeat_Relative :
         return GaussInfo::vlFeatRelativeSpan( sigma );
-    case Config::OpenCV_Compute :
-        return GaussInfo::openCVSpan( sigma );
-    case Config::Fixed9 :
-        return 5;
-    case Config::Fixed15 :
-        return 8;
     default :
         stringstream ss;
         ss << "ERROR: The mode for computing Gauss filter scan is invalid";
@@ -317,15 +239,6 @@ int GaussInfo::vlFeatRelativeSpan( float sigma )
     return spn;
 }
 
-__host__
-int GaussInfo::openCVSpan( float sigma )
-{
-    int span = int( roundf( 2.0f * 4.0f * sigma + 1.0f ) ) | 1;
-    span >>= 1;
-    span  += 1;
-    return std::min<int>( span, GAUSS_ALIGN - 1 );
-}
-
 template<int LEVELS>
 __host__
 void GaussTable<LEVELS>::clearTables( )

diff --git a/src/popsift/gauss_filter.h b/src/popsift/gauss_filter.h
@@ -61,25 +61,11 @@ struct GaussInfo
      */
     GaussTable<GAUSS_LEVELS> inc;
 
-    /* Compute the 1D Gauss tables for all levels of octave 0.
-     * For octave 0, all of these tables derive from the input
-     * image.
+    /* This is the 1D Gauss table for filtering the input image.
+     * The input image is either downscaled and then blurred with sigma,
+     * or blurred with 2*sigma and downscaled afterwards.
      */
-    GaussTable<GAUSS_LEVELS> abs_o0;
-
-    /* Compute the 1D Gauss tables for all levels of octaves 1 and up.
-     * Level 0 is empty, since it is created by other means.
-     * All other levels blur from level 0, not considering any
-     * initial blur.
-     */
-    GaussTable<GAUSS_LEVELS> abs_oN;
-
-    /* In theory, level 0 of octave 2 contains the same information
-     * whether it is constructed by downscaling and blurring the
-     * input image with sigma or by blurring the input image with 2*sigma
-     * and downscaling afterwards.
-     */
-    GaussTable<MAX_OCTAVES> dd;
+    GaussTable<1> dd;
 
     __host__
     void clearTables( );
@@ -99,9 +85,6 @@ struct GaussInfo
 
     __host__
     static int vlFeatRelativeSpan( float sigma );
-
-    __host__
-    static int openCVSpan( float sigma );
 };
 
 extern __device__ __constant__ GaussInfo d_gauss;

diff --git a/src/popsift/popsift.cu b/src/popsift/popsift.cu
@@ -438,18 +438,12 @@ void SiftJob::setImg( popsift::ImageBase* img )
 
 popsift::ImageBase* SiftJob::getImg()
 {
-#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
-    _nvtx_id = nvtxRangeStartA( "inserting image" );
-#endif
     return _img;
 }
 
 void SiftJob::setFeatures( popsift::FeaturesBase* f )
 {
     _p.set_value( f );
-#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
-    nvtxRangeEnd( _nvtx_id );
-#endif
 }
 
 popsift::FeaturesHost* SiftJob::get()
Original file line number	Diff line number	Diff line change
Expand Up		@@ -13,5 +13,4 @@

		#define POPSIFT_HAVE_SHFL_DOWN_SYNC() @PopSift_HAVE_SHFL_DOWN_SYNC@
		#define POPSIFT_DISABLE_GRID_FILTER() @DISABLE_GRID_FILTER@
		#define POPSIFT_USE_NVTX() @PopSift_USE_NVTX@