Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs_input/api/signalimage/general/pwelch.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ pwelch

Estimate the power spectral density of a signal using Welch's method [1]_

.. doxygenfunction:: pwelch(const xType& x, const wType& w, index_t nperseg, index_t noverlap, index_t nfft)
.. doxygenfunction:: pwelch(const xType& x, index_t nperseg, index_t noverlap, index_t nfft)
.. doxygenfunction:: pwelch(const xType& x, const wType& w, index_t nperseg, index_t noverlap, index_t nfft, PwelchOutputScaleMode output_scale_mode, typename xType::value_type::value_type fs)
.. doxygenfunction:: pwelch(const xType& x, index_t nperseg, index_t noverlap, index_t nfft, PwelchOutputScaleMode output_scale_mode, typename xType::value_type::value_type fs)

Examples
~~~~~~~~
Expand Down
40 changes: 18 additions & 22 deletions examples/pwelch.cu
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,15 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
MATX_ENTER_HANDLER();
using complex = cuda::std::complex<float>;

float exec_time_ms;
const int num_iterations = 100;
index_t signal_size = 256;
index_t nperseg = 32;
index_t nfft = nperseg;
index_t noverlap = 8;
float ftone = 3.0;
const int num_iterations = 500;
index_t signal_size = 256000;
index_t nperseg = 512;
index_t noverlap = 256;
index_t nfft = 65536;

float ftone = 2048.0;
cudaStream_t stream;
cudaStreamCreate(&stream);
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaExecutor exec{stream};

// Create input signal as a complex exponential
Expand All @@ -71,31 +68,30 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
auto x = make_tensor<complex>({signal_size});
(x = tmp_x).run(exec); // pre-compute x, tmp_x is otherwise lazily evaluated

// Create window
auto w = make_tensor<float>({nperseg});
(w = flattop<0>({nperseg})).run(exec);

// Create output tensor
auto Pxx = make_tensor<typename complex::value_type>({nfft});

// Run one time to pre-cache the FFT plan
(Pxx = pwelch(x, nperseg, noverlap, nfft)).run(exec);
(Pxx = pwelch(x, w, nperseg, noverlap, nfft)).run(exec);
exec.sync();

// Start the timing
cudaEventRecord(start, stream);

// Start the timing
cudaEventRecord(start, stream);
exec.start_timer();

for (int iteration = 0; iteration < num_iterations; iteration++) {
// Use the PWelch operator
(Pxx = pwelch(x, nperseg, noverlap, nfft)).run(exec);
(Pxx = pwelch(x, w, nperseg, noverlap, nfft)).run(exec);
}

cudaEventRecord(stop, stream);
exec.sync();
cudaEventElapsedTime(&exec_time_ms, start, stop);
exec.stop_timer();

printf("Output Pxx:\n");
print(Pxx);
printf("PWelchOp avg runtime = %.3f ms\n", exec_time_ms / num_iterations);
printf("Pxx(0) = %f\n", Pxx(0));
printf("Pxx(ftone) = %f\n", Pxx(2048));
printf("PWelchOp avg runtime = %.3f ms\n", exec.get_time_ms() / num_iterations);

MATX_CUDA_CHECK_LAST_ERROR();
MATX_EXIT_HANDLER();
Expand Down
101 changes: 101 additions & 0 deletions include/matx/kernels/pwelch.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
////////////////////////////////////////////////////////////////////////////////
// BSD 3-Clause License
//
// Copyright (c) 2023, NVIDIA Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/////////////////////////////////////////////////////////////////////////////////

#pragma once

namespace matx {

enum PwelchOutputScaleMode {
PwelchOutputScaleMode_Spectrum,
PwelchOutputScaleMode_Density,
PwelchOutputScaleMode_Spectrum_dB,
PwelchOutputScaleMode_Density_dB
};

namespace detail {

#ifdef __CUDACC__
template<PwelchOutputScaleMode OUTPUT_SCALE_MODE, typename T_IN, typename T_OUT, typename fsType>
__global__ void pwelch_kernel(const T_IN t_in, T_OUT t_out, fsType fs)
{
const index_t tid = blockIdx.x * blockDim.x + threadIdx.x;
const index_t batches = t_in.Shape()[0];
const index_t nfft = t_in.Shape()[1];

if (tid < nfft)
{
typename T_OUT::value_type pxx = 0;
constexpr typename T_OUT::value_type ten = 10;

for (index_t batch = 0; batch < batches; batch++)
{
pxx += cuda::std::norm(t_in(batch, tid));
}

if constexpr (OUTPUT_SCALE_MODE == PwelchOutputScaleMode_Spectrum)
{
t_out(tid) = pxx / batches;
}
else if constexpr (OUTPUT_SCALE_MODE == PwelchOutputScaleMode_Density)
{
t_out(tid) = pxx / (batches * fs);
}
else if constexpr (OUTPUT_SCALE_MODE == PwelchOutputScaleMode_Spectrum_dB)
{
pxx /= batches;
if (pxx != 0)
{
t_out(tid) = ten * cuda::std::log10(pxx);
}
else
{
t_out(tid) = cuda::std::numeric_limits<typename T_OUT::value_type>::lowest();
}
}
else if constexpr (OUTPUT_SCALE_MODE == PwelchOutputScaleMode_Density_dB)
{
pxx /= (batches * fs);
if (pxx != 0)
{
t_out(tid) = ten * cuda::std::log10(pxx);
}
else
{
t_out(tid) = cuda::std::numeric_limits<typename T_OUT::value_type>::lowest();
}
}
}
}
#endif

};
};
123 changes: 94 additions & 29 deletions include/matx/operators/pwelch.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,35 +40,38 @@
namespace matx
{
namespace detail {
template <typename OpX, typename OpW>
class PWelchOp : public BaseOp<PWelchOp<OpX,OpW>>
template <typename OpX, typename OpW, typename fsType>
class PWelchOp : public BaseOp<PWelchOp<OpX,OpW,fsType>>
{
private:
typename detail::base_type_t<OpX> x_;
typename detail::base_type_t<OpW> w_;

index_t nperseg_;
index_t noverlap_;
index_t nfft_;
cuda::std::array<index_t, 1> out_dims_;
mutable detail::tensor_impl_t<typename remove_cvref_t<OpX>::value_type, 1> tmp_out_;
mutable typename remove_cvref_t<OpX>::value_type *ptr = nullptr;

public:
static_assert(is_complex_v<typename OpX::value_type>, "pwelch() must have a complex input type");
using matxop = bool;
using value_type = typename OpX::value_type::value_type;
using matx_transform_op = bool;
using pwelch_xform_op = bool;

static_assert(is_complex_v<typename OpX::value_type>, "pwelch() must have a complex input type");

__MATX_INLINE__ std::string str() const {
return "pwelch(" + get_type_str(x_) + "," + get_type_str(w_) + ")";
}

__MATX_INLINE__ PWelchOp(const OpX &x, const OpW &w, index_t nperseg, index_t noverlap, index_t nfft) :
x_(x), w_(w), nperseg_(nperseg), noverlap_(noverlap), nfft_(nfft) {

__MATX_INLINE__ PWelchOp(
const OpX &x,
const OpW &w,
index_t nperseg,
index_t noverlap,
index_t nfft,
PwelchOutputScaleMode output_scale_mode,
fsType fs
) :
x_(x),
w_(w),
nperseg_(nperseg),
noverlap_(noverlap),
nfft_(nfft),
output_scale_mode_(output_scale_mode),
fs_(fs)
{
MATX_STATIC_ASSERT_STR(OpX::Rank() == 1, matxInvalidDim, "pwelch: Only input rank of 1 is supported presently");
for (int r = 0; r < OpX::Rank(); r++) {
out_dims_[r] = nfft_;
Expand Down Expand Up @@ -96,25 +99,25 @@ namespace matx
template <typename Out, typename Executor>
void Exec(Out &&out, Executor &&ex) const{
static_assert(is_cuda_executor_v<Executor>, "pwelch() only supports the CUDA executor currently");
pwelch_impl(cuda::std::get<0>(out), x_, w_, nperseg_, noverlap_, nfft_, ex.getStream());
pwelch_impl(cuda::std::get<0>(out), x_, w_, nperseg_, noverlap_, nfft_, output_scale_mode_, fs_, ex.getStream());
}

template <typename ShapeType, typename Executor>
__MATX_INLINE__ void InnerPreRun([[maybe_unused]] ShapeType &&shape, Executor &&ex) const noexcept
{
if constexpr (is_matx_op<OpX>()) {
x_.PreRun(std::forward<ShapeType>(shape), std::forward<Executor>(ex));
}
}

if constexpr (is_matx_op<OpW>()) {
w_.PreRun(Shape(w_), std::forward<Executor>(ex));
}
}
}
}

template <typename ShapeType, typename Executor>
__MATX_INLINE__ void PreRun([[maybe_unused]] ShapeType &&shape, Executor &&ex) const noexcept
{
InnerPreRun(std::forward<ShapeType>(shape), std::forward<Executor>(ex));
InnerPreRun(std::forward<ShapeType>(shape), std::forward<Executor>(ex));

detail::AllocateTempTensor(tmp_out_, std::forward<Executor>(ex), out_dims_, &ptr);

Expand All @@ -133,7 +136,20 @@ namespace matx
}

matxFree(ptr);
}
}

private:
typename detail::base_type_t<OpX> x_;
typename detail::base_type_t<OpW> w_;

index_t nperseg_;
index_t noverlap_;
index_t nfft_;
PwelchOutputScaleMode output_scale_mode_;
fsType fs_;
cuda::std::array<index_t, 1> out_dims_;
mutable detail::tensor_impl_t<typename remove_cvref_t<OpX>::value_type, 1> tmp_out_;
mutable typename remove_cvref_t<OpX>::value_type *ptr = nullptr;
};
}

Expand All @@ -144,6 +160,8 @@ namespace matx
* Input time domain data type
* @tparam wType
* Input window type
* @tparam fsType
* Sampling frequency type
* @param x
* Input time domain tensor
* @param w
Expand All @@ -154,22 +172,69 @@ namespace matx
* Number of points to overlap between segments. Defaults to 0
* @param nfft
* Length of FFT used per segment. nfft >= nperseg. Defaults to nfft = nperseg
* @param output_scale_mode
* Output scale mode. Defaults to PwelchOutputScaleMode_Spectrum
* @param fs
* Sampling frequency. Defaults to 1
*
* @returns Operator with power spectral density of x
*
*/

template <typename xType, typename wType>
__MATX_INLINE__ auto pwelch(const xType& x, const wType& w, index_t nperseg, index_t noverlap, index_t nfft)
template <
typename xType,
typename wType,
typename fsType>
__MATX_INLINE__ auto pwelch(
const xType& x,
const wType& w,
index_t nperseg,
index_t noverlap,
index_t nfft,
PwelchOutputScaleMode output_scale_mode = PwelchOutputScaleMode_Spectrum,
fsType fs = 1
)
{
return detail::PWelchOp(x, w, nperseg, noverlap, nfft, output_scale_mode, fs);
}

template <
typename xType,
typename fsType>
__MATX_INLINE__ auto pwelch(
const xType& x,
index_t nperseg,
index_t noverlap,
index_t nfft,
PwelchOutputScaleMode output_scale_mode = PwelchOutputScaleMode_Spectrum,
fsType fs = 1
)
{
MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
return detail::PWelchOp(x, std::nullopt, nperseg, noverlap, nfft, output_scale_mode, fs);
}

return detail::PWelchOp(x, w, nperseg, noverlap, nfft);
template <typename xType, typename wType>
__MATX_INLINE__ auto pwelch(
const xType& x,
const wType& w,
index_t nperseg,
index_t noverlap,
index_t nfft,
PwelchOutputScaleMode output_scale_mode = PwelchOutputScaleMode_Spectrum
)
{
return detail::PWelchOp(x, w, nperseg, noverlap, nfft, output_scale_mode, 1.f);
}

template <typename xType>
__MATX_INLINE__ auto pwelch(const xType& x, index_t nperseg, index_t noverlap, index_t nfft)
__MATX_INLINE__ auto pwelch(
const xType& x,
index_t nperseg,
index_t noverlap,
index_t nfft,
PwelchOutputScaleMode output_scale_mode = PwelchOutputScaleMode_Spectrum
)
{
return detail::PWelchOp(x, std::nullopt, nperseg, noverlap, nfft);
return detail::PWelchOp(x, std::nullopt, nperseg, noverlap, nfft, output_scale_mode, 1.f);
}
}
Loading