diff options
Diffstat (limited to 'lib/jxl/gauss_blur.cc')
-rw-r--r-- | lib/jxl/gauss_blur.cc | 24 |
1 files changed, 12 insertions, 12 deletions
diff --git a/lib/jxl/gauss_blur.cc b/lib/jxl/gauss_blur.cc index 930ffb4..9edd995 100644 --- a/lib/jxl/gauss_blur.cc +++ b/lib/jxl/gauss_blur.cc @@ -16,11 +16,10 @@ #include <hwy/foreach_target.h> #include <hwy/highway.h> +#include "lib/jxl/base/common.h" #include "lib/jxl/base/compiler_specific.h" -#include "lib/jxl/base/profiler.h" -#include "lib/jxl/common.h" +#include "lib/jxl/base/matrix_ops.h" #include "lib/jxl/image_ops.h" -#include "lib/jxl/linalg.h" HWY_BEFORE_NAMESPACE(); namespace jxl { namespace HWY_NAMESPACE { @@ -217,7 +216,8 @@ struct OutputStore { void operator()(const V& out, float* JXL_RESTRICT pos, ptrdiff_t offset) const { // Stream helps for large images but is slower for images that fit in cache. - Store(out, HWY_FULL(float)(), pos + offset); + const HWY_FULL(float) df; + Store(out, df, pos + offset); } }; @@ -227,7 +227,8 @@ class SingleInput { public: explicit SingleInput(const float* pos) : pos_(pos) {} Vec<HWY_FULL(float)> operator()(const size_t offset) const { - return Load(HWY_FULL(float)(), pos_ + offset); + const HWY_FULL(float) df; + return Load(df, pos_ + offset); } const float* pos_; }; @@ -238,8 +239,9 @@ class TwoInputs { public: TwoInputs(const float* pos1, const float* pos2) : pos1_(pos1), pos2_(pos2) {} Vec<HWY_FULL(float)> operator()(const size_t offset) const { - const auto in1 = Load(HWY_FULL(float)(), pos1_ + offset); - const auto in2 = Load(HWY_FULL(float)(), pos2_ + offset); + const HWY_FULL(float) df; + const auto in1 = Load(df, pos1_ + offset); + const auto in2 = Load(df, pos2_ + offset); return Add(in1, in2); } @@ -377,11 +379,11 @@ void VerticalStrip(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg, void FastGaussianVertical(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg, const ImageF& in, ThreadPool* /*pool*/, ImageF* JXL_RESTRICT out) { - PROFILER_FUNC; JXL_CHECK(SameSize(in, *out)); + const HWY_FULL(float) df; constexpr size_t kCacheLineLanes = 64 / sizeof(float); - constexpr size_t kVN = MaxLanes(HWY_FULL(float)()); + constexpr size_t kVN = MaxLanes(df); constexpr size_t kCacheLineVectors = (kVN < kCacheLineLanes) ? (kCacheLineLanes / kVN) : 4; constexpr size_t kFastPace = kCacheLineVectors * kVN; @@ -497,7 +499,6 @@ ImageF ConvolveAndSample(const ImageF& in, const std::vector<float>& kernel, // Implements "Recursive Implementation of the Gaussian Filter Using Truncated // Cosine Functions" by Charalampidis [2016]. hwy::AlignedUniquePtr<RecursiveGaussian> CreateRecursiveGaussian(double sigma) { - PROFILER_FUNC; auto rg = hwy::MakeUniqueAligned<RecursiveGaussian>(); constexpr double kPi = 3.141592653589793238; @@ -542,7 +543,7 @@ hwy::AlignedUniquePtr<RecursiveGaussian> CreateRecursiveGaussian(double sigma) { const double gamma[3] = {1, radius * radius - sigma * sigma, // (55) zeta_15 * rho[0] + zeta_35 * rho[1] + rho[2]}; double beta[3]; - MatMul(A, gamma, 3, 3, 1, beta); // (53) + Mul3x3Vector(A, gamma, beta); // (53) // Sanity check: correctly solved for beta (IIR filter weights are normalized) const double sum = beta[0] * p_1 + beta[1] * p_3 + beta[2] * p_5; // (39) @@ -595,7 +596,6 @@ namespace { void FastGaussianHorizontal(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg, const ImageF& in, ThreadPool* pool, ImageF* JXL_RESTRICT out) { - PROFILER_FUNC; JXL_CHECK(SameSize(in, *out)); const intptr_t xsize = in.xsize(); |