summaryrefslogtreecommitdiff
path: root/lib/jxl/gauss_blur.cc
diff options
context:
space:
mode:
Diffstat (limited to 'lib/jxl/gauss_blur.cc')
-rw-r--r--lib/jxl/gauss_blur.cc24
1 files changed, 12 insertions, 12 deletions
diff --git a/lib/jxl/gauss_blur.cc b/lib/jxl/gauss_blur.cc
index 930ffb4..9edd995 100644
--- a/lib/jxl/gauss_blur.cc
+++ b/lib/jxl/gauss_blur.cc
@@ -16,11 +16,10 @@
#include <hwy/foreach_target.h>
#include <hwy/highway.h>
+#include "lib/jxl/base/common.h"
#include "lib/jxl/base/compiler_specific.h"
-#include "lib/jxl/base/profiler.h"
-#include "lib/jxl/common.h"
+#include "lib/jxl/base/matrix_ops.h"
#include "lib/jxl/image_ops.h"
-#include "lib/jxl/linalg.h"
HWY_BEFORE_NAMESPACE();
namespace jxl {
namespace HWY_NAMESPACE {
@@ -217,7 +216,8 @@ struct OutputStore {
void operator()(const V& out, float* JXL_RESTRICT pos,
ptrdiff_t offset) const {
// Stream helps for large images but is slower for images that fit in cache.
- Store(out, HWY_FULL(float)(), pos + offset);
+ const HWY_FULL(float) df;
+ Store(out, df, pos + offset);
}
};
@@ -227,7 +227,8 @@ class SingleInput {
public:
explicit SingleInput(const float* pos) : pos_(pos) {}
Vec<HWY_FULL(float)> operator()(const size_t offset) const {
- return Load(HWY_FULL(float)(), pos_ + offset);
+ const HWY_FULL(float) df;
+ return Load(df, pos_ + offset);
}
const float* pos_;
};
@@ -238,8 +239,9 @@ class TwoInputs {
public:
TwoInputs(const float* pos1, const float* pos2) : pos1_(pos1), pos2_(pos2) {}
Vec<HWY_FULL(float)> operator()(const size_t offset) const {
- const auto in1 = Load(HWY_FULL(float)(), pos1_ + offset);
- const auto in2 = Load(HWY_FULL(float)(), pos2_ + offset);
+ const HWY_FULL(float) df;
+ const auto in1 = Load(df, pos1_ + offset);
+ const auto in2 = Load(df, pos2_ + offset);
return Add(in1, in2);
}
@@ -377,11 +379,11 @@ void VerticalStrip(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
void FastGaussianVertical(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
const ImageF& in, ThreadPool* /*pool*/,
ImageF* JXL_RESTRICT out) {
- PROFILER_FUNC;
JXL_CHECK(SameSize(in, *out));
+ const HWY_FULL(float) df;
constexpr size_t kCacheLineLanes = 64 / sizeof(float);
- constexpr size_t kVN = MaxLanes(HWY_FULL(float)());
+ constexpr size_t kVN = MaxLanes(df);
constexpr size_t kCacheLineVectors =
(kVN < kCacheLineLanes) ? (kCacheLineLanes / kVN) : 4;
constexpr size_t kFastPace = kCacheLineVectors * kVN;
@@ -497,7 +499,6 @@ ImageF ConvolveAndSample(const ImageF& in, const std::vector<float>& kernel,
// Implements "Recursive Implementation of the Gaussian Filter Using Truncated
// Cosine Functions" by Charalampidis [2016].
hwy::AlignedUniquePtr<RecursiveGaussian> CreateRecursiveGaussian(double sigma) {
- PROFILER_FUNC;
auto rg = hwy::MakeUniqueAligned<RecursiveGaussian>();
constexpr double kPi = 3.141592653589793238;
@@ -542,7 +543,7 @@ hwy::AlignedUniquePtr<RecursiveGaussian> CreateRecursiveGaussian(double sigma) {
const double gamma[3] = {1, radius * radius - sigma * sigma, // (55)
zeta_15 * rho[0] + zeta_35 * rho[1] + rho[2]};
double beta[3];
- MatMul(A, gamma, 3, 3, 1, beta); // (53)
+ Mul3x3Vector(A, gamma, beta); // (53)
// Sanity check: correctly solved for beta (IIR filter weights are normalized)
const double sum = beta[0] * p_1 + beta[1] * p_3 + beta[2] * p_5; // (39)
@@ -595,7 +596,6 @@ namespace {
void FastGaussianHorizontal(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
const ImageF& in, ThreadPool* pool,
ImageF* JXL_RESTRICT out) {
- PROFILER_FUNC;
JXL_CHECK(SameSize(in, *out));
const intptr_t xsize = in.xsize();