diff options
Diffstat (limited to 'lib/jpegli')
74 files changed, 19637 insertions, 0 deletions
diff --git a/lib/jpegli/README.md b/lib/jpegli/README.md new file mode 100644 index 0000000..72f13af --- /dev/null +++ b/lib/jpegli/README.md @@ -0,0 +1,49 @@ +# Improved JPEG encoder and decoder implementation + +This subdirectory contains a JPEG encoder and decoder implementation that is +API and ABI compatible with libjpeg62. + +## Building + +When building the parent libjxl project, two binaries, `tools/cjpegli` and +`tools/djpegli`, will be built, as well as a +`lib/jpegli/libjpeg.so.62.3.0` shared library that can be used as a drop-in +replacement for the system library with the same name. + +## Encoder improvements + +Improvements and new features used by the encoder include: + +* Support for 16-bit unsigned and 32-bit floating point input buffers. + +* Color space conversions, chroma subsampling and DCT are all done in floating + point precision; the conversion to integers happens only when producing + the final quantized DCT coefficients. + +* The desired quality can be indicated by a distance parameter that is + analogous to the distance parameter of JPEG XL. The quantization tables + are chosen based on the distance and the chroma subsampling mode, with + different positions in the quantization matrix scaling differently, and the + red and blue chrominance channels have separate quantization tables. + +* Adaptive dead-zone quantization. On noisy parts of the image, quantization + thresholds for zero coefficients are higher than on smoother parts of the + image. + +* Support for more efficient compression of JPEGs with an ICC profile + representing the XYB colorspace. These JPEGs will not be converted to the + YCbCr colorspace, but specialized quantization tables will be chosen for + the original X, Y, B channels. + +## Decoder improvements + +* Support for 16-bit unsigned and 32-bit floating point output buffers. 
+ +* Non-zero DCT coefficients are dequantized to the expectation value of their + respective quantization intervals assuming a Laplacian distribution of the + original unquantized DCT coefficients. + +* After dequantization, inverse DCT, chroma upsampling and color space + conversions are all done in floating point precision, the conversion to + integer samples happens only in the final output phase (unless output to + floating point was requested). diff --git a/lib/jpegli/adaptive_quantization.cc b/lib/jpegli/adaptive_quantization.cc new file mode 100644 index 0000000..6a8c4d3 --- /dev/null +++ b/lib/jpegli/adaptive_quantization.cc @@ -0,0 +1,562 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/adaptive_quantization.h" + +#include <stddef.h> +#include <stdlib.h> + +#include <algorithm> +#include <cmath> +#include <limits> +#include <string> +#include <vector> + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/adaptive_quantization.cc" +#include <hwy/foreach_target.h> +#include <hwy/highway.h> + +#include "lib/jpegli/encode_internal.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::AbsDiff; +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::And; +using hwy::HWY_NAMESPACE::Div; +using hwy::HWY_NAMESPACE::Floor; +using hwy::HWY_NAMESPACE::GetLane; +using hwy::HWY_NAMESPACE::Max; +using hwy::HWY_NAMESPACE::Min; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::NegMulAdd; +using hwy::HWY_NAMESPACE::Rebind; +using hwy::HWY_NAMESPACE::ShiftLeft; +using hwy::HWY_NAMESPACE::ShiftRight; +using hwy::HWY_NAMESPACE::Sqrt; +using hwy::HWY_NAMESPACE::Sub; +using hwy::HWY_NAMESPACE::ZeroIfNegative; + +static constexpr float kInputScaling = 1.0f / 255.0f; + +// Primary template: default to actual division. +template <typename T, class V> +struct FastDivision { + HWY_INLINE V operator()(const V n, const V d) const { return n / d; } +}; +// Partial specialization for float vectors. +template <class V> +struct FastDivision<float, V> { + // One Newton-Raphson iteration. + static HWY_INLINE V ReciprocalNR(const V x) { + const auto rcp = ApproximateReciprocal(x); + const auto sum = Add(rcp, rcp); + const auto x_rcp = Mul(x, rcp); + return NegMulAdd(x_rcp, rcp, sum); + } + + V operator()(const V n, const V d) const { +#if 1 // Faster on SKX + return Div(n, d); +#else + return n * ReciprocalNR(d); +#endif + } +}; + +// Approximates smooth functions via rational polynomials (i.e. dividing two +// polynomials). Evaluates polynomials via Horner's scheme, which is faster than +// Clenshaw recurrence for Chebyshev polynomials. LoadDup128 allows us to +// specify constants (replicated 4x) independently of the lane count. 
+template <size_t NP, size_t NQ, class D, class V, typename T> +HWY_INLINE HWY_MAYBE_UNUSED V EvalRationalPolynomial(const D d, const V x, + const T (&p)[NP], + const T (&q)[NQ]) { + constexpr size_t kDegP = NP / 4 - 1; + constexpr size_t kDegQ = NQ / 4 - 1; + auto yp = LoadDup128(d, &p[kDegP * 4]); + auto yq = LoadDup128(d, &q[kDegQ * 4]); + // We use pointer arithmetic to refer to &p[(kDegP - n) * 4] to avoid a + // compiler warning that the index is out of bounds since we are already + // checking that it is not out of bounds with (kDegP >= n) and the access + // will be optimized away. Similarly with q and kDegQ. + HWY_FENCE; + if (kDegP >= 1) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 1) * 4))); + if (kDegQ >= 1) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 1) * 4))); + HWY_FENCE; + if (kDegP >= 2) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 2) * 4))); + if (kDegQ >= 2) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 2) * 4))); + HWY_FENCE; + if (kDegP >= 3) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 3) * 4))); + if (kDegQ >= 3) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 3) * 4))); + HWY_FENCE; + if (kDegP >= 4) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 4) * 4))); + if (kDegQ >= 4) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 4) * 4))); + HWY_FENCE; + if (kDegP >= 5) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 5) * 4))); + if (kDegQ >= 5) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 5) * 4))); + HWY_FENCE; + if (kDegP >= 6) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 6) * 4))); + if (kDegQ >= 6) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 6) * 4))); + HWY_FENCE; + if (kDegP >= 7) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 7) * 4))); + if (kDegQ >= 7) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 7) * 4))); + + return FastDivision<T, V>()(yp, yq); +} + +// Computes base-2 logarithm like std::log2. Undefined if negative / NaN. 
+// L1 error ~3.9E-6
+template <class DF, class V>
+V FastLog2f(const DF df, V x) {
+  // Numerator / denominator coefficients (each replicated 4x via HWY_REP4) of
+  // the 2,2 rational polynomial approximating std::log1p(x) / std::log(2).
+  HWY_ALIGN const float numer[4 * (2 + 1)] = {
+      HWY_REP4(-1.8503833400518310E-06f), HWY_REP4(1.4287160470083755E+00f),
+      HWY_REP4(7.4245873327820566E-01f)};
+  HWY_ALIGN const float denom[4 * (2 + 1)] = {
+      HWY_REP4(9.9032814277590719E-01f), HWY_REP4(1.0096718572241148E+00f),
+      HWY_REP4(1.7409343003366853E-01f)};
+
+  const Rebind<int32_t, DF> di;
+  const auto raw = BitCast(di, x);
+
+  // Range reduction to [-1/3, 1/3] - 3 integer, 2 float ops
+  const auto rebased = Sub(raw, Set(di, 0x3f2aaaab));  // = 2/3
+  // The shifted exponent is the integer part of the result; shifting it back
+  // and subtracting it from the raw bits clears the exponent of the input.
+  const auto exponent = ShiftRight<23>(rebased);
+  const auto reduced = BitCast(df, Sub(raw, ShiftLeft<23>(exponent)));
+  const auto integer_part = ConvertTo(df, exponent);
+  const auto fractional_part =
+      EvalRationalPolynomial(df, Sub(reduced, Set(df, 1.0f)), numer, denom);
+  return Add(fractional_part, integer_part);
+}
+
+// max relative error ~3e-7
+template <class DF, class V>
+V FastPow2f(const DF df, V x) {
+  const Rebind<int32_t, DF> di;
+  // Split x into floor(x), which becomes the float exponent field, and the
+  // remainder in [0, 1), which feeds a rational polynomial.
+  const auto whole = Floor(x);
+  const auto rest = Sub(x, whole);
+  const auto biased_exponent = Add(ConvertTo(di, whole), Set(di, 127));
+  const auto scale = BitCast(df, ShiftLeft<23>(biased_exponent));
+
+  // Numerator, evaluated with Horner's scheme and then scaled by 2^floor(x).
+  const auto n0 = Add(rest, Set(df, 1.01749063e+01));
+  const auto n1 = MulAdd(n0, rest, Set(df, 4.88687798e+01));
+  const auto n2 = MulAdd(n1, rest, Set(df, 9.85506591e+01));
+  const auto numerator = Mul(n2, scale);
+
+  // Denominator, also via Horner's scheme.
+  const auto d0 =
+      MulAdd(rest, Set(df, 2.10242958e-01), Set(df, -2.22328856e-02));
+  const auto d1 = MulAdd(d0, rest, Set(df, -1.94414990e+01));
+  const auto denominator = MulAdd(d1, rest, Set(df, 9.85506633e+01));
+
+  return Div(numerator, denominator);
+}
+
+// Scalar convenience wrapper: evaluates the vector FastPow2f on one lane.
+inline float FastPow2f(float f) {
+  HWY_CAPPED(float, 1) single_lane;
+  const auto input = Set(single_lane, f);
+  return GetLane(FastPow2f(single_lane, input));
+}
+
+// The following functions modulate an exponent (out_val) and return the updated
+// value. Their descriptor is limited to 8 lanes for 8x8 blocks.
+ +template <class D, class V> +V ComputeMask(const D d, const V out_val) { + const auto kBase = Set(d, -0.74174993f); + const auto kMul4 = Set(d, 3.2353257320940401f); + const auto kMul2 = Set(d, 12.906028311180409f); + const auto kOffset2 = Set(d, 305.04035728311436f); + const auto kMul3 = Set(d, 5.0220313103171232f); + const auto kOffset3 = Set(d, 2.1925739705298404f); + const auto kOffset4 = Mul(Set(d, 0.25f), kOffset3); + const auto kMul0 = Set(d, 0.74760422233706747f); + const auto k1 = Set(d, 1.0f); + + // Avoid division by zero. + const auto v1 = Max(Mul(out_val, kMul0), Set(d, 1e-3f)); + const auto v2 = Div(k1, Add(v1, kOffset2)); + const auto v3 = Div(k1, MulAdd(v1, v1, kOffset3)); + const auto v4 = Div(k1, MulAdd(v1, v1, kOffset4)); + // TODO(jyrki): + // A log or two here could make sense. In butteraugli we have effectively + // log(log(x + C)) for this kind of use, as a single log is used in + // saturating visual masking and here the modulation values are exponential, + // another log would counter that. + return Add(kBase, MulAdd(kMul4, v4, MulAdd(kMul2, v2, Mul(kMul3, v3)))); +} + +// mul and mul2 represent a scaling difference between jxl and butteraugli. +static const float kSGmul = 226.0480446705883f; +static const float kSGmul2 = 1.0f / 73.377132366608819f; +static const float kLog2 = 0.693147181f; +// Includes correction factor for std::log -> log2. +static const float kSGRetMul = kSGmul2 * 18.6580932135f * kLog2; +static const float kSGVOffset = 7.14672470003f; + +template <bool invert, typename D, typename V> +V RatioOfDerivativesOfCubicRootToSimpleGamma(const D d, V v) { + // The opsin space in jxl is the cubic root of photons, i.e., v * v * v + // is related to the number of photons. + // + // SimpleGamma(v * v * v) is the psychovisual space in butteraugli. + // This ratio allows quantization to move from jxl's opsin space to + // butteraugli's log-gamma space. 
+ static const float kEpsilon = 1e-2; + static const float kNumOffset = kEpsilon / kInputScaling / kInputScaling; + static const float kNumMul = kSGRetMul * 3 * kSGmul; + static const float kVOffset = (kSGVOffset * kLog2 + kEpsilon) / kInputScaling; + static const float kDenMul = kLog2 * kSGmul * kInputScaling * kInputScaling; + + v = ZeroIfNegative(v); + const auto num_mul = Set(d, kNumMul); + const auto num_offset = Set(d, kNumOffset); + const auto den_offset = Set(d, kVOffset); + const auto den_mul = Set(d, kDenMul); + + const auto v2 = Mul(v, v); + + const auto num = MulAdd(num_mul, v2, num_offset); + const auto den = MulAdd(Mul(den_mul, v), v2, den_offset); + return invert ? Div(num, den) : Div(den, num); +} + +template <bool invert = false> +static float RatioOfDerivativesOfCubicRootToSimpleGamma(float v) { + using DScalar = HWY_CAPPED(float, 1); + auto vscalar = Load(DScalar(), &v); + return GetLane( + RatioOfDerivativesOfCubicRootToSimpleGamma<invert>(DScalar(), vscalar)); +} + +// TODO(veluca): this function computes an approximation of the derivative of +// SimpleGamma with (f(x+eps)-f(x))/eps. Consider two-sided approximation or +// exact derivatives. For reference, SimpleGamma was: +/* +template <typename D, typename V> +V SimpleGamma(const D d, V v) { + // A simple HDR compatible gamma function. + const auto mul = Set(d, kSGmul); + const auto kRetMul = Set(d, kSGRetMul); + const auto kRetAdd = Set(d, kSGmul2 * -20.2789020414f); + const auto kVOffset = Set(d, kSGVOffset); + + v *= mul; + + // This should happen rarely, but may lead to a NaN, which is rather + // undesirable. Since negative photons don't exist we solve the NaNs by + // clamping here. + // TODO(veluca): with FastLog2f, this no longer leads to NaNs. 
+ v = ZeroIfNegative(v); + return kRetMul * FastLog2f(d, v + kVOffset) + kRetAdd; +} +*/ + +template <class D, class V> +V GammaModulation(const D d, const size_t x, const size_t y, + const RowBuffer<float>& input, const V out_val) { + static const float kBias = 0.16f / kInputScaling; + static const float kScale = kInputScaling / 64.0f; + auto overall_ratio = Zero(d); + const auto bias = Set(d, kBias); + const auto scale = Set(d, kScale); + const float* const JXL_RESTRICT block_start = input.Row(y) + x; + for (size_t dy = 0; dy < 8; ++dy) { + const float* const JXL_RESTRICT row_in = block_start + dy * input.stride(); + for (size_t dx = 0; dx < 8; dx += Lanes(d)) { + const auto iny = Add(Load(d, row_in + dx), bias); + const auto ratio_g = + RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/true>(d, iny); + overall_ratio = Add(overall_ratio, ratio_g); + } + } + overall_ratio = Mul(SumOfLanes(d, overall_ratio), scale); + // ideally -1.0, but likely optimal correction adds some entropy, so slightly + // less than that. + // ln(2) constant folded in because we want std::log but have FastLog2f. + const auto kGam = Set(d, -0.15526878023684174f * 0.693147180559945f); + return MulAdd(kGam, FastLog2f(d, overall_ratio), out_val); +} + +// Change precision in 8x8 blocks that have high frequency content. +template <class D, class V> +V HfModulation(const D d, const size_t x, const size_t y, + const RowBuffer<float>& input, const V out_val) { + // Zero out the invalid differences for the rightmost value per row. 
+ const Rebind<uint32_t, D> du; + HWY_ALIGN constexpr uint32_t kMaskRight[8] = {~0u, ~0u, ~0u, ~0u, + ~0u, ~0u, ~0u, 0}; + + auto sum = Zero(d); // sum of absolute differences with right and below + static const float kSumCoeff = -2.0052193233688884f * kInputScaling / 112.0; + auto sumcoeff = Set(d, kSumCoeff); + + const float* const JXL_RESTRICT block_start = input.Row(y) + x; + for (size_t dy = 0; dy < 8; ++dy) { + const float* JXL_RESTRICT row_in = block_start + dy * input.stride(); + const float* JXL_RESTRICT row_in_next = + dy == 7 ? row_in : row_in + input.stride(); + + for (size_t dx = 0; dx < 8; dx += Lanes(d)) { + const auto p = Load(d, row_in + dx); + const auto pr = LoadU(d, row_in + dx + 1); + const auto mask = BitCast(d, Load(du, kMaskRight + dx)); + sum = Add(sum, And(mask, AbsDiff(p, pr))); + const auto pd = Load(d, row_in_next + dx); + sum = Add(sum, AbsDiff(p, pd)); + } + } + + sum = SumOfLanes(d, sum); + return MulAdd(sum, sumcoeff, out_val); +} + +void PerBlockModulations(const float y_quant_01, const RowBuffer<float>& input, + const size_t yb0, const size_t yblen, + RowBuffer<float>* aq_map) { + static const float kAcQuant = 0.841f; + float base_level = 0.48f * kAcQuant; + float kDampenRampStart = 9.0f; + float kDampenRampEnd = 65.0f; + float dampen = 1.0f; + if (y_quant_01 >= kDampenRampStart) { + dampen = 1.0f - ((y_quant_01 - kDampenRampStart) / + (kDampenRampEnd - kDampenRampStart)); + if (dampen < 0) { + dampen = 0; + } + } + const float mul = kAcQuant * dampen; + const float add = (1.0f - dampen) * base_level; + for (size_t iy = 0; iy < yblen; iy++) { + const size_t yb = yb0 + iy; + const size_t y = yb * 8; + float* const JXL_RESTRICT row_out = aq_map->Row(yb); + const HWY_CAPPED(float, 8) df; + for (size_t ix = 0; ix < aq_map->xsize(); ix++) { + size_t x = ix * 8; + auto out_val = Set(df, row_out[ix]); + out_val = ComputeMask(df, out_val); + out_val = HfModulation(df, x, y, input, out_val); + out_val = GammaModulation(df, x, y, input, 
out_val); + // We want multiplicative quantization field, so everything + // until this point has been modulating the exponent. + row_out[ix] = FastPow2f(GetLane(out_val) * 1.442695041f) * mul + add; + } + } +} + +template <typename D, typename V> +V MaskingSqrt(const D d, V v) { + static const float kLogOffset = 28; + static const float kMul = 211.50759899638012f; + const auto mul_v = Set(d, kMul * 1e8); + const auto offset_v = Set(d, kLogOffset); + return Mul(Set(d, 0.25f), Sqrt(MulAdd(v, Sqrt(mul_v), offset_v))); +} + +template <typename V> +void Sort4(V& min0, V& min1, V& min2, V& min3) { + const auto tmp0 = Min(min0, min1); + const auto tmp1 = Max(min0, min1); + const auto tmp2 = Min(min2, min3); + const auto tmp3 = Max(min2, min3); + const auto tmp4 = Max(tmp0, tmp2); + const auto tmp5 = Min(tmp1, tmp3); + min0 = Min(tmp0, tmp2); + min1 = Min(tmp4, tmp5); + min2 = Max(tmp4, tmp5); + min3 = Max(tmp1, tmp3); +} + +template <typename V> +void UpdateMin4(const V v, V& min0, V& min1, V& min2, V& min3) { + const auto tmp0 = Max(min0, v); + const auto tmp1 = Max(min1, tmp0); + const auto tmp2 = Max(min2, tmp1); + min0 = Min(min0, v); + min1 = Min(min1, tmp0); + min2 = Min(min2, tmp1); + min3 = Min(min3, tmp2); +} + +// Computes a linear combination of the 4 lowest values of the 3x3 neighborhood +// of each pixel. Output is downsampled 2x. 
+void FuzzyErosion(const RowBuffer<float>& pre_erosion, const size_t yb0, + const size_t yblen, RowBuffer<float>* tmp, + RowBuffer<float>* aq_map) { + int xsize_blocks = aq_map->xsize(); + int xsize = pre_erosion.xsize(); + HWY_FULL(float) d; + const auto mul0 = Set(d, 0.125f); + const auto mul1 = Set(d, 0.075f); + const auto mul2 = Set(d, 0.06f); + const auto mul3 = Set(d, 0.05f); + for (size_t iy = 0; iy < 2 * yblen; ++iy) { + size_t y = 2 * yb0 + iy; + const float* JXL_RESTRICT rowt = pre_erosion.Row(y - 1); + const float* JXL_RESTRICT rowm = pre_erosion.Row(y); + const float* JXL_RESTRICT rowb = pre_erosion.Row(y + 1); + float* row_out = tmp->Row(y); + for (int x = 0; x < xsize; x += Lanes(d)) { + int xm1 = x - 1; + int xp1 = x + 1; + auto min0 = LoadU(d, rowm + x); + auto min1 = LoadU(d, rowm + xm1); + auto min2 = LoadU(d, rowm + xp1); + auto min3 = LoadU(d, rowt + xm1); + Sort4(min0, min1, min2, min3); + UpdateMin4(LoadU(d, rowt + x), min0, min1, min2, min3); + UpdateMin4(LoadU(d, rowt + xp1), min0, min1, min2, min3); + UpdateMin4(LoadU(d, rowb + xm1), min0, min1, min2, min3); + UpdateMin4(LoadU(d, rowb + x), min0, min1, min2, min3); + UpdateMin4(LoadU(d, rowb + xp1), min0, min1, min2, min3); + const auto v = Add(Add(Mul(mul0, min0), Mul(mul1, min1)), + Add(Mul(mul2, min2), Mul(mul3, min3))); + Store(v, d, row_out + x); + } + if (iy % 2 == 1) { + const float* JXL_RESTRICT row_out0 = tmp->Row(y - 1); + float* JXL_RESTRICT aq_out = aq_map->Row(yb0 + iy / 2); + for (int bx = 0, x = 0; bx < xsize_blocks; ++bx, x += 2) { + aq_out[bx] = + (row_out[x] + row_out[x + 1] + row_out0[x] + row_out0[x + 1]); + } + } + } +} + +void ComputePreErosion(const RowBuffer<float>& input, const size_t xsize, + const size_t y0, const size_t ylen, int border, + float* diff_buffer, RowBuffer<float>* pre_erosion) { + const size_t xsize_out = xsize / 4; + const size_t y0_out = y0 / 4; + + // The XYB gamma is 3.0 to be able to decode faster with two muls. 
+ // Butteraugli's gamma is matching the gamma of human eye, around 2.6. + // We approximate the gamma difference by adding one cubic root into + // the adaptive quantization. This gives us a total gamma of 2.6666 + // for quantization uses. + static const float match_gamma_offset = 0.019 / kInputScaling; + + const HWY_CAPPED(float, 8) df; + + static const float limit = 0.2f; + // Computes image (padded to multiple of 8x8) of local pixel differences. + // Subsample both directions by 4. + for (size_t iy = 0; iy < ylen; ++iy) { + size_t y = y0 + iy; + const float* row_in = input.Row(y); + const float* row_in1 = input.Row(y + 1); + const float* row_in2 = input.Row(y - 1); + float* JXL_RESTRICT row_out = diff_buffer; + const auto match_gamma_offset_v = Set(df, match_gamma_offset); + const auto quarter = Set(df, 0.25f); + for (size_t x = 0; x < xsize; x += Lanes(df)) { + const auto in = LoadU(df, row_in + x); + const auto in_r = LoadU(df, row_in + x + 1); + const auto in_l = LoadU(df, row_in + x - 1); + const auto in_t = LoadU(df, row_in2 + x); + const auto in_b = LoadU(df, row_in1 + x); + const auto base = Mul(quarter, Add(Add(in_r, in_l), Add(in_t, in_b))); + const auto gammacv = + RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/false>( + df, Add(in, match_gamma_offset_v)); + auto diff = Mul(gammacv, Sub(in, base)); + diff = Mul(diff, diff); + diff = Min(diff, Set(df, limit)); + diff = MaskingSqrt(df, diff); + if ((iy & 3) != 0) { + diff = Add(diff, LoadU(df, row_out + x)); + } + StoreU(diff, df, row_out + x); + } + if (iy % 4 == 3) { + size_t y_out = y0_out + iy / 4; + float* row_dout = pre_erosion->Row(y_out); + for (size_t x = 0; x < xsize_out; x++) { + row_dout[x] = (row_out[x * 4] + row_out[x * 4 + 1] + + row_out[x * 4 + 2] + row_out[x * 4 + 3]) * + 0.25f; + } + pre_erosion->PadRow(y_out, xsize_out, border); + } + } +} + +} // namespace + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli 
+HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jpegli { +HWY_EXPORT(ComputePreErosion); +HWY_EXPORT(FuzzyErosion); +HWY_EXPORT(PerBlockModulations); + +namespace { + +static constexpr int kPreErosionBorder = 1; + +} // namespace + +void ComputeAdaptiveQuantField(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + if (!m->use_adaptive_quantization) { + return; + } + int y_channel = cinfo->jpeg_color_space == JCS_RGB ? 1 : 0; + jpeg_component_info* y_comp = &cinfo->comp_info[y_channel]; + int y_quant_01 = cinfo->quant_tbl_ptrs[y_comp->quant_tbl_no]->quantval[1]; + if (m->next_iMCU_row == 0) { + m->input_buffer[y_channel].CopyRow(-1, 0, 1); + } + if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) { + size_t last_row = m->ysize_blocks * DCTSIZE - 1; + m->input_buffer[y_channel].CopyRow(last_row + 1, last_row, 1); + } + const RowBuffer<float>& input = m->input_buffer[y_channel]; + const size_t xsize_blocks = y_comp->width_in_blocks; + const size_t xsize = xsize_blocks * DCTSIZE; + const size_t yb0 = m->next_iMCU_row * cinfo->max_v_samp_factor; + const size_t yblen = cinfo->max_v_samp_factor; + size_t y0 = yb0 * DCTSIZE; + size_t ylen = cinfo->max_v_samp_factor * DCTSIZE; + if (y0 == 0) { + ylen += 4; + } else { + y0 += 4; + } + if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) { + ylen -= 4; + } + HWY_DYNAMIC_DISPATCH(ComputePreErosion) + (input, xsize, y0, ylen, kPreErosionBorder, m->diff_buffer, &m->pre_erosion); + if (y0 == 0) { + m->pre_erosion.CopyRow(-1, 0, kPreErosionBorder); + } + if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) { + size_t last_row = m->ysize_blocks * 2 - 1; + m->pre_erosion.CopyRow(last_row + 1, last_row, kPreErosionBorder); + } + HWY_DYNAMIC_DISPATCH(FuzzyErosion) + (m->pre_erosion, yb0, yblen, &m->fuzzy_erosion_tmp, &m->quant_field); + HWY_DYNAMIC_DISPATCH(PerBlockModulations) + (y_quant_01, input, yb0, yblen, &m->quant_field); + for (int y = 0; y < cinfo->max_v_samp_factor; ++y) { + float* row = m->quant_field.Row(yb0 
+ y); + for (size_t x = 0; x < xsize_blocks; ++x) { + row[x] = std::max(0.0f, (0.6f / row[x]) - 1.0f); + } + } +} + +} // namespace jpegli +#endif // HWY_ONCE diff --git a/lib/jpegli/adaptive_quantization.h b/lib/jpegli/adaptive_quantization.h new file mode 100644 index 0000000..d8537e8 --- /dev/null +++ b/lib/jpegli/adaptive_quantization.h @@ -0,0 +1,17 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_ADAPTIVE_QUANTIZATION_H_ +#define LIB_JPEGLI_ADAPTIVE_QUANTIZATION_H_ + +#include "lib/jpegli/common.h" + +namespace jpegli { + +void ComputeAdaptiveQuantField(j_compress_ptr cinfo); + +} // namespace jpegli + +#endif // LIB_JPEGLI_ADAPTIVE_QUANTIZATION_H_ diff --git a/lib/jpegli/bit_writer.cc b/lib/jpegli/bit_writer.cc new file mode 100644 index 0000000..9788f35 --- /dev/null +++ b/lib/jpegli/bit_writer.cc @@ -0,0 +1,60 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+
+#include "lib/jpegli/bit_writer.h"
+
+#include <algorithm>
+
+#include "lib/jpegli/encode_internal.h"
+
+namespace jpegli {
+
+// Sets up the bit writer stored in cinfo->master->bw: allocates its byte
+// buffer from the JPOOL_IMAGE pool and resets the buffer positions and the
+// 64-bit accumulator to the empty state.
+void JpegBitWriterInit(j_compress_ptr cinfo) {
+  jpeg_comp_master* m = cinfo->master;
+  JpegBitWriter* bw = &m->bw;
+  // NOTE(review): the sizing presumably bounds the bytes one iMCU row can
+  // produce plus slack; the writer itself never checks |len|, so confirm.
+  size_t buffer_size = m->blocks_per_iMCU_row * (DCTSIZE2 * 16 + 8) + (1 << 16);
+  bw->cinfo = cinfo;
+  bw->data = Allocate<uint8_t>(cinfo, buffer_size, JPOOL_IMAGE);
+  bw->len = buffer_size;
+  bw->pos = 0;
+  bw->output_pos = 0;
+  bw->put_buffer = 0;
+  bw->free_bits = 64;  // All 64 accumulator bits are unused.
+  bw->healthy = true;
+}
+
+// Copies the bytes in [output_pos, pos) of the writer's buffer to the
+// destination manager of bw->cinfo, asking it for a fresh output buffer
+// whenever the current one has no free space.
+//
+// Returns false if empty_output_buffer() returns false; output_pos then keeps
+// the progress made so far. Returns true once everything was copied, after
+// resetting pos and output_pos to 0.
+bool EmptyBitWriterBuffer(JpegBitWriter* bw) {
+  while (bw->output_pos < bw->pos) {
+    j_compress_ptr cinfo = bw->cinfo;
+    if (cinfo->dest->free_in_buffer == 0 &&
+        !(*cinfo->dest->empty_output_buffer)(cinfo)) {
+      return false;
+    }
+    size_t buflen = bw->pos - bw->output_pos;
+    size_t copylen = std::min<size_t>(cinfo->dest->free_in_buffer, buflen);
+    memcpy(cinfo->dest->next_output_byte, bw->data + bw->output_pos, copylen);
+    bw->output_pos += copylen;
+    cinfo->dest->free_in_buffer -= copylen;
+    cinfo->dest->next_output_byte += copylen;
+  }
+  bw->output_pos = bw->pos = 0;
+  return true;
+}
+
+// Pads the pending bits with 1-bits up to the next byte boundary, appends the
+// resulting whole bytes to the byte buffer with EmitByte() and leaves the
+// accumulator empty.
+void JumpToByteBoundary(JpegBitWriter* bw) {
+  size_t n_bits = bw->free_bits & 7u;
+  if (n_bits > 0) {
+    WriteBits(bw, n_bits, (1u << n_bits) - 1);
+  }
+  // With an empty accumulator free_bits is 64, and shifting a 64-bit value by
+  // 64 is undefined behavior. There is nothing to flush in that case, so the
+  // left-alignment and the byte loop are only run when bits are pending.
+  if (bw->free_bits < 64) {
+    // Move the pending bits to the top of the accumulator.
+    bw->put_buffer <<= bw->free_bits;
+    while (bw->free_bits <= 56) {
+      int c = (bw->put_buffer >> 56) & 0xFF;
+      EmitByte(bw, c);
+      bw->put_buffer <<= 8;
+      bw->free_bits += 8;
+    }
+  }
+  bw->put_buffer = 0;
+  bw->free_bits = 64;
+}
+
+}  // namespace jpegli
diff --git a/lib/jpegli/bit_writer.h b/lib/jpegli/bit_writer.h
new file mode 100644
index 0000000..3adf1ea
--- /dev/null
+++ b/lib/jpegli/bit_writer.h
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_BIT_WRITER_H_
+#define LIB_JPEGLI_BIT_WRITER_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#include "lib/jpegli/common.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jpegli {
+
+// Handles the packing of bits into output bytes.
+struct JpegBitWriter {
+  // Compression object whose destination manager receives the bytes.
+  j_compress_ptr cinfo;
+  // Byte buffer that EmitByte() / DischargeBitBuffer() append to.
+  uint8_t* data;
+  // Size of |data| in bytes, as allocated by JpegBitWriterInit().
+  size_t len;
+  // Number of bytes written to |data| so far, i.e. the next write position.
+  size_t pos;
+  // Number of bytes of |data| already copied out by EmptyBitWriterBuffer().
+  size_t output_pos;
+  // 64-bit accumulator; new bits are shifted in at the low end.
+  uint64_t put_buffer;
+  // Number of bits of |put_buffer| that are still unused (64 means empty).
+  int free_bits;
+  // Set to false by WriteBits() when it is asked to write a zero-length code.
+  bool healthy;
+};
+
+void JpegBitWriterInit(j_compress_ptr cinfo);
+
+bool EmptyBitWriterBuffer(JpegBitWriter* bw);
+
+void JumpToByteBoundary(JpegBitWriter* bw);
+
+// Returns non-zero if and only if x has a zero byte, i.e. one of
+// x & 0xff, x & 0xff00, ..., x & 0xff00000000000000 is zero.
+static JXL_INLINE uint64_t HasZeroByte(uint64_t x) {
+  return (x - 0x0101010101010101ULL) & ~x & 0x8080808080808080ULL;
+}
+
+/**
+ * Writes the given byte to the output, writes an extra zero if byte is 0xFF.
+ *
+ * This method is "careless" - caller must make sure that there is enough
+ * space in the output buffer. Emits up to 2 bytes to buffer.
+ */
+static JXL_INLINE void EmitByte(JpegBitWriter* bw, int byte) {
+  bw->data[bw->pos++] = byte;
+  if (byte == 0xFF) bw->data[bw->pos++] = 0;
+}
+
+// Appends all 8 bytes of put_buffer to the byte buffer, most significant byte
+// first on the checked path. Neither put_buffer nor free_bits is modified;
+// the caller resets them. Like EmitByte(), this does not check for space and
+// may append up to 16 bytes.
+static JXL_INLINE void DischargeBitBuffer(JpegBitWriter* bw) {
+  // At this point we are ready to emit the bytes of put_buffer to the output.
+  // The JPEG format requires that after every 0xff byte in the entropy
+  // coded section, there is a zero byte, therefore we first check if any of
+  // the bytes of put_buffer is 0xFF.
+  // A 0xFF byte of put_buffer is a zero byte of its complement.
+  if (HasZeroByte(~bw->put_buffer)) {
+    // We have a 0xFF byte somewhere, examine each byte and append a zero
+    // byte if necessary.
+    EmitByte(bw, (bw->put_buffer >> 56) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 48) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 40) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 32) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 24) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 16) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 8) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 0) & 0xFF);
+  } else {
+    // We don't have any 0xFF bytes, output all 8 bytes without checking.
+    StoreBE64(bw->put_buffer, bw->data + bw->pos);
+    bw->pos += 8;
+  }
+}
+
+// Appends |nbits| bits taken from |bits| to the accumulator and discharges the
+// accumulator to the byte buffer when it becomes over-full.
+//
+// |bits| is OR-ed in without masking, so presumably callers must pass a value
+// with no bits set above |nbits| - TODO confirm against the call sites.
+static JXL_INLINE void WriteBits(JpegBitWriter* bw, int nbits, uint64_t bits) {
+  // This is an optimization; if everything goes well,
+  // then |nbits| is positive; if non-existing Huffman symbol is going to be
+  // encoded, its length should be zero; later encoder could check the
+  // "health" of JpegBitWriter.
+  if (nbits == 0) {
+    bw->healthy = false;
+    return;
+  }
+  bw->free_bits -= nbits;
+  if (bw->free_bits < 0) {
+    // Not everything fits. free_bits + nbits is the free space from before
+    // this call: fill it with the top part of |bits| and flush 8 bytes.
+    bw->put_buffer <<= (bw->free_bits + nbits);
+    bw->put_buffer |= (bits >> -bw->free_bits);
+    DischargeBitBuffer(bw);
+    bw->free_bits += 64;
+    // NOTE(review): the value stored here looks like a don't-care. Only the
+    // low (64 - free_bits) bits of put_buffer are meaningful, and everything
+    // above them is shifted out before the next discharge. Confirm that
+    // |nbits| rather than 0 is intended.
+    bw->put_buffer = nbits;
+  }
+  // Common tail: shift the new bits in at the low end. After a discharge only
+  // the low part of |bits| that was not flushed counts as pending.
+  bw->put_buffer <<= nbits;
+  bw->put_buffer |= bits;
+}
+
+}  // namespace jpegli
+#endif  // LIB_JPEGLI_BIT_WRITER_H_
diff --git a/lib/jpegli/bitstream.cc b/lib/jpegli/bitstream.cc
new file mode 100644
index 0000000..3448367
--- /dev/null
+++ b/lib/jpegli/bitstream.cc
@@ -0,0 +1,452 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+ +#include "lib/jpegli/bitstream.h" + +#include <cmath> + +#include "lib/jpegli/bit_writer.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/memory_manager.h" + +namespace jpegli { + +void WriteOutput(j_compress_ptr cinfo, const uint8_t* buf, size_t bufsize) { + size_t pos = 0; + while (pos < bufsize) { + if (cinfo->dest->free_in_buffer == 0 && + !(*cinfo->dest->empty_output_buffer)(cinfo)) { + JPEGLI_ERROR("Destination suspension is not supported in markers."); + } + size_t len = std::min<size_t>(cinfo->dest->free_in_buffer, bufsize - pos); + memcpy(cinfo->dest->next_output_byte, buf + pos, len); + pos += len; + cinfo->dest->free_in_buffer -= len; + cinfo->dest->next_output_byte += len; + } +} + +void WriteOutput(j_compress_ptr cinfo, const std::vector<uint8_t>& bytes) { + WriteOutput(cinfo, bytes.data(), bytes.size()); +} + +void WriteOutput(j_compress_ptr cinfo, std::initializer_list<uint8_t> bytes) { + WriteOutput(cinfo, bytes.begin(), bytes.size()); +} + +void EncodeAPP0(j_compress_ptr cinfo) { + WriteOutput(cinfo, + {0xff, 0xe0, 0, 16, 'J', 'F', 'I', 'F', '\0', + cinfo->JFIF_major_version, cinfo->JFIF_minor_version, + cinfo->density_unit, static_cast<uint8_t>(cinfo->X_density >> 8), + static_cast<uint8_t>(cinfo->X_density & 0xff), + static_cast<uint8_t>(cinfo->Y_density >> 8), + static_cast<uint8_t>(cinfo->Y_density & 0xff), 0, 0}); +} + +void EncodeAPP14(j_compress_ptr cinfo) { + uint8_t color_transform = cinfo->jpeg_color_space == JCS_YCbCr ? 1 + : cinfo->jpeg_color_space == JCS_YCCK ? 
2 + : 0; + WriteOutput(cinfo, {0xff, 0xee, 0, 14, 'A', 'd', 'o', 'b', 'e', 0, 100, 0, 0, + 0, 0, color_transform}); +} + +void WriteFileHeader(j_compress_ptr cinfo) { + WriteOutput(cinfo, {0xFF, 0xD8}); // SOI + if (cinfo->write_JFIF_header) { + EncodeAPP0(cinfo); + } + if (cinfo->write_Adobe_marker) { + EncodeAPP14(cinfo); + } +} + +bool EncodeDQT(j_compress_ptr cinfo, bool write_all_tables) { + uint8_t data[4 + NUM_QUANT_TBLS * (1 + 2 * DCTSIZE2)]; // 520 bytes + size_t pos = 0; + data[pos++] = 0xFF; + data[pos++] = 0xDB; + pos += 2; // Length will be filled in later. + + int send_table[NUM_QUANT_TBLS] = {}; + if (write_all_tables) { + for (int i = 0; i < NUM_QUANT_TBLS; ++i) { + if (cinfo->quant_tbl_ptrs[i]) send_table[i] = 1; + } + } else { + for (int c = 0; c < cinfo->num_components; ++c) { + send_table[cinfo->comp_info[c].quant_tbl_no] = 1; + } + } + + bool is_baseline = true; + for (int i = 0; i < NUM_QUANT_TBLS; ++i) { + if (!send_table[i]) continue; + JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[i]; + if (quant_table == nullptr) { + JPEGLI_ERROR("Missing quant table %d", i); + } + int precision = 0; + for (size_t k = 0; k < DCTSIZE2; ++k) { + if (quant_table->quantval[k] > 255) { + precision = 1; + is_baseline = false; + } + } + if (quant_table->sent_table) { + continue; + } + data[pos++] = (precision << 4) + i; + for (size_t j = 0; j < DCTSIZE2; ++j) { + int val_idx = kJPEGNaturalOrder[j]; + int val = quant_table->quantval[val_idx]; + if (val == 0) { + JPEGLI_ERROR("Invalid quantval 0."); + } + if (precision) { + data[pos++] = val >> 8; + } + data[pos++] = val & 0xFFu; + } + quant_table->sent_table = TRUE; + } + if (pos > 4) { + data[2] = (pos - 2) >> 8u; + data[3] = (pos - 2) & 0xFFu; + WriteOutput(cinfo, data, pos); + } + return is_baseline; +} + +void EncodeSOF(j_compress_ptr cinfo, bool is_baseline) { + if (cinfo->data_precision != kJpegPrecision) { + is_baseline = false; + JPEGLI_ERROR("Unsupported data precision %d", cinfo->data_precision); + } + 
const uint8_t marker = cinfo->progressive_mode ? 0xc2 + : is_baseline ? 0xc0 + : 0xc1; + const size_t n_comps = cinfo->num_components; + const size_t marker_len = 8 + 3 * n_comps; + std::vector<uint8_t> data(marker_len + 2); + size_t pos = 0; + data[pos++] = 0xFF; + data[pos++] = marker; + data[pos++] = marker_len >> 8u; + data[pos++] = marker_len & 0xFFu; + data[pos++] = kJpegPrecision; + data[pos++] = cinfo->image_height >> 8u; + data[pos++] = cinfo->image_height & 0xFFu; + data[pos++] = cinfo->image_width >> 8u; + data[pos++] = cinfo->image_width & 0xFFu; + data[pos++] = n_comps; + for (size_t i = 0; i < n_comps; ++i) { + jpeg_component_info* comp = &cinfo->comp_info[i]; + data[pos++] = comp->component_id; + data[pos++] = ((comp->h_samp_factor << 4u) | (comp->v_samp_factor)); + const uint32_t quant_idx = comp->quant_tbl_no; + if (cinfo->quant_tbl_ptrs[quant_idx] == nullptr) { + JPEGLI_ERROR("Invalid component quant table index %u.", quant_idx); + } + data[pos++] = quant_idx; + } + WriteOutput(cinfo, data); +} + +void WriteFrameHeader(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + bool is_baseline = EncodeDQT(cinfo, /*write_all_tables=*/false); + if (cinfo->progressive_mode || cinfo->arith_code || + cinfo->data_precision != 8) { + is_baseline = false; + } + for (size_t i = 0; i < m->num_huffman_tables; ++i) { + int slot_id = m->slot_id_map[i]; + if (slot_id > 0x11 || (slot_id > 0x01 && slot_id < 0x10)) { + is_baseline = false; + } + } + EncodeSOF(cinfo, is_baseline); +} + +void EncodeDRI(j_compress_ptr cinfo) { + WriteOutput(cinfo, {0xFF, 0xDD, 0, 4, + static_cast<uint8_t>(cinfo->restart_interval >> 8), + static_cast<uint8_t>(cinfo->restart_interval & 0xFF)}); +} + +void EncodeDHT(j_compress_ptr cinfo, size_t offset, size_t num) { + jpeg_comp_master* m = cinfo->master; + size_t marker_len = 2; + for (size_t i = 0; i < num; ++i) { + const JHUFF_TBL& table = m->huffman_tables[offset + i]; + if (table.sent_table) continue; + marker_len += 
kJpegHuffmanMaxBitLength + 1; + for (size_t j = 0; j <= kJpegHuffmanMaxBitLength; ++j) { + marker_len += table.bits[j]; + } + } + std::vector<uint8_t> data(marker_len + 2); + size_t pos = 0; + data[pos++] = 0xFF; + data[pos++] = 0xC4; + data[pos++] = marker_len >> 8u; + data[pos++] = marker_len & 0xFFu; + for (size_t i = 0; i < num; ++i) { + const JHUFF_TBL& table = m->huffman_tables[offset + i]; + if (table.sent_table) continue; + size_t total_count = 0; + for (size_t i = 0; i <= kJpegHuffmanMaxBitLength; ++i) { + total_count += table.bits[i]; + } + data[pos++] = m->slot_id_map[offset + i]; + for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) { + data[pos++] = table.bits[i]; + } + for (size_t i = 0; i < total_count; ++i) { + data[pos++] = table.huffval[i]; + } + } + if (marker_len > 2) { + WriteOutput(cinfo, data); + } +} + +void EncodeSOS(j_compress_ptr cinfo, int scan_index) { + jpeg_comp_master* m = cinfo->master; + const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index]; + const size_t marker_len = 6 + 2 * scan_info->comps_in_scan; + std::vector<uint8_t> data(marker_len + 2); + size_t pos = 0; + data[pos++] = 0xFF; + data[pos++] = 0xDA; + data[pos++] = marker_len >> 8u; + data[pos++] = marker_len & 0xFFu; + data[pos++] = scan_info->comps_in_scan; + for (int i = 0; i < scan_info->comps_in_scan; ++i) { + int comp_idx = scan_info->component_index[i]; + data[pos++] = cinfo->comp_info[comp_idx].component_id; + int dc_slot_id = m->slot_id_map[m->context_map[comp_idx]]; + int ac_context = m->ac_ctx_offset[scan_index] + i; + int ac_slot_id = m->slot_id_map[m->context_map[ac_context]]; + data[pos++] = (dc_slot_id << 4u) + (ac_slot_id - 16); + } + data[pos++] = scan_info->Ss; + data[pos++] = scan_info->Se; + data[pos++] = ((scan_info->Ah << 4u) | (scan_info->Al)); + WriteOutput(cinfo, data); +} + +void WriteScanHeader(j_compress_ptr cinfo, int scan_index) { + jpeg_comp_master* m = cinfo->master; + const jpeg_scan_info* scan_info = 
&cinfo->scan_info[scan_index]; + cinfo->restart_interval = m->scan_token_info[scan_index].restart_interval; + if (cinfo->restart_interval != m->last_restart_interval) { + EncodeDRI(cinfo); + m->last_restart_interval = cinfo->restart_interval; + } + size_t num_dht = 0; + if (scan_index == 0) { + // For the first scan we emit all DC and at most 4 AC Huffman codes. + for (size_t i = 0, num_ac = 0; i < m->num_huffman_tables; ++i) { + if (m->slot_id_map[i] >= 16 && num_ac++ >= 4) break; + ++num_dht; + } + } else if (scan_info->Ss > 0) { + // For multi-scan sequential and progressive DC scans we have already + // emitted all Huffman codes that we need before the first scan. For + // progressive AC scans we only need at most one new Huffman code. + if (m->context_map[m->ac_ctx_offset[scan_index]] == m->next_dht_index) { + num_dht = 1; + } + } + if (num_dht > 0) { + EncodeDHT(cinfo, m->next_dht_index, num_dht); + m->next_dht_index += num_dht; + } + EncodeSOS(cinfo, scan_index); +} + +void WriteBlock(const int32_t* JXL_RESTRICT symbols, + const int32_t* JXL_RESTRICT extra_bits, const int num_nonzeros, + const bool emit_eob, + const HuffmanCodeTable* JXL_RESTRICT dc_code, + const HuffmanCodeTable* JXL_RESTRICT ac_code, + JpegBitWriter* JXL_RESTRICT bw) { + int symbol = symbols[0]; + WriteBits(bw, dc_code->depth[symbol], dc_code->code[symbol] | extra_bits[0]); + for (int i = 1; i < num_nonzeros; ++i) { + symbol = symbols[i]; + if (symbol > 255) { + WriteBits(bw, ac_code->depth[0xf0], ac_code->code[0xf0]); + symbol -= 256; + if (symbol > 255) { + WriteBits(bw, ac_code->depth[0xf0], ac_code->code[0xf0]); + symbol -= 256; + if (symbol > 255) { + WriteBits(bw, ac_code->depth[0xf0], ac_code->code[0xf0]); + symbol -= 256; + } + } + } + WriteBits(bw, ac_code->depth[symbol], + ac_code->code[symbol] | extra_bits[i]); + } + if (emit_eob) { + WriteBits(bw, ac_code->depth[0], ac_code->code[0]); + } +} + +namespace { + +static JXL_INLINE void EmitMarker(JpegBitWriter* bw, int marker) { + 
bw->data[bw->pos++] = 0xFF; + bw->data[bw->pos++] = marker; +} + +void WriteTokens(j_compress_ptr cinfo, int scan_index, JpegBitWriter* bw) { + jpeg_comp_master* m = cinfo->master; + HuffmanCodeTable* coding_tables = &m->coding_tables[0]; + int next_restart_marker = 0; + const ScanTokenInfo& sti = m->scan_token_info[scan_index]; + size_t num_token_arrays = m->cur_token_array + 1; + size_t total_tokens = 0; + size_t restart_idx = 0; + size_t next_restart = sti.restarts[restart_idx]; + uint8_t* context_map = m->context_map; + for (size_t i = 0; i < num_token_arrays; ++i) { + Token* tokens = m->token_arrays[i].tokens; + size_t num_tokens = m->token_arrays[i].num_tokens; + if (sti.token_offset < total_tokens + num_tokens && + total_tokens < sti.token_offset + sti.num_tokens) { + size_t start_ix = + total_tokens < sti.token_offset ? sti.token_offset - total_tokens : 0; + size_t end_ix = std::min(sti.token_offset + sti.num_tokens - total_tokens, + num_tokens); + size_t cycle_len = bw->len / 8; + size_t next_cycle = cycle_len; + for (size_t i = start_ix; i < end_ix; ++i) { + if (total_tokens + i == next_restart) { + JumpToByteBoundary(bw); + EmitMarker(bw, 0xD0 + next_restart_marker); + next_restart_marker += 1; + next_restart_marker &= 0x7; + next_restart = sti.restarts[++restart_idx]; + } + Token t = tokens[i]; + const HuffmanCodeTable* code = &coding_tables[context_map[t.context]]; + WriteBits(bw, code->depth[t.symbol], code->code[t.symbol] | t.bits); + if (--next_cycle == 0) { + if (!EmptyBitWriterBuffer(bw)) { + JPEGLI_ERROR( + "Output suspension is not supported in " + "finish_compress"); + } + next_cycle = cycle_len; + } + } + } + total_tokens += num_tokens; + } +} + +void WriteACRefinementTokens(j_compress_ptr cinfo, int scan_index, + JpegBitWriter* bw) { + jpeg_comp_master* m = cinfo->master; + const ScanTokenInfo& sti = m->scan_token_info[scan_index]; + const uint8_t context = m->ac_ctx_offset[scan_index]; + const HuffmanCodeTable* code = 
&m->coding_tables[m->context_map[context]]; + size_t cycle_len = bw->len / 64; + size_t next_cycle = cycle_len; + size_t refbit_idx = 0; + size_t eobrun_idx = 0; + size_t restart_idx = 0; + size_t next_restart = sti.restarts[restart_idx]; + int next_restart_marker = 0; + for (size_t i = 0; i < sti.num_tokens; ++i) { + if (i == next_restart) { + JumpToByteBoundary(bw); + EmitMarker(bw, 0xD0 + next_restart_marker); + next_restart_marker += 1; + next_restart_marker &= 0x7; + next_restart = sti.restarts[++restart_idx]; + } + RefToken t = sti.tokens[i]; + int symbol = t.symbol & 253; + uint16_t bits = 0; + if ((symbol & 1) == 0) { + int r = symbol >> 4; + if (r > 0 && r < 15) { + bits = sti.eobruns[eobrun_idx++]; + } + } else { + bits = (t.symbol >> 1) & 1; + } + WriteBits(bw, code->depth[symbol], code->code[symbol] | bits); + for (int j = 0; j < t.refbits; ++j) { + WriteBits(bw, 1, sti.refbits[refbit_idx++]); + } + if (--next_cycle == 0) { + if (!EmptyBitWriterBuffer(bw)) { + JPEGLI_ERROR("Output suspension is not supported in finish_compress"); + } + next_cycle = cycle_len; + } + } +} + +void WriteDCRefinementBits(j_compress_ptr cinfo, int scan_index, + JpegBitWriter* bw) { + jpeg_comp_master* m = cinfo->master; + const ScanTokenInfo& sti = m->scan_token_info[scan_index]; + size_t restart_idx = 0; + size_t next_restart = sti.restarts[restart_idx]; + int next_restart_marker = 0; + size_t cycle_len = bw->len * 4; + size_t next_cycle = cycle_len; + size_t refbit_idx = 0; + for (size_t i = 0; i < sti.num_tokens; ++i) { + if (i == next_restart) { + JumpToByteBoundary(bw); + EmitMarker(bw, 0xD0 + next_restart_marker); + next_restart_marker += 1; + next_restart_marker &= 0x7; + next_restart = sti.restarts[++restart_idx]; + } + WriteBits(bw, 1, sti.refbits[refbit_idx++]); + if (--next_cycle == 0) { + if (!EmptyBitWriterBuffer(bw)) { + JPEGLI_ERROR( + "Output suspension is not supported in " + "finish_compress"); + } + next_cycle = cycle_len; + } + } +} + +} // namespace + 
+void WriteScanData(j_compress_ptr cinfo, int scan_index) { + const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index]; + JpegBitWriter* bw = &cinfo->master->bw; + if (scan_info->Ah == 0) { + WriteTokens(cinfo, scan_index, bw); + } else if (scan_info->Ss > 0) { + WriteACRefinementTokens(cinfo, scan_index, bw); + } else { + WriteDCRefinementBits(cinfo, scan_index, bw); + } + if (!bw->healthy) { + JPEGLI_ERROR("Unknown Huffman coded symbol found in scan %d", scan_index); + } + JumpToByteBoundary(bw); + if (!EmptyBitWriterBuffer(bw)) { + JPEGLI_ERROR("Output suspension is not supported in finish_compress"); + } +} + +} // namespace jpegli diff --git a/lib/jpegli/bitstream.h b/lib/jpegli/bitstream.h new file mode 100644 index 0000000..aa54c73 --- /dev/null +++ b/lib/jpegli/bitstream.h @@ -0,0 +1,44 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_BITSTREAM_H_ +#define LIB_JPEGLI_BITSTREAM_H_ + +#include <initializer_list> +#include <vector> + +#include "lib/jpegli/encode_internal.h" + +namespace jpegli { + +void WriteOutput(j_compress_ptr cinfo, const uint8_t* buf, size_t bufsize); +void WriteOutput(j_compress_ptr cinfo, const std::vector<uint8_t>& bytes); +void WriteOutput(j_compress_ptr cinfo, std::initializer_list<uint8_t> bytes); + +void EncodeAPP0(j_compress_ptr cinfo); +void EncodeAPP14(j_compress_ptr cinfo); +void WriteFileHeader(j_compress_ptr cinfo); + +// Returns true if only baseline 8-bit tables are used. 
+bool EncodeDQT(j_compress_ptr cinfo, bool write_all_tables); +void EncodeSOF(j_compress_ptr cinfo, bool is_baseline); +void WriteFrameHeader(j_compress_ptr cinfo); + +void EncodeDRI(j_compress_ptr cinfo); +void EncodeDHT(j_compress_ptr cinfo, size_t offset, size_t num); +void EncodeSOS(j_compress_ptr cinfo, int scan_index); +void WriteScanHeader(j_compress_ptr cinfo, int scan_index); + +void WriteBlock(const int32_t* JXL_RESTRICT symbols, + const int32_t* JXL_RESTRICT extra_bits, const int num_nonzeros, + const bool emit_eob, + const HuffmanCodeTable* JXL_RESTRICT dc_code, + const HuffmanCodeTable* JXL_RESTRICT ac_code, + JpegBitWriter* JXL_RESTRICT bw); +void WriteScanData(j_compress_ptr cinfo, int scan_index); + +} // namespace jpegli + +#endif // LIB_JPEGLI_BITSTREAM_H_ diff --git a/lib/jpegli/color_quantize.cc b/lib/jpegli/color_quantize.cc new file mode 100644 index 0000000..e8357e2 --- /dev/null +++ b/lib/jpegli/color_quantize.cc @@ -0,0 +1,533 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/color_quantize.h" + +#include <cmath> +#include <limits> +#include <unordered_map> + +#include "lib/jpegli/decode_internal.h" +#include "lib/jpegli/error.h" + +namespace jpegli { + +namespace { + +static constexpr int kNumColorCellBits[kMaxComponents] = {3, 4, 3, 3}; +static constexpr int kCompW[kMaxComponents] = {2, 3, 1, 1}; + +int Pow(int a, int b) { + int r = 1; + for (int i = 0; i < b; ++i) { + r *= a; + } + return r; +} + +int ComponentOrder(j_decompress_ptr cinfo, int i) { + if (cinfo->out_color_components == 3) { + return i < 2 ? 
1 - i : i; + } + return i; +} + +int GetColorComponent(int i, int N) { + return (i * 255 + (N - 1) / 2) / (N - 1); +} + +} // namespace + +void ChooseColorMap1Pass(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + int components = cinfo->out_color_components; + int desired = std::min(cinfo->desired_number_of_colors, 256); + int num = 1; + while (Pow(num + 1, components) <= desired) { + ++num; + } + if (num == 1) { + JPEGLI_ERROR("Too few colors (%d) in requested colormap", desired); + } + int actual = Pow(num, components); + for (int i = 0; i < components; ++i) { + m->num_colors_[i] = num; + } + while (actual < desired) { + int total = actual; + for (int i = 0; i < components; ++i) { + int c = ComponentOrder(cinfo, i); + int new_total = (actual / m->num_colors_[c]) * (m->num_colors_[c] + 1); + if (new_total <= desired) { + ++m->num_colors_[c]; + actual = new_total; + } + } + if (actual == total) { + break; + } + } + cinfo->actual_number_of_colors = actual; + cinfo->colormap = (*cinfo->mem->alloc_sarray)( + reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, actual, components); + int next_color[kMaxComponents] = {0}; + for (int i = 0; i < actual; ++i) { + for (int c = 0; c < components; ++c) { + cinfo->colormap[c][i] = + GetColorComponent(next_color[c], m->num_colors_[c]); + } + int c = components - 1; + while (c > 0 && next_color[c] + 1 == m->num_colors_[c]) { + next_color[c--] = 0; + } + ++next_color[c]; + } + if (!m->colormap_lut_) { + m->colormap_lut_ = Allocate<uint8_t>(cinfo, components * 256, JPOOL_IMAGE); + } + int stride = actual; + for (int c = 0; c < components; ++c) { + int N = m->num_colors_[c]; + stride /= N; + for (int i = 0; i < 256; ++i) { + int index = ((2 * i - 1) * (N - 1) + 254) / 510; + m->colormap_lut_[c * 256 + i] = index * stride; + } + } +} + +namespace { + +// 2^13 priority levels for the PQ seems to be a good compromise between +// accuracy, running time and stack space usage. 
static constexpr int kMaxPriority = 1 << 13;
static constexpr int kMaxLevel = 3;

// Interleaves the bits of the three channels into a single 24-bit key, most
// significant bits first: each group of three key bits holds one bit of r
// (weight 4), one of g (weight 2) and one of b (weight 1). Because of this
// layout, the key of a coarser level of the multi-resolution grid is obtained
// by simply shifting the key right by a multiple of 3.
inline int InterlaceBitsRGB(uint8_t r, uint8_t g, uint8_t b) {
  int key = 0;
  for (int bit = 7; bit >= 0; --bit) {
    const int triple = (((r >> bit) & 1) << 2) | (((g >> bit) & 1) << 1) |
                       ((b >> bit) & 1);
    key = (key << 3) | triple;
  }
  return key;
}

// Computes the priority of a color from its current (scaled) distance `d` to
// the palette, its population count `n`, and the per-level `density` /
// `radius` signals of the multi-resolution grid (kMaxLevel entries each).
// The result is capped at kMaxPriority - 1.
//
// The products are accumulated in 64 bits: `d * n` and
// `density * (d - radius)` exceed the range of int for large images or small
// palettes, and signed overflow is undefined behavior. For inputs whose
// intermediate values fit in an int the result is unchanged.
inline int Priority(int d, int n, const int* density, const int* radius) {
  int64_t p = static_cast<int64_t>(d) * n;
  for (int level = 0; level < kMaxLevel; ++level) {
    if (d > radius[level]) {
      p += static_cast<int64_t>(density[level]) * (d - radius[level]);
    }
  }
  return static_cast<int>(std::min<int64_t>(kMaxPriority - 1, p >> 4));
}

// Squared distance between two RGB colors: the sum of the squared channel
// differences plus the squared difference of a weighted intensity
// (2 * R + 5 * G + 1 * B), the latter normalized by 2^(2 * 3).
inline int ColorIntQuadDistanceRGB(uint8_t r1, uint8_t g1, uint8_t b1,
                                   uint8_t r2, uint8_t g2, uint8_t b2) {
  // Weights for the intensity calculation.
  constexpr int kRedWeight = 2;
  constexpr int kGreenWeight = 5;
  constexpr int kBlueWeight = 1;
  // Normalization factor for the intensity calculation (2^kIntensityShift).
  constexpr int kIntensityShift = 3;
  const int dr = r1 - r2;
  const int dg = g1 - g2;
  const int db = b1 - b2;
  const int di = kRedWeight * dr + kGreenWeight * dg + kBlueWeight * db;
  return dr * dr + dg * dg + db * db + ((di * di) >> (2 * kIntensityShift));
}

// Converts a squared distance into half of its square root, rounded to the
// nearest integer.
inline int ScaleQuadDistanceRGB(int d) {
  return static_cast<int>(std::sqrt(d * 0.25) + 0.5);
}

// Inserts color `index` into the palette as entry `k` and updates the minimal
// distances, the clustering and the quantization error accordingly. Every
// color that is closer to the new entry than to its previous nearest entry is
// reassigned to cluster `k`.
void AddToRGBPalette(const uint8_t* red, const uint8_t* green,
                     const uint8_t* blue,
                     const int* count,  // histogram of colors
                     const int index,   // index of color to be added
                     const int k,       // size of current palette
                     const int n,       // number of colors
                     int* dist,         // array of distances from palette
                     int* cluster,      // mapping of color indices to palette
                     int* center,       // the inverse mapping
                     int64_t* error) {  // measure of the quantization error
  center[k] = index;
  cluster[index] = k;
  // The new palette entry no longer contributes any error.
  *error -= static_cast<int64_t>(dist[index]) * count[index];
  dist[index] = 0;
  const uint8_t new_r = red[index];
  const uint8_t new_g = green[index];
  const uint8_t new_b = blue[index];
  for (int j = 0; j < n; ++j) {
    // Colors at distance 0 are palette entries themselves.
    if (dist[j] <= 0) continue;
    const int d = ColorIntQuadDistanceRGB(new_r, new_g, new_b, red[j],
                                          green[j], blue[j]);
    if (d >= dist[j]) continue;
    // (d - dist[j]) is negative here, so the accumulated error shrinks.
    *error += static_cast<int64_t>(d - dist[j]) * count[j];
    dist[j] = d;
    cluster[j] = k;
  }
}

struct RGBPixelHasher {
  // A quick but good-enough hash for packed 24-bit RGB pixel values.
  size_t operator()(uint32_t a) const {
    const uint32_t folded = a ^ (a >> 12);
    return folded * 0x9e3779b9;
  }
};

struct WangHasher {
  // Thomas Wang's hash. Nearly perfect and still quite fast. For pixels the
  // simpler RGBPixelHasher is used, because there the number of hash calls is
  // proportional to the number of pixels and the hash dominates the cost. For
  // the histogram a better hash is affordable since the number of hash calls
  // is proportional to the number of unique colors in the image, which is
  // hopefully much smaller. Note that the difference is slight; e.g.
  // replacing RGBPixelHasher with WangHasher only slows things down by 5% on
  // an Opteron.
  size_t operator()(uint32_t a) const {
    a = (a ^ 61) ^ (a >> 16);
    a += a << 3;
    a ^= a >> 4;
    a *= 0x27d4eb2d;
    a ^= a >> 15;
    return a;
  }
};

// Build an index of all the different colors in the input
// image.
To do this we map the 24 bit RGB representation of the colors +// to a unique integer index assigned to the different colors in order of +// appearance in the image. Return the number of unique colors found. +// The colors are pre-quantized to 3 * 6 bits precision. +static int BuildRGBColorIndex(const uint8_t* const image, int const num_pixels, + int* const count, uint8_t* const red, + uint8_t* const green, uint8_t* const blue) { + // Impossible because rgb are in the low 24 bits, and the upper 8 bits is 0. + const uint32_t impossible_pixel_value = 0x10000000; + std::unordered_map<uint32_t, int, RGBPixelHasher> index_map(1 << 12); + std::unordered_map<uint32_t, int, RGBPixelHasher>::iterator index_map_lookup; + const uint8_t* imagep = &image[0]; + uint32_t prev_pixel = impossible_pixel_value; + int index = 0; + int n = 0; + for (int i = 0; i < num_pixels; ++i) { + uint8_t r = ((*imagep++) & 0xfc) + 2; + uint8_t g = ((*imagep++) & 0xfc) + 2; + uint8_t b = ((*imagep++) & 0xfc) + 2; + uint32_t pixel = (b << 16) | (g << 8) | r; + if (pixel != prev_pixel) { + prev_pixel = pixel; + index_map_lookup = index_map.find(pixel); + if (index_map_lookup != index_map.end()) { + index = index_map_lookup->second; + } else { + index_map[pixel] = index = n++; + red[index] = r; + green[index] = g; + blue[index] = b; + } + } + ++count[index]; + } + return n; +} + +} // namespace + +void ChooseColorMap2Pass(j_decompress_ptr cinfo) { + if (cinfo->out_color_space != JCS_RGB) { + JPEGLI_ERROR("Two-pass quantizer must use RGB output color space."); + } + jpeg_decomp_master* m = cinfo->master; + const size_t num_pixels = cinfo->output_width * cinfo->output_height; + const int max_color_count = std::max<size_t>(num_pixels, 1u << 18); + const int max_palette_size = cinfo->desired_number_of_colors; + std::unique_ptr<uint8_t[]> red(new uint8_t[max_color_count]); + std::unique_ptr<uint8_t[]> green(new uint8_t[max_color_count]); + std::unique_ptr<uint8_t[]> blue(new uint8_t[max_color_count]); + 
std::vector<int> count(max_color_count, 0); + // number of colors + int n = BuildRGBColorIndex(m->pixels_, num_pixels, &count[0], &red[0], + &green[0], &blue[0]); + + std::vector<int> dist(n, std::numeric_limits<int>::max()); + std::vector<int> cluster(n); + std::vector<bool> in_palette(n, false); + int center[256]; + int k = 0; // palette size + const int count_threshold = (num_pixels * 4) / max_palette_size; + static constexpr int kAveragePixelErrorThreshold = 1; + const int64_t error_threshold = num_pixels * kAveragePixelErrorThreshold; + int64_t error = 0; // quantization error + + int max_count = 0; + int winner = 0; + for (int i = 0; i < n; ++i) { + if (count[i] > max_count) { + max_count = count[i]; + winner = i; + } + if (!in_palette[i] && count[i] > count_threshold) { + AddToRGBPalette(&red[0], &green[0], &blue[0], &count[0], i, k++, n, + &dist[0], &cluster[0], &center[0], &error); + in_palette[i] = true; + } + } + if (k == 0) { + AddToRGBPalette(&red[0], &green[0], &blue[0], &count[0], winner, k++, n, + &dist[0], &cluster[0], &center[0], &error); + in_palette[winner] = true; + } + + // Calculation of the multi-resolution density grid. + std::vector<int> density(n * kMaxLevel); + std::vector<int> radius(n * kMaxLevel); + std::unordered_map<uint32_t, int, WangHasher> histogram[kMaxLevel]; + for (int level = 0; level < kMaxLevel; ++level) { + // This value is never used because key = InterlaceBitsRGB(...) 
>> 6 + } + + for (int i = 0; i < n; ++i) { + if (!in_palette[i]) { + const int key = InterlaceBitsRGB(red[i], green[i], blue[i]) >> 6; + for (int level = 0; level < kMaxLevel; ++level) { + histogram[level][key >> (3 * level)] += count[i]; + } + } + } + for (int i = 0; i < n; ++i) { + if (!in_palette[i]) { + for (int level = 0; level < kMaxLevel; ++level) { + const int mask = (4 << level) - 1; + const int rd = std::max(red[i] & mask, mask - (red[i] & mask)); + const int gd = std::max(green[i] & mask, mask - (green[i] & mask)); + const int bd = std::max(blue[i] & mask, mask - (blue[i] & mask)); + radius[i * kMaxLevel + level] = + ScaleQuadDistanceRGB(ColorIntQuadDistanceRGB(0, 0, 0, rd, gd, bd)); + } + const int key = InterlaceBitsRGB(red[i], green[i], blue[i]) >> 6; + if (kMaxLevel > 0) { + density[i * kMaxLevel] = histogram[0][key] - count[i]; + } + for (int level = 1; level < kMaxLevel; ++level) { + density[i * kMaxLevel + level] = + (histogram[level][key >> (3 * level)] - + histogram[level - 1][key >> (3 * level - 3)]); + } + } + } + + // Calculate the initial error now that the palette has been initialized. 
+ error = 0; + for (int i = 0; i < n; ++i) { + error += static_cast<int64_t>(dist[i]) * static_cast<int64_t>(count[i]); + } + + std::unique_ptr<std::vector<int>[]> bucket_array( + new std::vector<int>[kMaxPriority]); + int top_priority = -1; + for (int i = 0; i < n; ++i) { + if (!in_palette[i]) { + int priority = Priority(ScaleQuadDistanceRGB(dist[i]), count[i], + &density[i * kMaxLevel], &radius[i * kMaxLevel]); + bucket_array[priority].push_back(i); + top_priority = std::max(priority, top_priority); + } + } + double error_accum = 0; + while (top_priority >= 0 && k < max_palette_size) { + if (error < error_threshold) { + error_accum += std::min(error_threshold, error_threshold - error); + if (error_accum >= 10 * error_threshold) { + break; + } + } + int i = bucket_array[top_priority].back(); + int priority = Priority(ScaleQuadDistanceRGB(dist[i]), count[i], + &density[i * kMaxLevel], &radius[i * kMaxLevel]); + if (priority < top_priority) { + bucket_array[priority].push_back(i); + } else { + AddToRGBPalette(&red[0], &green[0], &blue[0], &count[0], i, k++, n, + &dist[0], &cluster[0], &center[0], &error); + } + bucket_array[top_priority].pop_back(); + while (top_priority >= 0 && bucket_array[top_priority].empty()) { + --top_priority; + } + } + + cinfo->actual_number_of_colors = k; + cinfo->colormap = (*cinfo->mem->alloc_sarray)( + reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, k, 3); + for (int i = 0; i < k; ++i) { + int index = center[i]; + cinfo->colormap[0][i] = red[index]; + cinfo->colormap[1][i] = green[index]; + cinfo->colormap[2][i] = blue[index]; + } +} + +namespace { + +void FindCandidatesForCell(j_decompress_ptr cinfo, int ncomp, int cell[], + std::vector<uint8_t>* candidates) { + int cell_min[kMaxComponents]; + int cell_max[kMaxComponents]; + int cell_center[kMaxComponents]; + for (int c = 0; c < ncomp; ++c) { + cell_min[c] = cell[c] << (8 - kNumColorCellBits[c]); + cell_max[c] = cell_min[c] + (1 << (8 - kNumColorCellBits[c])) - 1; + cell_center[c] = 
(cell_min[c] + cell_max[c]) >> 1; + } + int min_maxdist = std::numeric_limits<int>::max(); + int mindist[256]; + for (int i = 0; i < cinfo->actual_number_of_colors; ++i) { + int dmin = 0; + int dmax = 0; + for (int c = 0; c < ncomp; ++c) { + int palette_c = cinfo->colormap[c][i]; + int dminc = 0, dmaxc; + if (palette_c < cell_min[c]) { + dminc = cell_min[c] - palette_c; + dmaxc = cell_max[c] - palette_c; + } else if (palette_c > cell_max[c]) { + dminc = palette_c - cell_max[c]; + dmaxc = palette_c - cell_min[c]; + } else if (palette_c > cell_center[c]) { + dmaxc = palette_c - cell_min[c]; + } else { + dmaxc = cell_max[c] - palette_c; + } + dminc *= kCompW[c]; + dmaxc *= kCompW[c]; + dmin += dminc * dminc; + dmax += dmaxc * dmaxc; + } + mindist[i] = dmin; + min_maxdist = std::min(dmax, min_maxdist); + } + for (int i = 0; i < cinfo->actual_number_of_colors; ++i) { + if (mindist[i] < min_maxdist) { + candidates->push_back(i); + } + } +} + +} // namespace + +void CreateInverseColorMap(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + int ncomp = cinfo->out_color_components; + int num_cells = 1; + for (int c = 0; c < ncomp; ++c) { + num_cells *= (1 << kNumColorCellBits[c]); + } + m->candidate_lists_.resize(num_cells); + + int next_cell[kMaxComponents] = {0}; + for (int i = 0; i < num_cells; ++i) { + m->candidate_lists_[i].clear(); + FindCandidatesForCell(cinfo, ncomp, next_cell, &m->candidate_lists_[i]); + int c = ncomp - 1; + while (c > 0 && next_cell[c] + 1 == (1 << kNumColorCellBits[c])) { + next_cell[c--] = 0; + } + ++next_cell[c]; + } + m->regenerate_inverse_colormap_ = false; +} + +int LookupColorIndex(j_decompress_ptr cinfo, JSAMPLE* pixel) { + jpeg_decomp_master* m = cinfo->master; + int num_channels = cinfo->out_color_components; + int index = 0; + if (m->quant_mode_ == 1) { + for (int c = 0; c < num_channels; ++c) { + index += m->colormap_lut_[c * 256 + pixel[c]]; + } + } else { + size_t cell_idx = 0; + size_t stride = 1; + for (int c = 
num_channels - 1; c >= 0; --c) { + cell_idx += (pixel[c] >> (8 - kNumColorCellBits[c])) * stride; + stride <<= kNumColorCellBits[c]; + } + JXL_ASSERT(cell_idx < m->candidate_lists_.size()); + int mindist = std::numeric_limits<int>::max(); + const auto& candidates = m->candidate_lists_[cell_idx]; + for (uint8_t i : candidates) { + int dist = 0; + for (int c = 0; c < num_channels; ++c) { + int d = (cinfo->colormap[c][i] - pixel[c]) * kCompW[c]; + dist += d * d; + } + if (dist < mindist) { + mindist = dist; + index = i; + } + } + } + JXL_ASSERT(index < cinfo->actual_number_of_colors); + return index; +} + +void CreateOrderedDitherTables(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + static constexpr size_t kDitherSize = 4; + static constexpr size_t kDitherMask = kDitherSize - 1; + static constexpr float kBaseDitherMatrix[] = { + 0, 8, 2, 10, // + 12, 4, 14, 6, // + 3, 11, 1, 9, // + 15, 7, 13, 5, // + }; + m->dither_size_ = kDitherSize; + m->dither_mask_ = kDitherMask; + size_t ncells = m->dither_size_ * m->dither_size_; + for (int c = 0; c < cinfo->out_color_components; ++c) { + float spread = 1.0f / (m->num_colors_[c] - 1); + float mul = spread / ncells; + float offset = 0.5f * spread; + if (m->dither_[c] == nullptr) { + m->dither_[c] = Allocate<float>(cinfo, ncells, JPOOL_IMAGE_ALIGNED); + } + for (size_t idx = 0; idx < ncells; ++idx) { + m->dither_[c][idx] = kBaseDitherMatrix[idx] * mul - offset; + } + } +} + +void InitFSDitherState(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + for (int c = 0; c < cinfo->out_color_components; ++c) { + if (m->error_row_[c] == nullptr) { + m->error_row_[c] = + Allocate<float>(cinfo, cinfo->output_width, JPOOL_IMAGE_ALIGNED); + m->error_row_[c + kMaxComponents] = + Allocate<float>(cinfo, cinfo->output_width, JPOOL_IMAGE_ALIGNED); + } + memset(m->error_row_[c], 0.0, cinfo->output_width * sizeof(float)); + memset(m->error_row_[c + kMaxComponents], 0.0, + cinfo->output_width * sizeof(float)); 
+ } +} + +} // namespace jpegli diff --git a/lib/jpegli/color_quantize.h b/lib/jpegli/color_quantize.h new file mode 100644 index 0000000..3dda1d8 --- /dev/null +++ b/lib/jpegli/color_quantize.h @@ -0,0 +1,27 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_COLOR_QUANTIZE_H_ +#define LIB_JPEGLI_COLOR_QUANTIZE_H_ + +#include "lib/jpegli/common.h" + +namespace jpegli { + +void ChooseColorMap1Pass(j_decompress_ptr cinfo); + +void ChooseColorMap2Pass(j_decompress_ptr cinfo); + +void CreateInverseColorMap(j_decompress_ptr cinfo); + +void CreateOrderedDitherTables(j_decompress_ptr cinfo); + +void InitFSDitherState(j_decompress_ptr cinfo); + +int LookupColorIndex(j_decompress_ptr cinfo, JSAMPLE* pixel); + +} // namespace jpegli + +#endif // LIB_JPEGLI_COLOR_QUANTIZE_H_ diff --git a/lib/jpegli/color_transform.cc b/lib/jpegli/color_transform.cc new file mode 100644 index 0000000..020a6fd --- /dev/null +++ b/lib/jpegli/color_transform.cc @@ -0,0 +1,281 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/color_transform.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/color_transform.cc" +#include <hwy/foreach_target.h> +#include <hwy/highway.h> + +#include "lib/jpegli/decode_internal.h" +#include "lib/jpegli/encode_internal.h" +#include "lib/jpegli/error.h" +#include "lib/jxl/base/compiler_specific.h" + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Div; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::Sub; + +void YCbCrToRGB(float* row[kMaxComponents], size_t xsize) { + const HWY_CAPPED(float, 8) df; + float* JXL_RESTRICT row0 = row[0]; + float* JXL_RESTRICT row1 = row[1]; + float* JXL_RESTRICT row2 = row[2]; + + // Full-range BT.601 as defined by JFIF Clause 7: + // https://www.itu.int/rec/T-REC-T.871-201105-I/en + const auto crcr = Set(df, 1.402f); + const auto cgcb = Set(df, -0.114f * 1.772f / 0.587f); + const auto cgcr = Set(df, -0.299f * 1.402f / 0.587f); + const auto cbcb = Set(df, 1.772f); + + for (size_t x = 0; x < xsize; x += Lanes(df)) { + const auto y_vec = Load(df, row0 + x); + const auto cb_vec = Load(df, row1 + x); + const auto cr_vec = Load(df, row2 + x); + const auto r_vec = MulAdd(crcr, cr_vec, y_vec); + const auto g_vec = MulAdd(cgcr, cr_vec, MulAdd(cgcb, cb_vec, y_vec)); + const auto b_vec = MulAdd(cbcb, cb_vec, y_vec); + Store(r_vec, df, row0 + x); + Store(g_vec, df, row1 + x); + Store(b_vec, df, row2 + x); + } +} + +void YCCKToCMYK(float* row[kMaxComponents], size_t xsize) { + const HWY_CAPPED(float, 8) df; + float* JXL_RESTRICT row0 = row[0]; + float* JXL_RESTRICT row1 = row[1]; + float* JXL_RESTRICT row2 = row[2]; + YCbCrToRGB(row, xsize); + const auto offset = Set(df, -1.0f / 255.0f); + for (size_t x = 0; x < xsize; x += Lanes(df)) { + Store(Sub(offset, Load(df, row0 + x)), df, row0 + x); + Store(Sub(offset, Load(df, row1 + x)), df, row1 + x); + Store(Sub(offset, Load(df, row2 + x)), df, row2 + x); + } +} + +void RGBToYCbCr(float* row[kMaxComponents], size_t xsize) { + const HWY_CAPPED(float, 8) df; + float* JXL_RESTRICT row0 = row[0]; + float* JXL_RESTRICT row1 = row[1]; + float* JXL_RESTRICT row2 = row[2]; + // Full-range BT.601 as defined by JFIF Clause 7: + // https://www.itu.int/rec/T-REC-T.871-201105-I/en + const auto c128 = Set(df, 128.0f); + const auto kR = Set(df, 0.299f); 
// NTSC luma + const auto kG = Set(df, 0.587f); + const auto kB = Set(df, 0.114f); + const auto kAmpR = Set(df, 0.701f); + const auto kAmpB = Set(df, 0.886f); + const auto kDiffR = Add(kAmpR, kR); + const auto kDiffB = Add(kAmpB, kB); + const auto kNormR = Div(Set(df, 1.0f), (Add(kAmpR, Add(kG, kB)))); + const auto kNormB = Div(Set(df, 1.0f), (Add(kR, Add(kG, kAmpB)))); + + for (size_t x = 0; x < xsize; x += Lanes(df)) { + const auto r = Load(df, row0 + x); + const auto g = Load(df, row1 + x); + const auto b = Load(df, row2 + x); + const auto r_base = Mul(r, kR); + const auto r_diff = Mul(r, kDiffR); + const auto g_base = Mul(g, kG); + const auto b_base = Mul(b, kB); + const auto b_diff = Mul(b, kDiffB); + const auto y_base = Add(r_base, Add(g_base, b_base)); + const auto cb_vec = MulAdd(Sub(b_diff, y_base), kNormB, c128); + const auto cr_vec = MulAdd(Sub(r_diff, y_base), kNormR, c128); + Store(y_base, df, row0 + x); + Store(cb_vec, df, row1 + x); + Store(cr_vec, df, row2 + x); + } +} + +void CMYKToYCCK(float* row[kMaxComponents], size_t xsize) { + const HWY_CAPPED(float, 8) df; + float* JXL_RESTRICT row0 = row[0]; + float* JXL_RESTRICT row1 = row[1]; + float* JXL_RESTRICT row2 = row[2]; + const auto unity = Set(df, 255.0f); + for (size_t x = 0; x < xsize; x += Lanes(df)) { + Store(Sub(unity, Load(df, row0 + x)), df, row0 + x); + Store(Sub(unity, Load(df, row1 + x)), df, row1 + x); + Store(Sub(unity, Load(df, row2 + x)), df, row2 + x); + } + RGBToYCbCr(row, xsize); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jpegli { + +HWY_EXPORT(CMYKToYCCK); +HWY_EXPORT(YCCKToCMYK); +HWY_EXPORT(YCbCrToRGB); +HWY_EXPORT(RGBToYCbCr); + +bool CheckColorSpaceComponents(int num_components, J_COLOR_SPACE colorspace) { + switch (colorspace) { + case JCS_GRAYSCALE: + return num_components == 1; + case JCS_RGB: + case JCS_YCbCr: + case JCS_EXT_RGB: + case JCS_EXT_BGR: 
+ return num_components == 3; + case JCS_CMYK: + case JCS_YCCK: + case JCS_EXT_RGBX: + case JCS_EXT_BGRX: + case JCS_EXT_XBGR: + case JCS_EXT_XRGB: + case JCS_EXT_RGBA: + case JCS_EXT_BGRA: + case JCS_EXT_ABGR: + case JCS_EXT_ARGB: + return num_components == 4; + default: + // Unrecognized colorspaces can have any number of channels, since no + // color transform will be performed on them. + return true; + } +} + +void NullTransform(float* row[kMaxComponents], size_t len) {} + +void GrayscaleToRGB(float* row[kMaxComponents], size_t len) { + memcpy(row[1], row[0], len * sizeof(row[1][0])); + memcpy(row[2], row[0], len * sizeof(row[2][0])); +} + +void GrayscaleToYCbCr(float* row[kMaxComponents], size_t len) { + memset(row[1], 0, len * sizeof(row[1][0])); + memset(row[2], 0, len * sizeof(row[2][0])); +} + +void ChooseColorTransform(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + if (!CheckColorSpaceComponents(cinfo->input_components, + cinfo->in_color_space)) { + JPEGLI_ERROR("Invalid number of input components %d for colorspace %d", + cinfo->input_components, cinfo->in_color_space); + } + if (!CheckColorSpaceComponents(cinfo->num_components, + cinfo->jpeg_color_space)) { + JPEGLI_ERROR("Invalid number of components %d for colorspace %d", + cinfo->num_components, cinfo->jpeg_color_space); + } + if (cinfo->jpeg_color_space == cinfo->in_color_space) { + if (cinfo->num_components != cinfo->input_components) { + JPEGLI_ERROR("Input/output components mismatch: %d vs %d", + cinfo->input_components, cinfo->num_components); + } + // No color transform requested. 
+ m->color_transform = NullTransform; + return; + } + + if (cinfo->in_color_space == JCS_RGB && m->xyb_mode) { + JPEGLI_ERROR("Color transform on XYB colorspace is not supported."); + } + + m->color_transform = nullptr; + if (cinfo->jpeg_color_space == JCS_GRAYSCALE) { + if (cinfo->in_color_space == JCS_RGB) { + m->color_transform = HWY_DYNAMIC_DISPATCH(RGBToYCbCr); + } else if (cinfo->in_color_space == JCS_YCbCr || + cinfo->in_color_space == JCS_YCCK) { + // Since the first luminance channel is the grayscale version of the + // image, nothing to do here + m->color_transform = NullTransform; + } + } else if (cinfo->jpeg_color_space == JCS_RGB) { + if (cinfo->in_color_space == JCS_GRAYSCALE) { + m->color_transform = GrayscaleToRGB; + } + } else if (cinfo->jpeg_color_space == JCS_YCbCr) { + if (cinfo->in_color_space == JCS_RGB) { + m->color_transform = HWY_DYNAMIC_DISPATCH(RGBToYCbCr); + } else if (cinfo->in_color_space == JCS_GRAYSCALE) { + m->color_transform = GrayscaleToYCbCr; + } + } else if (cinfo->jpeg_color_space == JCS_YCCK) { + if (cinfo->in_color_space == JCS_CMYK) { + m->color_transform = HWY_DYNAMIC_DISPATCH(CMYKToYCCK); + } + } + + if (m->color_transform == nullptr) { + // TODO(szabadka) Support more color transforms. 
+ JPEGLI_ERROR("Unsupported color transform %d -> %d", cinfo->in_color_space, + cinfo->jpeg_color_space); + } +} + +void ChooseColorTransform(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + if (!CheckColorSpaceComponents(cinfo->out_color_components, + cinfo->out_color_space)) { + JPEGLI_ERROR("Invalid number of output components %d for colorspace %d", + cinfo->out_color_components, cinfo->out_color_space); + } + if (!CheckColorSpaceComponents(cinfo->num_components, + cinfo->jpeg_color_space)) { + JPEGLI_ERROR("Invalid number of components %d for colorspace %d", + cinfo->num_components, cinfo->jpeg_color_space); + } + if (cinfo->jpeg_color_space == cinfo->out_color_space) { + if (cinfo->num_components != cinfo->out_color_components) { + JPEGLI_ERROR("Input/output components mismatch: %d vs %d", + cinfo->num_components, cinfo->out_color_components); + } + // No color transform requested. + m->color_transform = NullTransform; + return; + } + + m->color_transform = nullptr; + if (cinfo->jpeg_color_space == JCS_GRAYSCALE) { + if (cinfo->out_color_space == JCS_RGB) { + m->color_transform = GrayscaleToRGB; + } + } else if (cinfo->jpeg_color_space == JCS_RGB) { + if (cinfo->out_color_space == JCS_GRAYSCALE) { + m->color_transform = HWY_DYNAMIC_DISPATCH(RGBToYCbCr); + } + } else if (cinfo->jpeg_color_space == JCS_YCbCr) { + if (cinfo->out_color_space == JCS_RGB) { + m->color_transform = HWY_DYNAMIC_DISPATCH(YCbCrToRGB); + } else if (cinfo->out_color_space == JCS_GRAYSCALE) { + m->color_transform = NullTransform; + } + } else if (cinfo->jpeg_color_space == JCS_YCCK) { + if (cinfo->out_color_space == JCS_CMYK) { + m->color_transform = HWY_DYNAMIC_DISPATCH(YCCKToCMYK); + } + } + + if (m->color_transform == nullptr) { + // TODO(szabadka) Support more color transforms. 
+ JPEGLI_ERROR("Unsupported color transform %d -> %d", + cinfo->jpeg_color_space, cinfo->out_color_space); + } +} + +} // namespace jpegli +#endif // HWY_ONCE diff --git a/lib/jpegli/color_transform.h b/lib/jpegli/color_transform.h new file mode 100644 index 0000000..8d58f88 --- /dev/null +++ b/lib/jpegli/color_transform.h @@ -0,0 +1,20 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_COLOR_TRANSFORM_H_ +#define LIB_JPEGLI_COLOR_TRANSFORM_H_ + +#include "lib/jpegli/common.h" +#include "lib/jxl/base/compiler_specific.h" + +namespace jpegli { + +void ChooseColorTransform(j_compress_ptr cinfo); + +void ChooseColorTransform(j_decompress_ptr cinfo); + +} // namespace jpegli + +#endif // LIB_JPEGLI_COLOR_TRANSFORM_H_ diff --git a/lib/jpegli/common.cc b/lib/jpegli/common.cc new file mode 100644 index 0000000..5f34372 --- /dev/null +++ b/lib/jpegli/common.cc @@ -0,0 +1,59 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jpegli/common.h" + +#include "lib/jpegli/decode_internal.h" +#include "lib/jpegli/encode_internal.h" +#include "lib/jpegli/memory_manager.h" + +void jpegli_abort(j_common_ptr cinfo) { + if (cinfo->mem == nullptr) return; + for (int pool_id = 0; pool_id < JPOOL_NUMPOOLS; ++pool_id) { + if (pool_id == JPOOL_PERMANENT) continue; + (*cinfo->mem->free_pool)(cinfo, pool_id); + } + if (cinfo->is_decompressor) { + cinfo->global_state = jpegli::kDecStart; + } else { + cinfo->global_state = jpegli::kEncStart; + } +} + +void jpegli_destroy(j_common_ptr cinfo) { + if (cinfo->mem == nullptr) return; + (*cinfo->mem->self_destruct)(cinfo); + if (cinfo->is_decompressor) { + cinfo->global_state = jpegli::kDecNull; + delete reinterpret_cast<j_decompress_ptr>(cinfo)->master; + } else { + cinfo->global_state = jpegli::kEncNull; + } +} + +JQUANT_TBL* jpegli_alloc_quant_table(j_common_ptr cinfo) { + JQUANT_TBL* table = jpegli::Allocate<JQUANT_TBL>(cinfo, 1); + table->sent_table = FALSE; + return table; +} + +JHUFF_TBL* jpegli_alloc_huff_table(j_common_ptr cinfo) { + JHUFF_TBL* table = jpegli::Allocate<JHUFF_TBL>(cinfo, 1); + table->sent_table = FALSE; + return table; +} + +int jpegli_bytes_per_sample(JpegliDataType data_type) { + switch (data_type) { + case JPEGLI_TYPE_UINT8: + return 1; + case JPEGLI_TYPE_UINT16: + return 2; + case JPEGLI_TYPE_FLOAT: + return 4; + default: + return 0; + } +} diff --git a/lib/jpegli/common.h b/lib/jpegli/common.h new file mode 100644 index 0000000..42487f2 --- /dev/null +++ b/lib/jpegli/common.h @@ -0,0 +1,48 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+// +// This file contains the C API of the common encoder/decoder part of libjpegli +// library, which is based on the C API of libjpeg, with the function names +// changed from jpeg_* to jpegli_*, while compressor and decompressor object +// definitions are included directly from jpeglib.h +// +// Applications can use the libjpegli library in one of the following ways: +// +// (1) Include jpegli/encode.h and/or jpegli/decode.h, update the function +// names of the API and link against libjpegli. +// +// (2) Leave the application code unchanged, but replace the libjpeg.so library +// with the one built by this project that is API- and ABI-compatible with +// libjpeg-turbo's version of libjpeg.so. + +#ifndef LIB_JPEGLI_COMMON_H_ +#define LIB_JPEGLI_COMMON_H_ + +/* clang-format off */ +#include <stdio.h> +#include <jpeglib.h> +/* clang-format on */ + +#include "lib/jpegli/types.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +struct jpeg_error_mgr* jpegli_std_error(struct jpeg_error_mgr* err); + +void jpegli_abort(j_common_ptr cinfo); + +void jpegli_destroy(j_common_ptr cinfo); + +JQUANT_TBL* jpegli_alloc_quant_table(j_common_ptr cinfo); + +JHUFF_TBL* jpegli_alloc_huff_table(j_common_ptr cinfo); + +#if defined(__cplusplus) || defined(c_plusplus) +} // extern "C" +#endif + +#endif // LIB_JPEGLI_COMMON_H_ diff --git a/lib/jpegli/common_internal.h b/lib/jpegli/common_internal.h new file mode 100644 index 0000000..248d315 --- /dev/null +++ b/lib/jpegli/common_internal.h @@ -0,0 +1,150 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JPEGLI_COMMON_INTERNAL_H_ +#define LIB_JPEGLI_COMMON_INTERNAL_H_ + +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include <algorithm> +#include <hwy/aligned_allocator.h> + +#include "lib/jpegli/memory_manager.h" +#include "lib/jpegli/simd.h" +#include "lib/jxl/base/compiler_specific.h" // for ssize_t +#include "lib/jxl/base/status.h" // for JXL_CHECK + +namespace jpegli { + +enum State { + kDecNull, + kDecStart, + kDecInHeader, + kDecHeaderDone, + kDecProcessMarkers, + kDecProcessScan, + kEncNull, + kEncStart, + kEncHeader, + kEncReadImage, + kEncWriteCoeffs, +}; + +template <typename T1, typename T2> +constexpr inline T1 DivCeil(T1 a, T2 b) { + return (a + b - 1) / b; +} + +template <typename T1, typename T2> +constexpr inline T1 RoundUpTo(T1 a, T2 b) { + return DivCeil(a, b) * b; +} + +constexpr size_t kDCTBlockSize = 64; +// This is set to the same value as MAX_COMPS_IN_SCAN, because that is the +// maximum number of channels the libjpeg-turbo decoder can decode. 
+constexpr int kMaxComponents = 4; +constexpr int kMaxQuantTables = 4; +constexpr int kJpegPrecision = 8; +constexpr int kMaxHuffmanTables = 4; +constexpr size_t kJpegHuffmanMaxBitLength = 16; +constexpr int kJpegHuffmanAlphabetSize = 256; +constexpr int kJpegDCAlphabetSize = 12; +constexpr int kMaxDHTMarkers = 512; +constexpr int kMaxDimPixels = 65535; +constexpr uint8_t kApp1 = 0xE1; +constexpr uint8_t kApp2 = 0xE2; +const uint8_t kIccProfileTag[12] = "ICC_PROFILE"; +const uint8_t kExifTag[6] = "Exif\0"; +const uint8_t kXMPTag[29] = "http://ns.adobe.com/xap/1.0/"; + +/* clang-format off */ +constexpr uint32_t kJPEGNaturalOrder[80] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, + // extra entries for safety in decoder + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63 +}; + +constexpr uint32_t kJPEGZigZagOrder[64] = { + 0, 1, 5, 6, 14, 15, 27, 28, + 2, 4, 7, 13, 16, 26, 29, 42, + 3, 8, 12, 17, 25, 30, 41, 43, + 9, 11, 18, 24, 31, 40, 44, 53, + 10, 19, 23, 32, 39, 45, 52, 54, + 20, 22, 33, 38, 46, 51, 55, 60, + 21, 34, 37, 47, 50, 56, 59, 61, + 35, 36, 48, 49, 57, 58, 62, 63 +}; +/* clang-format on */ + +template <typename T> +class RowBuffer { + public: + template <typename CInfoType> + void Allocate(CInfoType cinfo, size_t num_rows, size_t rowsize) { + size_t vec_size = std::max(VectorSize(), sizeof(T)); + JXL_CHECK(vec_size % sizeof(T) == 0); + size_t alignment = std::max<size_t>(HWY_ALIGNMENT, vec_size); + size_t min_memstride = alignment + rowsize * sizeof(T) + vec_size; + size_t memstride = RoundUpTo(min_memstride, alignment); + xsize_ = rowsize; + ysize_ = num_rows; + stride_ = memstride / sizeof(T); + offset_ = alignment / sizeof(T); + data_ = ::jpegli::Allocate<T>(cinfo, ysize_ * stride_, JPOOL_IMAGE_ALIGNED); + } + + T* 
Row(ssize_t y) const { + return &data_[((ysize_ + y) % ysize_) * stride_ + offset_]; + } + + size_t xsize() const { return xsize_; }; + size_t ysize() const { return ysize_; }; + size_t stride() const { return stride_; } + + void PadRow(size_t y, size_t from, int border) { + float* row = Row(y); + for (int offset = -border; offset < 0; ++offset) { + row[offset] = row[0]; + } + float last_val = row[from - 1]; + for (size_t x = from; x < xsize_ + border; ++x) { + row[x] = last_val; + } + } + + void CopyRow(ssize_t dst_row, ssize_t src_row, int border) { + memcpy(Row(dst_row) - border, Row(src_row) - border, + (xsize_ + 2 * border) * sizeof(T)); + } + + void FillRow(ssize_t y, T val, size_t len) { + T* row = Row(y); + for (size_t x = 0; x < len; ++x) { + row[x] = val; + } + } + + private: + size_t xsize_; + size_t ysize_; + size_t stride_; + size_t offset_; + T* data_; +}; + +} // namespace jpegli + +#endif // LIB_JPEGLI_COMMON_INTERNAL_H_ diff --git a/lib/jpegli/dct-inl.h b/lib/jpegli/dct-inl.h new file mode 100644 index 0000000..1cbe704 --- /dev/null +++ b/lib/jpegli/dct-inl.h @@ -0,0 +1,258 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#if defined(LIB_JPEGLI_DCT_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JPEGLI_DCT_INL_H_ +#undef LIB_JPEGLI_DCT_INL_H_ +#else +#define LIB_JPEGLI_DCT_INL_H_ +#endif + +#include "lib/jpegli/transpose-inl.h" +#include "lib/jxl/base/compiler_specific.h" + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::Abs; +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::DemoteTo; +using hwy::HWY_NAMESPACE::Ge; +using hwy::HWY_NAMESPACE::IfThenElseZero; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::Rebind; +using hwy::HWY_NAMESPACE::Round; +using hwy::HWY_NAMESPACE::Sub; +using hwy::HWY_NAMESPACE::Vec; + +using D = HWY_FULL(float); +using DI = HWY_FULL(int32_t); + +template <size_t N> +void AddReverse(const float* JXL_RESTRICT ain1, const float* JXL_RESTRICT ain2, + float* JXL_RESTRICT aout) { + HWY_CAPPED(float, 8) d8; + for (size_t i = 0; i < N; i++) { + auto in1 = Load(d8, ain1 + i * 8); + auto in2 = Load(d8, ain2 + (N - i - 1) * 8); + Store(Add(in1, in2), d8, aout + i * 8); + } +} + +template <size_t N> +void SubReverse(const float* JXL_RESTRICT ain1, const float* JXL_RESTRICT ain2, + float* JXL_RESTRICT aout) { + HWY_CAPPED(float, 8) d8; + for (size_t i = 0; i < N; i++) { + auto in1 = Load(d8, ain1 + i * 8); + auto in2 = Load(d8, ain2 + (N - i - 1) * 8); + Store(Sub(in1, in2), d8, aout + i * 8); + } +} + +template <size_t N> +void B(float* JXL_RESTRICT coeff) { + HWY_CAPPED(float, 8) d8; + constexpr float kSqrt2 = 1.41421356237f; + auto sqrt2 = Set(d8, kSqrt2); + auto in1 = Load(d8, coeff); + auto in2 = Load(d8, coeff + 8); + Store(MulAdd(in1, sqrt2, in2), d8, coeff); + for (size_t i = 1; i + 1 < N; i++) { + auto in1 = Load(d8, coeff + i * 8); + auto in2 = Load(d8, coeff + (i + 1) * 8); + Store(Add(in1, in2), d8, coeff + i * 8); + } +} + +// Ideally optimized away by compiler (except the multiply). 
+template <size_t N> +void InverseEvenOdd(const float* JXL_RESTRICT ain, float* JXL_RESTRICT aout) { + HWY_CAPPED(float, 8) d8; + for (size_t i = 0; i < N / 2; i++) { + auto in1 = Load(d8, ain + i * 8); + Store(in1, d8, aout + 2 * i * 8); + } + for (size_t i = N / 2; i < N; i++) { + auto in1 = Load(d8, ain + i * 8); + Store(in1, d8, aout + (2 * (i - N / 2) + 1) * 8); + } +} + +// Constants for DCT implementation. Generated by the following snippet: +// for i in range(N // 2): +// print(1.0 / (2 * math.cos((i + 0.5) * math.pi / N)), end=", ") +template <size_t N> +struct WcMultipliers; + +template <> +struct WcMultipliers<4> { + static constexpr float kMultipliers[] = { + 0.541196100146197, + 1.3065629648763764, + }; +}; + +template <> +struct WcMultipliers<8> { + static constexpr float kMultipliers[] = { + 0.5097955791041592, + 0.6013448869350453, + 0.8999762231364156, + 2.5629154477415055, + }; +}; + +constexpr float WcMultipliers<4>::kMultipliers[]; +constexpr float WcMultipliers<8>::kMultipliers[]; + +// Invoked on full vector. 
+template <size_t N> +void Multiply(float* JXL_RESTRICT coeff) { + HWY_CAPPED(float, 8) d8; + for (size_t i = 0; i < N / 2; i++) { + auto in1 = Load(d8, coeff + (N / 2 + i) * 8); + auto mul = Set(d8, WcMultipliers<N>::kMultipliers[i]); + Store(Mul(in1, mul), d8, coeff + (N / 2 + i) * 8); + } +} + +void LoadFromBlock(const float* JXL_RESTRICT pixels, size_t pixels_stride, + size_t off, float* JXL_RESTRICT coeff) { + HWY_CAPPED(float, 8) d8; + for (size_t i = 0; i < 8; i++) { + Store(LoadU(d8, pixels + i * pixels_stride + off), d8, coeff + i * 8); + } +} + +void StoreToBlockAndScale(const float* JXL_RESTRICT coeff, float* output, + size_t off) { + HWY_CAPPED(float, 8) d8; + auto mul = Set(d8, 1.0f / 8); + for (size_t i = 0; i < 8; i++) { + StoreU(Mul(mul, Load(d8, coeff + i * 8)), d8, output + i * 8 + off); + } +} + +template <size_t N> +struct DCT1DImpl; + +template <> +struct DCT1DImpl<1> { + JXL_INLINE void operator()(float* JXL_RESTRICT mem) {} +}; + +template <> +struct DCT1DImpl<2> { + JXL_INLINE void operator()(float* JXL_RESTRICT mem) { + HWY_CAPPED(float, 8) d8; + auto in1 = Load(d8, mem); + auto in2 = Load(d8, mem + 8); + Store(Add(in1, in2), d8, mem); + Store(Sub(in1, in2), d8, mem + 8); + } +}; + +template <size_t N> +struct DCT1DImpl { + void operator()(float* JXL_RESTRICT mem) { + HWY_ALIGN float tmp[N * 8]; + AddReverse<N / 2>(mem, mem + N * 4, tmp); + DCT1DImpl<N / 2>()(tmp); + SubReverse<N / 2>(mem, mem + N * 4, tmp + N * 4); + Multiply<N>(tmp); + DCT1DImpl<N / 2>()(tmp + N * 4); + B<N / 2>(tmp + N * 4); + InverseEvenOdd<N>(tmp, mem); + } +}; + +void DCT1D(const float* JXL_RESTRICT pixels, size_t pixels_stride, + float* JXL_RESTRICT output) { + HWY_CAPPED(float, 8) d8; + HWY_ALIGN float tmp[64]; + for (size_t i = 0; i < 8; i += Lanes(d8)) { + // TODO(veluca): consider removing the temporary memory here (as is done in + // IDCT), if it turns out that some compilers don't optimize away the loads + // and this is performance-critical. 
+ LoadFromBlock(pixels, pixels_stride, i, tmp); + DCT1DImpl<8>()(tmp); + StoreToBlockAndScale(tmp, output, i); + } +} + +static JXL_INLINE JXL_MAYBE_UNUSED void TransformFromPixels( + const float* JXL_RESTRICT pixels, size_t pixels_stride, + float* JXL_RESTRICT coefficients, float* JXL_RESTRICT scratch_space) { + DCT1D(pixels, pixels_stride, scratch_space); + Transpose8x8Block(scratch_space, coefficients); + DCT1D(coefficients, 8, scratch_space); + Transpose8x8Block(scratch_space, coefficients); +} + +static JXL_INLINE JXL_MAYBE_UNUSED void StoreQuantizedValue(const Vec<DI>& ival, + int16_t* out) { + Rebind<int16_t, DI> di16; + Store(DemoteTo(di16, ival), di16, out); +} + +static JXL_INLINE JXL_MAYBE_UNUSED void StoreQuantizedValue(const Vec<DI>& ival, + int32_t* out) { + DI di; + Store(ival, di, out); +} + +template <typename T> +void QuantizeBlock(const float* dct, const float* qmc, float aq_strength, + const float* zero_bias_offset, const float* zero_bias_mul, + T* block) { + D d; + DI di; + const auto aq_mul = Set(d, aq_strength); + for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) { + const auto val = Load(d, dct + k); + const auto q = Load(d, qmc + k); + const auto qval = Mul(val, q); + const auto zb_offset = Load(d, zero_bias_offset + k); + const auto zb_mul = Load(d, zero_bias_mul + k); + const auto threshold = Add(zb_offset, Mul(zb_mul, aq_mul)); + const auto nzero_mask = Ge(Abs(qval), threshold); + const auto ival = ConvertTo(di, IfThenElseZero(nzero_mask, Round(qval))); + StoreQuantizedValue(ival, block + k); + } +} + +template <typename T> +void ComputeCoefficientBlock(const float* JXL_RESTRICT pixels, size_t stride, + const float* JXL_RESTRICT qmc, + int16_t last_dc_coeff, float aq_strength, + const float* zero_bias_offset, + const float* zero_bias_mul, + float* JXL_RESTRICT tmp, T* block) { + float* JXL_RESTRICT dct = tmp; + float* JXL_RESTRICT scratch_space = tmp + DCTSIZE2; + TransformFromPixels(pixels, stride, dct, scratch_space); + 
QuantizeBlock(dct, qmc, aq_strength, zero_bias_offset, zero_bias_mul, block); + // Center DC values around zero. + static constexpr float kDCBias = 128.0f; + const float dc = (dct[0] - kDCBias) * qmc[0]; + float dc_threshold = zero_bias_offset[0] + aq_strength * zero_bias_mul[0]; + if (std::abs(dc - last_dc_coeff) < dc_threshold) { + block[0] = last_dc_coeff; + } else { + block[0] = std::round(dc); + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); +#endif // LIB_JPEGLI_DCT_INL_H_ diff --git a/lib/jpegli/decode.cc b/lib/jpegli/decode.cc new file mode 100644 index 0000000..758babe --- /dev/null +++ b/lib/jpegli/decode.cc @@ -0,0 +1,1028 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/decode.h" + +#include <string.h> + +#include <vector> + +#include "lib/jpegli/color_quantize.h" +#include "lib/jpegli/decode_internal.h" +#include "lib/jpegli/decode_marker.h" +#include "lib/jpegli/decode_scan.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/memory_manager.h" +#include "lib/jpegli/render.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/status.h" + +namespace jpegli { + +void InitializeImage(j_decompress_ptr cinfo) { + cinfo->restart_interval = 0; + cinfo->saw_JFIF_marker = FALSE; + cinfo->JFIF_major_version = 1; + cinfo->JFIF_minor_version = 1; + cinfo->density_unit = 0; + cinfo->X_density = 1; + cinfo->Y_density = 1; + cinfo->saw_Adobe_marker = FALSE; + cinfo->Adobe_transform = 0; + cinfo->CCIR601_sampling = FALSE; // not used + cinfo->marker_list = nullptr; + cinfo->comp_info = nullptr; + cinfo->input_scan_number = 0; + cinfo->input_iMCU_row = 0; + cinfo->output_scan_number = 0; + cinfo->output_iMCU_row = 0; + cinfo->output_scanline = 0; + cinfo->unread_marker = 0; + cinfo->coef_bits 
= nullptr; + // We set all these to zero since we don't yet support arithmetic coding. + memset(cinfo->arith_dc_L, 0, sizeof(cinfo->arith_dc_L)); + memset(cinfo->arith_dc_U, 0, sizeof(cinfo->arith_dc_U)); + memset(cinfo->arith_ac_K, 0, sizeof(cinfo->arith_ac_K)); + // Initialize the private fields. + jpeg_decomp_master* m = cinfo->master; + m->input_buffer_.clear(); + m->input_buffer_pos_ = 0; + m->codestream_bits_ahead_ = 0; + m->is_multiscan_ = false; + m->found_soi_ = false; + m->found_dri_ = false; + m->found_sof_ = false; + m->found_eoi_ = false; + m->icc_index_ = 0; + m->icc_total_ = 0; + m->icc_profile_.clear(); + memset(m->dc_huff_lut_, 0, sizeof(m->dc_huff_lut_)); + memset(m->ac_huff_lut_, 0, sizeof(m->ac_huff_lut_)); + // Initialize the values to an invalid symbol so that we can recognize it + // when reading the bit stream using a Huffman code with space > 0. + for (size_t i = 0; i < kAllHuffLutSize; ++i) { + m->dc_huff_lut_[i].bits = 0; + m->dc_huff_lut_[i].value = 0xffff; + m->ac_huff_lut_[i].bits = 0; + m->ac_huff_lut_[i].value = 0xffff; + } + m->colormap_lut_ = nullptr; + m->pixels_ = nullptr; + m->scanlines_ = nullptr; + m->regenerate_inverse_colormap_ = true; + for (int i = 0; i < kMaxComponents; ++i) { + m->dither_[i] = nullptr; + m->error_row_[i] = nullptr; + } + m->output_passes_done_ = 0; + m->xoffset_ = 0; + m->dequant_ = nullptr; +} + +void InitializeDecompressParams(j_decompress_ptr cinfo) { + cinfo->jpeg_color_space = JCS_UNKNOWN; + cinfo->out_color_space = JCS_UNKNOWN; + cinfo->scale_num = 1; + cinfo->scale_denom = 1; + cinfo->output_gamma = 0.0f; + cinfo->buffered_image = FALSE; + cinfo->raw_data_out = FALSE; + cinfo->dct_method = JDCT_DEFAULT; + cinfo->do_fancy_upsampling = TRUE; + cinfo->do_block_smoothing = TRUE; + cinfo->quantize_colors = FALSE; + cinfo->dither_mode = JDITHER_FS; + cinfo->two_pass_quantize = TRUE; + cinfo->desired_number_of_colors = 256; + cinfo->enable_1pass_quant = FALSE; + cinfo->enable_external_quant = FALSE; + 
cinfo->enable_2pass_quant = FALSE; + cinfo->actual_number_of_colors = 0; + cinfo->colormap = nullptr; +} + +void InitProgressMonitor(j_decompress_ptr cinfo, bool coef_only) { + if (!cinfo->progress) return; + jpeg_decomp_master* m = cinfo->master; + int nc = cinfo->num_components; + int estimated_num_scans = + cinfo->progressive_mode ? 2 + 3 * nc : (m->is_multiscan_ ? nc : 1); + cinfo->progress->pass_limit = cinfo->total_iMCU_rows * estimated_num_scans; + cinfo->progress->pass_counter = 0; + if (coef_only) { + cinfo->progress->total_passes = 1; + } else { + int input_passes = !cinfo->buffered_image && m->is_multiscan_ ? 1 : 0; + bool two_pass_quant = cinfo->quantize_colors && !cinfo->colormap && + cinfo->two_pass_quantize && cinfo->enable_2pass_quant; + cinfo->progress->total_passes = input_passes + (two_pass_quant ? 2 : 1); + } + cinfo->progress->completed_passes = 0; +} + +void InitProgressMonitorForOutput(j_decompress_ptr cinfo) { + if (!cinfo->progress) return; + jpeg_decomp_master* m = cinfo->master; + int passes_per_output = cinfo->enable_2pass_quant ? 2 : 1; + int output_passes_left = cinfo->buffered_image && !m->found_eoi_ ? 2 : 1; + cinfo->progress->total_passes = + m->output_passes_done_ + passes_per_output * output_passes_left; + cinfo->progress->completed_passes = m->output_passes_done_; +} + +void ProgressMonitorInputPass(j_decompress_ptr cinfo) { + if (!cinfo->progress) return; + cinfo->progress->pass_counter = + ((cinfo->input_scan_number - 1) * cinfo->total_iMCU_rows + + cinfo->input_iMCU_row); + if (cinfo->progress->pass_counter > cinfo->progress->pass_limit) { + cinfo->progress->pass_limit = + cinfo->input_scan_number * cinfo->total_iMCU_rows; + } + (*cinfo->progress->progress_monitor)(reinterpret_cast<j_common_ptr>(cinfo)); +} + +void ProgressMonitorOutputPass(j_decompress_ptr cinfo) { + if (!cinfo->progress) return; + jpeg_decomp_master* m = cinfo->master; + int input_passes = !cinfo->buffered_image && m->is_multiscan_ ? 
1 : 0; + cinfo->progress->pass_counter = cinfo->output_scanline; + cinfo->progress->pass_limit = cinfo->output_height; + cinfo->progress->completed_passes = input_passes + m->output_passes_done_; + (*cinfo->progress->progress_monitor)(reinterpret_cast<j_common_ptr>(cinfo)); +} + +void BuildHuffmanLookupTable(j_decompress_ptr cinfo, JHUFF_TBL* table, + HuffmanTableEntry* huff_lut) { + uint32_t counts[kJpegHuffmanMaxBitLength + 1] = {}; + counts[0] = 0; + int total_count = 0; + int space = 1 << kJpegHuffmanMaxBitLength; + int max_depth = 1; + for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) { + int count = table->bits[i]; + if (count != 0) { + max_depth = i; + } + counts[i] = count; + total_count += count; + space -= count * (1 << (kJpegHuffmanMaxBitLength - i)); + } + uint32_t values[kJpegHuffmanAlphabetSize + 1] = {}; + uint8_t values_seen[256] = {0}; + for (int i = 0; i < total_count; ++i) { + int value = table->huffval[i]; + if (values_seen[value]) { + return JPEGLI_ERROR("Duplicate Huffman code value %d", value); + } + values_seen[value] = 1; + values[i] = value; + } + // Add an invalid symbol that will have the all 1 code. + ++counts[max_depth]; + values[total_count] = kJpegHuffmanAlphabetSize; + space -= (1 << (kJpegHuffmanMaxBitLength - max_depth)); + if (space < 0) { + JPEGLI_ERROR("Invalid Huffman code lengths."); + } else if (space > 0 && huff_lut[0].value != 0xffff) { + // Re-initialize the values to an invalid symbol so that we can recognize + // it when reading the bit stream using a Huffman code with space > 0. 
+ for (int i = 0; i < kJpegHuffmanLutSize; ++i) { + huff_lut[i].bits = 0; + huff_lut[i].value = 0xffff; + } + } + BuildJpegHuffmanTable(&counts[0], &values[0], huff_lut); +} + +void PrepareForScan(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + for (int i = 0; i < cinfo->comps_in_scan; ++i) { + int comp_idx = cinfo->cur_comp_info[i]->component_index; + int* prev_coef_bits = cinfo->coef_bits[comp_idx + cinfo->num_components]; + for (int k = std::min(cinfo->Ss, 1); k <= std::max(cinfo->Se, 9); k++) { + prev_coef_bits[k] = + (cinfo->input_scan_number > 0) ? cinfo->coef_bits[comp_idx][k] : 0; + } + for (int k = cinfo->Ss; k <= cinfo->Se; ++k) { + cinfo->coef_bits[comp_idx][k] = cinfo->Al; + } + } + AddStandardHuffmanTables(reinterpret_cast<j_common_ptr>(cinfo), + /*is_dc=*/false); + AddStandardHuffmanTables(reinterpret_cast<j_common_ptr>(cinfo), + /*is_dc=*/true); + // Check that all the Huffman tables needed for this scan are defined and + // build derived lookup tables. + for (int i = 0; i < cinfo->comps_in_scan; ++i) { + if (cinfo->Ss == 0) { + int dc_tbl_idx = cinfo->cur_comp_info[i]->dc_tbl_no; + JHUFF_TBL* table = cinfo->dc_huff_tbl_ptrs[dc_tbl_idx]; + HuffmanTableEntry* huff_lut = + &m->dc_huff_lut_[dc_tbl_idx * kJpegHuffmanLutSize]; + if (!table) { + return JPEGLI_ERROR("DC Huffman table %d not found", dc_tbl_idx); + } + BuildHuffmanLookupTable(cinfo, table, huff_lut); + } + if (cinfo->Se > 0) { + int ac_tbl_idx = cinfo->cur_comp_info[i]->ac_tbl_no; + JHUFF_TBL* table = cinfo->ac_huff_tbl_ptrs[ac_tbl_idx]; + HuffmanTableEntry* huff_lut = + &m->ac_huff_lut_[ac_tbl_idx * kJpegHuffmanLutSize]; + if (!table) { + return JPEGLI_ERROR("AC Huffman table %d not found", ac_tbl_idx); + } + BuildHuffmanLookupTable(cinfo, table, huff_lut); + } + } + // Copy quantization tables into comp_info. 
+ for (int i = 0; i < cinfo->comps_in_scan; ++i) { + jpeg_component_info* comp = cinfo->cur_comp_info[i]; + if (comp->quant_table == nullptr) { + comp->quant_table = Allocate<JQUANT_TBL>(cinfo, 1, JPOOL_IMAGE); + memcpy(comp->quant_table, cinfo->quant_tbl_ptrs[comp->quant_tbl_no], + sizeof(JQUANT_TBL)); + } + } + if (cinfo->comps_in_scan == 1) { + const auto& comp = *cinfo->cur_comp_info[0]; + cinfo->MCUs_per_row = DivCeil(cinfo->image_width * comp.h_samp_factor, + cinfo->max_h_samp_factor * DCTSIZE); + cinfo->MCU_rows_in_scan = DivCeil(cinfo->image_height * comp.v_samp_factor, + cinfo->max_v_samp_factor * DCTSIZE); + m->mcu_rows_per_iMCU_row_ = cinfo->cur_comp_info[0]->v_samp_factor; + } else { + cinfo->MCU_rows_in_scan = cinfo->total_iMCU_rows; + cinfo->MCUs_per_row = m->iMCU_cols_; + m->mcu_rows_per_iMCU_row_ = 1; + size_t mcu_size = 0; + for (int i = 0; i < cinfo->comps_in_scan; ++i) { + jpeg_component_info* comp = cinfo->cur_comp_info[i]; + mcu_size += comp->h_samp_factor * comp->v_samp_factor; + } + if (mcu_size > D_MAX_BLOCKS_IN_MCU) { + JPEGLI_ERROR("MCU size too big"); + } + } + memset(m->last_dc_coeff_, 0, sizeof(m->last_dc_coeff_)); + m->restarts_to_go_ = cinfo->restart_interval; + m->next_restart_marker_ = 0; + m->eobrun_ = -1; + m->scan_mcu_row_ = 0; + m->scan_mcu_col_ = 0; + m->codestream_bits_ahead_ = 0; + ++cinfo->input_scan_number; + cinfo->input_iMCU_row = 0; + PrepareForiMCURow(cinfo); + cinfo->global_state = kDecProcessScan; +} + +int ConsumeInput(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + if (cinfo->global_state == kDecProcessScan && m->streaming_mode_ && + cinfo->input_iMCU_row > cinfo->output_iMCU_row) { + // Prevent input from getting ahead of output in streaming mode. 
+ return JPEG_SUSPENDED; + } + jpeg_source_mgr* src = cinfo->src; + int status; + for (;;) { + const uint8_t* data; + size_t len; + if (m->input_buffer_.empty()) { + data = cinfo->src->next_input_byte; + len = cinfo->src->bytes_in_buffer; + } else { + data = &m->input_buffer_[m->input_buffer_pos_]; + len = m->input_buffer_.size() - m->input_buffer_pos_; + } + size_t pos = 0; + if (cinfo->global_state == kDecProcessScan) { + status = ProcessScan(cinfo, data, len, &pos, &m->codestream_bits_ahead_); + } else { + status = ProcessMarkers(cinfo, data, len, &pos); + } + if (m->input_buffer_.empty()) { + cinfo->src->next_input_byte += pos; + cinfo->src->bytes_in_buffer -= pos; + } else { + m->input_buffer_pos_ += pos; + size_t bytes_left = m->input_buffer_.size() - m->input_buffer_pos_; + if (bytes_left <= src->bytes_in_buffer) { + src->next_input_byte += (src->bytes_in_buffer - bytes_left); + src->bytes_in_buffer = bytes_left; + m->input_buffer_.clear(); + m->input_buffer_pos_ = 0; + } + } + if (status == kHandleRestart) { + JXL_DASSERT(m->input_buffer_.size() <= + m->input_buffer_pos_ + src->bytes_in_buffer); + m->input_buffer_.clear(); + m->input_buffer_pos_ = 0; + if (cinfo->unread_marker == 0xd0 + m->next_restart_marker_) { + cinfo->unread_marker = 0; + } else { + if (!(*cinfo->src->resync_to_restart)(cinfo, m->next_restart_marker_)) { + return JPEG_SUSPENDED; + } + } + m->next_restart_marker_ += 1; + m->next_restart_marker_ &= 0x7; + m->restarts_to_go_ = cinfo->restart_interval; + if (cinfo->unread_marker != 0) { + JPEGLI_WARN("Failed to resync to next restart marker, skipping scan."); + return JPEG_SCAN_COMPLETED; + } + continue; + } + if (status == kHandleMarkerProcessor) { + JXL_DASSERT(m->input_buffer_.size() <= + m->input_buffer_pos_ + src->bytes_in_buffer); + m->input_buffer_.clear(); + m->input_buffer_pos_ = 0; + if (!(*GetMarkerProcessor(cinfo))(cinfo)) { + return JPEG_SUSPENDED; + } + cinfo->unread_marker = 0; + continue; + } + if (status != kNeedMoreInput) 
{ + break; + } + if (m->input_buffer_.empty()) { + JXL_DASSERT(m->input_buffer_pos_ == 0); + m->input_buffer_.assign(src->next_input_byte, + src->next_input_byte + src->bytes_in_buffer); + } + if (!(*cinfo->src->fill_input_buffer)(cinfo)) { + m->input_buffer_.clear(); + m->input_buffer_pos_ = 0; + return JPEG_SUSPENDED; + } + if (src->bytes_in_buffer == 0) { + JPEGLI_ERROR("Empty input."); + } + m->input_buffer_.insert(m->input_buffer_.end(), src->next_input_byte, + src->next_input_byte + src->bytes_in_buffer); + } + if (status == JPEG_SCAN_COMPLETED) { + cinfo->global_state = kDecProcessMarkers; + } else if (status == JPEG_REACHED_SOS) { + if (cinfo->global_state == kDecInHeader) { + cinfo->global_state = kDecHeaderDone; + } else { + PrepareForScan(cinfo); + } + } + return status; +} + +bool IsInputReady(j_decompress_ptr cinfo) { + if (cinfo->master->found_eoi_) { + return true; + } + if (cinfo->input_scan_number > cinfo->output_scan_number) { + return true; + } + if (cinfo->input_scan_number < cinfo->output_scan_number) { + return false; + } + if (cinfo->input_iMCU_row == cinfo->total_iMCU_rows) { + return true; + } + return cinfo->input_iMCU_row > + cinfo->output_iMCU_row + (cinfo->master->streaming_mode_ ? 
0 : 2); +} + +bool ReadOutputPass(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + if (!m->pixels_) { + size_t stride = cinfo->out_color_components * cinfo->output_width; + size_t num_samples = cinfo->output_height * stride; + m->pixels_ = Allocate<uint8_t>(cinfo, num_samples, JPOOL_IMAGE); + m->scanlines_ = + Allocate<JSAMPROW>(cinfo, cinfo->output_height, JPOOL_IMAGE); + for (size_t i = 0; i < cinfo->output_height; ++i) { + m->scanlines_[i] = &m->pixels_[i * stride]; + } + } + size_t num_output_rows = 0; + while (num_output_rows < cinfo->output_height) { + if (IsInputReady(cinfo)) { + ProgressMonitorOutputPass(cinfo); + ProcessOutput(cinfo, &num_output_rows, m->scanlines_, + cinfo->output_height); + } else if (ConsumeInput(cinfo) == JPEG_SUSPENDED) { + return false; + } + } + cinfo->output_scanline = 0; + cinfo->output_iMCU_row = 0; + return true; +} + +boolean PrepareQuantizedOutput(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + if (cinfo->raw_data_out) { + JPEGLI_ERROR("Color quantization is not supported in raw data mode."); + } + if (m->output_data_type_ != JPEGLI_TYPE_UINT8) { + JPEGLI_ERROR("Color quantization must use 8-bit mode."); + } + if (cinfo->colormap) { + m->quant_mode_ = 3; + } else if (cinfo->two_pass_quantize && cinfo->enable_2pass_quant) { + m->quant_mode_ = 2; + } else if (cinfo->enable_1pass_quant) { + m->quant_mode_ = 1; + } else { + JPEGLI_ERROR("Invalid quantization mode change"); + } + if (m->quant_mode_ > 1 && cinfo->dither_mode == JDITHER_ORDERED) { + cinfo->dither_mode = JDITHER_FS; + } + if (m->quant_mode_ == 1) { + ChooseColorMap1Pass(cinfo); + } else if (m->quant_mode_ == 2) { + m->quant_pass_ = 0; + if (!ReadOutputPass(cinfo)) { + return FALSE; + } + ChooseColorMap2Pass(cinfo); + } + if (m->quant_mode_ == 2 || + (m->quant_mode_ == 3 && m->regenerate_inverse_colormap_)) { + CreateInverseColorMap(cinfo); + } + if (cinfo->dither_mode == JDITHER_ORDERED) { + CreateOrderedDitherTables(cinfo); + 
} else if (cinfo->dither_mode == JDITHER_FS) { + InitFSDitherState(cinfo); + } + m->quant_pass_ = 1; + return TRUE; +} + +void AllocateCoefficientBuffer(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + j_common_ptr comptr = reinterpret_cast<j_common_ptr>(cinfo); + jvirt_barray_ptr* coef_arrays = jpegli::Allocate<jvirt_barray_ptr>( + cinfo, cinfo->num_components, JPOOL_IMAGE); + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + size_t height_in_blocks = + m->streaming_mode_ ? comp->v_samp_factor : comp->height_in_blocks; + coef_arrays[c] = (*cinfo->mem->request_virt_barray)( + comptr, JPOOL_IMAGE, TRUE, comp->width_in_blocks, height_in_blocks, + comp->v_samp_factor); + } + cinfo->master->coef_arrays = coef_arrays; + (*cinfo->mem->realize_virt_arrays)(comptr); +} + +void AllocateOutputBuffers(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + size_t iMCU_width = cinfo->max_h_samp_factor * m->min_scaled_dct_size; + size_t output_stride = m->iMCU_cols_ * iMCU_width; + m->need_context_rows_ = false; + for (int c = 0; c < cinfo->num_components; ++c) { + if (cinfo->do_fancy_upsampling && m->v_factor[c] == 2) { + m->need_context_rows_ = true; + } + } + for (int c = 0; c < cinfo->num_components; ++c) { + const auto& comp = cinfo->comp_info[c]; + size_t cheight = comp.v_samp_factor * m->scaled_dct_size[c]; + int downsampled_width = output_stride / m->h_factor[c]; + m->raw_height_[c] = cinfo->total_iMCU_rows * cheight; + if (m->need_context_rows_) { + cheight *= 3; + } + m->raw_output_[c].Allocate(cinfo, cheight, downsampled_width); + } + int num_all_components = + std::max(cinfo->out_color_components, cinfo->num_components); + for (int c = 0; c < num_all_components; ++c) { + m->render_output_[c].Allocate(cinfo, cinfo->max_v_samp_factor, + output_stride); + } + m->idct_scratch_ = Allocate<float>(cinfo, 5 * DCTSIZE2, JPOOL_IMAGE_ALIGNED); + // Padding for horizontal chroma upsampling. 
+ constexpr size_t kPaddingLeft = 64; + constexpr size_t kPaddingRight = 64; + m->upsample_scratch_ = Allocate<float>( + cinfo, output_stride + kPaddingLeft + kPaddingRight, JPOOL_IMAGE_ALIGNED); + size_t bytes_per_sample = jpegli_bytes_per_sample(m->output_data_type_); + size_t bytes_per_pixel = cinfo->out_color_components * bytes_per_sample; + size_t scratch_stride = RoundUpTo(output_stride, HWY_ALIGNMENT); + m->output_scratch_ = Allocate<uint8_t>( + cinfo, bytes_per_pixel * scratch_stride, JPOOL_IMAGE_ALIGNED); + m->smoothing_scratch_ = + Allocate<int16_t>(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED); + size_t coeffs_per_block = cinfo->num_components * DCTSIZE2; + m->nonzeros_ = Allocate<int>(cinfo, coeffs_per_block, JPOOL_IMAGE_ALIGNED); + m->sumabs_ = Allocate<int>(cinfo, coeffs_per_block, JPOOL_IMAGE_ALIGNED); + m->biases_ = Allocate<float>(cinfo, coeffs_per_block, JPOOL_IMAGE_ALIGNED); + m->dequant_ = Allocate<float>(cinfo, coeffs_per_block, JPOOL_IMAGE_ALIGNED); + memset(m->dequant_, 0, coeffs_per_block * sizeof(float)); +} + +} // namespace jpegli + +void jpegli_CreateDecompress(j_decompress_ptr cinfo, int version, + size_t structsize) { + cinfo->mem = nullptr; + if (structsize != sizeof(*cinfo)) { + JPEGLI_ERROR("jpeg_decompress_struct has wrong size."); + } + jpegli::InitMemoryManager(reinterpret_cast<j_common_ptr>(cinfo)); + cinfo->is_decompressor = TRUE; + cinfo->progress = nullptr; + cinfo->src = nullptr; + for (int i = 0; i < NUM_QUANT_TBLS; i++) { + cinfo->quant_tbl_ptrs[i] = nullptr; + } + for (int i = 0; i < NUM_HUFF_TBLS; i++) { + cinfo->dc_huff_tbl_ptrs[i] = nullptr; + cinfo->ac_huff_tbl_ptrs[i] = nullptr; + } + cinfo->global_state = jpegli::kDecStart; + cinfo->sample_range_limit = nullptr; // not used + cinfo->rec_outbuf_height = 1; // output works with any buffer height + cinfo->master = new jpeg_decomp_master; + jpeg_decomp_master* m = cinfo->master; + for (int i = 0; i < 16; ++i) { + m->app_marker_parsers[i] = nullptr; + } + m->com_marker_parser = 
nullptr; + memset(m->markers_to_save_, 0, sizeof(m->markers_to_save_)); + jpegli::InitializeDecompressParams(cinfo); + jpegli::InitializeImage(cinfo); +} + +void jpegli_destroy_decompress(j_decompress_ptr cinfo) { + jpegli_destroy(reinterpret_cast<j_common_ptr>(cinfo)); +} + +void jpegli_abort_decompress(j_decompress_ptr cinfo) { + jpegli_abort(reinterpret_cast<j_common_ptr>(cinfo)); +} + +void jpegli_save_markers(j_decompress_ptr cinfo, int marker_code, + unsigned int length_limit) { + // TODO(szabadka) Limit our memory usage by taking into account length_limit. + jpeg_decomp_master* m = cinfo->master; + if (marker_code < 0xe0) { + JPEGLI_ERROR("jpegli_save_markers: invalid marker code %d", marker_code); + } + m->markers_to_save_[marker_code - 0xe0] = 1; +} + +void jpegli_set_marker_processor(j_decompress_ptr cinfo, int marker_code, + jpeg_marker_parser_method routine) { + jpeg_decomp_master* m = cinfo->master; + if (marker_code == 0xfe) { + m->com_marker_parser = routine; + } else if (marker_code >= 0xe0 && marker_code <= 0xef) { + m->app_marker_parsers[marker_code - 0xe0] = routine; + } else { + JPEGLI_ERROR("jpegli_set_marker_processor: invalid marker code %d", + marker_code); + } +} + +int jpegli_consume_input(j_decompress_ptr cinfo) { + if (cinfo->global_state == jpegli::kDecStart) { + (*cinfo->err->reset_error_mgr)(reinterpret_cast<j_common_ptr>(cinfo)); + (*cinfo->src->init_source)(cinfo); + jpegli::InitializeDecompressParams(cinfo); + jpegli::InitializeImage(cinfo); + cinfo->global_state = jpegli::kDecInHeader; + } + if (cinfo->global_state == jpegli::kDecHeaderDone) { + return JPEG_REACHED_SOS; + } + if (cinfo->master->found_eoi_) { + return JPEG_REACHED_EOI; + } + if (cinfo->global_state == jpegli::kDecInHeader || + cinfo->global_state == jpegli::kDecProcessMarkers || + cinfo->global_state == jpegli::kDecProcessScan) { + return jpegli::ConsumeInput(cinfo); + } + JPEGLI_ERROR("Unexpected state %d", cinfo->global_state); + return JPEG_REACHED_EOI; // 
return value does not matter +} + +int jpegli_read_header(j_decompress_ptr cinfo, boolean require_image) { + if (cinfo->global_state != jpegli::kDecStart && + cinfo->global_state != jpegli::kDecInHeader) { + JPEGLI_ERROR("jpegli_read_header: unexpected state %d", + cinfo->global_state); + } + if (cinfo->src == nullptr) { + JPEGLI_ERROR("Missing source."); + } + for (;;) { + int retcode = jpegli_consume_input(cinfo); + if (retcode == JPEG_SUSPENDED) { + return retcode; + } else if (retcode == JPEG_REACHED_SOS) { + break; + } else if (retcode == JPEG_REACHED_EOI) { + if (require_image) { + JPEGLI_ERROR("jpegli_read_header: unexpected EOI marker."); + } + jpegli_abort_decompress(cinfo); + return JPEG_HEADER_TABLES_ONLY; + } + }; + return JPEG_HEADER_OK; +} + +boolean jpegli_read_icc_profile(j_decompress_ptr cinfo, JOCTET** icc_data_ptr, + unsigned int* icc_data_len) { + if (cinfo->global_state == jpegli::kDecStart || + cinfo->global_state == jpegli::kDecInHeader) { + JPEGLI_ERROR("jpegli_read_icc_profile: unexpected state %d", + cinfo->global_state); + } + if (icc_data_ptr == nullptr || icc_data_len == nullptr) { + JPEGLI_ERROR("jpegli_read_icc_profile: invalid output buffer"); + } + jpeg_decomp_master* m = cinfo->master; + if (m->icc_profile_.empty()) { + *icc_data_ptr = nullptr; + *icc_data_len = 0; + return FALSE; + } + *icc_data_len = m->icc_profile_.size(); + *icc_data_ptr = (JOCTET*)malloc(*icc_data_len); + if (*icc_data_ptr == nullptr) { + JPEGLI_ERROR("jpegli_read_icc_profile: Out of memory"); + } + memcpy(*icc_data_ptr, m->icc_profile_.data(), *icc_data_len); + return TRUE; +} + +void jpegli_core_output_dimensions(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + if (!m->found_sof_) { + JPEGLI_ERROR("No SOF marker found."); + } + if (cinfo->raw_data_out) { + if (cinfo->scale_num != 1 || cinfo->scale_denom != 1) { + JPEGLI_ERROR("Output scaling is not supported in raw output mode"); + } + } + if (cinfo->scale_num != 1 || cinfo->scale_denom 
!= 1) { + int dctsize = 16; + while (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * (dctsize - 1)) { + --dctsize; + } + m->min_scaled_dct_size = dctsize; + cinfo->output_width = + jpegli::DivCeil(cinfo->image_width * dctsize, DCTSIZE); + cinfo->output_height = + jpegli::DivCeil(cinfo->image_height * dctsize, DCTSIZE); + for (int c = 0; c < cinfo->num_components; ++c) { + m->scaled_dct_size[c] = m->min_scaled_dct_size; + } + } else { + cinfo->output_width = cinfo->image_width; + cinfo->output_height = cinfo->image_height; + m->min_scaled_dct_size = DCTSIZE; + for (int c = 0; c < cinfo->num_components; ++c) { + m->scaled_dct_size[c] = DCTSIZE; + } + } +} + +void jpegli_calc_output_dimensions(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + jpegli_core_output_dimensions(cinfo); + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + m->h_factor[c] = cinfo->max_h_samp_factor / comp->h_samp_factor; + m->v_factor[c] = cinfo->max_v_samp_factor / comp->v_samp_factor; + } + if (cinfo->scale_num != 1 || cinfo->scale_denom != 1) { + for (int c = 0; c < cinfo->num_components; ++c) { + // Prefer IDCT scaling over 2x upsampling. + while (m->scaled_dct_size[c] < DCTSIZE && (m->v_factor[c] % 2) == 0 && + (m->h_factor[c] % 2) == 0) { + m->scaled_dct_size[c] *= 2; + m->v_factor[c] /= 2; + m->h_factor[c] /= 2; + } + } + } + if (cinfo->out_color_space == JCS_GRAYSCALE) { + cinfo->out_color_components = 1; + } else if (cinfo->out_color_space == JCS_RGB || + cinfo->out_color_space == JCS_YCbCr) { + cinfo->out_color_components = 3; + } else if (cinfo->out_color_space == JCS_CMYK || + cinfo->out_color_space == JCS_YCCK) { + cinfo->out_color_components = 4; + } else { + cinfo->out_color_components = cinfo->num_components; + } + cinfo->output_components = + cinfo->quantize_colors ? 
1 : cinfo->out_color_components; + cinfo->rec_outbuf_height = 1; +} + +boolean jpegli_has_multiple_scans(j_decompress_ptr cinfo) { + if (cinfo->input_scan_number == 0) { + JPEGLI_ERROR("No SOS marker found."); + } + return cinfo->master->is_multiscan_; +} + +boolean jpegli_input_complete(j_decompress_ptr cinfo) { + return cinfo->master->found_eoi_; +} + +boolean jpegli_start_decompress(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + if (cinfo->global_state == jpegli::kDecHeaderDone) { + m->streaming_mode_ = !m->is_multiscan_ && !cinfo->buffered_image && + (!cinfo->quantize_colors || !cinfo->two_pass_quantize); + jpegli::AllocateCoefficientBuffer(cinfo); + jpegli_calc_output_dimensions(cinfo); + jpegli::PrepareForScan(cinfo); + if (cinfo->quantize_colors) { + if (cinfo->colormap != nullptr) { + cinfo->enable_external_quant = TRUE; + } else if (cinfo->two_pass_quantize && + cinfo->out_color_space == JCS_RGB) { + cinfo->enable_2pass_quant = TRUE; + } else { + cinfo->enable_1pass_quant = TRUE; + } + } + jpegli::InitProgressMonitor(cinfo, /*coef_only=*/false); + jpegli::AllocateOutputBuffers(cinfo); + if (cinfo->buffered_image == TRUE) { + cinfo->output_scan_number = 0; + return TRUE; + } + } else if (!m->is_multiscan_) { + JPEGLI_ERROR("jpegli_start_decompress: unexpected state %d", + cinfo->global_state); + } + if (m->is_multiscan_) { + if (cinfo->global_state != jpegli::kDecProcessScan && + cinfo->global_state != jpegli::kDecProcessMarkers) { + JPEGLI_ERROR("jpegli_start_decompress: unexpected state %d", + cinfo->global_state); + } + while (!m->found_eoi_) { + jpegli::ProgressMonitorInputPass(cinfo); + if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) { + return FALSE; + } + } + } + cinfo->output_scan_number = cinfo->input_scan_number; + jpegli::PrepareForOutput(cinfo); + if (cinfo->quantize_colors) { + return jpegli::PrepareQuantizedOutput(cinfo); + } else { + return TRUE; + } +} + +boolean jpegli_start_output(j_decompress_ptr cinfo, int 
scan_number) { + jpeg_decomp_master* m = cinfo->master; + if (!cinfo->buffered_image) { + JPEGLI_ERROR("jpegli_start_output: buffered image mode was not set"); + } + if (cinfo->global_state != jpegli::kDecProcessScan && + cinfo->global_state != jpegli::kDecProcessMarkers) { + JPEGLI_ERROR("jpegli_start_output: unexpected state %d", + cinfo->global_state); + } + cinfo->output_scan_number = std::max(1, scan_number); + if (m->found_eoi_) { + cinfo->output_scan_number = + std::min(cinfo->output_scan_number, cinfo->input_scan_number); + } + jpegli::InitProgressMonitorForOutput(cinfo); + jpegli::PrepareForOutput(cinfo); + if (cinfo->quantize_colors) { + return jpegli::PrepareQuantizedOutput(cinfo); + } else { + return TRUE; + } +} + +boolean jpegli_finish_output(j_decompress_ptr cinfo) { + if (!cinfo->buffered_image) { + JPEGLI_ERROR("jpegli_finish_output: buffered image mode was not set"); + } + if (cinfo->global_state != jpegli::kDecProcessScan && + cinfo->global_state != jpegli::kDecProcessMarkers) { + JPEGLI_ERROR("jpegli_finish_output: unexpected state %d", + cinfo->global_state); + } + // Advance input to the start of the next scan, or to the end of input. 
+ while (cinfo->input_scan_number <= cinfo->output_scan_number && + !cinfo->master->found_eoi_) { + if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) { + return FALSE; + } + } + return TRUE; +} + +JDIMENSION jpegli_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines, + JDIMENSION max_lines) { + jpeg_decomp_master* m = cinfo->master; + if (cinfo->global_state != jpegli::kDecProcessScan && + cinfo->global_state != jpegli::kDecProcessMarkers) { + JPEGLI_ERROR("jpegli_read_scanlines: unexpected state %d", + cinfo->global_state); + } + if (cinfo->buffered_image) { + if (cinfo->output_scan_number == 0) { + JPEGLI_ERROR( + "jpegli_read_scanlines: " + "jpegli_start_output() was not called"); + } + } else if (m->is_multiscan_ && !m->found_eoi_) { + JPEGLI_ERROR( + "jpegli_read_scanlines: " + "jpegli_start_decompress() did not finish"); + } + if (cinfo->output_scanline + max_lines > cinfo->output_height) { + max_lines = cinfo->output_height - cinfo->output_scanline; + } + jpegli::ProgressMonitorOutputPass(cinfo); + size_t num_output_rows = 0; + while (num_output_rows < max_lines) { + if (jpegli::IsInputReady(cinfo)) { + jpegli::ProcessOutput(cinfo, &num_output_rows, scanlines, max_lines); + } else if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) { + break; + } + } + return num_output_rows; +} + +JDIMENSION jpegli_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) { + // TODO(szabadka) Skip the IDCT for skipped over blocks. 
+ return jpegli_read_scanlines(cinfo, nullptr, num_lines); +} + +void jpegli_crop_scanline(j_decompress_ptr cinfo, JDIMENSION* xoffset, + JDIMENSION* width) { + jpeg_decomp_master* m = cinfo->master; + if ((cinfo->global_state != jpegli::kDecProcessScan && + cinfo->global_state != jpegli::kDecProcessMarkers) || + cinfo->output_scanline != 0) { + JPEGLI_ERROR("jpegli_crop_decompress: unexpected state %d", + cinfo->global_state); + } + if (cinfo->raw_data_out) { + JPEGLI_ERROR("Output cropping is not supported in raw data mode"); + } + if (xoffset == nullptr || width == nullptr || *width == 0 || + *xoffset + *width > cinfo->output_width) { + JPEGLI_ERROR("jpegli_crop_scanline: Invalid arguments"); + } + // TODO(szabadka) Skip the IDCT for skipped over blocks. + size_t xend = *xoffset + *width; + size_t iMCU_width = m->min_scaled_dct_size * cinfo->max_h_samp_factor; + *xoffset = (*xoffset / iMCU_width) * iMCU_width; + *width = xend - *xoffset; + cinfo->master->xoffset_ = *xoffset; + cinfo->output_width = *width; +} + +JDIMENSION jpegli_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION max_lines) { + if ((cinfo->global_state != jpegli::kDecProcessScan && + cinfo->global_state != jpegli::kDecProcessMarkers) || + !cinfo->raw_data_out) { + JPEGLI_ERROR("jpegli_read_raw_data: unexpected state %d", + cinfo->global_state); + } + size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE; + if (max_lines < iMCU_height) { + JPEGLI_ERROR("jpegli_read_raw_data: output buffer too small"); + } + jpegli::ProgressMonitorOutputPass(cinfo); + while (!jpegli::IsInputReady(cinfo)) { + if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) { + return 0; + } + } + if (cinfo->output_iMCU_row < cinfo->total_iMCU_rows) { + jpegli::ProcessRawOutput(cinfo, data); + return iMCU_height; + } + return 0; +} + +jvirt_barray_ptr* jpegli_read_coefficients(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + m->streaming_mode_ = false; + if (!cinfo->buffered_image && 
cinfo->global_state == jpegli::kDecHeaderDone) { + jpegli::AllocateCoefficientBuffer(cinfo); + jpegli_calc_output_dimensions(cinfo); + jpegli::InitProgressMonitor(cinfo, /*coef_only=*/true); + jpegli::PrepareForScan(cinfo); + } + if (cinfo->global_state != jpegli::kDecProcessScan && + cinfo->global_state != jpegli::kDecProcessMarkers) { + JPEGLI_ERROR("jpegli_read_coefficients: unexpected state %d", + cinfo->global_state); + } + if (!cinfo->buffered_image) { + while (!m->found_eoi_) { + jpegli::ProgressMonitorInputPass(cinfo); + if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) { + return nullptr; + } + } + cinfo->output_scanline = cinfo->output_height; + } + return m->coef_arrays; +} + +boolean jpegli_finish_decompress(j_decompress_ptr cinfo) { + if (cinfo->global_state != jpegli::kDecProcessScan && + cinfo->global_state != jpegli::kDecProcessMarkers) { + JPEGLI_ERROR("jpegli_finish_decompress: unexpected state %d", + cinfo->global_state); + } + if (!cinfo->buffered_image && cinfo->output_scanline < cinfo->output_height) { + JPEGLI_ERROR("Incomplete output"); + } + while (!cinfo->master->found_eoi_) { + if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) { + return FALSE; + } + } + (*cinfo->src->term_source)(cinfo); + jpegli_abort_decompress(cinfo); + return TRUE; +} + +boolean jpegli_resync_to_restart(j_decompress_ptr cinfo, int desired) { + JPEGLI_WARN("Invalid restart marker found: 0x%02x vs 0x%02x.", + cinfo->unread_marker, 0xd0 + desired); + // This is a trivial implementation, we just let the decoder skip the entire + // scan and attempt to render the partial input. 
+ return TRUE; +} + +void jpegli_new_colormap(j_decompress_ptr cinfo) { + if (cinfo->global_state != jpegli::kDecProcessScan && + cinfo->global_state != jpegli::kDecProcessMarkers) { + JPEGLI_ERROR("jpegli_new_colormap: unexpected state %d", + cinfo->global_state); + } + if (!cinfo->buffered_image) { + JPEGLI_ERROR("jpegli_new_colormap: not in buffered image mode"); + } + if (!cinfo->enable_external_quant) { + JPEGLI_ERROR("external colormap quantizer was not enabled"); + } + if (!cinfo->quantize_colors || cinfo->colormap == nullptr) { + JPEGLI_ERROR("jpegli_new_colormap: not in external colormap mode"); + } + cinfo->master->regenerate_inverse_colormap_ = true; +} + +void jpegli_set_output_format(j_decompress_ptr cinfo, JpegliDataType data_type, + JpegliEndianness endianness) { + switch (data_type) { + case JPEGLI_TYPE_UINT8: + case JPEGLI_TYPE_UINT16: + case JPEGLI_TYPE_FLOAT: + cinfo->master->output_data_type_ = data_type; + break; + default: + JPEGLI_ERROR("Unsupported data type %d", data_type); + } + switch (endianness) { + case JPEGLI_NATIVE_ENDIAN: + cinfo->master->swap_endianness_ = false; + break; + case JPEGLI_LITTLE_ENDIAN: + cinfo->master->swap_endianness_ = !IsLittleEndian(); + break; + case JPEGLI_BIG_ENDIAN: + cinfo->master->swap_endianness_ = IsLittleEndian(); + break; + default: + JPEGLI_ERROR("Unsupported endianness %d", endianness); + } +} diff --git a/lib/jpegli/decode.h b/lib/jpegli/decode.h new file mode 100644 index 0000000..9800ebf --- /dev/null +++ b/lib/jpegli/decode.h @@ -0,0 +1,106 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+// +// This file contains the C API of the decoder part of the libjpegli library, +// which is based on the C API of libjpeg, with the function names changed from +// jpeg_* to jpegli_*, while decompressor object definitions are included +// directly from jpeglib.h +// +// Applications can use the libjpegli library in one of the following ways: +// +// (1) Include jpegli/encode.h and/or jpegli/decode.h, update the function +// names of the API and link against libjpegli. +// +// (2) Leave the application code unchanged, but replace the libjpeg.so library +// with the one built by this project that is API- and ABI-compatible with +// libjpeg-turbo's version of libjpeg.so. + +#ifndef LIB_JPEGLI_DECODE_H_ +#define LIB_JPEGLI_DECODE_H_ + +#include "lib/jpegli/common.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +#define jpegli_create_decompress(cinfo) \ + jpegli_CreateDecompress((cinfo), JPEG_LIB_VERSION, \ + (size_t)sizeof(struct jpeg_decompress_struct)) + +void jpegli_CreateDecompress(j_decompress_ptr cinfo, int version, + size_t structsize); + +void jpegli_stdio_src(j_decompress_ptr cinfo, FILE *infile); + +void jpegli_mem_src(j_decompress_ptr cinfo, const unsigned char *inbuffer, + unsigned long insize); + +int jpegli_read_header(j_decompress_ptr cinfo, boolean require_image); + +boolean jpegli_start_decompress(j_decompress_ptr cinfo); + +JDIMENSION jpegli_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines, + JDIMENSION max_lines); + +JDIMENSION jpegli_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines); + +void jpegli_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset, + JDIMENSION *width); + +boolean jpegli_finish_decompress(j_decompress_ptr cinfo); + +JDIMENSION jpegli_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION max_lines); + +jvirt_barray_ptr *jpegli_read_coefficients(j_decompress_ptr cinfo); + +boolean jpegli_has_multiple_scans(j_decompress_ptr cinfo); + +boolean 
jpegli_start_output(j_decompress_ptr cinfo, int scan_number); + +boolean jpegli_finish_output(j_decompress_ptr cinfo); + +boolean jpegli_input_complete(j_decompress_ptr cinfo); + +int jpegli_consume_input(j_decompress_ptr cinfo); + +#if JPEG_LIB_VERSION >= 80 +void jpegli_core_output_dimensions(j_decompress_ptr cinfo); +#endif +void jpegli_calc_output_dimensions(j_decompress_ptr cinfo); + +void jpegli_save_markers(j_decompress_ptr cinfo, int marker_code, + unsigned int length_limit); + +void jpegli_set_marker_processor(j_decompress_ptr cinfo, int marker_code, + jpeg_marker_parser_method routine); + +boolean jpegli_resync_to_restart(j_decompress_ptr cinfo, int desired); + +boolean jpegli_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr, + unsigned int *icc_data_len); + +void jpegli_abort_decompress(j_decompress_ptr cinfo); + +void jpegli_destroy_decompress(j_decompress_ptr cinfo); + +void jpegli_new_colormap(j_decompress_ptr cinfo); + +// +// New API functions that are not available in libjpeg +// +// NOTE: This part of the API is still experimental and will probably change in +// the future. +// + +void jpegli_set_output_format(j_decompress_ptr cinfo, JpegliDataType data_type, + JpegliEndianness endianness); + +#if defined(__cplusplus) || defined(c_plusplus) +} // extern "C" +#endif + +#endif // LIB_JPEGLI_DECODE_H_ diff --git a/lib/jpegli/decode_api_test.cc b/lib/jpegli/decode_api_test.cc new file mode 100644 index 0000000..c48b937 --- /dev/null +++ b/lib/jpegli/decode_api_test.cc @@ -0,0 +1,1304 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include <cmath> +#include <cstdint> +#include <vector> + +#include "lib/jpegli/decode.h" +#include "lib/jpegli/encode.h" +#include "lib/jpegli/test_utils.h" +#include "lib/jpegli/testing.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/sanitizers.h" + +namespace jpegli { +namespace { + +static constexpr uint8_t kFakeEoiMarker[2] = {0xff, 0xd9}; +static constexpr size_t kNumSourceBuffers = 4; + +// Custom source manager that refills the input buffer in chunks, simulating +// a file reader with a fixed buffer size. +class SourceManager { + public: + SourceManager(const uint8_t* data, size_t len, size_t max_chunk_size) + : data_(data), len_(len), max_chunk_size_(max_chunk_size) { + pub_.skip_input_data = skip_input_data; + pub_.resync_to_restart = jpegli_resync_to_restart; + pub_.term_source = term_source; + pub_.init_source = init_source; + pub_.fill_input_buffer = fill_input_buffer; + if (max_chunk_size_ == 0) max_chunk_size_ = len; + buffers_.resize(kNumSourceBuffers, std::vector<uint8_t>(max_chunk_size_)); + Reset(); + } + + void Reset() { + pub_.next_input_byte = nullptr; + pub_.bytes_in_buffer = 0; + pos_ = 0; + chunk_idx_ = 0; + } + + ~SourceManager() { + EXPECT_EQ(0, pub_.bytes_in_buffer); + EXPECT_EQ(len_, pos_); + } + + private: + jpeg_source_mgr pub_; + const uint8_t* data_; + size_t len_; + size_t chunk_idx_; + size_t pos_; + size_t max_chunk_size_; + std::vector<std::vector<uint8_t>> buffers_; + + static void init_source(j_decompress_ptr cinfo) {} + + static boolean fill_input_buffer(j_decompress_ptr cinfo) { + auto src = reinterpret_cast<SourceManager*>(cinfo->src); + if (src->pos_ < src->len_) { + size_t chunk_size = std::min(src->len_ - src->pos_, src->max_chunk_size_); + size_t next_idx = ++src->chunk_idx_ % kNumSourceBuffers; + uint8_t* next_buffer = src->buffers_[next_idx].data(); + memcpy(next_buffer, src->data_ + src->pos_, chunk_size); + src->pub_.next_input_byte = next_buffer; + 
src->pub_.bytes_in_buffer = chunk_size; + } else { + src->pub_.next_input_byte = kFakeEoiMarker; + src->pub_.bytes_in_buffer = 2; + src->len_ += 2; + } + src->pos_ += src->pub_.bytes_in_buffer; + return TRUE; + } + + static void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { + auto src = reinterpret_cast<SourceManager*>(cinfo->src); + if (num_bytes <= 0) { + return; + } + if (src->pub_.bytes_in_buffer >= static_cast<size_t>(num_bytes)) { + src->pub_.bytes_in_buffer -= num_bytes; + src->pub_.next_input_byte += num_bytes; + } else { + src->pos_ += num_bytes - src->pub_.bytes_in_buffer; + src->pub_.bytes_in_buffer = 0; + } + } + + static void term_source(j_decompress_ptr cinfo) {} +}; + +uint8_t markers_seen[kMarkerSequenceLen]; +size_t num_markers_seen = 0; + +uint8_t get_next_byte(j_decompress_ptr cinfo) { + if (cinfo->src->bytes_in_buffer == 0) { + (*cinfo->src->fill_input_buffer)(cinfo); + } + cinfo->src->bytes_in_buffer--; + return *cinfo->src->next_input_byte++; +} + +boolean test_marker_processor(j_decompress_ptr cinfo) { + markers_seen[num_markers_seen] = cinfo->unread_marker; + size_t marker_len = (get_next_byte(cinfo) << 8) + get_next_byte(cinfo); + EXPECT_EQ(2 + ((num_markers_seen + 2) % sizeof(kMarkerData)), marker_len); + if (marker_len > 2) { + (*cinfo->src->skip_input_data)(cinfo, marker_len - 2); + } + ++num_markers_seen; + return TRUE; +} + +void ReadOutputImage(const DecompressParams& dparams, j_decompress_ptr cinfo, + TestImage* output) { + JDIMENSION xoffset = 0; + JDIMENSION yoffset = 0; + JDIMENSION xsize_cropped = cinfo->output_width; + JDIMENSION ysize_cropped = cinfo->output_height; + if (dparams.crop_output) { + xoffset = xsize_cropped = cinfo->output_width / 3; + yoffset = ysize_cropped = cinfo->output_height / 3; + jpegli_crop_scanline(cinfo, &xoffset, &xsize_cropped); + } + output->ysize = ysize_cropped; + output->xsize = cinfo->output_width; + output->components = cinfo->out_color_components; + output->data_type = 
dparams.data_type; + output->endianness = dparams.endianness; + size_t bytes_per_sample = jpegli_bytes_per_sample(dparams.data_type); + if (cinfo->raw_data_out) { + output->color_space = cinfo->jpeg_color_space; + for (int c = 0; c < cinfo->num_components; ++c) { + size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE; + size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE; + std::vector<uint8_t> plane(ysize * xsize * bytes_per_sample); + output->raw_data.emplace_back(std::move(plane)); + } + } else { + output->color_space = cinfo->out_color_space; + output->AllocatePixels(); + } + size_t total_output_lines = 0; + while (cinfo->output_scanline < cinfo->output_height) { + size_t max_lines; + size_t num_output_lines; + if (cinfo->raw_data_out) { + size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE; + EXPECT_EQ(cinfo->output_scanline, cinfo->output_iMCU_row * iMCU_height); + max_lines = iMCU_height; + std::vector<std::vector<JSAMPROW>> rowdata(cinfo->num_components); + std::vector<JSAMPARRAY> data(cinfo->num_components); + for (int c = 0; c < cinfo->num_components; ++c) { + size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE; + size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE; + size_t num_lines = cinfo->comp_info[c].v_samp_factor * DCTSIZE; + rowdata[c].resize(num_lines); + size_t y0 = cinfo->output_iMCU_row * num_lines; + for (size_t i = 0; i < num_lines; ++i) { + rowdata[c][i] = + y0 + i < ysize ? 
&output->raw_data[c][(y0 + i) * xsize] : nullptr; + } + data[c] = &rowdata[c][0]; + } + num_output_lines = jpegli_read_raw_data(cinfo, &data[0], max_lines); + } else { + size_t max_output_lines = dparams.max_output_lines; + if (max_output_lines == 0) max_output_lines = cinfo->output_height; + if (cinfo->output_scanline < yoffset) { + max_lines = yoffset - cinfo->output_scanline; + num_output_lines = jpegli_skip_scanlines(cinfo, max_lines); + } else if (cinfo->output_scanline >= yoffset + ysize_cropped) { + max_lines = cinfo->output_height - cinfo->output_scanline; + num_output_lines = jpegli_skip_scanlines(cinfo, max_lines); + } else { + size_t lines_left = yoffset + ysize_cropped - cinfo->output_scanline; + max_lines = std::min<size_t>(max_output_lines, lines_left); + size_t stride = cinfo->output_width * cinfo->out_color_components * + bytes_per_sample; + std::vector<JSAMPROW> scanlines(max_lines); + for (size_t i = 0; i < max_lines; ++i) { + size_t yidx = cinfo->output_scanline - yoffset + i; + scanlines[i] = &output->pixels[yidx * stride]; + } + num_output_lines = + jpegli_read_scanlines(cinfo, &scanlines[0], max_lines); + if (cinfo->quantize_colors) { + for (size_t i = 0; i < num_output_lines; ++i) { + UnmapColors(scanlines[i], cinfo->output_width, + cinfo->out_color_components, cinfo->colormap, + cinfo->actual_number_of_colors); + } + } + } + } + total_output_lines += num_output_lines; + EXPECT_EQ(total_output_lines, cinfo->output_scanline); + EXPECT_EQ(num_output_lines, max_lines); + } + EXPECT_EQ(cinfo->total_iMCU_rows, + DivCeil(cinfo->image_height, cinfo->max_v_samp_factor * DCTSIZE)); +} + +struct TestConfig { + std::string fn; + std::string fn_desc; + TestImage input; + CompressParams jparams; + DecompressParams dparams; + bool compare_to_orig = false; + float max_tolerance_factor = 1.01f; + float max_rms_dist = 1.0f; + float max_diff = 35.0f; +}; + +std::vector<uint8_t> GetTestJpegData(TestConfig& config) { + std::vector<uint8_t> compressed; + if 
(!config.fn.empty()) { + compressed = ReadTestData(config.fn.c_str()); + } else { + GeneratePixels(&config.input); + JXL_CHECK(EncodeWithJpegli(config.input, config.jparams, &compressed)); + } + if (config.dparams.size_factor < 1.0f) { + compressed.resize(compressed.size() * config.dparams.size_factor); + } + return compressed; +} + +void TestAPINonBuffered(const CompressParams& jparams, + const DecompressParams& dparams, + const TestImage& expected_output, + j_decompress_ptr cinfo, TestImage* output) { + if (jparams.add_marker) { + jpegli_save_markers(cinfo, kSpecialMarker0, 0xffff); + jpegli_save_markers(cinfo, kSpecialMarker1, 0xffff); + num_markers_seen = 0; + jpegli_set_marker_processor(cinfo, 0xe6, test_marker_processor); + jpegli_set_marker_processor(cinfo, 0xe7, test_marker_processor); + jpegli_set_marker_processor(cinfo, 0xe8, test_marker_processor); + } + if (!jparams.icc.empty()) { + jpegli_save_markers(cinfo, JPEG_APP0 + 2, 0xffff); + } + jpegli_read_header(cinfo, /*require_image=*/TRUE); + if (jparams.add_marker) { + EXPECT_EQ(num_markers_seen, kMarkerSequenceLen); + EXPECT_EQ(0, memcmp(markers_seen, kMarkerSequence, num_markers_seen)); + } + if (!jparams.icc.empty()) { + uint8_t* icc_data = nullptr; + unsigned int icc_len; + JXL_CHECK(jpegli_read_icc_profile(cinfo, &icc_data, &icc_len)); + JXL_CHECK(icc_data); + EXPECT_EQ(0, memcmp(jparams.icc.data(), icc_data, icc_len)); + free(icc_data); + } + // Check that jpegli_calc_output_dimensions can be called multiple times + // even with different parameters. 
+ if (!cinfo->raw_data_out) { + cinfo->scale_num = 1; + cinfo->scale_denom = 2; + } + jpegli_calc_output_dimensions(cinfo); + SetDecompressParams(dparams, cinfo); + jpegli_set_output_format(cinfo, dparams.data_type, dparams.endianness); + VerifyHeader(jparams, cinfo); + jpegli_calc_output_dimensions(cinfo); + EXPECT_LE(expected_output.xsize, cinfo->output_width); + if (!dparams.crop_output) { + EXPECT_EQ(expected_output.xsize, cinfo->output_width); + } + if (dparams.output_mode == COEFFICIENTS) { + jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(cinfo); + JXL_CHECK(coef_arrays != nullptr); + CopyCoefficients(cinfo, coef_arrays, output); + } else { + jpegli_start_decompress(cinfo); + VerifyScanHeader(jparams, cinfo); + ReadOutputImage(dparams, cinfo, output); + } + jpegli_finish_decompress(cinfo); +} + +void TestAPIBuffered(const CompressParams& jparams, + const DecompressParams& dparams, j_decompress_ptr cinfo, + std::vector<TestImage>* output_progression) { + EXPECT_EQ(JPEG_REACHED_SOS, + jpegli_read_header(cinfo, /*require_image=*/TRUE)); + cinfo->buffered_image = TRUE; + SetDecompressParams(dparams, cinfo); + jpegli_set_output_format(cinfo, dparams.data_type, dparams.endianness); + VerifyHeader(jparams, cinfo); + EXPECT_TRUE(jpegli_start_decompress(cinfo)); + // start decompress should not read the whole input in buffered image mode + EXPECT_FALSE(jpegli_input_complete(cinfo)); + bool has_multiple_scans = jpegli_has_multiple_scans(cinfo); + EXPECT_EQ(0, cinfo->output_scan_number); + int sos_marker_cnt = 1; // read_header reads the first SOS marker + while (!jpegli_input_complete(cinfo)) { + EXPECT_EQ(cinfo->input_scan_number, sos_marker_cnt); + if (dparams.skip_scans && (cinfo->input_scan_number % 2) != 1) { + int result = JPEG_SUSPENDED; + while (result != JPEG_REACHED_SOS && result != JPEG_REACHED_EOI) { + result = jpegli_consume_input(cinfo); + } + if (result == JPEG_REACHED_SOS) ++sos_marker_cnt; + continue; + } + SetScanDecompressParams(dparams, 
cinfo, cinfo->input_scan_number); + EXPECT_TRUE(jpegli_start_output(cinfo, cinfo->input_scan_number)); + // start output sets output_scan_number, but does not change + // input_scan_number + EXPECT_EQ(cinfo->output_scan_number, cinfo->input_scan_number); + EXPECT_EQ(cinfo->input_scan_number, sos_marker_cnt); + VerifyScanHeader(jparams, cinfo); + TestImage output; + ReadOutputImage(dparams, cinfo, &output); + output_progression->emplace_back(std::move(output)); + // read scanlines/read raw data does not change input/output scan number + EXPECT_EQ(cinfo->input_scan_number, sos_marker_cnt); + EXPECT_EQ(cinfo->output_scan_number, cinfo->input_scan_number); + EXPECT_TRUE(jpegli_finish_output(cinfo)); + ++sos_marker_cnt; // finish output reads the next SOS marker or EOI + if (dparams.output_mode == COEFFICIENTS) { + jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(cinfo); + JXL_CHECK(coef_arrays != nullptr); + CopyCoefficients(cinfo, coef_arrays, &output_progression->back()); + } + } + jpegli_finish_decompress(cinfo); + if (dparams.size_factor == 1.0f) { + EXPECT_EQ(has_multiple_scans, cinfo->input_scan_number > 1); + } +} + +TEST(DecodeAPITest, ReuseCinfo) { + TestImage input, output, expected; + std::vector<TestImage> output_progression, expected_output_progression; + CompressParams jparams; + DecompressParams dparams; + std::vector<uint8_t> compressed; + jpeg_decompress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + input.xsize = 129; + input.ysize = 73; + GeneratePixels(&input); + for (int h_samp : {2, 1}) { + for (int v_samp : {2, 1}) { + for (int progr : {0, 2}) { + jparams.h_sampling = {h_samp, 1, 1}; + jparams.v_sampling = {v_samp, 1, 1}; + jparams.progressive_mode = progr; + printf( + "Generating input with %dx%d chroma subsampling " + "progressive level %d\n", + h_samp, v_samp, progr); + JXL_CHECK(EncodeWithJpegli(input, jparams, &compressed)); + for (JpegIOMode 
output_mode : {PIXELS, RAW_DATA, COEFFICIENTS}) { + for (bool crop : {true, false}) { + if (crop && output_mode != PIXELS) continue; + for (int scale_num : {1, 2, 3, 4, 7, 8, 13, 16}) { + if (scale_num != 8 && output_mode != PIXELS) continue; + int scale_denom = 8; + while (scale_num % 2 == 0 && scale_denom % 2 == 0) { + scale_num /= 2; + scale_denom /= 2; + } + printf("Decoding with output mode %d output scaling %d/%d %s\n", + output_mode, scale_num, scale_denom, + crop ? "with cropped output" : ""); + dparams.output_mode = output_mode; + dparams.scale_num = scale_num; + dparams.scale_denom = scale_denom; + expected.Clear(); + DecodeWithLibjpeg(jparams, dparams, compressed, &expected); + output.Clear(); + cinfo.buffered_image = false; + cinfo.raw_data_out = false; + cinfo.scale_num = cinfo.scale_denom = 1; + SourceManager src(compressed.data(), compressed.size(), + 1u << 12); + cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src); + jpegli_read_header(&cinfo, /*require_image=*/TRUE); + jpegli_abort_decompress(&cinfo); + src.Reset(); + TestAPINonBuffered(jparams, dparams, expected, &cinfo, &output); + float max_rms = output_mode == COEFFICIENTS ? 
0.0f : 1.0f; + if (scale_num == 1 && scale_denom == 8 && h_samp != v_samp) { + max_rms = 5.0f; // libjpeg does not do fancy upsampling + } + VerifyOutputImage(expected, output, max_rms); + printf("Decoding in buffered image mode\n"); + expected_output_progression.clear(); + DecodeAllScansWithLibjpeg(jparams, dparams, compressed, + &expected_output_progression); + output_progression.clear(); + src.Reset(); + TestAPIBuffered(jparams, dparams, &cinfo, &output_progression); + JXL_CHECK(output_progression.size() == + expected_output_progression.size()); + for (size_t i = 0; i < output_progression.size(); ++i) { + const TestImage& output = output_progression[i]; + const TestImage& expected = expected_output_progression[i]; + VerifyOutputImage(expected, output, max_rms); + } + } + } + } + } + } + } + return true; + }; + ASSERT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); +} + +std::vector<TestConfig> GenerateBasicConfigs() { + std::vector<TestConfig> all_configs; + for (int samp : {1, 2}) { + for (int progr : {0, 2}) { + TestConfig config; + config.input.xsize = 257 + samp * 37; + config.input.ysize = 265 + (progr / 2) * 17; + config.jparams.h_sampling = {samp, 1, 1}; + config.jparams.v_sampling = {samp, 1, 1}; + config.jparams.progressive_mode = progr; + GeneratePixels(&config.input); + all_configs.push_back(config); + } + } + return all_configs; +} + +TEST(DecodeAPITest, ReuseCinfoSameMemSource) { + std::vector<TestConfig> all_configs = GenerateBasicConfigs(); + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + { + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + for (const TestConfig& config : all_configs) { + EncodeWithJpegli(config.input, config.jparams, &cinfo); + } + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + } + std::vector<TestImage> 
all_outputs(all_configs.size()); + { + jpeg_decompress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_mem_src(&cinfo, buffer, buffer_size); + for (size_t i = 0; i < all_configs.size(); ++i) { + TestAPINonBuffered(all_configs[i].jparams, DecompressParams(), + all_configs[i].input, &cinfo, &all_outputs[i]); + } + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); + } + for (size_t i = 0; i < all_configs.size(); ++i) { + VerifyOutputImage(all_configs[i].input, all_outputs[i], 2.35f); + } + if (buffer) free(buffer); +} + +TEST(DecodeAPITest, ReuseCinfoSameStdSource) { + std::vector<TestConfig> all_configs = GenerateBasicConfigs(); + FILE* tmpf = tmpfile(); + JXL_CHECK(tmpf); + { + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_stdio_dest(&cinfo, tmpf); + for (const TestConfig& config : all_configs) { + EncodeWithJpegli(config.input, config.jparams, &cinfo); + } + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + } + rewind(tmpf); + std::vector<TestImage> all_outputs(all_configs.size()); + { + jpeg_decompress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_stdio_src(&cinfo, tmpf); + for (size_t i = 0; i < all_configs.size(); ++i) { + TestAPINonBuffered(all_configs[i].jparams, DecompressParams(), + all_configs[i].input, &cinfo, &all_outputs[i]); + } + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); + } + for (size_t i = 0; i < all_configs.size(); ++i) { + VerifyOutputImage(all_configs[i].input, all_outputs[i], 2.35f); + } + fclose(tmpf); +} + +TEST(DecodeAPITest, AbbreviatedStreams) { + uint8_t* table_stream = nullptr; + unsigned long table_stream_size = 0; + uint8_t* data_stream 
= nullptr; + unsigned long data_stream_size = 0; + { + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &table_stream, &table_stream_size); + cinfo.input_components = 3; + cinfo.in_color_space = JCS_RGB; + jpegli_set_defaults(&cinfo); + jpegli_write_tables(&cinfo); + jpegli_mem_dest(&cinfo, &data_stream, &data_stream_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.optimize_coding = FALSE; + jpegli_set_progressive_level(&cinfo, 0); + jpegli_start_compress(&cinfo, FALSE); + JSAMPLE image[3] = {0}; + JSAMPROW row[] = {image}; + jpegli_write_scanlines(&cinfo, row, 1); + jpegli_finish_compress(&cinfo); + return true; + }; + EXPECT_TRUE(try_catch_block()); + EXPECT_LT(data_stream_size, 50); + jpegli_destroy_compress(&cinfo); + } + { + jpeg_decompress_struct cinfo = {}; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_mem_src(&cinfo, table_stream, table_stream_size); + jpegli_read_header(&cinfo, FALSE); + jpegli_mem_src(&cinfo, data_stream, data_stream_size); + jpegli_read_header(&cinfo, TRUE); + EXPECT_EQ(1, cinfo.image_width); + EXPECT_EQ(1, cinfo.image_height); + EXPECT_EQ(3, cinfo.num_components); + jpegli_start_decompress(&cinfo); + JSAMPLE image[3] = {0}; + JSAMPROW row[] = {image}; + jpegli_read_scanlines(&cinfo, row, 1); + EXPECT_EQ(0, image[0]); + EXPECT_EQ(0, image[1]); + EXPECT_EQ(0, image[2]); + jpegli_finish_decompress(&cinfo); + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); + } + if (table_stream) free(table_stream); + if (data_stream) free(data_stream); +} + +class DecodeAPITestParam : public ::testing::TestWithParam<TestConfig> {}; + +TEST_P(DecodeAPITestParam, TestAPI) { + TestConfig config = GetParam(); + const DecompressParams& dparams = config.dparams; + if (dparams.skip_scans) return; + const 
std::vector<uint8_t> compressed = GetTestJpegData(config); + SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size); + + TestImage output1; + DecodeWithLibjpeg(config.jparams, dparams, compressed, &output1); + + TestImage output0; + jpeg_decompress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src); + TestAPINonBuffered(config.jparams, dparams, output1, &cinfo, &output0); + return true; + }; + ASSERT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); + + if (config.compare_to_orig) { + double rms0 = DistanceRms(config.input, output0); + double rms1 = DistanceRms(config.input, output1); + printf("rms: %f vs %f\n", rms0, rms1); + EXPECT_LE(rms0, rms1 * config.max_tolerance_factor); + } else { + VerifyOutputImage(output0, output1, config.max_rms_dist, config.max_diff); + } +} + +class DecodeAPITestParamBuffered : public ::testing::TestWithParam<TestConfig> { +}; + +TEST_P(DecodeAPITestParamBuffered, TestAPI) { + TestConfig config = GetParam(); + const DecompressParams& dparams = config.dparams; + const std::vector<uint8_t> compressed = GetTestJpegData(config); + SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size); + + std::vector<TestImage> output_progression1; + DecodeAllScansWithLibjpeg(config.jparams, dparams, compressed, + &output_progression1); + + std::vector<TestImage> output_progression0; + jpeg_decompress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src); + TestAPIBuffered(config.jparams, dparams, &cinfo, &output_progression0); + return true; + }; + ASSERT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); + + ASSERT_EQ(output_progression0.size(), output_progression1.size()); + for (size_t i = 0; i < output_progression0.size(); ++i) 
{ + const TestImage& output = output_progression0[i]; + const TestImage& expected = output_progression1[i]; + if (config.compare_to_orig) { + double rms0 = DistanceRms(config.input, output); + double rms1 = DistanceRms(config.input, expected); + printf("rms: %f vs %f\n", rms0, rms1); + EXPECT_LE(rms0, rms1 * config.max_tolerance_factor); + } else { + VerifyOutputImage(expected, output, config.max_rms_dist, config.max_diff); + } + } +} + +std::vector<TestConfig> GenerateTests(bool buffered) { + std::vector<TestConfig> all_tests; + { + std::vector<std::pair<std::string, std::string>> testfiles({ + {"jxl/flower/flower.png.im_q85_420_progr.jpg", "Q85YUV420PROGR"}, + {"jxl/flower/flower.png.im_q85_420_R13B.jpg", "Q85YUV420R13B"}, + {"jxl/flower/flower.png.im_q85_444.jpg", "Q85YUV444"}, + }); + for (size_t i = 0; i < (buffered ? 1u : testfiles.size()); ++i) { + TestConfig config; + config.fn = testfiles[i].first; + config.fn_desc = testfiles[i].second; + for (size_t chunk_size : {0, 1, 64, 65536}) { + config.dparams.chunk_size = chunk_size; + for (size_t max_output_lines : {0, 1, 8, 16}) { + config.dparams.max_output_lines = max_output_lines; + config.dparams.output_mode = PIXELS; + all_tests.push_back(config); + } + { + config.dparams.max_output_lines = 16; + config.dparams.output_mode = RAW_DATA; + all_tests.push_back(config); + } + } + } + } + + { + std::vector<std::pair<std::string, std::string>> testfiles({ + {"jxl/flower/flower_small.q85_444_non_interleaved.jpg", + "Q85YUV444NonInterleaved"}, + {"jxl/flower/flower_small.q85_420_non_interleaved.jpg", + "Q85YUV420NonInterleaved"}, + {"jxl/flower/flower_small.q85_444_partially_interleaved.jpg", + "Q85YUV444PartiallyInterleaved"}, + {"jxl/flower/flower_small.q85_420_partially_interleaved.jpg", + "Q85YUV420PartiallyInterleaved"}, + {"jxl/flower/flower.png.im_q85_422.jpg", "Q85YUV422"}, + {"jxl/flower/flower.png.im_q85_440.jpg", "Q85YUV440"}, + {"jxl/flower/flower.png.im_q85_444_1x2.jpg", "Q85YUV444_1x2"}, + 
{"jxl/flower/flower.png.im_q85_asymmetric.jpg", "Q85Asymmetric"}, + {"jxl/flower/flower.png.im_q85_gray.jpg", "Q85Gray"}, + {"jxl/flower/flower.png.im_q85_luma_subsample.jpg", "Q85LumaSubsample"}, + {"jxl/flower/flower.png.im_q85_rgb.jpg", "Q85RGB"}, + {"jxl/flower/flower.png.im_q85_rgb_subsample_blue.jpg", + "Q85RGBSubsampleBlue"}, + {"jxl/flower/flower_small.cmyk.jpg", "CMYK"}, + }); + for (size_t i = 0; i < (buffered ? 4u : testfiles.size()); ++i) { + for (JpegIOMode output_mode : {PIXELS, RAW_DATA}) { + TestConfig config; + config.fn = testfiles[i].first; + config.fn_desc = testfiles[i].second; + config.dparams.output_mode = output_mode; + all_tests.push_back(config); + } + } + } + + // Tests for common chroma subsampling and output modes. + for (JpegIOMode output_mode : {PIXELS, RAW_DATA, COEFFICIENTS}) { + for (int h_samp : {1, 2}) { + for (int v_samp : {1, 2}) { + for (bool fancy : {true, false}) { + if (!fancy && (output_mode != PIXELS || h_samp * v_samp == 1)) { + continue; + } + TestConfig config; + config.dparams.output_mode = output_mode; + config.dparams.do_fancy_upsampling = fancy; + config.jparams.progressive_mode = 2; + config.jparams.h_sampling = {h_samp, 1, 1}; + config.jparams.v_sampling = {v_samp, 1, 1}; + if (output_mode == COEFFICIENTS) { + config.max_rms_dist = 0.0f; + } + all_tests.push_back(config); + } + } + } + } + + // Tests for partial input. 
+ for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f}) { + for (int progr : {0, 1, 3}) { + for (int samp : {1, 2}) { + for (bool skip_scans : {false, true}) { + if (skip_scans && (progr != 1 || size_factor < 0.5f)) continue; + for (JpegIOMode output_mode : {PIXELS, RAW_DATA}) { + TestConfig config; + config.input.xsize = 517; + config.input.ysize = 523; + config.jparams.h_sampling = {samp, 1, 1}; + config.jparams.v_sampling = {samp, 1, 1}; + config.jparams.progressive_mode = progr; + config.dparams.size_factor = size_factor; + config.dparams.output_mode = output_mode; + config.dparams.skip_scans = skip_scans; + // The last partially available block can behave differently. + // TODO(szabadka) Figure out if we can make the behaviour more + // similar. + config.max_rms_dist = samp == 1 ? 1.75f : 3.0f; + config.max_diff = 255.0f; + all_tests.push_back(config); + } + } + } + } + } + + // Tests for block smoothing. + for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f, 1.0f}) { + for (int samp : {1, 2}) { + for (bool skip_scans : {false, true}) { + if (skip_scans && size_factor < 0.3f) continue; + TestConfig config; + config.input.xsize = 517; + config.input.ysize = 523; + config.jparams.h_sampling = {samp, 1, 1}; + config.jparams.v_sampling = {samp, 1, 1}; + config.jparams.progressive_mode = 2; + config.dparams.size_factor = size_factor; + config.dparams.do_block_smoothing = true; + config.dparams.skip_scans = skip_scans; + // libjpeg does smoothing for incomplete scans differently at + // the border between current and previous scans. + config.max_rms_dist = 8.0f; + config.max_diff = 255.0f; + all_tests.push_back(config); + } + } + } + + // Test for switching output color quantization modes between scans. 
+ if (buffered) { + TestConfig config; + config.jparams.progressive_mode = 2; + config.dparams.quantize_colors = true; + config.dparams.scan_params = { + {3, JDITHER_NONE, CQUANT_1PASS}, {4, JDITHER_ORDERED, CQUANT_1PASS}, + {5, JDITHER_FS, CQUANT_1PASS}, {6, JDITHER_NONE, CQUANT_EXTERNAL}, + {8, JDITHER_NONE, CQUANT_REUSE}, {9, JDITHER_NONE, CQUANT_EXTERNAL}, + {10, JDITHER_NONE, CQUANT_2PASS}, {11, JDITHER_NONE, CQUANT_REUSE}, + {12, JDITHER_NONE, CQUANT_2PASS}, {13, JDITHER_FS, CQUANT_2PASS}, + }; + config.compare_to_orig = true; + config.max_tolerance_factor = 1.04f; + all_tests.push_back(config); + } + + if (buffered) { + return all_tests; + } + + // Tests for output color quantization. + for (int num_colors : {8, 64, 256}) { + for (ColorQuantMode mode : {CQUANT_1PASS, CQUANT_EXTERNAL, CQUANT_2PASS}) { + if (mode == CQUANT_EXTERNAL && num_colors != 256) continue; + for (J_DITHER_MODE dither : {JDITHER_NONE, JDITHER_ORDERED, JDITHER_FS}) { + if (mode == CQUANT_EXTERNAL && dither != JDITHER_NONE) continue; + if (mode != CQUANT_1PASS && dither == JDITHER_ORDERED) continue; + for (bool crop : {false, true}) { + for (bool scale : {false, true}) { + for (bool samp : {false, true}) { + if ((num_colors != 256) && (crop || scale || samp)) { + continue; + } + if (mode == CQUANT_2PASS && crop) continue; + TestConfig config; + config.input.xsize = 1024; + config.input.ysize = 768; + config.dparams.quantize_colors = true; + config.dparams.desired_number_of_colors = num_colors; + config.dparams.scan_params = {{kLastScan, dither, mode}}; + config.dparams.crop_output = crop; + if (scale) { + config.dparams.scale_num = 7; + config.dparams.scale_denom = 8; + } + if (samp) { + config.jparams.h_sampling = {2, 1, 1}; + config.jparams.v_sampling = {2, 1, 1}; + } + if (!scale && !crop) { + config.compare_to_orig = true; + if (dither != JDITHER_NONE) { + config.max_tolerance_factor = 1.05f; + } + if (mode == CQUANT_2PASS && + (num_colors == 8 || dither == JDITHER_FS)) { + // 
TODO(szabadka) Lower this bound. + config.max_tolerance_factor = 1.5f; + } + } else { + // We only test for buffer overflows, etc. + config.max_rms_dist = 100.0f; + config.max_diff = 255.0f; + } + all_tests.push_back(config); + } + } + } + } + } + } + + // Tests for output formats. + for (JpegliDataType type : + {JPEGLI_TYPE_UINT8, JPEGLI_TYPE_UINT16, JPEGLI_TYPE_FLOAT}) { + for (JpegliEndianness endianness : + {JPEGLI_NATIVE_ENDIAN, JPEGLI_LITTLE_ENDIAN, JPEGLI_BIG_ENDIAN}) { + if (type == JPEGLI_TYPE_UINT8 && endianness != JPEGLI_NATIVE_ENDIAN) { + continue; + } + for (int channels = 1; channels <= 4; ++channels) { + TestConfig config; + config.dparams.data_type = type; + config.dparams.endianness = endianness; + config.input.color_space = JCS_UNKNOWN; + config.input.components = channels; + config.dparams.set_out_color_space = true; + config.dparams.out_color_space = JCS_UNKNOWN; + all_tests.push_back(config); + } + } + } + // Test for output cropping. + { + TestConfig config; + config.dparams.crop_output = true; + all_tests.push_back(config); + } + // Tests for color transforms. 
+ for (J_COLOR_SPACE out_color_space : {JCS_RGB, JCS_GRAYSCALE}) { + TestConfig config; + config.input.xsize = config.input.ysize = 256; + config.input.color_space = JCS_GRAYSCALE; + config.dparams.set_out_color_space = true; + config.dparams.out_color_space = out_color_space; + all_tests.push_back(config); + } + for (J_COLOR_SPACE jpeg_color_space : {JCS_RGB, JCS_YCbCr}) { + for (J_COLOR_SPACE out_color_space : {JCS_RGB, JCS_YCbCr, JCS_GRAYSCALE}) { + if (jpeg_color_space == JCS_RGB && out_color_space == JCS_YCbCr) continue; + TestConfig config; + config.input.xsize = config.input.ysize = 256; + config.jparams.set_jpeg_colorspace = true; + config.jparams.jpeg_color_space = jpeg_color_space; + config.dparams.set_out_color_space = true; + config.dparams.out_color_space = out_color_space; + all_tests.push_back(config); + } + } + for (J_COLOR_SPACE jpeg_color_space : {JCS_CMYK, JCS_YCCK}) { + for (J_COLOR_SPACE out_color_space : {JCS_CMYK, JCS_YCCK}) { + if (jpeg_color_space == JCS_CMYK && out_color_space == JCS_YCCK) continue; + TestConfig config; + config.input.xsize = config.input.ysize = 256; + config.input.color_space = JCS_CMYK; + config.jparams.set_jpeg_colorspace = true; + config.jparams.jpeg_color_space = jpeg_color_space; + config.dparams.set_out_color_space = true; + config.dparams.out_color_space = out_color_space; + all_tests.push_back(config); + } + } + // Tests for progressive levels. + for (int p = 0; p < 3 + NumTestScanScripts(); ++p) { + TestConfig config; + config.jparams.progressive_mode = p; + all_tests.push_back(config); + } + // Tests for RST markers. 
+ for (size_t r : {1, 17, 1024}) { + for (size_t chunk_size : {1, 65536}) { + for (int progr : {0, 2}) { + TestConfig config; + config.dparams.chunk_size = chunk_size; + config.jparams.progressive_mode = progr; + config.jparams.restart_interval = r; + all_tests.push_back(config); + } + } + } + for (size_t rr : {1, 3, 8, 100}) { + TestConfig config; + config.jparams.restart_in_rows = rr; + all_tests.push_back(config); + } + // Tests for custom quantization tables. + for (int type : {0, 1, 10, 100, 10000}) { + for (int scale : {1, 50, 100, 200, 500}) { + for (bool add_raw : {false, true}) { + for (bool baseline : {true, false}) { + if (!baseline && (add_raw || type * scale < 25500)) continue; + TestConfig config; + config.input.xsize = 64; + config.input.ysize = 64; + CustomQuantTable table; + table.table_type = type; + table.scale_factor = scale; + table.force_baseline = baseline; + table.add_raw = add_raw; + table.Generate(); + config.jparams.quant_tables.push_back(table); + config.jparams.quant_indexes = {0, 0, 0}; + config.compare_to_orig = true; + config.max_tolerance_factor = 1.02; + all_tests.push_back(config); + } + } + } + } + for (int qidx = 0; qidx < 8; ++qidx) { + if (qidx == 3) continue; + TestConfig config; + config.input.xsize = 256; + config.input.ysize = 256; + config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1, + (qidx >> 0) & 1}; + all_tests.push_back(config); + } + for (int qidx = 0; qidx < 8; ++qidx) { + for (int slot_idx = 0; slot_idx < 2; ++slot_idx) { + if (qidx == 0 && slot_idx == 0) continue; + TestConfig config; + config.input.xsize = 256; + config.input.ysize = 256; + config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1, + (qidx >> 0) & 1}; + CustomQuantTable table; + table.slot_idx = slot_idx; + table.Generate(); + config.jparams.quant_tables.push_back(table); + all_tests.push_back(config); + } + } + for (int qidx = 0; qidx < 8; ++qidx) { + for (bool xyb : {false, true}) { + TestConfig config; + 
config.input.xsize = 256; + config.input.ysize = 256; + config.jparams.xyb_mode = xyb; + config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1, + (qidx >> 0) & 1}; + { + CustomQuantTable table; + table.slot_idx = 0; + table.Generate(); + config.jparams.quant_tables.push_back(table); + } + { + CustomQuantTable table; + table.slot_idx = 1; + table.table_type = 20; + table.Generate(); + config.jparams.quant_tables.push_back(table); + } + config.compare_to_orig = true; + all_tests.push_back(config); + } + } + for (bool xyb : {false, true}) { + TestConfig config; + config.input.xsize = 256; + config.input.ysize = 256; + config.jparams.xyb_mode = xyb; + config.jparams.quant_indexes = {0, 1, 2}; + { + CustomQuantTable table; + table.slot_idx = 0; + table.Generate(); + config.jparams.quant_tables.push_back(table); + } + { + CustomQuantTable table; + table.slot_idx = 1; + table.table_type = 20; + table.Generate(); + config.jparams.quant_tables.push_back(table); + } + { + CustomQuantTable table; + table.slot_idx = 2; + table.table_type = 30; + table.Generate(); + config.jparams.quant_tables.push_back(table); + } + config.compare_to_orig = true; + all_tests.push_back(config); + } + // Tests for fixed (and custom) prefix codes. 
+ for (J_COLOR_SPACE jpeg_color_space : {JCS_RGB, JCS_YCbCr}) { + for (bool flat_dc_luma : {false, true}) { + TestConfig config; + config.jparams.set_jpeg_colorspace = true; + config.jparams.jpeg_color_space = jpeg_color_space; + config.jparams.progressive_mode = 0; + config.jparams.optimize_coding = 0; + config.jparams.use_flat_dc_luma_code = flat_dc_luma; + all_tests.push_back(config); + } + } + for (J_COLOR_SPACE jpeg_color_space : {JCS_CMYK, JCS_YCCK}) { + for (bool flat_dc_luma : {false, true}) { + TestConfig config; + config.input.color_space = JCS_CMYK; + config.jparams.set_jpeg_colorspace = true; + config.jparams.jpeg_color_space = jpeg_color_space; + config.jparams.progressive_mode = 0; + config.jparams.optimize_coding = 0; + config.jparams.use_flat_dc_luma_code = flat_dc_luma; + all_tests.push_back(config); + } + } + // Test for jpeg without DHT marker. + { + TestConfig config; + config.jparams.progressive_mode = 0; + config.jparams.optimize_coding = 0; + config.jparams.omit_standard_tables = true; + all_tests.push_back(config); + } + // Test for custom component ids. + { + TestConfig config; + config.input.xsize = config.input.ysize = 128; + config.jparams.comp_ids = {7, 17, 177}; + all_tests.push_back(config); + } + // Tests for JFIF/Adobe markers. + for (int override_JFIF : {-1, 0, 1}) { + for (int override_Adobe : {-1, 0, 1}) { + if (override_JFIF == -1 && override_Adobe == -1) continue; + TestConfig config; + config.input.xsize = config.input.ysize = 128; + config.jparams.override_JFIF = override_JFIF; + config.jparams.override_Adobe = override_Adobe; + all_tests.push_back(config); + } + } + // Tests for small images. + for (int xsize : {1, 7, 8, 9, 15, 16, 17}) { + for (int ysize : {1, 7, 8, 9, 15, 16, 17}) { + TestConfig config; + config.input.xsize = xsize; + config.input.ysize = ysize; + all_tests.push_back(config); + } + } + // Tests for custom marker processor. 
+ for (size_t chunk_size : {0, 1, 64, 65536}) { + TestConfig config; + config.input.xsize = config.input.ysize = 256; + config.dparams.chunk_size = chunk_size; + config.jparams.add_marker = true; + all_tests.push_back(config); + } + // Tests for icc profile decoding. + for (size_t icc_size : {728, 70000, 1000000}) { + TestConfig config; + config.input.xsize = config.input.ysize = 256; + config.jparams.icc.resize(icc_size); + for (size_t i = 0; i < icc_size; ++i) { + config.jparams.icc[i] = (i * 17) & 0xff; + } + all_tests.push_back(config); + } + // Tests for unusual sampling factors. + for (int h0_samp : {1, 2, 3, 4}) { + for (int v0_samp : {1, 2, 3, 4}) { + for (int dxb = 0; dxb < h0_samp; ++dxb) { + for (int dyb = 0; dyb < v0_samp; ++dyb) { + for (int dx = 0; dx < 2; ++dx) { + for (int dy = 0; dy < 2; ++dy) { + TestConfig config; + config.input.xsize = 128 + dyb * 8 + dy; + config.input.ysize = 256 + dxb * 8 + dx; + config.jparams.progressive_mode = 2; + config.jparams.h_sampling = {h0_samp, 1, 1}; + config.jparams.v_sampling = {v0_samp, 1, 1}; + config.compare_to_orig = true; + all_tests.push_back(config); + } + } + } + } + } + } + for (int h0_samp : {1, 2, 4}) { + for (int v0_samp : {1, 2, 4}) { + for (int h2_samp : {1, 2, 4}) { + for (int v2_samp : {1, 2, 4}) { + TestConfig config; + config.input.xsize = 137; + config.input.ysize = 75; + config.jparams.progressive_mode = 2; + config.jparams.h_sampling = {h0_samp, 1, h2_samp}; + config.jparams.v_sampling = {v0_samp, 1, v2_samp}; + config.compare_to_orig = true; + all_tests.push_back(config); + } + } + } + } + for (int h0_samp : {1, 3}) { + for (int v0_samp : {1, 3}) { + for (int h2_samp : {1, 3}) { + for (int v2_samp : {1, 3}) { + TestConfig config; + config.input.xsize = 205; + config.input.ysize = 99; + config.jparams.progressive_mode = 2; + config.jparams.h_sampling = {h0_samp, 1, h2_samp}; + config.jparams.v_sampling = {v0_samp, 1, v2_samp}; + all_tests.push_back(config); + } + } + } + } + // Tests for 
output scaling. + for (int scale_num = 1; scale_num <= 16; ++scale_num) { + if (scale_num == 8) continue; + for (bool crop : {false, true}) { + for (int samp : {1, 2}) { + for (int progr : {0, 2}) { + TestConfig config; + config.jparams.h_sampling = {samp, 1, 1}; + config.jparams.v_sampling = {samp, 1, 1}; + config.jparams.progressive_mode = progr; + config.dparams.scale_num = scale_num; + config.dparams.scale_denom = 8; + config.dparams.crop_output = crop; + all_tests.push_back(config); + } + } + } + } + return all_tests; +} + +// Returns the test-name fragment describing a color quantization mode, or an +// empty string for a value not handled by the switch. +std::string QuantMode(ColorQuantMode mode) { + switch (mode) { + case CQUANT_1PASS: + return "1pass"; + case CQUANT_EXTERNAL: + return "External"; + case CQUANT_2PASS: + return "2pass"; + case CQUANT_REUSE: + return "Reuse"; + } + return ""; +} + +// Returns the test-name fragment describing a dithering mode, or an empty +// string for a value not handled by the switch. +std::string DitherMode(J_DITHER_MODE mode) { + switch (mode) { + case JDITHER_NONE: + return "No"; + case JDITHER_ORDERED: + return "Ordered"; + case JDITHER_FS: + return "FS"; + } + return ""; +} + +// Streams a description of the decompression parameters. The fragments are +// concatenated without separators because the result becomes part of the +// parameterized test name (see TestDescription below). +std::ostream& operator<<(std::ostream& os, const DecompressParams& dparams) { + if (dparams.chunk_size == 0) { + os << "CompleteInput"; + } else { + os << "InputChunks" << dparams.chunk_size; + } + if (dparams.size_factor < 1.0f) { + os << "Partial" << static_cast<int>(dparams.size_factor * 100) << "p"; + } + if (dparams.max_output_lines == 0) { + os << "CompleteOutput"; + } else { + os << "OutputLines" << dparams.max_output_lines; + } + if (dparams.output_mode == RAW_DATA) { + os << "RawDataOut"; + } else if (dparams.output_mode == COEFFICIENTS) { + os << "CoeffsOut"; + } + os << IOMethodName(dparams.data_type, dparams.endianness); + if (dparams.set_out_color_space) { + os << "OutColor" << ColorSpaceName((J_COLOR_SPACE)dparams.out_color_space); + } + if (dparams.crop_output) { + os << "Crop"; + } + if (dparams.do_block_smoothing) { + os << "BlockSmoothing"; + } + if (!dparams.do_fancy_upsampling) { + os << "NoFancyUpsampling"; + } + if (dparams.scale_num != 1 || dparams.scale_denom != 1) { + os <<
"Scale" << dparams.scale_num << "_" << dparams.scale_denom; + } + if (dparams.quantize_colors) { + os << "Quant" << dparams.desired_number_of_colors << "colors"; + for (size_t i = 0; i < dparams.scan_params.size(); ++i) { + if (i > 0) os << "_"; + const auto& sparam = dparams.scan_params[i]; + os << QuantMode(sparam.color_quant_mode); + os << DitherMode((J_DITHER_MODE)sparam.dither_mode) << "Dither"; + } + } + if (dparams.skip_scans) { + os << "SkipScans"; + } + return os; +} + +// Streams the description of a whole test configuration: c.fn_desc when c.fn +// is non-empty, otherwise c.input, followed by c.jparams and c.dparams. +std::ostream& operator<<(std::ostream& os, const TestConfig& c) { + if (!c.fn.empty()) { + os << c.fn_desc; + } else { + os << c.input; + } + os << c.jparams; + os << c.dparams; + return os; +} + +// Builds the parameterized test name by streaming the TestConfig to a string. +std::string TestDescription(const testing::TestParamInfo<TestConfig>& info) { + std::stringstream name; + name << info.param; + return name.str(); +} + +JPEGLI_INSTANTIATE_TEST_SUITE_P(DecodeAPITest, DecodeAPITestParam, + testing::ValuesIn(GenerateTests(false)), + TestDescription); + +JPEGLI_INSTANTIATE_TEST_SUITE_P(DecodeAPITestBuffered, + DecodeAPITestParamBuffered, + testing::ValuesIn(GenerateTests(true)), + TestDescription); + +} // namespace +} // namespace jpegli diff --git a/lib/jpegli/decode_internal.h b/lib/jpegli/decode_internal.h new file mode 100644 index 0000000..ed7baa3 --- /dev/null +++ b/lib/jpegli/decode_internal.h @@ -0,0 +1,151 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file.
+ +#ifndef LIB_JPEGLI_DECODE_INTERNAL_H_ +#define LIB_JPEGLI_DECODE_INTERNAL_H_ + +#include <stdint.h> +#include <sys/types.h> + +#include <vector> + +#include "lib/jpegli/common.h" +#include "lib/jpegli/common_internal.h" +#include "lib/jpegli/huffman.h" + +namespace jpegli { + +// Internal decoder status codes. kNeedMoreInput, kHandleMarkerProcessor and +// kProcessNextMarker are returned by the marker parser in decode_marker.cc. +static constexpr int kNeedMoreInput = 100; +static constexpr int kHandleRestart = 101; +static constexpr int kHandleMarkerProcessor = 102; +static constexpr int kProcessNextMarker = 103; +static constexpr size_t kAllHuffLutSize = NUM_HUFF_TBLS * kJpegHuffmanLutSize; + +typedef int16_t coeff_t; + +// State of the decoder that has to be saved before decoding one MCU in case +// we run out of the bitstream. +struct MCUCodingState { + coeff_t last_dc_coeff[kMaxComponents]; + int eobrun; + coeff_t coeffs[D_MAX_BLOCKS_IN_MCU * DCTSIZE2]; +}; + +} // namespace jpegli + +// Use this forward-declared libjpeg struct to hold all our private variables. +// TODO(szabadka) Remove variables that have a corresponding version in cinfo. +struct jpeg_decomp_master { + // + // Input handling state. + // + std::vector<uint8_t> input_buffer_; + size_t input_buffer_pos_; + // Number of bits after codestream_pos_ that were already processed. + size_t codestream_bits_ahead_; + bool streaming_mode_; + + // Coefficient buffers + jvirt_barray_ptr* coef_arrays; + JBLOCKARRAY coeff_rows[jpegli::kMaxComponents]; + + // + // Marker data processing state. + // + bool found_soi_; + bool found_dri_; + bool found_sof_; + bool found_eoi_; + size_t icc_index_; + size_t icc_total_; + std::vector<uint8_t> icc_profile_; + jpegli::HuffmanTableEntry dc_huff_lut_[jpegli::kAllHuffLutSize]; + jpegli::HuffmanTableEntry ac_huff_lut_[jpegli::kAllHuffLutSize]; + // Indexed by (marker - 0xe0); a nonzero entry means the marker segment is + // saved to cinfo->marker_list by the marker parser. + uint8_t markers_to_save_[32]; + // Custom parsers for the APPn markers, indexed by (marker - 0xe0). + jpeg_marker_parser_method app_marker_parsers[16]; + jpeg_marker_parser_method com_marker_parser; + // Whether this jpeg has multiple scans (progressive or non-interleaved + // sequential). + bool is_multiscan_; + + // Fields defined by SOF marker.
+ size_t iMCU_cols_; + // Ratio of the maximum sampling factor to each component's own sampling + // factor, horizontally and vertically. + int h_factor[jpegli::kMaxComponents]; + int v_factor[jpegli::kMaxComponents]; + + // Initialized at start of frame. For each component and coefficient index, + // a bitmask of the bit positions already coded by previous scans. + uint16_t scan_progression_[jpegli::kMaxComponents][DCTSIZE2]; + + // + // Per scan state. + // + size_t scan_mcu_row_; + size_t scan_mcu_col_; + size_t mcu_rows_per_iMCU_row_; + jpegli::coeff_t last_dc_coeff_[jpegli::kMaxComponents]; + int eobrun_; + int restarts_to_go_; + int next_restart_marker_; + + jpegli::MCUCodingState mcu_; + + // + // Rendering state. + // + int output_passes_done_; + JpegliDataType output_data_type_ = JPEGLI_TYPE_UINT8; + bool swap_endianness_ = false; + size_t xoffset_; + bool need_context_rows_; + + int min_scaled_dct_size; + int scaled_dct_size[jpegli::kMaxComponents]; + + size_t raw_height_[jpegli::kMaxComponents]; + jpegli::RowBuffer<float> raw_output_[jpegli::kMaxComponents]; + jpegli::RowBuffer<float> render_output_[jpegli::kMaxComponents]; + + void (*inverse_transform[jpegli::kMaxComponents])( + const int16_t* JXL_RESTRICT qblock, const float* JXL_RESTRICT dequant, + const float* JXL_RESTRICT biases, float* JXL_RESTRICT scratch_space, + float* JXL_RESTRICT output, size_t output_stride, size_t dctsize); + + void (*color_transform)(float* row[jpegli::kMaxComponents], size_t len); + + float* idct_scratch_; + float* upsample_scratch_; + uint8_t* output_scratch_; + int16_t* smoothing_scratch_; + float* dequant_; + // 1 = 1pass, 2 = 2pass, 3 = external + int quant_mode_; + int quant_pass_; + int num_colors_[jpegli::kMaxComponents]; + uint8_t* colormap_lut_; + uint8_t* pixels_; + JSAMPARRAY scanlines_; + std::vector<std::vector<uint8_t>> candidate_lists_; + bool regenerate_inverse_colormap_; + float* dither_[jpegli::kMaxComponents]; + float* error_row_[2 * jpegli::kMaxComponents]; + size_t dither_size_; + size_t dither_mask_; + + // Per channel and per frequency statistics about the number of nonzeros and + // the sum of coefficient absolute values, used in dequantization bias + // computation.
+ int* nonzeros_; + int* sumabs_; + size_t num_processed_blocks_[jpegli::kMaxComponents]; + float* biases_; +#define SAVED_COEFS 10 + // This holds the coef_bits of the scan before the current scan, + // i.e. the bottom half when rendering incomplete scans. + int (*coef_bits_latch)[SAVED_COEFS]; + int (*prev_coef_bits_latch)[SAVED_COEFS]; + bool apply_smoothing; +}; + +#endif // LIB_JPEGLI_DECODE_INTERNAL_H_ diff --git a/lib/jpegli/decode_marker.cc b/lib/jpegli/decode_marker.cc new file mode 100644 index 0000000..c5c5790 --- /dev/null +++ b/lib/jpegli/decode_marker.cc @@ -0,0 +1,588 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/decode_marker.h" + +#include <string.h> + +#include "lib/jpegli/common.h" +#include "lib/jpegli/decode_internal.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/huffman.h" +#include "lib/jpegli/memory_manager.h" +#include "lib/jxl/base/printf_macros.h" + +namespace jpegli { +namespace { + +constexpr int kMaxDimPixels = 65535; +// Tag at the start of an APP2 payload that carries an ICC profile chunk; the +// 12 bytes include the terminating NUL of the string literal. +constexpr uint8_t kIccProfileTag[12] = "ICC_PROFILE"; + +// Macros for commonly used error conditions.
+ +#define JPEG_VERIFY_LEN(n) \ + if (pos + (n) > len) { \ + return JPEGLI_ERROR("Unexpected end of marker: pos=%" PRIuS \ + " need=%d len=%" PRIuS, \ + pos, static_cast<int>(n), len); \ + } + +#define JPEG_VERIFY_INPUT(var, low, high) \ + if ((var) < (low) || (var) > (high)) { \ + return JPEGLI_ERROR("Invalid " #var ": %d", static_cast<int>(var)); \ + } + +#define JPEG_VERIFY_MARKER_END() \ + if (pos != len) { \ + return JPEGLI_ERROR("Invalid marker length: declared=%" PRIuS \ + " actual=%" PRIuS, \ + len, pos); \ + } + +inline int ReadUint8(const uint8_t* data, size_t* pos) { + return data[(*pos)++]; +} + +inline int ReadUint16(const uint8_t* data, size_t* pos) { + int v = (data[*pos] << 8) + data[*pos + 1]; + *pos += 2; + return v; +} + +void ProcessSOF(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + jpeg_decomp_master* m = cinfo->master; + if (!m->found_soi_) { + JPEGLI_ERROR("Unexpected SOF marker."); + } + if (m->found_sof_) { + JPEGLI_ERROR("Duplicate SOF marker."); + } + m->found_sof_ = true; + cinfo->progressive_mode = (cinfo->unread_marker == 0xc2); + cinfo->arith_code = 0; + size_t pos = 2; + JPEG_VERIFY_LEN(6); + cinfo->data_precision = ReadUint8(data, &pos); + cinfo->image_height = ReadUint16(data, &pos); + cinfo->image_width = ReadUint16(data, &pos); + cinfo->num_components = ReadUint8(data, &pos); + JPEG_VERIFY_INPUT(cinfo->data_precision, kJpegPrecision, kJpegPrecision); + JPEG_VERIFY_INPUT(cinfo->image_height, 1, kMaxDimPixels); + JPEG_VERIFY_INPUT(cinfo->image_width, 1, kMaxDimPixels); + JPEG_VERIFY_INPUT(cinfo->num_components, 1, kMaxComponents); + JPEG_VERIFY_LEN(3 * cinfo->num_components); + cinfo->comp_info = jpegli::Allocate<jpeg_component_info>( + cinfo, cinfo->num_components, JPOOL_IMAGE); + + // Read sampling factors and quant table index for each component. 
+ uint8_t ids_seen[256] = {0}; + cinfo->max_h_samp_factor = 1; + cinfo->max_v_samp_factor = 1; + for (int i = 0; i < cinfo->num_components; ++i) { + jpeg_component_info* comp = &cinfo->comp_info[i]; + comp->component_index = i; + const int id = ReadUint8(data, &pos); + if (ids_seen[id]) { // (cf. section B.2.2, syntax of Ci) + JPEGLI_ERROR("Duplicate ID %d in SOF.", id); + } + ids_seen[id] = 1; + comp->component_id = id; + int factor = ReadUint8(data, &pos); + int h_samp_factor = factor >> 4; + int v_samp_factor = factor & 0xf; + JPEG_VERIFY_INPUT(h_samp_factor, 1, MAX_SAMP_FACTOR); + JPEG_VERIFY_INPUT(v_samp_factor, 1, MAX_SAMP_FACTOR); + comp->h_samp_factor = h_samp_factor; + comp->v_samp_factor = v_samp_factor; + cinfo->max_h_samp_factor = + std::max(cinfo->max_h_samp_factor, h_samp_factor); + cinfo->max_v_samp_factor = + std::max(cinfo->max_v_samp_factor, v_samp_factor); + int quant_tbl_idx = ReadUint8(data, &pos); + JPEG_VERIFY_INPUT(quant_tbl_idx, 0, NUM_QUANT_TBLS - 1); + comp->quant_tbl_no = quant_tbl_idx; + if (cinfo->quant_tbl_ptrs[quant_tbl_idx] == nullptr) { + JPEGLI_ERROR("Quantization table with index %u not found", quant_tbl_idx); + } + comp->quant_table = nullptr; // will be allocated after SOS marker + } + JPEG_VERIFY_MARKER_END(); + + // Set the input colorspace based on the markers we have seen and set + // default output colorspace. + if (cinfo->num_components == 1) { + cinfo->jpeg_color_space = JCS_GRAYSCALE; + cinfo->out_color_space = JCS_GRAYSCALE; + } else if (cinfo->num_components == 3) { + if (cinfo->saw_JFIF_marker) { + cinfo->jpeg_color_space = JCS_YCbCr; + } else if (cinfo->saw_Adobe_marker) { + cinfo->jpeg_color_space = + cinfo->Adobe_transform == 0 ? 
JCS_RGB : JCS_YCbCr; + } else { + cinfo->jpeg_color_space = JCS_YCbCr; + if (cinfo->comp_info[0].component_id == 'R' && // + cinfo->comp_info[1].component_id == 'G' && // + cinfo->comp_info[2].component_id == 'B') { + cinfo->jpeg_color_space = JCS_RGB; + } + } + cinfo->out_color_space = JCS_RGB; + } else if (cinfo->num_components == 4) { + if (cinfo->saw_Adobe_marker) { + cinfo->jpeg_color_space = + cinfo->Adobe_transform == 0 ? JCS_CMYK : JCS_YCCK; + } else { + cinfo->jpeg_color_space = JCS_CMYK; + } + cinfo->out_color_space = JCS_CMYK; + } + + // We have checked above that none of the sampling factors are 0, so the max + // sampling factors can not be 0. + cinfo->total_iMCU_rows = + DivCeil(cinfo->image_height, cinfo->max_v_samp_factor * DCTSIZE); + m->iMCU_cols_ = + DivCeil(cinfo->image_width, cinfo->max_h_samp_factor * DCTSIZE); + // Compute the block dimensions for each component. + for (int i = 0; i < cinfo->num_components; ++i) { + jpeg_component_info* comp = &cinfo->comp_info[i]; + if (cinfo->max_h_samp_factor % comp->h_samp_factor != 0 || + cinfo->max_v_samp_factor % comp->v_samp_factor != 0) { + JPEGLI_ERROR("Non-integral subsampling ratios."); + } + m->h_factor[i] = cinfo->max_h_samp_factor / comp->h_samp_factor; + m->v_factor[i] = cinfo->max_v_samp_factor / comp->v_samp_factor; + comp->downsampled_width = DivCeil(cinfo->image_width, m->h_factor[i]); + comp->downsampled_height = DivCeil(cinfo->image_height, m->v_factor[i]); + comp->width_in_blocks = DivCeil(comp->downsampled_width, DCTSIZE); + comp->height_in_blocks = DivCeil(comp->downsampled_height, DCTSIZE); + } + memset(m->scan_progression_, 0, sizeof(m->scan_progression_)); +} + +void ProcessSOS(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + jpeg_decomp_master* m = cinfo->master; + if (!m->found_sof_) { + JPEGLI_ERROR("Unexpected SOS marker."); + } + size_t pos = 2; + JPEG_VERIFY_LEN(1); + cinfo->comps_in_scan = ReadUint8(data, &pos); + JPEG_VERIFY_INPUT(cinfo->comps_in_scan, 1, 
cinfo->num_components); + JPEG_VERIFY_INPUT(cinfo->comps_in_scan, 1, MAX_COMPS_IN_SCAN); + + JPEG_VERIFY_LEN(2 * cinfo->comps_in_scan); + bool is_interleaved = (cinfo->comps_in_scan > 1); + uint8_t ids_seen[256] = {0}; + cinfo->blocks_in_MCU = 0; + for (int i = 0; i < cinfo->comps_in_scan; ++i) { + int id = ReadUint8(data, &pos); + if (ids_seen[id]) { // (cf. section B.2.3, regarding CSj) + return JPEGLI_ERROR("Duplicate ID %d in SOS.", id); + } + ids_seen[id] = 1; + jpeg_component_info* comp = nullptr; + for (int j = 0; j < cinfo->num_components; ++j) { + if (cinfo->comp_info[j].component_id == id) { + comp = &cinfo->comp_info[j]; + cinfo->cur_comp_info[i] = comp; + } + } + if (!comp) { + return JPEGLI_ERROR("SOS marker: Could not find component with id %d", + id); + } + int c = ReadUint8(data, &pos); + comp->dc_tbl_no = c >> 4; + comp->ac_tbl_no = c & 0xf; + JPEG_VERIFY_INPUT(comp->dc_tbl_no, 0, 3); + JPEG_VERIFY_INPUT(comp->ac_tbl_no, 0, 3); + comp->MCU_width = is_interleaved ? comp->h_samp_factor : 1; + comp->MCU_height = is_interleaved ? comp->v_samp_factor : 1; + comp->MCU_blocks = comp->MCU_width * comp->MCU_height; + if (cinfo->blocks_in_MCU + comp->MCU_blocks > D_MAX_BLOCKS_IN_MCU) { + JPEGLI_ERROR("Too many blocks in MCU."); + } + for (int j = 0; j < comp->MCU_blocks; ++j) { + cinfo->MCU_membership[cinfo->blocks_in_MCU++] = i; + } + } + JPEG_VERIFY_LEN(3); + cinfo->Ss = ReadUint8(data, &pos); + cinfo->Se = ReadUint8(data, &pos); + JPEG_VERIFY_INPUT(cinfo->Ss, 0, 63); + JPEG_VERIFY_INPUT(cinfo->Se, cinfo->Ss, 63); + int c = ReadUint8(data, &pos); + cinfo->Ah = c >> 4; + cinfo->Al = c & 0xf; + JPEG_VERIFY_MARKER_END(); + + if (cinfo->input_scan_number == 0) { + m->is_multiscan_ = (cinfo->comps_in_scan < cinfo->num_components || + cinfo->progressive_mode); + } + if (cinfo->Ah != 0 && cinfo->Al != cinfo->Ah - 1) { + // section G.1.1.1.2 : Successive approximation control only improves + // by one bit at a time. 
+ JPEGLI_ERROR("Invalid progressive parameters: Al=%d Ah=%d", cinfo->Al, + cinfo->Ah); + } + if (!cinfo->progressive_mode) { + cinfo->Ss = 0; + cinfo->Se = 63; + cinfo->Ah = 0; + cinfo->Al = 0; + } + const uint16_t scan_bitmask = + cinfo->Ah == 0 ? (0xffff << cinfo->Al) : (1u << cinfo->Al); + const uint16_t refinement_bitmask = (1 << cinfo->Al) - 1; + if (!cinfo->coef_bits) { + cinfo->coef_bits = + Allocate<int[DCTSIZE2]>(cinfo, cinfo->num_components * 2, JPOOL_IMAGE); + m->coef_bits_latch = + Allocate<int[SAVED_COEFS]>(cinfo, cinfo->num_components, JPOOL_IMAGE); + m->prev_coef_bits_latch = + Allocate<int[SAVED_COEFS]>(cinfo, cinfo->num_components, JPOOL_IMAGE); + + for (int c = 0; c < cinfo->num_components; ++c) { + for (int i = 0; i < DCTSIZE2; ++i) { + cinfo->coef_bits[c][i] = -1; + if (i < SAVED_COEFS) { + m->coef_bits_latch[c][i] = -1; + } + } + } + } + + for (int i = 0; i < cinfo->comps_in_scan; ++i) { + int comp_idx = cinfo->cur_comp_info[i]->component_index; + for (int k = cinfo->Ss; k <= cinfo->Se; ++k) { + if (m->scan_progression_[comp_idx][k] & scan_bitmask) { + return JPEGLI_ERROR( + "Overlapping scans: component=%d k=%d prev_mask: %u cur_mask %u", + comp_idx, k, m->scan_progression_[i][k], scan_bitmask); + } + if (m->scan_progression_[comp_idx][k] & refinement_bitmask) { + return JPEGLI_ERROR( + "Invalid scan order, a more refined scan was already done: " + "component=%d k=%d prev_mask=%u cur_mask=%u", + comp_idx, k, m->scan_progression_[i][k], scan_bitmask); + } + m->scan_progression_[comp_idx][k] |= scan_bitmask; + } + } + if (cinfo->Al > 10) { + return JPEGLI_ERROR("Scan parameter Al=%d is not supported.", cinfo->Al); + } +} + +// Reads the Define Huffman Table (DHT) marker segment and builds the Huffman +// decoding table in either dc_huff_lut_ or ac_huff_lut_, depending on the type +// and solt_id of Huffman code being read. 
+void ProcessDHT(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + size_t pos = 2; + if (pos == len) { + return JPEGLI_ERROR("DHT marker: no Huffman table found"); + } + while (pos < len) { + JPEG_VERIFY_LEN(1 + kJpegHuffmanMaxBitLength); + // The index of the Huffman code in the current set of Huffman codes. For AC + // component Huffman codes, 0x10 is added to the index. + int slot_id = ReadUint8(data, &pos); + int huffman_index = slot_id; + int is_ac_table = (slot_id & 0x10) != 0; + JHUFF_TBL** table; + if (is_ac_table) { + huffman_index -= 0x10; + JPEG_VERIFY_INPUT(huffman_index, 0, NUM_HUFF_TBLS - 1); + table = &cinfo->ac_huff_tbl_ptrs[huffman_index]; + } else { + JPEG_VERIFY_INPUT(huffman_index, 0, NUM_HUFF_TBLS - 1); + table = &cinfo->dc_huff_tbl_ptrs[huffman_index]; + } + if (*table == nullptr) { + *table = jpegli_alloc_huff_table(reinterpret_cast<j_common_ptr>(cinfo)); + } + int total_count = 0; + for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) { + int count = ReadUint8(data, &pos); + (*table)->bits[i] = count; + total_count += count; + } + if (is_ac_table) { + JPEG_VERIFY_INPUT(total_count, 0, kJpegHuffmanAlphabetSize); + } else { + // Allow symbols up to 15 here, we check later whether any invalid symbols + // are actually decoded. + // TODO(szabadka) Make sure decoder works (does not crash) with up to + // 15-nbits DC symbols and then increase kJpegDCAlphabetSize. 
+ JPEG_VERIFY_INPUT(total_count, 0, 16); + } + JPEG_VERIFY_LEN(total_count); + for (int i = 0; i < total_count; ++i) { + int value = ReadUint8(data, &pos); + if (!is_ac_table) { + JPEG_VERIFY_INPUT(value, 0, 15); + } + (*table)->huffval[i] = value; + } + for (int i = total_count; i < kJpegHuffmanAlphabetSize; ++i) { + (*table)->huffval[i] = 0; + } + } + JPEG_VERIFY_MARKER_END(); +} + +void ProcessDQT(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + jpeg_decomp_master* m = cinfo->master; + if (m->found_sof_) { + JPEGLI_ERROR("Updating quant tables between scans is not supported."); + } + size_t pos = 2; + if (pos == len) { + return JPEGLI_ERROR("DQT marker: no quantization table found"); + } + while (pos < len) { + JPEG_VERIFY_LEN(1); + int quant_table_index = ReadUint8(data, &pos); + int precision = quant_table_index >> 4; + JPEG_VERIFY_INPUT(precision, 0, 1); + quant_table_index &= 0xf; + JPEG_VERIFY_INPUT(quant_table_index, 0, NUM_QUANT_TBLS - 1); + JPEG_VERIFY_LEN((precision + 1) * DCTSIZE2); + + if (cinfo->quant_tbl_ptrs[quant_table_index] == nullptr) { + cinfo->quant_tbl_ptrs[quant_table_index] = + jpegli_alloc_quant_table(reinterpret_cast<j_common_ptr>(cinfo)); + } + JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[quant_table_index]; + + for (size_t i = 0; i < DCTSIZE2; ++i) { + int quant_val = + precision ? ReadUint16(data, &pos) : ReadUint8(data, &pos); + JPEG_VERIFY_INPUT(quant_val, 1, 65535); + quant_table->quantval[kJPEGNaturalOrder[i]] = quant_val; + } + } + JPEG_VERIFY_MARKER_END(); +} + +void ProcessDNL(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + // Ignore marker. 
+} + +void ProcessDRI(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + jpeg_decomp_master* m = cinfo->master; + if (m->found_dri_) { + return JPEGLI_ERROR("Duplicate DRI marker."); + } + m->found_dri_ = true; + size_t pos = 2; + JPEG_VERIFY_LEN(2); + cinfo->restart_interval = ReadUint16(data, &pos); + JPEG_VERIFY_MARKER_END(); +} + +void ProcessAPP(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + jpeg_decomp_master* m = cinfo->master; + const uint8_t marker = cinfo->unread_marker; + const uint8_t* payload = data + 2; + size_t payload_size = len - 2; + if (marker == 0xE0) { + if (payload_size >= 14 && memcmp(payload, "JFIF", 4) == 0) { + cinfo->saw_JFIF_marker = TRUE; + cinfo->JFIF_major_version = payload[5]; + cinfo->JFIF_minor_version = payload[6]; + cinfo->density_unit = payload[7]; + cinfo->X_density = (payload[8] << 8) + payload[9]; + cinfo->Y_density = (payload[10] << 8) + payload[11]; + } + } else if (marker == 0xEE) { + if (payload_size >= 12 && memcmp(payload, "Adobe", 5) == 0) { + cinfo->saw_Adobe_marker = TRUE; + cinfo->Adobe_transform = payload[11]; + } + } else if (marker == 0xE2) { + if (payload_size >= sizeof(kIccProfileTag) && + memcmp(payload, kIccProfileTag, sizeof(kIccProfileTag)) == 0) { + payload += sizeof(kIccProfileTag); + payload_size -= sizeof(kIccProfileTag); + if (payload_size < 2) { + return JPEGLI_ERROR("ICC chunk is too small."); + } + uint8_t index = payload[0]; + uint8_t total = payload[1]; + ++m->icc_index_; + if (m->icc_index_ != index) { + return JPEGLI_ERROR("Invalid ICC chunk order."); + } + if (total == 0) { + return JPEGLI_ERROR("Invalid ICC chunk total."); + } + if (m->icc_total_ == 0) { + m->icc_total_ = total; + } else if (m->icc_total_ != total) { + return JPEGLI_ERROR("Invalid ICC chunk total."); + } + if (m->icc_index_ > m->icc_total_) { + return JPEGLI_ERROR("Invalid ICC chunk index."); + } + m->icc_profile_.insert(m->icc_profile_.end(), payload + 2, + payload + payload_size); + } + } +} + +void 
ProcessCOM(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + // Ignore marker. +} + +void ProcessSOI(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + jpeg_decomp_master* m = cinfo->master; + if (m->found_soi_) { + JPEGLI_ERROR("Duplicate SOI marker"); + } + m->found_soi_ = true; +} + +void ProcessEOI(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + cinfo->master->found_eoi_ = true; +} + +void SaveMarker(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + const uint8_t marker = cinfo->unread_marker; + const uint8_t* payload = data + 2; + size_t payload_size = len - 2; + + // Insert new saved marker to the head of the list. + jpeg_saved_marker_ptr next = cinfo->marker_list; + cinfo->marker_list = + jpegli::Allocate<jpeg_marker_struct>(cinfo, 1, JPOOL_IMAGE); + cinfo->marker_list->next = next; + cinfo->marker_list->marker = marker; + cinfo->marker_list->original_length = payload_size; + cinfo->marker_list->data_length = payload_size; + cinfo->marker_list->data = + jpegli::Allocate<uint8_t>(cinfo, payload_size, JPOOL_IMAGE); + memcpy(cinfo->marker_list->data, payload, payload_size); +} + +uint8_t ProcessNextMarker(j_decompress_ptr cinfo, const uint8_t* const data, + const size_t len, size_t* pos) { + jpeg_decomp_master* m = cinfo->master; + size_t num_skipped = 0; + uint8_t marker = cinfo->unread_marker; + if (marker == 0) { + // kIsValidMarker[i] == 1 means (0xc0 + i) is a valid marker. + static const uint8_t kIsValidMarker[] = { + 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + }; + // Skip bytes between markers. 
+ while (*pos + 1 < len && (data[*pos] != 0xff || data[*pos + 1] < 0xc0 || + !kIsValidMarker[data[*pos + 1] - 0xc0])) { + ++(*pos); + ++num_skipped; + } + if (*pos + 2 > len) { + return kNeedMoreInput; + } + marker = data[*pos + 1]; + if (num_skipped > 0) { + if (m->found_soi_) { + JPEGLI_WARN("Skipped %d bytes before marker 0x%02x", (int)num_skipped, + marker); + } else { + JPEGLI_ERROR("Did not find SOI marker."); + } + } + *pos += 2; + cinfo->unread_marker = marker; + } + if (!m->found_soi_ && marker != 0xd8) { + JPEGLI_ERROR("Did not find SOI marker."); + } + if (GetMarkerProcessor(cinfo)) { + return kHandleMarkerProcessor; + } + const uint8_t* marker_data = &data[*pos]; + size_t marker_len = 0; + if (marker != 0xd8 && marker != 0xd9) { + if (*pos + 2 > len) { + return kNeedMoreInput; + } + marker_len += (data[*pos] << 8) + data[*pos + 1]; + if (marker_len < 2) { + JPEGLI_ERROR("Invalid marker length"); + } + if (*pos + marker_len > len) { + // TODO(szabadka) Limit our memory usage by using the skip_input_data + // source manager callback on APP markers that are not saved. 
+ return kNeedMoreInput; + } + if (marker >= 0xe0 && m->markers_to_save_[marker - 0xe0]) { + SaveMarker(cinfo, marker_data, marker_len); + } + } + if (marker == 0xc0 || marker == 0xc1 || marker == 0xc2) { + ProcessSOF(cinfo, marker_data, marker_len); + } else if (marker == 0xc4) { + ProcessDHT(cinfo, marker_data, marker_len); + } else if (marker == 0xda) { + ProcessSOS(cinfo, marker_data, marker_len); + } else if (marker == 0xdb) { + ProcessDQT(cinfo, marker_data, marker_len); + } else if (marker == 0xdc) { + ProcessDNL(cinfo, marker_data, marker_len); + } else if (marker == 0xdd) { + ProcessDRI(cinfo, marker_data, marker_len); + } else if (marker >= 0xe0 && marker <= 0xef) { + ProcessAPP(cinfo, marker_data, marker_len); + } else if (marker == 0xfe) { + ProcessCOM(cinfo, marker_data, marker_len); + } else if (marker == 0xd8) { + ProcessSOI(cinfo, marker_data, marker_len); + } else if (marker == 0xd9) { + ProcessEOI(cinfo, marker_data, marker_len); + } else { + JPEGLI_ERROR("Unexpected marker 0x%x", marker); + } + *pos += marker_len; + cinfo->unread_marker = 0; + if (marker == 0xda) { + return JPEG_REACHED_SOS; + } else if (marker == 0xd9) { + return JPEG_REACHED_EOI; + } + return kProcessNextMarker; +} + +} // namespace + +jpeg_marker_parser_method GetMarkerProcessor(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + uint8_t marker = cinfo->unread_marker; + jpeg_marker_parser_method callback = nullptr; + if (marker >= 0xe0 && marker <= 0xef) { + callback = m->app_marker_parsers[marker - 0xe0]; + } else if (marker == 0xfe) { + callback = m->com_marker_parser; + } + return callback; +} + +int ProcessMarkers(j_decompress_ptr cinfo, const uint8_t* const data, + const size_t len, size_t* pos) { + for (;;) { + int status = ProcessNextMarker(cinfo, data, len, pos); + if (status != kProcessNextMarker) { + return status; + } + } +} + +} // namespace jpegli diff --git a/lib/jpegli/decode_marker.h b/lib/jpegli/decode_marker.h new file mode 100644 index 
0000000..fb24b3e --- /dev/null +++ b/lib/jpegli/decode_marker.h @@ -0,0 +1,32 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_DECODE_MARKER_H_ +#define LIB_JPEGLI_DECODE_MARKER_H_ + +#include <stdint.h> + +#include "lib/jpegli/common.h" + +namespace jpegli { + +// Reads the available input in the source manager's input buffer until either +// the end of the next SOS marker or the end of the input. +// The corresponding fields of cinfo are updated with the processed input data. +// Upon return, the input buffer will be at the start or at the end of a marker +// data segment (inter-marker data is allowed). +// Return value is one of: +// * kNeedMoreInput, if the current input buffer ends before the next SOS or +// EOI marker. Input buffer refill is handled by the caller; +// * kHandleMarkerProcessor, if a custom parser is installed for the marker; +// * JPEG_REACHED_SOS, if the next SOS marker is found; +// * JPEG_REACHED_EOI, if the EOI marker is found. +int ProcessMarkers(j_decompress_ptr cinfo, const uint8_t* const data, + const size_t len, size_t* pos); + +// Returns the custom parser installed for the current unread marker (APPn or +// COM), or nullptr for any other marker. +jpeg_marker_parser_method GetMarkerProcessor(j_decompress_ptr cinfo); + +} // namespace jpegli + +#endif // LIB_JPEGLI_DECODE_MARKER_H_ diff --git a/lib/jpegli/decode_scan.cc b/lib/jpegli/decode_scan.cc new file mode 100644 index 0000000..05b1f37 --- /dev/null +++ b/lib/jpegli/decode_scan.cc @@ -0,0 +1,566 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file.
// Helper structure to read bits from the entropy coded data segment.
//
// Bytes are pulled from data_[pos_...] into the 64-bit window val_; the low
// bits_left_ bits of val_ are the bits that have not been consumed yet.
// 0xff/0x00 byte stuffing is removed while reading, and once a 0xff followed
// by a non-zero byte (a marker) is seen, the window is padded with zero bytes.
struct BitReaderState {
  // Starts reading at data[pos]; the window is filled immediately.
  BitReaderState(const uint8_t* data, const size_t len, size_t pos)
      : data_(data), len_(len), start_pos_(pos) {
    Reset(pos);
  }

  // Discards any buffered bits and restarts reading at `pos`.
  void Reset(size_t pos) {
    pos_ = pos;
    val_ = 0;
    bits_left_ = 0;
    // No marker seen yet: treat the end of the buffer as the limit.
    next_marker_pos_ = len_;
    FillBitWindow();
  }

  // Returns the next byte and skips the 0xff/0x00 escape sequences.
  // At or past next_marker_pos_ this returns 0 but still advances pos_, so
  // FinishStream() can later tell how far past the data the reader ran.
  uint8_t GetNextByte() {
    if (pos_ >= next_marker_pos_) {
      ++pos_;
      return 0;
    }
    uint8_t c = data_[pos_++];
    if (c == 0xff) {
      // A 0xff in the very last buffer position is treated as escaped.
      uint8_t escape = pos_ < len_ ? data_[pos_] : 0;
      if (escape == 0) {
        ++pos_;
      } else {
        // 0xff was followed by a non-zero byte, which means that we found the
        // start of the next marker segment.
        // Note that the 0xff itself is still returned to the caller below.
        next_marker_pos_ = pos_ - 1;
      }
    }
    return c;
  }

  // Refills the window only when 16 or fewer bits remain, then tops it up to
  // more than 56 bits, so refills are amortized over several reads.
  void FillBitWindow() {
    if (bits_left_ <= 16) {
      while (bits_left_ <= 56) {
        val_ <<= 8;
        val_ |= (uint64_t)GetNextByte();
        bits_left_ += 8;
      }
    }
  }

  // Consumes and returns the next `nbits` bits, most significant bit first.
  int ReadBits(int nbits) {
    FillBitWindow();
    uint64_t val = (val_ >> (bits_left_ - nbits)) & ((1ULL << nbits) - 1);
    bits_left_ -= nbits;
    return val;
  }

  // Sets *pos to the next stream position, and *bit_pos to the bit position
  // within the next byte where parsing should continue.
  // Returns false if the stream ended too early.
  bool FinishStream(size_t* pos, size_t* bit_pos) {
    // Number of bits already consumed from the partially used byte.
    *bit_pos = (8 - (bits_left_ & 7)) & 7;
    // Give back some bytes that we did not use.
    int unused_bytes_left = DivCeil(bits_left_, 8);
    while (unused_bytes_left-- > 0) {
      --pos_;
      // If we give back a 0 byte, we need to check if it was a 0xff/0x00 escape
      // sequence, and if yes, we need to give back one more byte.
      if (((pos_ == len_ && pos_ == next_marker_pos_) ||
           (pos_ > 0 && pos_ < next_marker_pos_ && data_[pos_] == 0)) &&
          (data_[pos_ - 1] == 0xff)) {
        --pos_;
      }
    }
    if (pos_ >= next_marker_pos_) {
      *pos = next_marker_pos_;
      if (pos_ > next_marker_pos_ || *bit_pos > 0) {
        // Data ran out before the scan was complete.
        return false;
      }
    }
    *pos = pos_;
    return true;
  }

  const uint8_t* data_;     // input buffer (not owned)
  const size_t len_;        // number of bytes in data_
  size_t pos_;              // index of the next byte to read
  uint64_t val_;            // bit window
  int bits_left_;           // number of unread bits in val_
  size_t next_marker_pos_;  // position of the next marker, or len_ if unknown
  size_t start_pos_;        // position passed to the constructor
};
// Decodes one 8x8 block of DCT coefficients from the bit stream.
//
// dc_huff / ac_huff: Huffman lookup tables for the DC and AC symbols.
// Ss, Se:            first and last coefficient index (zig-zag order) coded
//                    in this scan; Ss == 0 means the DC coefficient is coded.
// Al:                decoded values are multiplied by 1 << Al.
// eobrun:            in/out count of remaining end-of-band blocks.
// last_dc_coeff:     in/out DC predictor (stored before scaling by 1 << Al).
// coeffs:            output block; only positions coded in the stream are
//                    written.
// Returns false if the stream contains an invalid symbol or an out-of-range
// coefficient position.
bool DecodeDCTBlock(const HuffmanTableEntry* dc_huff,
                    const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al,
                    int* eobrun, BitReaderState* br, coeff_t* last_dc_coeff,
                    coeff_t* coeffs) {
  // Nowadays multiplication is even faster than variable shift.
  int Am = 1 << Al;
  // End-of-band runs longer than one block are only accepted in AC-only scans.
  bool eobrun_allowed = Ss > 0;
  if (Ss == 0) {
    // DC coefficient: Huffman-coded size category followed by the raw bits of
    // the difference to the previous DC value.
    int s = ReadSymbol(dc_huff, br);
    if (s >= kJpegDCAlphabetSize) {
      return false;
    }
    int diff = 0;
    if (s > 0) {
      int bits = br->ReadBits(s);
      diff = HuffExtend(bits, s);
    }
    int coeff = diff + *last_dc_coeff;
    const int dc_coeff = coeff * Am;
    coeffs[0] = dc_coeff;
    // TODO(eustas): is there a more elegant / explicit way to check this?
    // Reject values that do not survive the narrowing store into coeff_t.
    if (dc_coeff != coeffs[0]) {
      return false;
    }
    *last_dc_coeff = coeff;
    ++Ss;
  }
  if (Ss > Se) {
    // DC-only scan: nothing more to read for this block.
    return true;
  }
  if (*eobrun > 0) {
    // This block is covered by a pending end-of-band run.
    --(*eobrun);
    return true;
  }
  for (int k = Ss; k <= Se; k++) {
    // Each AC symbol packs a zero-run length (high nibble) and the bit size
    // of the following coefficient (low nibble).
    int sr = ReadSymbol(ac_huff, br);
    if (sr >= kJpegHuffmanAlphabetSize) {
      return false;
    }
    int r = sr >> 4;
    int s = sr & 15;
    if (s > 0) {
      // Skip r zero coefficients, then read one non-zero coefficient.
      k += r;
      if (k > Se) {
        return false;
      }
      if (s + Al >= kJpegDCAlphabetSize) {
        return false;
      }
      int bits = br->ReadBits(s);
      int coeff = HuffExtend(bits, s);
      coeffs[kJPEGNaturalOrder[k]] = coeff * Am;
    } else if (r == 15) {
      // Run of 16 zeros (the loop increment supplies the 16th).
      k += 15;
    } else {
      // End of band: 2^r blocks plus r extra bits, counting this block.
      *eobrun = 1 << r;
      if (r > 0) {
        if (!eobrun_allowed) {
          return false;
        }
        *eobrun += br->ReadBits(r);
      }
      break;
    }
  }
  // Account for the current block. When no end-of-band symbol was read this
  // leaves *eobrun negative, which the `*eobrun > 0` check above treats the
  // same as zero.
  --(*eobrun);
  return true;
}
p1 : m1; + in_zero_run = false; + } else { + if (r != 15) { + *eobrun = 1 << r; + if (r > 0) { + if (!eobrun_allowed) { + return false; + } + *eobrun += br->ReadBits(r); + } + break; + } + in_zero_run = true; + } + do { + coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]]; + if (thiscoef != 0) { + if (br->ReadBits(1)) { + if ((thiscoef & p1) == 0) { + if (thiscoef >= 0) { + thiscoef += p1; + } else { + thiscoef += m1; + } + } + } + coeffs[kJPEGNaturalOrder[k]] = thiscoef; + } else { + if (--r < 0) { + break; + } + } + k++; + } while (k <= Se); + if (s) { + if (k > Se) { + return false; + } + coeffs[kJPEGNaturalOrder[k]] = s; + } + } + } + if (in_zero_run) { + return false; + } + if (*eobrun > 0) { + for (; k <= Se; k++) { + coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]]; + if (thiscoef != 0) { + if (br->ReadBits(1)) { + if ((thiscoef & p1) == 0) { + if (thiscoef >= 0) { + thiscoef += p1; + } else { + thiscoef += m1; + } + } + } + coeffs[kJPEGNaturalOrder[k]] = thiscoef; + } + } + } + --(*eobrun); + return true; +} + +void SaveMCUCodingState(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + memcpy(m->mcu_.last_dc_coeff, m->last_dc_coeff_, sizeof(m->last_dc_coeff_)); + m->mcu_.eobrun = m->eobrun_; + size_t offset = 0; + for (int i = 0; i < cinfo->comps_in_scan; ++i) { + const jpeg_component_info* comp = cinfo->cur_comp_info[i]; + int c = comp->component_index; + size_t block_x = m->scan_mcu_col_ * comp->MCU_width; + for (int iy = 0; iy < comp->MCU_height; ++iy) { + size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy; + size_t biy = block_y % comp->v_samp_factor; + if (block_y >= comp->height_in_blocks) { + continue; + } + size_t nblocks = + std::min<size_t>(comp->MCU_width, comp->width_in_blocks - block_x); + size_t ncoeffs = nblocks * DCTSIZE2; + coeff_t* coeffs = &m->coeff_rows[c][biy][block_x][0]; + memcpy(&m->mcu_.coeffs[offset], coeffs, ncoeffs * sizeof(coeffs[0])); + offset += ncoeffs; + } + } +} + +void 
RestoreMCUCodingState(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + memcpy(m->last_dc_coeff_, m->mcu_.last_dc_coeff, sizeof(m->last_dc_coeff_)); + m->eobrun_ = m->mcu_.eobrun; + size_t offset = 0; + for (int i = 0; i < cinfo->comps_in_scan; ++i) { + const jpeg_component_info* comp = cinfo->cur_comp_info[i]; + int c = comp->component_index; + size_t block_x = m->scan_mcu_col_ * comp->MCU_width; + for (int iy = 0; iy < comp->MCU_height; ++iy) { + size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy; + size_t biy = block_y % comp->v_samp_factor; + if (block_y >= comp->height_in_blocks) { + continue; + } + size_t nblocks = + std::min<size_t>(comp->MCU_width, comp->width_in_blocks - block_x); + size_t ncoeffs = nblocks * DCTSIZE2; + coeff_t* coeffs = &m->coeff_rows[c][biy][block_x][0]; + memcpy(coeffs, &m->mcu_.coeffs[offset], ncoeffs * sizeof(coeffs[0])); + offset += ncoeffs; + } + } +} + +bool FinishScan(j_decompress_ptr cinfo, const uint8_t* data, const size_t len, + size_t* pos, size_t* bit_pos) { + jpeg_decomp_master* m = cinfo->master; + if (m->eobrun_ > 0) { + JPEGLI_ERROR("End-of-block run too long."); + } + m->eobrun_ = -1; + memset(m->last_dc_coeff_, 0, sizeof(m->last_dc_coeff_)); + if (*bit_pos == 0) { + return true; + } + if (data[*pos] == 0xff) { + // After last br.FinishStream we checked that there is at least 2 bytes + // in the buffer. + JXL_DASSERT(*pos + 1 < len); + // br.FinishStream would have detected an early marker. 
// Makes m->coeff_rows[c] point at the coefficient block rows of the current
// iMCU row (cinfo->input_iMCU_row) for every component of the scan.
void PrepareForiMCURow(j_decompress_ptr cinfo) {
  jpeg_decomp_master* m = cinfo->master;
  for (int i = 0; i < cinfo->comps_in_scan; ++i) {
    const jpeg_component_info* comp = cinfo->cur_comp_info[i];
    int c = comp->component_index;
    int by0 = cinfo->input_iMCU_row * comp->v_samp_factor;
    // The last iMCU row may contain fewer block rows than v_samp_factor.
    int block_rows_left = comp->height_in_blocks - by0;
    int max_block_rows = std::min(comp->v_samp_factor, block_rows_left);
    // In streaming mode the array is always accessed from row 0.
    int offset = m->streaming_mode_ ? 0 : by0;
    m->coeff_rows[c] = (*cinfo->mem->access_virt_barray)(
        reinterpret_cast<j_common_ptr>(cinfo), m->coef_arrays[c], offset,
        max_block_rows, true);
  }
}

// Decodes MCUs from data[*pos...] (continuing at bit *bit_pos of that byte)
// until one iMCU row is complete, a restart marker is due, or the input runs
// out. *pos and *bit_pos are advanced past every fully decoded MCU.
// Returns kNeedMoreInput, kHandleRestart, JPEG_ROW_COMPLETED or
// JPEG_SCAN_COMPLETED.
int ProcessScan(j_decompress_ptr cinfo, const uint8_t* const data,
                const size_t len, size_t* pos, size_t* bit_pos) {
  if (len == 0) {
    return kNeedMoreInput;
  }
  jpeg_decomp_master* m = cinfo->master;
  for (;;) {
    // Handle the restart intervals.
    if (cinfo->restart_interval > 0 && m->restarts_to_go_ == 0) {
      if (!FinishScan(cinfo, data, len, pos, bit_pos)) {
        return kNeedMoreInput;
      }
      // Go to the next marker, warn if we had to skip any data.
      size_t num_skipped = 0;
      while (*pos + 1 < len && (data[*pos] != 0xff || data[*pos + 1] == 0 ||
                                data[*pos + 1] == 0xff)) {
        ++(*pos);
        ++num_skipped;
      }
      if (num_skipped > 0) {
        JPEGLI_WARN("Skipped %d bytes before restart marker", (int)num_skipped);
      }
      if (*pos + 2 > len) {
        return kNeedMoreInput;
      }
      // Hand the marker code to the caller and step over the two marker bytes.
      cinfo->unread_marker = data[*pos + 1];
      *pos += 2;
      return kHandleRestart;
    }

    size_t start_pos = *pos;
    BitReaderState br(data, len, start_pos);
    if (*bit_pos > 0) {
      // Skip the bits of the first byte that the previous MCU already used.
      br.ReadBits(*bit_pos);
    }
    // Only when the remaining input might be shorter than one MCU do we pay
    // for a snapshot that allows rolling this MCU back.
    if (start_pos + kMaxMCUByteSize > len) {
      SaveMCUCodingState(cinfo);
    }

    // Decode one MCU.
    HWY_ALIGN_MAX coeff_t sink_block[DCTSIZE2];
    bool scan_ok = true;
    for (int i = 0; i < cinfo->comps_in_scan; ++i) {
      const jpeg_component_info* comp = cinfo->cur_comp_info[i];
      int c = comp->component_index;
      const HuffmanTableEntry* dc_lut =
          &m->dc_huff_lut_[comp->dc_tbl_no * kJpegHuffmanLutSize];
      const HuffmanTableEntry* ac_lut =
          &m->ac_huff_lut_[comp->ac_tbl_no * kJpegHuffmanLutSize];
      for (int iy = 0; iy < comp->MCU_height; ++iy) {
        size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy;
        int biy = block_y % comp->v_samp_factor;
        for (int ix = 0; ix < comp->MCU_width; ++ix) {
          size_t block_x = m->scan_mcu_col_ * comp->MCU_width + ix;
          coeff_t* coeffs;
          if (block_x >= comp->width_in_blocks ||
              block_y >= comp->height_in_blocks) {
            // Note that it is OK that sink_block is uninitialized because
            // it will never be used in any branches, even in the RefineDCTBlock
            // case, because only DC scans can be interleaved and we don't use
            // the zero-ness of the DC coeff in the DC refinement code-path.
            coeffs = sink_block;
          } else {
            coeffs = &m->coeff_rows[c][biy][block_x][0];
          }
          // Ah == 0: first pass over these coefficients; otherwise this is a
          // successive-approximation refinement pass.
          if (cinfo->Ah == 0) {
            if (!DecodeDCTBlock(dc_lut, ac_lut, cinfo->Ss, cinfo->Se, cinfo->Al,
                                &m->eobrun_, &br,
                                &m->last_dc_coeff_[comp->component_index],
                                coeffs)) {
              scan_ok = false;
            }
          } else {
            if (!RefineDCTBlock(ac_lut, cinfo->Ss, cinfo->Se, cinfo->Al,
                                &m->eobrun_, &br, coeffs)) {
              scan_ok = false;
            }
          }
        }
      }
    }
    size_t new_pos;
    size_t new_bit_pos;
    bool stream_ok = br.FinishStream(&new_pos, &new_bit_pos);
    if (new_pos + 2 > len) {
      // If reading stopped within the last two bytes, we have to request more
      // input even if FinishStream() returned true, since the Huffman code
      // reader could have peeked ahead some bits past the current input chunk
      // and thus the last prefix code length could have been wrong. We can do
      // this because a valid JPEG bit stream has two extra bytes at the end.
      RestoreMCUCodingState(cinfo);
      return kNeedMoreInput;
    }
    *pos = new_pos;
    *bit_pos = new_bit_pos;
    if (!stream_ok) {
      // We hit a marker during parsing.
      JXL_DASSERT(data[*pos] == 0xff);
      JXL_DASSERT(data[*pos + 1] != 0);
      // NOTE(review): the snapshot is only taken above when fewer than
      // kMaxMCUByteSize bytes remained; confirm that restoring here cannot
      // pick up a snapshot from an earlier MCU.
      RestoreMCUCodingState(cinfo);
      JPEGLI_WARN("Incomplete scan detected.");
      return JPEG_SCAN_COMPLETED;
    }
    if (!scan_ok) {
      JPEGLI_ERROR("Failed to decode DCT block");
    }
    if (m->restarts_to_go_ > 0) {
      --m->restarts_to_go_;
    }
    // Advance to the next MCU, wrapping to the next MCU row when needed.
    ++m->scan_mcu_col_;
    if (m->scan_mcu_col_ == cinfo->MCUs_per_row) {
      ++m->scan_mcu_row_;
      m->scan_mcu_col_ = 0;
      if (m->scan_mcu_row_ == cinfo->MCU_rows_in_scan) {
        if (!FinishScan(cinfo, data, len, pos, bit_pos)) {
          return kNeedMoreInput;
        }
        break;
      } else if ((m->scan_mcu_row_ % m->mcu_rows_per_iMCU_row_) == 0) {
        // Current iMCU row is done.
        break;
      }
    }
  }
  ++cinfo->input_iMCU_row;
  if (cinfo->input_iMCU_row < cinfo->total_iMCU_rows) {
    PrepareForiMCURow(cinfo);
    return JPEG_ROW_COMPLETED;
  }
  return JPEG_SCAN_COMPLETED;
}
+// Return value is one of: +// * JPEG_SUSPENDED, if the input buffer ends before the end of an iMCU row; +// * JPEG_ROW_COMPLETED, if the next iMCU row (but not the scan) is reached; +// * JPEG_SCAN_COMPLETED, if the end of the scan is reached. +int ProcessScan(j_decompress_ptr cinfo, const uint8_t* const data, + const size_t len, size_t* pos, size_t* bit_pos); + +void PrepareForiMCURow(j_decompress_ptr cinfo); + +} // namespace jpegli + +#endif // LIB_JPEGLI_DECODE_SCAN_H_ diff --git a/lib/jpegli/destination_manager.cc b/lib/jpegli/destination_manager.cc new file mode 100644 index 0000000..9bc269f --- /dev/null +++ b/lib/jpegli/destination_manager.cc @@ -0,0 +1,148 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include <string.h> + +#include "lib/jpegli/encode.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/memory_manager.h" + +namespace jpegli { + +constexpr size_t kDestBufferSize = 64 << 10; + +struct StdioDestinationManager { + jpeg_destination_mgr pub; + FILE* f; + uint8_t* buffer; + + static void init_destination(j_compress_ptr cinfo) { + auto dest = reinterpret_cast<StdioDestinationManager*>(cinfo->dest); + dest->pub.next_output_byte = dest->buffer; + dest->pub.free_in_buffer = kDestBufferSize; + } + + static boolean empty_output_buffer(j_compress_ptr cinfo) { + auto dest = reinterpret_cast<StdioDestinationManager*>(cinfo->dest); + if (fwrite(dest->buffer, 1, kDestBufferSize, dest->f) != kDestBufferSize) { + JPEGLI_ERROR("Failed to write to output stream."); + } + dest->pub.next_output_byte = dest->buffer; + dest->pub.free_in_buffer = kDestBufferSize; + return TRUE; + } + + static void term_destination(j_compress_ptr cinfo) { + auto dest = reinterpret_cast<StdioDestinationManager*>(cinfo->dest); + size_t bytes_left = kDestBufferSize - dest->pub.free_in_buffer; + if (bytes_left && + fwrite(dest->buffer, 1, 
bytes_left, dest->f) != bytes_left) { + JPEGLI_ERROR("Failed to write to output stream."); + } + fflush(dest->f); + if (ferror(dest->f)) { + JPEGLI_ERROR("Failed to write to output stream."); + } + } +}; + +struct MemoryDestinationManager { + jpeg_destination_mgr pub; + // Output buffer supplied by the application + uint8_t** output; + unsigned long* output_size; + // Output buffer allocated by us. + uint8_t* temp_buffer; + // Current output buffer (either application supplied or allocated by us). + uint8_t* current_buffer; + size_t buffer_size; + + static void init_destination(j_compress_ptr cinfo) {} + + static boolean empty_output_buffer(j_compress_ptr cinfo) { + auto dest = reinterpret_cast<MemoryDestinationManager*>(cinfo->dest); + uint8_t* next_buffer = + reinterpret_cast<uint8_t*>(malloc(dest->buffer_size * 2)); + memcpy(next_buffer, dest->current_buffer, dest->buffer_size); + if (dest->temp_buffer != nullptr) { + free(dest->temp_buffer); + } + dest->temp_buffer = next_buffer; + dest->current_buffer = next_buffer; + *dest->output = next_buffer; + *dest->output_size = dest->buffer_size; + dest->pub.next_output_byte = next_buffer + dest->buffer_size; + dest->pub.free_in_buffer = dest->buffer_size; + dest->buffer_size *= 2; + return TRUE; + } + + static void term_destination(j_compress_ptr cinfo) { + auto dest = reinterpret_cast<MemoryDestinationManager*>(cinfo->dest); + *dest->output_size = dest->buffer_size - dest->pub.free_in_buffer; + } +}; + +} // namespace jpegli + +void jpegli_stdio_dest(j_compress_ptr cinfo, FILE* outfile) { + if (outfile == nullptr) { + JPEGLI_ERROR("jpegli_stdio_dest: Invalid destination."); + } + if (cinfo->dest && cinfo->dest->init_destination != + jpegli::StdioDestinationManager::init_destination) { + JPEGLI_ERROR("jpegli_stdio_dest: a different dest manager was already set"); + } + if (!cinfo->dest) { + cinfo->dest = reinterpret_cast<jpeg_destination_mgr*>( + jpegli::Allocate<jpegli::StdioDestinationManager>(cinfo, 1)); + } + auto 
dest = reinterpret_cast<jpegli::StdioDestinationManager*>(cinfo->dest); + dest->f = outfile; + dest->buffer = jpegli::Allocate<uint8_t>(cinfo, jpegli::kDestBufferSize); + dest->pub.next_output_byte = dest->buffer; + dest->pub.free_in_buffer = jpegli::kDestBufferSize; + dest->pub.init_destination = + jpegli::StdioDestinationManager::init_destination; + dest->pub.empty_output_buffer = + jpegli::StdioDestinationManager::empty_output_buffer; + dest->pub.term_destination = + jpegli::StdioDestinationManager::term_destination; +} + +void jpegli_mem_dest(j_compress_ptr cinfo, unsigned char** outbuffer, + unsigned long* outsize) { + if (outbuffer == nullptr || outsize == nullptr) { + JPEGLI_ERROR("jpegli_mem_dest: Invalid destination."); + } + if (cinfo->dest && cinfo->dest->init_destination != + jpegli::MemoryDestinationManager::init_destination) { + JPEGLI_ERROR("jpegli_mem_dest: a different dest manager was already set"); + } + if (!cinfo->dest) { + auto dest = jpegli::Allocate<jpegli::MemoryDestinationManager>(cinfo, 1); + dest->temp_buffer = nullptr; + cinfo->dest = reinterpret_cast<jpeg_destination_mgr*>(dest); + } + auto dest = reinterpret_cast<jpegli::MemoryDestinationManager*>(cinfo->dest); + dest->pub.init_destination = + jpegli::MemoryDestinationManager::init_destination; + dest->pub.empty_output_buffer = + jpegli::MemoryDestinationManager::empty_output_buffer; + dest->pub.term_destination = + jpegli::MemoryDestinationManager::term_destination; + dest->output = outbuffer; + dest->output_size = outsize; + if (*outbuffer == nullptr || *outsize == 0) { + dest->temp_buffer = + reinterpret_cast<uint8_t*>(malloc(jpegli::kDestBufferSize)); + *outbuffer = dest->temp_buffer; + *outsize = jpegli::kDestBufferSize; + } + dest->current_buffer = *outbuffer; + dest->buffer_size = *outsize; + dest->pub.next_output_byte = dest->current_buffer; + dest->pub.free_in_buffer = dest->buffer_size; +} diff --git a/lib/jpegli/downsample.cc b/lib/jpegli/downsample.cc new file mode 100644 
// Horizontal 2:1 downsampling: row_out[x] = (row_in[2x] + row_in[2x+1]) / 2
// for x in [0, len / 2).
// Output is stored Lanes(d) values at a time, so the last store may extend
// past len / 2 - presumably the rows are padded accordingly; TODO confirm.
void DownsampleRow2x1(const float* row_in, size_t len, float* row_out) {
  const size_t N = Lanes(d);
  const size_t len_out = len / 2;
  const auto mul = Set(d, 0.5f);
  Vec<D> v0, v1;
  for (size_t x = 0; x < len_out; x += N) {
    // De-interleave even and odd samples into v0 and v1.
    LoadInterleaved2(d, row_in + 2 * x, v0, v1);
    Store(Mul(mul, Add(v0, v1)), d, row_out + x);
  }
}

// Horizontal 3:1 downsampling: each output is the mean of three consecutive
// input samples, for x in [0, len / 3).
void DownsampleRow3x1(const float* row_in, size_t len, float* row_out) {
  const size_t N = Lanes(d);
  const size_t len_out = len / 3;
  const auto mul = Set(d, 1.0f / 3);
  Vec<D> v0, v1, v2;
  for (size_t x = 0; x < len_out; x += N) {
    LoadInterleaved3(d, row_in + 3 * x, v0, v1, v2);
    Store(Mul(mul, Add(Add(v0, v1), v2)), d, row_out + x);
  }
}

// Horizontal 4:1 downsampling: each output is the mean of four consecutive
// input samples, for x in [0, len / 4).
void DownsampleRow4x1(const float* row_in, size_t len, float* row_out) {
  const size_t N = Lanes(d);
  const size_t len_out = len / 4;
  const auto mul = Set(d, 0.25f);
  Vec<D> v0, v1, v2, v3;
  for (size_t x = 0; x < len_out; x += N) {
    LoadInterleaved4(d, row_in + 4 * x, v0, v1, v2, v3);
    Store(Mul(mul, Add(Add(v0, v1), Add(v2, v3))), d, row_out + x);
  }
}
Downsample3x1(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow3x1(rows_in[0], len, row_out); +} + +void Downsample4x1(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow4x1(rows_in[0], len, row_out); +} + +void Downsample1x2(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + const size_t N = Lanes(d); + const auto mul = Set(d, 0.5f); + float* row0 = rows_in[0]; + float* row1 = rows_in[1]; + for (size_t x = 0; x < len; x += N) { + Store(Mul(mul, Add(Load(d, row0 + x), Load(d, row1 + x))), d, row_out + x); + } +} + +void Downsample2x2(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + const size_t N = Lanes(d); + const size_t len_out = len / 2; + const auto mul = Set(d, 0.25f); + float* row0 = rows_in[0]; + float* row1 = rows_in[1]; + Vec<D> v0, v1, v2, v3; + for (size_t x = 0; x < len_out; x += N) { + LoadInterleaved2(d, row0 + 2 * x, v0, v1); + LoadInterleaved2(d, row1 + 2 * x, v2, v3); + Store(Mul(mul, Add(Add(v0, v1), Add(v2, v3))), d, row_out + x); + } +} + +void Downsample3x2(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow3x1(rows_in[0], len, rows_in[0]); + DownsampleRow3x1(rows_in[1], len, rows_in[1]); + Downsample1x2(rows_in, len / 3, row_out); +} + +void Downsample4x2(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow4x1(rows_in[0], len, rows_in[0]); + DownsampleRow4x1(rows_in[1], len, rows_in[1]); + Downsample1x2(rows_in, len / 4, row_out); +} + +void Downsample1x3(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + const size_t N = Lanes(d); + const auto mul = Set(d, 1.0f / 3); + float* row0 = rows_in[0]; + float* row1 = rows_in[1]; + float* row2 = rows_in[2]; + for (size_t x = 0; x < len; x += N) { + const auto in0 = Load(d, row0 + x); + const auto in1 = Load(d, row1 + x); + const auto in2 = Load(d, row2 + x); + Store(Mul(mul, Add(Add(in0, in1), in2)), d, row_out + x); + } +} + +void 
Downsample2x3(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow2x1(rows_in[0], len, rows_in[0]); + DownsampleRow2x1(rows_in[1], len, rows_in[1]); + DownsampleRow2x1(rows_in[2], len, rows_in[2]); + Downsample1x3(rows_in, len / 2, row_out); +} + +void Downsample3x3(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow3x1(rows_in[0], len, rows_in[0]); + DownsampleRow3x1(rows_in[1], len, rows_in[1]); + DownsampleRow3x1(rows_in[2], len, rows_in[2]); + Downsample1x3(rows_in, len / 3, row_out); +} + +void Downsample4x3(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow4x1(rows_in[0], len, rows_in[0]); + DownsampleRow4x1(rows_in[1], len, rows_in[1]); + DownsampleRow4x1(rows_in[2], len, rows_in[2]); + Downsample1x3(rows_in, len / 4, row_out); +} + +void Downsample1x4(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + const size_t N = Lanes(d); + const auto mul = Set(d, 0.25f); + float* row0 = rows_in[0]; + float* row1 = rows_in[1]; + float* row2 = rows_in[2]; + float* row3 = rows_in[3]; + for (size_t x = 0; x < len; x += N) { + const auto in0 = Load(d, row0 + x); + const auto in1 = Load(d, row1 + x); + const auto in2 = Load(d, row2 + x); + const auto in3 = Load(d, row3 + x); + Store(Mul(mul, Add(Add(in0, in1), Add(in2, in3))), d, row_out + x); + } +} + +void Downsample2x4(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow2x1(rows_in[0], len, rows_in[0]); + DownsampleRow2x1(rows_in[1], len, rows_in[1]); + DownsampleRow2x1(rows_in[2], len, rows_in[2]); + DownsampleRow2x1(rows_in[3], len, rows_in[3]); + Downsample1x4(rows_in, len / 2, row_out); +} + +void Downsample3x4(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow3x1(rows_in[0], len, rows_in[0]); + DownsampleRow3x1(rows_in[1], len, rows_in[1]); + DownsampleRow3x1(rows_in[2], len, rows_in[2]); + DownsampleRow3x1(rows_in[3], len, rows_in[3]); + 
Downsample1x4(rows_in, len / 3, row_out); +} + +void Downsample4x4(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow4x1(rows_in[0], len, rows_in[0]); + DownsampleRow4x1(rows_in[1], len, rows_in[1]); + DownsampleRow4x1(rows_in[2], len, rows_in[2]); + DownsampleRow4x1(rows_in[3], len, rows_in[3]); + Downsample1x4(rows_in, len / 4, row_out); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jpegli { + +HWY_EXPORT(Downsample1x2); +HWY_EXPORT(Downsample1x3); +HWY_EXPORT(Downsample1x4); +HWY_EXPORT(Downsample2x1); +HWY_EXPORT(Downsample2x2); +HWY_EXPORT(Downsample2x3); +HWY_EXPORT(Downsample2x4); +HWY_EXPORT(Downsample3x1); +HWY_EXPORT(Downsample3x2); +HWY_EXPORT(Downsample3x3); +HWY_EXPORT(Downsample3x4); +HWY_EXPORT(Downsample4x1); +HWY_EXPORT(Downsample4x2); +HWY_EXPORT(Downsample4x3); +HWY_EXPORT(Downsample4x4); + +void NullDownsample(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) {} + +void ChooseDownsampleMethods(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + for (int c = 0; c < cinfo->num_components; c++) { + m->downsample_method[c] = nullptr; + jpeg_component_info* comp = &cinfo->comp_info[c]; + const int h_factor = cinfo->max_h_samp_factor / comp->h_samp_factor; + const int v_factor = cinfo->max_v_samp_factor / comp->v_samp_factor; + if (v_factor == 1) { + if (h_factor == 1) { + m->downsample_method[c] = NullDownsample; + } else if (h_factor == 2) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x1); + } else if (h_factor == 3) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x1); + } else if (h_factor == 4) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x1); + } + } else if (v_factor == 2) { + if (h_factor == 1) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample1x2); + } else if (h_factor == 2) { + m->downsample_method[c] = 
HWY_DYNAMIC_DISPATCH(Downsample2x2); + } else if (h_factor == 3) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x2); + } else if (h_factor == 4) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x2); + } + } else if (v_factor == 3) { + if (h_factor == 1) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample1x2); + } else if (h_factor == 2) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x2); + } else if (h_factor == 3) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x2); + } else if (h_factor == 4) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x2); + } + } else if (v_factor == 4) { + if (h_factor == 1) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample1x4); + } else if (h_factor == 2) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x4); + } else if (h_factor == 3) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x4); + } else if (h_factor == 4) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x4); + } + } + if (m->downsample_method[c] == nullptr) { + JPEGLI_ERROR("Unsupported downsampling ratio %dx%d", h_factor, v_factor); + } + } +} + +void DownsampleInputBuffer(j_compress_ptr cinfo) { + if (cinfo->max_h_samp_factor == 1 && cinfo->max_v_samp_factor == 1) { + return; + } + jpeg_comp_master* m = cinfo->master; + const size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor; + const size_t y0 = m->next_iMCU_row * iMCU_height; + const size_t y1 = y0 + iMCU_height; + const size_t xsize_padded = m->xsize_blocks * DCTSIZE; + for (int c = 0; c < cinfo->num_components; c++) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + const int h_factor = cinfo->max_h_samp_factor / comp->h_samp_factor; + const int v_factor = cinfo->max_v_samp_factor / comp->v_samp_factor; + if (h_factor == 1 && v_factor == 1) { + continue; + } + auto& input = *m->smooth_input[c]; + auto& output = *m->raw_data[c]; + const size_t yout0 = y0 / v_factor; + float* 
rows_in[MAX_SAMP_FACTOR]; + for (size_t yin = y0, yout = yout0; yin < y1; yin += v_factor, ++yout) { + for (int iy = 0; iy < v_factor; ++iy) { + rows_in[iy] = input.Row(yin + iy); + } + float* row_out = output.Row(yout); + (*m->downsample_method[c])(rows_in, xsize_padded, row_out); + } + } +} + +void ApplyInputSmoothing(j_compress_ptr cinfo) { + if (!cinfo->smoothing_factor) { + return; + } + jpeg_comp_master* m = cinfo->master; + const float kW1 = cinfo->smoothing_factor / 1024.0; + const float kW0 = 1.0f - 8.0f * kW1; + const size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor; + const ssize_t y0 = m->next_iMCU_row * iMCU_height; + const ssize_t y1 = y0 + iMCU_height; + const ssize_t xsize_padded = m->xsize_blocks * DCTSIZE; + for (int c = 0; c < cinfo->num_components; c++) { + auto& input = m->input_buffer[c]; + auto& output = *m->smooth_input[c]; + if (m->next_iMCU_row == 0) { + input.CopyRow(-1, 0, 1); + } + if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) { + size_t last_row = m->ysize_blocks * DCTSIZE - 1; + input.CopyRow(last_row + 1, last_row, 1); + } + // TODO(szabadka) SIMDify this. 
+ for (ssize_t y = y0; y < y1; ++y) { + const float* row_t = input.Row(y - 1); + const float* row_m = input.Row(y); + const float* row_b = input.Row(y + 1); + float* row_out = output.Row(y); + for (ssize_t x = 0; x < xsize_padded; ++x) { + float val_tl = row_t[x - 1]; + float val_tm = row_t[x]; + float val_tr = row_t[x + 1]; + float val_ml = row_m[x - 1]; + float val_mm = row_m[x]; + float val_mr = row_m[x + 1]; + float val_bl = row_b[x - 1]; + float val_bm = row_b[x]; + float val_br = row_b[x + 1]; + float val1 = (val_tl + val_tm + val_tr + val_ml + val_mr + val_bl + + val_bm + val_br); + row_out[x] = val_mm * kW0 + val1 * kW1; + } + } + } +} + +} // namespace jpegli +#endif // HWY_ONCE diff --git a/lib/jpegli/downsample.h b/lib/jpegli/downsample.h new file mode 100644 index 0000000..3ccf069 --- /dev/null +++ b/lib/jpegli/downsample.h @@ -0,0 +1,21 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_DOWNSAMPLE_H_ +#define LIB_JPEGLI_DOWNSAMPLE_H_ + +#include "lib/jpegli/common.h" + +namespace jpegli { + +void ChooseDownsampleMethods(j_compress_ptr cinfo); + +void DownsampleInputBuffer(j_compress_ptr cinfo); + +void ApplyInputSmoothing(j_compress_ptr cinfo); + +} // namespace jpegli + +#endif // LIB_JPEGLI_DOWNSAMPLE_H_ diff --git a/lib/jpegli/encode.cc b/lib/jpegli/encode.cc new file mode 100644 index 0000000..8a106e2 --- /dev/null +++ b/lib/jpegli/encode.cc @@ -0,0 +1,1253 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jpegli/encode.h" + +#include <cmath> +#include <initializer_list> +#include <vector> + +#include "lib/jpegli/adaptive_quantization.h" +#include "lib/jpegli/bit_writer.h" +#include "lib/jpegli/bitstream.h" +#include "lib/jpegli/color_transform.h" +#include "lib/jpegli/downsample.h" +#include "lib/jpegli/encode_finish.h" +#include "lib/jpegli/encode_internal.h" +#include "lib/jpegli/encode_streaming.h" +#include "lib/jpegli/entropy_coding.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/huffman.h" +#include "lib/jpegli/input.h" +#include "lib/jpegli/memory_manager.h" +#include "lib/jpegli/quant.h" + +namespace jpegli { + +constexpr size_t kMaxBytesInMarker = 65533; + +void CheckState(j_compress_ptr cinfo, int state) { + if (cinfo->global_state != state) { + JPEGLI_ERROR("Unexpected global state %d [expected %d]", + cinfo->global_state, state); + } +} + +void CheckState(j_compress_ptr cinfo, int state1, int state2) { + if (cinfo->global_state != state1 && cinfo->global_state != state2) { + JPEGLI_ERROR("Unexpected global state %d [expected %d or %d]", + cinfo->global_state, state1, state2); + } +} + +// +// Parameter setup +// + +// Initialize cinfo fields that are not dependent on input image. 
This is shared +// between jpegli_CreateCompress() and jpegli_set_defaults() +void InitializeCompressParams(j_compress_ptr cinfo) { + cinfo->data_precision = 8; + cinfo->num_scans = 0; + cinfo->scan_info = nullptr; + cinfo->raw_data_in = FALSE; + cinfo->arith_code = FALSE; + cinfo->optimize_coding = FALSE; + cinfo->CCIR601_sampling = FALSE; + cinfo->smoothing_factor = 0; + cinfo->dct_method = JDCT_FLOAT; + cinfo->restart_interval = 0; + cinfo->restart_in_rows = 0; + cinfo->write_JFIF_header = FALSE; + cinfo->JFIF_major_version = 1; + cinfo->JFIF_minor_version = 1; + cinfo->density_unit = 0; + cinfo->X_density = 1; + cinfo->Y_density = 1; +#if JPEG_LIB_VERSION >= 70 + cinfo->scale_num = 1; + cinfo->scale_denom = 1; + cinfo->do_fancy_downsampling = FALSE; + cinfo->min_DCT_h_scaled_size = DCTSIZE; + cinfo->min_DCT_v_scaled_size = DCTSIZE; +#endif + cinfo->master->psnr_target = 0.0f; + cinfo->master->psnr_tolerance = 0.01f; + cinfo->master->min_distance = 0.1f; + cinfo->master->max_distance = 25.0f; +} + +float LinearQualityToDistance(int scale_factor) { + scale_factor = std::min(5000, std::max(0, scale_factor)); + int quality = + scale_factor < 100 ? 
100 - scale_factor / 2 : 5000 / scale_factor; + return jpegli_quality_to_distance(quality); +} + +template <typename T> +void SetSentTableFlag(T** table_ptrs, size_t num, boolean val) { + for (size_t i = 0; i < num; ++i) { + if (table_ptrs[i]) table_ptrs[i]->sent_table = val; + } +} + +// +// Compressor initialization +// + +struct ProgressiveScan { + int Ss, Se, Ah, Al; + bool interleaved; +}; + +void SetDefaultScanScript(j_compress_ptr cinfo) { + int level = cinfo->master->progressive_level; + std::vector<ProgressiveScan> progressive_mode; + bool interleave_dc = + (cinfo->max_h_samp_factor == 1 && cinfo->max_v_samp_factor == 1); + if (level == 0) { + progressive_mode.push_back({0, 63, 0, 0, true}); + } else if (level == 1) { + progressive_mode.push_back({0, 0, 0, 0, interleave_dc}); + progressive_mode.push_back({1, 63, 0, 1, false}); + progressive_mode.push_back({1, 63, 1, 0, false}); + } else { + progressive_mode.push_back({0, 0, 0, 0, interleave_dc}); + progressive_mode.push_back({1, 2, 0, 0, false}); + progressive_mode.push_back({3, 63, 0, 2, false}); + progressive_mode.push_back({3, 63, 2, 1, false}); + progressive_mode.push_back({3, 63, 1, 0, false}); + } + + cinfo->script_space_size = 0; + for (const auto& scan : progressive_mode) { + int comps = scan.interleaved ? MAX_COMPS_IN_SCAN : 1; + cinfo->script_space_size += DivCeil(cinfo->num_components, comps); + } + cinfo->script_space = + Allocate<jpeg_scan_info>(cinfo, cinfo->script_space_size); + + jpeg_scan_info* next_scan = cinfo->script_space; + for (const auto& scan : progressive_mode) { + int comps = scan.interleaved ? 
MAX_COMPS_IN_SCAN : 1; + for (int c = 0; c < cinfo->num_components; c += comps) { + next_scan->Ss = scan.Ss; + next_scan->Se = scan.Se; + next_scan->Ah = scan.Ah; + next_scan->Al = scan.Al; + next_scan->comps_in_scan = std::min(comps, cinfo->num_components - c); + for (int j = 0; j < next_scan->comps_in_scan; ++j) { + next_scan->component_index[j] = c + j; + } + ++next_scan; + } + } + JXL_ASSERT(next_scan - cinfo->script_space == cinfo->script_space_size); + cinfo->scan_info = cinfo->script_space; + cinfo->num_scans = cinfo->script_space_size; +} + +void ValidateScanScript(j_compress_ptr cinfo) { + // Mask of coefficient bits defined by the scan script, for each component + // and coefficient index. + uint16_t comp_mask[kMaxComponents][DCTSIZE2] = {}; + static constexpr int kMaxRefinementBit = 10; + + for (int i = 0; i < cinfo->num_scans; ++i) { + const jpeg_scan_info& si = cinfo->scan_info[i]; + if (si.comps_in_scan < 1 || si.comps_in_scan > MAX_COMPS_IN_SCAN) { + JPEGLI_ERROR("Invalid number of components in scan %d", si.comps_in_scan); + } + int last_ci = -1; + for (int j = 0; j < si.comps_in_scan; ++j) { + int ci = si.component_index[j]; + if (ci < 0 || ci >= cinfo->num_components) { + JPEGLI_ERROR("Invalid component index %d in scan", ci); + } else if (ci == last_ci) { + JPEGLI_ERROR("Duplicate component index %d in scan", ci); + } else if (ci < last_ci) { + JPEGLI_ERROR("Out of order component index %d in scan", ci); + } + last_ci = ci; + } + if (si.Ss < 0 || si.Se < si.Ss || si.Se >= DCTSIZE2) { + JPEGLI_ERROR("Invalid spectral range %d .. 
%d in scan", si.Ss, si.Se); + } + if (si.Ah < 0 || si.Al < 0 || si.Al > kMaxRefinementBit) { + JPEGLI_ERROR("Invalid refinement bits %d/%d", si.Ah, si.Al); + } + if (!cinfo->progressive_mode) { + if (si.Ss != 0 || si.Se != DCTSIZE2 - 1 || si.Ah != 0 || si.Al != 0) { + JPEGLI_ERROR("Invalid scan for sequential mode"); + } + } else { + if (si.Ss == 0 && si.Se != 0) { + JPEGLI_ERROR("DC and AC together in progressive scan"); + } + } + if (si.Ss != 0 && si.comps_in_scan != 1) { + JPEGLI_ERROR("Interleaved AC only scan."); + } + for (int j = 0; j < si.comps_in_scan; ++j) { + int ci = si.component_index[j]; + if (si.Ss != 0 && comp_mask[ci][0] == 0) { + JPEGLI_ERROR("AC before DC in component %d of scan", ci); + } + for (int k = si.Ss; k <= si.Se; ++k) { + if (comp_mask[ci][k] == 0) { + if (si.Ah != 0) { + JPEGLI_ERROR("Invalid first scan refinement bit"); + } + comp_mask[ci][k] = ((0xffff << si.Al) & 0xffff); + } else { + if (comp_mask[ci][k] != ((0xffff << si.Ah) & 0xffff) || + si.Al != si.Ah - 1) { + JPEGLI_ERROR("Invalid refinement bit progression."); + } + comp_mask[ci][k] |= 1 << si.Al; + } + } + } + if (si.comps_in_scan > 1) { + size_t mcu_size = 0; + for (int j = 0; j < si.comps_in_scan; ++j) { + int ci = si.component_index[j]; + jpeg_component_info* comp = &cinfo->comp_info[ci]; + mcu_size += comp->h_samp_factor * comp->v_samp_factor; + } + if (mcu_size > C_MAX_BLOCKS_IN_MCU) { + JPEGLI_ERROR("MCU size too big"); + } + } + } + for (int c = 0; c < cinfo->num_components; ++c) { + for (int k = 0; k < DCTSIZE2; ++k) { + if (comp_mask[c][k] != 0xffff) { + JPEGLI_ERROR("Incomplete scan of component %d and frequency %d", c, k); + } + } + } +} + +void ProcessCompressionParams(j_compress_ptr cinfo) { + if (cinfo->dest == nullptr) { + JPEGLI_ERROR("Missing destination."); + } + if (cinfo->image_width < 1 || cinfo->image_height < 1 || + cinfo->input_components < 1) { + JPEGLI_ERROR("Empty input image."); + } + if (cinfo->image_width > static_cast<int>(JPEG_MAX_DIMENSION) 
|| + cinfo->image_height > static_cast<int>(JPEG_MAX_DIMENSION) || + cinfo->input_components > static_cast<int>(kMaxComponents)) { + JPEGLI_ERROR("Input image too big."); + } + if (cinfo->num_components < 1 || + cinfo->num_components > static_cast<int>(kMaxComponents)) { + JPEGLI_ERROR("Invalid number of components."); + } + if (cinfo->data_precision != kJpegPrecision) { + JPEGLI_ERROR("Invalid data precision"); + } + if (cinfo->arith_code) { + JPEGLI_ERROR("Arithmetic coding is not implemented."); + } + if (cinfo->CCIR601_sampling) { + JPEGLI_ERROR("CCIR601 sampling is not implemented."); + } + if (cinfo->restart_interval > 65535u) { + JPEGLI_ERROR("Restart interval too big"); + } + if (cinfo->smoothing_factor < 0 || cinfo->smoothing_factor > 100) { + JPEGLI_ERROR("Invalid smoothing factor %d", cinfo->smoothing_factor); + } + jpeg_comp_master* m = cinfo->master; + cinfo->max_h_samp_factor = cinfo->max_v_samp_factor = 1; + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + if (comp->component_index != c) { + JPEGLI_ERROR("Invalid component index"); + } + for (int j = 0; j < c; ++j) { + if (cinfo->comp_info[j].component_id == comp->component_id) { + JPEGLI_ERROR("Duplicate component id %d", comp->component_id); + } + } + if (comp->h_samp_factor <= 0 || comp->v_samp_factor <= 0 || + comp->h_samp_factor > MAX_SAMP_FACTOR || + comp->v_samp_factor > MAX_SAMP_FACTOR) { + JPEGLI_ERROR("Invalid sampling factor %d x %d", comp->h_samp_factor, + comp->v_samp_factor); + } + cinfo->max_h_samp_factor = + std::max(comp->h_samp_factor, cinfo->max_h_samp_factor); + cinfo->max_v_samp_factor = + std::max(comp->v_samp_factor, cinfo->max_v_samp_factor); + } + if (cinfo->num_components == 1 && + (cinfo->max_h_samp_factor != 1 || cinfo->max_v_samp_factor != 1)) { + JPEGLI_ERROR("Sampling is not supported for simgle component image."); + } + size_t iMCU_width = DCTSIZE * cinfo->max_h_samp_factor; + size_t iMCU_height = DCTSIZE * 
cinfo->max_v_samp_factor; + size_t total_iMCU_cols = DivCeil(cinfo->image_width, iMCU_width); + cinfo->total_iMCU_rows = DivCeil(cinfo->image_height, iMCU_height); + m->xsize_blocks = total_iMCU_cols * cinfo->max_h_samp_factor; + m->ysize_blocks = cinfo->total_iMCU_rows * cinfo->max_v_samp_factor; + + size_t blocks_per_iMCU = 0; + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + if (cinfo->max_h_samp_factor % comp->h_samp_factor != 0 || + cinfo->max_v_samp_factor % comp->v_samp_factor != 0) { + JPEGLI_ERROR("Non-integral sampling ratios are not supported."); + } + m->h_factor[c] = cinfo->max_h_samp_factor / comp->h_samp_factor; + m->v_factor[c] = cinfo->max_v_samp_factor / comp->v_samp_factor; + comp->downsampled_width = DivCeil(cinfo->image_width, m->h_factor[c]); + comp->downsampled_height = DivCeil(cinfo->image_height, m->v_factor[c]); + comp->width_in_blocks = DivCeil(comp->downsampled_width, DCTSIZE); + comp->height_in_blocks = DivCeil(comp->downsampled_height, DCTSIZE); + blocks_per_iMCU += comp->h_samp_factor * comp->v_samp_factor; + } + m->blocks_per_iMCU_row = total_iMCU_cols * blocks_per_iMCU; + // Disable adaptive quantization for subsampled luma channel. + int y_channel = cinfo->jpeg_color_space == JCS_RGB ? 
1 : 0; + jpeg_component_info* y_comp = &cinfo->comp_info[y_channel]; + if (y_comp->h_samp_factor != cinfo->max_h_samp_factor || + y_comp->v_samp_factor != cinfo->max_v_samp_factor) { + m->use_adaptive_quantization = false; + } + if (cinfo->scan_info == nullptr) { + SetDefaultScanScript(cinfo); + } + cinfo->progressive_mode = + cinfo->scan_info->Ss != 0 || cinfo->scan_info->Se != DCTSIZE2 - 1; + ValidateScanScript(cinfo); + m->scan_token_info = + Allocate<ScanTokenInfo>(cinfo, cinfo->num_scans, JPOOL_IMAGE); + memset(m->scan_token_info, 0, cinfo->num_scans * sizeof(ScanTokenInfo)); + m->ac_ctx_offset = Allocate<uint8_t>(cinfo, cinfo->num_scans, JPOOL_IMAGE); + size_t num_ac_contexts = 0; + for (int i = 0; i < cinfo->num_scans; ++i) { + const jpeg_scan_info* scan_info = &cinfo->scan_info[i]; + m->ac_ctx_offset[i] = 4 + num_ac_contexts; + if (scan_info->Se > 0) { + num_ac_contexts += scan_info->comps_in_scan; + } + if (num_ac_contexts > 252) { + JPEGLI_ERROR("Too many AC scans in image"); + } + ScanTokenInfo* sti = &m->scan_token_info[i]; + if (scan_info->comps_in_scan == 1) { + int comp_idx = scan_info->component_index[0]; + jpeg_component_info* comp = &cinfo->comp_info[comp_idx]; + sti->MCUs_per_row = comp->width_in_blocks; + sti->MCU_rows_in_scan = comp->height_in_blocks; + sti->blocks_in_MCU = 1; + } else { + sti->MCUs_per_row = + DivCeil(cinfo->image_width, DCTSIZE * cinfo->max_h_samp_factor); + sti->MCU_rows_in_scan = + DivCeil(cinfo->image_height, DCTSIZE * cinfo->max_v_samp_factor); + sti->blocks_in_MCU = 0; + for (int j = 0; j < scan_info->comps_in_scan; ++j) { + int comp_idx = scan_info->component_index[j]; + jpeg_component_info* comp = &cinfo->comp_info[comp_idx]; + sti->blocks_in_MCU += comp->h_samp_factor * comp->v_samp_factor; + } + } + size_t num_MCUs = sti->MCU_rows_in_scan * sti->MCUs_per_row; + sti->num_blocks = num_MCUs * sti->blocks_in_MCU; + if (cinfo->restart_in_rows <= 0) { + sti->restart_interval = cinfo->restart_interval; + } else { + 
sti->restart_interval = + std::min<size_t>(sti->MCUs_per_row * cinfo->restart_in_rows, 65535u); + } + sti->num_restarts = sti->restart_interval > 0 + ? DivCeil(num_MCUs, sti->restart_interval) + : 1; + sti->restarts = Allocate<size_t>(cinfo, sti->num_restarts, JPOOL_IMAGE); + } + m->num_contexts = 4 + num_ac_contexts; +} + +bool IsStreamingSupported(j_compress_ptr cinfo) { + if (cinfo->global_state == kEncWriteCoeffs) { + return false; + } + // TODO(szabadka) Remove this restriction. + if (cinfo->restart_interval > 0 || cinfo->restart_in_rows > 0) { + return false; + } + if (cinfo->num_scans > 1) { + return false; + } + if (cinfo->master->psnr_target > 0) { + return false; + } + return true; +} + +void AllocateBuffers(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + memset(m->last_dc_coeff, 0, sizeof(m->last_dc_coeff)); + if (!IsStreamingSupported(cinfo) || cinfo->optimize_coding) { + int ysize_blocks = DivCeil(cinfo->image_height, DCTSIZE); + int num_arrays = cinfo->num_scans * ysize_blocks; + m->token_arrays = Allocate<TokenArray>(cinfo, num_arrays, JPOOL_IMAGE); + m->cur_token_array = 0; + memset(m->token_arrays, 0, num_arrays * sizeof(TokenArray)); + m->num_tokens = 0; + m->total_num_tokens = 0; + } + if (cinfo->global_state == kEncWriteCoeffs) { + return; + } + size_t iMCU_width = DCTSIZE * cinfo->max_h_samp_factor; + size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor; + size_t total_iMCU_cols = DivCeil(cinfo->image_width, iMCU_width); + size_t xsize_full = total_iMCU_cols * iMCU_width; + size_t ysize_full = 3 * iMCU_height; + if (!cinfo->raw_data_in) { + int num_all_components = + std::max(cinfo->input_components, cinfo->num_components); + for (int c = 0; c < num_all_components; ++c) { + m->input_buffer[c].Allocate(cinfo, ysize_full, xsize_full); + } + } + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + size_t xsize = total_iMCU_cols * comp->h_samp_factor * DCTSIZE; + size_t ysize = 
3 * comp->v_samp_factor * DCTSIZE; + if (cinfo->raw_data_in) { + m->input_buffer[c].Allocate(cinfo, ysize, xsize); + } + m->smooth_input[c] = &m->input_buffer[c]; + if (!cinfo->raw_data_in && cinfo->smoothing_factor) { + m->smooth_input[c] = Allocate<RowBuffer<float>>(cinfo, 1, JPOOL_IMAGE); + m->smooth_input[c]->Allocate(cinfo, ysize_full, xsize_full); + } + m->raw_data[c] = m->smooth_input[c]; + if (!cinfo->raw_data_in && (m->h_factor[c] > 1 || m->v_factor[c] > 1)) { + m->raw_data[c] = Allocate<RowBuffer<float>>(cinfo, 1, JPOOL_IMAGE); + m->raw_data[c]->Allocate(cinfo, ysize, xsize); + } + m->quant_mul[c] = Allocate<float>(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED); + } + m->dct_buffer = Allocate<float>(cinfo, 2 * DCTSIZE2, JPOOL_IMAGE_ALIGNED); + m->block_tmp = Allocate<int32_t>(cinfo, DCTSIZE2 * 4, JPOOL_IMAGE_ALIGNED); + if (!IsStreamingSupported(cinfo)) { + m->coeff_buffers = + Allocate<jvirt_barray_ptr>(cinfo, cinfo->num_components, JPOOL_IMAGE); + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + const size_t xsize_blocks = comp->width_in_blocks; + const size_t ysize_blocks = comp->height_in_blocks; + m->coeff_buffers[c] = (*cinfo->mem->request_virt_barray)( + reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, + /*pre_zero=*/false, xsize_blocks, ysize_blocks, comp->v_samp_factor); + } + } + if (m->use_adaptive_quantization) { + int y_channel = cinfo->jpeg_color_space == JCS_RGB ? 
1 : 0; + jpeg_component_info* y_comp = &cinfo->comp_info[y_channel]; + const size_t xsize_blocks = y_comp->width_in_blocks; + const size_t vecsize = VectorSize(); + const size_t xsize_padded = DivCeil(2 * xsize_blocks, vecsize) * vecsize; + m->diff_buffer = + Allocate<float>(cinfo, xsize_blocks * DCTSIZE + 8, JPOOL_IMAGE_ALIGNED); + m->fuzzy_erosion_tmp.Allocate(cinfo, 2, xsize_padded); + m->pre_erosion.Allocate(cinfo, 6 * cinfo->max_v_samp_factor, xsize_padded); + size_t qf_height = cinfo->max_v_samp_factor; + if (m->psnr_target > 0) { + qf_height *= cinfo->total_iMCU_rows; + } + m->quant_field.Allocate(cinfo, qf_height, xsize_blocks); + } else { + m->quant_field.Allocate(cinfo, 1, m->xsize_blocks); + m->quant_field.FillRow(0, 0, m->xsize_blocks); + } + for (int c = 0; c < cinfo->num_components; ++c) { + m->zero_bias_offset[c] = + Allocate<float>(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED); + m->zero_bias_mul[c] = Allocate<float>(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED); + memset(m->zero_bias_mul[c], 0, DCTSIZE2 * sizeof(float)); + memset(m->zero_bias_offset[c], 0, DCTSIZE2 * sizeof(float)); + } +} + +void InitProgressMonitor(j_compress_ptr cinfo) { + if (cinfo->progress == nullptr) { + return; + } + if (IsStreamingSupported(cinfo)) { + // We have only one input pass. + cinfo->progress->total_passes = 1; + } else { + // We have one input pass, a histogram pass for each scan, and an encode + // pass for each scan. + cinfo->progress->total_passes = 1 + 2 * cinfo->num_scans; + } +} + +// Common setup code between streaming and transcoding code paths. Called in +// both jpegli_start_compress() and jpegli_write_coefficients(). 
+void InitCompress(j_compress_ptr cinfo, boolean write_all_tables) { + jpeg_comp_master* m = cinfo->master; + (*cinfo->err->reset_error_mgr)(reinterpret_cast<j_common_ptr>(cinfo)); + ProcessCompressionParams(cinfo); + InitProgressMonitor(cinfo); + AllocateBuffers(cinfo); + if (cinfo->global_state != kEncWriteCoeffs) { + ChooseInputMethod(cinfo); + if (!cinfo->raw_data_in) { + ChooseColorTransform(cinfo); + ChooseDownsampleMethods(cinfo); + } + QuantPass pass = m->psnr_target > 0 ? QuantPass::SEARCH_FIRST_PASS + : QuantPass::NO_SEARCH; + InitQuantizer(cinfo, pass); + } + if (write_all_tables) { + jpegli_suppress_tables(cinfo, FALSE); + } + if (!cinfo->optimize_coding && !cinfo->progressive_mode) { + CopyHuffmanTables(cinfo); + InitEntropyCoder(cinfo); + } + (*cinfo->dest->init_destination)(cinfo); + WriteFileHeader(cinfo); + JpegBitWriterInit(cinfo); + m->next_iMCU_row = 0; + m->last_restart_interval = 0; + m->next_dht_index = 0; +} + +// +// Input streaming +// + +void ProgressMonitorInputPass(j_compress_ptr cinfo) { + if (cinfo->progress == nullptr) { + return; + } + cinfo->progress->completed_passes = 0; + cinfo->progress->pass_counter = cinfo->next_scanline; + cinfo->progress->pass_limit = cinfo->image_height; + (*cinfo->progress->progress_monitor)(reinterpret_cast<j_common_ptr>(cinfo)); +} + +void ReadInputRow(j_compress_ptr cinfo, const uint8_t* scanline, + float* row[kMaxComponents]) { + jpeg_comp_master* m = cinfo->master; + int num_all_components = + std::max(cinfo->input_components, cinfo->num_components); + for (int c = 0; c < num_all_components; ++c) { + row[c] = m->input_buffer[c].Row(m->next_input_row); + } + ++m->next_input_row; + if (scanline == nullptr) { + for (int c = 0; c < cinfo->input_components; ++c) { + memset(row[c], 0, cinfo->image_width * sizeof(row[c][0])); + } + return; + } + (*m->input_method)(scanline, cinfo->image_width, row); +} + +void PadInputBuffer(j_compress_ptr cinfo, float* row[kMaxComponents]) { + jpeg_comp_master* m = 
cinfo->master; + const size_t len0 = cinfo->image_width; + const size_t len1 = m->xsize_blocks * DCTSIZE; + for (int c = 0; c < cinfo->num_components; ++c) { + // Pad row to a multiple of the iMCU width, plus create a border of 1 + // repeated pixel for adaptive quant field calculation. + float last_val = row[c][len0 - 1]; + for (size_t x = len0; x <= len1; ++x) { + row[c][x] = last_val; + } + row[c][-1] = row[c][0]; + } + if (m->next_input_row == cinfo->image_height) { + size_t num_rows = m->ysize_blocks * DCTSIZE - cinfo->image_height; + for (size_t i = 0; i < num_rows; ++i) { + for (int c = 0; c < cinfo->num_components; ++c) { + float* dest = m->input_buffer[c].Row(m->next_input_row) - 1; + memcpy(dest, row[c] - 1, (len1 + 2) * sizeof(dest[0])); + } + ++m->next_input_row; + } + } +} + +void ProcessiMCURow(j_compress_ptr cinfo) { + JXL_ASSERT(cinfo->master->next_iMCU_row < cinfo->total_iMCU_rows); + if (!cinfo->raw_data_in) { + ApplyInputSmoothing(cinfo); + DownsampleInputBuffer(cinfo); + } + ComputeAdaptiveQuantField(cinfo); + if (IsStreamingSupported(cinfo)) { + if (cinfo->optimize_coding) { + ComputeTokensForiMCURow(cinfo); + } else { + WriteiMCURow(cinfo); + } + } else { + ComputeCoefficientsForiMCURow(cinfo); + } + ++cinfo->master->next_iMCU_row; +} + +void ProcessiMCURows(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor; + // To have context rows both above and below the current iMCU row, we delay + // processing the first iMCU row and process two iMCU rows after we receive + // the last input row. 
+ if (m->next_input_row % iMCU_height == 0 && m->next_input_row > iMCU_height) { + ProcessiMCURow(cinfo); + } + if (m->next_input_row >= cinfo->image_height) { + ProcessiMCURow(cinfo); + } +} + +// +// Non-streaming part +// + +void ZigZagShuffleBlocks(j_compress_ptr cinfo) { + JCOEF tmp[DCTSIZE2]; + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) { + JBLOCKARRAY ba = GetBlockRow(cinfo, c, by); + for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) { + JCOEF* block = &ba[0][bx][0]; + for (int k = 0; k < DCTSIZE2; ++k) { + tmp[k] = block[kJPEGNaturalOrder[k]]; + } + memcpy(block, tmp, sizeof(tmp)); + } + } + } +} + +} // namespace jpegli + +// +// Parameter setup +// + +void jpegli_CreateCompress(j_compress_ptr cinfo, int version, + size_t structsize) { + cinfo->mem = nullptr; + if (structsize != sizeof(*cinfo)) { + JPEGLI_ERROR("jpegli_compress_struct has wrong size."); + } + jpegli::InitMemoryManager(reinterpret_cast<j_common_ptr>(cinfo)); + cinfo->progress = nullptr; + cinfo->is_decompressor = FALSE; + cinfo->global_state = jpegli::kEncStart; + cinfo->dest = nullptr; + cinfo->image_width = 0; + cinfo->image_height = 0; + cinfo->input_components = 0; + cinfo->in_color_space = JCS_UNKNOWN; + cinfo->input_gamma = 1.0f; + cinfo->num_components = 0; + cinfo->jpeg_color_space = JCS_UNKNOWN; + cinfo->comp_info = nullptr; + for (int i = 0; i < NUM_QUANT_TBLS; ++i) { + cinfo->quant_tbl_ptrs[i] = nullptr; + } + for (int i = 0; i < NUM_HUFF_TBLS; ++i) { + cinfo->dc_huff_tbl_ptrs[i] = nullptr; + cinfo->ac_huff_tbl_ptrs[i] = nullptr; + } + memset(cinfo->arith_dc_L, 0, sizeof(cinfo->arith_dc_L)); + memset(cinfo->arith_dc_U, 0, sizeof(cinfo->arith_dc_U)); + memset(cinfo->arith_ac_K, 0, sizeof(cinfo->arith_ac_K)); + cinfo->write_Adobe_marker = false; + cinfo->master = jpegli::Allocate<jpeg_comp_master>(cinfo, 1); + jpegli::InitializeCompressParams(cinfo); + 
cinfo->master->force_baseline = true; + cinfo->master->xyb_mode = false; + cinfo->master->cicp_transfer_function = 2; // unknown transfer function code + cinfo->master->use_std_tables = false; + cinfo->master->use_adaptive_quantization = true; + cinfo->master->progressive_level = jpegli::kDefaultProgressiveLevel; + cinfo->master->data_type = JPEGLI_TYPE_UINT8; + cinfo->master->endianness = JPEGLI_NATIVE_ENDIAN; + cinfo->master->coeff_buffers = nullptr; +} + +void jpegli_set_xyb_mode(j_compress_ptr cinfo) { + CheckState(cinfo, jpegli::kEncStart); + cinfo->master->xyb_mode = true; +} + +void jpegli_set_cicp_transfer_function(j_compress_ptr cinfo, int code) { + CheckState(cinfo, jpegli::kEncStart); + cinfo->master->cicp_transfer_function = code; +} + +void jpegli_set_defaults(j_compress_ptr cinfo) { + CheckState(cinfo, jpegli::kEncStart); + jpegli::InitializeCompressParams(cinfo); + jpegli_default_colorspace(cinfo); + jpegli_set_quality(cinfo, 90, TRUE); + jpegli_set_progressive_level(cinfo, jpegli::kDefaultProgressiveLevel); + jpegli::AddStandardHuffmanTables(reinterpret_cast<j_common_ptr>(cinfo), + /*is_dc=*/false); + jpegli::AddStandardHuffmanTables(reinterpret_cast<j_common_ptr>(cinfo), + /*is_dc=*/true); +} + +void jpegli_default_colorspace(j_compress_ptr cinfo) { + CheckState(cinfo, jpegli::kEncStart); + switch (cinfo->in_color_space) { + case JCS_GRAYSCALE: + jpegli_set_colorspace(cinfo, JCS_GRAYSCALE); + break; + case JCS_RGB: { + if (cinfo->master->xyb_mode) { + jpegli_set_colorspace(cinfo, JCS_RGB); + } else { + jpegli_set_colorspace(cinfo, JCS_YCbCr); + } + break; + } + case JCS_YCbCr: + jpegli_set_colorspace(cinfo, JCS_YCbCr); + break; + case JCS_CMYK: + jpegli_set_colorspace(cinfo, JCS_CMYK); + break; + case JCS_YCCK: + jpegli_set_colorspace(cinfo, JCS_YCCK); + break; + case JCS_UNKNOWN: + jpegli_set_colorspace(cinfo, JCS_UNKNOWN); + break; + default: + JPEGLI_ERROR("Unsupported input colorspace %d", cinfo->in_color_space); + } +} + +void 
jpegli_set_colorspace(j_compress_ptr cinfo, J_COLOR_SPACE colorspace) { + CheckState(cinfo, jpegli::kEncStart); + cinfo->jpeg_color_space = colorspace; + switch (colorspace) { + case JCS_GRAYSCALE: + cinfo->num_components = 1; + break; + case JCS_RGB: + case JCS_YCbCr: + cinfo->num_components = 3; + break; + case JCS_CMYK: + case JCS_YCCK: + cinfo->num_components = 4; + break; + case JCS_UNKNOWN: + cinfo->num_components = + std::min<int>(jpegli::kMaxComponents, cinfo->input_components); + break; + default: + JPEGLI_ERROR("Unsupported jpeg colorspace %d", colorspace); + } + // Adobe marker is only needed to distinguish CMYK and YCCK JPEGs. + cinfo->write_Adobe_marker = (cinfo->jpeg_color_space == JCS_YCCK); + if (cinfo->comp_info == nullptr) { + cinfo->comp_info = + jpegli::Allocate<jpeg_component_info>(cinfo, MAX_COMPONENTS); + } + memset(cinfo->comp_info, 0, + jpegli::kMaxComponents * sizeof(jpeg_component_info)); + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + comp->component_index = c; + comp->component_id = c + 1; + comp->h_samp_factor = 1; + comp->v_samp_factor = 1; + comp->quant_tbl_no = 0; + comp->dc_tbl_no = 0; + comp->ac_tbl_no = 0; + } + if (colorspace == JCS_RGB) { + cinfo->comp_info[0].component_id = 'R'; + cinfo->comp_info[1].component_id = 'G'; + cinfo->comp_info[2].component_id = 'B'; + if (cinfo->master->xyb_mode) { + // Subsample blue channel. 
// Maps a quality setting to a distance value:
//   * quality >= 100      -> 0.01
//   * 30 <= quality < 100 -> linear, 0.1 + 0.09 * (100 - quality)
//   * quality < 30        -> quadratic in quality, with value 25 at 0
float jpegli_quality_to_distance(int quality) {
  if (quality >= 100) {
    return 0.01f;
  }
  if (quality >= 30) {
    return 0.1f + (100 - quality) * 0.09f;
  }
  return 53.0f / 3000.0f * quality * quality - 23.0f / 20.0f * quality + 25.0f;
}
5000 / quality : 200 - 2 * quality; +} + +void jpegli_use_standard_quant_tables(j_compress_ptr cinfo) { + CheckState(cinfo, jpegli::kEncStart); + cinfo->master->use_std_tables = true; +} + +void jpegli_add_quant_table(j_compress_ptr cinfo, int which_tbl, + const unsigned int* basic_table, int scale_factor, + boolean force_baseline) { + CheckState(cinfo, jpegli::kEncStart); + if (which_tbl < 0 || which_tbl > NUM_QUANT_TBLS) { + JPEGLI_ERROR("Invalid quant table index %d", which_tbl); + } + if (cinfo->quant_tbl_ptrs[which_tbl] == nullptr) { + cinfo->quant_tbl_ptrs[which_tbl] = + jpegli_alloc_quant_table(reinterpret_cast<j_common_ptr>(cinfo)); + } + int max_qval = force_baseline ? 255 : 32767U; + JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[which_tbl]; + for (int k = 0; k < DCTSIZE2; ++k) { + int qval = (basic_table[k] * scale_factor + 50) / 100; + qval = std::max(1, std::min(qval, max_qval)); + quant_table->quantval[k] = qval; + } + quant_table->sent_table = FALSE; +} + +void jpegli_enable_adaptive_quantization(j_compress_ptr cinfo, boolean value) { + CheckState(cinfo, jpegli::kEncStart); + cinfo->master->use_adaptive_quantization = value; +} + +void jpegli_simple_progression(j_compress_ptr cinfo) { + CheckState(cinfo, jpegli::kEncStart); + jpegli_set_progressive_level(cinfo, 2); +} + +void jpegli_set_progressive_level(j_compress_ptr cinfo, int level) { + CheckState(cinfo, jpegli::kEncStart); + if (level < 0) { + JPEGLI_ERROR("Invalid progressive level %d", level); + } + cinfo->master->progressive_level = level; +} + +void jpegli_set_input_format(j_compress_ptr cinfo, JpegliDataType data_type, + JpegliEndianness endianness) { + CheckState(cinfo, jpegli::kEncStart); + switch (data_type) { + case JPEGLI_TYPE_UINT8: + case JPEGLI_TYPE_UINT16: + case JPEGLI_TYPE_FLOAT: + cinfo->master->data_type = data_type; + break; + default: + JPEGLI_ERROR("Unsupported data type %d", data_type); + } + switch (endianness) { + case JPEGLI_NATIVE_ENDIAN: + case JPEGLI_LITTLE_ENDIAN: + 
case JPEGLI_BIG_ENDIAN: + cinfo->master->endianness = endianness; + break; + default: + JPEGLI_ERROR("Unsupported endianness %d", endianness); + } +} + +#if JPEG_LIB_VERSION >= 70 +void jpegli_calc_jpeg_dimensions(j_compress_ptr cinfo) { + // Since input scaling is not supported, we just copy the image dimensions. + cinfo->jpeg_width = cinfo->image_width; + cinfo->jpeg_height = cinfo->image_height; +} +#endif + +void jpegli_copy_critical_parameters(j_decompress_ptr srcinfo, + j_compress_ptr dstinfo) { + CheckState(dstinfo, jpegli::kEncStart); + // Image parameters. + dstinfo->image_width = srcinfo->image_width; + dstinfo->image_height = srcinfo->image_height; + dstinfo->input_components = srcinfo->num_components; + dstinfo->in_color_space = srcinfo->jpeg_color_space; + dstinfo->input_gamma = srcinfo->output_gamma; + // Compression parameters. + jpegli_set_defaults(dstinfo); + jpegli_set_colorspace(dstinfo, srcinfo->jpeg_color_space); + if (dstinfo->num_components != srcinfo->num_components) { + const auto& cinfo = dstinfo; + return JPEGLI_ERROR("Mismatch between src colorspace and components"); + } + dstinfo->data_precision = srcinfo->data_precision; + dstinfo->CCIR601_sampling = srcinfo->CCIR601_sampling; + dstinfo->JFIF_major_version = srcinfo->JFIF_major_version; + dstinfo->JFIF_minor_version = srcinfo->JFIF_minor_version; + dstinfo->density_unit = srcinfo->density_unit; + dstinfo->X_density = srcinfo->X_density; + dstinfo->Y_density = srcinfo->Y_density; + for (int c = 0; c < dstinfo->num_components; ++c) { + jpeg_component_info* srccomp = &srcinfo->comp_info[c]; + jpeg_component_info* dstcomp = &dstinfo->comp_info[c]; + dstcomp->component_id = srccomp->component_id; + dstcomp->h_samp_factor = srccomp->h_samp_factor; + dstcomp->v_samp_factor = srccomp->v_samp_factor; + dstcomp->quant_tbl_no = srccomp->quant_tbl_no; + } + for (int i = 0; i < NUM_QUANT_TBLS; ++i) { + if (!srcinfo->quant_tbl_ptrs[i]) continue; + if (dstinfo->quant_tbl_ptrs[i] == nullptr) { + 
dstinfo->quant_tbl_ptrs[i] = jpegli::Allocate<JQUANT_TBL>(dstinfo, 1); + } + memcpy(dstinfo->quant_tbl_ptrs[i], srcinfo->quant_tbl_ptrs[i], + sizeof(JQUANT_TBL)); + dstinfo->quant_tbl_ptrs[i]->sent_table = FALSE; + } +} + +void jpegli_suppress_tables(j_compress_ptr cinfo, boolean suppress) { + jpegli::SetSentTableFlag(cinfo->quant_tbl_ptrs, NUM_QUANT_TBLS, suppress); + jpegli::SetSentTableFlag(cinfo->dc_huff_tbl_ptrs, NUM_HUFF_TBLS, suppress); + jpegli::SetSentTableFlag(cinfo->ac_huff_tbl_ptrs, NUM_HUFF_TBLS, suppress); +} + +// +// Compressor initialization +// + +void jpegli_start_compress(j_compress_ptr cinfo, boolean write_all_tables) { + CheckState(cinfo, jpegli::kEncStart); + cinfo->global_state = jpegli::kEncHeader; + jpegli::InitCompress(cinfo, write_all_tables); + cinfo->next_scanline = 0; + cinfo->master->next_input_row = 0; +} + +void jpegli_write_coefficients(j_compress_ptr cinfo, + jvirt_barray_ptr* coef_arrays) { + CheckState(cinfo, jpegli::kEncStart); + cinfo->global_state = jpegli::kEncWriteCoeffs; + jpegli::InitCompress(cinfo, /*write_all_tables=*/true); + cinfo->master->coeff_buffers = coef_arrays; + cinfo->next_scanline = cinfo->image_height; + cinfo->master->next_input_row = cinfo->image_height; +} + +void jpegli_write_tables(j_compress_ptr cinfo) { + CheckState(cinfo, jpegli::kEncStart); + if (cinfo->dest == nullptr) { + JPEGLI_ERROR("Missing destination."); + } + jpeg_comp_master* m = cinfo->master; + (*cinfo->err->reset_error_mgr)(reinterpret_cast<j_common_ptr>(cinfo)); + (*cinfo->dest->init_destination)(cinfo); + jpegli::WriteOutput(cinfo, {0xFF, 0xD8}); // SOI + jpegli::EncodeDQT(cinfo, /*write_all_tables=*/true); + jpegli::CopyHuffmanTables(cinfo); + jpegli::EncodeDHT(cinfo, 0, m->num_huffman_tables); + jpegli::WriteOutput(cinfo, {0xFF, 0xD9}); // EOI + (*cinfo->dest->term_destination)(cinfo); + jpegli_suppress_tables(cinfo, TRUE); +} + +// +// Marker writing +// + +void jpegli_write_m_header(j_compress_ptr cinfo, int marker, + unsigned 
int datalen) { + CheckState(cinfo, jpegli::kEncHeader, jpegli::kEncWriteCoeffs); + if (datalen > jpegli::kMaxBytesInMarker) { + JPEGLI_ERROR("Invalid marker length %u", datalen); + } + if (marker != 0xfe && (marker < 0xe0 || marker > 0xef)) { + JPEGLI_ERROR( + "jpegli_write_m_header: Only APP and COM markers are supported."); + } + std::vector<uint8_t> marker_data(4 + datalen); + marker_data[0] = 0xff; + marker_data[1] = marker; + marker_data[2] = (datalen + 2) >> 8; + marker_data[3] = (datalen + 2) & 0xff; + jpegli::WriteOutput(cinfo, &marker_data[0], 4); +} + +void jpegli_write_m_byte(j_compress_ptr cinfo, int val) { + uint8_t data = val; + jpegli::WriteOutput(cinfo, &data, 1); +} + +void jpegli_write_marker(j_compress_ptr cinfo, int marker, + const JOCTET* dataptr, unsigned int datalen) { + jpegli_write_m_header(cinfo, marker, datalen); + jpegli::WriteOutput(cinfo, dataptr, datalen); +} + +void jpegli_write_icc_profile(j_compress_ptr cinfo, const JOCTET* icc_data_ptr, + unsigned int icc_data_len) { + constexpr size_t kMaxIccBytesInMarker = + jpegli::kMaxBytesInMarker - sizeof jpegli::kICCSignature - 2; + const int num_markers = + static_cast<int>(jpegli::DivCeil(icc_data_len, kMaxIccBytesInMarker)); + size_t begin = 0; + for (int current_marker = 0; current_marker < num_markers; ++current_marker) { + const size_t length = std::min(kMaxIccBytesInMarker, icc_data_len - begin); + jpegli_write_m_header( + cinfo, jpegli::kICCMarker, + static_cast<unsigned int>(length + sizeof jpegli::kICCSignature + 2)); + for (const unsigned char c : jpegli::kICCSignature) { + jpegli_write_m_byte(cinfo, c); + } + jpegli_write_m_byte(cinfo, current_marker + 1); + jpegli_write_m_byte(cinfo, num_markers); + for (size_t i = 0; i < length; ++i) { + jpegli_write_m_byte(cinfo, icc_data_ptr[begin]); + ++begin; + } + } +} + +// +// Input streaming +// + +JDIMENSION jpegli_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines, + JDIMENSION num_lines) { + CheckState(cinfo, 
jpegli::kEncHeader, jpegli::kEncReadImage); + if (cinfo->raw_data_in) { + JPEGLI_ERROR("jpegli_write_raw_data() must be called for raw data mode."); + } + jpegli::ProgressMonitorInputPass(cinfo); + if (cinfo->global_state == jpegli::kEncHeader && + jpegli::IsStreamingSupported(cinfo) && !cinfo->optimize_coding) { + jpegli::WriteFrameHeader(cinfo); + jpegli::WriteScanHeader(cinfo, 0); + } + cinfo->global_state = jpegli::kEncReadImage; + jpeg_comp_master* m = cinfo->master; + if (num_lines + cinfo->next_scanline > cinfo->image_height) { + num_lines = cinfo->image_height - cinfo->next_scanline; + } + JDIMENSION prev_scanline = cinfo->next_scanline; + size_t input_lag = (std::min<size_t>(cinfo->image_height, m->next_input_row) - + cinfo->next_scanline); + if (input_lag > num_lines) { + JPEGLI_ERROR("Need at least %u lines to continue", input_lag); + } + if (input_lag > 0) { + if (!jpegli::EmptyBitWriterBuffer(&m->bw)) { + return 0; + } + cinfo->next_scanline += input_lag; + } + float* rows[jpegli::kMaxComponents]; + for (size_t i = input_lag; i < num_lines; ++i) { + jpegli::ReadInputRow(cinfo, scanlines[i], rows); + (*m->color_transform)(rows, cinfo->image_width); + jpegli::PadInputBuffer(cinfo, rows); + jpegli::ProcessiMCURows(cinfo); + if (!jpegli::EmptyBitWriterBuffer(&m->bw)) { + break; + } + ++cinfo->next_scanline; + } + return cinfo->next_scanline - prev_scanline; +} + +JDIMENSION jpegli_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION num_lines) { + CheckState(cinfo, jpegli::kEncHeader, jpegli::kEncReadImage); + if (!cinfo->raw_data_in) { + JPEGLI_ERROR("jpegli_write_raw_data(): raw data mode was not set"); + } + jpegli::ProgressMonitorInputPass(cinfo); + if (cinfo->global_state == jpegli::kEncHeader && + jpegli::IsStreamingSupported(cinfo) && !cinfo->optimize_coding) { + jpegli::WriteFrameHeader(cinfo); + jpegli::WriteScanHeader(cinfo, 0); + } + cinfo->global_state = jpegli::kEncReadImage; + jpeg_comp_master* m = cinfo->master; + if 
(cinfo->next_scanline >= cinfo->image_height) { + return 0; + } + size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor; + if (num_lines < iMCU_height) { + JPEGLI_ERROR("Missing input lines, minimum is %u", iMCU_height); + } + if (cinfo->next_scanline < m->next_input_row) { + JXL_ASSERT(m->next_input_row - cinfo->next_scanline == iMCU_height); + if (!jpegli::EmptyBitWriterBuffer(&m->bw)) { + return 0; + } + cinfo->next_scanline = m->next_input_row; + return iMCU_height; + } + size_t iMCU_y = m->next_input_row / iMCU_height; + float* rows[jpegli::kMaxComponents]; + for (int c = 0; c < cinfo->num_components; ++c) { + JSAMPARRAY plane = data[c]; + jpeg_component_info* comp = &cinfo->comp_info[c]; + size_t xsize = comp->width_in_blocks * DCTSIZE; + size_t ysize = comp->v_samp_factor * DCTSIZE; + size_t y0 = iMCU_y * ysize; + auto& buffer = m->input_buffer[c]; + for (size_t i = 0; i < ysize; ++i) { + rows[0] = buffer.Row(y0 + i); + if (plane[i] == nullptr) { + memset(rows[0], 0, xsize * sizeof(rows[0][0])); + } else { + (*m->input_method)(plane[i], xsize, rows); + } + // We need a border of 1 repeated pixel for adaptive quant field. + buffer.PadRow(y0 + i, xsize, /*border=*/1); + } + } + m->next_input_row += iMCU_height; + jpegli::ProcessiMCURows(cinfo); + if (!jpegli::EmptyBitWriterBuffer(&m->bw)) { + return 0; + } + cinfo->next_scanline += iMCU_height; + return iMCU_height; +} + +// +// Non-streaming part +// + +void jpegli_finish_compress(j_compress_ptr cinfo) { + CheckState(cinfo, jpegli::kEncReadImage, jpegli::kEncWriteCoeffs); + jpeg_comp_master* m = cinfo->master; + if (cinfo->next_scanline < cinfo->image_height) { + JPEGLI_ERROR("Incomplete image, expected %d rows, got %d", + cinfo->image_height, cinfo->next_scanline); + } + + if (cinfo->global_state == jpegli::kEncWriteCoeffs) { + // Zig-zag shuffle all the blocks. For non-transcoding case it was already + // done in EncodeiMCURow(). 
+ jpegli::ZigZagShuffleBlocks(cinfo); + } + + if (m->psnr_target > 0) { + jpegli::QuantizetoPSNR(cinfo); + } + + const bool tokens_done = jpegli::IsStreamingSupported(cinfo); + const bool bitstream_done = tokens_done && !cinfo->optimize_coding; + + if (!tokens_done) { + jpegli::TokenizeJpeg(cinfo); + } + + if (cinfo->optimize_coding || cinfo->progressive_mode) { + jpegli::OptimizeHuffmanCodes(cinfo); + jpegli::InitEntropyCoder(cinfo); + } + + if (!bitstream_done) { + jpegli::WriteFrameHeader(cinfo); + for (int i = 0; i < cinfo->num_scans; ++i) { + jpegli::WriteScanHeader(cinfo, i); + jpegli::WriteScanData(cinfo, i); + } + } else { + JumpToByteBoundary(&m->bw); + if (!EmptyBitWriterBuffer(&m->bw)) { + JPEGLI_ERROR("Output suspension is not supported in finish_compress"); + } + } + + jpegli::WriteOutput(cinfo, {0xFF, 0xD9}); // EOI + (*cinfo->dest->term_destination)(cinfo); + + // Release memory and reset global state. + jpegli_abort_compress(cinfo); +} + +void jpegli_abort_compress(j_compress_ptr cinfo) { + jpegli_abort(reinterpret_cast<j_common_ptr>(cinfo)); +} + +void jpegli_destroy_compress(j_compress_ptr cinfo) { + jpegli_destroy(reinterpret_cast<j_common_ptr>(cinfo)); +} diff --git a/lib/jpegli/encode.h b/lib/jpegli/encode.h new file mode 100644 index 0000000..320dfaa --- /dev/null +++ b/lib/jpegli/encode.h @@ -0,0 +1,158 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+// +// This file contains the C API of the encoder part of the libjpegli library, +// which is based on the C API of libjpeg, with the function names changed from +// jpeg_* to jpegli_*, while compressor object definitions are included directly +// from jpeglib.h +// +// Applications can use the libjpegli library in one of the following ways: +// +// (1) Include jpegli/encode.h and/or jpegli/decode.h, update the function +// names of the API and link against libjpegli. +// +// (2) Leave the application code unchanged, but replace the libjpeg.so library +// with the one built by this project that is API- and ABI-compatible with +// libjpeg-turbo's version of libjpeg.so. + +#ifndef LIB_JPEGLI_ENCODE_H_ +#define LIB_JPEGLI_ENCODE_H_ + +#include "lib/jpegli/common.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +#define jpegli_create_compress(cinfo) \ + jpegli_CreateCompress((cinfo), JPEG_LIB_VERSION, \ + (size_t)sizeof(struct jpeg_compress_struct)) +void jpegli_CreateCompress(j_compress_ptr cinfo, int version, + size_t structsize); + +void jpegli_stdio_dest(j_compress_ptr cinfo, FILE* outfile); + +void jpegli_mem_dest(j_compress_ptr cinfo, unsigned char** outbuffer, + unsigned long* outsize); + +void jpegli_set_defaults(j_compress_ptr cinfo); + +void jpegli_default_colorspace(j_compress_ptr cinfo); + +void jpegli_set_colorspace(j_compress_ptr cinfo, J_COLOR_SPACE colorspace); + +void jpegli_set_quality(j_compress_ptr cinfo, int quality, + boolean force_baseline); + +void jpegli_set_linear_quality(j_compress_ptr cinfo, int scale_factor, + boolean force_baseline); + +#if JPEG_LIB_VERSION >= 70 +void jpegli_default_qtables(j_compress_ptr cinfo, boolean force_baseline); +#endif + +int jpegli_quality_scaling(int quality); + +void jpegli_add_quant_table(j_compress_ptr cinfo, int which_tbl, + const unsigned int* basic_table, int scale_factor, + boolean force_baseline); + +void jpegli_simple_progression(j_compress_ptr cinfo); + +void 
jpegli_suppress_tables(j_compress_ptr cinfo, boolean suppress); + +#if JPEG_LIB_VERSION >= 70 +void jpegli_calc_jpeg_dimensions(j_compress_ptr cinfo); +#endif + +void jpegli_copy_critical_parameters(j_decompress_ptr srcinfo, + j_compress_ptr dstinfo); + +void jpegli_write_m_header(j_compress_ptr cinfo, int marker, + unsigned int datalen); + +void jpegli_write_m_byte(j_compress_ptr cinfo, int val); + +void jpegli_write_marker(j_compress_ptr cinfo, int marker, + const JOCTET* dataptr, unsigned int datalen); + +void jpegli_write_icc_profile(j_compress_ptr cinfo, const JOCTET* icc_data_ptr, + unsigned int icc_data_len); + +void jpegli_start_compress(j_compress_ptr cinfo, boolean write_all_tables); + +void jpegli_write_tables(j_compress_ptr cinfo); + +JDIMENSION jpegli_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines, + JDIMENSION num_lines); + +JDIMENSION jpegli_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION num_lines); + +void jpegli_write_coefficients(j_compress_ptr cinfo, + jvirt_barray_ptr* coef_arrays); + +void jpegli_finish_compress(j_compress_ptr cinfo); + +void jpegli_abort_compress(j_compress_ptr cinfo); + +void jpegli_destroy_compress(j_compress_ptr cinfo); + +// +// New API functions that are not available in libjpeg +// +// NOTE: This part of the API is still experimental and will probably change in +// the future. +// + +// Sets the butteraugli target distance for the compressor. This may override +// the default quantization table indexes based on jpeg colorspace, therefore +// it must be called after jpegli_set_defaults() or after the last +// jpegli_set_colorspace() or jpegli_default_colorspace() calls. +void jpegli_set_distance(j_compress_ptr cinfo, float distance, + boolean force_baseline); + +// Returns the butteraugli target distance for the given quality parameter. +float jpegli_quality_to_distance(int quality); + +// Enables distance parameter search to meet the given psnr target. 
+void jpegli_set_psnr(j_compress_ptr cinfo, float psnr, float tolerance, + float min_distance, float max_distance); + +// Changes the default behaviour of the encoder in the selection of quantization +// matrices and chroma subsampling. Must be called before jpegli_set_defaults() +// because some default setting depend on the XYB mode. +void jpegli_set_xyb_mode(j_compress_ptr cinfo); + +// Signals to the encoder that the pixel data that will be provided later +// through jpegli_write_scanlines() has this transfer function. This must be +// called before jpegli_set_defaults() because it changes the default +// quantization tables. +void jpegli_set_cicp_transfer_function(j_compress_ptr cinfo, int code); + +void jpegli_set_input_format(j_compress_ptr cinfo, JpegliDataType data_type, + JpegliEndianness endianness); + +// Sets whether or not the encoder uses adaptive quantization for creating more +// zero coefficients based on the local properties of the image. +// Enabled by default. +void jpegli_enable_adaptive_quantization(j_compress_ptr cinfo, boolean value); + +// Sets the default progression parameters, where level 0 is sequential, and +// greater level value means more progression steps. Default is 2. +void jpegli_set_progressive_level(j_compress_ptr cinfo, int level); + +// If this function is called before starting compression, the quality and +// linear quality parameters will be used to scale the standard quantization +// tables from Annex K of the JPEG standard. By default jpegli uses a different +// set of quantization tables and used different scaling parameters for DC and +// AC coefficients. Must be called before jpegli_set_defaults(). 
+void jpegli_use_standard_quant_tables(j_compress_ptr cinfo); + +#if defined(__cplusplus) || defined(c_plusplus) +} // extern "C" +#endif + +#endif // LIB_JPEGLI_ENCODE_H_ diff --git a/lib/jpegli/encode_api_test.cc b/lib/jpegli/encode_api_test.cc new file mode 100644 index 0000000..8d53557 --- /dev/null +++ b/lib/jpegli/encode_api_test.cc @@ -0,0 +1,837 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include <algorithm> +#include <cmath> +#include <vector> + +#include "lib/jpegli/encode.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/test_utils.h" +#include "lib/jpegli/testing.h" +#include "lib/jxl/sanitizers.h" + +namespace jpegli { +namespace { + +struct TestConfig { + TestImage input; + CompressParams jparams; + JpegIOMode input_mode = PIXELS; + double max_bpp; + double max_dist; +}; + +class EncodeAPITestParam : public ::testing::TestWithParam<TestConfig> {}; + +void GenerateInput(JpegIOMode input_mode, const CompressParams& jparams, + TestImage* input) { + GeneratePixels(input); + if (input_mode == RAW_DATA) { + GenerateRawData(jparams, input); + } else if (input_mode == COEFFICIENTS) { + GenerateCoeffs(jparams, input); + } +} + +TEST_P(EncodeAPITestParam, TestAPI) { + TestConfig config = GetParam(); + GenerateInput(config.input_mode, config.jparams, &config.input); + std::vector<uint8_t> compressed; + ASSERT_TRUE(EncodeWithJpegli(config.input, config.jparams, &compressed)); + if (config.jparams.icc.empty()) { + double bpp = + compressed.size() * 8.0 / (config.input.xsize * config.input.ysize); + printf("bpp: %f\n", bpp); + EXPECT_LT(bpp, config.max_bpp); + } + DecompressParams dparams; + dparams.output_mode = + config.input_mode == COEFFICIENTS ? 
COEFFICIENTS : PIXELS; + if (config.jparams.set_jpeg_colorspace && + config.jparams.jpeg_color_space == JCS_GRAYSCALE) { + ConvertToGrayscale(&config.input); + } else { + dparams.set_out_color_space = true; + dparams.out_color_space = config.input.color_space; + } + TestImage output; + DecodeWithLibjpeg(config.jparams, dparams, compressed, &output); + VerifyOutputImage(config.input, output, config.max_dist); +} + +TEST(EncodeAPITest, ReuseCinfoSameImageTwice) { + TestImage input; + input.xsize = 129; + input.ysize = 73; + CompressParams jparams; + GenerateInput(PIXELS, jparams, &input); + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + std::vector<uint8_t> compressed0; + std::vector<uint8_t> compressed1; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + EncodeWithJpegli(input, jparams, &cinfo); + compressed0.assign(buffer, buffer + buffer_size); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + EncodeWithJpegli(input, jparams, &cinfo); + compressed1.assign(buffer, buffer + buffer_size); + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); + ASSERT_EQ(compressed0.size(), compressed1.size()); + EXPECT_EQ(0, + memcmp(compressed0.data(), compressed1.data(), compressed0.size())); +} + +std::vector<TestConfig> GenerateBasicConfigs() { + std::vector<TestConfig> all_configs; + for (int samp : {1, 2}) { + for (int progr : {0, 2}) { + for (int optimize : {0, 1}) { + if (progr && optimize) continue; + TestConfig config; + config.input.xsize = 257 + samp * 37; + config.input.ysize = 265 + optimize * 17; + config.jparams.h_sampling = {samp, 1, 1}; + config.jparams.v_sampling = {samp, 1, 1}; + config.jparams.progressive_mode = progr; + config.jparams.optimize_coding = optimize; + config.max_dist = 2.4f; + GeneratePixels(&config.input); + 
all_configs.push_back(config); + } + } + } + return all_configs; +} + +TEST(EncodeAPITest, ReuseCinfoSameMemOutput) { + std::vector<TestConfig> all_configs = GenerateBasicConfigs(); + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + { + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + for (const TestConfig& config : all_configs) { + EncodeWithJpegli(config.input, config.jparams, &cinfo); + } + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + } + size_t pos = 0; + for (size_t i = 0; i < all_configs.size(); ++i) { + TestImage output; + pos += + DecodeWithLibjpeg(all_configs[i].jparams, DecompressParams(), nullptr, + 0, buffer + pos, buffer_size - pos, &output); + VerifyOutputImage(all_configs[i].input, output, all_configs[i].max_dist); + } + if (buffer) free(buffer); +} + +TEST(EncodeAPITest, ReuseCinfoSameStdOutput) { + std::vector<TestConfig> all_configs = GenerateBasicConfigs(); + FILE* tmpf = tmpfile(); + JXL_CHECK(tmpf); + { + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_stdio_dest(&cinfo, tmpf); + for (const TestConfig& config : all_configs) { + EncodeWithJpegli(config.input, config.jparams, &cinfo); + } + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + } + size_t total_size = ftell(tmpf); + rewind(tmpf); + std::vector<uint8_t> compressed(total_size); + JXL_CHECK(total_size == fread(&compressed[0], 1, total_size, tmpf)); + fclose(tmpf); + size_t pos = 0; + for (size_t i = 0; i < all_configs.size(); ++i) { + TestImage output; + pos += DecodeWithLibjpeg(all_configs[i].jparams, DecompressParams(), + nullptr, 0, &compressed[pos], + compressed.size() - pos, &output); + VerifyOutputImage(all_configs[i].input, output, 
all_configs[i].max_dist); + } +} + +TEST(EncodeAPITest, ReuseCinfoChangeParams) { + TestImage input, output; + CompressParams jparams; + DecompressParams dparams; + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + std::vector<uint8_t> compressed; + jpeg_compress_struct cinfo; + const auto max_rms = [](int q, int hs, int vs) { + if (hs == 1 && vs == 1) return q == 90 ? 2.2 : 0.6; + if (hs == 2 && vs == 2) return q == 90 ? 2.8 : 1.2; + return q == 90 ? 2.4 : 1.0; + }; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + input.xsize = 129; + input.ysize = 73; + dparams.set_out_color_space = true; + for (JpegIOMode input_mode : {PIXELS, RAW_DATA, PIXELS, COEFFICIENTS}) { + for (int h_samp : {2, 1}) { + for (int v_samp : {2, 1}) { + for (int progr : {0, 2}) { + for (int quality : {90, 100}) { + input.Clear(); + input.color_space = + (input_mode == RAW_DATA ? JCS_YCbCr : JCS_RGB); + jparams.quality = quality; + jparams.h_sampling = {h_samp, 1, 1}; + jparams.v_sampling = {v_samp, 1, 1}; + jparams.progressive_mode = progr; + printf( + "Generating input with quality %d chroma subsampling %dx%d " + "input mode %d progressive_mode %d\n", + quality, h_samp, v_samp, input_mode, progr); + GenerateInput(input_mode, jparams, &input); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + if (input_mode != COEFFICIENTS) { + cinfo.image_width = input.xsize; + cinfo.image_height = input.ysize; + cinfo.input_components = input.components; + jpegli_set_defaults(&cinfo); + jpegli_start_compress(&cinfo, TRUE); + jpegli_abort_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + } + EncodeWithJpegli(input, jparams, &cinfo); + compressed.resize(buffer_size); + std::copy_n(buffer, buffer_size, compressed.data()); + dparams.output_mode = + input_mode == COEFFICIENTS ? 
COEFFICIENTS : PIXELS; + dparams.out_color_space = input.color_space; + output.Clear(); + DecodeWithLibjpeg(jparams, dparams, compressed, &output); + VerifyOutputImage(input, output, + max_rms(quality, h_samp, v_samp)); + } + } + } + } + } + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncodeAPITest, AbbreviatedStreams) { + uint8_t* table_stream = nullptr; + unsigned long table_stream_size = 0; + uint8_t* data_stream = nullptr; + unsigned long data_stream_size = 0; + { + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &table_stream, &table_stream_size); + cinfo.input_components = 3; + cinfo.in_color_space = JCS_RGB; + jpegli_set_defaults(&cinfo); + jpegli_write_tables(&cinfo); + jpegli_mem_dest(&cinfo, &data_stream, &data_stream_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.optimize_coding = FALSE; + jpegli_set_progressive_level(&cinfo, 0); + jpegli_start_compress(&cinfo, FALSE); + JSAMPLE image[3] = {0}; + JSAMPROW row[] = {image}; + jpegli_write_scanlines(&cinfo, row, 1); + jpegli_finish_compress(&cinfo); + return true; + }; + EXPECT_TRUE(try_catch_block()); + EXPECT_LT(data_stream_size, 50); + jpegli_destroy_compress(&cinfo); + } + TestImage output; + DecodeWithLibjpeg(CompressParams(), DecompressParams(), table_stream, + table_stream_size, data_stream, data_stream_size, &output); + EXPECT_EQ(1, output.xsize); + EXPECT_EQ(1, output.ysize); + EXPECT_EQ(3, output.components); + EXPECT_EQ(0, output.pixels[0]); + EXPECT_EQ(0, output.pixels[1]); + EXPECT_EQ(0, output.pixels[2]); + if (table_stream) free(table_stream); + if (data_stream) free(data_stream); +} + +void CopyQuantTables(j_compress_ptr cinfo, uint16_t* quant_tables) { + for (int c = 0; c < cinfo->num_components; ++c) { + int quant_idx = cinfo->comp_info[c].quant_tbl_no; + JQUANT_TBL* 
quant_table = cinfo->quant_tbl_ptrs[quant_idx]; + for (int k = 0; k < DCTSIZE2; ++k) { + quant_tables[c * DCTSIZE2 + k] = quant_table->quantval[k]; + } + } +} + +TEST(EncodeAPITest, QualitySettings) { + // Test that jpegli_set_quality, jpegli_set_linear_quality and + // jpegli_quality_scaling are consistent with each other. + uint16_t quant_tables0[3 * DCTSIZE2]; + uint16_t quant_tables1[3 * DCTSIZE2]; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + cinfo.input_components = 3; + cinfo.in_color_space = JCS_RGB; + jpegli_set_defaults(&cinfo); + for (boolean baseline : {FALSE, TRUE}) { + for (int q = 1; q <= 100; ++q) { + jpegli_set_quality(&cinfo, q, baseline); + CopyQuantTables(&cinfo, quant_tables0); + jpegli_set_linear_quality(&cinfo, jpegli_quality_scaling(q), baseline); + CopyQuantTables(&cinfo, quant_tables1); + EXPECT_EQ(0, + memcmp(quant_tables0, quant_tables1, sizeof(quant_tables0))); +#if JPEG_LIB_VERSION >= 70 + for (int i = 0; i < NUM_QUANT_TBLS; ++i) { + cinfo.q_scale_factor[i] = jpegli_quality_scaling(q); + } + jpegli_default_qtables(&cinfo, baseline); + CopyQuantTables(&cinfo, quant_tables1); + EXPECT_EQ(0, + memcmp(quant_tables0, quant_tables1, sizeof(quant_tables0))); +#endif + } + } + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + // Test jpegli_quality_scaling for some specific values . 
+ EXPECT_EQ(5000, jpegli_quality_scaling(-1)); + EXPECT_EQ(5000, jpegli_quality_scaling(0)); + EXPECT_EQ(5000, jpegli_quality_scaling(1)); + EXPECT_EQ(100, jpegli_quality_scaling(50)); + EXPECT_EQ(50, jpegli_quality_scaling(75)); + EXPECT_EQ(20, jpegli_quality_scaling(90)); + EXPECT_EQ(0, jpegli_quality_scaling(100)); + EXPECT_EQ(0, jpegli_quality_scaling(101)); +} + +std::vector<TestConfig> GenerateTests() { + std::vector<TestConfig> all_tests; + for (int h_samp : {1, 2}) { + for (int v_samp : {1, 2}) { + for (int progr : {0, 2}) { + for (int optimize : {0, 1}) { + if (progr && optimize) continue; + TestConfig config; + config.jparams.h_sampling = {h_samp, 1, 1}; + config.jparams.v_sampling = {v_samp, 1, 1}; + config.jparams.progressive_mode = progr; + if (!progr) { + config.jparams.optimize_coding = optimize; + } + const float kMaxBpp[4] = {1.55, 1.4, 1.4, 1.32}; + const float kMaxDist[4] = {1.95, 2.2, 2.2, 2.0}; + const int idx = v_samp * 2 + h_samp - 3; + config.max_bpp = + kMaxBpp[idx] * (optimize ? 0.97 : 1.0) * (progr ? 
0.97 : 1.0); + config.max_dist = kMaxDist[idx]; + all_tests.push_back(config); + } + } + } + } + { + TestConfig config; + config.jparams.quality = 100; + config.max_bpp = 6.6; + config.max_dist = 0.6; + all_tests.push_back(config); + } + { + TestConfig config; + config.jparams.quality = 80; + config.max_bpp = 1.05; + config.max_dist = 2.7; + all_tests.push_back(config); + } + for (int samp : {1, 2}) { + for (int progr : {0, 2}) { + for (int optimize : {0, 1}) { + if (progr && optimize) continue; + TestConfig config; + config.input.xsize = 257; + config.input.ysize = 265; + config.jparams.h_sampling = {samp, 1, 1}; + config.jparams.v_sampling = {samp, 1, 1}; + config.jparams.progressive_mode = progr; + if (!progr) { + config.jparams.optimize_coding = optimize; + } + config.jparams.use_adaptive_quantization = false; + config.max_bpp = 2.05f; + config.max_dist = 2.3f; + all_tests.push_back(config); + } + } + } + for (int h0_samp : {1, 2, 4}) { + for (int v0_samp : {1, 2, 4}) { + for (int h2_samp : {1, 2, 4}) { + for (int v2_samp : {1, 2, 4}) { + TestConfig config; + config.input.xsize = 137; + config.input.ysize = 75; + config.jparams.progressive_mode = 2; + config.jparams.h_sampling = {h0_samp, 1, h2_samp}; + config.jparams.v_sampling = {v0_samp, 1, v2_samp}; + config.max_bpp = 2.5; + config.max_dist = 12.0; + all_tests.push_back(config); + } + } + } + } + for (int h0_samp : {1, 3}) { + for (int v0_samp : {1, 3}) { + for (int h2_samp : {1, 3}) { + for (int v2_samp : {1, 3}) { + TestConfig config; + config.input.xsize = 205; + config.input.ysize = 99; + config.jparams.progressive_mode = 2; + config.jparams.h_sampling = {h0_samp, 1, h2_samp}; + config.jparams.v_sampling = {v0_samp, 1, v2_samp}; + config.max_bpp = 2.5; + config.max_dist = 10.0; + all_tests.push_back(config); + } + } + } + } + for (int h0_samp : {1, 2, 3, 4}) { + for (int v0_samp : {1, 2, 3, 4}) { + TestConfig config; + config.input.xsize = 217; + config.input.ysize = 129; + 
config.jparams.progressive_mode = 2; + config.jparams.h_sampling = {h0_samp, 1, 1}; + config.jparams.v_sampling = {v0_samp, 1, 1}; + config.max_bpp = 2.0; + config.max_dist = 5.5; + all_tests.push_back(config); + } + } + for (int p = 0; p < 3 + NumTestScanScripts(); ++p) { + for (int samp : {1, 2}) { + for (int quality : {100, 90, 1}) { + for (int r : {0, 1024, 1}) { + for (int optimize : {0, 1}) { + bool progressive = p == 1 || p == 2 || p > 4; + if (progressive && !optimize) continue; + TestConfig config; + config.input.xsize = 273; + config.input.ysize = 265; + config.jparams.progressive_mode = p; + if (!progressive) { + config.jparams.optimize_coding = optimize; + } + config.jparams.h_sampling = {samp, 1, 1}; + config.jparams.v_sampling = {samp, 1, 1}; + config.jparams.quality = quality; + config.jparams.restart_interval = r; + config.max_bpp = quality == 100 ? 8.0 : 1.9; + if (r == 1) { + config.max_bpp += 10.0; + } + config.max_dist = quality == 1 ? 20.0 : 2.1; + all_tests.push_back(config); + } + } + } + } + } + { + TestConfig config; + config.jparams.simple_progression = true; + config.max_bpp = 1.48; + config.max_dist = 2.0; + all_tests.push_back(config); + } + { + TestConfig config; + config.input_mode = COEFFICIENTS; + config.jparams.h_sampling = {2, 1, 1}; + config.jparams.v_sampling = {2, 1, 1}; + config.jparams.progressive_mode = 0; + config.jparams.optimize_coding = 0; + config.max_bpp = 16; + config.max_dist = 0.0; + all_tests.push_back(config); + } + { + TestConfig config; + config.jparams.xyb_mode = true; + config.jparams.progressive_mode = 2; + config.max_bpp = 1.5; + config.max_dist = 3.5; + all_tests.push_back(config); + } + { + TestConfig config; + config.jparams.libjpeg_mode = true; + config.max_bpp = 2.1; + config.max_dist = 1.7; + all_tests.push_back(config); + } + + for (J_COLOR_SPACE in_color_space : {JCS_RGB, JCS_YCbCr, JCS_GRAYSCALE}) { + for (J_COLOR_SPACE jpeg_color_space : {JCS_RGB, JCS_YCbCr, JCS_GRAYSCALE}) { + if (jpeg_color_space 
== JCS_RGB && in_color_space == JCS_YCbCr) continue; + TestConfig config; + config.input.xsize = config.input.ysize = 256; + config.input.color_space = in_color_space; + config.jparams.set_jpeg_colorspace = true; + config.jparams.jpeg_color_space = jpeg_color_space; + config.max_bpp = jpeg_color_space == JCS_RGB ? 4.5 : 1.85; + config.max_dist = jpeg_color_space == JCS_RGB ? 1.4 : 2.05; + all_tests.push_back(config); + } + } + for (J_COLOR_SPACE in_color_space : {JCS_CMYK, JCS_YCCK}) { + for (J_COLOR_SPACE jpeg_color_space : {JCS_CMYK, JCS_YCCK}) { + if (jpeg_color_space == JCS_CMYK && in_color_space == JCS_YCCK) continue; + TestConfig config; + config.input.xsize = config.input.ysize = 256; + config.input.color_space = in_color_space; + if (in_color_space != jpeg_color_space) { + config.jparams.set_jpeg_colorspace = true; + config.jparams.jpeg_color_space = jpeg_color_space; + } + config.max_bpp = jpeg_color_space == JCS_CMYK ? 4.0 : 3.6; + config.max_dist = jpeg_color_space == JCS_CMYK ? 
1.2 : 1.5; + all_tests.push_back(config); + } + } + { + TestConfig config; + config.input.color_space = JCS_YCbCr; + config.max_bpp = 1.6; + config.max_dist = 1.35; + all_tests.push_back(config); + } + for (bool xyb : {false, true}) { + TestConfig config; + config.input.color_space = JCS_GRAYSCALE; + config.jparams.xyb_mode = xyb; + config.max_bpp = 1.35; + config.max_dist = 1.4; + all_tests.push_back(config); + } + for (int channels = 1; channels <= 4; ++channels) { + TestConfig config; + config.input.color_space = JCS_UNKNOWN; + config.input.components = channels; + config.max_bpp = 1.35 * channels; + config.max_dist = 1.4; + all_tests.push_back(config); + } + for (size_t r : {1, 3, 17, 1024}) { + for (int progr : {0, 2}) { + TestConfig config; + config.jparams.restart_interval = r; + config.jparams.progressive_mode = progr; + config.max_bpp = 1.58 + 5.5 / r; + config.max_dist = 2.2; + all_tests.push_back(config); + } + } + for (size_t rr : {1, 3, 8, 100}) { + TestConfig config; + config.jparams.restart_in_rows = rr; + config.max_bpp = 1.6; + config.max_dist = 2.2; + all_tests.push_back(config); + } + for (int type : {0, 1, 10, 100, 10000}) { + for (int scale : {1, 50, 100, 200, 500}) { + for (bool add_raw : {false, true}) { + for (bool baseline : {true, false}) { + if (!baseline && (add_raw || type * scale < 25500)) continue; + TestConfig config; + config.input.xsize = 64; + config.input.ysize = 64; + CustomQuantTable table; + table.table_type = type; + table.scale_factor = scale; + table.force_baseline = baseline; + table.add_raw = add_raw; + table.Generate(); + config.jparams.optimize_coding = 1; + config.jparams.quant_tables.push_back(table); + config.jparams.quant_indexes = {0, 0, 0}; + float q = (type == 0 ? 
16 : type) * scale * 0.01f; + if (baseline && !add_raw) q = std::max(1.0f, std::min(255.0f, q)); + config.max_bpp = 1.5f + 25.0f / q; + config.max_dist = 0.6f + 0.25f * q; + all_tests.push_back(config); + } + } + } + } + for (int qidx = 0; qidx < 8; ++qidx) { + if (qidx == 3) continue; + TestConfig config; + config.input.xsize = 256; + config.input.ysize = 256; + config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1, + (qidx >> 0) & 1}; + config.max_bpp = 2.25; + config.max_dist = 2.8; + all_tests.push_back(config); + } + for (int qidx = 0; qidx < 8; ++qidx) { + for (int slot_idx = 0; slot_idx < 2; ++slot_idx) { + if (qidx == 0 && slot_idx == 0) continue; + TestConfig config; + config.input.xsize = 256; + config.input.ysize = 256; + config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1, + (qidx >> 0) & 1}; + CustomQuantTable table; + table.slot_idx = slot_idx; + table.Generate(); + config.jparams.quant_tables.push_back(table); + config.max_bpp = 2.3; + config.max_dist = 2.9; + all_tests.push_back(config); + } + } + for (int qidx = 0; qidx < 8; ++qidx) { + for (bool xyb : {false, true}) { + TestConfig config; + config.input.xsize = 256; + config.input.ysize = 256; + config.jparams.xyb_mode = xyb; + config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1, + (qidx >> 0) & 1}; + { + CustomQuantTable table; + table.slot_idx = 0; + table.Generate(); + config.jparams.quant_tables.push_back(table); + } + { + CustomQuantTable table; + table.slot_idx = 1; + table.table_type = 20; + table.Generate(); + config.jparams.quant_tables.push_back(table); + } + config.max_bpp = 2.0; + config.max_dist = 3.85; + all_tests.push_back(config); + } + } + for (bool xyb : {false, true}) { + TestConfig config; + config.input.xsize = 256; + config.input.ysize = 256; + config.jparams.xyb_mode = xyb; + config.jparams.quant_indexes = {0, 1, 2}; + { + CustomQuantTable table; + table.slot_idx = 0; + table.Generate(); + config.jparams.quant_tables.push_back(table); + } 
+ { + CustomQuantTable table; + table.slot_idx = 1; + table.table_type = 20; + table.Generate(); + config.jparams.quant_tables.push_back(table); + } + { + CustomQuantTable table; + table.slot_idx = 2; + table.table_type = 30; + table.Generate(); + config.jparams.quant_tables.push_back(table); + } + config.max_bpp = 1.5; + config.max_dist = 3.75; + all_tests.push_back(config); + } + { + TestConfig config; + config.jparams.comp_ids = {7, 17, 177}; + config.input.xsize = config.input.ysize = 128; + config.max_bpp = 2.25; + config.max_dist = 2.4; + all_tests.push_back(config); + } + for (int override_JFIF : {-1, 0, 1}) { + for (int override_Adobe : {-1, 0, 1}) { + if (override_JFIF == -1 && override_Adobe == -1) continue; + TestConfig config; + config.input.xsize = config.input.ysize = 128; + config.jparams.override_JFIF = override_JFIF; + config.jparams.override_Adobe = override_Adobe; + config.max_bpp = 2.25; + config.max_dist = 2.4; + all_tests.push_back(config); + } + } + { + TestConfig config; + config.input.xsize = config.input.ysize = 256; + config.max_bpp = 1.85; + config.max_dist = 2.05; + config.jparams.add_marker = true; + all_tests.push_back(config); + } + for (size_t icc_size : {728, 70000, 1000000}) { + TestConfig config; + config.input.xsize = config.input.ysize = 256; + config.max_dist = 2.05; + config.jparams.icc.resize(icc_size); + for (size_t i = 0; i < icc_size; ++i) { + config.jparams.icc[i] = (i * 17) & 0xff; + } + all_tests.push_back(config); + } + for (JpegIOMode input_mode : {PIXELS, RAW_DATA, COEFFICIENTS}) { + TestConfig config; + config.input.xsize = config.input.ysize = 256; + config.input_mode = input_mode; + if (input_mode == RAW_DATA) { + config.input.color_space = JCS_YCbCr; + } + config.jparams.progressive_mode = 0; + config.jparams.optimize_coding = 0; + config.max_bpp = 1.85; + config.max_dist = 2.05; + if (input_mode == COEFFICIENTS) { + config.max_bpp = 3.5; + config.max_dist = 0.0; + } + all_tests.push_back(config); + 
config.jparams.use_flat_dc_luma_code = true; + all_tests.push_back(config); + } + for (int xsize : {640, 641, 648, 649}) { + for (int ysize : {640, 641, 648, 649}) { + for (int h_sampling : {1, 2}) { + for (int v_sampling : {1, 2}) { + if (h_sampling == 1 && v_sampling == 1) continue; + for (int progr : {0, 2}) { + TestConfig config; + config.input.xsize = xsize; + config.input.ysize = ysize; + config.input.color_space = JCS_YCbCr; + config.jparams.h_sampling = {h_sampling, 1, 1}; + config.jparams.v_sampling = {v_sampling, 1, 1}; + config.jparams.progressive_mode = progr; + config.input_mode = RAW_DATA; + config.max_bpp = 1.75; + config.max_dist = 2.0; + all_tests.push_back(config); + config.input_mode = COEFFICIENTS; + if (xsize & 1) { + config.jparams.add_marker = true; + } + config.max_bpp = 24.0; + all_tests.push_back(config); + } + } + } + } + } + for (JpegliDataType data_type : {JPEGLI_TYPE_UINT16, JPEGLI_TYPE_FLOAT}) { + for (JpegliEndianness endianness : + {JPEGLI_LITTLE_ENDIAN, JPEGLI_BIG_ENDIAN, JPEGLI_NATIVE_ENDIAN}) { + J_COLOR_SPACE colorspace[4] = {JCS_GRAYSCALE, JCS_UNKNOWN, JCS_RGB, + JCS_CMYK}; + float max_bpp[4] = {1.32, 2.7, 1.6, 4.0}; + for (int channels = 1; channels <= 4; ++channels) { + TestConfig config; + config.input.data_type = data_type; + config.input.endianness = endianness; + config.input.components = channels; + config.input.color_space = colorspace[channels - 1]; + config.max_bpp = max_bpp[channels - 1]; + config.max_dist = 2.2; + all_tests.push_back(config); + } + } + } + for (int smoothing : {1, 5, 50, 100}) { + for (int h_sampling : {1, 2}) { + for (int v_sampling : {1, 2}) { + TestConfig config; + config.input.xsize = 257; + config.input.ysize = 265; + config.jparams.smoothing_factor = smoothing; + config.jparams.h_sampling = {h_sampling, 1, 1}; + config.jparams.v_sampling = {v_sampling, 1, 1}; + config.max_bpp = 1.85; + config.max_dist = 3.05f; + all_tests.push_back(config); + } + } + } + return all_tests; +}; + +std::ostream& 
operator<<(std::ostream& os, const TestConfig& c) { + os << c.input; + os << c.jparams; + if (c.input_mode == RAW_DATA) { + os << "RawDataIn"; + } else if (c.input_mode == COEFFICIENTS) { + os << "WriteCoeffs"; + } + return os; +} + +std::string TestDescription( + const testing::TestParamInfo<EncodeAPITestParam::ParamType>& info) { + std::stringstream name; + name << info.param; + return name.str(); +} + +JPEGLI_INSTANTIATE_TEST_SUITE_P(EncodeAPITest, EncodeAPITestParam, + testing::ValuesIn(GenerateTests()), + TestDescription); +} // namespace +} // namespace jpegli diff --git a/lib/jpegli/encode_finish.cc b/lib/jpegli/encode_finish.cc new file mode 100644 index 0000000..955676b --- /dev/null +++ b/lib/jpegli/encode_finish.cc @@ -0,0 +1,230 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/encode_finish.h" + +#include <cmath> +#include <limits> + +#include "lib/jpegli/error.h" +#include "lib/jpegli/memory_manager.h" +#include "lib/jpegli/quant.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/encode_finish.cc" +#include <hwy/foreach_target.h> +#include <hwy/highway.h> + +#include "lib/jpegli/dct-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::GetLane; + +using D = HWY_FULL(float); +using DI = HWY_FULL(int32_t); +using DI16 = Rebind<int16_t, HWY_FULL(int32_t)>; + +void ReQuantizeBlock(int16_t* block, const float* qmc, float aq_strength, + const float* zero_bias_offset, + const float* zero_bias_mul) { + D d; + DI di; + DI16 di16; + const auto aq_mul = Set(d, aq_strength); + for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) { + const auto in = Load(di16, block + k); + const auto val = ConvertTo(d, PromoteTo(di, in)); + const auto q = Load(d, qmc + k); + const auto qval = Mul(val, q); + const auto zb_offset = Load(d, zero_bias_offset + k); + const auto zb_mul = Load(d, zero_bias_mul + k); + const auto threshold = Add(zb_offset, Mul(zb_mul, aq_mul)); + const auto nzero_mask = Ge(Abs(qval), threshold); + const auto iqval = IfThenElseZero(nzero_mask, Round(qval)); + Store(DemoteTo(di16, ConvertTo(di, iqval)), di16, block + k); + } +} + +float BlockError(const int16_t* block, const float* qmc, const float* iqmc, + const float aq_strength, const float* zero_bias_offset, + const float* zero_bias_mul) { + D d; + DI di; + DI16 di16; + auto err = Zero(d); + const auto scale = Set(d, 1.0 / 16); + const auto aq_mul = Set(d, aq_strength); + for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) { + const auto in = Load(di16, block + k); + const auto val = ConvertTo(d, PromoteTo(di, in)); + const auto q = Load(d, qmc + k); + const auto qval = Mul(val, q); + const auto zb_offset = Load(d, zero_bias_offset + k); + const auto zb_mul = Load(d, zero_bias_mul + k); + const auto threshold = Add(zb_offset, Mul(zb_mul, aq_mul)); + const auto nzero_mask = Ge(Abs(qval), threshold); + const auto iqval = IfThenElseZero(nzero_mask, Round(qval)); + const auto invq = Load(d, iqmc + k); + const auto rval = Mul(iqval, invq); + const auto diff = Mul(Sub(val, rval), scale); + err = Add(err, Mul(diff, diff)); + } + return GetLane(SumOfLanes(d, err)); +} + +void ComputeInverseWeights(const float* qmc, float* iqmc) { + for 
(int k = 0; k < 64; ++k) { + iqmc[k] = 1.0f / qmc[k]; + } +} + +float ComputePSNR(j_compress_ptr cinfo, int sampling) { + jpeg_comp_master* m = cinfo->master; + InitQuantizer(cinfo, QuantPass::SEARCH_SECOND_PASS); + double error = 0.0; + size_t num = 0; + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + const float* qmc = m->quant_mul[c]; + const int h_factor = m->h_factor[c]; + const int v_factor = m->v_factor[c]; + const float* zero_bias_offset = m->zero_bias_offset[c]; + const float* zero_bias_mul = m->zero_bias_mul[c]; + HWY_ALIGN float iqmc[64]; + ComputeInverseWeights(qmc, iqmc); + for (JDIMENSION by = 0; by < comp->height_in_blocks; by += sampling) { + JBLOCKARRAY ba = GetBlockRow(cinfo, c, by); + const float* qf = m->quant_field.Row(by * v_factor); + for (JDIMENSION bx = 0; bx < comp->width_in_blocks; bx += sampling) { + error += BlockError(&ba[0][bx][0], qmc, iqmc, qf[bx * h_factor], + zero_bias_offset, zero_bias_mul); + num += DCTSIZE2; + } + } + } + return 4.3429448f * log(num / (error / 255. 
/ 255.)); +} + +void ReQuantizeCoeffs(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + InitQuantizer(cinfo, QuantPass::SEARCH_SECOND_PASS); + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + const float* qmc = m->quant_mul[c]; + const int h_factor = m->h_factor[c]; + const int v_factor = m->v_factor[c]; + const float* zero_bias_offset = m->zero_bias_offset[c]; + const float* zero_bias_mul = m->zero_bias_mul[c]; + for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) { + JBLOCKARRAY ba = GetBlockRow(cinfo, c, by); + const float* qf = m->quant_field.Row(by * v_factor); + for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) { + ReQuantizeBlock(&ba[0][bx][0], qmc, qf[bx * h_factor], zero_bias_offset, + zero_bias_mul); + } + } + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jpegli { +namespace { +HWY_EXPORT(ComputePSNR); +HWY_EXPORT(ReQuantizeCoeffs); + +void ReQuantizeCoeffs(j_compress_ptr cinfo) { + HWY_DYNAMIC_DISPATCH(ReQuantizeCoeffs)(cinfo); +} + +float ComputePSNR(j_compress_ptr cinfo, int sampling) { + return HWY_DYNAMIC_DISPATCH(ComputePSNR)(cinfo, sampling); +} + +void UpdateDistance(j_compress_ptr cinfo, float distance) { + float distances[NUM_QUANT_TBLS] = {distance, distance, distance}; + SetQuantMatrices(cinfo, distances, /*add_two_chroma_tables=*/true); +} + +float Clamp(float val, float minval, float maxval) { + return std::max(minval, std::min(maxval, val)); +} + +#define PSNR_SEARCH_DBG 0 + +float FindDistanceForPSNR(j_compress_ptr cinfo) { + constexpr int kMaxIters = 20; + const float psnr_target = cinfo->master->psnr_target; + const float tolerance = cinfo->master->psnr_tolerance; + const float min_dist = cinfo->master->min_distance; + const float max_dist = cinfo->master->max_distance; + float d = Clamp(1.0f, min_dist, max_dist); + for (int sampling 
: {4, 1}) { + float best_diff = std::numeric_limits<float>::max(); + float best_distance = 0.0f; + float best_psnr = 0.0; + float dmin = min_dist; + float dmax = max_dist; + bool found_lower_bound = false; + bool found_upper_bound = false; + for (int i = 0; i < kMaxIters; ++i) { + UpdateDistance(cinfo, d); + float psnr = ComputePSNR(cinfo, sampling); + if (psnr > psnr_target) { + dmin = d; + found_lower_bound = true; + } else { + dmax = d; + found_upper_bound = true; + } +#if (PSNR_SEARCH_DBG > 1) + printf("sampling %d iter %2d d %7.4f psnr %.2f", sampling, i, d, psnr); + if (found_upper_bound && found_lower_bound) { + printf(" d-interval: [ %7.4f .. %7.4f ]", dmin, dmax); + } + printf("\n"); +#endif + float diff = std::abs(psnr - psnr_target); + if (diff < best_diff) { + best_diff = diff; + best_distance = d; + best_psnr = psnr; + } + if (diff < tolerance * psnr_target || dmin == dmax) { + break; + } + if (!found_lower_bound || !found_upper_bound) { + d *= std::exp(0.15f * (psnr - psnr_target)); + } else { + d = 0.5f * (dmin + dmax); + } + d = Clamp(d, min_dist, max_dist); + } + d = best_distance; + if (sampling == 1 && PSNR_SEARCH_DBG) { + printf("Final PSNR %.2f at distance %.4f\n", best_psnr, d); + } + } + return d; +} + +} // namespace + +void QuantizetoPSNR(j_compress_ptr cinfo) { + float distance = FindDistanceForPSNR(cinfo); + UpdateDistance(cinfo, distance); + ReQuantizeCoeffs(cinfo); +} + +} // namespace jpegli +#endif // HWY_ONCE diff --git a/lib/jpegli/encode_finish.h b/lib/jpegli/encode_finish.h new file mode 100644 index 0000000..f6862de --- /dev/null +++ b/lib/jpegli/encode_finish.h @@ -0,0 +1,17 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JPEGLI_ENCODE_FINISH_H_ +#define LIB_JPEGLI_ENCODE_FINISH_H_ + +#include "lib/jpegli/encode_internal.h" + +namespace jpegli { + +void QuantizetoPSNR(j_compress_ptr cinfo); + +} // namespace jpegli + +#endif // LIB_JPEGLI_ENCODE_FINISH_H_ diff --git a/lib/jpegli/encode_internal.h b/lib/jpegli/encode_internal.h new file mode 100644 index 0000000..4dbef97 --- /dev/null +++ b/lib/jpegli/encode_internal.h @@ -0,0 +1,141 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_ENCODE_INTERNAL_H_ +#define LIB_JPEGLI_ENCODE_INTERNAL_H_ + +#include <stdint.h> + +#include "lib/jpegli/bit_writer.h" +#include "lib/jpegli/common.h" +#include "lib/jpegli/common_internal.h" +#include "lib/jpegli/encode.h" + +namespace jpegli { + +constexpr unsigned char kICCSignature[12] = { + 0x49, 0x43, 0x43, 0x5F, 0x50, 0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x00}; +constexpr int kICCMarker = JPEG_APP0 + 2; + +constexpr int kDefaultProgressiveLevel = 0; + +typedef int16_t coeff_t; + +struct HuffmanCodeTable { + int depth[256]; + int code[256]; +}; + +struct Token { + uint8_t context; + uint8_t symbol; + uint16_t bits; + Token(int c, int s, int b) : context(c), symbol(s), bits(b) {} +}; + +struct TokenArray { + Token* tokens; + size_t num_tokens; +}; + +struct RefToken { + uint8_t symbol; + uint8_t refbits; +}; + +struct ScanTokenInfo { + RefToken* tokens; + size_t num_tokens; + uint8_t* refbits; + uint16_t* eobruns; + size_t* restarts; + size_t num_restarts; + size_t num_nonzeros; + size_t num_future_nonzeros; + size_t token_offset; + size_t restart_interval; + size_t MCUs_per_row; + size_t MCU_rows_in_scan; + size_t blocks_in_MCU; + size_t num_blocks; +}; + +} // namespace jpegli + +struct jpeg_comp_master { + jpegli::RowBuffer<float> input_buffer[jpegli::kMaxComponents]; + jpegli::RowBuffer<float>* smooth_input[jpegli::kMaxComponents]; 
+ jpegli::RowBuffer<float>* raw_data[jpegli::kMaxComponents]; + bool force_baseline; + bool xyb_mode; + uint8_t cicp_transfer_function; + bool use_std_tables; + bool use_adaptive_quantization; + int progressive_level; + size_t xsize_blocks; + size_t ysize_blocks; + size_t blocks_per_iMCU_row; + jpegli::ScanTokenInfo* scan_token_info; + JpegliDataType data_type; + JpegliEndianness endianness; + void (*input_method)(const uint8_t* row_in, size_t len, + float* row_out[jpegli::kMaxComponents]); + void (*color_transform)(float* row[jpegli::kMaxComponents], size_t len); + void (*downsample_method[jpegli::kMaxComponents])( + float* rows_in[MAX_SAMP_FACTOR], size_t len, float* row_out); + float* quant_mul[jpegli::kMaxComponents]; + float* zero_bias_offset[jpegli::kMaxComponents]; + float* zero_bias_mul[jpegli::kMaxComponents]; + int h_factor[jpegli::kMaxComponents]; + int v_factor[jpegli::kMaxComponents]; + // Array of Huffman tables that will be encoded in one or more DHT segments. + // In progressive mode we compute all Huffman tables that will be used in any + // of the scans, thus we can have more than 4 tables here. + JHUFF_TBL* huffman_tables; + size_t num_huffman_tables; + // Array of num_huffman_tables slot ids, where the ith element is the slot id + // of the ith Huffman table, as it appears in the DHT segment. The range of + // the slot ids is 0..3 for DC and 16..19 for AC Huffman codes. + uint8_t* slot_id_map; + // Maps context ids to an index in the huffman_tables array. Each component in + // each scan has a DC and AC context id, which are defined as follows: + // - DC context id is the component index (relative to cinfo->comp_info) of + // the scan component + // - AC context ids start at 4 and are increased for each component of each + // scan that have AC components (i.e. Se > 0) + uint8_t* context_map; + size_t num_contexts; + // Array of cinfo->num_scans context ids, where the ith element is the context + // id of the first AC component of the ith scan. 
+ uint8_t* ac_ctx_offset; + // Array of num_huffman tables derived coding tables. + jpegli::HuffmanCodeTable* coding_tables; + float* diff_buffer; + jpegli::RowBuffer<float> fuzzy_erosion_tmp; + jpegli::RowBuffer<float> pre_erosion; + jpegli::RowBuffer<float> quant_field; + jvirt_barray_ptr* coeff_buffers; + size_t next_input_row; + size_t next_iMCU_row; + size_t next_dht_index; + size_t last_restart_interval; + JCOEF last_dc_coeff[MAX_COMPS_IN_SCAN]; + jpegli::JpegBitWriter bw; + float* dct_buffer; + int32_t* block_tmp; + jpegli::TokenArray* token_arrays; + size_t cur_token_array; + jpegli::Token* next_token; + size_t num_tokens; + size_t total_num_tokens; + jpegli::RefToken* next_refinement_token; + uint8_t* next_refinement_bit; + float psnr_target; + float psnr_tolerance; + float min_distance; + float max_distance; +}; + +#endif // LIB_JPEGLI_ENCODE_INTERNAL_H_ diff --git a/lib/jpegli/encode_streaming.cc b/lib/jpegli/encode_streaming.cc new file mode 100644 index 0000000..89dbd81 --- /dev/null +++ b/lib/jpegli/encode_streaming.cc @@ -0,0 +1,259 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jpegli/encode_streaming.h" + +#include <cmath> + +#include "lib/jpegli/bit_writer.h" +#include "lib/jpegli/bitstream.h" +#include "lib/jpegli/entropy_coding.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/memory_manager.h" +#include "lib/jxl/base/bits.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/encode_streaming.cc" +#include <hwy/foreach_target.h> +#include <hwy/highway.h> + +#include "lib/jpegli/dct-inl.h" +#include "lib/jpegli/entropy_coding-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { + +static const int kStreamingModeCoefficients = 0; +static const int kStreamingModeTokens = 1; +static const int kStreamingModeBits = 2; + +namespace { +void ZigZagShuffle(int32_t* JXL_RESTRICT block) { + // TODO(szabadka) SIMDify this. + int32_t tmp[DCTSIZE2]; + tmp[0] = block[0]; + tmp[1] = block[1]; + tmp[2] = block[8]; + tmp[3] = block[16]; + tmp[4] = block[9]; + tmp[5] = block[2]; + tmp[6] = block[3]; + tmp[7] = block[10]; + tmp[8] = block[17]; + tmp[9] = block[24]; + tmp[10] = block[32]; + tmp[11] = block[25]; + tmp[12] = block[18]; + tmp[13] = block[11]; + tmp[14] = block[4]; + tmp[15] = block[5]; + tmp[16] = block[12]; + tmp[17] = block[19]; + tmp[18] = block[26]; + tmp[19] = block[33]; + tmp[20] = block[40]; + tmp[21] = block[48]; + tmp[22] = block[41]; + tmp[23] = block[34]; + tmp[24] = block[27]; + tmp[25] = block[20]; + tmp[26] = block[13]; + tmp[27] = block[6]; + tmp[28] = block[7]; + tmp[29] = block[14]; + tmp[30] = block[21]; + tmp[31] = block[28]; + tmp[32] = block[35]; + tmp[33] = block[42]; + tmp[34] = block[49]; + tmp[35] = block[56]; + tmp[36] = block[57]; + tmp[37] = block[50]; + tmp[38] = block[43]; + tmp[39] = block[36]; + tmp[40] = block[29]; + tmp[41] = block[22]; + tmp[42] = block[15]; + tmp[43] = block[23]; + tmp[44] = block[30]; + tmp[45] = block[37]; + tmp[46] = block[44]; + tmp[47] = block[51]; + tmp[48] = block[58]; + tmp[49] = block[59]; + tmp[50] = block[52]; + 
tmp[51] = block[45]; + tmp[52] = block[38]; + tmp[53] = block[31]; + tmp[54] = block[39]; + tmp[55] = block[46]; + tmp[56] = block[53]; + tmp[57] = block[60]; + tmp[58] = block[61]; + tmp[59] = block[54]; + tmp[60] = block[47]; + tmp[61] = block[55]; + tmp[62] = block[62]; + tmp[63] = block[63]; + memcpy(block, tmp, DCTSIZE2 * sizeof(tmp[0])); +} +} // namespace + +template <int kMode> +void ProcessiMCURow(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + JpegBitWriter* bw = &m->bw; + int xsize_mcus = DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor); + int ysize_mcus = DivCeil(cinfo->image_height, 8 * cinfo->max_v_samp_factor); + int mcu_y = m->next_iMCU_row; + int32_t* block = m->block_tmp; + int32_t* symbols = m->block_tmp + DCTSIZE2; + int32_t* nonzero_idx = m->block_tmp + 3 * DCTSIZE2; + coeff_t* JXL_RESTRICT last_dc_coeff = m->last_dc_coeff; + bool adaptive_quant = m->use_adaptive_quantization && m->psnr_target == 0; + JBLOCKARRAY ba[kMaxComponents]; + if (kMode == kStreamingModeCoefficients) { + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + int by0 = mcu_y * comp->v_samp_factor; + int block_rows_left = comp->height_in_blocks - by0; + int max_block_rows = std::min(comp->v_samp_factor, block_rows_left); + ba[c] = (*cinfo->mem->access_virt_barray)( + reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[c], by0, + max_block_rows, true); + } + } + if (kMode == kStreamingModeTokens) { + TokenArray* ta = &m->token_arrays[m->cur_token_array]; + int max_tokens_per_mcu_row = MaxNumTokensPerMCURow(cinfo); + if (ta->num_tokens + max_tokens_per_mcu_row > m->num_tokens) { + if (ta->tokens) { + m->total_num_tokens += ta->num_tokens; + ++m->cur_token_array; + ta = &m->token_arrays[m->cur_token_array]; + } + m->num_tokens = + EstimateNumTokens(cinfo, mcu_y, ysize_mcus, m->total_num_tokens, + max_tokens_per_mcu_row); + ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE); + 
m->next_token = ta->tokens; + } + } + const float* imcu_start[kMaxComponents]; + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + imcu_start[c] = m->raw_data[c]->Row(mcu_y * comp->v_samp_factor * DCTSIZE); + } + const float* qf = nullptr; + if (adaptive_quant) { + qf = m->quant_field.Row(0); + } + HuffmanCodeTable* dc_code = nullptr; + HuffmanCodeTable* ac_code = nullptr; + const size_t qf_stride = m->quant_field.stride(); + for (int mcu_x = 0; mcu_x < xsize_mcus; ++mcu_x) { + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + if (kMode == kStreamingModeBits) { + dc_code = &m->coding_tables[m->context_map[c]]; + ac_code = &m->coding_tables[m->context_map[c + 4]]; + } + float* JXL_RESTRICT qmc = m->quant_mul[c]; + const size_t stride = m->raw_data[c]->stride(); + const int h_factor = m->h_factor[c]; + const float* zero_bias_offset = m->zero_bias_offset[c]; + const float* zero_bias_mul = m->zero_bias_mul[c]; + float aq_strength = 0.0f; + for (int iy = 0; iy < comp->v_samp_factor; ++iy) { + for (int ix = 0; ix < comp->h_samp_factor; ++ix) { + size_t by = mcu_y * comp->v_samp_factor + iy; + size_t bx = mcu_x * comp->h_samp_factor + ix; + if (bx >= comp->width_in_blocks || by >= comp->height_in_blocks) { + if (kMode == kStreamingModeTokens) { + *m->next_token++ = Token(c, 0, 0); + *m->next_token++ = Token(c + 4, 0, 0); + } else if (kMode == kStreamingModeBits) { + WriteBits(bw, dc_code->depth[0], dc_code->code[0]); + WriteBits(bw, ac_code->depth[0], ac_code->code[0]); + } + continue; + } + if (adaptive_quant) { + aq_strength = qf[iy * qf_stride + bx * h_factor]; + } + const float* pixels = imcu_start[c] + (iy * stride + bx) * DCTSIZE; + ComputeCoefficientBlock(pixels, stride, qmc, last_dc_coeff[c], + aq_strength, zero_bias_offset, zero_bias_mul, + m->dct_buffer, block); + if (kMode == kStreamingModeCoefficients) { + JCOEF* cblock = &ba[c][iy][bx][0]; + for (int k 
= 0; k < DCTSIZE2; ++k) { + cblock[k] = block[kJPEGNaturalOrder[k]]; + } + } + block[0] -= last_dc_coeff[c]; + last_dc_coeff[c] += block[0]; + if (kMode == kStreamingModeTokens) { + ComputeTokensForBlock<int32_t, false>(block, 0, c, c + 4, + &m->next_token); + } else if (kMode == kStreamingModeBits) { + ZigZagShuffle(block); + const int num_nonzeros = CompactBlock(block, nonzero_idx); + const bool emit_eob = nonzero_idx[num_nonzeros - 1] < 1008; + ComputeSymbols(num_nonzeros, nonzero_idx, block, symbols); + WriteBlock(symbols, block, num_nonzeros, emit_eob, dc_code, ac_code, + bw); + } + } + } + } + } + if (kMode == kStreamingModeTokens) { + TokenArray* ta = &m->token_arrays[m->cur_token_array]; + ta->num_tokens = m->next_token - ta->tokens; + ScanTokenInfo* sti = &m->scan_token_info[0]; + sti->num_tokens = m->total_num_tokens + ta->num_tokens; + sti->restarts[0] = sti->num_tokens; + } +} + +void ComputeCoefficientsForiMCURow(j_compress_ptr cinfo) { + ProcessiMCURow<kStreamingModeCoefficients>(cinfo); +} + +void ComputeTokensForiMCURow(j_compress_ptr cinfo) { + ProcessiMCURow<kStreamingModeTokens>(cinfo); +} + +void WriteiMCURow(j_compress_ptr cinfo) { + ProcessiMCURow<kStreamingModeBits>(cinfo); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jpegli { +HWY_EXPORT(ComputeCoefficientsForiMCURow); +HWY_EXPORT(ComputeTokensForiMCURow); +HWY_EXPORT(WriteiMCURow); + +void ComputeCoefficientsForiMCURow(j_compress_ptr cinfo) { + HWY_DYNAMIC_DISPATCH(ComputeCoefficientsForiMCURow)(cinfo); +} + +void ComputeTokensForiMCURow(j_compress_ptr cinfo) { + HWY_DYNAMIC_DISPATCH(ComputeTokensForiMCURow)(cinfo); +} + +void WriteiMCURow(j_compress_ptr cinfo) { + HWY_DYNAMIC_DISPATCH(WriteiMCURow)(cinfo); +} + +} // namespace jpegli +#endif // HWY_ONCE diff --git a/lib/jpegli/encode_streaming.h b/lib/jpegli/encode_streaming.h new file mode 100644 index 0000000..69acff4 
--- /dev/null +++ b/lib/jpegli/encode_streaming.h @@ -0,0 +1,21 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_ENCODE_STREAMING_H_ +#define LIB_JPEGLI_ENCODE_STREAMING_H_ + +#include "lib/jpegli/encode_internal.h" + +namespace jpegli { + +void ComputeCoefficientsForiMCURow(j_compress_ptr cinfo); + +void ComputeTokensForiMCURow(j_compress_ptr cinfo); + +void WriteiMCURow(j_compress_ptr cinfo); + +} // namespace jpegli + +#endif // LIB_JPEGLI_ENCODE_STREAMING_H_ diff --git a/lib/jpegli/entropy_coding-inl.h b/lib/jpegli/entropy_coding-inl.h new file mode 100644 index 0000000..bfb436d --- /dev/null +++ b/lib/jpegli/entropy_coding-inl.h @@ -0,0 +1,213 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#if defined(LIB_JPEGLI_ENTROPY_CODING_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JPEGLI_ENTROPY_CODING_INL_H_ +#undef LIB_JPEGLI_ENTROPY_CODING_INL_H_ +#else +#define LIB_JPEGLI_ENTROPY_CODING_INL_H_ +#endif + +#include "lib/jxl/base/compiler_specific.h" + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::Abs; +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::And; +using hwy::HWY_NAMESPACE::AndNot; +using hwy::HWY_NAMESPACE::Compress; +using hwy::HWY_NAMESPACE::CountTrue; +using hwy::HWY_NAMESPACE::Eq; +using hwy::HWY_NAMESPACE::GetLane; +using hwy::HWY_NAMESPACE::MaskFromVec; +using hwy::HWY_NAMESPACE::Max; +using hwy::HWY_NAMESPACE::Not; +using hwy::HWY_NAMESPACE::Or; +using hwy::HWY_NAMESPACE::ShiftRight; +using hwy::HWY_NAMESPACE::Shl; +using hwy::HWY_NAMESPACE::Sub; + +using DI = HWY_FULL(int32_t); +constexpr DI di; + +template <typename DI, class V> +JXL_INLINE V NumBits(DI di, const V x) { + // TODO(szabadka) Add faster implementations for some specific architectures. + const auto b1 = And(x, Set(di, 1)); + const auto b2 = And(x, Set(di, 2)); + const auto b3 = Sub((And(x, Set(di, 4))), Set(di, 1)); + const auto b4 = Sub((And(x, Set(di, 8))), Set(di, 4)); + const auto b5 = Sub((And(x, Set(di, 16))), Set(di, 11)); + const auto b6 = Sub((And(x, Set(di, 32))), Set(di, 26)); + const auto b7 = Sub((And(x, Set(di, 64))), Set(di, 57)); + const auto b8 = Sub((And(x, Set(di, 128))), Set(di, 120)); + const auto b9 = Sub((And(x, Set(di, 256))), Set(di, 247)); + const auto b10 = Sub((And(x, Set(di, 512))), Set(di, 502)); + const auto b11 = Sub((And(x, Set(di, 1024))), Set(di, 1013)); + const auto b12 = Sub((And(x, Set(di, 2048))), Set(di, 2036)); + return Max(Max(Max(Max(b1, b2), Max(b3, b4)), Max(Max(b5, b6), Max(b7, b8))), + Max(Max(b9, b10), Max(b11, b12))); +} + +// Coefficient indexes pre-multiplied by 16 for the symbol calculation. 
+HWY_ALIGN constexpr int32_t kIndexes[64] = { + 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, + 208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, + 416, 432, 448, 464, 480, 496, 512, 528, 544, 560, 576, 592, 608, + 624, 640, 656, 672, 688, 704, 720, 736, 752, 768, 784, 800, 816, + 832, 848, 864, 880, 896, 912, 928, 944, 960, 976, 992, 1008, +}; + +JXL_INLINE int CompactBlock(int32_t* JXL_RESTRICT block, + int32_t* JXL_RESTRICT nonzero_idx) { + const auto zero = Zero(di); + HWY_ALIGN constexpr int32_t dc_mask_lanes[HWY_LANES(DI)] = {-1}; + const auto dc_mask = MaskFromVec(Load(di, dc_mask_lanes)); + int num_nonzeros = 0; + int k = 0; + { + const auto coef = Load(di, block); + const auto idx = Load(di, kIndexes); + const auto nonzero_mask = Or(dc_mask, Not(Eq(coef, zero))); + const auto nzero_coef = Compress(coef, nonzero_mask); + const auto nzero_idx = Compress(idx, nonzero_mask); + StoreU(nzero_coef, di, &block[num_nonzeros]); + StoreU(nzero_idx, di, &nonzero_idx[num_nonzeros]); + num_nonzeros += CountTrue(di, nonzero_mask); + k += Lanes(di); + } + for (; k < DCTSIZE2; k += Lanes(di)) { + const auto coef = Load(di, &block[k]); + const auto idx = Load(di, &kIndexes[k]); + const auto nonzero_mask = Not(Eq(coef, zero)); + const auto nzero_coef = Compress(coef, nonzero_mask); + const auto nzero_idx = Compress(idx, nonzero_mask); + StoreU(nzero_coef, di, &block[num_nonzeros]); + StoreU(nzero_idx, di, &nonzero_idx[num_nonzeros]); + num_nonzeros += CountTrue(di, nonzero_mask); + } + return num_nonzeros; +} + +JXL_INLINE void ComputeSymbols(const int num_nonzeros, + int32_t* JXL_RESTRICT nonzero_idx, + int32_t* JXL_RESTRICT block, + int32_t* JXL_RESTRICT symbols) { + nonzero_idx[-1] = -16; + const auto one = Set(di, 1); + const auto offset = Set(di, 16); + for (int i = 0; i < num_nonzeros; i += Lanes(di)) { + const auto idx = Load(di, &nonzero_idx[i]); + const auto prev_idx = LoadU(di, &nonzero_idx[i - 1]); + const auto coeff = Load(di, 
&block[i]); + const auto nbits = NumBits(di, Abs(coeff)); + const auto mask = ShiftRight<8 * sizeof(int32_t) - 1>(coeff); + const auto bits = And(Add(coeff, mask), Sub(Shl(one, nbits), one)); + const auto symbol = Sub(Add(nbits, idx), Add(prev_idx, offset)); + Store(symbol, di, symbols + i); + Store(bits, di, block + i); + } +} + +template <typename T> +int NumNonZero8x8ExceptDC(const T* block) { + const HWY_CAPPED(T, 8) di; + + const auto zero = Zero(di); + // Add FFFF for every zero coefficient, negate to get #zeros. + auto neg_sum_zero = zero; + { + // First row has DC, so mask + const size_t y = 0; + HWY_ALIGN const T dc_mask_lanes[8] = {-1}; + + for (size_t x = 0; x < 8; x += Lanes(di)) { + const auto dc_mask = Load(di, dc_mask_lanes + x); + + // DC counts as zero so we don't include it in nzeros. + const auto coef = AndNot(dc_mask, Load(di, &block[y * 8 + x])); + + neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero))); + } + } + // Remaining rows: no mask + for (size_t y = 1; y < 8; y++) { + for (size_t x = 0; x < 8; x += Lanes(di)) { + const auto coef = Load(di, &block[y * 8 + x]); + neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero))); + } + } + + // We want 64 - sum_zero, add because neg_sum_zero is already negated. 
+ return kDCTBlockSize + GetLane(SumOfLanes(di, neg_sum_zero)); +} + +template <typename T, bool zig_zag_order> +void ComputeTokensForBlock(const T* block, int last_dc, int dc_ctx, int ac_ctx, + Token** tokens_ptr) { + Token* next_token = *tokens_ptr; + coeff_t temp2; + coeff_t temp; + temp = block[0] - last_dc; + if (temp == 0) { + *next_token++ = Token(dc_ctx, 0, 0); + } else { + temp2 = temp; + if (temp < 0) { + temp = -temp; + temp2--; + } + int dc_nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1; + int dc_mask = (1 << dc_nbits) - 1; + *next_token++ = Token(dc_ctx, dc_nbits, temp2 & dc_mask); + } + int num_nonzeros = NumNonZero8x8ExceptDC(block); + for (int k = 1; k < 64; ++k) { + if (num_nonzeros == 0) { + *next_token++ = Token(ac_ctx, 0, 0); + break; + } + int r = 0; + if (zig_zag_order) { + while ((temp = block[k]) == 0) { + r++; + k++; + } + } else { + while ((temp = block[kJPEGNaturalOrder[k]]) == 0) { + r++; + k++; + } + } + --num_nonzeros; + if (temp < 0) { + temp = -temp; + temp2 = ~temp; + } else { + temp2 = temp; + } + while (r > 15) { + *next_token++ = Token(ac_ctx, 0xf0, 0); + r -= 16; + } + int ac_nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1; + int ac_mask = (1 << ac_nbits) - 1; + int symbol = (r << 4u) + ac_nbits; + *next_token++ = Token(ac_ctx, symbol, temp2 & ac_mask); + } + *tokens_ptr = next_token; +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); +#endif // LIB_JPEGLI_ENTROPY_CODING_INL_H_ diff --git a/lib/jpegli/entropy_coding.cc b/lib/jpegli/entropy_coding.cc new file mode 100644 index 0000000..7e50bbc --- /dev/null +++ b/lib/jpegli/entropy_coding.cc @@ -0,0 +1,837 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jpegli/entropy_coding.h" + +#include <vector> + +#include "lib/jpegli/encode_internal.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/huffman.h" +#include "lib/jxl/base/bits.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/entropy_coding.cc" +#include <hwy/foreach_target.h> +#include <hwy/highway.h> + +#include "lib/jpegli/entropy_coding-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { + +void ComputeTokensSequential(const coeff_t* block, int last_dc, int dc_ctx, + int ac_ctx, Token** tokens_ptr) { + ComputeTokensForBlock<coeff_t, true>(block, last_dc, dc_ctx, ac_ctx, + tokens_ptr); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jpegli { + +size_t MaxNumTokensPerMCURow(j_compress_ptr cinfo) { + int MCUs_per_row = DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor); + size_t blocks_per_mcu = 0; + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + blocks_per_mcu += comp->h_samp_factor * comp->v_samp_factor; + } + return kDCTBlockSize * blocks_per_mcu * MCUs_per_row; +} + +size_t EstimateNumTokens(j_compress_ptr cinfo, size_t mcu_y, size_t ysize_mcus, + size_t num_tokens, size_t max_per_row) { + size_t estimate; + if (mcu_y == 0) { + estimate = 16 * max_per_row; + } else { + estimate = (4 * ysize_mcus * num_tokens) / (3 * mcu_y); + } + size_t mcus_left = ysize_mcus - mcu_y; + return std::min(mcus_left * max_per_row, + std::max(max_per_row, estimate - num_tokens)); +} + +namespace { +HWY_EXPORT(ComputeTokensSequential); + +void TokenizeProgressiveDC(const coeff_t* coeffs, int context, int Al, + coeff_t* last_dc_coeff, Token** next_token) { + coeff_t temp2; + coeff_t temp; + temp2 = coeffs[0] >> Al; + temp = temp2 - *last_dc_coeff; + *last_dc_coeff = temp2; + temp2 = temp; + if (temp < 0) { + temp = -temp; + 
temp2--; + } + int nbits = (temp == 0) ? 0 : (jxl::FloorLog2Nonzero<uint32_t>(temp) + 1); + int bits = temp2 & ((1 << nbits) - 1); + *(*next_token)++ = Token(context, nbits, bits); +} + +void TokenizeACProgressiveScan(j_compress_ptr cinfo, int scan_index, + int context, ScanTokenInfo* sti) { + jpeg_comp_master* m = cinfo->master; + const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index]; + const int comp_idx = scan_info->component_index[0]; + const jpeg_component_info* comp = &cinfo->comp_info[comp_idx]; + const int Al = scan_info->Al; + const int Ss = scan_info->Ss; + const int Se = scan_info->Se; + const size_t restart_interval = sti->restart_interval; + int restarts_to_go = restart_interval; + size_t num_blocks = comp->height_in_blocks * comp->width_in_blocks; + size_t num_restarts = + restart_interval > 0 ? DivCeil(num_blocks, restart_interval) : 1; + size_t restart_idx = 0; + int eob_run = 0; + TokenArray* ta = &m->token_arrays[m->cur_token_array]; + sti->token_offset = m->total_num_tokens + ta->num_tokens; + sti->restarts = Allocate<size_t>(cinfo, num_restarts, JPOOL_IMAGE); + for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) { + JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)( + reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx], by, + 1, false); + // Each coefficient can appear in at most one token, but we have to reserve + // one extra EOBrun token that was rolled over from the previous block-row + // and has to be flushed at the end. 
+ int max_tokens_per_row = 1 + comp->width_in_blocks * (Se - Ss + 1); + if (ta->num_tokens + max_tokens_per_row > m->num_tokens) { + if (ta->tokens) { + m->total_num_tokens += ta->num_tokens; + ++m->cur_token_array; + ta = &m->token_arrays[m->cur_token_array]; + } + m->num_tokens = + EstimateNumTokens(cinfo, by, comp->height_in_blocks, + m->total_num_tokens, max_tokens_per_row); + ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE); + m->next_token = ta->tokens; + } + for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) { + if (restart_interval > 0 && restarts_to_go == 0) { + if (eob_run > 0) { + int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run); + int symbol = nbits << 4u; + *m->next_token++ = + Token(context, symbol, eob_run & ((1 << nbits) - 1)); + eob_run = 0; + } + ta->num_tokens = m->next_token - ta->tokens; + sti->restarts[restart_idx++] = m->total_num_tokens + ta->num_tokens; + restarts_to_go = restart_interval; + } + const coeff_t* block = &ba[0][bx][0]; + coeff_t temp2; + coeff_t temp; + int r = 0; + int num_nzeros = 0; + int num_future_nzeros = 0; + for (int k = Ss; k <= Se; ++k) { + if ((temp = block[k]) == 0) { + r++; + continue; + } + if (temp < 0) { + temp = -temp; + temp >>= Al; + temp2 = ~temp; + } else { + temp >>= Al; + temp2 = temp; + } + if (temp == 0) { + r++; + num_future_nzeros++; + continue; + } + if (eob_run > 0) { + int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run); + int symbol = nbits << 4u; + *m->next_token++ = + Token(context, symbol, eob_run & ((1 << nbits) - 1)); + eob_run = 0; + } + while (r > 15) { + *m->next_token++ = Token(context, 0xf0, 0); + r -= 16; + } + int nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1; + int symbol = (r << 4u) + nbits; + *m->next_token++ = Token(context, symbol, temp2 & ((1 << nbits) - 1)); + ++num_nzeros; + r = 0; + } + if (r > 0) { + ++eob_run; + if (eob_run == 0x7FFF) { + int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run); + int symbol = nbits << 4u; + *m->next_token++ = + 
Token(context, symbol, eob_run & ((1 << nbits) - 1)); + eob_run = 0; + } + } + sti->num_nonzeros += num_nzeros; + sti->num_future_nonzeros += num_future_nzeros; + --restarts_to_go; + } + ta->num_tokens = m->next_token - ta->tokens; + } + if (eob_run > 0) { + int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run); + int symbol = nbits << 4u; + *m->next_token++ = Token(context, symbol, eob_run & ((1 << nbits) - 1)); + ++ta->num_tokens; + eob_run = 0; + } + sti->num_tokens = m->total_num_tokens + ta->num_tokens - sti->token_offset; + sti->restarts[restart_idx++] = m->total_num_tokens + ta->num_tokens; +} + +void TokenizeACRefinementScan(j_compress_ptr cinfo, int scan_index, + ScanTokenInfo* sti) { + jpeg_comp_master* m = cinfo->master; + const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index]; + const int comp_idx = scan_info->component_index[0]; + const jpeg_component_info* comp = &cinfo->comp_info[comp_idx]; + const int Al = scan_info->Al; + const int Ss = scan_info->Ss; + const int Se = scan_info->Se; + const size_t restart_interval = sti->restart_interval; + int restarts_to_go = restart_interval; + RefToken token; + int eob_run = 0; + int eob_refbits = 0; + size_t num_blocks = comp->height_in_blocks * comp->width_in_blocks; + size_t num_restarts = + restart_interval > 0 ? 
DivCeil(num_blocks, restart_interval) : 1; + sti->tokens = m->next_refinement_token; + sti->refbits = m->next_refinement_bit; + sti->eobruns = Allocate<uint16_t>(cinfo, num_blocks / 2, JPOOL_IMAGE); + sti->restarts = Allocate<size_t>(cinfo, num_restarts, JPOOL_IMAGE); + RefToken* next_token = sti->tokens; + RefToken* next_eob_token = next_token; + uint8_t* next_ref_bit = sti->refbits; + uint16_t* next_eobrun = sti->eobruns; + size_t restart_idx = 0; + for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) { + JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)( + reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx], by, + 1, false); + for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) { + if (restart_interval > 0 && restarts_to_go == 0) { + sti->restarts[restart_idx++] = next_token - sti->tokens; + restarts_to_go = restart_interval; + next_eob_token = next_token; + eob_run = eob_refbits = 0; + } + const coeff_t* block = &ba[0][bx][0]; + int num_eob_refinement_bits = 0; + int num_refinement_bits = 0; + int num_nzeros = 0; + int r = 0; + for (int k = Ss; k <= Se; ++k) { + int absval = block[k]; + if (absval == 0) { + r++; + continue; + } + const int mask = absval >> (8 * sizeof(int) - 1); + absval += mask; + absval ^= mask; + absval >>= Al; + if (absval == 0) { + r++; + continue; + } + while (r > 15) { + token.symbol = 0xf0; + token.refbits = num_refinement_bits; + *next_token++ = token; + r -= 16; + num_eob_refinement_bits += num_refinement_bits; + num_refinement_bits = 0; + } + if (absval > 1) { + *next_ref_bit++ = absval & 1u; + ++num_refinement_bits; + continue; + } + int symbol = (r << 4u) + 1 + ((mask + 1) << 1); + token.symbol = symbol; + token.refbits = num_refinement_bits; + *next_token++ = token; + ++num_nzeros; + num_refinement_bits = 0; + num_eob_refinement_bits = 0; + r = 0; + next_eob_token = next_token; + eob_run = eob_refbits = 0; + } + if (r > 0 || num_eob_refinement_bits + num_refinement_bits > 0) { + ++eob_run; + eob_refbits 
+= num_eob_refinement_bits + num_refinement_bits; + if (eob_refbits > 255) { + ++next_eob_token; + eob_refbits = num_eob_refinement_bits + num_refinement_bits; + eob_run = 1; + } + next_token = next_eob_token; + next_token->refbits = eob_refbits; + if (eob_run == 1) { + next_token->symbol = 0; + } else if (eob_run == 2) { + next_token->symbol = 16; + *next_eobrun++ = 0; + } else if ((eob_run & (eob_run - 1)) == 0) { + next_token->symbol += 16; + next_eobrun[-1] = 0; + } else { + ++next_eobrun[-1]; + } + ++next_token; + if (eob_run == 0x7fff) { + next_eob_token = next_token; + eob_run = eob_refbits = 0; + } + } + sti->num_nonzeros += num_nzeros; + --restarts_to_go; + } + } + sti->num_tokens = next_token - sti->tokens; + sti->restarts[restart_idx++] = sti->num_tokens; + m->next_refinement_token = next_token; + m->next_refinement_bit = next_ref_bit; +} + +void TokenizeScan(j_compress_ptr cinfo, size_t scan_index, int ac_ctx_offset, + ScanTokenInfo* sti) { + const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index]; + if (scan_info->Ss > 0) { + if (scan_info->Ah == 0) { + TokenizeACProgressiveScan(cinfo, scan_index, ac_ctx_offset, sti); + } else { + TokenizeACRefinementScan(cinfo, scan_index, sti); + } + return; + } + + jpeg_comp_master* m = cinfo->master; + size_t restart_interval = sti->restart_interval; + int restarts_to_go = restart_interval; + coeff_t last_dc_coeff[MAX_COMPS_IN_SCAN] = {0}; + + // "Non-interleaved" means color data comes in separate scans, in other words + // each scan can contain only one color component. + const bool is_interleaved = (scan_info->comps_in_scan > 1); + const bool is_progressive = cinfo->progressive_mode; + const int Ah = scan_info->Ah; + const int Al = scan_info->Al; + HWY_ALIGN constexpr coeff_t kSinkBlock[DCTSIZE2] = {0}; + + size_t restart_idx = 0; + TokenArray* ta = &m->token_arrays[m->cur_token_array]; + sti->token_offset = Ah > 0 ? 
0 : m->total_num_tokens + ta->num_tokens; + + if (Ah > 0) { + sti->refbits = Allocate<uint8_t>(cinfo, sti->num_blocks, JPOOL_IMAGE); + } else if (cinfo->progressive_mode) { + if (ta->num_tokens + sti->num_blocks > m->num_tokens) { + if (ta->tokens) { + m->total_num_tokens += ta->num_tokens; + ++m->cur_token_array; + ta = &m->token_arrays[m->cur_token_array]; + } + m->num_tokens = sti->num_blocks; + ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE); + m->next_token = ta->tokens; + } + } + + JBLOCKARRAY ba[MAX_COMPS_IN_SCAN]; + size_t block_idx = 0; + for (size_t mcu_y = 0; mcu_y < sti->MCU_rows_in_scan; ++mcu_y) { + for (int i = 0; i < scan_info->comps_in_scan; ++i) { + int comp_idx = scan_info->component_index[i]; + jpeg_component_info* comp = &cinfo->comp_info[comp_idx]; + int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1; + int by0 = mcu_y * n_blocks_y; + int block_rows_left = comp->height_in_blocks - by0; + int max_block_rows = std::min(n_blocks_y, block_rows_left); + ba[i] = (*cinfo->mem->access_virt_barray)( + reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx], + by0, max_block_rows, false); + } + if (!cinfo->progressive_mode) { + int max_tokens_per_mcu_row = MaxNumTokensPerMCURow(cinfo); + if (ta->num_tokens + max_tokens_per_mcu_row > m->num_tokens) { + if (ta->tokens) { + m->total_num_tokens += ta->num_tokens; + ++m->cur_token_array; + ta = &m->token_arrays[m->cur_token_array]; + } + m->num_tokens = + EstimateNumTokens(cinfo, mcu_y, sti->MCU_rows_in_scan, + m->total_num_tokens, max_tokens_per_mcu_row); + ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE); + m->next_token = ta->tokens; + } + } + for (size_t mcu_x = 0; mcu_x < sti->MCUs_per_row; ++mcu_x) { + // Possibly emit a restart marker. 
+ if (restart_interval > 0 && restarts_to_go == 0) { + restarts_to_go = restart_interval; + memset(last_dc_coeff, 0, sizeof(last_dc_coeff)); + ta->num_tokens = m->next_token - ta->tokens; + sti->restarts[restart_idx++] = + Ah > 0 ? block_idx : m->total_num_tokens + ta->num_tokens; + } + // Encode one MCU + for (int i = 0; i < scan_info->comps_in_scan; ++i) { + int comp_idx = scan_info->component_index[i]; + jpeg_component_info* comp = &cinfo->comp_info[comp_idx]; + int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1; + int n_blocks_x = is_interleaved ? comp->h_samp_factor : 1; + for (int iy = 0; iy < n_blocks_y; ++iy) { + for (int ix = 0; ix < n_blocks_x; ++ix) { + size_t block_y = mcu_y * n_blocks_y + iy; + size_t block_x = mcu_x * n_blocks_x + ix; + const coeff_t* block; + if (block_x >= comp->width_in_blocks || + block_y >= comp->height_in_blocks) { + block = kSinkBlock; + } else { + block = &ba[i][iy][block_x][0]; + } + if (!is_progressive) { + HWY_DYNAMIC_DISPATCH(ComputeTokensSequential) + (block, last_dc_coeff[i], comp_idx, ac_ctx_offset + i, + &m->next_token); + last_dc_coeff[i] = block[0]; + } else { + if (Ah == 0) { + TokenizeProgressiveDC(block, comp_idx, Al, last_dc_coeff + i, + &m->next_token); + } else { + sti->refbits[block_idx] = (block[0] >> Al) & 1; + } + } + ++block_idx; + } + } + } + --restarts_to_go; + } + ta->num_tokens = m->next_token - ta->tokens; + } + JXL_DASSERT(block_idx == sti->num_blocks); + sti->num_tokens = + Ah > 0 ? sti->num_blocks + : m->total_num_tokens + ta->num_tokens - sti->token_offset; + sti->restarts[restart_idx++] = + Ah > 0 ? 
sti->num_blocks : m->total_num_tokens + ta->num_tokens; + if (Ah == 0 && cinfo->progressive_mode) { + JXL_DASSERT(sti->num_blocks == sti->num_tokens); + } +} + +} // namespace + +void TokenizeJpeg(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + std::vector<int> processed(cinfo->num_scans); + size_t max_refinement_tokens = 0; + size_t num_refinement_bits = 0; + int num_refinement_scans[DCTSIZE2] = {}; + int max_num_refinement_scans = 0; + for (int i = 0; i < cinfo->num_scans; ++i) { + const jpeg_scan_info* si = &cinfo->scan_info[i]; + ScanTokenInfo* sti = &m->scan_token_info[i]; + if (si->Ss > 0 && si->Ah == 0 && si->Al > 0) { + int offset = m->ac_ctx_offset[i]; + TokenizeScan(cinfo, i, offset, sti); + processed[i] = 1; + max_refinement_tokens += sti->num_future_nonzeros; + for (int k = si->Ss; k <= si->Se; ++k) { + num_refinement_scans[k] = si->Al; + } + max_num_refinement_scans = std::max(max_num_refinement_scans, si->Al); + num_refinement_bits += sti->num_nonzeros; + } + if (si->Ss > 0 && si->Ah > 0) { + int comp_idx = si->component_index[0]; + const jpeg_component_info* comp = &cinfo->comp_info[comp_idx]; + size_t num_blocks = comp->width_in_blocks * comp->height_in_blocks; + max_refinement_tokens += (1 + (si->Se - si->Ss) / 16) * num_blocks; + } + } + if (max_refinement_tokens > 0) { + m->next_refinement_token = + Allocate<RefToken>(cinfo, max_refinement_tokens, JPOOL_IMAGE); + } + for (int j = 0; j < max_num_refinement_scans; ++j) { + uint8_t* refinement_bits = + Allocate<uint8_t>(cinfo, num_refinement_bits, JPOOL_IMAGE); + m->next_refinement_bit = refinement_bits; + size_t new_refinement_bits = 0; + for (int i = 0; i < cinfo->num_scans; ++i) { + const jpeg_scan_info* si = &cinfo->scan_info[i]; + ScanTokenInfo* sti = &m->scan_token_info[i]; + if (si->Ss > 0 && si->Ah > 0 && + si->Ah == num_refinement_scans[si->Ss] - j) { + int offset = m->ac_ctx_offset[i]; + TokenizeScan(cinfo, i, offset, sti); + processed[i] = 1; + new_refinement_bits += 
sti->num_nonzeros; + } + } + JXL_DASSERT(m->next_refinement_bit == + refinement_bits + num_refinement_bits); + num_refinement_bits += new_refinement_bits; + } + for (int i = 0; i < cinfo->num_scans; ++i) { + if (processed[i]) { + continue; + } + int offset = m->ac_ctx_offset[i]; + TokenizeScan(cinfo, i, offset, &m->scan_token_info[i]); + processed[i] = 1; + } +} + +namespace { + +struct Histogram { + int count[kJpegHuffmanAlphabetSize]; + Histogram() { memset(count, 0, sizeof(count)); } +}; + +void BuildHistograms(j_compress_ptr cinfo, Histogram* histograms) { + jpeg_comp_master* m = cinfo->master; + size_t num_token_arrays = m->cur_token_array + 1; + for (size_t i = 0; i < num_token_arrays; ++i) { + Token* tokens = m->token_arrays[i].tokens; + size_t num_tokens = m->token_arrays[i].num_tokens; + for (size_t j = 0; j < num_tokens; ++j) { + Token t = tokens[j]; + ++histograms[t.context].count[t.symbol]; + } + } + for (int i = 0; i < cinfo->num_scans; ++i) { + const jpeg_scan_info& si = cinfo->scan_info[i]; + const ScanTokenInfo& sti = m->scan_token_info[i]; + if (si.Ss > 0 && si.Ah > 0) { + int context = m->ac_ctx_offset[i]; + int* ac_histo = &histograms[context].count[0]; + for (size_t j = 0; j < sti.num_tokens; ++j) { + ++ac_histo[sti.tokens[j].symbol & 253]; + } + } + } +} + +struct JpegClusteredHistograms { + std::vector<Histogram> histograms; + std::vector<uint32_t> histogram_indexes; + std::vector<uint32_t> slot_ids; +}; + +float HistogramCost(const Histogram& histo) { + std::vector<uint32_t> counts(kJpegHuffmanAlphabetSize + 1); + std::vector<uint8_t> depths(kJpegHuffmanAlphabetSize + 1); + for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) { + counts[i] = histo.count[i]; + } + counts[kJpegHuffmanAlphabetSize] = 1; + CreateHuffmanTree(counts.data(), counts.size(), kJpegHuffmanMaxBitLength, + &depths[0]); + size_t header_bits = (1 + kJpegHuffmanMaxBitLength) * 8; + size_t data_bits = 0; + for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) { + if 
(depths[i] > 0) { + header_bits += 8; + data_bits += counts[i] * depths[i]; + } + } + return header_bits + data_bits; +} + +void AddHistograms(const Histogram& a, const Histogram& b, Histogram* c) { + for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) { + c->count[i] = a.count[i] + b.count[i]; + } +} + +bool IsEmptyHistogram(const Histogram& histo) { + for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) { + if (histo.count[i]) return false; + } + return true; +} + +void ClusterJpegHistograms(const Histogram* histograms, size_t num, + JpegClusteredHistograms* clusters) { + clusters->histogram_indexes.resize(num); + std::vector<uint32_t> slot_histograms; + std::vector<float> slot_costs; + for (size_t i = 0; i < num; ++i) { + const Histogram& cur = histograms[i]; + if (IsEmptyHistogram(cur)) { + continue; + } + float best_cost = HistogramCost(cur); + size_t best_slot = slot_histograms.size(); + for (size_t j = 0; j < slot_histograms.size(); ++j) { + size_t prev_idx = slot_histograms[j]; + const Histogram& prev = clusters->histograms[prev_idx]; + Histogram combined; + AddHistograms(prev, cur, &combined); + float combined_cost = HistogramCost(combined); + float cost = combined_cost - slot_costs[j]; + if (cost < best_cost) { + best_cost = cost; + best_slot = j; + } + } + if (best_slot == slot_histograms.size()) { + // Create new histogram. + size_t histogram_index = clusters->histograms.size(); + clusters->histograms.push_back(cur); + clusters->histogram_indexes[i] = histogram_index; + if (best_slot < 4) { + // We have a free slot, so we put the new histogram there. + slot_histograms.push_back(histogram_index); + slot_costs.push_back(best_cost); + } else { + // TODO(szabadka) Find the best histogram to replce. + best_slot = (clusters->slot_ids.back() + 1) % 4; + } + slot_histograms[best_slot] = histogram_index; + slot_costs[best_slot] = best_cost; + clusters->slot_ids.push_back(best_slot); + } else { + // Merge this histogram with a previous one. 
+ size_t histogram_index = slot_histograms[best_slot]; + const Histogram& prev = clusters->histograms[histogram_index]; + AddHistograms(prev, cur, &clusters->histograms[histogram_index]); + clusters->histogram_indexes[i] = histogram_index; + JXL_ASSERT(clusters->slot_ids[histogram_index] == best_slot); + slot_costs[best_slot] += best_cost; + } + } +} + +void CopyHuffmanTable(j_compress_ptr cinfo, int index, bool is_dc, + int* inv_slot_map, uint8_t* slot_id_map, + JHUFF_TBL* huffman_tables, size_t* num_huffman_tables) { + const char* type = is_dc ? "DC" : "AC"; + if (index < 0 || index >= NUM_HUFF_TBLS) { + JPEGLI_ERROR("Invalid %s Huffman table index %d", type, index); + } + // Check if we have already copied this Huffman table. + int slot_idx = index + (is_dc ? 0 : NUM_HUFF_TBLS); + if (inv_slot_map[slot_idx] != -1) { + return; + } + inv_slot_map[slot_idx] = *num_huffman_tables; + // Look up and validate Huffman table. + JHUFF_TBL* table = + is_dc ? cinfo->dc_huff_tbl_ptrs[index] : cinfo->ac_huff_tbl_ptrs[index]; + if (table == nullptr) { + JPEGLI_ERROR("Missing %s Huffman table %d", type, index); + } + ValidateHuffmanTable(reinterpret_cast<j_common_ptr>(cinfo), table, is_dc); + // Copy Huffman table to the end of the list and save slot id. + slot_id_map[*num_huffman_tables] = index + (is_dc ? 
0 : 0x10); + memcpy(&huffman_tables[*num_huffman_tables], table, sizeof(JHUFF_TBL)); + ++(*num_huffman_tables); +} + +void BuildJpegHuffmanTable(const Histogram& histo, JHUFF_TBL* table) { + std::vector<uint32_t> counts(kJpegHuffmanAlphabetSize + 1); + std::vector<uint8_t> depths(kJpegHuffmanAlphabetSize + 1); + for (size_t j = 0; j < kJpegHuffmanAlphabetSize; ++j) { + counts[j] = histo.count[j]; + } + counts[kJpegHuffmanAlphabetSize] = 1; + CreateHuffmanTree(counts.data(), counts.size(), kJpegHuffmanMaxBitLength, + &depths[0]); + memset(table, 0, sizeof(JHUFF_TBL)); + for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) { + if (depths[i] > 0) { + ++table->bits[depths[i]]; + } + } + int offset[kJpegHuffmanMaxBitLength + 1] = {0}; + for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) { + offset[i] = offset[i - 1] + table->bits[i - 1]; + } + for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) { + if (depths[i] > 0) { + table->huffval[offset[depths[i]]++] = i; + } + } +} + +} // namespace + +void CopyHuffmanTables(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + size_t max_huff_tables = 2 * cinfo->num_components; + // Copy Huffman tables and save slot ids. + m->huffman_tables = Allocate<JHUFF_TBL>(cinfo, max_huff_tables, JPOOL_IMAGE); + m->slot_id_map = Allocate<uint8_t>(cinfo, max_huff_tables, JPOOL_IMAGE); + m->num_huffman_tables = 0; + int inv_slot_map[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + CopyHuffmanTable(cinfo, comp->dc_tbl_no, /*is_dc=*/true, &inv_slot_map[0], + m->slot_id_map, m->huffman_tables, &m->num_huffman_tables); + CopyHuffmanTable(cinfo, comp->ac_tbl_no, /*is_dc=*/false, &inv_slot_map[0], + m->slot_id_map, m->huffman_tables, &m->num_huffman_tables); + } + // Compute context map. 
+  m->context_map = Allocate<uint8_t>(cinfo, 8, JPOOL_IMAGE);
+  memset(m->context_map, 0, 8);
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    m->context_map[c] = inv_slot_map[cinfo->comp_info[c].dc_tbl_no];
+  }
+  int ac_ctx = 4;
+  for (int i = 0; i < cinfo->num_scans; ++i) {
+    const jpeg_scan_info* si = &cinfo->scan_info[i];
+    if (si->Se > 0) {
+      for (int j = 0; j < si->comps_in_scan; ++j) {
+        int c = si->component_index[j];
+        jpeg_component_info* comp = &cinfo->comp_info[c];
+        m->context_map[ac_ctx++] = inv_slot_map[comp->ac_tbl_no + 4];
+      }
+    }
+  }
+}
+/* Builds image-specific Huffman tables from the coefficient statistics.
+ *
+ * Collects one histogram per context (m->num_contexts), clusters the first
+ * cinfo->num_components histograms (DC) and the histograms from index 4 on
+ * (AC) separately, builds one JHUFF_TBL per cluster, and stores in
+ * cinfo->master: huffman_tables / num_huffman_tables, slot_id_map, and a
+ * context_map from context index to Huffman table index. All arrays are
+ * allocated with JPOOL_IMAGE.
+ * NOTE(review): &histograms[0] and num_contexts - 4 assume num_contexts >= 4;
+ * confirm against the code that sets m->num_contexts. */
+void OptimizeHuffmanCodes(j_compress_ptr cinfo) {
+  jpeg_comp_master* m = cinfo->master;
+  // Build DC and AC histograms.
+  std::vector<Histogram> histograms(m->num_contexts);
+  BuildHistograms(cinfo, &histograms[0]);
+
+  // Cluster DC histograms.
+  JpegClusteredHistograms dc_clusters;
+  ClusterJpegHistograms(histograms.data(), cinfo->num_components, &dc_clusters);
+
+  // Cluster AC histograms.
+  JpegClusteredHistograms ac_clusters;
+  ClusterJpegHistograms(histograms.data() + 4, m->num_contexts - 4,
+                        &ac_clusters);
+
+  // Create Huffman tables and slot ids clusters.
+  size_t num_dc_huff = dc_clusters.histograms.size();
+  m->num_huffman_tables = num_dc_huff + ac_clusters.histograms.size();
+  m->huffman_tables =
+      Allocate<JHUFF_TBL>(cinfo, m->num_huffman_tables, JPOOL_IMAGE);
+  m->slot_id_map = Allocate<uint8_t>(cinfo, m->num_huffman_tables, JPOOL_IMAGE);
+  for (size_t i = 0; i < m->num_huffman_tables; ++i) {
+    JHUFF_TBL huff_table = {};
+    if (i < dc_clusters.histograms.size()) {
+      m->slot_id_map[i] = i;  // DC tables: slot id is the cluster index.
+      BuildJpegHuffmanTable(dc_clusters.histograms[i], &huff_table);
+    } else {
+      // AC tables: slot id from the clustering, stored with an offset of 16.
+      m->slot_id_map[i] = 16 + ac_clusters.slot_ids[i - num_dc_huff];
+      BuildJpegHuffmanTable(ac_clusters.histograms[i - num_dc_huff],
+                            &huff_table);
+    }
+    memcpy(&m->huffman_tables[i], &huff_table, sizeof(huff_table));
+  }
+
+  // Create context map from clustered histogram indexes.
+  m->context_map = Allocate<uint8_t>(cinfo, m->num_contexts, JPOOL_IMAGE);
+  memset(m->context_map, 0, m->num_contexts);
+  for (size_t i = 0; i < m->num_contexts; ++i) {
+    if (i < (size_t)cinfo->num_components) {
+      m->context_map[i] = dc_clusters.histogram_indexes[i];
+    } else if (i >= 4) {  // Indexes in [num_components, 4) keep the memset 0.
+      // AC tables are stored after the num_dc_huff DC tables.
+      m->context_map[i] = num_dc_huff + ac_clusters.histogram_indexes[i - 4];
+    }
+  }
+}
+
+namespace {
+/* Indexed by symbol value; BuildHuffmanCodeTable adds this to the code depth
+ * and shifts the code left by it. Entries equal the symbol's low 4 bits when
+ * those are non-zero; 0x10..0xE0 map to their high 4 bits; 0x00 and 0xF0 map
+ * to 0. */
+constexpr uint8_t kNumExtraBits[256] = {
+    0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+    1,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+    2,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+    3,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+    4,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+    5,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+    6,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+    7,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+    8,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+    9,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+    10, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+    11, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+    12, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+    13, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+    14, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+    0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,  //
+};
+/* Expands a JHUFF_TBL into per-symbol code/depth entries of *code.
+ *
+ * table.bits[l] gives the number of codes of length l and table.huffval lists
+ * the symbols in code order. Codes are assigned consecutively within a length
+ * and shifted left by one when moving to the next length. For each listed
+ * symbol the stored depth is the code length plus kNumExtraBits[symbol] and
+ * the stored code is shifted left by that many bits. Entries of *code for
+ * symbols not listed in huffval are not written.
+ * NOTE(review): there is no check that the sum of table.bits[] fits in
+ * huff_size; assumes callers pass a valid table - confirm. */
+void BuildHuffmanCodeTable(const JHUFF_TBL& table, HuffmanCodeTable* code) {
+  int huff_code[kJpegHuffmanAlphabetSize];
+  // +1 for a sentinel element.
+  uint32_t huff_size[kJpegHuffmanAlphabetSize + 1];
+  int p = 0;
+  for (size_t l = 1; l <= kJpegHuffmanMaxBitLength; ++l) {
+    int i = table.bits[l];
+    while (i--) huff_size[p++] = l;  // One entry per code of length l.
+  }
+
+  // Reuse sentinel element.
+  int last_p = p;
+  huff_size[last_p] = 0;
+
+  int next_code = 0;
+  uint32_t si = huff_size[0];
+  p = 0;
+  while (huff_size[p]) {  // Stops at the zero sentinel written above.
+    while ((huff_size[p]) == si) {
+      huff_code[p++] = next_code;
+      next_code++;
+    }
+    next_code <<= 1;
+    si++;
+  }
+  for (p = 0; p < last_p; p++) {
+    int i = table.huffval[p];
+    int nbits = kNumExtraBits[i];
+    code->depth[i] = huff_size[p] + nbits;
+    code->code[i] = huff_code[p] << nbits;
+  }
+}
+
+}  // namespace
+// Allocates m->coding_tables (JPOOL_IMAGE) and fills one entry per Huffman table via BuildHuffmanCodeTable.
+void InitEntropyCoder(j_compress_ptr cinfo) {
+  jpeg_comp_master* m = cinfo->master;
+  m->coding_tables =
+      Allocate<HuffmanCodeTable>(cinfo, m->num_huffman_tables, JPOOL_IMAGE);
+  for (size_t i = 0; i < m->num_huffman_tables; ++i) {
+    BuildHuffmanCodeTable(m->huffman_tables[i], &m->coding_tables[i]);
+  }
+}
+
+}  // namespace jpegli
+#endif  // HWY_ONCE
diff --git a/lib/jpegli/entropy_coding.h b/lib/jpegli/entropy_coding.h
new file mode 100644
index 0000000..a552219
--- /dev/null
+++ b/lib/jpegli/entropy_coding.h
@@ -0,0 +1,28 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ENTROPY_CODING_H_
+#define LIB_JPEGLI_ENTROPY_CODING_H_
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+size_t MaxNumTokensPerMCURow(j_compress_ptr cinfo);
+
+size_t EstimateNumTokens(j_compress_ptr cinfo, size_t mcu_y, size_t ysize_mcus,
+                         size_t num_tokens, size_t max_per_row);
+
+void TokenizeJpeg(j_compress_ptr cinfo);
+
+void CopyHuffmanTables(j_compress_ptr cinfo);
+// Builds clustered, histogram-based Huffman tables and the context map in cinfo->master.
+void OptimizeHuffmanCodes(j_compress_ptr cinfo);
+// Builds cinfo->master->coding_tables from cinfo->master->huffman_tables.
+void InitEntropyCoder(j_compress_ptr cinfo);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_ENTROPY_CODING_H_
diff --git a/lib/jpegli/error.cc b/lib/jpegli/error.cc
new file mode 100644
index 0000000..2892616
--- /dev/null
+++ b/lib/jpegli/error.cc
@@ -0,0 +1,102 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/error.h"
+
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <string>
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+// Single-entry message table; entry 0 is the fallback text written by FormatMessage.
+const char* const kErrorMessageTable[] = {
+    "Message codes are not supported, error message is in msg_parm.s string",
+};
+// printf-style formatting into buffer, limited to JMSG_STR_PARM_MAX bytes by vsnprintf; always returns false.
+bool FormatString(char* buffer, const char* format, ...) {
+  va_list args;
+  va_start(args, format);
+  vsnprintf(buffer, JMSG_STR_PARM_MAX, format, args);
+  va_end(args);
+  return false;
+}
+// error_exit handler: outputs the pending message, destroys cinfo, then exits the process with EXIT_FAILURE.
+void ExitWithAbort(j_common_ptr cinfo) {
+  (*cinfo->err->output_message)(cinfo);
+  jpegli_destroy(cinfo);
+  exit(EXIT_FAILURE);
+}
+/* emit_message handler. For msg_level < 0 (warnings) the message is output
+ * while num_warnings <= 5 or when trace_level >= 3, and num_warnings is
+ * always incremented. Otherwise the message is output only when
+ * trace_level >= msg_level. */
+void EmitMessage(j_common_ptr cinfo, int msg_level) {
+  if (msg_level < 0) {
+    if (cinfo->err->num_warnings <= 5 || cinfo->err->trace_level >= 3) {
+      (*cinfo->err->output_message)(cinfo);
+    }
+    ++cinfo->err->num_warnings;
+  } else if (cinfo->err->trace_level >= msg_level) {
+    (*cinfo->err->output_message)(cinfo);
+  }
+}
+// output_message handler: formats via err->format_message and prints the result plus a newline to stderr.
+void OutputMessage(j_common_ptr cinfo) {
+  char buffer[JMSG_LENGTH_MAX];
+  (*cinfo->err->format_message)(cinfo, buffer);
+  fprintf(stderr, "%s\n", buffer);
+}
+/* format_message handler; writes the message text into buffer.
+ *
+ * msg_code == 0: copies JMSG_STR_PARM_MAX bytes of msg_parm.s.
+ * msg_code within [first_addon_message, last_addon_message] and an add-on
+ * table is set: the add-on entry is used as a format string, with msg_parm.s
+ * as argument if it contains "%s", else with msg_parm.i[0] .. msg_parm.i[7].
+ * Any other code: writes kErrorMessageTable[0].
+ * The snprintf calls are limited to JMSG_LENGTH_MAX bytes. */
+void FormatMessage(j_common_ptr cinfo, char* buffer) {
+  jpeg_error_mgr* err = cinfo->err;
+  int code = err->msg_code;
+  if (code == 0) {
+    memcpy(buffer, cinfo->err->msg_parm.s, JMSG_STR_PARM_MAX);
+  } else if (err->addon_message_table != nullptr &&
+             code >= err->first_addon_message &&
+             code <= err->last_addon_message) {
+    std::string msg(err->addon_message_table[code - err->first_addon_message]);
+    if (msg.find("%s") != std::string::npos) {
+      snprintf(buffer, JMSG_LENGTH_MAX, msg.data(), err->msg_parm.s);
+    } else {
+      snprintf(buffer, JMSG_LENGTH_MAX, msg.data(), err->msg_parm.i[0],
+               err->msg_parm.i[1], err->msg_parm.i[2], err->msg_parm.i[3],
+               err->msg_parm.i[4], err->msg_parm.i[5], err->msg_parm.i[6],
+               err->msg_parm.i[7]);
+    }
+  } else {
+    snprintf(buffer, JMSG_LENGTH_MAX, "%s", kErrorMessageTable[0]);
+  }
+}
+// reset_error_mgr handler: zeroes msg_parm.s and resets msg_code and num_warnings to 0.
+void ResetErrorManager(j_common_ptr cinfo) {
+  memset(cinfo->err->msg_parm.s, 0, JMSG_STR_PARM_MAX);
+  cinfo->err->msg_code = 0;
+  cinfo->err->num_warnings = 0;
+}
+
+}  // namespace jpegli
+// Installs the jpegli handlers defined above into *err, resets its state fields, and returns err.
+struct jpeg_error_mgr* jpegli_std_error(struct jpeg_error_mgr* err) {
+  err->error_exit = jpegli::ExitWithAbort;
+  err->emit_message = jpegli::EmitMessage;
+  err->output_message = jpegli::OutputMessage;
+  err->format_message = jpegli::FormatMessage;
+  err->reset_error_mgr = jpegli::ResetErrorManager;
+  memset(err->msg_parm.s, 0, JMSG_STR_PARM_MAX);
+  err->trace_level = 0;
+  err->num_warnings = 0;
+  // We don't support message codes and message table, but we define one here
+  // in case the application has a custom format_message and tries to access
+  // these fields there.
+  err->msg_code = 0;
+  err->jpeg_message_table = jpegli::kErrorMessageTable;
+  err->last_jpeg_message = 0;
+  err->addon_message_table = nullptr;
+  err->first_addon_message = 0;
+  err->last_addon_message = 0;
+  return err;
+}
diff --git a/lib/jpegli/error.h b/lib/jpegli/error.h
new file mode 100644
index 0000000..4451abd
--- /dev/null
+++ b/lib/jpegli/error.h
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ERROR_H_
+#define LIB_JPEGLI_ERROR_H_
+
+#include <stdarg.h>
+#include <stdint.h>
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+// printf-style formatting into buffer; used by the macros below to fill cinfo->err->msg_parm.s.
+bool FormatString(char* buffer, const char* format, ...);
+
+}  // namespace jpegli
+// Writes "<file>:<line>: <formatted message>" to cinfo->err->msg_parm.s, then calls error_exit. Needs `cinfo` in scope.
+#define JPEGLI_ERROR(format, ...)                                            \
+  jpegli::FormatString(cinfo->err->msg_parm.s, ("%s:%d: " format), __FILE__, \
+                       __LINE__, ##__VA_ARGS__),                             \
+      (*cinfo->err->error_exit)(reinterpret_cast<j_common_ptr>(cinfo))
+// Same message formatting as JPEGLI_ERROR, but calls emit_message with level -1. Needs `cinfo` in scope.
+#define JPEGLI_WARN(format, ...)                                             \
+  jpegli::FormatString(cinfo->err->msg_parm.s, ("%s:%d: " format), __FILE__, \
+                       __LINE__, ##__VA_ARGS__),                             \
+      (*cinfo->err->emit_message)(reinterpret_cast<j_common_ptr>(cinfo), -1)
+/* Formats and emits a message at `level`, only if trace_level >= level.
+ * NOTE(review): this expands to an unbraced `if` statement, so an `else`
+ * written directly after a use of this macro would bind to this `if`. */
+#define JPEGLI_TRACE(level, format, ...)                                     \
+  if (cinfo->err->trace_level >= (level))                                    \
+  jpegli::FormatString(cinfo->err->msg_parm.s, ("%s:%d: " format), __FILE__, \
+                       __LINE__, ##__VA_ARGS__),                             \
+      (*cinfo->err->emit_message)(reinterpret_cast<j_common_ptr>(cinfo),     \
+                                  (level))
+
+#endif  // LIB_JPEGLI_ERROR_H_
diff --git a/lib/jpegli/error_handling_test.cc b/lib/jpegli/error_handling_test.cc
new file mode 100644
index 0000000..0d481c5
--- /dev/null
+++ b/lib/jpegli/error_handling_test.cc
@@ -0,0 +1,1276 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jpegli {
+namespace {
+// Compresses a 1x1 one-component image to memory, expects the lambda to return true, then checks the libjpeg-decoded result.
+TEST(EncoderErrorHandlingTest, MinimalSuccess) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  {
+    jpeg_compress_struct cinfo;
+    const auto try_catch_block = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);  // Defined elsewhere; presumably returns false from the lambda on error - confirm.
+      jpegli_create_compress(&cinfo);
+      jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+      cinfo.image_width = 1;
+      cinfo.image_height = 1;
+      cinfo.input_components = 1;
+      jpegli_set_defaults(&cinfo);
+      jpegli_start_compress(&cinfo, TRUE);
+      JSAMPLE image[1] = {0};
+      JSAMPROW row[] = {image};
+      jpegli_write_scanlines(&cinfo, row, 1);
+      jpegli_finish_compress(&cinfo);
+      return true;
+    };
+    EXPECT_TRUE(try_catch_block());
+    jpegli_destroy_compress(&cinfo);
+  }
+  TestImage output;
+  DecodeWithLibjpeg(CompressParams(), DecompressParams(), nullptr, 0, buffer,
+                    buffer_size, &output);
+  EXPECT_EQ(1, output.xsize);
+  EXPECT_EQ(1, output.ysize);
+  EXPECT_EQ(1,
output.components);
+  EXPECT_EQ(0, output.pixels[0]);
+  if (buffer) free(buffer);
+}
+// No destination is set (no jpegli_mem_dest call) before start_compress; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, NoDestination) {
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // Defined elsewhere; presumably returns false from the lambda on error - confirm.
+    jpegli_create_compress(&cinfo);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+}
+// image_width / image_height are never set; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, NoImageDimensions) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// image_width is set to 100000; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, ImageTooBig) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 100000;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// input_components is never set; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, NoInputComponents) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    jpegli_set_defaults(&cinfo);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// input_components is set to 1000; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, TooManyInputComponents) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1000;
+    jpegli_set_defaults(&cinfo);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// jpegli_set_defaults is not called before start_compress; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, NoSetDefaults) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_start_compress(&cinfo, TRUE);
+    JSAMPLE image[1] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// jpegli_write_scanlines is called without a prior jpegli_start_compress; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, NoStartCompress) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    JSAMPLE image[1] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// jpegli_finish_compress is called without writing any scanline; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, NoWriteScanlines) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    jpegli_start_compress(&cinfo, TRUE);
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// image_height is 2 but only one scanline is written before finish_compress; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, NoWriteAllScanlines) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 2;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    jpegli_start_compress(&cinfo, TRUE);
+    JSAMPLE image[1] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// Quantization table 0 is replaced by a table whose DCTSIZE2 values are all 0; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, InvalidQuantValue) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    cinfo.quant_tbl_ptrs[0] = jpegli_alloc_quant_table((j_common_ptr)&cinfo);
+    for (size_t k = 0; k < DCTSIZE2; ++k) {
+      cinfo.quant_tbl_ptrs[0]->quantval[k] = 0;
+    }
+    jpegli_start_compress(&cinfo, TRUE);
+    JSAMPLE image[1] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// Component 0 is pointed at quant_tbl_no 3 after set_defaults; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, InvalidQuantTableIndex) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    cinfo.comp_info[0].quant_tbl_no = 3;
+    jpegli_start_compress(&cinfo, TRUE);
+    JSAMPLE image[1] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// num_components is overwritten with 100 while input_components is 1; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch1) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    cinfo.num_components = 100;
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// num_components is overwritten with 2 while input_components is 1; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch2) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    cinfo.num_components = 2;
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// As Mismatch2, but component 1 also gets 1x1 sampling factors and a scanline is written; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch3) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    cinfo.num_components = 2;
+    cinfo.comp_info[1].h_samp_factor = cinfo.comp_info[1].v_samp_factor = 1;
+    jpegli_start_compress(&cinfo, TRUE);
+    JSAMPLE image[1] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// input_components is 1 but in_color_space is JCS_RGB; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch4) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    cinfo.in_color_space = JCS_RGB;
+    jpegli_set_defaults(&cinfo);
+    jpegli_start_compress(&cinfo, TRUE);
+    JSAMPLE image[1] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// input_components is 3 but in_color_space is JCS_GRAYSCALE; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch5) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 3;
+    cinfo.in_color_space = JCS_GRAYSCALE;
+    jpegli_set_defaults(&cinfo);
+    jpegli_start_compress(&cinfo, TRUE);
+    JSAMPLE image[3] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// Three-component JCS_RGB input with num_components overwritten to 2; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch6) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 3;
+    cinfo.in_color_space = JCS_RGB;
+    jpegli_set_defaults(&cinfo);
+    cinfo.num_components = 2;
+    jpegli_start_compress(&cinfo, TRUE);
+    JSAMPLE image[3] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// in_color_space is JCS_YCbCr while jpeg_color_space is forced to JCS_RGB; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, InvalidColorTransform) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 3;
+    cinfo.in_color_space = JCS_YCbCr;
+    jpegli_set_defaults(&cinfo);
+    cinfo.jpeg_color_space = JCS_RGB;
+    jpegli_start_compress(&cinfo, TRUE);
+    JSAMPLE image[3] = {0};
+    JSAMPROW row[] = {image};
+    jpegli_write_scanlines(&cinfo, row, 1);
+    jpegli_finish_compress(&cinfo);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// Components 0 and 1 are both given component_id 0; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, DuplicateComponentIds) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 3;
+    jpegli_set_defaults(&cinfo);
+    cinfo.comp_info[0].component_id = 0;
+    cinfo.comp_info[1].component_id = 0;
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// comp_info[0].component_index is overwritten with 17; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, InvalidComponentIndex) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 3;
+    jpegli_set_defaults(&cinfo);
+    cinfo.comp_info[0].component_index = 17;
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// arith_code is set to TRUE before start_compress; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, ArithmeticCoding) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 3;
+    jpegli_set_defaults(&cinfo);
+    cinfo.arith_code = TRUE;
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// CCIR601_sampling is set to TRUE before start_compress; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, CCIR601Sampling) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);  // Defined elsewhere; presumably returns false from the lambda on error - confirm.
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 3;
+    jpegli_set_defaults(&cinfo);
+    cinfo.CCIR601_sampling = TRUE;
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// scan_info points at a script but num_scans is 0; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, InvalidScanScript1) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {{1, {0}, 0, 63, 0, 0}};  //
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = 0;
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// The scan lists components {0, 1} while input_components is 1; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, InvalidScanScript2) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {{2, {0, 1}, 0, 63, 0, 0}};  //
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// The scan's first field (comps_in_scan) is 5; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, InvalidScanScript3) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {{5, {0}, 0, 63, 0, 0}};  //
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// The scan lists component 0 twice ({0, 0}) with two input components; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, InvalidScanScript4) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 2;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {{2, {0, 0}, 0, 63, 0, 0}};  //
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// The scan lists its components in the order {1, 0}; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, InvalidScanScript5) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 2;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {{2, {1, 0}, 0, 63, 0, 0}};  //
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// The scan's spectral range is 0..64 (fourth field is 64); expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, InvalidScanScript6) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {{1, {0}, 0, 64, 0, 0}};  //
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// The scan's spectral range starts at 2 and ends at 1; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, InvalidScanScript7) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {{1, {0}, 2, 1, 0, 0}};  //
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// Component 0 gets one 0..63 scan while component 1 gets separate 0..0 and 1..63 scans; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, InvalidScanScript8) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 2;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {
+        {1, {0}, 0, 63, 0, 0}, {1, {1}, 0, 0, 0, 0}, {1, {1}, 1, 63, 0, 0}  //
+    };
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// Component 0 is split into a 0..1 scan and a 2..63 scan; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, InvalidScanScript9) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {
+        {1, {0}, 0, 1, 0, 0}, {1, {0}, 2, 63, 0, 0},  //
+    };
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// A two-component 0..0 scan is followed by a two-component 1..63 scan; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, InvalidScanScript10) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 2;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {
+        {2, {0, 1}, 0, 0, 0, 0}, {2, {0, 1}, 1, 63, 0, 0}  //
+    };
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// The 1..63 scan of component 0 comes before its 0..0 scan; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, InvalidScanScript11) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {
+        {1, {0}, 1, 63, 0, 0}, {1, {0}, 0, 0, 0, 0}  //
+    };
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// The first 0..0 scan has last two fields (10, 1), the second (1, 0), then a 1..63 scan; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, InvalidScanScript12) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {
+        {1, {0}, 0, 0, 10, 1}, {1, {0}, 0, 0, 1, 0}, {1, {0}, 1, 63, 0, 0}  //
+    };
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// The 0..0 scans have last two fields (0, 2), (1, 0), (2, 1) in that order, then a 1..63 scan; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, InvalidScanScript13) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    static constexpr jpeg_scan_info kScript[] = {
+        {1, {0}, 0, 0, 0, 2},
+        {1, {0}, 0, 0, 1, 0},
+        {1, {0}, 0, 0, 2, 1},  //
+        {1, {0}, 1, 63, 0, 0}  //
+    };
+    cinfo.scan_info = kScript;
+    cinfo.num_scans = ARRAY_SIZE(kScript);
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// Progressive level 0 with component 0 sampled 3x3 in a three-component image; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, MCUSizeTooBig) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 3;
+    jpegli_set_defaults(&cinfo);
+    jpegli_set_progressive_level(&cinfo, 0);
+    cinfo.comp_info[0].h_samp_factor = 3;
+    cinfo.comp_info[0].v_samp_factor = 3;
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// restart_interval is set to 1000000; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, RestartIntervalTooBig) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 1;
+    jpegli_set_defaults(&cinfo);
+    cinfo.restart_interval = 1000000;
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// comp_info[0].h_samp_factor is set to 5; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, SamplingFactorTooBig) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 3;
+    jpegli_set_defaults(&cinfo);
+    cinfo.comp_info[0].h_samp_factor = 5;
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// Horizontal sampling factors 3 (component 0) and 2 (component 1); expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, NonIntegralSamplingRatio) {
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    cinfo.image_width = 1;
+    cinfo.image_height = 1;
+    cinfo.input_components = 3;
+    jpegli_set_defaults(&cinfo);
+    cinfo.comp_info[0].h_samp_factor = 3;
+    cinfo.comp_info[1].h_samp_factor = 2;
+    jpegli_start_compress(&cinfo, TRUE);
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+// Add-on message table for the AddOnTable* tests: no parameter, a %d parameter, and a %s parameter.
+constexpr const char* kAddOnTable[] = {"First message",
+                                       "Second message with int param %d",
+                                       "Third message with string param %s"};
+// Installs kAddOnTable for codes 10000..10002, sets msg_code 10000 and calls error_exit; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, AddOnTableNoParam) {
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    cinfo.err->addon_message_table = kAddOnTable;
+    cinfo.err->first_addon_message = 10000;
+    cinfo.err->last_addon_message = 10002;
+    cinfo.err->msg_code = 10000;
+    (*cinfo.err->error_exit)(reinterpret_cast<j_common_ptr>(&cinfo));
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+}
+// As AddOnTableNoParam, with msg_code 10001 and msg_parm.i[0] = 17; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, AddOnTableIntParam) {
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    cinfo.err->addon_message_table = kAddOnTable;
+    cinfo.err->first_addon_message = 10000;
+    cinfo.err->last_addon_message = 10002;
+    cinfo.err->msg_code = 10001;
+    cinfo.err->msg_parm.i[0] = 17;
+    (*cinfo.err->error_exit)(reinterpret_cast<j_common_ptr>(&cinfo));
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+}
+// As AddOnTableNoParam, with msg_code 10002 and msg_parm.s set to "MESSAGE PARAM"; expects the lambda to return false.
+TEST(EncoderErrorHandlingTest, AddOnTableNoStringParam) {
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    cinfo.err->addon_message_table = kAddOnTable;
+    cinfo.err->first_addon_message = 10000;
+    cinfo.err->last_addon_message = 10002;
+    cinfo.err->msg_code = 10002;
+    memcpy(cinfo.err->msg_parm.s, "MESSAGE PARAM", 14);  // 13 characters plus the terminating NUL.
+    (*cinfo.err->error_exit)(reinterpret_cast<j_common_ptr>(&cinfo));
+    return true;
+  };
+  EXPECT_FALSE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+}
+
+static const uint8_t kCompressed0[] = {
+    // SOI
+    0xff, 0xd8,  //
+    // DQT
+    0xff, 0xdb, 0x00, 0x43, 0x00, 0x03, 0x02, 0x02, 0x03, 0x02,  //
+    0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x03, 0x03, 0x04, 0x05,  //
+    0x08, 0x05, 0x05, 0x04, 0x04, 0x05, 0x0a, 0x07, 0x07, 0x06,  //
+    0x08, 0x0c, 0x0a, 0x0c, 0x0c, 0x0b, 0x0a, 0x0b, 0x0b, 0x0d,  //
+    0x0e, 0x12, 0x10, 0x0d, 0x0e, 0x11, 0x0e, 0x0b, 0x0b, 0x10,  //
+    0x16, 0x10, 0x11, 0x13, 0x14, 0x15, 0x15, 0x15, 0x0c, 0x0f,  //
+    0x17, 0x18, 0x16, 0x14, 0x18, 0x12, 0x14, 0x15, 0x14,        //
+    // SOF
+    0xff, 0xc0, 0x00, 0x0b, 0x08, 0x00, 0x01, 0x00, 0x01, 0x01,  //
+    0x01, 0x11, 0x00,                                            //
+    // DHT
+    0xff, 0xc4, 0x00, 0xd2, 0x00, 0x00, 0x01, 0x05, 0x01, 0x01,  //
+    0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  //
+    0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,  //
+    0x09, 0x0a, 0x0b, 0x10, 0x00, 0x02, 0x01, 0x03, 0x03, 0x02,  //
+    0x04, 0x03, 0x05, 0x05, 0x04, 0x04, 0x00, 0x00, 0x01, 0x7d,  //
+    0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31,  //
+    0x41,
0x06, 0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, // + 0x81, 0x91, 0xa1, 0x08, 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, // + 0xd1, 0xf0, 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, // + 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, // + 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45, // + 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, // + 0x58, 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, // + 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x83, // + 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, // + 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, // + 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, // + 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, // + 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, // + 0xd9, 0xda, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, // + 0xe9, 0xea, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, // + 0xf9, 0xfa, // + // SOS + 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x00, 0x3f, 0x00, // + // entropy coded data + 0xfc, 0xaa, 0xaf, // + // EOI + 0xff, 0xd9, // +}; +static const size_t kLen0 = sizeof(kCompressed0); + +static const size_t kDQTOffset = 2; +static const size_t kSOFOffset = 71; +static const size_t kDHTOffset = 84; +static const size_t kSOSOffset = 296; + +TEST(DecoderErrorHandlingTest, MinimalSuccess) { + JXL_CHECK(kCompressed0[kDQTOffset] == 0xff); + JXL_CHECK(kCompressed0[kSOFOffset] == 0xff); + JXL_CHECK(kCompressed0[kDHTOffset] == 0xff); + JXL_CHECK(kCompressed0[kSOSOffset] == 0xff); + jpeg_decompress_struct cinfo = {}; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_mem_src(&cinfo, kCompressed0, kLen0); + jpegli_read_header(&cinfo, TRUE); + EXPECT_EQ(1, cinfo.image_width); + EXPECT_EQ(1, cinfo.image_height); + jpegli_start_decompress(&cinfo); + JSAMPLE image[1]; + JSAMPROW row[] = {image}; + jpegli_read_scanlines(&cinfo, 
row, 1); + EXPECT_EQ(0, image[0]); + jpegli_finish_decompress(&cinfo); + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); +} + +TEST(DecoderErrorHandlingTest, NoSource) { + jpeg_decompress_struct cinfo = {}; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_read_header(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); +} + +TEST(DecoderErrorHandlingTest, NoReadHeader) { + jpeg_decompress_struct cinfo = {}; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_mem_src(&cinfo, kCompressed0, kLen0); + jpegli_start_decompress(&cinfo); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); +} + +TEST(DecoderErrorHandlingTest, NoStartDecompress) { + jpeg_decompress_struct cinfo = {}; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_mem_src(&cinfo, kCompressed0, kLen0); + jpegli_read_header(&cinfo, TRUE); + EXPECT_EQ(1, cinfo.image_width); + EXPECT_EQ(1, cinfo.image_height); + JSAMPLE image[1]; + JSAMPROW row[] = {image}; + jpegli_read_scanlines(&cinfo, row, 1); + EXPECT_EQ(0, image[0]); + jpegli_finish_decompress(&cinfo); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); +} + +TEST(DecoderErrorHandlingTest, NoReadScanlines) { + jpeg_decompress_struct cinfo = {}; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_mem_src(&cinfo, kCompressed0, kLen0); + jpegli_read_header(&cinfo, TRUE); + EXPECT_EQ(1, cinfo.image_width); + EXPECT_EQ(1, cinfo.image_height); + jpegli_start_decompress(&cinfo); + jpegli_finish_decompress(&cinfo); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); 
+} + +static const size_t kMaxImageWidth = 0xffff; +JSAMPLE kOutputBuffer[MAX_COMPONENTS * kMaxImageWidth]; + +bool ParseCompressed(const std::vector<uint8_t>& compressed) { + jpeg_decompress_struct cinfo = {}; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_mem_src(&cinfo, compressed.data(), compressed.size()); + jpegli_read_header(&cinfo, TRUE); + jpegli_start_decompress(&cinfo); + for (JDIMENSION i = 0; i < cinfo.output_height; ++i) { + JSAMPROW row[] = {kOutputBuffer}; + jpegli_read_scanlines(&cinfo, row, 1); + } + jpegli_finish_decompress(&cinfo); + return true; + }; + bool retval = try_catch_block(); + jpegli_destroy_decompress(&cinfo); + return retval; +} + +TEST(DecoderErrorHandlingTest, NoSOI) { + for (int pos : {0, 1}) { + std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0); + compressed[pos] = 0; + EXPECT_FALSE(ParseCompressed(compressed)); + } +} + +TEST(DecoderErrorHandlingTest, InvalidDQT) { + // Bad marker length + for (int diff : {-2, -1, 1, 2}) { + std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kDQTOffset + 3] += diff; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // inavlid table index / precision + for (int val : {0x20, 0x05}) { + std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kDQTOffset + 4] = val; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // zero quant value + for (int k : {0, 1, 17, 63}) { + std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kDQTOffset + 5 + k] = 0; + EXPECT_FALSE(ParseCompressed(compressed)); + } +} + +TEST(DecoderErrorHandlingTest, InvalidSOF) { + // Bad marker length + for (int diff : {-2, -1, 1, 2}) { + std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kSOFOffset + 3] += diff; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // zero width, height or num_components + for (int pos : {6, 8, 
9}) { + std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kSOFOffset + pos] = 0; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // invalid data precision + for (int val : {0, 1, 127}) { + std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kSOFOffset + 4] = val; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // too many num_components + for (int val : {5, 255}) { + std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kSOFOffset + 9] = val; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // invalid sampling factors + for (int val : {0x00, 0x01, 0x10, 0x15, 0x51}) { + std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kSOFOffset + 11] = val; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // invalid quant table index + for (int val : {5, 17}) { + std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kSOFOffset + 12] = val; + EXPECT_FALSE(ParseCompressed(compressed)); + } +} + +TEST(DecoderErrorHandlingTest, InvalidDHT) { + // Bad marker length + for (int diff : {-2, -1, 1, 2}) { + std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kDHTOffset + 3] += diff; + EXPECT_FALSE(ParseCompressed(compressed)); + } + { + std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kDHTOffset + 2] += 17; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // inavlid table slot_id + for (int val : {0x05, 0x15, 0x20}) { + std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kDHTOffset + 4] = val; + EXPECT_FALSE(ParseCompressed(compressed)); + } +} + +TEST(DecoderErrorHandlingTest, InvalidSOS) { + // Invalid comps_in_scan + for (int val : {2, 5, 17}) { + std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kSOSOffset + 4] = val; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // invalid Huffman table indexes + 
for (int val : {0x05, 0x50, 0x15, 0x51}) { + std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kSOSOffset + 6] = val; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // invalid Ss/Se + for (int pos : {7, 8}) { + std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kSOSOffset + pos] = 64; + EXPECT_FALSE(ParseCompressed(compressed)); + } +} + +TEST(DecoderErrorHandlingTest, MutateSingleBytes) { + for (size_t pos = 0; pos < kLen0; ++pos) { + std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0); + for (int val : {0x00, 0x0f, 0xf0, 0xff}) { + compressed[pos] = val; + ParseCompressed(compressed); + } + } +} + +} // namespace +} // namespace jpegli diff --git a/lib/jpegli/huffman.cc b/lib/jpegli/huffman.cc new file mode 100644 index 0000000..1cf88a5 --- /dev/null +++ b/lib/jpegli/huffman.cc @@ -0,0 +1,321 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/huffman.h" + +#include <limits> +#include <vector> + +#include "lib/jpegli/common.h" +#include "lib/jpegli/error.h" + +namespace jpegli { + +// Returns the table width of the next 2nd level table, count is the histogram +// of bit lengths for the remaining symbols, len is the code length of the next +// processed symbol. 
+static inline int NextTableBitSize(const int* count, int len) { + int left = 1 << (len - kJpegHuffmanRootTableBits); + while (len < static_cast<int>(kJpegHuffmanMaxBitLength)) { + left -= count[len]; + if (left <= 0) break; + ++len; + left <<= 1; + } + return len - kJpegHuffmanRootTableBits; +} + +void BuildJpegHuffmanTable(const uint32_t* count, const uint32_t* symbols, + HuffmanTableEntry* lut) { + HuffmanTableEntry code; // current table entry + HuffmanTableEntry* table; // next available space in table + int len; // current code length + int idx; // symbol index + int key; // prefix code + int reps; // number of replicate key values in current table + int low; // low bits for current root entry + int table_bits; // key length of current table + int table_size; // size of current table + + // Make a local copy of the input bit length histogram. + int tmp_count[kJpegHuffmanMaxBitLength + 1] = {0}; + int total_count = 0; + for (len = 1; len <= static_cast<int>(kJpegHuffmanMaxBitLength); ++len) { + tmp_count[len] = count[len]; + total_count += tmp_count[len]; + } + + table = lut; + table_bits = kJpegHuffmanRootTableBits; + table_size = 1 << table_bits; + + // Special case code with only one value. + if (total_count == 1) { + code.bits = 0; + code.value = symbols[0]; + for (key = 0; key < table_size; ++key) { + table[key] = code; + } + return; + } + + // Fill in root table. + key = 0; + idx = 0; + for (len = 1; len <= kJpegHuffmanRootTableBits; ++len) { + for (; tmp_count[len] > 0; --tmp_count[len]) { + code.bits = len; + code.value = symbols[idx++]; + reps = 1 << (kJpegHuffmanRootTableBits - len); + while (reps--) { + table[key++] = code; + } + } + } + + // Fill in 2nd level tables and add pointers to root table. 
+ table += table_size; + table_size = 0; + low = 0; + for (len = kJpegHuffmanRootTableBits + 1; + len <= static_cast<int>(kJpegHuffmanMaxBitLength); ++len) { + for (; tmp_count[len] > 0; --tmp_count[len]) { + // Start a new sub-table if the previous one is full. + if (low >= table_size) { + table += table_size; + table_bits = NextTableBitSize(tmp_count, len); + table_size = 1 << table_bits; + low = 0; + lut[key].bits = table_bits + kJpegHuffmanRootTableBits; + lut[key].value = (table - lut) - key; + ++key; + } + code.bits = len - kJpegHuffmanRootTableBits; + code.value = symbols[idx++]; + reps = 1 << (table_bits - code.bits); + while (reps--) { + table[low++] = code; + } + } + } +} + +// A node of a Huffman tree. +struct HuffmanTree { + HuffmanTree(uint32_t count, int16_t left, int16_t right) + : total_count(count), index_left(left), index_right_or_value(right) {} + uint32_t total_count; + int16_t index_left; + int16_t index_right_or_value; +}; + +void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth, + uint8_t level) { + if (p.index_left >= 0) { + ++level; + SetDepth(pool[p.index_left], pool, depth, level); + SetDepth(pool[p.index_right_or_value], pool, depth, level); + } else { + depth[p.index_right_or_value] = level; + } +} + +// Sort the root nodes, least popular first. +static JXL_INLINE bool Compare(const HuffmanTree& v0, const HuffmanTree& v1) { + return v0.total_count < v1.total_count; +} + +// This function will create a Huffman tree. +// +// The catch here is that the tree cannot be arbitrarily deep. +// Brotli specifies a maximum depth of 15 bits for "code trees" +// and 7 bits for "code length code trees." +// +// count_limit is the value that is to be faked as the minimum value +// and this minimum value is raised until the tree matches the +// maximum length requirement. 
+// +// This algorithm is not of excellent performance for very long data blocks, +// especially when population counts are longer than 2**tree_limit, but +// we are not planning to use this with extremely long blocks. +// +// See http://en.wikipedia.org/wiki/Huffman_coding +void CreateHuffmanTree(const uint32_t* data, const size_t length, + const int tree_limit, uint8_t* depth) { + // For block sizes below 64 kB, we never need to do a second iteration + // of this loop. Probably all of our block sizes will be smaller than + // that, so this loop is mostly of academic interest. If we actually + // would need this, we would be better off with the Katajainen algorithm. + for (uint32_t count_limit = 1;; count_limit *= 2) { + std::vector<HuffmanTree> tree; + tree.reserve(2 * length + 1); + + for (size_t i = length; i != 0;) { + --i; + if (data[i]) { + const uint32_t count = std::max(data[i], count_limit - 1); + tree.emplace_back(count, -1, static_cast<int16_t>(i)); + } + } + + const size_t n = tree.size(); + if (n == 1) { + // Fake value; will be fixed on upper level. + depth[tree[0].index_right_or_value] = 1; + break; + } + + std::stable_sort(tree.begin(), tree.end(), Compare); + + // The nodes are: + // [0, n): the sorted leaf nodes that we start with. + // [n]: we add a sentinel here. + // [n + 1, 2n): new parent nodes are added here, starting from + // (n+1). These are naturally in ascending order. + // [2n]: we add a sentinel at the end as well. + // There will be (2n+1) elements at the end. + const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1); + tree.push_back(sentinel); + tree.push_back(sentinel); + + size_t i = 0; // Points to the next leaf node. + size_t j = n + 1; // Points to the next non-leaf node. 
+ for (size_t k = n - 1; k != 0; --k) { + size_t left, right; + if (tree[i].total_count <= tree[j].total_count) { + left = i; + ++i; + } else { + left = j; + ++j; + } + if (tree[i].total_count <= tree[j].total_count) { + right = i; + ++i; + } else { + right = j; + ++j; + } + + // The sentinel node becomes the parent node. + size_t j_end = tree.size() - 1; + tree[j_end].total_count = + tree[left].total_count + tree[right].total_count; + tree[j_end].index_left = static_cast<int16_t>(left); + tree[j_end].index_right_or_value = static_cast<int16_t>(right); + + // Add back the last sentinel node. + tree.push_back(sentinel); + } + JXL_DASSERT(tree.size() == 2 * n + 1); + SetDepth(tree[2 * n - 1], &tree[0], depth, 0); + + // We need to pack the Huffman tree in tree_limit bits. + // If this was not successful, add fake entities to the lowest values + // and retry. + if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) { + break; + } + } +} + +void ValidateHuffmanTable(j_common_ptr cinfo, const JHUFF_TBL* table, + bool is_dc) { + size_t total_symbols = 0; + size_t total_p = 0; + size_t max_depth = 0; + for (size_t d = 1; d <= kJpegHuffmanMaxBitLength; ++d) { + uint8_t count = table->bits[d]; + if (count) { + total_symbols += count; + total_p += (1u << (kJpegHuffmanMaxBitLength - d)) * count; + max_depth = d; + } + } + total_p += 1u << (kJpegHuffmanMaxBitLength - max_depth); // sentinel symbol + if (total_symbols == 0) { + JPEGLI_ERROR("Empty Huffman table"); + } + if (total_symbols > kJpegHuffmanAlphabetSize) { + JPEGLI_ERROR("Too many symbols in Huffman table"); + } + if (total_p != (1u << kJpegHuffmanMaxBitLength)) { + JPEGLI_ERROR("Invalid bit length distribution"); + } + uint8_t symbol_seen[kJpegHuffmanAlphabetSize] = {}; + for (size_t i = 0; i < total_symbols; ++i) { + uint8_t symbol = table->huffval[i]; + if (symbol_seen[symbol]) { + JPEGLI_ERROR("Duplicate symbol %d in Huffman table", symbol); + } + symbol_seen[symbol] = 1; + } +} + +void 
AddStandardHuffmanTables(j_common_ptr cinfo, bool is_dc) { + // Huffman tables from the JPEG standard. + static constexpr JHUFF_TBL kStandardDCTables[2] = { + // DC luma + {{0, 0, 1, 5, 1, 1, 1, 1, 1, 1}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, + FALSE}, + // DC chroma + {{0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, + FALSE}}; + static constexpr JHUFF_TBL kStandardACTables[2] = { + // AC luma + {{0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125}, + {0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06, + 0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08, + 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, 0x24, 0x33, 0x62, 0x72, + 0x82, 0x09, 0x0a, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, + 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45, + 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, + 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x75, + 0x76, 0x77, 0x78, 0x79, 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, + 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, + 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, + 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, + 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2, + 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf1, 0xf2, 0xf3, 0xf4, + 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa}, + FALSE}, + // AC chroma + {{0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119}, + {0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41, + 0x51, 0x07, 0x61, 0x71, 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, + 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, 0x15, 0x62, 0x72, 0xd1, + 0x0a, 0x16, 0x24, 0x34, 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26, + 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, + 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 
0x55, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, + 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, + 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, + 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, + 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf2, 0xf3, 0xf4, + 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa}, + FALSE}}; + const JHUFF_TBL* std_tables = is_dc ? kStandardDCTables : kStandardACTables; + JHUFF_TBL** tables; + if (cinfo->is_decompressor) { + j_decompress_ptr cinfo_d = reinterpret_cast<j_decompress_ptr>(cinfo); + tables = is_dc ? cinfo_d->dc_huff_tbl_ptrs : cinfo_d->ac_huff_tbl_ptrs; + } else { + j_compress_ptr cinfo_c = reinterpret_cast<j_compress_ptr>(cinfo); + tables = is_dc ? cinfo_c->dc_huff_tbl_ptrs : cinfo_c->ac_huff_tbl_ptrs; + } + for (int i = 0; i < 2; ++i) { + if (tables[i] == nullptr) { + tables[i] = jpegli_alloc_huff_table(cinfo); + memcpy(tables[i], &std_tables[i], sizeof(JHUFF_TBL)); + ValidateHuffmanTable(cinfo, tables[i], is_dc); + } + } +} + +} // namespace jpegli diff --git a/lib/jpegli/huffman.h b/lib/jpegli/huffman.h new file mode 100644 index 0000000..f0e5e1d --- /dev/null +++ b/lib/jpegli/huffman.h @@ -0,0 +1,50 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_HUFFMAN_H_ +#define LIB_JPEGLI_HUFFMAN_H_ + +#include <stdint.h> +#include <stdlib.h> + +#include "lib/jpegli/common_internal.h" + +namespace jpegli { + +constexpr int kJpegHuffmanRootTableBits = 8; +// Maximum huffman lookup table size. 
// According to zlib/examples/enough.c, 758 entries are always enough for
// an alphabet of 257 symbols (256 + 1 special symbol for the all 1s code) and
// max bit length 16 if the root table has 8 bits.
constexpr int kJpegHuffmanLutSize = 758;

// One slot of the two-level lookup table built by BuildJpegHuffmanTable.
struct HuffmanTableEntry {
  uint8_t bits;    // number of bits used for this symbol
  uint16_t value;  // symbol value or table offset
};

// Fills lut with a two-level lookup table whose root table has
// 2^kJpegHuffmanRootTableBits entries.
//
// count[len] is the number of codes of bit length len, and symbols lists the
// symbol values in order of increasing code length.
// NOTE(review): lut is presumably expected to hold kJpegHuffmanLutSize
// entries -- confirm at the call sites.
void BuildJpegHuffmanTable(const uint32_t* count, const uint32_t* symbols,
                           HuffmanTableEntry* lut);

// This function will create a Huffman tree.
//
// The (data,length) contains the population counts.
// The tree_limit is the maximum bit depth of the Huffman codes.
//
// The depth contains the tree, i.e., how many bits are used for
// the symbol.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
void CreateHuffmanTree(const uint32_t* data, size_t length, int tree_limit,
                       uint8_t* depth);

// Checks the bit length counts and symbol values of table and reports
// inconsistencies (empty table, too many symbols, invalid bit length
// distribution, duplicate symbols) through the error handling of cinfo.
void ValidateHuffmanTable(j_common_ptr cinfo, const JHUFF_TBL* table,
                          bool is_dc);

// Installs the Huffman tables from the JPEG standard into the first two DC
// (is_dc == true) or AC table slots of cinfo that are not set yet.
void AddStandardHuffmanTables(j_common_ptr cinfo, bool is_dc);

}  // namespace jpegli

#endif  // LIB_JPEGLI_HUFFMAN_H_
diff --git a/lib/jpegli/idct.cc b/lib/jpegli/idct.cc new file mode 100644 index 0000000..4d10563 --- /dev/null +++ b/lib/jpegli/idct.cc @@ -0,0 +1,692 @@
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "lib/jpegli/idct.h"

#include <cmath>

#include "lib/jpegli/decode_internal.h"
#include "lib/jxl/base/status.h"

#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "lib/jpegli/idct.cc"
#include <hwy/foreach_target.h>
#include <hwy/highway.h>

#include "lib/jpegli/transpose-inl.h"

HWY_BEFORE_NAMESPACE();
namespace jpegli {
namespace HWY_NAMESPACE {

// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs; +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Gt; +using hwy::HWY_NAMESPACE::IfThenElseZero; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::NegMulAdd; +using hwy::HWY_NAMESPACE::Rebind; +using hwy::HWY_NAMESPACE::Sub; +using hwy::HWY_NAMESPACE::Vec; +using hwy::HWY_NAMESPACE::Xor; + +using D = HWY_FULL(float); +using DI = HWY_FULL(int32_t); +constexpr D d; +constexpr DI di; + +using D8 = HWY_CAPPED(float, 8); +constexpr D8 d8; + +void DequantBlock(const int16_t* JXL_RESTRICT qblock, + const float* JXL_RESTRICT dequant, + const float* JXL_RESTRICT biases, float* JXL_RESTRICT block) { + for (size_t k = 0; k < 64; k += Lanes(d)) { + const auto mul = Load(d, dequant + k); + const auto bias = Load(d, biases + k); + const Rebind<int16_t, DI> di16; + const Vec<DI> quant_i = PromoteTo(di, Load(di16, qblock + k)); + const Rebind<float, DI> df; + const auto quant = ConvertTo(df, quant_i); + const auto abs_quant = Abs(quant); + const auto not_0 = Gt(abs_quant, Zero(df)); + const auto sign_quant = Xor(quant, abs_quant); + const auto biased_quant = Sub(quant, Xor(bias, sign_quant)); + const auto dequant = IfThenElseZero(not_0, Mul(biased_quant, mul)); + Store(dequant, d, block + k); + } +} + +template <size_t N> +void ForwardEvenOdd(const float* JXL_RESTRICT ain, size_t ain_stride, + float* JXL_RESTRICT aout) { + for (size_t i = 0; i < N / 2; i++) { + auto in1 = LoadU(d8, ain + 2 * i * ain_stride); + Store(in1, d8, aout + i * 8); + } + for (size_t i = N / 2; i < N; i++) { + auto in1 = LoadU(d8, ain + (2 * (i - N / 2) + 1) * ain_stride); + Store(in1, d8, aout + i * 8); + } +} + +template <size_t N> +void BTranspose(float* JXL_RESTRICT coeff) { + for (size_t i = N - 1; i > 0; i--) { + auto in1 = Load(d8, coeff + i * 8); + auto in2 = Load(d8, coeff + (i - 1) * 8); + Store(Add(in1, in2), d8, coeff + i * 8); + } + constexpr float kSqrt2 = 1.41421356237f; + auto sqrt2 = Set(d8, kSqrt2); + auto 
in1 = Load(d8, coeff); + Store(Mul(in1, sqrt2), d8, coeff); +} + +// Constants for DCT implementation. Generated by the following snippet: +// for i in range(N // 2): +// print(1.0 / (2 * math.cos((i + 0.5) * math.pi / N)), end=", ") +template <size_t N> +struct WcMultipliers; + +template <> +struct WcMultipliers<4> { + static constexpr float kMultipliers[] = { + 0.541196100146197, + 1.3065629648763764, + }; +}; + +template <> +struct WcMultipliers<8> { + static constexpr float kMultipliers[] = { + 0.5097955791041592, + 0.6013448869350453, + 0.8999762231364156, + 2.5629154477415055, + }; +}; + +constexpr float WcMultipliers<4>::kMultipliers[]; +constexpr float WcMultipliers<8>::kMultipliers[]; + +template <size_t N> +void MultiplyAndAdd(const float* JXL_RESTRICT coeff, float* JXL_RESTRICT out, + size_t out_stride) { + for (size_t i = 0; i < N / 2; i++) { + auto mul = Set(d8, WcMultipliers<N>::kMultipliers[i]); + auto in1 = Load(d8, coeff + i * 8); + auto in2 = Load(d8, coeff + (N / 2 + i) * 8); + auto out1 = MulAdd(mul, in2, in1); + auto out2 = NegMulAdd(mul, in2, in1); + StoreU(out1, d8, out + i * out_stride); + StoreU(out2, d8, out + (N - i - 1) * out_stride); + } +} + +template <size_t N> +struct IDCT1DImpl; + +template <> +struct IDCT1DImpl<1> { + JXL_INLINE void operator()(const float* from, size_t from_stride, float* to, + size_t to_stride) { + StoreU(LoadU(d8, from), d8, to); + } +}; + +template <> +struct IDCT1DImpl<2> { + JXL_INLINE void operator()(const float* from, size_t from_stride, float* to, + size_t to_stride) { + JXL_DASSERT(from_stride >= 8); + JXL_DASSERT(to_stride >= 8); + auto in1 = LoadU(d8, from); + auto in2 = LoadU(d8, from + from_stride); + StoreU(Add(in1, in2), d8, to); + StoreU(Sub(in1, in2), d8, to + to_stride); + } +}; + +template <size_t N> +struct IDCT1DImpl { + void operator()(const float* from, size_t from_stride, float* to, + size_t to_stride) { + JXL_DASSERT(from_stride >= 8); + JXL_DASSERT(to_stride >= 8); + HWY_ALIGN float 
tmp[64]; + ForwardEvenOdd<N>(from, from_stride, tmp); + IDCT1DImpl<N / 2>()(tmp, 8, tmp, 8); + BTranspose<N / 2>(tmp + N * 4); + IDCT1DImpl<N / 2>()(tmp + N * 4, 8, tmp + N * 4, 8); + MultiplyAndAdd<N>(tmp, to, to_stride); + } +}; + +template <size_t N> +void IDCT1D(float* JXL_RESTRICT from, float* JXL_RESTRICT output, + size_t output_stride) { + for (size_t i = 0; i < 8; i += Lanes(d8)) { + IDCT1DImpl<N>()(from + i, 8, output + i, output_stride); + } +} + +void ComputeScaledIDCT(float* JXL_RESTRICT block0, float* JXL_RESTRICT block1, + float* JXL_RESTRICT output, size_t output_stride) { + Transpose8x8Block(block0, block1); + IDCT1D<8>(block1, block0, 8); + Transpose8x8Block(block0, block1); + IDCT1D<8>(block1, output, output_stride); +} + +void InverseTransformBlock8x8(const int16_t* JXL_RESTRICT qblock, + const float* JXL_RESTRICT dequant, + const float* JXL_RESTRICT biases, + float* JXL_RESTRICT scratch_space, + float* JXL_RESTRICT output, size_t output_stride, + size_t dctsize) { + float* JXL_RESTRICT block0 = scratch_space; + float* JXL_RESTRICT block1 = scratch_space + DCTSIZE2; + DequantBlock(qblock, dequant, biases, block0); + ComputeScaledIDCT(block0, block1, output, output_stride); +} + +// Computes the N-point IDCT of in[], and stores the result in out[]. The in[] +// array is at most 8 values long, values in[8:N-1] are assumed to be 0. 
+void Compute1dIDCT(float* in, float* out, size_t N) { + switch (N) { + case 3: { + static constexpr float kC3[3] = { + 1.414213562373, + 1.224744871392, + 0.707106781187, + }; + float even0 = in[0] + kC3[2] * in[2]; + float even1 = in[0] - kC3[0] * in[2]; + float odd0 = kC3[1] * in[1]; + out[0] = even0 + odd0; + out[2] = even0 - odd0; + out[1] = even1; + break; + } + case 5: { + static constexpr float kC5[5] = { + 1.414213562373, 1.344997023928, 1.144122805635, + 0.831253875555, 0.437016024449, + }; + float even0 = in[0] + kC5[2] * in[2] + kC5[4] * in[4]; + float even1 = in[0] - kC5[4] * in[2] - kC5[2] * in[4]; + float even2 = in[0] - kC5[0] * in[2] + kC5[0] * in[4]; + float odd0 = kC5[1] * in[1] + kC5[3] * in[3]; + float odd1 = kC5[3] * in[1] - kC5[1] * in[3]; + out[0] = even0 + odd0; + out[4] = even0 - odd0; + out[1] = even1 + odd1; + out[3] = even1 - odd1; + out[2] = even2; + break; + } + case 6: { + static constexpr float kC6[6] = { + 1.414213562373, 1.366025403784, 1.224744871392, + 1.000000000000, 0.707106781187, 0.366025403784, + }; + float even0 = in[0] + kC6[2] * in[2] + kC6[4] * in[4]; + float even1 = in[0] - kC6[0] * in[4]; + float even2 = in[0] - kC6[2] * in[2] + kC6[4] * in[4]; + float odd0 = kC6[1] * in[1] + kC6[3] * in[3] + kC6[5] * in[5]; + float odd1 = kC6[3] * in[1] - kC6[3] * in[3] - kC6[3] * in[5]; + float odd2 = kC6[5] * in[1] - kC6[3] * in[3] + kC6[1] * in[5]; + out[0] = even0 + odd0; + out[5] = even0 - odd0; + out[1] = even1 + odd1; + out[4] = even1 - odd1; + out[2] = even2 + odd2; + out[3] = even2 - odd2; + break; + } + case 7: { + static constexpr float kC7[7] = { + 1.414213562373, 1.378756275744, 1.274162392264, 1.105676685997, + 0.881747733790, 0.613604268353, 0.314692122713, + }; + float even0 = in[0] + kC7[2] * in[2] + kC7[4] * in[4] + kC7[6] * in[6]; + float even1 = in[0] + kC7[6] * in[2] - kC7[2] * in[4] - kC7[4] * in[6]; + float even2 = in[0] - kC7[4] * in[2] - kC7[6] * in[4] + kC7[2] * in[6]; + float even3 = in[0] - kC7[0] * in[2] 
+ kC7[0] * in[4] - kC7[0] * in[6]; + float odd0 = kC7[1] * in[1] + kC7[3] * in[3] + kC7[5] * in[5]; + float odd1 = kC7[3] * in[1] - kC7[5] * in[3] - kC7[1] * in[5]; + float odd2 = kC7[5] * in[1] - kC7[1] * in[3] + kC7[3] * in[5]; + out[0] = even0 + odd0; + out[6] = even0 - odd0; + out[1] = even1 + odd1; + out[5] = even1 - odd1; + out[2] = even2 + odd2; + out[4] = even2 - odd2; + out[3] = even3; + break; + } + case 9: { + static constexpr float kC9[9] = { + 1.414213562373, 1.392728480640, 1.328926048777, + 1.224744871392, 1.083350440839, 0.909038955344, + 0.707106781187, 0.483689525296, 0.245575607938, + }; + float even0 = in[0] + kC9[2] * in[2] + kC9[4] * in[4] + kC9[6] * in[6]; + float even1 = in[0] + kC9[6] * in[2] - kC9[6] * in[4] - kC9[0] * in[6]; + float even2 = in[0] - kC9[8] * in[2] - kC9[2] * in[4] + kC9[6] * in[6]; + float even3 = in[0] - kC9[4] * in[2] + kC9[8] * in[4] + kC9[6] * in[6]; + float even4 = in[0] - kC9[0] * in[2] + kC9[0] * in[4] - kC9[0] * in[6]; + float odd0 = + kC9[1] * in[1] + kC9[3] * in[3] + kC9[5] * in[5] + kC9[7] * in[7]; + float odd1 = kC9[3] * in[1] - kC9[3] * in[5] - kC9[3] * in[7]; + float odd2 = + kC9[5] * in[1] - kC9[3] * in[3] - kC9[7] * in[5] + kC9[1] * in[7]; + float odd3 = + kC9[7] * in[1] - kC9[3] * in[3] + kC9[1] * in[5] - kC9[5] * in[7]; + out[0] = even0 + odd0; + out[8] = even0 - odd0; + out[1] = even1 + odd1; + out[7] = even1 - odd1; + out[2] = even2 + odd2; + out[6] = even2 - odd2; + out[3] = even3 + odd3; + out[5] = even3 - odd3; + out[4] = even4; + break; + } + case 10: { + static constexpr float kC10[10] = { + 1.414213562373, 1.396802246667, 1.344997023928, 1.260073510670, + 1.144122805635, 1.000000000000, 0.831253875555, 0.642039521920, + 0.437016024449, 0.221231742082, + }; + float even0 = in[0] + kC10[2] * in[2] + kC10[4] * in[4] + kC10[6] * in[6]; + float even1 = in[0] + kC10[6] * in[2] - kC10[8] * in[4] - kC10[2] * in[6]; + float even2 = in[0] - kC10[0] * in[4]; + float even3 = in[0] - kC10[6] * in[2] - kC10[8] 
* in[4] + kC10[2] * in[6]; + float even4 = in[0] - kC10[2] * in[2] + kC10[4] * in[4] - kC10[6] * in[6]; + float odd0 = + kC10[1] * in[1] + kC10[3] * in[3] + kC10[5] * in[5] + kC10[7] * in[7]; + float odd1 = + kC10[3] * in[1] + kC10[9] * in[3] - kC10[5] * in[5] - kC10[1] * in[7]; + float odd2 = + kC10[5] * in[1] - kC10[5] * in[3] - kC10[5] * in[5] + kC10[5] * in[7]; + float odd3 = + kC10[7] * in[1] - kC10[1] * in[3] + kC10[5] * in[5] + kC10[9] * in[7]; + float odd4 = + kC10[9] * in[1] - kC10[7] * in[3] + kC10[5] * in[5] - kC10[3] * in[7]; + out[0] = even0 + odd0; + out[9] = even0 - odd0; + out[1] = even1 + odd1; + out[8] = even1 - odd1; + out[2] = even2 + odd2; + out[7] = even2 - odd2; + out[3] = even3 + odd3; + out[6] = even3 - odd3; + out[4] = even4 + odd4; + out[5] = even4 - odd4; + break; + } + case 11: { + static constexpr float kC11[11] = { + 1.414213562373, 1.399818907436, 1.356927976287, 1.286413904599, + 1.189712155524, 1.068791297809, 0.926112931411, 0.764581576418, + 0.587485545401, 0.398430002847, 0.201263574413, + }; + float even0 = in[0] + kC11[2] * in[2] + kC11[4] * in[4] + kC11[6] * in[6]; + float even1 = + in[0] + kC11[6] * in[2] - kC11[10] * in[4] - kC11[4] * in[6]; + float even2 = + in[0] + kC11[10] * in[2] - kC11[2] * in[4] - kC11[8] * in[6]; + float even3 = in[0] - kC11[8] * in[2] - kC11[6] * in[4] + kC11[2] * in[6]; + float even4 = + in[0] - kC11[4] * in[2] + kC11[8] * in[4] + kC11[10] * in[6]; + float even5 = in[0] - kC11[0] * in[2] + kC11[0] * in[4] - kC11[0] * in[6]; + float odd0 = + kC11[1] * in[1] + kC11[3] * in[3] + kC11[5] * in[5] + kC11[7] * in[7]; + float odd1 = + kC11[3] * in[1] + kC11[9] * in[3] - kC11[7] * in[5] - kC11[1] * in[7]; + float odd2 = + kC11[5] * in[1] - kC11[7] * in[3] - kC11[3] * in[5] + kC11[9] * in[7]; + float odd3 = + kC11[7] * in[1] - kC11[1] * in[3] + kC11[9] * in[5] + kC11[5] * in[7]; + float odd4 = + kC11[9] * in[1] - kC11[5] * in[3] + kC11[1] * in[5] - kC11[3] * in[7]; + out[0] = even0 + odd0; + out[10] = even0 
- odd0; + out[1] = even1 + odd1; + out[9] = even1 - odd1; + out[2] = even2 + odd2; + out[8] = even2 - odd2; + out[3] = even3 + odd3; + out[7] = even3 - odd3; + out[4] = even4 + odd4; + out[6] = even4 - odd4; + out[5] = even5; + break; + } + case 12: { + static constexpr float kC12[12] = { + 1.414213562373, 1.402114769300, 1.366025403784, 1.306562964876, + 1.224744871392, 1.121971053594, 1.000000000000, 0.860918669154, + 0.707106781187, 0.541196100146, 0.366025403784, 0.184591911283, + }; + float even0 = in[0] + kC12[2] * in[2] + kC12[4] * in[4] + kC12[6] * in[6]; + float even1 = in[0] + kC12[6] * in[2] - kC12[6] * in[6]; + float even2 = + in[0] + kC12[10] * in[2] - kC12[4] * in[4] - kC12[6] * in[6]; + float even3 = + in[0] - kC12[10] * in[2] - kC12[4] * in[4] + kC12[6] * in[6]; + float even4 = in[0] - kC12[6] * in[2] + kC12[6] * in[6]; + float even5 = in[0] - kC12[2] * in[2] + kC12[4] * in[4] - kC12[6] * in[6]; + float odd0 = + kC12[1] * in[1] + kC12[3] * in[3] + kC12[5] * in[5] + kC12[7] * in[7]; + float odd1 = + kC12[3] * in[1] + kC12[9] * in[3] - kC12[9] * in[5] - kC12[3] * in[7]; + float odd2 = kC12[5] * in[1] - kC12[9] * in[3] - kC12[1] * in[5] - + kC12[11] * in[7]; + float odd3 = kC12[7] * in[1] - kC12[3] * in[3] - kC12[11] * in[5] + + kC12[1] * in[7]; + float odd4 = + kC12[9] * in[1] - kC12[3] * in[3] + kC12[3] * in[5] - kC12[9] * in[7]; + float odd5 = kC12[11] * in[1] - kC12[9] * in[3] + kC12[7] * in[5] - + kC12[5] * in[7]; + out[0] = even0 + odd0; + out[11] = even0 - odd0; + out[1] = even1 + odd1; + out[10] = even1 - odd1; + out[2] = even2 + odd2; + out[9] = even2 - odd2; + out[3] = even3 + odd3; + out[8] = even3 - odd3; + out[4] = even4 + odd4; + out[7] = even4 - odd4; + out[5] = even5 + odd5; + out[6] = even5 - odd5; + break; + } + case 13: { + static constexpr float kC13[13] = { + 1.414213562373, 1.403902353238, 1.373119086479, 1.322312651445, + 1.252223920364, 1.163874944761, 1.058554051646, 0.937797056801, + 0.803364869133, 0.657217812653, 
0.501487040539, 0.338443458124, + 0.170464607981, + }; + float even0 = in[0] + kC13[2] * in[2] + kC13[4] * in[4] + kC13[6] * in[6]; + float even1 = + in[0] + kC13[6] * in[2] + kC13[12] * in[4] - kC13[8] * in[6]; + float even2 = + in[0] + kC13[10] * in[2] - kC13[6] * in[4] - kC13[4] * in[6]; + float even3 = + in[0] - kC13[12] * in[2] - kC13[2] * in[4] + kC13[10] * in[6]; + float even4 = + in[0] - kC13[8] * in[2] - kC13[10] * in[4] + kC13[2] * in[6]; + float even5 = + in[0] - kC13[4] * in[2] + kC13[8] * in[4] - kC13[12] * in[6]; + float even6 = in[0] - kC13[0] * in[2] + kC13[0] * in[4] - kC13[0] * in[6]; + float odd0 = + kC13[1] * in[1] + kC13[3] * in[3] + kC13[5] * in[5] + kC13[7] * in[7]; + float odd1 = kC13[3] * in[1] + kC13[9] * in[3] - kC13[11] * in[5] - + kC13[5] * in[7]; + float odd2 = kC13[5] * in[1] - kC13[11] * in[3] - kC13[1] * in[5] - + kC13[9] * in[7]; + float odd3 = + kC13[7] * in[1] - kC13[5] * in[3] - kC13[9] * in[5] + kC13[3] * in[7]; + float odd4 = kC13[9] * in[1] - kC13[1] * in[3] + kC13[7] * in[5] + + kC13[11] * in[7]; + float odd5 = kC13[11] * in[1] - kC13[7] * in[3] + kC13[3] * in[5] - + kC13[1] * in[7]; + out[0] = even0 + odd0; + out[12] = even0 - odd0; + out[1] = even1 + odd1; + out[11] = even1 - odd1; + out[2] = even2 + odd2; + out[10] = even2 - odd2; + out[3] = even3 + odd3; + out[9] = even3 - odd3; + out[4] = even4 + odd4; + out[8] = even4 - odd4; + out[5] = even5 + odd5; + out[7] = even5 - odd5; + out[6] = even6; + break; + } + case 14: { + static constexpr float kC14[14] = { + 1.414213562373, 1.405321284327, 1.378756275744, 1.334852607020, + 1.274162392264, 1.197448846138, 1.105676685997, 1.000000000000, + 0.881747733790, 0.752406978226, 0.613604268353, 0.467085128785, + 0.314692122713, 0.158341680609, + }; + float even0 = in[0] + kC14[2] * in[2] + kC14[4] * in[4] + kC14[6] * in[6]; + float even1 = + in[0] + kC14[6] * in[2] + kC14[12] * in[4] - kC14[10] * in[6]; + float even2 = + in[0] + kC14[10] * in[2] - kC14[8] * in[4] - kC14[2] * 
in[6]; + float even3 = in[0] - kC14[0] * in[4]; + float even4 = + in[0] - kC14[10] * in[2] - kC14[8] * in[4] + kC14[2] * in[6]; + float even5 = + in[0] - kC14[6] * in[2] + kC14[12] * in[4] + kC14[10] * in[6]; + float even6 = in[0] - kC14[2] * in[2] + kC14[4] * in[4] - kC14[6] * in[6]; + float odd0 = + kC14[1] * in[1] + kC14[3] * in[3] + kC14[5] * in[5] + kC14[7] * in[7]; + float odd1 = kC14[3] * in[1] + kC14[9] * in[3] - kC14[13] * in[5] - + kC14[7] * in[7]; + float odd2 = kC14[5] * in[1] - kC14[13] * in[3] - kC14[3] * in[5] - + kC14[7] * in[7]; + float odd3 = + kC14[7] * in[1] - kC14[7] * in[3] - kC14[7] * in[5] + kC14[7] * in[7]; + float odd4 = kC14[9] * in[1] - kC14[1] * in[3] + kC14[11] * in[5] + + kC14[7] * in[7]; + float odd5 = kC14[11] * in[1] - kC14[5] * in[3] + kC14[1] * in[5] - + kC14[7] * in[7]; + float odd6 = kC14[13] * in[1] - kC14[11] * in[3] + kC14[9] * in[5] - + kC14[7] * in[7]; + out[0] = even0 + odd0; + out[13] = even0 - odd0; + out[1] = even1 + odd1; + out[12] = even1 - odd1; + out[2] = even2 + odd2; + out[11] = even2 - odd2; + out[3] = even3 + odd3; + out[10] = even3 - odd3; + out[4] = even4 + odd4; + out[9] = even4 - odd4; + out[5] = even5 + odd5; + out[8] = even5 - odd5; + out[6] = even6 + odd6; + out[7] = even6 - odd6; + break; + } + case 15: { + static constexpr float kC15[15] = { + 1.414213562373, 1.406466352507, 1.383309602960, 1.344997023928, + 1.291948376043, 1.224744871392, 1.144122805635, 1.050965490998, + 0.946293578512, 0.831253875555, 0.707106781187, 0.575212476952, + 0.437016024449, 0.294031532930, 0.147825570407, + }; + float even0 = in[0] + kC15[2] * in[2] + kC15[4] * in[4] + kC15[6] * in[6]; + float even1 = + in[0] + kC15[6] * in[2] + kC15[12] * in[4] - kC15[12] * in[6]; + float even2 = + in[0] + kC15[10] * in[2] - kC15[10] * in[4] - kC15[0] * in[6]; + float even3 = + in[0] + kC15[14] * in[2] - kC15[2] * in[4] - kC15[12] * in[6]; + float even4 = + in[0] - kC15[12] * in[2] - kC15[6] * in[4] + kC15[6] * in[6]; + float even5 = + 
in[0] - kC15[8] * in[2] - kC15[14] * in[4] + kC15[6] * in[6]; + float even6 = + in[0] - kC15[4] * in[2] + kC15[8] * in[4] - kC15[12] * in[6]; + float even7 = in[0] - kC15[0] * in[2] + kC15[0] * in[4] - kC15[0] * in[6]; + float odd0 = + kC15[1] * in[1] + kC15[3] * in[3] + kC15[5] * in[5] + kC15[7] * in[7]; + float odd1 = kC15[3] * in[1] + kC15[9] * in[3] - kC15[9] * in[7]; + float odd2 = kC15[5] * in[1] - kC15[5] * in[5] - kC15[5] * in[7]; + float odd3 = kC15[7] * in[1] - kC15[9] * in[3] - kC15[5] * in[5] + + kC15[11] * in[7]; + float odd4 = kC15[9] * in[1] - kC15[3] * in[3] + kC15[3] * in[7]; + float odd5 = kC15[11] * in[1] - kC15[3] * in[3] + kC15[5] * in[5] - + kC15[13] * in[7]; + float odd6 = kC15[13] * in[1] - kC15[9] * in[3] + kC15[5] * in[5] - + kC15[1] * in[7]; + out[0] = even0 + odd0; + out[14] = even0 - odd0; + out[1] = even1 + odd1; + out[13] = even1 - odd1; + out[2] = even2 + odd2; + out[12] = even2 - odd2; + out[3] = even3 + odd3; + out[11] = even3 - odd3; + out[4] = even4 + odd4; + out[10] = even4 - odd4; + out[5] = even5 + odd5; + out[9] = even5 - odd5; + out[6] = even6 + odd6; + out[8] = even6 - odd6; + out[7] = even7; + break; + } + case 16: { + static constexpr float kC16[16] = { + 1.414213562373, 1.407403737526, 1.387039845322, 1.353318001174, + 1.306562964876, 1.247225012987, 1.175875602419, 1.093201867002, + 1.000000000000, 0.897167586343, 0.785694958387, 0.666655658478, + 0.541196100146, 0.410524527522, 0.275899379283, 0.138617169199, + }; + float even0 = in[0] + kC16[2] * in[2] + kC16[4] * in[4] + kC16[6] * in[6]; + float even1 = + in[0] + kC16[6] * in[2] + kC16[12] * in[4] - kC16[14] * in[6]; + float even2 = + in[0] + kC16[10] * in[2] - kC16[12] * in[4] - kC16[2] * in[6]; + float even3 = + in[0] + kC16[14] * in[2] - kC16[4] * in[4] - kC16[10] * in[6]; + float even4 = + in[0] - kC16[14] * in[2] - kC16[4] * in[4] + kC16[10] * in[6]; + float even5 = + in[0] - kC16[10] * in[2] - kC16[12] * in[4] + kC16[2] * in[6]; + float even6 = + in[0] - 
kC16[6] * in[2] + kC16[12] * in[4] + kC16[14] * in[6]; + float even7 = in[0] - kC16[2] * in[2] + kC16[4] * in[4] - kC16[6] * in[6]; + float odd0 = (kC16[1] * in[1] + kC16[3] * in[3] + kC16[5] * in[5] + + kC16[7] * in[7]); + float odd1 = (kC16[3] * in[1] + kC16[9] * in[3] + kC16[15] * in[5] - + kC16[11] * in[7]); + float odd2 = (kC16[5] * in[1] + kC16[15] * in[3] - kC16[7] * in[5] - + kC16[3] * in[7]); + float odd3 = (kC16[7] * in[1] - kC16[11] * in[3] - kC16[3] * in[5] + + kC16[15] * in[7]); + float odd4 = (kC16[9] * in[1] - kC16[5] * in[3] - kC16[13] * in[5] + + kC16[1] * in[7]); + float odd5 = (kC16[11] * in[1] - kC16[1] * in[3] + kC16[9] * in[5] + + kC16[13] * in[7]); + float odd6 = (kC16[13] * in[1] - kC16[7] * in[3] + kC16[1] * in[5] - + kC16[5] * in[7]); + float odd7 = (kC16[15] * in[1] - kC16[13] * in[3] + kC16[11] * in[5] - + kC16[9] * in[7]); + out[0] = even0 + odd0; + out[15] = even0 - odd0; + out[1] = even1 + odd1; + out[14] = even1 - odd1; + out[2] = even2 + odd2; + out[13] = even2 - odd2; + out[3] = even3 + odd3; + out[12] = even3 - odd3; + out[4] = even4 + odd4; + out[11] = even4 - odd4; + out[5] = even5 + odd5; + out[10] = even5 - odd5; + out[6] = even6 + odd6; + out[9] = even6 - odd6; + out[7] = even7 + odd7; + out[8] = even7 - odd7; + break; + } + } +} + +void InverseTransformBlockGeneric(const int16_t* JXL_RESTRICT qblock, + const float* JXL_RESTRICT dequant, + const float* JXL_RESTRICT biases, + float* JXL_RESTRICT scratch_space, + float* JXL_RESTRICT output, + size_t output_stride, size_t dctsize) { + float* JXL_RESTRICT block0 = scratch_space; + float* JXL_RESTRICT block1 = scratch_space + DCTSIZE2; + DequantBlock(qblock, dequant, biases, block0); + if (dctsize == 1) { + *output = *block0; + } else if (dctsize == 2 || dctsize == 4) { + float* JXL_RESTRICT block2 = scratch_space + 2 * DCTSIZE2; + ComputeScaledIDCT(block0, block1, block2, 8); + if (dctsize == 4) { + for (size_t iy = 0; iy < 4; ++iy) { + for (size_t ix = 0; ix < 4; ++ix) { + 
float* block = &block2[16 * iy + 2 * ix]; + output[iy * output_stride + ix] = + 0.25f * (block[0] + block[1] + block[8] + block[9]); + } + } + } else { + for (size_t iy = 0; iy < 2; ++iy) { + for (size_t ix = 0; ix < 2; ++ix) { + float* block = &block2[32 * iy + 4 * ix]; + output[iy * output_stride + ix] = + 0.0625f * + (block[0] + block[1] + block[2] + block[3] + block[8] + block[9] + + block[10] + block[11] + block[16] + block[17] + block[18] + + block[19] + block[24] + block[25] + block[26] + block[27]); + } + } + } + } else { + float dctin[DCTSIZE]; + float dctout[DCTSIZE * 2]; + size_t insize = std::min<size_t>(dctsize, DCTSIZE); + for (size_t ix = 0; ix < insize; ++ix) { + for (size_t iy = 0; iy < insize; ++iy) { + dctin[iy] = block0[iy * DCTSIZE + ix]; + } + Compute1dIDCT(dctin, dctout, dctsize); + for (size_t iy = 0; iy < dctsize; ++iy) { + block1[iy * dctsize + ix] = dctout[iy]; + } + } + for (size_t iy = 0; iy < dctsize; ++iy) { + Compute1dIDCT(block1 + iy * dctsize, output + iy * output_stride, + dctsize); + } + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jpegli { + +HWY_EXPORT(InverseTransformBlock8x8); +HWY_EXPORT(InverseTransformBlockGeneric); + +void ChooseInverseTransform(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + for (int c = 0; c < cinfo->num_components; ++c) { + if (m->scaled_dct_size[c] == DCTSIZE) { + m->inverse_transform[c] = HWY_DYNAMIC_DISPATCH(InverseTransformBlock8x8); + } else { + m->inverse_transform[c] = + HWY_DYNAMIC_DISPATCH(InverseTransformBlockGeneric); + } + } +} + +} // namespace jpegli +#endif // HWY_ONCE diff --git a/lib/jpegli/idct.h b/lib/jpegli/idct.h new file mode 100644 index 0000000..c2ec6d1 --- /dev/null +++ b/lib/jpegli/idct.h @@ -0,0 +1,18 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_IDCT_H_ +#define LIB_JPEGLI_IDCT_H_ + +#include "lib/jpegli/common.h" +#include "lib/jxl/base/compiler_specific.h" + +namespace jpegli { + +void ChooseInverseTransform(j_decompress_ptr cinfo); + +} // namespace jpegli + +#endif // LIB_JPEGLI_IDCT_H_ diff --git a/lib/jpegli/input.cc b/lib/jpegli/input.cc new file mode 100644 index 0000000..765bf98 --- /dev/null +++ b/lib/jpegli/input.cc @@ -0,0 +1,414 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/input.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/input.cc" +#include <hwy/foreach_target.h> +#include <hwy/highway.h> + +#include "lib/jpegli/encode_internal.h" +#include "lib/jpegli/error.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/compiler_specific.h" + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { + +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::Rebind; +using hwy::HWY_NAMESPACE::Vec; + +using D = HWY_FULL(float); +using DU = HWY_FULL(uint32_t); +using DU8 = Rebind<uint8_t, D>; +using DU16 = Rebind<uint16_t, D>; + +constexpr D d; +constexpr DU du; +constexpr DU8 du8; +constexpr DU16 du16; + +static constexpr double kMul16 = 1.0 / 257.0; +static constexpr double kMulFloat = 255.0; + +template <size_t C> +void ReadUint8Row(const uint8_t* row_in, size_t x0, size_t len, + float* row_out[kMaxComponents]) { + for (size_t x = x0; x < len; ++x) { + for (size_t c = 0; c < C; ++c) { + row_out[c][x] = row_in[C * x + c]; + } + } +} + +template <size_t C, bool swap_endianness = false> +void ReadUint16Row(const uint8_t* row_in, size_t x0, size_t len, + float* row_out[kMaxComponents]) { + const uint16_t* row16 = reinterpret_cast<const uint16_t*>(row_in); + for 
(size_t x = x0; x < len; ++x) { + for (size_t c = 0; c < C; ++c) { + uint16_t val = row16[C * x + c]; + if (swap_endianness) val = JXL_BSWAP16(val); + row_out[c][x] = val * kMul16; + } + } +} + +template <size_t C, bool swap_endianness = false> +void ReadFloatRow(const uint8_t* row_in, size_t x0, size_t len, + float* row_out[kMaxComponents]) { + const float* rowf = reinterpret_cast<const float*>(row_in); + for (size_t x = x0; x < len; ++x) { + for (size_t c = 0; c < C; ++c) { + float val = rowf[C * x + c]; + if (swap_endianness) val = BSwapFloat(val); + row_out[c][x] = val * kMulFloat; + } + } +} + +void ReadUint8RowSingle(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + float* JXL_RESTRICT const row0 = row_out[0]; + for (size_t x = 0; x < simd_len; x += N) { + Store(ConvertTo(d, PromoteTo(du, LoadU(du8, row_in + x))), d, row0 + x); + } + ReadUint8Row<1>(row_in, simd_len, len, row_out); +} + +void ReadUint8RowInterleaved2(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + float* JXL_RESTRICT const row0 = row_out[0]; + float* JXL_RESTRICT const row1 = row_out[1]; + Vec<DU8> out0, out1; + for (size_t x = 0; x < simd_len; x += N) { + LoadInterleaved2(du8, row_in + 2 * x, out0, out1); + Store(ConvertTo(d, PromoteTo(du, out0)), d, row0 + x); + Store(ConvertTo(d, PromoteTo(du, out1)), d, row1 + x); + } + ReadUint8Row<2>(row_in, simd_len, len, row_out); +} + +void ReadUint8RowInterleaved3(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + float* JXL_RESTRICT const row0 = row_out[0]; + float* JXL_RESTRICT const row1 = row_out[1]; + float* JXL_RESTRICT const row2 = row_out[2]; + Vec<DU8> out0, out1, out2; + for (size_t x = 0; x < simd_len; x += N) { + LoadInterleaved3(du8, row_in + 3 * x, 
out0, out1, out2); + Store(ConvertTo(d, PromoteTo(du, out0)), d, row0 + x); + Store(ConvertTo(d, PromoteTo(du, out1)), d, row1 + x); + Store(ConvertTo(d, PromoteTo(du, out2)), d, row2 + x); + } + ReadUint8Row<3>(row_in, simd_len, len, row_out); +} + +void ReadUint8RowInterleaved4(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + float* JXL_RESTRICT const row0 = row_out[0]; + float* JXL_RESTRICT const row1 = row_out[1]; + float* JXL_RESTRICT const row2 = row_out[2]; + float* JXL_RESTRICT const row3 = row_out[3]; + Vec<DU8> out0, out1, out2, out3; + for (size_t x = 0; x < simd_len; x += N) { + LoadInterleaved4(du8, row_in + 4 * x, out0, out1, out2, out3); + Store(ConvertTo(d, PromoteTo(du, out0)), d, row0 + x); + Store(ConvertTo(d, PromoteTo(du, out1)), d, row1 + x); + Store(ConvertTo(d, PromoteTo(du, out2)), d, row2 + x); + Store(ConvertTo(d, PromoteTo(du, out3)), d, row3 + x); + } + ReadUint8Row<4>(row_in, simd_len, len, row_out); +} + +void ReadUint16RowSingle(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + const auto mul = Set(d, kMul16); + const uint16_t* JXL_RESTRICT const row = + reinterpret_cast<const uint16_t*>(row_in); + float* JXL_RESTRICT const row0 = row_out[0]; + for (size_t x = 0; x < simd_len; x += N) { + Store(Mul(mul, ConvertTo(d, PromoteTo(du, LoadU(du16, row + x)))), d, + row0 + x); + } + ReadUint16Row<1>(row_in, simd_len, len, row_out); +} + +void ReadUint16RowInterleaved2(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + const auto mul = Set(d, kMul16); + const uint16_t* JXL_RESTRICT const row = + reinterpret_cast<const uint16_t*>(row_in); + float* JXL_RESTRICT const row0 = row_out[0]; + float* JXL_RESTRICT const row1 = row_out[1]; + Vec<DU16> out0, out1; + for 
(size_t x = 0; x < simd_len; x += N) { + LoadInterleaved2(du16, row + 2 * x, out0, out1); + Store(Mul(mul, ConvertTo(d, PromoteTo(du, out0))), d, row0 + x); + Store(Mul(mul, ConvertTo(d, PromoteTo(du, out1))), d, row1 + x); + } + ReadUint16Row<2>(row_in, simd_len, len, row_out); +} + +void ReadUint16RowInterleaved3(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + const auto mul = Set(d, kMul16); + const uint16_t* JXL_RESTRICT const row = + reinterpret_cast<const uint16_t*>(row_in); + float* JXL_RESTRICT const row0 = row_out[0]; + float* JXL_RESTRICT const row1 = row_out[1]; + float* JXL_RESTRICT const row2 = row_out[2]; + Vec<DU16> out0, out1, out2; + for (size_t x = 0; x < simd_len; x += N) { + LoadInterleaved3(du16, row + 3 * x, out0, out1, out2); + Store(Mul(mul, ConvertTo(d, PromoteTo(du, out0))), d, row0 + x); + Store(Mul(mul, ConvertTo(d, PromoteTo(du, out1))), d, row1 + x); + Store(Mul(mul, ConvertTo(d, PromoteTo(du, out2))), d, row2 + x); + } + ReadUint16Row<3>(row_in, simd_len, len, row_out); +} + +void ReadUint16RowInterleaved4(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + const auto mul = Set(d, kMul16); + const uint16_t* JXL_RESTRICT const row = + reinterpret_cast<const uint16_t*>(row_in); + float* JXL_RESTRICT const row0 = row_out[0]; + float* JXL_RESTRICT const row1 = row_out[1]; + float* JXL_RESTRICT const row2 = row_out[2]; + float* JXL_RESTRICT const row3 = row_out[3]; + Vec<DU16> out0, out1, out2, out3; + for (size_t x = 0; x < simd_len; x += N) { + LoadInterleaved4(du16, row + 4 * x, out0, out1, out2, out3); + Store(Mul(mul, ConvertTo(d, PromoteTo(du, out0))), d, row0 + x); + Store(Mul(mul, ConvertTo(d, PromoteTo(du, out1))), d, row1 + x); + Store(Mul(mul, ConvertTo(d, PromoteTo(du, out2))), d, row2 + x); + Store(Mul(mul, ConvertTo(d, PromoteTo(du, 
out3))), d, row3 + x); + } + ReadUint16Row<4>(row_in, simd_len, len, row_out); +} + +void ReadUint16RowSingleSwap(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + ReadUint16Row<1, true>(row_in, 0, len, row_out); +} + +void ReadUint16RowInterleaved2Swap(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + ReadUint16Row<2, true>(row_in, 0, len, row_out); +} + +void ReadUint16RowInterleaved3Swap(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + ReadUint16Row<3, true>(row_in, 0, len, row_out); +} + +void ReadUint16RowInterleaved4Swap(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + ReadUint16Row<4, true>(row_in, 0, len, row_out); +} + +void ReadFloatRowSingle(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + const auto mul = Set(d, kMulFloat); + const float* JXL_RESTRICT const row = reinterpret_cast<const float*>(row_in); + float* JXL_RESTRICT const row0 = row_out[0]; + for (size_t x = 0; x < simd_len; x += N) { + Store(Mul(mul, LoadU(d, row + x)), d, row0 + x); + } + ReadFloatRow<1>(row_in, simd_len, len, row_out); +} + +void ReadFloatRowInterleaved2(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + const auto mul = Set(d, kMulFloat); + const float* JXL_RESTRICT const row = reinterpret_cast<const float*>(row_in); + float* JXL_RESTRICT const row0 = row_out[0]; + float* JXL_RESTRICT const row1 = row_out[1]; + Vec<D> out0, out1; + for (size_t x = 0; x < simd_len; x += N) { + LoadInterleaved2(d, row + 2 * x, out0, out1); + Store(Mul(mul, out0), d, row0 + x); + Store(Mul(mul, out1), d, row1 + x); + } + ReadFloatRow<2>(row_in, simd_len, len, row_out); +} + +void ReadFloatRowInterleaved3(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const 
size_t simd_len = len & (~(N - 1)); + const auto mul = Set(d, kMulFloat); + const float* JXL_RESTRICT const row = reinterpret_cast<const float*>(row_in); + float* JXL_RESTRICT const row0 = row_out[0]; + float* JXL_RESTRICT const row1 = row_out[1]; + float* JXL_RESTRICT const row2 = row_out[2]; + Vec<D> out0, out1, out2; + for (size_t x = 0; x < simd_len; x += N) { + LoadInterleaved3(d, row + 3 * x, out0, out1, out2); + Store(Mul(mul, out0), d, row0 + x); + Store(Mul(mul, out1), d, row1 + x); + Store(Mul(mul, out2), d, row2 + x); + } + ReadFloatRow<3>(row_in, simd_len, len, row_out); +} + +void ReadFloatRowInterleaved4(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + const auto mul = Set(d, kMulFloat); + const float* JXL_RESTRICT const row = reinterpret_cast<const float*>(row_in); + float* JXL_RESTRICT const row0 = row_out[0]; + float* JXL_RESTRICT const row1 = row_out[1]; + float* JXL_RESTRICT const row2 = row_out[2]; + float* JXL_RESTRICT const row3 = row_out[3]; + Vec<D> out0, out1, out2, out3; + for (size_t x = 0; x < simd_len; x += N) { + LoadInterleaved4(d, row + 4 * x, out0, out1, out2, out3); + Store(Mul(mul, out0), d, row0 + x); + Store(Mul(mul, out1), d, row1 + x); + Store(Mul(mul, out2), d, row2 + x); + Store(Mul(mul, out3), d, row3 + x); + } + ReadFloatRow<4>(row_in, simd_len, len, row_out); +} + +void ReadFloatRowSingleSwap(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + ReadFloatRow<1, true>(row_in, 0, len, row_out); +} + +void ReadFloatRowInterleaved2Swap(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + ReadFloatRow<2, true>(row_in, 0, len, row_out); +} + +void ReadFloatRowInterleaved3Swap(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + ReadFloatRow<3, true>(row_in, 0, len, row_out); +} + +void ReadFloatRowInterleaved4Swap(const uint8_t* row_in, size_t len, + float* 
row_out[kMaxComponents]) { + ReadFloatRow<4, true>(row_in, 0, len, row_out); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jpegli { + +HWY_EXPORT(ReadUint8RowSingle); +HWY_EXPORT(ReadUint8RowInterleaved2); +HWY_EXPORT(ReadUint8RowInterleaved3); +HWY_EXPORT(ReadUint8RowInterleaved4); +HWY_EXPORT(ReadUint16RowSingle); +HWY_EXPORT(ReadUint16RowInterleaved2); +HWY_EXPORT(ReadUint16RowInterleaved3); +HWY_EXPORT(ReadUint16RowInterleaved4); +HWY_EXPORT(ReadUint16RowSingleSwap); +HWY_EXPORT(ReadUint16RowInterleaved2Swap); +HWY_EXPORT(ReadUint16RowInterleaved3Swap); +HWY_EXPORT(ReadUint16RowInterleaved4Swap); +HWY_EXPORT(ReadFloatRowSingle); +HWY_EXPORT(ReadFloatRowInterleaved2); +HWY_EXPORT(ReadFloatRowInterleaved3); +HWY_EXPORT(ReadFloatRowInterleaved4); +HWY_EXPORT(ReadFloatRowSingleSwap); +HWY_EXPORT(ReadFloatRowInterleaved2Swap); +HWY_EXPORT(ReadFloatRowInterleaved3Swap); +HWY_EXPORT(ReadFloatRowInterleaved4Swap); + +void ChooseInputMethod(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + bool swap_endianness = + (m->endianness == JPEGLI_LITTLE_ENDIAN && !IsLittleEndian()) || + (m->endianness == JPEGLI_BIG_ENDIAN && IsLittleEndian()); + m->input_method = nullptr; + if (m->data_type == JPEGLI_TYPE_UINT8) { + if (cinfo->raw_data_in || cinfo->input_components == 1) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowSingle); + } else if (cinfo->input_components == 2) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowInterleaved2); + } else if (cinfo->input_components == 3) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowInterleaved3); + } else if (cinfo->input_components == 4) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowInterleaved4); + } + } else if (m->data_type == JPEGLI_TYPE_UINT16 && !swap_endianness) { + if (cinfo->raw_data_in || cinfo->input_components == 1) { + m->input_method = 
HWY_DYNAMIC_DISPATCH(ReadUint16RowSingle); + } else if (cinfo->input_components == 2) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved2); + } else if (cinfo->input_components == 3) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved3); + } else if (cinfo->input_components == 4) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved4); + } + } else if (m->data_type == JPEGLI_TYPE_UINT16 && swap_endianness) { + if (cinfo->raw_data_in || cinfo->input_components == 1) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowSingleSwap); + } else if (cinfo->input_components == 2) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved2Swap); + } else if (cinfo->input_components == 3) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved3Swap); + } else if (cinfo->input_components == 4) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved4Swap); + } + } else if (m->data_type == JPEGLI_TYPE_FLOAT && !swap_endianness) { + if (cinfo->raw_data_in || cinfo->input_components == 1) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowSingle); + } else if (cinfo->input_components == 2) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved2); + } else if (cinfo->input_components == 3) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved3); + } else if (cinfo->input_components == 4) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved4); + } + } else if (m->data_type == JPEGLI_TYPE_FLOAT && swap_endianness) { + if (cinfo->raw_data_in || cinfo->input_components == 1) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowSingleSwap); + } else if (cinfo->input_components == 2) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved2Swap); + } else if (cinfo->input_components == 3) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved3Swap); + } else if (cinfo->input_components == 4) { + m->input_method = 
HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved4Swap); + } + } + if (m->input_method == nullptr) { + JPEGLI_ERROR("Could not find input method."); + } +} + +} // namespace jpegli +#endif // HWY_ONCE diff --git a/lib/jpegli/input.h b/lib/jpegli/input.h new file mode 100644 index 0000000..f54d0be --- /dev/null +++ b/lib/jpegli/input.h @@ -0,0 +1,17 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_INPUT_H_ +#define LIB_JPEGLI_INPUT_H_ + +#include "lib/jpegli/common.h" + +namespace jpegli { + +void ChooseInputMethod(j_compress_ptr cinfo); + +} // namespace jpegli + +#endif // LIB_JPEGLI_INPUT_H_ diff --git a/lib/jpegli/input_suspension_test.cc b/lib/jpegli/input_suspension_test.cc new file mode 100644 index 0000000..09bafd9 --- /dev/null +++ b/lib/jpegli/input_suspension_test.cc @@ -0,0 +1,612 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <ostream>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jpegli {
+namespace {
+
+// EOI marker that is fed to the decoder once a truncated input is exhausted.
+static constexpr uint8_t kFakeEoiMarker[2] = {0xff, 0xd9};
+
+// Suspending data source: fill_input_buffer() always reports "no data", and
+// the test pushes the next chunk explicitly through LoadNextChunk().
+// The tests install it with reinterpret_cast<jpeg_source_mgr*>(&src), which
+// relies on pub_ being the first data member.
+struct SourceManager {
+  // A max_chunk_size of 0 means "deliver the whole input in one chunk".
+  SourceManager(const uint8_t* data, size_t len, size_t max_chunk_size,
+                bool is_partial_file)
+      : data_(data),
+        len_(len),
+        pos_(0),
+        max_chunk_size_(max_chunk_size),
+        is_partial_file_(is_partial_file) {
+    pub_.init_source = init_source;
+    pub_.fill_input_buffer = fill_input_buffer;
+    pub_.next_input_byte = nullptr;
+    pub_.bytes_in_buffer = 0;
+    pub_.skip_input_data = skip_input_data;
+    pub_.resync_to_restart = jpegli_resync_to_restart;
+    pub_.term_source = term_source;
+    if (max_chunk_size_ == 0) max_chunk_size_ = len;
+  }
+
+  // Checks that the decoder consumed everything it was given (and, for
+  // complete files, the whole input).
+  ~SourceManager() {
+    EXPECT_EQ(0, pub_.bytes_in_buffer);
+    if (!is_partial_file_) {
+      EXPECT_EQ(len_, pos_);
+    }
+  }
+
+  // Moves the unconsumed bytes to the front of buffer_ and appends the next
+  // chunk of input (or a fake EOI marker once a partial file has run out).
+  // Returns false if a complete file has no more data.
+  bool LoadNextChunk() {
+    if (pos_ >= len_ && !is_partial_file_) {
+      return false;
+    }
+    if (pub_.bytes_in_buffer > 0) {
+      EXPECT_LE(pub_.bytes_in_buffer, buffer_.size());
+      memmove(&buffer_[0], pub_.next_input_byte, pub_.bytes_in_buffer);
+    }
+    size_t chunk_size =
+        pos_ < len_ ? std::min(len_ - pos_, max_chunk_size_) : 2;
+    buffer_.resize(pub_.bytes_in_buffer + chunk_size);
+    memcpy(&buffer_[pub_.bytes_in_buffer],
+           pos_ < len_ ? data_ + pos_ : kFakeEoiMarker, chunk_size);
+    pub_.next_input_byte = &buffer_[0];
+    pub_.bytes_in_buffer += chunk_size;
+    pos_ += chunk_size;
+    return true;
+  }
+
+ private:
+  jpeg_source_mgr pub_;
+  std::vector<uint8_t> buffer_;
+  const uint8_t* data_;
+  size_t len_;
+  size_t pos_;
+  size_t max_chunk_size_;
+  bool is_partial_file_;
+
+  static void init_source(j_decompress_ptr cinfo) {
+    auto src = reinterpret_cast<SourceManager*>(cinfo->src);
+    src->pub_.next_input_byte = nullptr;
+    src->pub_.bytes_in_buffer = 0;
+  }
+
+  // Always suspends; data only arrives through LoadNextChunk().
+  static boolean fill_input_buffer(j_decompress_ptr cinfo) { return FALSE; }
+
+  // Skips within the buffer if possible, otherwise drops the buffer and
+  // advances the read position past the bytes that were not loaded yet.
+  static void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {
+    auto src = reinterpret_cast<SourceManager*>(cinfo->src);
+    if (num_bytes <= 0) {
+      return;
+    }
+    if (src->pub_.bytes_in_buffer >= static_cast<size_t>(num_bytes)) {
+      src->pub_.bytes_in_buffer -= num_bytes;
+      src->pub_.next_input_byte += num_bytes;
+    } else {
+      src->pos_ += num_bytes - src->pub_.bytes_in_buffer;
+      src->pub_.bytes_in_buffer = 0;
+    }
+  }
+
+  static void term_source(j_decompress_ptr cinfo) {}
+};
+
+// Markers recorded by test_marker_processor, in the order they were seen.
+uint8_t markers_seen[kMarkerSequenceLen];
+size_t num_markers_seen = 0;
+
+uint8_t get_next_byte(j_decompress_ptr cinfo) {
+  cinfo->src->bytes_in_buffer--;
+  return *cinfo->src->next_input_byte++;
+}
+
+// Custom marker processor: records the marker, checks its length field and
+// skips the payload. Returns FALSE (suspend) until the two length bytes are
+// available; the decoder then calls it again for the same marker.
+boolean test_marker_processor(j_decompress_ptr cinfo) {
+  // Guard the fixed-size array against streams with unexpected extra markers.
+  JXL_CHECK(num_markers_seen < kMarkerSequenceLen);
+  markers_seen[num_markers_seen] = cinfo->unread_marker;
+  if (cinfo->src->bytes_in_buffer < 2) {
+    return FALSE;
+  }
+  size_t marker_len = (get_next_byte(cinfo) << 8) + get_next_byte(cinfo);
+  EXPECT_EQ(2 + ((num_markers_seen + 2) % sizeof(kMarkerData)), marker_len);
+  if (marker_len > 2) {
+    (*cinfo->src->skip_input_data)(cinfo, marker_len - 2);
+  }
+  ++num_markers_seen;
+  return TRUE;
+}
+
+// Reads one full output pass (scanlines or raw data) into *output, loading
+// more input from src whenever the decoder makes no or only partial progress.
+// src may be nullptr if the whole input was consumed beforehand; in that case
+// any suspension is a test failure.
+void ReadOutputImage(const DecompressParams& dparams, j_decompress_ptr cinfo,
+                     SourceManager* src, TestImage* output) {
+  output->ysize = cinfo->output_height;
+  output->xsize = cinfo->output_width;
+  output->components = cinfo->num_components;
+  if (cinfo->raw_data_out) {
+    output->color_space = cinfo->jpeg_color_space;
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+      size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+      std::vector<uint8_t> plane(ysize * xsize);
+      output->raw_data.emplace_back(std::move(plane));
+    }
+  } else {
+    output->color_space = cinfo->out_color_space;
+    output->AllocatePixels();
+  }
+  size_t total_output_lines = 0;
+  while (cinfo->output_scanline < cinfo->output_height) {
+    size_t max_lines;
+    size_t num_output_lines;
+    if (cinfo->raw_data_out) {
+      size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE;
+      EXPECT_EQ(cinfo->output_scanline, cinfo->output_iMCU_row * iMCU_height);
+      max_lines = iMCU_height;
+      std::vector<std::vector<JSAMPROW>> rowdata(cinfo->num_components);
+      std::vector<JSAMPARRAY> data(cinfo->num_components);
+      for (int c = 0; c < cinfo->num_components; ++c) {
+        size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+        size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+        size_t num_lines = cinfo->comp_info[c].v_samp_factor * DCTSIZE;
+        rowdata[c].resize(num_lines);
+        size_t y0 = cinfo->output_iMCU_row * num_lines;
+        for (size_t i = 0; i < num_lines; ++i) {
+          rowdata[c][i] =
+              y0 + i < ysize ? &output->raw_data[c][(y0 + i) * xsize] : nullptr;
+        }
+        data[c] = &rowdata[c][0];
+      }
+      while ((num_output_lines =
+                  jpegli_read_raw_data(cinfo, &data[0], max_lines)) == 0) {
+        JXL_CHECK(src && src->LoadNextChunk());
+      }
+    } else {
+      size_t max_output_lines = dparams.max_output_lines;
+      if (max_output_lines == 0) max_output_lines = cinfo->output_height;
+      size_t lines_left = cinfo->output_height - cinfo->output_scanline;
+      max_lines = std::min<size_t>(max_output_lines, lines_left);
+      size_t stride = cinfo->output_width * cinfo->num_components;
+      std::vector<JSAMPROW> scanlines(max_lines);
+      for (size_t i = 0; i < max_lines; ++i) {
+        size_t yidx = cinfo->output_scanline + i;
+        scanlines[i] = &output->pixels[yidx * stride];
+      }
+      while ((num_output_lines = jpegli_read_scanlines(cinfo, &scanlines[0],
+                                                       max_lines)) == 0) {
+        JXL_CHECK(src && src->LoadNextChunk());
+      }
+    }
+    total_output_lines += num_output_lines;
+    EXPECT_EQ(total_output_lines, cinfo->output_scanline);
+    if (num_output_lines < max_lines) {
+      JXL_CHECK(src && src->LoadNextChunk());
+    }
+  }
+}
+
+// One test case: either a test file (fn) or a generated image (input) that is
+// encoded with jparams, plus the decoder settings to use.
+struct TestConfig {
+  std::string fn;
+  std::string fn_desc;
+  TestImage input;
+  CompressParams jparams;
+  DecompressParams dparams;
+  float max_rms_dist = 1.0f;
+};
+
+// Returns the JPEG bytes for the test case: the contents of config.fn if set,
+// otherwise the generated input image encoded with jpegli.
+std::vector<uint8_t> GetTestJpegData(TestConfig& config) {
+  if (!config.fn.empty()) {
+    return ReadTestData(config.fn.c_str());
+  }
+  GeneratePixels(&config.input);
+  std::vector<uint8_t> compressed;
+  JXL_CHECK(EncodeWithJpegli(config.input, config.jparams, &compressed));
+  return compressed;
+}
+
+bool IsSequential(const TestConfig& config) {
+  if (!config.fn.empty()) {
+    return config.fn_desc.find("PROGR") == std::string::npos;
+  }
+  return config.jparams.progressive_mode <= 0;
+}
+
+class InputSuspensionTestParam : public ::testing::TestWithParam<TestConfig> {};
+
+TEST_P(InputSuspensionTestParam, InputOutputLockStepNonBuffered) {
+  TestConfig config = GetParam();
+  const DecompressParams& dparams = config.dparams;
+  std::vector<uint8_t> compressed = GetTestJpegData(config);
+  bool is_partial = config.dparams.size_factor < 1.0f;
+  if (is_partial) {
+    compressed.resize(compressed.size() * config.dparams.size_factor);
+  }
+  SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size,
+                    is_partial);
+  TestImage output0;
+  jpeg_decompress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+
+    if (config.jparams.add_marker) {
+      jpegli_save_markers(&cinfo, kSpecialMarker0, 0xffff);
+      jpegli_save_markers(&cinfo, kSpecialMarker1, 0xffff);
+      num_markers_seen = 0;
+      jpegli_set_marker_processor(&cinfo, 0xe6, test_marker_processor);
+      jpegli_set_marker_processor(&cinfo, 0xe7, test_marker_processor);
+      jpegli_set_marker_processor(&cinfo, 0xe8, test_marker_processor);
+    }
+    while (jpegli_read_header(&cinfo, TRUE) == JPEG_SUSPENDED) {
+      JXL_CHECK(src.LoadNextChunk());
+    }
+    SetDecompressParams(dparams, &cinfo);
+    jpegli_set_output_format(&cinfo, dparams.data_type, dparams.endianness);
+    if (config.jparams.add_marker) {
+      EXPECT_EQ(num_markers_seen, kMarkerSequenceLen);
+      EXPECT_EQ(0, memcmp(markers_seen, kMarkerSequence, num_markers_seen));
+    }
+    VerifyHeader(config.jparams, &cinfo);
+    cinfo.raw_data_out = dparams.output_mode == RAW_DATA;
+
+    if (dparams.output_mode == COEFFICIENTS) {
+      jvirt_barray_ptr* coef_arrays;
+      while ((coef_arrays = jpegli_read_coefficients(&cinfo)) == nullptr) {
+        JXL_CHECK(src.LoadNextChunk());
+      }
+      CopyCoefficients(&cinfo, coef_arrays, &output0);
+    } else {
+      while (!jpegli_start_decompress(&cinfo)) {
+        JXL_CHECK(src.LoadNextChunk());
+      }
+      ReadOutputImage(dparams, &cinfo, &src, &output0);
+    }
+
+    while (!jpegli_finish_decompress(&cinfo)) {
+      JXL_CHECK(src.LoadNextChunk());
+    }
+    return true;
+  };
+  ASSERT_TRUE(try_catch_block());
+  jpegli_destroy_decompress(&cinfo);
+
+  TestImage output1;
+  DecodeWithLibjpeg(config.jparams, dparams, compressed, &output1);
+  VerifyOutputImage(output1, output0, config.max_rms_dist);
+}
+
+TEST_P(InputSuspensionTestParam, InputOutputLockStepBuffered) {
+  TestConfig config = GetParam();
+  if (config.jparams.add_marker) return;
+  const DecompressParams& dparams = config.dparams;
+  std::vector<uint8_t> compressed = GetTestJpegData(config);
+  bool is_partial = config.dparams.size_factor < 1.0f;
+  if (is_partial) {
+    compressed.resize(compressed.size() * config.dparams.size_factor);
+  }
+  SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size,
+                    is_partial);
+  std::vector<TestImage> output_progression0;
+  jpeg_decompress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+
+    cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+
+    while (jpegli_read_header(&cinfo, TRUE) == JPEG_SUSPENDED) {
+      JXL_CHECK(src.LoadNextChunk());
+    }
+    SetDecompressParams(dparams, &cinfo);
+    jpegli_set_output_format(&cinfo, dparams.data_type, dparams.endianness);
+
+    cinfo.buffered_image = TRUE;
+    cinfo.raw_data_out = dparams.output_mode == RAW_DATA;
+
+    EXPECT_TRUE(jpegli_start_decompress(&cinfo));
+    EXPECT_FALSE(jpegli_input_complete(&cinfo));
+    EXPECT_EQ(0, cinfo.output_scan_number);
+
+    int sos_marker_cnt = 1;  // read_header reads the first SOS marker
+    while (!jpegli_input_complete(&cinfo)) {
+      EXPECT_EQ(cinfo.input_scan_number, sos_marker_cnt);
+      EXPECT_TRUE(jpegli_start_output(&cinfo, cinfo.input_scan_number));
+      // start output sets output_scan_number, but does not change
+      // input_scan_number
+      EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number);
+      EXPECT_EQ(cinfo.input_scan_number, sos_marker_cnt);
+      TestImage output;
+      ReadOutputImage(dparams, &cinfo, &src, &output);
+      output_progression0.emplace_back(std::move(output));
+      // read scanlines/read raw data does not change input/output scan number
+      EXPECT_EQ(cinfo.input_scan_number, sos_marker_cnt);
+      EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number);
+      while (!jpegli_finish_output(&cinfo)) {
+        JXL_CHECK(src.LoadNextChunk());
+      }
+      ++sos_marker_cnt;  // finish output reads the next SOS marker or EOI
+      if (dparams.output_mode == COEFFICIENTS) {
+        jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&cinfo);
+        JXL_CHECK(coef_arrays != nullptr);
+        CopyCoefficients(&cinfo, coef_arrays, &output_progression0.back());
+      }
+    }
+
+    EXPECT_TRUE(jpegli_finish_decompress(&cinfo));
+    return true;
+  };
+  ASSERT_TRUE(try_catch_block());
+  jpegli_destroy_decompress(&cinfo);
+
+  std::vector<TestImage> output_progression1;
+  DecodeAllScansWithLibjpeg(config.jparams, dparams, compressed,
+                            &output_progression1);
+  ASSERT_EQ(output_progression0.size(), output_progression1.size());
+  for (size_t i = 0; i < output_progression0.size(); ++i) {
+    const TestImage& output = output_progression0[i];
+    const TestImage& expected = output_progression1[i];
+    VerifyOutputImage(expected, output, config.max_rms_dist);
+  }
+}
+
+TEST_P(InputSuspensionTestParam, PreConsumeInputBuffered) {
+  TestConfig config = GetParam();
+  if (config.jparams.add_marker) return;
+  const DecompressParams& dparams = config.dparams;
+  std::vector<uint8_t> compressed = GetTestJpegData(config);
+  bool is_partial = config.dparams.size_factor < 1.0f;
+  if (is_partial) {
+    compressed.resize(compressed.size() * config.dparams.size_factor);
+  }
+  std::vector<TestImage> output_progression1;
+  DecodeAllScansWithLibjpeg(config.jparams, dparams, compressed,
+                            &output_progression1);
+  SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size,
+                    is_partial);
+  TestImage output0;
+  jpeg_decompress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+
+    int status;
+    while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_SOS) {
+      if (status == JPEG_SUSPENDED) {
+        JXL_CHECK(src.LoadNextChunk());
+      }
+    }
+    EXPECT_EQ(JPEG_REACHED_SOS, jpegli_consume_input(&cinfo));
+    cinfo.buffered_image = TRUE;
+    cinfo.raw_data_out = dparams.output_mode == RAW_DATA;
+    cinfo.do_block_smoothing = dparams.do_block_smoothing;
+
+    EXPECT_TRUE(jpegli_start_decompress(&cinfo));
+    EXPECT_FALSE(jpegli_input_complete(&cinfo));
+    EXPECT_EQ(1, cinfo.input_scan_number);
+    EXPECT_EQ(0, cinfo.output_scan_number);
+
+    while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_EOI) {
+      if (status == JPEG_SUSPENDED) {
+        JXL_CHECK(src.LoadNextChunk());
+      }
+    }
+
+    EXPECT_TRUE(jpegli_input_complete(&cinfo));
+    EXPECT_EQ(output_progression1.size(), cinfo.input_scan_number);
+    EXPECT_EQ(0, cinfo.output_scan_number);
+
+    EXPECT_TRUE(jpegli_start_output(&cinfo, cinfo.input_scan_number));
+    EXPECT_EQ(output_progression1.size(), cinfo.input_scan_number);
+    EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number);
+
+    ReadOutputImage(dparams, &cinfo, nullptr, &output0);
+    EXPECT_EQ(output_progression1.size(), cinfo.input_scan_number);
+    EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number);
+
+    EXPECT_TRUE(jpegli_finish_output(&cinfo));
+    if (dparams.output_mode == COEFFICIENTS) {
+      jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&cinfo);
+      JXL_CHECK(coef_arrays != nullptr);
+      CopyCoefficients(&cinfo, coef_arrays, &output0);
+    }
+    EXPECT_TRUE(jpegli_finish_decompress(&cinfo));
+    return true;
+  };
+  ASSERT_TRUE(try_catch_block());
+  jpegli_destroy_decompress(&cinfo);
+
+  VerifyOutputImage(output_progression1.back(), output0, config.max_rms_dist);
+}
+
+TEST_P(InputSuspensionTestParam, PreConsumeInputNonBuffered) {
+  TestConfig config = GetParam();
+  if (config.jparams.add_marker || IsSequential(config)) return;
+  const DecompressParams& dparams = config.dparams;
+  std::vector<uint8_t> compressed = GetTestJpegData(config);
+  bool is_partial = config.dparams.size_factor < 1.0f;
+  if (is_partial) {
+    compressed.resize(compressed.size() * config.dparams.size_factor);
+  }
+  SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size,
+                    is_partial);
+  TestImage output0;
+  jpeg_decompress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+
+    int status;
+    while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_SOS) {
+      if (status == JPEG_SUSPENDED) {
+        JXL_CHECK(src.LoadNextChunk());
+      }
+    }
+    EXPECT_EQ(JPEG_REACHED_SOS, jpegli_consume_input(&cinfo));
+    cinfo.raw_data_out = dparams.output_mode == RAW_DATA;
+    cinfo.do_block_smoothing = dparams.do_block_smoothing;
+
+    if (dparams.output_mode == COEFFICIENTS) {
+      // Result deliberately ignored here; the coefficient arrays are fetched
+      // by the second call below, after the input has been consumed.
+      jpegli_read_coefficients(&cinfo);
+    } else {
+      while (!jpegli_start_decompress(&cinfo)) {
+        JXL_CHECK(src.LoadNextChunk());
+      }
+    }
+
+    while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_EOI) {
+      if (status == JPEG_SUSPENDED) {
+        JXL_CHECK(src.LoadNextChunk());
+      }
+    }
+
+    if (dparams.output_mode == COEFFICIENTS) {
+      jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&cinfo);
+      JXL_CHECK(coef_arrays != nullptr);
+      CopyCoefficients(&cinfo, coef_arrays, &output0);
+    } else {
+      ReadOutputImage(dparams, &cinfo, nullptr, &output0);
+    }
+
+    EXPECT_TRUE(jpegli_finish_decompress(&cinfo));
+    return true;
+  };
+  ASSERT_TRUE(try_catch_block());
+  jpegli_destroy_decompress(&cinfo);
+
+  TestImage output1;
+  DecodeWithLibjpeg(config.jparams, dparams, compressed, &output1);
+  VerifyOutputImage(output1, output0, config.max_rms_dist);
+}
+
+// Builds the parameter list: test files x chunk sizes x output line counts,
+// restart intervals, custom markers, partial inputs and block smoothing.
+std::vector<TestConfig> GenerateTests() {
+  std::vector<TestConfig> all_tests;
+  std::vector<std::pair<std::string, std::string>> testfiles({
+      {"jxl/flower/flower.png.im_q85_444.jpg", "Q85YUV444"},
+      {"jxl/flower/flower.png.im_q85_420_R13B.jpg", "Q85YUV420R13B"},
+      {"jxl/flower/flower.png.im_q85_420_progr.jpg", "Q85YUV420PROGR"},
+  });
+  for (const auto& it : testfiles) {
+    for (size_t chunk_size : {1, 64, 65536}) {
+      for (size_t max_output_lines : {0, 1, 8, 16}) {
+        TestConfig config;
+        config.fn = it.first;
+        config.fn_desc = it.second;
+        config.dparams.chunk_size = chunk_size;
+        config.dparams.max_output_lines = max_output_lines;
+        all_tests.push_back(config);
+        if (max_output_lines == 16) {
+          config.dparams.output_mode = RAW_DATA;
+          all_tests.push_back(config);
+          config.dparams.output_mode = COEFFICIENTS;
+          all_tests.push_back(config);
+        }
+      }
+    }
+  }
+  for (size_t r : {1, 17, 1024}) {
+    for (size_t chunk_size : {1, 65536}) {
+      TestConfig config;
+      config.dparams.chunk_size = chunk_size;
+      config.jparams.progressive_mode = 2;
+      config.jparams.restart_interval = r;
+      all_tests.push_back(config);
+    }
+  }
+  for (size_t chunk_size : {1, 4, 1024}) {
+    TestConfig config;
+    config.input.xsize = 256;
+    config.input.ysize = 256;
+    config.dparams.chunk_size = chunk_size;
+    config.jparams.add_marker = true;
+    all_tests.push_back(config);
+  }
+  // Tests for partial input.
+  for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f}) {
+    for (int progr : {0, 1, 3}) {
+      for (int samp : {1, 2}) {
+        for (JpegIOMode output_mode : {PIXELS, RAW_DATA}) {
+          TestConfig config;
+          config.input.xsize = 517;
+          config.input.ysize = 523;
+          config.jparams.h_sampling = {samp, 1, 1};
+          config.jparams.v_sampling = {samp, 1, 1};
+          config.jparams.progressive_mode = progr;
+          config.dparams.size_factor = size_factor;
+          config.dparams.output_mode = output_mode;
+          // The last partially available block can behave differently.
+          // TODO(szabadka) Figure out if we can make the behaviour more
+          // similar.
+          config.max_rms_dist = samp == 1 ? 1.75f : 3.0f;
+          all_tests.push_back(config);
+        }
+      }
+    }
+  }
+  // Tests for block smoothing.
+  for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f, 1.0f}) {
+    for (int samp : {1, 2}) {
+      TestConfig config;
+      config.input.xsize = 517;
+      config.input.ysize = 523;
+      config.jparams.h_sampling = {samp, 1, 1};
+      config.jparams.v_sampling = {samp, 1, 1};
+      config.jparams.progressive_mode = 2;
+      config.dparams.size_factor = size_factor;
+      config.dparams.do_block_smoothing = true;
+      // libjpeg does smoothing for incomplete scans differently at
+      // the border between current and previous scans.
+      config.max_rms_dist = 8.0f;
+      all_tests.push_back(config);
+    }
+  }
+  return all_tests;
+}
+
+// Streams the pieces that make up the test name for a configuration.
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+  if (!c.fn.empty()) {
+    os << c.fn_desc;
+  } else {
+    os << c.input;
+  }
+  os << c.jparams;
+  if (c.dparams.chunk_size == 0) {
+    os << "CompleteInput";
+  } else {
+    os << "InputChunks" << c.dparams.chunk_size;
+  }
+  if (c.dparams.size_factor < 1.0f) {
+    os << "Partial" << static_cast<int>(c.dparams.size_factor * 100) << "p";
+  }
+  if (c.dparams.max_output_lines == 0) {
+    os << "CompleteOutput";
+  } else {
+    os << "OutputLines" << c.dparams.max_output_lines;
+  }
+  if (c.dparams.output_mode == RAW_DATA) {
+    os << "RawDataOut";
+  } else if (c.dparams.output_mode == COEFFICIENTS) {
+    os << "CoeffsOut";
+  }
+  if (c.dparams.do_block_smoothing) {
+    os << "BlockSmoothing";
+  }
+  return os;
+}
+
+std::string TestDescription(
+    const testing::TestParamInfo<InputSuspensionTestParam::ParamType>& info) {
+  std::stringstream name;
+  name << info.param;
+  return name.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(InputSuspensionTest, InputSuspensionTestParam,
+                                testing::ValuesIn(GenerateTests()),
+                                TestDescription);
+
+}  // namespace
+}  // namespace jpegli
diff --git a/lib/jpegli/jpeg.version.62 b/lib/jpegli/jpeg.version.62
new file mode 100644
index 0000000..3a8d1f5
--- /dev/null
+++ b/lib/jpegli/jpeg.version.62
@@ -0,0 +1,11 @@
+LIBJPEG_6.2 {
+  global:
+    jpeg*;
+};
+
+LIBJPEGTURBO_6.2 {
+  global:
+    jpeg_mem_src*;
+    jpeg_mem_dest*;
+    tj*;
+};
\ No newline at end of file
diff --git a/lib/jpegli/jpeg.version.8 b/lib/jpegli/jpeg.version.8
new file mode 100644
index 0000000..aa891f8
--- /dev/null
+++ b/lib/jpegli/jpeg.version.8
@@ -0,0 +1,9 @@
+LIBJPEG_8.0 {
+  global:
+    jpeg*;
+};
+
+LIBJPEGTURBO_8.0 {
+  global:
+    tj*;
+};
diff --git a/lib/jpegli/libjpeg_test_util.cc b/lib/jpegli/libjpeg_test_util.cc
new file mode 100644
index 0000000..de23037
--- /dev/null
+++ b/lib/jpegli/libjpeg_test_util.cc
@@ -0,0 +1,261 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/libjpeg_test_util.h"
+
+/* clang-format off */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <jpeglib.h>
+#include <setjmp.h>
+/* clang-format on */
+
+#include "lib/jxl/sanitizers.h"
+
+namespace jpegli {
+
+namespace {
+
+#define JPEG_API_FN(name) jpeg_##name
+#include "lib/jpegli/test_utils-inl.h"
+#undef JPEG_API_FN
+
+// Reads one complete output pass from libjpeg into *output, either as
+// interleaved pixels (optionally cropped to the middle third in both
+// directions) or as raw per-component planes when cinfo->raw_data_out is set.
+void ReadOutputPass(j_decompress_ptr cinfo, const DecompressParams& dparams,
+                    TestImage* output) {
+  JDIMENSION xoffset = 0;
+  JDIMENSION yoffset = 0;
+  JDIMENSION xsize_cropped = cinfo->output_width;
+  JDIMENSION ysize_cropped = cinfo->output_height;
+  if (dparams.crop_output) {
+    xoffset = xsize_cropped = cinfo->output_width / 3;
+    yoffset = ysize_cropped = cinfo->output_height / 3;
+    jpeg_crop_scanline(cinfo, &xoffset, &xsize_cropped);
+    JXL_CHECK(xsize_cropped == cinfo->output_width);
+  }
+  output->xsize = xsize_cropped;
+  output->ysize = ysize_cropped;
+  output->components = cinfo->out_color_components;
+  if (cinfo->quantize_colors) {
+    // The colormap was written by libjpeg, so tell MSan it is initialized.
+    jxl::msan::UnpoisonMemory(cinfo->colormap, cinfo->out_color_components *
+                                                   sizeof(cinfo->colormap[0]));
+    for (int c = 0; c < cinfo->out_color_components; ++c) {
+      jxl::msan::UnpoisonMemory(
+          cinfo->colormap[c],
+          cinfo->actual_number_of_colors * sizeof(cinfo->colormap[c][0]));
+    }
+  }
+  if (!cinfo->raw_data_out) {
+    size_t stride = output->xsize * output->components;
+    output->pixels.resize(output->ysize * stride);
+    output->color_space = cinfo->out_color_space;
+    if (yoffset > 0) {
+      jpeg_skip_scanlines(cinfo, yoffset);
+    }
+    for (size_t y = 0; y < output->ysize; ++y) {
+      JSAMPROW rows[] = {
+          reinterpret_cast<JSAMPLE*>(&output->pixels[y * stride])};
+      JXL_CHECK(1 == jpeg_read_scanlines(cinfo, rows, 1));
+      jxl::msan::UnpoisonMemory(
+          rows[0], sizeof(JSAMPLE) * cinfo->output_components * output->xsize);
+      if (cinfo->quantize_colors) {
+        UnmapColors(rows[0], cinfo->output_width, cinfo->out_color_components,
+                    cinfo->colormap, cinfo->actual_number_of_colors);
+      }
+    }
+    // Skip whatever is left below the cropped region so the pass completes.
+    if (cinfo->output_scanline < cinfo->output_height) {
+      jpeg_skip_scanlines(cinfo, cinfo->output_height - cinfo->output_scanline);
+    }
+  } else {
+    output->color_space = cinfo->jpeg_color_space;
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+      size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+      std::vector<uint8_t> plane(ysize * xsize);
+      output->raw_data.emplace_back(std::move(plane));
+    }
+    while (cinfo->output_scanline < cinfo->output_height) {
+      size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE;
+      JXL_CHECK(cinfo->output_scanline == cinfo->output_iMCU_row * iMCU_height);
+      std::vector<std::vector<JSAMPROW>> rowdata(cinfo->num_components);
+      std::vector<JSAMPARRAY> data(cinfo->num_components);
+      for (int c = 0; c < cinfo->num_components; ++c) {
+        size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+        size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+        size_t num_lines = cinfo->comp_info[c].v_samp_factor * DCTSIZE;
+        rowdata[c].resize(num_lines);
+        size_t y0 = cinfo->output_iMCU_row * num_lines;
+        for (size_t i = 0; i < num_lines; ++i) {
+          rowdata[c][i] =
+              y0 + i < ysize ? &output->raw_data[c][(y0 + i) * xsize] : nullptr;
+        }
+        data[c] = &rowdata[c][0];
+      }
+      JXL_CHECK(iMCU_height ==
+                jpeg_read_raw_data(cinfo, &data[0], iMCU_height));
+    }
+  }
+  JXL_CHECK(cinfo->total_iMCU_rows ==
+            DivCeil(cinfo->image_height, cinfo->max_v_samp_factor * DCTSIZE));
+}
+
+// Decodes one image from an already created decompressor whose data source
+// has been set up by the caller, and checks header, markers and ICC profile
+// against jparams.
+void DecodeWithLibjpeg(const CompressParams& jparams,
+                       const DecompressParams& dparams, j_decompress_ptr cinfo,
+                       TestImage* output) {
+  if (jparams.add_marker) {
+    jpeg_save_markers(cinfo, kSpecialMarker0, 0xffff);
+    jpeg_save_markers(cinfo, kSpecialMarker1, 0xffff);
+  }
+  if (!jparams.icc.empty()) {
+    jpeg_save_markers(cinfo, JPEG_APP0 + 2, 0xffff);
+  }
+  JXL_CHECK(JPEG_REACHED_SOS ==
+            jpeg_read_header(cinfo, /*require_image=*/TRUE));
+  if (!jparams.icc.empty()) {
+    uint8_t* icc_data = nullptr;
+    unsigned int icc_len = 0;
+    JXL_CHECK(jpeg_read_icc_profile(cinfo, &icc_data, &icc_len));
+    JXL_CHECK(icc_data);
+    jxl::msan::UnpoisonMemory(icc_data, icc_len);
+    // Compare lengths first: memcmp over icc_len bytes would read past the
+    // end of jparams.icc for a longer profile and accept a truncated one.
+    JXL_CHECK(icc_len == jparams.icc.size());
+    JXL_CHECK(0 == memcmp(jparams.icc.data(), icc_data, icc_len));
+    free(icc_data);
+  }
+  SetDecompressParams(dparams, cinfo);
+  VerifyHeader(jparams, cinfo);
+  if (dparams.output_mode == COEFFICIENTS) {
+    jvirt_barray_ptr* coef_arrays = jpeg_read_coefficients(cinfo);
+    JXL_CHECK(coef_arrays != nullptr);
+    CopyCoefficients(cinfo, coef_arrays, output);
+  } else {
+    JXL_CHECK(jpeg_start_decompress(cinfo));
+    VerifyScanHeader(jparams, cinfo);
+    ReadOutputPass(cinfo, dparams, output);
+  }
+  JXL_CHECK(jpeg_finish_decompress(cinfo));
+}
+
+}  // namespace
+
+// Verifies that an image encoded with libjpegli can be decoded with libjpeg,
+// and checks that the jpeg coding metadata matches jparams.
+void DecodeAllScansWithLibjpeg(const CompressParams& jparams,
+                               const DecompressParams& dparams,
+                               const std::vector<uint8_t>& compressed,
+                               std::vector<TestImage>* output_progression) {
+  jpeg_decompress_struct cinfo = {};
+  // Declared outside the lambda: cinfo.err points at it and must stay valid
+  // until jpeg_destroy_decompress() below has returned.
+  jpeg_error_mgr jerr;
+  const auto try_catch_block = [&]() {
+    jmp_buf env;
+    cinfo.err = jpeg_std_error(&jerr);
+    if (setjmp(env)) {
+      return false;
+    }
+    cinfo.client_data = reinterpret_cast<void*>(&env);
+    cinfo.err->error_exit = [](j_common_ptr cinfo) {
+      (*cinfo->err->output_message)(cinfo);
+      jmp_buf* env = reinterpret_cast<jmp_buf*>(cinfo->client_data);
+      jpeg_destroy(cinfo);
+      longjmp(*env, 1);
+    };
+    jpeg_create_decompress(&cinfo);
+    jpeg_mem_src(&cinfo, compressed.data(), compressed.size());
+    if (jparams.add_marker) {
+      jpeg_save_markers(&cinfo, kSpecialMarker0, 0xffff);
+      jpeg_save_markers(&cinfo, kSpecialMarker1, 0xffff);
+    }
+    JXL_CHECK(JPEG_REACHED_SOS ==
+              jpeg_read_header(&cinfo, /*require_image=*/TRUE));
+    cinfo.buffered_image = TRUE;
+    SetDecompressParams(dparams, &cinfo);
+    VerifyHeader(jparams, &cinfo);
+    JXL_CHECK(jpeg_start_decompress(&cinfo));
+    // start decompress should not read the whole input in buffered image mode
+    JXL_CHECK(!jpeg_input_complete(&cinfo));
+    JXL_CHECK(cinfo.output_scan_number == 0);
+    int sos_marker_cnt = 1;  // read header reads the first SOS marker
+    while (!jpeg_input_complete(&cinfo)) {
+      JXL_CHECK(cinfo.input_scan_number == sos_marker_cnt);
+      // With skip_scans, even-numbered scans are consumed without producing
+      // an output pass.
+      if (dparams.skip_scans && (cinfo.input_scan_number % 2) != 1) {
+        int result = JPEG_SUSPENDED;
+        while (result != JPEG_REACHED_SOS && result != JPEG_REACHED_EOI) {
+          result = jpeg_consume_input(&cinfo);
+        }
+        if (result == JPEG_REACHED_SOS) ++sos_marker_cnt;
+        continue;
+      }
+      SetScanDecompressParams(dparams, &cinfo, cinfo.input_scan_number);
+      JXL_CHECK(jpeg_start_output(&cinfo, cinfo.input_scan_number));
+      // start output sets output_scan_number, but does not change
+      // input_scan_number
+      JXL_CHECK(cinfo.output_scan_number == cinfo.input_scan_number);
+      JXL_CHECK(cinfo.input_scan_number == sos_marker_cnt);
+      VerifyScanHeader(jparams, &cinfo);
+      TestImage output;
+      ReadOutputPass(&cinfo, dparams, &output);
+      output_progression->emplace_back(std::move(output));
+      // read scanlines/read raw data does not change input/output scan number
+      if (!cinfo.progressive_mode) {
+        JXL_CHECK(cinfo.input_scan_number == sos_marker_cnt);
+        JXL_CHECK(cinfo.output_scan_number == cinfo.input_scan_number);
+      }
+      JXL_CHECK(jpeg_finish_output(&cinfo));
+      ++sos_marker_cnt;  // finish output reads the next SOS marker or EOI
+      if (dparams.output_mode == COEFFICIENTS) {
+        jvirt_barray_ptr* coef_arrays = jpeg_read_coefficients(&cinfo);
+        JXL_CHECK(coef_arrays != nullptr);
+        CopyCoefficients(&cinfo, coef_arrays, &output_progression->back());
+      }
+    }
+    JXL_CHECK(jpeg_finish_decompress(&cinfo));
+    return true;
+  };
+  JXL_CHECK(try_catch_block());
+  jpeg_destroy_decompress(&cinfo);
+}
+
+// Returns the number of bytes read from compressed.
+// If table_stream is not nullptr, it is read first with
+// jpeg_read_header(require_image=FALSE) before compressed is decoded.
+size_t DecodeWithLibjpeg(const CompressParams& jparams,
+                         const DecompressParams& dparams,
+                         const uint8_t* table_stream, size_t table_stream_size,
+                         const uint8_t* compressed, size_t len,
+                         TestImage* output) {
+  jpeg_decompress_struct cinfo = {};
+  size_t bytes_read = 0;
+  // Declared outside the lambda: cinfo.err points at it and must stay valid
+  // until jpeg_destroy_decompress() below has returned.
+  jpeg_error_mgr jerr;
+  const auto try_catch_block = [&]() {
+    jmp_buf env;
+    cinfo.err = jpeg_std_error(&jerr);
+    if (setjmp(env)) {
+      return false;
+    }
+    cinfo.client_data = reinterpret_cast<void*>(&env);
+    cinfo.err->error_exit = [](j_common_ptr cinfo) {
+      (*cinfo->err->output_message)(cinfo);
+      jmp_buf* env = reinterpret_cast<jmp_buf*>(cinfo->client_data);
+      jpeg_destroy(cinfo);
+      longjmp(*env, 1);
+    };
+    jpeg_create_decompress(&cinfo);
+    if (table_stream != nullptr) {
+      jpeg_mem_src(&cinfo, table_stream, table_stream_size);
+      jpeg_read_header(&cinfo, FALSE);
+    }
+    jpeg_mem_src(&cinfo, compressed, len);
+    DecodeWithLibjpeg(jparams, dparams, &cinfo, output);
+    bytes_read = len - cinfo.src->bytes_in_buffer;
+    return true;
+  };
+  JXL_CHECK(try_catch_block());
+  jpeg_destroy_decompress(&cinfo);
+  return bytes_read;
+}
+
+// Convenience overload: decodes a complete in-memory JPEG without a separate
+// table stream.
+void DecodeWithLibjpeg(const CompressParams& jparams,
+                       const DecompressParams& dparams,
+                       const std::vector<uint8_t>& compressed,
+                       TestImage* output) {
+  DecodeWithLibjpeg(jparams, dparams, nullptr, 0, compressed.data(),
+                    compressed.size(), output);
+}
+
+}  // namespace jpegli
diff --git a/lib/jpegli/libjpeg_test_util.h b/lib/jpegli/libjpeg_test_util.h
new file mode 100644
index 0000000..18cc1e5
--- /dev/null
+++ b/lib/jpegli/libjpeg_test_util.h
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_LIBJPEG_TEST_UTIL_H_
+#define LIB_JPEGLI_LIBJPEG_TEST_UTIL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jpegli/test_params.h"
+
+namespace jpegli {
+
+// Verifies that an image encoded with libjpegli can be decoded with libjpeg,
+// and checks that the jpeg coding metadata matches jparams.
+void DecodeAllScansWithLibjpeg(const CompressParams& jparams,
+                               const DecompressParams& dparams,
+                               const std::vector<uint8_t>& compressed,
+                               std::vector<TestImage>* output_progression);
+// Returns the number of bytes read from compressed.
+size_t DecodeWithLibjpeg(const CompressParams& jparams, + const DecompressParams& dparams, + const uint8_t* table_stream, size_t table_stream_size, + const uint8_t* compressed, size_t len, + TestImage* output); +void DecodeWithLibjpeg(const CompressParams& jparams, + const DecompressParams& dparams, + const std::vector<uint8_t>& compressed, + TestImage* output); + +} // namespace jpegli + +#endif // LIB_JPEGLI_LIBJPEG_TEST_UTIL_H_ diff --git a/lib/jpegli/libjpeg_wrapper.cc b/lib/jpegli/libjpeg_wrapper.cc new file mode 100644 index 0000000..b38d16f --- /dev/null +++ b/lib/jpegli/libjpeg_wrapper.cc @@ -0,0 +1,255 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// This file contains wrapper-functions that are used to build the libjpeg.so +// shared library that is API- and ABI-compatible with libjpeg-turbo's version +// of libjpeg.so. + +#include "lib/jpegli/common.h" +#include "lib/jpegli/decode.h" +#include "lib/jpegli/encode.h" +#include "lib/jpegli/error.h" + +struct jpeg_error_mgr *jpeg_std_error(struct jpeg_error_mgr *err) { + return jpegli_std_error(err); +} + +void jpeg_abort(j_common_ptr cinfo) { jpegli_abort(cinfo); } + +void jpeg_destroy(j_common_ptr cinfo) { jpegli_destroy(cinfo); } + +JQUANT_TBL *jpeg_alloc_quant_table(j_common_ptr cinfo) { + return jpegli_alloc_quant_table(cinfo); +} + +JHUFF_TBL *jpeg_alloc_huff_table(j_common_ptr cinfo) { + return jpegli_alloc_huff_table(cinfo); +} + +void jpeg_CreateDecompress(j_decompress_ptr cinfo, int version, + size_t structsize) { + jpegli_CreateDecompress(cinfo, version, structsize); +} + +void jpeg_stdio_src(j_decompress_ptr cinfo, FILE *infile) { + jpegli_stdio_src(cinfo, infile); +} + +void jpeg_mem_src(j_decompress_ptr cinfo, const unsigned char *inbuffer, + unsigned long insize) { + jpegli_mem_src(cinfo, inbuffer, insize); +} + +int jpeg_read_header(j_decompress_ptr 
cinfo, boolean require_image) { + return jpegli_read_header(cinfo, require_image); +} + +boolean jpeg_start_decompress(j_decompress_ptr cinfo) { + return jpegli_start_decompress(cinfo); +} + +JDIMENSION jpeg_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines, + JDIMENSION max_lines) { + return jpegli_read_scanlines(cinfo, scanlines, max_lines); +} + +JDIMENSION jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) { + return jpegli_skip_scanlines(cinfo, num_lines); +} + +void jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset, + JDIMENSION *width) { + jpegli_crop_scanline(cinfo, xoffset, width); +} + +boolean jpeg_finish_decompress(j_decompress_ptr cinfo) { + return jpegli_finish_decompress(cinfo); +} + +JDIMENSION jpeg_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION max_lines) { + return jpegli_read_raw_data(cinfo, data, max_lines); +} + +jvirt_barray_ptr *jpeg_read_coefficients(j_decompress_ptr cinfo) { + return jpegli_read_coefficients(cinfo); +} + +boolean jpeg_has_multiple_scans(j_decompress_ptr cinfo) { + return jpegli_has_multiple_scans(cinfo); +} + +boolean jpeg_start_output(j_decompress_ptr cinfo, int scan_number) { + return jpegli_start_output(cinfo, scan_number); +} + +boolean jpeg_finish_output(j_decompress_ptr cinfo) { + return jpegli_finish_output(cinfo); +} + +boolean jpeg_input_complete(j_decompress_ptr cinfo) { + return jpegli_input_complete(cinfo); +} + +int jpeg_consume_input(j_decompress_ptr cinfo) { + return jpegli_consume_input(cinfo); +} + +#if JPEG_LIB_VERSION >= 80 +void jpeg_core_output_dimensions(j_decompress_ptr cinfo) { + jpegli_core_output_dimensions(cinfo); +} +#endif +void jpeg_calc_output_dimensions(j_decompress_ptr cinfo) { + jpegli_calc_output_dimensions(cinfo); +} + +void jpeg_save_markers(j_decompress_ptr cinfo, int marker_code, + unsigned int length_limit) { + jpegli_save_markers(cinfo, marker_code, length_limit); +} + +void jpeg_set_marker_processor(j_decompress_ptr cinfo, 
int marker_code, + jpeg_marker_parser_method routine) { + jpegli_set_marker_processor(cinfo, marker_code, routine); +} + +boolean jpeg_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr, + unsigned int *icc_data_len) { + return jpegli_read_icc_profile(cinfo, icc_data_ptr, icc_data_len); +} + +void jpeg_abort_decompress(j_decompress_ptr cinfo) { + return jpegli_abort_decompress(cinfo); +} + +void jpeg_destroy_decompress(j_decompress_ptr cinfo) { + return jpegli_destroy_decompress(cinfo); +} + +void jpeg_CreateCompress(j_compress_ptr cinfo, int version, size_t structsize) { + jpegli_CreateCompress(cinfo, version, structsize); +} + +void jpeg_stdio_dest(j_compress_ptr cinfo, FILE *outfile) { + jpegli_stdio_dest(cinfo, outfile); +} + +void jpeg_mem_dest(j_compress_ptr cinfo, unsigned char **outbuffer, + unsigned long *outsize) { + jpegli_mem_dest(cinfo, outbuffer, outsize); +} + +void jpeg_set_defaults(j_compress_ptr cinfo) { jpegli_set_defaults(cinfo); } + +void jpeg_default_colorspace(j_compress_ptr cinfo) { + jpegli_default_colorspace(cinfo); +} + +void jpeg_set_colorspace(j_compress_ptr cinfo, J_COLOR_SPACE colorspace) { + jpegli_set_colorspace(cinfo, colorspace); +} + +void jpeg_set_quality(j_compress_ptr cinfo, int quality, + boolean force_baseline) { + jpegli_set_quality(cinfo, quality, force_baseline); +} + +void jpeg_set_linear_quality(j_compress_ptr cinfo, int scale_factor, + boolean force_baseline) { + jpegli_set_linear_quality(cinfo, scale_factor, force_baseline); +} + +#if JPEG_LIB_VERSION >= 70 +void jpeg_default_qtables(j_compress_ptr cinfo, boolean force_baseline) { + jpegli_default_qtables(cinfo, force_baseline); +} +#endif + +int jpeg_quality_scaling(int quality) { + return jpegli_quality_scaling(quality); +} + +void jpeg_add_quant_table(j_compress_ptr cinfo, int which_tbl, + const unsigned int *basic_table, int scale_factor, + boolean force_baseline) { + jpegli_add_quant_table(cinfo, which_tbl, basic_table, scale_factor, + 
force_baseline); +} + +void jpeg_simple_progression(j_compress_ptr cinfo) { + jpegli_simple_progression(cinfo); +} + +void jpeg_suppress_tables(j_compress_ptr cinfo, boolean suppress) { + jpegli_suppress_tables(cinfo, suppress); +} + +#if JPEG_LIB_VERSION >= 70 +void jpeg_calc_jpeg_dimensions(j_compress_ptr cinfo) { + jpegli_calc_jpeg_dimensions(cinfo); +} +#endif + +void jpeg_copy_critical_parameters(j_decompress_ptr srcinfo, + j_compress_ptr dstinfo) { + jpegli_copy_critical_parameters(srcinfo, dstinfo); +} + +void jpeg_write_m_header(j_compress_ptr cinfo, int marker, + unsigned int datalen) { + jpegli_write_m_header(cinfo, marker, datalen); +} + +void jpeg_write_m_byte(j_compress_ptr cinfo, int val) { + jpegli_write_m_byte(cinfo, val); +} + +void jpeg_write_marker(j_compress_ptr cinfo, int marker, const JOCTET *dataptr, + unsigned int datalen) { + jpegli_write_marker(cinfo, marker, dataptr, datalen); +} + +void jpeg_write_icc_profile(j_compress_ptr cinfo, const JOCTET *icc_data_ptr, + unsigned int icc_data_len) { + jpegli_write_icc_profile(cinfo, icc_data_ptr, icc_data_len); +} + +void jpeg_start_compress(j_compress_ptr cinfo, boolean write_all_tables) { + jpegli_start_compress(cinfo, write_all_tables); +} + +void jpeg_write_tables(j_compress_ptr cinfo) { jpegli_write_tables(cinfo); } + +JDIMENSION jpeg_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines, + JDIMENSION num_lines) { + return jpegli_write_scanlines(cinfo, scanlines, num_lines); +} + +JDIMENSION jpeg_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION num_lines) { + return jpegli_write_raw_data(cinfo, data, num_lines); +} + +void jpeg_write_coefficients(j_compress_ptr cinfo, + jvirt_barray_ptr *coef_arrays) { + jpegli_write_coefficients(cinfo, coef_arrays); +} + +void jpeg_finish_compress(j_compress_ptr cinfo) { + jpegli_finish_compress(cinfo); +} + +void jpeg_abort_compress(j_compress_ptr cinfo) { jpegli_abort_compress(cinfo); } + +void jpeg_destroy_compress(j_compress_ptr 
cinfo) { + jpegli_destroy_compress(cinfo); +} + +boolean jpeg_resync_to_restart(j_decompress_ptr cinfo, int desired) { + return jpegli_resync_to_restart(cinfo, desired); +} + +void jpeg_new_colormap(j_decompress_ptr cinfo) { jpegli_new_colormap(cinfo); } diff --git a/lib/jpegli/memory_manager.cc b/lib/jpegli/memory_manager.cc new file mode 100644 index 0000000..3a8f230 --- /dev/null +++ b/lib/jpegli/memory_manager.cc @@ -0,0 +1,186 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/memory_manager.h" + +#include <string.h> + +#include <hwy/aligned_allocator.h> +#include <vector> + +#include "lib/jpegli/common_internal.h" +#include "lib/jpegli/error.h" + +struct jvirt_sarray_control { + JSAMPARRAY full_buffer; + size_t numrows; + JDIMENSION maxaccess; +}; + +struct jvirt_barray_control { + JBLOCKARRAY full_buffer; + size_t numrows; + JDIMENSION maxaccess; +}; + +namespace jpegli { + +namespace { + +struct MemoryManager { + struct jpeg_memory_mgr pub; + std::vector<void*> owned_ptrs[2 * JPOOL_NUMPOOLS]; + uint64_t pool_memory_usage[2 * JPOOL_NUMPOOLS]; + uint64_t total_memory_usage; + uint64_t peak_memory_usage; +}; + +void* Alloc(j_common_ptr cinfo, int pool_id, size_t sizeofobject) { + MemoryManager* mem = reinterpret_cast<MemoryManager*>(cinfo->mem); + if (pool_id < 0 || pool_id >= 2 * JPOOL_NUMPOOLS) { + JPEGLI_ERROR("Invalid pool id %d", pool_id); + } + if (mem->pub.max_memory_to_use > 0 && + mem->total_memory_usage + static_cast<uint64_t>(sizeofobject) > + static_cast<uint64_t>(mem->pub.max_memory_to_use)) { + JPEGLI_ERROR("Total memory usage exceeding %ld", + mem->pub.max_memory_to_use); + } + void* p; + if (pool_id < JPOOL_NUMPOOLS) { + p = malloc(sizeofobject); + } else { + p = hwy::AllocateAlignedBytes(sizeofobject, nullptr, nullptr); + } + if (p == nullptr) { + JPEGLI_ERROR("Out of memory"); + } + 
mem->owned_ptrs[pool_id].push_back(p); + mem->pool_memory_usage[pool_id] += sizeofobject; + mem->total_memory_usage += sizeofobject; + mem->peak_memory_usage = + std::max(mem->peak_memory_usage, mem->total_memory_usage); + return p; +} + +constexpr size_t gcd(size_t a, size_t b) { return b == 0 ? a : gcd(b, a % b); } +constexpr size_t lcm(size_t a, size_t b) { return (a * b) / gcd(a, b); } + +template <typename T> +T** Alloc2dArray(j_common_ptr cinfo, int pool_id, JDIMENSION samplesperrow, + JDIMENSION numrows) { + T** array = Allocate<T*>(cinfo, numrows, pool_id); + // Always use aligned allocator for large 2d arrays. + if (pool_id < JPOOL_NUMPOOLS) { + pool_id += JPOOL_NUMPOOLS; + } + size_t alignment = lcm(sizeof(T), HWY_ALIGNMENT); + size_t memstride = RoundUpTo(samplesperrow * sizeof(T), alignment); + size_t stride = memstride / sizeof(T); + T* buffer = Allocate<T>(cinfo, numrows * stride, pool_id); + for (size_t i = 0; i < numrows; ++i) { + array[i] = &buffer[i * stride]; + } + return array; +} + +template <typename Control, typename T> +Control* RequestVirtualArray(j_common_ptr cinfo, int pool_id, boolean pre_zero, + JDIMENSION samplesperrow, JDIMENSION numrows, + JDIMENSION maxaccess) { + if (pool_id != JPOOL_IMAGE) { + JPEGLI_ERROR("Only image lifetime virtual arrays are supported."); + } + Control* p = Allocate<Control>(cinfo, 1, pool_id); + p->full_buffer = Alloc2dArray<T>(cinfo, pool_id, samplesperrow, numrows); + p->numrows = numrows; + p->maxaccess = maxaccess; + if (pre_zero) { + for (size_t i = 0; i < numrows; ++i) { + memset(p->full_buffer[i], 0, samplesperrow * sizeof(T)); + } + } + return p; +} + +void RealizeVirtualArrays(j_common_ptr cinfo) { + // Nothing to do, the full arrays were realized at request time already. 
+} + +template <typename Control, typename T> +T** AccessVirtualArray(j_common_ptr cinfo, Control* ptr, JDIMENSION start_row, + JDIMENSION num_rows, boolean writable) { + if (num_rows > ptr->maxaccess) { + JPEGLI_ERROR("Invalid virtual array access, num rows %u vs max rows %u", + num_rows, ptr->maxaccess); + } + if (start_row + num_rows > ptr->numrows) { + JPEGLI_ERROR("Invalid virtual array access, %u vs %u total rows", + start_row + num_rows, ptr->numrows); + } + if (ptr->full_buffer == nullptr) { + JPEGLI_ERROR("Invalid virtual array access, array not realized."); + } + return ptr->full_buffer + start_row; +} + +void ClearPool(j_common_ptr cinfo, int pool_id) { + MemoryManager* mem = reinterpret_cast<MemoryManager*>(cinfo->mem); + mem->owned_ptrs[pool_id].clear(); + mem->total_memory_usage -= mem->pool_memory_usage[pool_id]; + mem->pool_memory_usage[pool_id] = 0; +} + +void FreePool(j_common_ptr cinfo, int pool_id) { + MemoryManager* mem = reinterpret_cast<MemoryManager*>(cinfo->mem); + if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS) { + JPEGLI_ERROR("Invalid pool id %d", pool_id); + } + for (void* ptr : mem->owned_ptrs[pool_id]) { + free(ptr); + } + ClearPool(cinfo, pool_id); + for (void* ptr : mem->owned_ptrs[JPOOL_NUMPOOLS + pool_id]) { + hwy::FreeAlignedBytes(ptr, nullptr, nullptr); + } + ClearPool(cinfo, JPOOL_NUMPOOLS + pool_id); +} + +void SelfDestruct(j_common_ptr cinfo) { + MemoryManager* mem = reinterpret_cast<MemoryManager*>(cinfo->mem); + for (int pool_id = 0; pool_id < JPOOL_NUMPOOLS; ++pool_id) { + FreePool(cinfo, pool_id); + } + delete mem; + cinfo->mem = nullptr; +} + +} // namespace + +void InitMemoryManager(j_common_ptr cinfo) { + MemoryManager* mem = new MemoryManager; + mem->pub.alloc_small = jpegli::Alloc; + mem->pub.alloc_large = jpegli::Alloc; + mem->pub.alloc_sarray = jpegli::Alloc2dArray<JSAMPLE>; + mem->pub.alloc_barray = jpegli::Alloc2dArray<JBLOCK>; + mem->pub.request_virt_sarray = + jpegli::RequestVirtualArray<jvirt_sarray_control, 
JSAMPLE>; + mem->pub.request_virt_barray = + jpegli::RequestVirtualArray<jvirt_barray_control, JBLOCK>; + mem->pub.realize_virt_arrays = jpegli::RealizeVirtualArrays; + mem->pub.access_virt_sarray = + jpegli::AccessVirtualArray<jvirt_sarray_control, JSAMPLE>; + mem->pub.access_virt_barray = + jpegli::AccessVirtualArray<jvirt_barray_control, JBLOCK>; + mem->pub.free_pool = jpegli::FreePool; + mem->pub.self_destruct = jpegli::SelfDestruct; + mem->pub.max_memory_to_use = 0; + mem->total_memory_usage = 0; + mem->peak_memory_usage = 0; + memset(mem->pool_memory_usage, 0, sizeof(mem->pool_memory_usage)); + cinfo->mem = reinterpret_cast<struct jpeg_memory_mgr*>(mem); +} + +} // namespace jpegli diff --git a/lib/jpegli/memory_manager.h b/lib/jpegli/memory_manager.h new file mode 100644 index 0000000..3e2bdab --- /dev/null +++ b/lib/jpegli/memory_manager.h @@ -0,0 +1,45 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JPEGLI_MEMORY_MANAGER_H_ +#define LIB_JPEGLI_MEMORY_MANAGER_H_ + +#include <stdlib.h> + +#include "lib/jpegli/common.h" + +#define JPOOL_PERMANENT_ALIGNED (JPOOL_NUMPOOLS + JPOOL_PERMANENT) +#define JPOOL_IMAGE_ALIGNED (JPOOL_NUMPOOLS + JPOOL_IMAGE) + +namespace jpegli { + +void InitMemoryManager(j_common_ptr cinfo); + +template <typename T> +T* Allocate(j_common_ptr cinfo, size_t len, int pool_id = JPOOL_PERMANENT) { + void* p = (*cinfo->mem->alloc_small)(cinfo, pool_id, len * sizeof(T)); + return reinterpret_cast<T*>(p); +} + +template <typename T> +T* Allocate(j_decompress_ptr cinfo, size_t len, int pool_id = JPOOL_PERMANENT) { + return Allocate<T>(reinterpret_cast<j_common_ptr>(cinfo), len, pool_id); +} + +template <typename T> +T* Allocate(j_compress_ptr cinfo, size_t len, int pool_id = JPOOL_PERMANENT) { + return Allocate<T>(reinterpret_cast<j_common_ptr>(cinfo), len, pool_id); +} + +template <typename T> +JBLOCKARRAY GetBlockRow(T cinfo, int c, JDIMENSION by) { + return (*cinfo->mem->access_virt_barray)( + reinterpret_cast<j_common_ptr>(cinfo), cinfo->master->coeff_buffers[c], + by, 1, true); +} + +} // namespace jpegli + +#endif // LIB_JPEGLI_MEMORY_MANAGER_H_ diff --git a/lib/jpegli/output_suspension_test.cc b/lib/jpegli/output_suspension_test.cc new file mode 100644 index 0000000..73db791 --- /dev/null +++ b/lib/jpegli/output_suspension_test.cc @@ -0,0 +1,219 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jpegli/encode.h" +#include "lib/jpegli/test_utils.h" +#include "lib/jpegli/testing.h" + +namespace jpegli { +namespace { + +static constexpr size_t kInitialBufferSize = 1024; +static constexpr size_t kFinalBufferSize = 18; + +struct DestinationManager { + jpeg_destination_mgr pub; + std::vector<uint8_t> buffer; + + DestinationManager() { + pub.init_destination = init_destination; + pub.empty_output_buffer = empty_output_buffer; + pub.term_destination = term_destination; + } + + void Rewind() { + pub.next_output_byte = buffer.data(); + pub.free_in_buffer = buffer.size(); + } + + void EmptyTo(std::vector<uint8_t>* output, size_t new_size = 0) { + output->insert(output->end(), buffer.data(), pub.next_output_byte); + if (new_size > 0) { + buffer.resize(new_size); + } + Rewind(); + } + + static void init_destination(j_compress_ptr cinfo) { + auto us = reinterpret_cast<DestinationManager*>(cinfo->dest); + us->buffer.resize(kInitialBufferSize); + us->Rewind(); + } + + static boolean empty_output_buffer(j_compress_ptr cinfo) { return FALSE; } + + static void term_destination(j_compress_ptr cinfo) {} +}; + +struct TestConfig { + TestImage input; + CompressParams jparams; + size_t buffer_size; + size_t lines_batch_size; +}; + +class OutputSuspensionTestParam : public ::testing::TestWithParam<TestConfig> { +}; + +TEST_P(OutputSuspensionTestParam, PixelData) { + jpeg_compress_struct cinfo = {}; + TestConfig config = GetParam(); + TestImage& input = config.input; + GeneratePixels(&input); + DestinationManager dest; + std::vector<uint8_t> compressed; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + cinfo.dest = reinterpret_cast<jpeg_destination_mgr*>(&dest); + + cinfo.image_width = input.xsize; + cinfo.image_height = input.ysize; + cinfo.input_components = input.components; + cinfo.in_color_space = JCS_RGB; + jpegli_set_defaults(&cinfo); + cinfo.comp_info[0].v_samp_factor = 
config.jparams.v_sampling[0]; + jpegli_set_progressive_level(&cinfo, 0); + cinfo.optimize_coding = FALSE; + jpegli_start_compress(&cinfo, TRUE); + + size_t stride = cinfo.image_width * cinfo.input_components; + std::vector<uint8_t> row_bytes(config.lines_batch_size * stride); + while (cinfo.next_scanline < cinfo.image_height) { + size_t lines_left = cinfo.image_height - cinfo.next_scanline; + size_t num_lines = std::min(config.lines_batch_size, lines_left); + memcpy(&row_bytes[0], &input.pixels[cinfo.next_scanline * stride], + num_lines * stride); + std::vector<JSAMPROW> rows(num_lines); + for (size_t i = 0; i < num_lines; ++i) { + rows[i] = &row_bytes[i * stride]; + } + size_t lines_done = 0; + while (lines_done < num_lines) { + lines_done += jpegli_write_scanlines(&cinfo, &rows[lines_done], + num_lines - lines_done); + if (lines_done < num_lines) { + dest.EmptyTo(&compressed, config.buffer_size); + } + } + } + dest.EmptyTo(&compressed, kFinalBufferSize); + jpegli_finish_compress(&cinfo); + dest.EmptyTo(&compressed); + return true; + }; + ASSERT_TRUE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + TestImage output; + DecodeWithLibjpeg(CompressParams(), DecompressParams(), compressed, &output); + VerifyOutputImage(input, output, 2.5); +} + +TEST_P(OutputSuspensionTestParam, RawData) { + jpeg_compress_struct cinfo = {}; + TestConfig config = GetParam(); + if (config.lines_batch_size != 1) return; + TestImage& input = config.input; + input.color_space = JCS_YCbCr; + GeneratePixels(&input); + GenerateRawData(config.jparams, &input); + DestinationManager dest; + std::vector<uint8_t> compressed; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + cinfo.dest = reinterpret_cast<jpeg_destination_mgr*>(&dest); + cinfo.image_width = input.xsize; + cinfo.image_height = input.ysize; + cinfo.input_components = input.components; + cinfo.in_color_space = JCS_YCbCr; + jpegli_set_defaults(&cinfo); + 
cinfo.comp_info[0].v_samp_factor = config.jparams.v_sampling[0]; + jpegli_set_progressive_level(&cinfo, 0); + cinfo.optimize_coding = FALSE; + cinfo.raw_data_in = TRUE; + jpegli_start_compress(&cinfo, TRUE); + + std::vector<std::vector<uint8_t>> raw_data = input.raw_data; + size_t max_lines = config.jparams.max_v_sample() * DCTSIZE; + std::vector<std::vector<JSAMPROW>> rowdata(cinfo.num_components); + std::vector<JSAMPARRAY> data(cinfo.num_components); + for (int c = 0; c < cinfo.num_components; ++c) { + rowdata[c].resize(config.jparams.v_samp(c) * DCTSIZE); + data[c] = &rowdata[c][0]; + } + while (cinfo.next_scanline < cinfo.image_height) { + for (int c = 0; c < cinfo.num_components; ++c) { + size_t cwidth = cinfo.comp_info[c].width_in_blocks * DCTSIZE; + size_t cheight = cinfo.comp_info[c].height_in_blocks * DCTSIZE; + size_t num_lines = config.jparams.v_samp(c) * DCTSIZE; + size_t y0 = (cinfo.next_scanline / max_lines) * num_lines; + for (size_t i = 0; i < num_lines; ++i) { + rowdata[c][i] = + (y0 + i < cheight ? 
&raw_data[c][(y0 + i) * cwidth] : nullptr); + } + } + while (jpegli_write_raw_data(&cinfo, &data[0], max_lines) == 0) { + dest.EmptyTo(&compressed, config.buffer_size); + } + } + dest.EmptyTo(&compressed, kFinalBufferSize); + jpegli_finish_compress(&cinfo); + dest.EmptyTo(&compressed); + return true; + }; + try_catch_block(); + jpegli_destroy_compress(&cinfo); + DecompressParams dparams; + dparams.output_mode = RAW_DATA; + TestImage output; + DecodeWithLibjpeg(CompressParams(), dparams, compressed, &output); + VerifyOutputImage(input, output, 3.5); +} + +std::vector<TestConfig> GenerateTests() { + std::vector<TestConfig> all_tests; + const size_t xsize0 = 1920; + const size_t ysize0 = 1080; + for (int dysize : {0, 1, 8, 9}) { + for (int v_sampling : {1, 2}) { + for (int nlines : {1, 8, 117}) { + for (int bufsize : {1, 16, 16 << 10}) { + TestConfig config; + config.lines_batch_size = nlines; + config.buffer_size = bufsize; + config.input.xsize = xsize0; + config.input.ysize = ysize0 + dysize; + config.jparams.h_sampling = {1, 1, 1}; + config.jparams.v_sampling = {v_sampling, 1, 1}; + all_tests.push_back(config); + } + } + } + } + return all_tests; +} + +std::ostream& operator<<(std::ostream& os, const TestConfig& c) { + os << c.input; + os << c.jparams; + os << "Lines" << c.lines_batch_size; + os << "BufSize" << c.buffer_size; + return os; +} + +std::string TestDescription( + const testing::TestParamInfo<OutputSuspensionTestParam::ParamType>& info) { + std::stringstream name; + name << info.param; + return name.str(); +} + +JPEGLI_INSTANTIATE_TEST_SUITE_P(OutputSuspensionTest, OutputSuspensionTestParam, + testing::ValuesIn(GenerateTests()), + TestDescription); + +} // namespace +} // namespace jpegli diff --git a/lib/jpegli/quant.cc b/lib/jpegli/quant.cc new file mode 100644 index 0000000..36f1df4 --- /dev/null +++ b/lib/jpegli/quant.cc @@ -0,0 +1,768 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/quant.h" + +#include <algorithm> +#include <cmath> +#include <vector> + +#include "lib/jpegli/adaptive_quantization.h" +#include "lib/jpegli/common.h" +#include "lib/jpegli/encode_internal.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/memory_manager.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/status.h" + +namespace jpegli { + +namespace { + +// Global scale is chosen in a way that butteraugli 3-norm matches libjpeg +// with the same quality setting. Fitted for quality 90 on jyrki31 corpus. +constexpr float kGlobalScaleXYB = 1.43951668f; +constexpr float kGlobalScaleYCbCr = 1.73966010f; + +static constexpr float kBaseQuantMatrixXYB[] = { + // c = 0 + 7.5629935265f, + 19.8247814178f, + 22.5724945068f, + 20.6706695557f, + 22.6864585876f, + 23.5696277618f, + 25.8129081726f, + 36.3307571411f, + 19.8247814178f, + 21.5503177643f, + 19.9372234344f, + 20.5424213409f, + 21.8645496368f, + 23.9041385651f, + 28.2844066620f, + 32.6609764099f, + 22.5724945068f, + 19.9372234344f, + 21.9017257690f, + 19.1223449707f, + 21.7515811920f, + 24.6724700928f, + 25.4249649048f, + 32.6653823853f, + 20.6706695557f, + 20.5424213409f, + 19.1223449707f, + 20.1610221863f, + 25.3719692230f, + 25.9668903351f, + 30.9804954529f, + 31.3406009674f, + 22.6864585876f, + 21.8645496368f, + 21.7515811920f, + 25.3719692230f, + 26.2431850433f, + 40.5992202759f, + 43.2624626160f, + 63.3010940552f, + 23.5696277618f, + 23.9041385651f, + 24.6724700928f, + 25.9668903351f, + 40.5992202759f, + 48.3026771545f, + 34.0964355469f, + 61.9852142334f, + 25.8129081726f, + 28.2844066620f, + 25.4249649048f, + 30.9804954529f, + 43.2624626160f, + 34.0964355469f, + 34.4937438965f, + 66.9702758789f, + 36.3307571411f, + 32.6609764099f, + 32.6653823853f, + 31.3406009674f, + 63.3010940552f, + 61.9852142334f, + 66.9702758789f, + 39.9652709961f, + // c = 1 + 1.6262000799f, 
+ 3.2199242115f, + 3.4903779030f, + 3.9148359299f, + 4.8337211609f, + 4.9108843803f, + 5.3137121201f, + 6.1676793098f, + 3.2199242115f, + 3.4547898769f, + 3.6036829948f, + 4.2652835846f, + 4.8368387222f, + 4.8226222992f, + 5.6120514870f, + 6.3431472778f, + 3.4903779030f, + 3.6036829948f, + 3.9044559002f, + 4.3374395370f, + 4.8435096741f, + 5.4057979584f, + 5.6066360474f, + 6.1075134277f, + 3.9148359299f, + 4.2652835846f, + 4.3374395370f, + 4.6064834595f, + 5.1751475334f, + 5.4013924599f, + 6.0399808884f, + 6.7825231552f, + 4.8337211609f, + 4.8368387222f, + 4.8435096741f, + 5.1751475334f, + 5.3748049736f, + 6.1410837173f, + 7.6529307365f, + 7.5235214233f, + 4.9108843803f, + 4.8226222992f, + 5.4057979584f, + 5.4013924599f, + 6.1410837173f, + 6.3431472778f, + 7.1083049774f, + 7.6008300781f, + 5.3137121201f, + 5.6120514870f, + 5.6066360474f, + 6.0399808884f, + 7.6529307365f, + 7.1083049774f, + 7.0943155289f, + 7.0478363037f, + 6.1676793098f, + 6.3431472778f, + 6.1075134277f, + 6.7825231552f, + 7.5235214233f, + 7.6008300781f, + 7.0478363037f, + 6.9186143875f, + // c = 2 + 3.3038473129f, + 10.0689258575f, + 12.2785224915f, + 14.6041173935f, + 16.2107315063f, + 19.2314529419f, + 28.0129547119f, + 55.6682891846f, + 10.0689258575f, + 11.4085016251f, + 11.3871345520f, + 15.4934167862f, + 16.5364933014f, + 14.9153423309f, + 26.3748722076f, + 40.8614425659f, + 12.2785224915f, + 11.3871345520f, + 17.0886878967f, + 13.9500350952f, + 16.0003223419f, + 28.5660629272f, + 26.2124195099f, + 30.1260128021f, + 14.6041173935f, + 15.4934167862f, + 13.9500350952f, + 21.1235027313f, + 26.1579780579f, + 25.5579223633f, + 40.6859359741f, + 33.8056335449f, + 16.2107315063f, + 16.5364933014f, + 16.0003223419f, + 26.1579780579f, + 26.8042831421f, + 26.1587715149f, + 35.7343978882f, + 43.6857032776f, + 19.2314529419f, + 14.9153423309f, + 28.5660629272f, + 25.5579223633f, + 26.1587715149f, + 34.5418128967f, + 41.3197937012f, + 48.7867660522f, + 28.0129547119f, + 26.3748722076f, + 26.2124195099f, 
+ 40.6859359741f, + 35.7343978882f, + 41.3197937012f, + 47.6329460144f, + 55.3498458862f, + 55.6682891846f, + 40.8614425659f, + 30.1260128021f, + 33.8056335449f, + 43.6857032776f, + 48.7867660522f, + 55.3498458862f, + 63.6065597534f, +}; + +static const float kBaseQuantMatrixYCbCr[] = { + // c = 0 + 1.2397409345866273f, // + 1.7227115097630963f, // + 2.9212167156636855f, // + 2.812737435286529f, // + 3.339819711906184f, // + 3.463603762596166f, // + 3.840915217993518f, // + 3.86956f, // + 1.7227115097630963f, // + 2.0928894413636874f, // + 2.8456760904429297f, // + 2.704506820909662f, // + 3.4407673520905337f, // + 3.166232352090534f, // + 4.025208741558432f, // + 4.035324490952577f, // + 2.9212167156636855f, // + 2.8456760904429297f, // + 2.9587403520905338f, // + 3.3862948970669273f, // + 3.619523781336757f, // + 3.9046279999999998f, // + 3.757835838431854f, // + 4.237447515714274f, // + 2.812737435286529f, // + 2.704506820909662f, // + 3.3862948970669273f, // + 3.380058821812233f, // + 4.1679867415584315f, // + 4.805510627261856f, // + 4.784259f, // + 4.605934f, // + 3.339819711906184f, // + 3.4407673520905337f, // + 3.619523781336757f, // + 4.1679867415584315f, // + 4.579851258441568f, // + 4.923237f, // + 5.574107f, // + 5.48533336146308f, // + 3.463603762596166f, // + 3.166232352090534f, // + 3.9046279999999998f, // + 4.805510627261856f, // + 4.923237f, // + 5.43936f, // + 5.093895741558431f, // + 6.0872254423617225f, // + 3.840915217993518f, // + 4.025208741558432f, // + 3.757835838431854f, // + 4.784259f, // + 5.574107f, // + 5.093895741558431f, // + 5.438461f, // + 5.4037359493250845f, // + 3.86956f, // + 4.035324490952577f, // + 4.237447515714274f, // + 4.605934f, // + 5.48533336146308f, // + 6.0872254423617225f, // + 5.4037359493250845f, // + 4.37787101190424f, + // c = 1 + 2.8236197786377537f, // + 6.495639358561486f, // + 9.310489207538302f, // + 10.64747864717083f, // + 11.07419143098738f, // + 17.146390223910462f, // + 18.463982229408998f, // + 
29.087001644203088f, // + 6.495639358561486f, // + 8.890103846667353f, // + 8.976895794294748f, // + 13.666270550318826f, // + 16.547071905624193f, // + 16.63871382827686f, // + 26.778396930893695f, // + 21.33034294694781f, // + 9.310489207538302f, // + 8.976895794294748f, // + 11.08737706005991f, // + 18.20548239870446f, // + 19.752481654011646f, // + 23.985660533114896f, // + 102.6457378402362f, // + 24.450989f, // + 10.64747864717083f, // + 13.666270550318826f, // + 18.20548239870446f, // + 18.628012327860365f, // + 16.042509519487183f, // + 25.04918273242625f, // + 25.017140189353015f, // + 35.79788782635831f, // + 11.07419143098738f, // + 16.547071905624193f, // + 19.752481654011646f, // + 16.042509519487183f, // + 19.373482748612577f, // + 14.677529999999999f, // + 19.94695960400931f, // + 51.094112f, // + 17.146390223910462f, // + 16.63871382827686f, // + 23.985660533114896f, // + 25.04918273242625f, // + 14.677529999999999f, // + 31.320412426835304f, // + 46.357234000000005f, // + 67.48111451705412f, // + 18.463982229408998f, // + 26.778396930893695f, // + 102.6457378402362f, // + 25.017140189353015f, // + 19.94695960400931f, // + 46.357234000000005f, // + 61.315764694388044f, // + 88.34665293823721f, // + 29.087001644203088f, // + 21.33034294694781f, // + 24.450989f, // + 35.79788782635831f, // + 51.094112f, // + 67.48111451705412f, // + 88.34665293823721f, // + 112.16099098350989f, + // c = 2 + 2.9217254961255255f, // + 4.497681013199305f, // + 7.356344520940414f, // + 6.583891506504051f, // + 8.535608740100237f, // + 8.799434353234647f, // + 9.188341534163023f, // + 9.482700481227672f, // + 4.497681013199305f, // + 6.309548851989123f, // + 7.024608962670982f, // + 7.156445324163424f, // + 8.049059218663244f, // + 7.0124290657218555f, // + 6.711923184393611f, // + 8.380307846134853f, // + 7.356344520940414f, // + 7.024608962670982f, // + 6.892101177327445f, // + 6.882819916277163f, // + 8.782226090078568f, // + 6.8774750000000004f, // + 
7.8858175969577955f, // + 8.67909f, // + 6.583891506504051f, // + 7.156445324163424f, // + 6.882819916277163f, // + 7.003072944847055f, // + 7.7223464701024875f, // + 7.955425720217421f, // + 7.4734110000000005f, // + 8.362933242943903f, // + 8.535608740100237f, // + 8.049059218663244f, // + 8.782226090078568f, // + 7.7223464701024875f, // + 6.778005927001542f, // + 9.484922741558432f, // + 9.043702663686046f, // + 8.053178199770173f, // + 8.799434353234647f, // + 7.0124290657218555f, // + 6.8774750000000004f, // + 7.955425720217421f, // + 9.484922741558432f, // + 8.607606527385098f, // + 9.922697394370815f, // + 64.25135180237939f, // + 9.188341534163023f, // + 6.711923184393611f, // + 7.8858175969577955f, // + 7.4734110000000005f, // + 9.043702663686046f, // + 9.922697394370815f, // + 63.184936549738225f, // + 83.35294340273799f, // + 9.482700481227672f, // + 8.380307846134853f, // + 8.67909f, // + 8.362933242943903f, // + 8.053178199770173f, // + 64.25135180237939f, // + 83.35294340273799f, // + 114.89202448569779f, // +}; + +static const float k420GlobalScale = 1.22; +static const float k420Rescale[64] = { + 0.4093, 0.3209, 0.3477, 0.3333, 0.3144, 0.2823, 0.3214, 0.3354, // + 0.3209, 0.3111, 0.3489, 0.2801, 0.3059, 0.3119, 0.4135, 0.3445, // + 0.3477, 0.3489, 0.3586, 0.3257, 0.2727, 0.3754, 0.3369, 0.3484, // + 0.3333, 0.2801, 0.3257, 0.3020, 0.3515, 0.3410, 0.3971, 0.3839, // + 0.3144, 0.3059, 0.2727, 0.3515, 0.3105, 0.3397, 0.2716, 0.3836, // + 0.2823, 0.3119, 0.3754, 0.3410, 0.3397, 0.3212, 0.3203, 0.0726, // + 0.3214, 0.4135, 0.3369, 0.3971, 0.2716, 0.3203, 0.0798, 0.0553, // + 0.3354, 0.3445, 0.3484, 0.3839, 0.3836, 0.0726, 0.0553, 0.3368, // +}; + +static const float kBaseQuantMatrixStd[] = { + // c = 0 + 16.0f, 11.0f, 10.0f, 16.0f, 24.0f, 40.0f, 51.0f, 61.0f, // + 12.0f, 12.0f, 14.0f, 19.0f, 26.0f, 58.0f, 60.0f, 55.0f, // + 14.0f, 13.0f, 16.0f, 24.0f, 40.0f, 57.0f, 69.0f, 56.0f, // + 14.0f, 17.0f, 22.0f, 29.0f, 51.0f, 87.0f, 80.0f, 62.0f, // + 18.0f, 
22.0f, 37.0f, 56.0f, 68.0f, 109.0f, 103.0f, 77.0f, // + 24.0f, 35.0f, 55.0f, 64.0f, 81.0f, 104.0f, 113.0f, 92.0f, // + 49.0f, 64.0f, 78.0f, 87.0f, 103.0f, 121.0f, 120.0f, 101.0f, // + 72.0f, 92.0f, 95.0f, 98.0f, 112.0f, 100.0f, 103.0f, 99.0f, // + // c = 1 + 17.0f, 18.0f, 24.0f, 47.0f, 99.0f, 99.0f, 99.0f, 99.0f, // + 18.0f, 21.0f, 26.0f, 66.0f, 99.0f, 99.0f, 99.0f, 99.0f, // + 24.0f, 26.0f, 56.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, // + 47.0f, 66.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, // + 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, // + 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, // + 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, // + 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, // +}; + +static const float kZeroBiasMulYCbCrLQ[] = { + // c = 0 + 0.0000f, 0.0568f, 0.3880f, 0.6190f, 0.6190f, 0.4490f, 0.4490f, 0.6187f, // + 0.0568f, 0.5829f, 0.6189f, 0.6190f, 0.6190f, 0.7190f, 0.6190f, 0.6189f, // + 0.3880f, 0.6189f, 0.6190f, 0.6190f, 0.6190f, 0.6190f, 0.6187f, 0.6100f, // + 0.6190f, 0.6190f, 0.6190f, 0.6190f, 0.5890f, 0.3839f, 0.7160f, 0.6190f, // + 0.6190f, 0.6190f, 0.6190f, 0.5890f, 0.6190f, 0.3880f, 0.5860f, 0.4790f, // + 0.4490f, 0.7190f, 0.6190f, 0.3839f, 0.3880f, 0.6190f, 0.6190f, 0.6190f, // + 0.4490f, 0.6190f, 0.6187f, 0.7160f, 0.5860f, 0.6190f, 0.6204f, 0.6190f, // + 0.6187f, 0.6189f, 0.6100f, 0.6190f, 0.4790f, 0.6190f, 0.6190f, 0.3480f, // + // c = 1 + 0.0000f, 1.1640f, 0.9373f, 1.1319f, 0.8016f, 0.9136f, 1.1530f, 0.9430f, // + 1.1640f, 0.9188f, 0.9160f, 1.1980f, 1.1830f, 0.9758f, 0.9430f, 0.9430f, // + 0.9373f, 0.9160f, 0.8430f, 1.1720f, 0.7083f, 0.9430f, 0.9430f, 0.9430f, // + 1.1319f, 1.1980f, 1.1720f, 1.1490f, 0.8547f, 0.9430f, 0.9430f, 0.9430f, // + 0.8016f, 1.1830f, 0.7083f, 0.8547f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, // + 0.9136f, 0.9758f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, // + 1.1530f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9480f, // + 0.9430f, 0.9430f, 
0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9480f, 0.9430f, // + // c = 2 + 0.0000f, 1.3190f, 0.4308f, 0.4460f, 0.0661f, 0.0660f, 0.2660f, 0.2960f, // + 1.3190f, 0.3280f, 0.3093f, 0.0750f, 0.0505f, 0.1594f, 0.3060f, 0.2113f, // + 0.4308f, 0.3093f, 0.3060f, 0.1182f, 0.0500f, 0.3060f, 0.3915f, 0.2426f, // + 0.4460f, 0.0750f, 0.1182f, 0.0512f, 0.0500f, 0.2130f, 0.3930f, 0.1590f, // + 0.0661f, 0.0505f, 0.0500f, 0.0500f, 0.3055f, 0.3360f, 0.5148f, 0.5403f, // + 0.0660f, 0.1594f, 0.3060f, 0.2130f, 0.3360f, 0.5060f, 0.5874f, 0.3060f, // + 0.2660f, 0.3060f, 0.3915f, 0.3930f, 0.5148f, 0.5874f, 0.3060f, 0.3060f, // + 0.2960f, 0.2113f, 0.2426f, 0.1590f, 0.5403f, 0.3060f, 0.3060f, 0.3060f, // +}; + +static const float kZeroBiasMulYCbCrHQ[] = { + // c = 0 + 0.0000f, 0.0044f, 0.2521f, 0.6547f, 0.8161f, 0.6130f, 0.8841f, 0.8155f, // + 0.0044f, 0.6831f, 0.6553f, 0.6295f, 0.7848f, 0.7843f, 0.8474f, 0.7836f, // + 0.2521f, 0.6553f, 0.7834f, 0.7829f, 0.8161f, 0.8072f, 0.7743f, 0.9242f, // + 0.6547f, 0.6295f, 0.7829f, 0.8654f, 0.7829f, 0.6986f, 0.7818f, 0.7726f, // + 0.8161f, 0.7848f, 0.8161f, 0.7829f, 0.7471f, 0.7827f, 0.7843f, 0.7653f, // + 0.6130f, 0.7843f, 0.8072f, 0.6986f, 0.7827f, 0.7848f, 0.9508f, 0.7653f, // + 0.8841f, 0.8474f, 0.7743f, 0.7818f, 0.7843f, 0.9508f, 0.7839f, 0.8437f, // + 0.8155f, 0.7836f, 0.9242f, 0.7726f, 0.7653f, 0.7653f, 0.8437f, 0.7819f, // + // c = 1 + 0.0000f, 1.0816f, 1.0556f, 1.2876f, 1.1554f, 1.1567f, 1.8851f, 0.5488f, // + 1.0816f, 1.1537f, 1.1850f, 1.0712f, 1.1671f, 2.0719f, 1.0544f, 1.4764f, // + 1.0556f, 1.1850f, 1.2870f, 1.1981f, 1.8181f, 1.2618f, 1.0564f, 1.1191f, // + 1.2876f, 1.0712f, 1.1981f, 1.4753f, 2.0609f, 1.0564f, 1.2645f, 1.0564f, // + 1.1554f, 1.1671f, 1.8181f, 2.0609f, 0.7324f, 1.1163f, 0.8464f, 1.0564f, // + 1.1567f, 2.0719f, 1.2618f, 1.0564f, 1.1163f, 1.0040f, 1.0564f, 1.0564f, // + 1.8851f, 1.0544f, 1.0564f, 1.2645f, 0.8464f, 1.0564f, 1.0564f, 1.0564f, // + 0.5488f, 1.4764f, 1.1191f, 1.0564f, 1.0564f, 1.0564f, 1.0564f, 1.0564f, // + // c = 
2 + 0.0000f, 0.5392f, 0.6659f, 0.8968f, 0.6829f, 0.6328f, 0.5802f, 0.4836f, // + 0.5392f, 0.6746f, 0.6760f, 0.6102f, 0.6015f, 0.6958f, 0.7327f, 0.4897f, // + 0.6659f, 0.6760f, 0.6957f, 0.6543f, 0.4396f, 0.6330f, 0.7081f, 0.2583f, // + 0.8968f, 0.6102f, 0.6543f, 0.5913f, 0.6457f, 0.5828f, 0.5139f, 0.3565f, // + 0.6829f, 0.6015f, 0.4396f, 0.6457f, 0.5633f, 0.4263f, 0.6371f, 0.5949f, // + 0.6328f, 0.6958f, 0.6330f, 0.5828f, 0.4263f, 0.2847f, 0.2909f, 0.6629f, // + 0.5802f, 0.7327f, 0.7081f, 0.5139f, 0.6371f, 0.2909f, 0.6644f, 0.6644f, // + 0.4836f, 0.4897f, 0.2583f, 0.3565f, 0.5949f, 0.6629f, 0.6644f, 0.6644f, // +}; + +static const float kZeroBiasOffsetYCbCrDC[] = {0.0f, 0.0f, 0.0f}; + +static const float kZeroBiasOffsetYCbCrAC[] = { + 0.59082f, + 0.58146f, + 0.57988f, +}; + +constexpr uint8_t kTransferFunctionPQ = 16; +constexpr uint8_t kTransferFunctionHLG = 18; + +float DistanceToLinearQuality(float distance) { + if (distance <= 0.1f) { + return 1.0f; + } else if (distance <= 4.6f) { + return (200.0f / 9.0f) * (distance - 0.1f); + } else if (distance <= 6.4f) { + return 5000.0f / (100.0f - (distance - 0.1f) / 0.09f); + } else if (distance < 25.0f) { + return 530000.0f / + (3450.0f - + 300.0f * std::sqrt((848.0f * distance - 5330.0f) / 120.0f)); + } else { + return 5000.0f; + } +} + +constexpr float kExponent[DCTSIZE2] = { + 1.00f, 0.51f, 0.67f, 0.74f, 1.00f, 1.00f, 1.00f, 1.00f, // + 0.51f, 0.66f, 0.69f, 0.87f, 1.00f, 1.00f, 1.00f, 1.00f, // + 0.67f, 0.69f, 0.84f, 0.83f, 0.96f, 1.00f, 1.00f, 1.00f, // + 0.74f, 0.87f, 0.83f, 1.00f, 1.00f, 0.91f, 0.91f, 1.00f, // + 1.00f, 1.00f, 0.96f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, // + 1.00f, 1.00f, 1.00f, 0.91f, 1.00f, 1.00f, 1.00f, 1.00f, // + 1.00f, 1.00f, 1.00f, 0.91f, 1.00f, 1.00f, 1.00f, 1.00f, // + 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, // +}; +constexpr float kDist0 = 1.5f; // distance where non-linearity kicks in. 
+ +float DistanceToScale(float distance, int k) { + if (distance < kDist0) { + return distance; + } + const float exp = kExponent[k]; + const float mul = std::pow(kDist0, 1.0 - exp); + return std::max<float>(0.5f * distance, mul * std::pow(distance, exp)); +} + +float ScaleToDistance(float scale, int k) { + if (scale < kDist0) { + return scale; + } + const float exp = 1.0 / kExponent[k]; + const float mul = std::pow(kDist0, 1.0 - exp); + return std::min<float>(2.0f * scale, mul * std::pow(scale, exp)); +} + +float QuantValsToDistance(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + float global_scale = kGlobalScaleYCbCr; + if (m->cicp_transfer_function == kTransferFunctionPQ) { + global_scale *= .4f; + } else if (m->cicp_transfer_function == kTransferFunctionHLG) { + global_scale *= .5f; + } + int quant_max = m->force_baseline ? 255 : 32767U; + static const float kDistMax = 10000.0f; + float dist_min = 0.0f; + float dist_max = kDistMax; + for (int c = 0; c < cinfo->num_components; ++c) { + int quant_idx = cinfo->comp_info[c].quant_tbl_no; + uint16_t* quantval = cinfo->quant_tbl_ptrs[quant_idx]->quantval; + const float* base_qm = &kBaseQuantMatrixYCbCr[quant_idx * DCTSIZE2]; + for (int k = 0; k < DCTSIZE2; ++k) { + float dmin = 0.0; + float dmax = kDistMax; + float invq = 1.0f / base_qm[k] / global_scale; + int qval = quantval[k]; + if (qval > 1) { + float scale_min = (qval - 0.5f) * invq; + dmin = ScaleToDistance(scale_min, k); + } + if (qval < quant_max) { + float scale_max = (qval + 0.5f) * invq; + dmax = ScaleToDistance(scale_max, k); + } + if (dmin <= dist_max) { + dist_min = std::max(dmin, dist_min); + } + if (dmax >= dist_min) { + dist_max = std::min(dist_max, dmax); + } + } + } + float distance; + if (dist_min == 0) { + distance = dist_max; + } else if (dist_max == kDistMax) { + distance = dist_min; + } else { + distance = 0.5f * (dist_min + dist_max); + } + return distance; +} + +bool IsYUV420(j_compress_ptr cinfo) { + return 
(cinfo->jpeg_color_space == JCS_YCbCr && + cinfo->comp_info[0].h_samp_factor == 2 && + cinfo->comp_info[0].v_samp_factor == 2 && + cinfo->comp_info[1].h_samp_factor == 1 && + cinfo->comp_info[1].v_samp_factor == 1 && + cinfo->comp_info[2].h_samp_factor == 1 && + cinfo->comp_info[2].v_samp_factor == 1); +} + +} // namespace + +void SetQuantMatrices(j_compress_ptr cinfo, float distances[NUM_QUANT_TBLS], + bool add_two_chroma_tables) { + jpeg_comp_master* m = cinfo->master; + const bool xyb = m->xyb_mode && cinfo->jpeg_color_space == JCS_RGB; + const bool is_yuv420 = IsYUV420(cinfo); + + float global_scale; + bool non_linear_scaling = true; + const float* base_quant_matrix[NUM_QUANT_TBLS]; + int num_base_tables; + + if (xyb) { + global_scale = kGlobalScaleXYB; + num_base_tables = 3; + base_quant_matrix[0] = kBaseQuantMatrixXYB; + base_quant_matrix[1] = kBaseQuantMatrixXYB + DCTSIZE2; + base_quant_matrix[2] = kBaseQuantMatrixXYB + 2 * DCTSIZE2; + } else if (cinfo->jpeg_color_space == JCS_YCbCr && !m->use_std_tables) { + global_scale = kGlobalScaleYCbCr; + if (m->cicp_transfer_function == kTransferFunctionPQ) { + global_scale *= .4f; + } else if (m->cicp_transfer_function == kTransferFunctionHLG) { + global_scale *= .5f; + } + if (is_yuv420) { + global_scale *= k420GlobalScale; + } + if (add_two_chroma_tables) { + cinfo->comp_info[2].quant_tbl_no = 2; + num_base_tables = 3; + base_quant_matrix[0] = kBaseQuantMatrixYCbCr; + base_quant_matrix[1] = kBaseQuantMatrixYCbCr + DCTSIZE2; + base_quant_matrix[2] = kBaseQuantMatrixYCbCr + 2 * DCTSIZE2; + } else { + num_base_tables = 2; + base_quant_matrix[0] = kBaseQuantMatrixYCbCr; + // Use the Cr table for both Cb and Cr. + base_quant_matrix[1] = kBaseQuantMatrixYCbCr + 2 * DCTSIZE2; + } + } else { + global_scale = 0.01f; + non_linear_scaling = false; + num_base_tables = 2; + base_quant_matrix[0] = kBaseQuantMatrixStd; + base_quant_matrix[1] = kBaseQuantMatrixStd + DCTSIZE2; + } + + int quant_max = m->force_baseline ? 
255 : 32767U; + for (int quant_idx = 0; quant_idx < num_base_tables; ++quant_idx) { + const float* base_qm = base_quant_matrix[quant_idx]; + JQUANT_TBL** qtable = &cinfo->quant_tbl_ptrs[quant_idx]; + if (*qtable == nullptr) { + *qtable = jpegli_alloc_quant_table(reinterpret_cast<j_common_ptr>(cinfo)); + } + for (int k = 0; k < DCTSIZE2; ++k) { + float scale = global_scale; + if (non_linear_scaling) { + scale *= DistanceToScale(distances[quant_idx], k); + if (is_yuv420 && quant_idx > 0) { + scale *= k420Rescale[k]; + } + } else { + scale *= DistanceToLinearQuality(distances[quant_idx]); + } + int qval = std::round(scale * base_qm[k]); + (*qtable)->quantval[k] = std::max(1, std::min(qval, quant_max)); + } + (*qtable)->sent_table = FALSE; + } +} + +void InitQuantizer(j_compress_ptr cinfo, QuantPass pass) { + jpeg_comp_master* m = cinfo->master; + // Compute quantization multupliers from the quant table values. + for (int c = 0; c < cinfo->num_components; ++c) { + int quant_idx = cinfo->comp_info[c].quant_tbl_no; + JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[quant_idx]; + if (!quant_table) { + JPEGLI_ERROR("Missing quantization table %d for component %d", quant_idx, + c); + } + for (size_t k = 0; k < DCTSIZE2; k++) { + int val = quant_table->quantval[k]; + if (val == 0) { + JPEGLI_ERROR("Invalid quantval 0."); + } + switch (pass) { + case QuantPass::NO_SEARCH: + m->quant_mul[c][k] = 8.0f / val; + break; + case QuantPass::SEARCH_FIRST_PASS: + m->quant_mul[c][k] = 128.0f; + break; + case QuantPass::SEARCH_SECOND_PASS: + m->quant_mul[c][kJPEGZigZagOrder[k]] = 1.0f / (16 * val); + break; + } + } + } + if (m->use_adaptive_quantization) { + for (int c = 0; c < cinfo->num_components; ++c) { + for (int k = 0; k < DCTSIZE2; ++k) { + m->zero_bias_mul[c][k] = k == 0 ? 0.0f : 0.5f; + m->zero_bias_offset[c][k] = k == 0 ? 
0.0f : 0.5f; + } + } + if (cinfo->jpeg_color_space == JCS_YCbCr) { + float distance = QuantValsToDistance(cinfo); + static const float kDistHQ = 1.0f; + static const float kDistLQ = 3.0f; + float mix0 = (distance - kDistHQ) / (kDistLQ - kDistHQ); + mix0 = std::max(0.0f, std::min(1.0f, mix0)); + float mix1 = 1.0f - mix0; + for (int c = 0; c < cinfo->num_components; ++c) { + for (int k = 0; k < DCTSIZE2; ++k) { + float mul0 = kZeroBiasMulYCbCrLQ[c * DCTSIZE2 + k]; + float mul1 = kZeroBiasMulYCbCrHQ[c * DCTSIZE2 + k]; + m->zero_bias_mul[c][k] = mix0 * mul0 + mix1 * mul1; + m->zero_bias_offset[c][k] = + k == 0 ? kZeroBiasOffsetYCbCrDC[c] : kZeroBiasOffsetYCbCrAC[c]; + } + } + } + } else if (cinfo->jpeg_color_space == JCS_YCbCr) { + for (int c = 0; c < cinfo->num_components; ++c) { + for (int k = 0; k < DCTSIZE2; ++k) { + m->zero_bias_offset[c][k] = + k == 0 ? kZeroBiasOffsetYCbCrDC[c] : kZeroBiasOffsetYCbCrAC[c]; + } + } + } +} + +} // namespace jpegli diff --git a/lib/jpegli/quant.h b/lib/jpegli/quant.h new file mode 100644 index 0000000..cb37757 --- /dev/null +++ b/lib/jpegli/quant.h @@ -0,0 +1,26 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_QUANT_H_ +#define LIB_JPEGLI_QUANT_H_ + +#include "lib/jpegli/common.h" + +namespace jpegli { + +void SetQuantMatrices(j_compress_ptr cinfo, float distances[NUM_QUANT_TBLS], + bool add_two_chroma_tables); + +enum QuantPass { + NO_SEARCH, + SEARCH_FIRST_PASS, + SEARCH_SECOND_PASS, +}; + +void InitQuantizer(j_compress_ptr cinfo, QuantPass pass); + +} // namespace jpegli + +#endif // LIB_JPEGLI_QUANT_H_ diff --git a/lib/jpegli/render.cc b/lib/jpegli/render.cc new file mode 100644 index 0000000..24e7e99 --- /dev/null +++ b/lib/jpegli/render.cc @@ -0,0 +1,763 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/render.h" + +#include <string.h> + +#include <array> +#include <atomic> +#include <cmath> +#include <cstddef> +#include <cstdint> +#include <hwy/aligned_allocator.h> +#include <vector> + +#include "lib/jpegli/color_quantize.h" +#include "lib/jpegli/color_transform.h" +#include "lib/jpegli/decode_internal.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/idct.h" +#include "lib/jpegli/upsample.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" + +#ifdef MEMORY_SANITIZER +#define JXL_MEMORY_SANITIZER 1 +#elif defined(__has_feature) +#if __has_feature(memory_sanitizer) +#define JXL_MEMORY_SANITIZER 1 +#else +#define JXL_MEMORY_SANITIZER 0 +#endif +#else +#define JXL_MEMORY_SANITIZER 0 +#endif + +#if JXL_MEMORY_SANITIZER +#include "sanitizer/msan_interface.h" +#endif + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/render.cc" +#include <hwy/foreach_target.h> +#include <hwy/highway.h> + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::Abs; +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Clamp; +using hwy::HWY_NAMESPACE::Gt; +using hwy::HWY_NAMESPACE::IfThenElseZero; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::NearestInt; +using hwy::HWY_NAMESPACE::Or; +using hwy::HWY_NAMESPACE::Rebind; +using hwy::HWY_NAMESPACE::ShiftLeftSame; +using hwy::HWY_NAMESPACE::ShiftRightSame; +using hwy::HWY_NAMESPACE::Vec; +using D = HWY_FULL(float); +using DI = HWY_FULL(int32_t); +constexpr D d; +constexpr DI di; + +void GatherBlockStats(const int16_t* JXL_RESTRICT coeffs, + const size_t coeffs_size, int32_t* JXL_RESTRICT nonzeros, + int32_t* JXL_RESTRICT sumabs) { + for (size_t i = 0; i < coeffs_size; i += Lanes(d)) { + size_t k = i % DCTSIZE2; + const Rebind<int16_t, DI> di16; + const Vec<DI> coeff = PromoteTo(di, Load(di16, coeffs + i)); + const auto abs_coeff = Abs(coeff); + const auto not_0 = Gt(abs_coeff, Zero(di)); + const auto nzero = IfThenElseZero(not_0, Set(di, 1)); + Store(Add(nzero, Load(di, nonzeros + k)), di, nonzeros + k); + Store(Add(abs_coeff, Load(di, sumabs + k)), di, sumabs + k); + } +} + +void DecenterRow(float* row, size_t xsize) { + const HWY_CAPPED(float, 8) df; + const auto c128 = Set(df, 128.0f / 255); + for (size_t x = 0; x < xsize; x += Lanes(df)) { + Store(Add(Load(df, row + x), c128), df, row + x); + } +} + +void DitherRow(j_decompress_ptr cinfo, float* row, int c, size_t y, + size_t xsize) { + jpeg_decomp_master* m = cinfo->master; + if (!m->dither_[c]) return; + const float* dither_row = + &m->dither_[c][(y & m->dither_mask_) * m->dither_size_]; + for (size_t x = 0; x < xsize; ++x) { + row[x] += dither_row[x & m->dither_mask_]; + } +} + +template <typename T> +void StoreUnsignedRow(float* JXL_RESTRICT input[], size_t x0, size_t len, + size_t num_channels, float multiplier, T* output) { + const HWY_CAPPED(float, 8) d; + auto zero = Zero(d); + auto mul = Set(d, multiplier); + const Rebind<T, decltype(d)> du; +#if JXL_MEMORY_SANITIZER 
+ const size_t padding = hwy::RoundUpTo(len, Lanes(d)) - len; + for (size_t c = 0; c < num_channels; ++c) { + __msan_unpoison(input[c] + x0 + len, sizeof(input[c][0]) * padding); + } +#endif + if (num_channels == 1) { + for (size_t i = 0; i < len; i += Lanes(d)) { + auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul); + StoreU(DemoteTo(du, NearestInt(v0)), du, &output[i]); + } + } else if (num_channels == 2) { + for (size_t i = 0; i < len; i += Lanes(d)) { + auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul); + auto v1 = Clamp(zero, Mul(LoadU(d, &input[1][x0 + i]), mul), mul); + StoreInterleaved2(DemoteTo(du, NearestInt(v0)), + DemoteTo(du, NearestInt(v1)), du, &output[2 * i]); + } + } else if (num_channels == 3) { + for (size_t i = 0; i < len; i += Lanes(d)) { + auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul); + auto v1 = Clamp(zero, Mul(LoadU(d, &input[1][x0 + i]), mul), mul); + auto v2 = Clamp(zero, Mul(LoadU(d, &input[2][x0 + i]), mul), mul); + StoreInterleaved3(DemoteTo(du, NearestInt(v0)), + DemoteTo(du, NearestInt(v1)), + DemoteTo(du, NearestInt(v2)), du, &output[3 * i]); + } + } else if (num_channels == 4) { + for (size_t i = 0; i < len; i += Lanes(d)) { + auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul); + auto v1 = Clamp(zero, Mul(LoadU(d, &input[1][x0 + i]), mul), mul); + auto v2 = Clamp(zero, Mul(LoadU(d, &input[2][x0 + i]), mul), mul); + auto v3 = Clamp(zero, Mul(LoadU(d, &input[3][x0 + i]), mul), mul); + StoreInterleaved4(DemoteTo(du, NearestInt(v0)), + DemoteTo(du, NearestInt(v1)), + DemoteTo(du, NearestInt(v2)), + DemoteTo(du, NearestInt(v3)), du, &output[4 * i]); + } + } +#if JXL_MEMORY_SANITIZER + __msan_poison(output + num_channels * len, + sizeof(output[0]) * num_channels * padding); +#endif +} + +void StoreFloatRow(float* JXL_RESTRICT input[3], size_t x0, size_t len, + size_t num_channels, float* output) { + const HWY_CAPPED(float, 8) d; + if (num_channels == 1) { + memcpy(output, 
input[0] + x0, len * sizeof(output[0])); + } else if (num_channels == 2) { + for (size_t i = 0; i < len; i += Lanes(d)) { + StoreInterleaved2(LoadU(d, &input[0][x0 + i]), + LoadU(d, &input[1][x0 + i]), d, &output[2 * i]); + } + } else if (num_channels == 3) { + for (size_t i = 0; i < len; i += Lanes(d)) { + StoreInterleaved3(LoadU(d, &input[0][x0 + i]), + LoadU(d, &input[1][x0 + i]), + LoadU(d, &input[2][x0 + i]), d, &output[3 * i]); + } + } else if (num_channels == 4) { + for (size_t i = 0; i < len; i += Lanes(d)) { + StoreInterleaved4(LoadU(d, &input[0][x0 + i]), + LoadU(d, &input[1][x0 + i]), + LoadU(d, &input[2][x0 + i]), + LoadU(d, &input[3][x0 + i]), d, &output[4 * i]); + } + } +} + +static constexpr float kFSWeightMR = 7.0f / 16.0f; +static constexpr float kFSWeightBL = 3.0f / 16.0f; +static constexpr float kFSWeightBM = 5.0f / 16.0f; +static constexpr float kFSWeightBR = 1.0f / 16.0f; + +float LimitError(float error) { + float abserror = std::abs(error); + if (abserror > 48.0f) { + abserror = 32.0f; + } else if (abserror > 16.0f) { + abserror = 0.5f * abserror + 8.0f; + } + return error > 0.0f ? 
abserror : -abserror; +} + +void WriteToOutput(j_decompress_ptr cinfo, float* JXL_RESTRICT rows[], + size_t xoffset, size_t len, size_t num_channels, + uint8_t* JXL_RESTRICT output) { + jpeg_decomp_master* m = cinfo->master; + uint8_t* JXL_RESTRICT scratch_space = m->output_scratch_; + if (cinfo->quantize_colors && m->quant_pass_ == 1) { + float* error_row[kMaxComponents]; + float* next_error_row[kMaxComponents]; + if (cinfo->dither_mode == JDITHER_ORDERED) { + for (size_t c = 0; c < num_channels; ++c) { + DitherRow(cinfo, &rows[c][xoffset], c, cinfo->output_scanline, + cinfo->output_width); + } + } else if (cinfo->dither_mode == JDITHER_FS) { + for (size_t c = 0; c < num_channels; ++c) { + if (cinfo->output_scanline % 2 == 0) { + error_row[c] = m->error_row_[c]; + next_error_row[c] = m->error_row_[c + kMaxComponents]; + } else { + error_row[c] = m->error_row_[c + kMaxComponents]; + next_error_row[c] = m->error_row_[c]; + } + memset(next_error_row[c], 0.0, cinfo->output_width * sizeof(float)); + } + } + const float mul = 255.0f; + if (cinfo->dither_mode != JDITHER_FS) { + StoreUnsignedRow(rows, xoffset, len, num_channels, mul, scratch_space); + } + for (size_t i = 0; i < len; ++i) { + uint8_t* pixel = &scratch_space[num_channels * i]; + if (cinfo->dither_mode == JDITHER_FS) { + for (size_t c = 0; c < num_channels; ++c) { + float val = rows[c][i] * mul + LimitError(error_row[c][i]); + pixel[c] = std::round(std::min(255.0f, std::max(0.0f, val))); + } + } + int index = LookupColorIndex(cinfo, pixel); + output[i] = index; + if (cinfo->dither_mode == JDITHER_FS) { + size_t prev_i = i > 0 ? i - 1 : 0; + size_t next_i = i + 1 < len ? 
i + 1 : len - 1; + for (size_t c = 0; c < num_channels; ++c) { + float error = pixel[c] - cinfo->colormap[c][index]; + error_row[c][next_i] += kFSWeightMR * error; + next_error_row[c][prev_i] += kFSWeightBL * error; + next_error_row[c][i] += kFSWeightBM * error; + next_error_row[c][next_i] += kFSWeightBR * error; + } + } + } + } else if (m->output_data_type_ == JPEGLI_TYPE_UINT8) { + const float mul = 255.0; + StoreUnsignedRow(rows, xoffset, len, num_channels, mul, scratch_space); + memcpy(output, scratch_space, len * num_channels); + } else if (m->output_data_type_ == JPEGLI_TYPE_UINT16) { + const float mul = 65535.0; + uint16_t* tmp = reinterpret_cast<uint16_t*>(scratch_space); + StoreUnsignedRow(rows, xoffset, len, num_channels, mul, tmp); + if (m->swap_endianness_) { + const HWY_CAPPED(uint16_t, 8) du; + size_t output_len = len * num_channels; + for (size_t j = 0; j < output_len; j += Lanes(du)) { + auto v = LoadU(du, tmp + j); + auto vswap = Or(ShiftRightSame(v, 8), ShiftLeftSame(v, 8)); + StoreU(vswap, du, tmp + j); + } + } + memcpy(output, tmp, len * num_channels * 2); + } else if (m->output_data_type_ == JPEGLI_TYPE_FLOAT) { + float* tmp = reinterpret_cast<float*>(scratch_space); + StoreFloatRow(rows, xoffset, len, num_channels, tmp); + if (m->swap_endianness_) { + size_t output_len = len * num_channels; + for (size_t j = 0; j < output_len; ++j) { + tmp[j] = BSwapFloat(tmp[j]); + } + } + memcpy(output, tmp, len * num_channels * 4); + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE + +namespace jpegli { + +HWY_EXPORT(GatherBlockStats); +HWY_EXPORT(WriteToOutput); +HWY_EXPORT(DecenterRow); + +void GatherBlockStats(const int16_t* JXL_RESTRICT coeffs, + const size_t coeffs_size, int32_t* JXL_RESTRICT nonzeros, + int32_t* JXL_RESTRICT sumabs) { + return HWY_DYNAMIC_DISPATCH(GatherBlockStats)(coeffs, coeffs_size, nonzeros, + sumabs); +} + +void 
WriteToOutput(j_decompress_ptr cinfo, float* JXL_RESTRICT rows[], + size_t xoffset, size_t len, size_t num_channels, + uint8_t* JXL_RESTRICT output) { + return HWY_DYNAMIC_DISPATCH(WriteToOutput)(cinfo, rows, xoffset, len, + num_channels, output); +} + +void DecenterRow(float* row, size_t xsize) { + return HWY_DYNAMIC_DISPATCH(DecenterRow)(row, xsize); +} + +bool ShouldApplyDequantBiases(j_decompress_ptr cinfo, int ci) { + const auto& compinfo = cinfo->comp_info[ci]; + return (compinfo.h_samp_factor == cinfo->max_h_samp_factor && + compinfo.v_samp_factor == cinfo->max_v_samp_factor); +} + +// See the following article for the details: +// J. R. Price and M. Rabbani, "Dequantization bias for JPEG decompression" +// Proceedings International Conference on Information Technology: Coding and +// Computing (Cat. No.PR00540), 2000, pp. 30-35, doi: 10.1109/ITCC.2000.844179. +void ComputeOptimalLaplacianBiases(const int num_blocks, const int* nonzeros, + const int* sumabs, float* biases) { + for (size_t k = 1; k < DCTSIZE2; ++k) { + if (nonzeros[k] == 0) { + biases[k] = 0.5f; + continue; + } + // Notation adapted from the article + float N = num_blocks; + float N1 = nonzeros[k]; + float N0 = num_blocks - N1; + float S = sumabs[k]; + // Compute gamma from N0, N1, N, S (eq. 11), with A and B being just + // temporary grouping of terms. + float A = 4.0 * S + 2.0 * N; + float B = 4.0 * S - 2.0 * N1; + float gamma = (-1.0 * N0 + std::sqrt(N0 * N0 * 1.0 + A * B)) / A; + float gamma2 = gamma * gamma; + // The bias is computed from gamma with (eq. 5), where the quantization + // multiplier Q can be factored out and thus the bias can be applied + // directly on the quantized coefficient. 
+ biases[k] = + 0.5 * (((1.0 + gamma2) / (1.0 - gamma2)) + 1.0 / std::log(gamma)); + } +} + +constexpr std::array<int, SAVED_COEFS> Q_POS = {0, 1, 8, 16, 9, + 2, 3, 10, 17, 24}; + +bool is_nonzero_quantizers(const JQUANT_TBL* qtable) { + return std::all_of(Q_POS.begin(), Q_POS.end(), + [&](int pos) { return qtable->quantval[pos] != 0; }); +} + +// Determine whether smoothing should be applied during decompression +bool do_smoothing(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + bool smoothing_useful = false; + + if (!cinfo->progressive_mode || cinfo->coef_bits == nullptr) { + return false; + } + auto coef_bits_latch = m->coef_bits_latch; + auto prev_coef_bits_latch = m->prev_coef_bits_latch; + + for (int ci = 0; ci < cinfo->num_components; ci++) { + jpeg_component_info* compptr = &cinfo->comp_info[ci]; + JQUANT_TBL* qtable = compptr->quant_table; + int* coef_bits = cinfo->coef_bits[ci]; + int* prev_coef_bits = cinfo->coef_bits[ci + cinfo->num_components]; + + // Return early if conditions for smoothing are not met + if (qtable == nullptr || !is_nonzero_quantizers(qtable) || + coef_bits[0] < 0) { + return false; + } + + coef_bits_latch[ci][0] = coef_bits[0]; + + for (int coefi = 1; coefi < SAVED_COEFS; coefi++) { + prev_coef_bits_latch[ci][coefi] = + cinfo->input_scan_number > 1 ? prev_coef_bits[coefi] : -1; + if (coef_bits[coefi] != 0) { + smoothing_useful = true; + } + coef_bits_latch[ci][coefi] = coef_bits[coefi]; + } + } + + return smoothing_useful; +} + +void PredictSmooth(j_decompress_ptr cinfo, JBLOCKARRAY blocks, int component, + size_t bx, int iy) { + const size_t imcu_row = cinfo->output_iMCU_row; + int16_t* scratch = cinfo->master->smoothing_scratch_; + std::vector<int> Q_VAL(SAVED_COEFS); + int* coef_bits; + + std::array<std::array<int, 5>, 5> dc_values; + auto& compinfo = cinfo->comp_info[component]; + const size_t by0 = imcu_row * compinfo.v_samp_factor; + const size_t by = by0 + iy; + + int prev_iy = by > 0 ? 
iy - 1 : 0; + int prev_prev_iy = by > 1 ? iy - 2 : prev_iy; + int next_iy = by + 1 < compinfo.height_in_blocks ? iy + 1 : iy; + int next_next_iy = by + 2 < compinfo.height_in_blocks ? iy + 2 : next_iy; + + const int16_t* cur_row = blocks[iy][bx]; + const int16_t* prev_row = blocks[prev_iy][bx]; + const int16_t* prev_prev_row = blocks[prev_prev_iy][bx]; + const int16_t* next_row = blocks[next_iy][bx]; + const int16_t* next_next_row = blocks[next_next_iy][bx]; + + int prev_block_ind = bx ? -DCTSIZE2 : 0; + int prev_prev_block_ind = bx > 1 ? -2 * DCTSIZE2 : prev_block_ind; + int next_block_ind = bx + 1 < compinfo.width_in_blocks ? DCTSIZE2 : 0; + int next_next_block_ind = + bx + 2 < compinfo.width_in_blocks ? DCTSIZE2 * 2 : next_block_ind; + + std::array<const int16_t*, 5> row_ptrs = {prev_prev_row, prev_row, cur_row, + next_row, next_next_row}; + std::array<int, 5> block_inds = {prev_prev_block_ind, prev_block_ind, 0, + next_block_ind, next_next_block_ind}; + + memcpy(scratch, cur_row, DCTSIZE2 * sizeof(cur_row[0])); + + for (int r = 0; r < 5; ++r) { + for (int c = 0; c < 5; ++c) { + dc_values[r][c] = row_ptrs[r][block_inds[c]]; + } + } + // Get the correct coef_bits: In case of an incomplete scan, we use the + // prev coeficients. 
+ if (cinfo->output_iMCU_row + 1 > cinfo->input_iMCU_row) { + coef_bits = cinfo->master->prev_coef_bits_latch[component]; + } else { + coef_bits = cinfo->master->coef_bits_latch[component]; + } + + bool change_dc = true; + for (int i = 1; i < SAVED_COEFS; i++) { + if (coef_bits[i] != -1) { + change_dc = false; + break; + } + } + + JQUANT_TBL* quanttbl = cinfo->quant_tbl_ptrs[compinfo.quant_tbl_no]; + for (size_t i = 0; i < 6; ++i) { + Q_VAL[i] = quanttbl->quantval[Q_POS[i]]; + } + if (change_dc) { + for (size_t i = 6; i < SAVED_COEFS; ++i) { + Q_VAL[i] = quanttbl->quantval[Q_POS[i]]; + } + } + auto calculate_dct_value = [&](int coef_index) { + int64_t num = 0; + int pred; + int Al; + // we use the symmetry of the smoothing matrices by transposing the 5x5 dc + // matrix in that case. + bool swap_indices = coef_index == 2 || coef_index == 5 || coef_index == 8 || + coef_index == 9; + auto dc = [&](int i, int j) { + return swap_indices ? dc_values[j][i] : dc_values[i][j]; + }; + Al = coef_bits[coef_index]; + switch (coef_index) { + case 0: + // set the DC + num = (-2 * dc(0, 0) - 6 * dc(0, 1) - 8 * dc(0, 2) - 6 * dc(0, 3) - + 2 * dc(0, 4) - 6 * dc(1, 0) + 6 * dc(1, 1) + 42 * dc(1, 2) + + 6 * dc(1, 3) - 6 * dc(1, 4) - 8 * dc(2, 0) + 42 * dc(2, 1) + + 152 * dc(2, 2) + 42 * dc(2, 3) - 8 * dc(2, 4) - 6 * dc(3, 0) + + 6 * dc(3, 1) + 42 * dc(3, 2) + 6 * dc(3, 3) - 6 * dc(3, 4) - + 2 * dc(4, 0) - 6 * dc(4, 1) - 8 * dc(4, 2) - 6 * dc(4, 3) - + 2 * dc(4, 4)); + // special case: for the DC the dequantization is different + Al = 0; + break; + case 1: + case 2: + // set Q01 or Q10 + num = (change_dc ? 
(-dc(0, 0) - dc(0, 1) + dc(0, 3) + dc(0, 4) - + 3 * dc(1, 0) + 13 * dc(1, 1) - 13 * dc(1, 3) + + 3 * dc(1, 4) - 3 * dc(2, 0) + 38 * dc(2, 1) - + 38 * dc(2, 3) + 3 * dc(2, 4) - 3 * dc(3, 0) + + 13 * dc(3, 1) - 13 * dc(3, 3) + 3 * dc(3, 4) - + dc(4, 0) - dc(4, 1) + dc(4, 3) + dc(4, 4)) + : (-7 * dc(2, 0) + 50 * dc(2, 1) - 50 * dc(2, 3) + + 7 * dc(2, 4))); + break; + case 3: + case 5: + // set Q02 or Q20 + num = (change_dc + ? dc(0, 2) + 2 * dc(1, 1) + 7 * dc(1, 2) + 2 * dc(1, 3) - + 5 * dc(2, 1) - 14 * dc(2, 2) - 5 * dc(2, 3) + + 2 * dc(3, 1) + 7 * dc(3, 2) + 2 * dc(3, 3) + dc(4, 2) + : (-dc(0, 2) + 13 * dc(1, 2) - 24 * dc(2, 2) + + 13 * dc(3, 2) - dc(4, 2))); + break; + case 4: + // set Q11 + num = + (change_dc ? -dc(0, 0) + dc(0, 4) + 9 * dc(1, 1) - 9 * dc(1, 3) - + 9 * dc(3, 1) + 9 * dc(3, 3) + dc(4, 0) - dc(4, 4) + : (dc(1, 4) + dc(3, 0) - 10 * dc(3, 1) + 10 * dc(3, 3) - + dc(0, 1) - dc(3, 4) + dc(4, 1) - dc(4, 3) + dc(0, 3) - + dc(1, 0) + 10 * dc(1, 1) - 10 * dc(1, 3))); + break; + case 6: + case 9: + // set Q03 or Q30 + num = (dc(1, 1) - dc(1, 3) + 2 * dc(2, 1) - 2 * dc(2, 3) + dc(3, 1) - + dc(3, 3)); + break; + case 7: + case 8: + // set Q12 and Q21 + num = (dc(1, 1) - 3 * dc(1, 2) + dc(1, 3) - dc(3, 1) + 3 * dc(3, 2) - + dc(3, 3)); + break; + } + num = Q_VAL[0] * num; + if (num >= 0) { + pred = ((Q_VAL[coef_index] << 7) + num) / (Q_VAL[coef_index] << 8); + if (Al > 0 && pred >= (1 << Al)) pred = (1 << Al) - 1; + } else { + pred = ((Q_VAL[coef_index] << 7) - num) / (Q_VAL[coef_index] << 8); + if (Al > 0 && pred >= (1 << Al)) pred = (1 << Al) - 1; + pred = -pred; + } + return static_cast<int16_t>(pred); + }; + + int loop_end = change_dc ? 
SAVED_COEFS : 6; + for (int i = 1; i < loop_end; ++i) { + if (coef_bits[i] != 0 && scratch[Q_POS[i]] == 0) { + scratch[Q_POS[i]] = calculate_dct_value(i); + } + } + if (change_dc) { + scratch[0] = calculate_dct_value(0); + } +} + +void PrepareForOutput(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + bool smoothing = do_smoothing(cinfo); + m->apply_smoothing = smoothing && cinfo->do_block_smoothing; + size_t coeffs_per_block = cinfo->num_components * DCTSIZE2; + memset(m->nonzeros_, 0, coeffs_per_block * sizeof(m->nonzeros_[0])); + memset(m->sumabs_, 0, coeffs_per_block * sizeof(m->sumabs_[0])); + memset(m->num_processed_blocks_, 0, sizeof(m->num_processed_blocks_)); + memset(m->biases_, 0, coeffs_per_block * sizeof(m->biases_[0])); + cinfo->output_iMCU_row = 0; + cinfo->output_scanline = 0; + const float kDequantScale = 1.0f / (8 * 255); + for (int c = 0; c < cinfo->num_components; c++) { + const auto& comp = cinfo->comp_info[c]; + JQUANT_TBL* table = comp.quant_table; + if (table == nullptr) continue; + for (size_t k = 0; k < DCTSIZE2; ++k) { + m->dequant_[c * DCTSIZE2 + k] = table->quantval[k] * kDequantScale; + } + } + ChooseInverseTransform(cinfo); + ChooseColorTransform(cinfo); +} + +void DecodeCurrentiMCURow(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + const size_t imcu_row = cinfo->output_iMCU_row; + JBLOCKARRAY ba[kMaxComponents]; + for (int c = 0; c < cinfo->num_components; ++c) { + const jpeg_component_info* comp = &cinfo->comp_info[c]; + int by0 = imcu_row * comp->v_samp_factor; + int block_rows_left = comp->height_in_blocks - by0; + int max_block_rows = std::min(comp->v_samp_factor, block_rows_left); + int offset = m->streaming_mode_ ? 
0 : by0;
+    ba[c] = (*cinfo->mem->access_virt_barray)(
+        reinterpret_cast<j_common_ptr>(cinfo), m->coef_arrays[c], offset,
+        max_block_rows, false);
+  }
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    size_t k0 = c * DCTSIZE2;
+    auto& compinfo = cinfo->comp_info[c];
+    size_t block_row = imcu_row * compinfo.v_samp_factor;
+    if (ShouldApplyDequantBiases(cinfo, c)) {
+      // Update statistics for this iMCU row.
+      for (int iy = 0; iy < compinfo.v_samp_factor; ++iy) {
+        size_t by = block_row + iy;
+        if (by >= compinfo.height_in_blocks) {
+          continue;
+        }
+        int16_t* JXL_RESTRICT coeffs = &ba[c][iy][0][0];
+        size_t num = compinfo.width_in_blocks * DCTSIZE2;
+        GatherBlockStats(coeffs, num, &m->nonzeros_[k0], &m->sumabs_[k0]);
+        m->num_processed_blocks_[c] += compinfo.width_in_blocks;
+      }
+      if (imcu_row % 4 == 3) {
+        // Re-compute optimal biases every few iMCU-rows.
+        ComputeOptimalLaplacianBiases(m->num_processed_blocks_[c],
+                                      &m->nonzeros_[k0], &m->sumabs_[k0],
+                                      &m->biases_[k0]);
+      }
+    }
+    RowBuffer<float>* raw_out = &m->raw_output_[c];
+    for (int iy = 0; iy < compinfo.v_samp_factor; ++iy) {
+      size_t by = block_row + iy;
+      if (by >= compinfo.height_in_blocks) {
+        continue;
+      }
+      size_t dctsize = m->scaled_dct_size[c];
+      int16_t* JXL_RESTRICT row_in = &ba[c][iy][0][0];
+      float* JXL_RESTRICT row_out = raw_out->Row(by * dctsize);
+      for (size_t bx = 0; bx < compinfo.width_in_blocks; ++bx) {
+        if (m->apply_smoothing) {
+          PredictSmooth(cinfo, ba[c], c, bx, iy);
+          (*m->inverse_transform[c])(m->smoothing_scratch_, &m->dequant_[k0],
+                                     &m->biases_[k0], m->idct_scratch_,
+                                     &row_out[bx * dctsize], raw_out->stride(),
+                                     dctsize);
+        } else {
+          (*m->inverse_transform[c])(&row_in[bx * DCTSIZE2], &m->dequant_[k0],
+                                     &m->biases_[k0], m->idct_scratch_,
+                                     &row_out[bx * dctsize], raw_out->stride(),
+                                     dctsize);
+        }
+      }
+      if (m->streaming_mode_) {
+        memset(row_in, 0, compinfo.width_in_blocks * sizeof(JBLOCK));
+      }
+    }
+  }
+}
+
+// Raw-data output path: decodes the current iMCU row and copies it, one
+// component at a time and without any upsampling or color conversion, into
+// the caller's per-component row arrays.
+//
+// data[c][i] receives row i (counted from the top of this iMCU row) of
+// component c. Each component contributes v_samp_factor * DCTSIZE rows per
+// call, clipped to the component height in samples
+// (height_in_blocks * DCTSIZE); every row is width_in_blocks * DCTSIZE
+// samples wide.
+void ProcessRawOutput(j_decompress_ptr cinfo, JSAMPIMAGE data) {
+  jpegli::DecodeCurrentiMCURow(cinfo);
+  jpeg_decomp_master* m = cinfo->master;
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    const auto& compinfo = cinfo->comp_info[c];
+    size_t comp_width = compinfo.width_in_blocks * DCTSIZE;
+    size_t comp_height = compinfo.height_in_blocks * DCTSIZE;
+    size_t comp_nrows = compinfo.v_samp_factor * DCTSIZE;
+    // [y0, y1) is the range of component rows covered by this iMCU row.
+    size_t y0 = cinfo->output_iMCU_row * compinfo.v_samp_factor * DCTSIZE;
+    size_t y1 = std::min(y0 + comp_nrows, comp_height);
+    for (size_t y = y0; y < y1; ++y) {
+      float* rows[1] = {m->raw_output_[c].Row(y)};
+      uint8_t* output = data[c][y - y0];
+      DecenterRow(rows[0], comp_width);
+      WriteToOutput(cinfo, rows, 0, comp_width, 1, output);
+    }
+  }
+  ++cinfo->output_iMCU_row;
+  // The scanline counter advances by a whole iMCU row, so it may step past
+  // output_height on the last row; hence the >= comparison below.
+  cinfo->output_scanline += cinfo->max_v_samp_factor * DCTSIZE;
+  if (cinfo->output_scanline >= cinfo->output_height) {
+    ++m->output_passes_done_;
+  }
+}
+
+// Performs one step of pixel output.
+//
+// If enough iMCU rows have been decoded to render the next scanline (taking
+// into account one iMCU row of context when m->need_context_rows_ is set),
+// renders scanlines starting at cinfo->output_scanline: chroma upsampling,
+// color transform, decentering, and conversion to the output sample format.
+// Otherwise decodes the current iMCU row and advances cinfo->output_iMCU_row
+// without producing any scanlines.
+//
+// *num_output_rows is both input and output: it is the number of rows already
+// stored in `scanlines`, and is incremented for every scanline produced. At
+// most max_output_rows - *num_output_rows scanlines are produced per call.
+// If `scanlines` is null, rows are rendered and counted but not written.
+void ProcessOutput(j_decompress_ptr cinfo, size_t* num_output_rows,
+                   JSAMPARRAY scanlines, size_t max_output_rows) {
+  jpeg_decomp_master* m = cinfo->master;
+  const int vfactor = cinfo->max_v_samp_factor;
+  const int hfactor = cinfo->max_h_samp_factor;
+  const size_t context = m->need_context_rows_ ? 1 : 0;
+  const size_t imcu_row = cinfo->output_iMCU_row;
+  const size_t imcu_height = vfactor * m->min_scaled_dct_size;
+  const size_t imcu_width = hfactor * m->min_scaled_dct_size;
+  const size_t output_width = m->iMCU_cols_ * imcu_width;
+  if (imcu_row == cinfo->total_iMCU_rows ||
+      (imcu_row > context &&
+       cinfo->output_scanline < (imcu_row - context) * imcu_height)) {
+    // We are ready to output some scanlines.
+    size_t ybegin = cinfo->output_scanline;
+    size_t yend = (imcu_row == cinfo->total_iMCU_rows
+                       ? cinfo->output_height
+                       : (imcu_row - context) * imcu_height);
+    yend = std::min<size_t>(yend, ybegin + max_output_rows - *num_output_rows);
+    // Rendering works on groups of vfactor output rows; [yb, ye) is
+    // [ybegin, yend) widened to whole groups. Rows of a group that fall
+    // outside [ybegin, yend) are rendered but skipped when writing.
+    size_t yb = (ybegin / vfactor) * vfactor;
+    size_t ye = DivCeil(yend, vfactor) * vfactor;
+    for (size_t y = yb; y < ye; y += vfactor) {
+      // Step 1: fill render_output_[c] rows 0..vfactor-1 with the upsampled
+      // samples of every component for this group of output rows.
+      for (int c = 0; c < cinfo->num_components; ++c) {
+        RowBuffer<float>* raw_out = &m->raw_output_[c];
+        RowBuffer<float>* render_out = &m->render_output_[c];
+        int line_groups = vfactor / m->v_factor[c];
+        int downsampled_width = output_width / m->h_factor[c];
+        size_t yc = y / m->v_factor[c];
+        for (int dy = 0; dy < line_groups; ++dy) {
+          size_t ymid = yc + dy;
+          const float* JXL_RESTRICT row_mid = raw_out->Row(ymid);
+          if (cinfo->do_fancy_upsampling && m->v_factor[c] == 2) {
+            // At the top and bottom edges the missing neighbor row is
+            // replaced by the current row.
+            const float* JXL_RESTRICT row_top =
+                ymid == 0 ? row_mid : raw_out->Row(ymid - 1);
+            const float* JXL_RESTRICT row_bot = ymid + 1 == m->raw_height_[c]
+                                                    ? row_mid
+                                                    : raw_out->Row(ymid + 1);
+            Upsample2Vertical(row_top, row_mid, row_bot,
+                              render_out->Row(2 * dy),
+                              render_out->Row(2 * dy + 1), downsampled_width);
+          } else {
+            // Plain vertical replication (also the v_factor == 1 copy).
+            for (int yix = 0; yix < m->v_factor[c]; ++yix) {
+              memcpy(render_out->Row(m->v_factor[c] * dy + yix), row_mid,
+                     downsampled_width * sizeof(float));
+            }
+          }
+          if (m->h_factor[c] > 1) {
+            for (int yix = 0; yix < m->v_factor[c]; ++yix) {
+              int row_ix = m->v_factor[c] * dy + yix;
+              float* JXL_RESTRICT row = render_out->Row(row_ix);
+              float* JXL_RESTRICT tmp = m->upsample_scratch_;
+              if (cinfo->do_fancy_upsampling && m->h_factor[c] == 2) {
+                Upsample2Horizontal(row, tmp, output_width);
+              } else {
+                // TODO(szabadka) SIMDify this.
+                // Plain horizontal replication, done through the scratch
+                // buffer because the expansion cannot be done in place.
+                for (size_t x = 0; x < output_width; ++x) {
+                  tmp[x] = row[x / m->h_factor[c]];
+                }
+                memcpy(row, tmp, output_width * sizeof(tmp[0]));
+              }
+            }
+          }
+        }
+      }
+      // Step 2: color-convert and emit the rows of this group that lie in
+      // the requested range.
+      for (int yix = 0; yix < vfactor; ++yix) {
+        if (y + yix < ybegin || y + yix >= yend) continue;
+        float* rows[kMaxComponents];
+        int num_all_components =
+            std::max(cinfo->out_color_components, cinfo->num_components);
+        for (int c = 0; c < num_all_components; ++c) {
+          rows[c] = m->render_output_[c].Row(yix);
+        }
+        (*m->color_transform)(rows, output_width);
+        for (int c = 0; c < cinfo->out_color_components; ++c) {
+          // Undo the centering of the sample values around zero.
+          DecenterRow(rows[c], output_width);
+        }
+        if (scanlines) {
+          uint8_t* output = scanlines[*num_output_rows];
+          WriteToOutput(cinfo, rows, m->xoffset_, cinfo->output_width,
+                        cinfo->out_color_components, output);
+        }
+        JXL_ASSERT(cinfo->output_scanline == y + yix);
+        ++cinfo->output_scanline;
+        ++(*num_output_rows);
+        if (cinfo->output_scanline == cinfo->output_height) {
+          ++m->output_passes_done_;
+        }
+      }
+    }
+  } else {
+    // Not enough decoded rows yet: decode one more iMCU row instead.
+    DecodeCurrentiMCURow(cinfo);
+    ++cinfo->output_iMCU_row;
+  }
+}
+
+}  // namespace jpegli
+#endif  // HWY_ONCE
diff --git a/lib/jpegli/render.h b/lib/jpegli/render.h
new file mode 100644
index 0000000..ad69335
--- /dev/null
+++ b/lib/jpegli/render.h
@@ -0,0 +1,24 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_RENDER_H_
+#define LIB_JPEGLI_RENDER_H_
+
+#include <stddef.h>  // size_t, used in the declarations below
+#include <stdint.h>
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+void PrepareForOutput(j_decompress_ptr cinfo);
+
+// Performs one step of scanline output: either decodes one more iMCU row or
+// renders up to (max_output_rows - *num_output_rows) scanlines into
+// `scanlines`, incrementing *num_output_rows for each scanline produced.
+void ProcessOutput(j_decompress_ptr cinfo, size_t* num_output_rows,
+                   JSAMPARRAY scanlines, size_t max_output_rows);
+
+// Decodes the current iMCU row and writes it, per component and without
+// upsampling or color conversion, into `data`.
+void ProcessRawOutput(j_decompress_ptr cinfo, JSAMPIMAGE data);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_RENDER_H_
diff --git a/lib/jpegli/simd.cc b/lib/jpegli/simd.cc
new file mode 100644
index 0000000..5e84939
--- /dev/null
+++ b/lib/jpegli/simd.cc
@@ -0,0 +1,38 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/simd.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/simd.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// Number of uint8_t lanes in a vector of the target this translation unit is
+// being compiled for, i.e. the vector size in bytes.
+size_t GetVectorSize() { return HWY_LANES(uint8_t); }
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+namespace {
+
+HWY_EXPORT(GetVectorSize);  // Local function.
+
+}  // namespace
+
+// The dispatched value is cached in a function-local static, so the dynamic
+// dispatch only runs on the first call.
+size_t VectorSize() {
+  static size_t bytes = HWY_DYNAMIC_DISPATCH(GetVectorSize)();
+  return bytes;
+}
+
+}  // namespace jpegli
+#endif  // HWY_ONCE
diff --git a/lib/jpegli/simd.h b/lib/jpegli/simd.h
new file mode 100644
index 0000000..aec772e
--- /dev/null
+++ b/lib/jpegli/simd.h
@@ -0,0 +1,18 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_SIMD_H_
+#define LIB_JPEGLI_SIMD_H_
+
+#include <stddef.h>
+
+namespace jpegli {
+
+// Returns SIMD vector size in bytes.
+size_t VectorSize();
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_SIMD_H_
diff --git a/lib/jpegli/source_manager.cc b/lib/jpegli/source_manager.cc
new file mode 100644
index 0000000..0b8e0a5
--- /dev/null
+++ b/lib/jpegli/source_manager.cc
@@ -0,0 +1,90 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+
+namespace jpegli {
+
+// The init_source callbacks do nothing; their addresses double as tags that
+// let jpegli_mem_src() / jpegli_stdio_src() recognize a source manager they
+// installed earlier.
+void init_mem_source(j_decompress_ptr cinfo) {}
+void init_stdio_source(j_decompress_ptr cinfo) {}
+
+// Skips num_bytes bytes of input, refilling the buffer as often as needed.
+// Non-positive counts are ignored.
+//
+// The return value of fill_input_buffer is not checked, so this must only be
+// paired with fill functions that always make data available; both fill
+// functions in this file do (they return a fake EOI marker at end of input).
+void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {
+  if (num_bytes <= 0) return;
+  while (num_bytes > static_cast<long>(cinfo->src->bytes_in_buffer)) {
+    num_bytes -= cinfo->src->bytes_in_buffer;
+    (*cinfo->src->fill_input_buffer)(cinfo);
+  }
+  cinfo->src->next_input_byte += num_bytes;
+  cinfo->src->bytes_in_buffer -= num_bytes;
+}
+
+void term_source(j_decompress_ptr cinfo) {}
+
+// Points the source at a two-byte EOI marker (0xff 0xd9) so that a decoder
+// that runs out of input sees a regular end of image. Always returns TRUE.
+boolean EmitFakeEoiMarker(j_decompress_ptr cinfo) {
+  static constexpr uint8_t kFakeEoiMarker[2] = {0xff, 0xd9};
+  cinfo->src->next_input_byte = kFakeEoiMarker;
+  cinfo->src->bytes_in_buffer = 2;
+  return TRUE;
+}
+
+constexpr size_t kStdioBufferSize = 64 << 10;
+
+// Source manager that reads from a FILE* through a kStdioBufferSize-byte
+// buffer. `pub` must stay the first member: cinfo->src is cast to and from
+// this type.
+struct StdioSourceManager {
+  jpeg_source_mgr pub;
+  FILE* f;
+  uint8_t* buffer;
+
+  // Reads the next chunk of the file into the buffer. When fread() delivers
+  // nothing (end of file or read error), a fake EOI marker is supplied
+  // instead. Always returns TRUE.
+  static boolean fill_input_buffer(j_decompress_ptr cinfo) {
+    auto src = reinterpret_cast<StdioSourceManager*>(cinfo->src);
+    size_t num_bytes_read = fread(src->buffer, 1, kStdioBufferSize, src->f);
+    if (num_bytes_read == 0) {
+      return EmitFakeEoiMarker(cinfo);
+    }
+    src->pub.next_input_byte = src->buffer;
+    src->pub.bytes_in_buffer = num_bytes_read;
+    return TRUE;
+  }
+};
+
+}  // namespace jpegli
+
+// Installs (or re-targets) a source manager that reads the compressed stream
+// from the memory range [inbuffer, inbuffer + insize). It is an error if a
+// source manager of a different kind is already set on cinfo.
+void jpegli_mem_src(j_decompress_ptr cinfo, const unsigned char* inbuffer,
+                    unsigned long insize) {
+  if (cinfo->src && cinfo->src->init_source != jpegli::init_mem_source) {
+    JPEGLI_ERROR("jpegli_mem_src: a different source manager was already set");
+  }
+  if (!cinfo->src) {
+    cinfo->src = jpegli::Allocate<jpeg_source_mgr>(cinfo, 1);
+  }
+  cinfo->src->next_input_byte = inbuffer;
+  cinfo->src->bytes_in_buffer = insize;
+  cinfo->src->init_source = jpegli::init_mem_source;
+  cinfo->src->fill_input_buffer = jpegli::EmitFakeEoiMarker;
+  cinfo->src->skip_input_data = jpegli::skip_input_data;
+  cinfo->src->resync_to_restart = jpegli_resync_to_restart;
+  cinfo->src->term_source = jpegli::term_source;
+}
+
+// Installs (or re-targets) a source manager that reads the compressed stream
+// from `infile`. It is an error if a source manager of a different kind is
+// already set on cinfo.
+void jpegli_stdio_src(j_decompress_ptr cinfo, FILE* infile) {
+  if (cinfo->src && cinfo->src->init_source != jpegli::init_stdio_source) {
+    JPEGLI_ERROR(
+        "jpegli_stdio_src: a different source manager was already set");
+  }
+  if (!cinfo->src) {
+    // The read buffer is allocated together with the manager, and only once:
+    // calling this function again on the same cinfo (e.g. to decode the next
+    // file) reuses it instead of allocating another kStdioBufferSize bytes.
+    // cinfo->src is published last, so that a failed buffer allocation does
+    // not leave behind a manager without a buffer.
+    auto* new_src = jpegli::Allocate<jpegli::StdioSourceManager>(cinfo, 1);
+    new_src->buffer =
+        jpegli::Allocate<uint8_t>(cinfo, jpegli::kStdioBufferSize);
+    cinfo->src = reinterpret_cast<jpeg_source_mgr*>(new_src);
+  }
+  auto src = reinterpret_cast<jpegli::StdioSourceManager*>(cinfo->src);
+  src->f = infile;
+  // Start with an empty buffer so that the first read goes to the new file.
+  src->pub.next_input_byte = src->buffer;
+  src->pub.bytes_in_buffer = 0;
+  src->pub.init_source = jpegli::init_stdio_source;
+  src->pub.fill_input_buffer = jpegli::StdioSourceManager::fill_input_buffer;
+  src->pub.skip_input_data = jpegli::skip_input_data;
+  src->pub.resync_to_restart = jpegli_resync_to_restart;
+  src->pub.term_source = jpegli::term_source;
+}
diff --git a/lib/jpegli/source_manager_test.cc b/lib/jpegli/source_manager_test.cc
new file mode 100644
index 0000000..4e13787
--- /dev/null
+++ b/lib/jpegli/source_manager_test.cc
@@ -0,0 +1,142 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <ostream>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/base/status.h"
+
+namespace jpegli {
+namespace {
+
+// Decodes the whole image from the source manager already installed on cinfo
+// into `output`, one scanline per jpegli_read_scanlines() call.
+void ReadOutputImage(j_decompress_ptr cinfo, TestImage* output) {
+  jpegli_read_header(cinfo, /*require_image=*/TRUE);
+  jpegli_start_decompress(cinfo);
+  output->ysize = cinfo->output_height;
+  output->xsize = cinfo->output_width;
+  output->components = cinfo->num_components;
+  output->AllocatePixels();
+  size_t stride = cinfo->output_width * cinfo->num_components;
+  while (cinfo->output_scanline < cinfo->output_height) {
+    JSAMPROW scanline = &output->pixels[cinfo->output_scanline * stride];
+    jpegli_read_scanlines(cinfo, &scanline, 1);
+  }
+  jpegli_finish_decompress(cinfo);
+}
+
+// One test case: the input file, a short name for it, and the decoding
+// parameters (only dparams.size_factor is used by the tests in this file).
+struct TestConfig {
+  std::string fn;
+  std::string fn_desc;
+  DecompressParams dparams;
+};
+
+class SourceManagerTestParam : public ::testing::TestWithParam<TestConfig> {};
+
+namespace {
+// Returns a temporary file positioned at its start and containing exactly
+// `data`, or nullptr if the file could not be created or fully written.
+FILE* MemOpen(const std::vector<uint8_t>& data) {
+  FILE* src = tmpfile();
+  if (!src) return nullptr;
+  // A short write would silently turn into a truncated test input, so treat
+  // it as a failure and do not leak the stream.
+  if (fwrite(data.data(), 1, data.size(), src) != data.size()) {
+    fclose(src);
+    return nullptr;
+  }
+  rewind(src);
+  return src;
+}
+}  // namespace
+
+// Decodes a (possibly truncated) JPEG through the stdio source manager and
+// compares the result with what libjpeg produces for the same bytes.
+TEST_P(SourceManagerTestParam, TestStdioSourceManager) {
+  TestConfig config = GetParam();
+  std::vector<uint8_t> compressed = ReadTestData(config.fn.c_str());
+  if (config.dparams.size_factor < 1.0f) {
+    compressed.resize(compressed.size() * config.dparams.size_factor);
+  }
+  FILE* src = MemOpen(compressed);
+  ASSERT_TRUE(src);
+  TestImage output0;
+  jpeg_decompress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    jpegli_stdio_src(&cinfo, src);
+    ReadOutputImage(&cinfo, &output0);
+    return true;
+  };
+  bool ok = try_catch_block();
+  // Close the file before asserting so that a failure does not leak it.
+  fclose(src);
+  ASSERT_TRUE(ok);
+  jpegli_destroy_decompress(&cinfo);
+
+  TestImage output1;
+  DecodeWithLibjpeg(CompressParams(), DecompressParams(), compressed,
+                    &output1);
+  VerifyOutputImage(output1, output0, 1.0f);
+}
+
+// Same as above, but through the in-memory source manager.
+TEST_P(SourceManagerTestParam, TestMemSourceManager) {
+  TestConfig config = GetParam();
+  std::vector<uint8_t> compressed = ReadTestData(config.fn.c_str());
+  if (config.dparams.size_factor < 1.0f) {
+    compressed.resize(compressed.size() * config.dparams.size_factor);
+  }
+  TestImage output0;
+  jpeg_decompress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    jpegli_mem_src(&cinfo, compressed.data(), compressed.size());
+    ReadOutputImage(&cinfo, &output0);
+    return true;
+  };
+  ASSERT_TRUE(try_catch_block());
+  jpegli_destroy_decompress(&cinfo);
+
+  TestImage output1;
+  DecodeWithLibjpeg(CompressParams(), DecompressParams(), compressed, &output1);
+  VerifyOutputImage(output1, output0, 1.0f);
+}
+
+// Every test file is decoded from four truncated prefixes of its data.
+std::vector<TestConfig> GenerateTests() {
+  std::vector<TestConfig> all_tests;
+  {
+    std::vector<std::pair<std::string, std::string>> testfiles({
+        {"jxl/flower/flower.png.im_q85_444.jpg", "Q85YUV444"},
+        {"jxl/flower/flower.png.im_q85_420.jpg", "Q85YUV420"},
+        {"jxl/flower/flower.png.im_q85_420_R13B.jpg", "Q85YUV420R13B"},
+    });
+    for (const auto& it : testfiles) {
+      for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f}) {
+        TestConfig config;
+        config.fn = it.first;
+        config.fn_desc = it.second;
+        config.dparams.size_factor = size_factor;
+        all_tests.push_back(config);
+      }
+    }
+  }
+  return all_tests;
+}
+
+// Prints the short file name, plus "Partial<N>p" for truncated inputs, where
+// N is the size factor in percent.
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+  os << c.fn_desc;
+  if (c.dparams.size_factor < 1.0f) {
+    os << "Partial" << static_cast<int>(c.dparams.size_factor * 100) << "p";
+  }
+  return os;
+}
+
+std::string TestDescription(
+    const testing::TestParamInfo<SourceManagerTestParam::ParamType>& info) {
+  std::stringstream name;
+  name << info.param;
+  return name.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(SourceManagerTest, SourceManagerTestParam,
+                                testing::ValuesIn(GenerateTests()),
+                                TestDescription);
+
+}  // namespace
+}  // namespace jpegli
diff --git a/lib/jpegli/streaming_test.cc b/lib/jpegli/streaming_test.cc
new file mode 100644
index 0000000..8d2e357
--- /dev/null
+++ b/lib/jpegli/streaming_test.cc
@@ -0,0 +1,233 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+
+namespace jpegli {
+namespace {
+
+// A simple suspending source manager with an input buffer.
+// The manager never produces data by itself: fill_input_buffer() always
+// returns FALSE and skip_input_data() does nothing. New input only appears
+// when someone else updates `buffer` and the pub.next_input_byte /
+// pub.bytes_in_buffer fields.
+struct SourceManager {
+  jpeg_source_mgr pub;
+  std::vector<uint8_t> buffer;
+
+  // Starts out with no input available.
+  SourceManager() {
+    pub.next_input_byte = nullptr;
+    pub.bytes_in_buffer = 0;
+    pub.init_source = init_source;
+    pub.fill_input_buffer = fill_input_buffer;
+    pub.skip_input_data = skip_input_data;
+    pub.resync_to_restart = jpegli_resync_to_restart;
+    pub.term_source = term_source;
+  }
+
+  static void init_source(j_decompress_ptr cinfo) {}
+  static boolean fill_input_buffer(j_decompress_ptr cinfo) { return FALSE; }
+  static void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {}
+  static void term_source(j_decompress_ptr cinfo) {}
+};
+
+// A destination manager that empties its output buffer into a SourceManager's
+// input buffer. The buffer size is kept short because empty_output_buffer() is
+// called only when the output buffer is full, and we want to update the decoder
+// input frequently to demonstrate that streaming works.
+static constexpr size_t kOutputBufferSize = 1024;
+struct DestinationManager {
+  jpeg_destination_mgr pub;
+  // Compressor output that was not yet handed over to `dest`.
+  std::vector<uint8_t> buffer;
+  // The source manager (decoder input) that receives the compressed bytes.
+  SourceManager* dest;
+
+  explicit DestinationManager(SourceManager* src)
+      : buffer(kOutputBufferSize), dest(src) {
+    pub.next_output_byte = buffer.data();
+    pub.free_in_buffer = buffer.size();
+    pub.init_destination = init_destination;
+    pub.empty_output_buffer = empty_output_buffer;
+    pub.term_destination = term_destination;
+  }
+
+  static void init_destination(j_compress_ptr cinfo) {}
+
+  // Appends the bytes written to the output buffer so far to the decoder's
+  // unread input, then marks the output buffer as empty again.
+  static boolean empty_output_buffer(j_compress_ptr cinfo) {
+    auto us = reinterpret_cast<DestinationManager*>(cinfo->dest);
+    jpeg_destination_mgr* src = &us->pub;
+    jpeg_source_mgr* dst = &us->dest->pub;
+    std::vector<uint8_t>& src_buf = us->buffer;
+    std::vector<uint8_t>& dst_buf = us->dest->buffer;
+    // Move the bytes the decoder has not consumed yet to the front of its
+    // buffer; everything before next_input_byte is no longer needed.
+    if (dst->bytes_in_buffer > 0 && dst->bytes_in_buffer < dst_buf.size()) {
+      memmove(dst_buf.data(), dst->next_input_byte, dst->bytes_in_buffer);
+    }
+    size_t src_len = src_buf.size() - src->free_in_buffer;
+    dst_buf.resize(dst->bytes_in_buffer + src_len);
+    // Guarded so that an empty hand-over never indexes into an empty vector.
+    if (src_len > 0) {
+      memcpy(dst_buf.data() + dst->bytes_in_buffer, src_buf.data(), src_len);
+    }
+    dst->next_input_byte = dst_buf.data();
+    dst->bytes_in_buffer = dst_buf.size();
+    src->next_output_byte = src_buf.data();
+    src->free_in_buffer = src_buf.size();
+    return TRUE;
+  }
+
+  // Flushes whatever is left in the output buffer.
+  static void term_destination(j_compress_ptr cinfo) {
+    empty_output_buffer(cinfo);
+  }
+};
+
+struct TestConfig {
+  TestImage input;
+  CompressParams jparams;
+};
+
+class StreamingTestParam : public ::testing::TestWithParam<TestConfig> {};
+
+// Feeds an image to the compressor one iMCU row at a time while a decoder
+// that is connected to the compressor's output reads the image back, and
+// checks that decoded rows become available while compression is still in
+// progress.
+TEST_P(StreamingTestParam, TestStreaming) {
+  jpeg_decompress_struct dinfo = {};
+  jpeg_compress_struct cinfo = {};
+  TestConfig config = GetParam();
+  TestImage& input = config.input;
+  TestImage output;
+  GeneratePixels(&input);
+  // The managers live in the test body rather than in the lambda below:
+  // dinfo.src and cinfo.dest keep pointing at them until the destroy calls at
+  // the end of the test, and this also keeps these non-trivial objects out of
+  // the frame that the error handler set up by ERROR_HANDLER_SETUP returns
+  // from.
+  SourceManager src;
+  DestinationManager dest(&src);
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    dinfo.err = cinfo.err;
+    dinfo.client_data = cinfo.client_data;
+    // Create a pair of compressor and decompressor objects, where the
+    // compressor's output is connected to the decompressor's input.
+    jpegli_create_decompress(&dinfo);
+    jpegli_create_compress(&cinfo);
+    dinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+    cinfo.dest = reinterpret_cast<jpeg_destination_mgr*>(&dest);
+
+    cinfo.image_width = input.xsize;
+    cinfo.image_height = input.ysize;
+    cinfo.input_components = input.components;
+    cinfo.in_color_space = static_cast<J_COLOR_SPACE>(input.color_space);
+    jpegli_set_defaults(&cinfo);
+    cinfo.comp_info[0].v_samp_factor = config.jparams.v_sampling[0];
+    jpegli_set_progressive_level(&cinfo, 0);
+    cinfo.optimize_coding = FALSE;
+    jpegli_start_compress(&cinfo, TRUE);
+
+    size_t stride = cinfo.image_width * cinfo.input_components;
+    size_t iMCU_height = 8 * cinfo.max_v_samp_factor;
+    std::vector<uint8_t> row_bytes(iMCU_height * stride);
+    size_t yin = 0;   // number of rows fed to the compressor so far
+    size_t yout = 0;  // number of rows read back from the decoder so far
+    while (yin < cinfo.image_height) {
+      // Feed one iMCU row at a time to the compressor.
+      size_t lines_in = std::min(iMCU_height, cinfo.image_height - yin);
+      memcpy(&row_bytes[0], &input.pixels[yin * stride], lines_in * stride);
+      std::vector<JSAMPROW> rows_in(lines_in);
+      for (size_t i = 0; i < lines_in; ++i) {
+        rows_in[i] = &row_bytes[i * stride];
+      }
+      EXPECT_EQ(lines_in,
+                jpegli_write_scanlines(&cinfo, &rows_in[0], lines_in));
+      yin += lines_in;
+      if (yin == cinfo.image_height) {
+        jpegli_finish_compress(&cinfo);
+      }
+
+      // After the first iMCU row, we don't yet expect any output because the
+      // compressor delays processing to have context rows after the iMCU row.
+      if (yin < std::min<size_t>(2 * iMCU_height, cinfo.image_height)) {
+        continue;
+      }
+
+      // After two iMCU rows, the compressor has started emitting compressed
+      // data. We check here that at least the scan header was output, because
+      // we expect that the compressor's output buffer was filled at least once
+      // while emitting the first compressed iMCU row.
+      if (yin == std::min<size_t>(2 * iMCU_height, cinfo.image_height)) {
+        EXPECT_EQ(JPEG_REACHED_SOS,
+                  jpegli_read_header(&dinfo, /*require_image=*/TRUE));
+        output.xsize = dinfo.image_width;
+        output.ysize = dinfo.image_height;
+        output.components = dinfo.num_components;
+        EXPECT_EQ(output.xsize, input.xsize);
+        EXPECT_EQ(output.ysize, input.ysize);
+        EXPECT_EQ(output.components, input.components);
+        EXPECT_TRUE(jpegli_start_decompress(&dinfo));
+        output.pixels.resize(output.ysize * stride);
+        if (yin < cinfo.image_height) {
+          continue;
+        }
+      }
+
+      // Keep feeding the compressor until seven iMCU rows have been written
+      // (or the whole image, if it is shorter). The compressor holds back
+      // rows for context and the decoder also needs context rows for
+      // upsampling and smoothing, so no decoded output is requested before
+      // this point.
+      if (yin < 7 * iMCU_height && yin < cinfo.image_height) {
+        continue;
+      }
+
+      // From here on one iMCU row of output is read back for every iMCU row
+      // fed, i.e. the reader stays six iMCU rows behind the writer. Once the
+      // whole image was fed, all remaining rows are read at once.
+      // TODO(szabadka) Reduce the processing delay in the decoder if possible.
+      size_t lines_out =
+          (yin == cinfo.image_height ? cinfo.image_height - yout : iMCU_height);
+      std::vector<JSAMPROW> rows_out(lines_out);
+      for (size_t i = 0; i < lines_out; ++i) {
+        rows_out[i] =
+            reinterpret_cast<JSAMPLE*>(&output.pixels[(yout + i) * stride]);
+      }
+      EXPECT_EQ(lines_out,
+                jpegli_read_scanlines(&dinfo, &rows_out[0], lines_out));
+      VerifyOutputImage(input, output, yout, lines_out, 3.8f);
+      yout += lines_out;
+
+      if (yout == cinfo.image_height) {
+        EXPECT_TRUE(jpegli_finish_decompress(&dinfo));
+      }
+    }
+    return true;
+  };
+  EXPECT_TRUE(try_catch_block());
+  jpegli_destroy_decompress(&dinfo);
+  jpegli_destroy_compress(&cinfo);
+}
+
+// 1920 pixel wide images whose height is 1080 plus 0, 1, 8 or 9 rows, each
+// with and without 2x vertical subsampling factor on the first component.
+std::vector<TestConfig> GenerateTests() {
+  std::vector<TestConfig> all_tests;
+  const size_t xsize0 = 1920;
+  const size_t ysize0 = 1080;
+  for (int dysize : {0, 1, 8, 9}) {
+    for (int v_sampling : {1, 2}) {
+      TestConfig config;
+      config.input.xsize = xsize0;
+      config.input.ysize = ysize0 + dysize;
+      config.jparams.h_sampling = {1, 1, 1};
+      config.jparams.v_sampling = {v_sampling, 1, 1};
+      all_tests.push_back(config);
+    }
+  }
+  return all_tests;
+}
+
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+  os << c.input;
+  os << c.jparams;
+  return os;
+}
+
+std::string TestDescription(
+    const testing::TestParamInfo<StreamingTestParam::ParamType>& info) {
+  std::stringstream name;
+  name << info.param;
+  return name.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(StreamingTest, StreamingTestParam,
+                                testing::ValuesIn(GenerateTests()),
+                                TestDescription);
+
+}  // namespace
+}  // namespace jpegli
diff --git a/lib/jpegli/test_params.h b/lib/jpegli/test_params.h
new file mode 100644
index 0000000..6ab9fa5
--- /dev/null
+++ b/lib/jpegli/test_params.h
@@ -0,0 +1,163 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_TEST_PARAMS_H_
+#define LIB_JPEGLI_TEST_PARAMS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jpegli/types.h"
+
+namespace jpegli {
+
+// We define this here as well to make sure that the *_api_test.cc tests only
+// use the public API and therefore we don't include any *_internal.h headers.
+// Returns a / b rounded up; the result has the type of the first argument.
+template <typename T1, typename T2>
+constexpr inline T1 DivCeil(T1 a, T2 b) {
+  return (a + b - 1) / b;
+}
+
+// Number of elements of a C array. Must not be used on pointers.
+#define ARRAY_SIZE(X) (sizeof(X) / sizeof((X)[0]))
+
+// Scan number that is at least as large as any real scan number; used as the
+// "applies to all scans" value of ScanDecompressParams::max_scan_number.
+static constexpr int kLastScan = 0xffff;
+
+// External color map for color quantization tests. Every entry packs three
+// 8-bit channel values as 0xAABBCC, where AA goes to colormap row 0, BB to
+// row 1 and CC to row 2.
+static uint32_t kTestColorMap[] = {
+    0x000000, 0xff0000, 0x00ff00, 0x0000ff, 0xffff00, 0x00ffff,
+    0xff00ff, 0xffffff, 0x6251fc, 0x45d9c7, 0xa7f059, 0xd9a945,
+    0xfa4e44, 0xceaffc, 0xbad7db, 0xc1f0b1, 0xdbca9a, 0xfacac5,
+    0xf201ff, 0x0063db, 0x00f01c, 0xdbb204, 0xf12f0c, 0x7ba1dc};
+static constexpr int kTestColorMapNumColors = ARRAY_SIZE(kTestColorMap);
+
+// Marker types and payload of the extra markers that tests add to a stream
+// and later look for in the decoder's saved marker list.
+static constexpr int kSpecialMarker0 = 0xe5;
+static constexpr int kSpecialMarker1 = 0xe9;
+static constexpr uint8_t kMarkerData[] = {0, 1, 255, 0, 17};
+static constexpr uint8_t kMarkerSequence[] = {0xe6, 0xe8, 0xe7,
+                                              0xe6, 0xe7, 0xe8};
+static constexpr size_t kMarkerSequenceLen = ARRAY_SIZE(kMarkerSequence);
+
+// Which representation of the image a test feeds to / reads from the codec.
+enum JpegIOMode {
+  PIXELS,
+  RAW_DATA,
+  COEFFICIENTS,
+};
+
+// Description of a quantization table to install in slot `slot_idx`.
+// Generate() is defined elsewhere.
+struct CustomQuantTable {
+  int slot_idx = 0;
+  uint16_t table_type = 0;
+  int scale_factor = 100;
+  bool add_raw = false;
+  bool force_baseline = true;
+  std::vector<unsigned int> basic_table;
+  std::vector<unsigned int> quantval;
+  void Generate();
+};
+
+// An image in one or more of the representations listed in JpegIOMode.
+struct TestImage {
+  size_t xsize = 2268;
+  size_t ysize = 1512;
+  int color_space = 2;  // JCS_RGB
+  size_t components = 3;
+  JpegliDataType data_type = JPEGLI_TYPE_UINT8;
+  JpegliEndianness endianness = JPEGLI_NATIVE_ENDIAN;
+  std::vector<uint8_t> pixels;
+  std::vector<std::vector<uint8_t>> raw_data;
+  std::vector<std::vector<int16_t>> coeffs;
+  // Resizes `pixels` to hold ysize * xsize * components samples of data_type.
+  void AllocatePixels() {
+    pixels.resize(ysize * xsize * components *
+                  jpegli_bytes_per_sample(data_type));
+  }
+  // Drops all image data; the dimensions and format fields are kept.
+  void Clear() {
+    pixels.clear();
+    raw_data.clear();
+    coeffs.clear();
+  }
+};
+
+// Encoder settings of a test case.
+struct CompressParams {
+  int quality = 90;
+  bool set_jpeg_colorspace = false;
+  int jpeg_color_space = 0;  // JCS_UNKNOWN
+  std::vector<int> quant_indexes;
+  std::vector<CustomQuantTable> quant_tables;
+  // Per-component sampling factors; empty means 1 for every component.
+  std::vector<int> h_sampling;
+  std::vector<int> v_sampling;
+  std::vector<int> comp_ids;
+  // Negative values mean "not overridden".
+  int override_JFIF = -1;
+  int override_Adobe = -1;
+  bool add_marker = false;
+  bool simple_progression = false;
+  // -1 is library default
+  // 0, 1, 2 is set through jpegli_set_progressive_level()
+  // 2 + N is kScriptN
+  int progressive_mode = -1;
+  unsigned int restart_interval = 0;
+  int restart_in_rows = 0;
+  int smoothing_factor = 0;
+  int optimize_coding = -1;
+  bool use_flat_dc_luma_code = false;
+  bool omit_standard_tables = false;
+  bool xyb_mode = false;
+  bool libjpeg_mode = false;
+  bool use_adaptive_quantization = true;
+  std::vector<uint8_t> icc;
+
+  // Sampling factors of component c (1 if none were set).
+  int h_samp(int c) const { return h_sampling.empty() ? 1 : h_sampling[c]; }
+  int v_samp(int c) const { return v_sampling.empty() ? 1 : v_sampling[c]; }
+  // Largest sampling factor over all components (1 if none were set).
+  int max_h_sample() const {
+    auto it = std::max_element(h_sampling.begin(), h_sampling.end());
+    return it == h_sampling.end() ? 1 : *it;
+  }
+  int max_v_sample() const {
+    auto it = std::max_element(v_sampling.begin(), v_sampling.end());
+    return it == v_sampling.end() ? 1 : *it;
+  }
+  // Dimensions of component c of `input` in samples: the image size scaled
+  // by the component's sampling factor relative to the maximum one, rounded
+  // up to a multiple of 8.
+  int comp_width(const TestImage& input, int c) const {
+    return DivCeil(input.xsize * h_samp(c), max_h_sample() * 8) * 8;
+  }
+  int comp_height(const TestImage& input, int c) const {
+    return DivCeil(input.ysize * v_samp(c), max_v_sample() * 8) * 8;
+  }
+};
+
+// How the colormap is obtained when color quantization is enabled.
+enum ColorQuantMode {
+  CQUANT_1PASS,
+  CQUANT_2PASS,
+  CQUANT_EXTERNAL,
+  CQUANT_REUSE,
+};
+
+// Decoder settings that apply to all scans up to and including
+// max_scan_number.
+struct ScanDecompressParams {
+  int max_scan_number;
+  int dither_mode;
+  ColorQuantMode color_quant_mode;
+};
+
+// Decoder settings of a test case.
+struct DecompressParams {
+  // Values below 1 make the tests decode only a prefix of the compressed data.
+  float size_factor = 1.0f;
+  size_t chunk_size = 65536;
+  size_t max_output_lines = 16;
+  JpegIOMode output_mode = PIXELS;
+  JpegliDataType data_type = JPEGLI_TYPE_UINT8;
+  JpegliEndianness endianness = JPEGLI_NATIVE_ENDIAN;
+  bool set_out_color_space = false;
+  int out_color_space = 0;  // JCS_UNKNOWN
+  bool crop_output = false;
+  bool do_block_smoothing = false;
+  bool do_fancy_upsampling = true;
+  bool skip_scans = false;
+  int scale_num = 1;
+  int scale_denom = 1;
+  bool quantize_colors = false;
+  int desired_number_of_colors = 256;
+  std::vector<ScanDecompressParams> scan_params;
+};
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_TEST_PARAMS_H_
diff --git a/lib/jpegli/test_utils-inl.h b/lib/jpegli/test_utils-inl.h
new file mode 100644
index 0000000..a454917
--- /dev/null
+++ b/lib/jpegli/test_utils-inl.h
@@ -0,0 +1,430 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This template file is included in both the libjpeg_test_util.cc and the
+// test_utils.cc files with different JPEG_API_FN macros and possibly different
+// include paths for the jpeg headers.
+
+// Sequential non-interleaved.
+static constexpr jpeg_scan_info kScript1[] = {
+    {1, {0}, 0, 63, 0, 0},
+    {1, {1}, 0, 63, 0, 0},
+    {1, {2}, 0, 63, 0, 0},
+};
+// Sequential partially interleaved, chroma first.
+static constexpr jpeg_scan_info kScript2[] = {
+    {2, {1, 2}, 0, 63, 0, 0},
+    {1, {0}, 0, 63, 0, 0},
+};
+
+// Rest of the scan scripts are progressive.
+
+static constexpr jpeg_scan_info kScript3[] = {
+    // Interleaved full DC.
+    {3, {0, 1, 2}, 0, 0, 0, 0},
+    // Full AC scans.
+    {1, {0}, 1, 63, 0, 0},
+    {1, {1}, 1, 63, 0, 0},
+    {1, {2}, 1, 63, 0, 0},
+};
+static constexpr jpeg_scan_info kScript4[] = {
+    // Non-interleaved full DC.
+    {1, {0}, 0, 0, 0, 0},
+    {1, {1}, 0, 0, 0, 0},
+    {1, {2}, 0, 0, 0, 0},
+    // Full AC scans.
+    {1, {0}, 1, 63, 0, 0},
+    {1, {1}, 1, 63, 0, 0},
+    {1, {2}, 1, 63, 0, 0},
+};
+static constexpr jpeg_scan_info kScript5[] = {
+    // Partially interleaved full DC, chroma first.
+    {2, {1, 2}, 0, 0, 0, 0},
+    {1, {0}, 0, 0, 0, 0},
+    // AC shifted by 1 bit.
+    {1, {0}, 1, 63, 0, 1},
+    {1, {1}, 1, 63, 0, 1},
+    {1, {2}, 1, 63, 0, 1},
+    // AC refinement scan.
+    {1, {0}, 1, 63, 1, 0},
+    {1, {1}, 1, 63, 1, 0},
+    {1, {2}, 1, 63, 1, 0},
+};
+static constexpr jpeg_scan_info kScript6[] = {
+    // Interleaved DC shifted by 2 bits.
+    {3, {0, 1, 2}, 0, 0, 0, 2},
+    // Interleaved DC refinement scans.
+    {3, {0, 1, 2}, 0, 0, 2, 1},
+    {3, {0, 1, 2}, 0, 0, 1, 0},
+    // Full AC scans.
+    {1, {0}, 1, 63, 0, 0},
+    {1, {1}, 1, 63, 0, 0},
+    {1, {2}, 1, 63, 0, 0},
+};
+
+static constexpr jpeg_scan_info kScript7[] = {
+    // Non-interleaved DC shifted by 2 bits.
+    {1, {0}, 0, 0, 0, 2},
+    {1, {1}, 0, 0, 0, 2},
+    {1, {2}, 0, 0, 0, 2},
+    // Non-interleaved DC first refinement scans.
+    {1, {0}, 0, 0, 2, 1},
+    {1, {1}, 0, 0, 2, 1},
+    {1, {2}, 0, 0, 2, 1},
+    // Non-interleaved DC second refinement scans.
+    {1, {0}, 0, 0, 1, 0},
+    {1, {1}, 0, 0, 1, 0},
+    {1, {2}, 0, 0, 1, 0},
+    // Full AC scans.
+    {1, {0}, 1, 63, 0, 0},
+    {1, {1}, 1, 63, 0, 0},
+    {1, {2}, 1, 63, 0, 0},
+};
+
+static constexpr jpeg_scan_info kScript8[] = {
+    // Partially interleaved DC shifted by 2 bits, chroma first
+    {2, {1, 2}, 0, 0, 0, 2},
+    {1, {0}, 0, 0, 0, 2},
+    // Partially interleaved DC first refinement scans.
+    {2, {0, 2}, 0, 0, 2, 1},
+    {1, {1}, 0, 0, 2, 1},
+    // Partially interleaved DC second refinement scans, chroma first.
+    {2, {1, 2}, 0, 0, 1, 0},
+    {1, {0}, 0, 0, 1, 0},
+    // Full AC scans.
+    {1, {0}, 1, 63, 0, 0},
+    {1, {1}, 1, 63, 0, 0},
+    {1, {2}, 1, 63, 0, 0},
+};
+
+static constexpr jpeg_scan_info kScript9[] = {
+    // Interleaved full DC.
+    {3, {0, 1, 2}, 0, 0, 0, 0},
+    // AC scans for component 0
+    // shifted by 1 bit, two spectral ranges
+    {1, {0}, 1, 6, 0, 1},
+    {1, {0}, 7, 63, 0, 1},
+    // refinement scan, full
+    {1, {0}, 1, 63, 1, 0},
+    // AC scans for component 1
+    // shifted by 1 bit, full
+    {1, {1}, 1, 63, 0, 1},
+    // refinement scan, two spectral ranges
+    {1, {1}, 1, 6, 1, 0},
+    {1, {1}, 7, 63, 1, 0},
+    // AC scans for component 2
+    // shifted by 1 bit, two spectral ranges
+    {1, {2}, 1, 6, 0, 1},
+    {1, {2}, 7, 63, 0, 1},
+    // refinement scan, two spectral ranges (but different from above)
+    {1, {2}, 1, 16, 1, 0},
+    {1, {2}, 17, 63, 1, 0},
+};
+
+static constexpr jpeg_scan_info kScript10[] = {
+    // Interleaved full DC.
+    {3, {0, 1, 2}, 0, 0, 0, 0},
+    // AC scans for spectral range 1..16
+    // shifted by 1
+    {1, {0}, 1, 16, 0, 1},
+    {1, {1}, 1, 16, 0, 1},
+    {1, {2}, 1, 16, 0, 1},
+    // refinement scans, two sub-ranges
+    {1, {0}, 1, 8, 1, 0},
+    {1, {0}, 9, 16, 1, 0},
+    {1, {1}, 1, 8, 1, 0},
+    {1, {1}, 9, 16, 1, 0},
+    {1, {2}, 1, 8, 1, 0},
+    {1, {2}, 9, 16, 1, 0},
+    // AC scans for spectral range 17..63
+    {1, {0}, 17, 63, 0, 1},
+    {1, {1}, 17, 63, 0, 1},
+    {1, {2}, 17, 63, 0, 1},
+    // refinement scans, two sub-ranges
+    {1, {0}, 17, 28, 1, 0},
+    {1, {0}, 29, 63, 1, 0},
+    {1, {1}, 17, 28, 1, 0},
+    {1, {1}, 29, 63, 1, 0},
+    {1, {2}, 17, 28, 1, 0},
+    {1, {2}, 29, 63, 1, 0},
+};
+
+// A scan script together with its length.
+struct ScanScript {
+  int num_scans;
+  const jpeg_scan_info* scans;
+};
+
+// kTestScript[N - 1] is kScriptN; a CompressParams::progressive_mode of
+// 2 + N selects it (see the lookup in VerifyScanHeader below).
+static constexpr ScanScript kTestScript[] = {
+    {ARRAY_SIZE(kScript1), kScript1}, {ARRAY_SIZE(kScript2), kScript2},
+    {ARRAY_SIZE(kScript3), kScript3}, {ARRAY_SIZE(kScript4), kScript4},
+    {ARRAY_SIZE(kScript5), kScript5}, {ARRAY_SIZE(kScript6), kScript6},
+    {ARRAY_SIZE(kScript7), kScript7}, {ARRAY_SIZE(kScript8), kScript8},
+    {ARRAY_SIZE(kScript9), kScript9}, {ARRAY_SIZE(kScript10), kScript10},
+};
+static constexpr int kNumTestScripts = ARRAY_SIZE(kTestScript);
+
+// Applies the per-scan decoder settings for scan `scan_number`: the first
+// entry of dparams.scan_params whose max_scan_number is not smaller than
+// scan_number. Does nothing if there is no such entry. The settings
+// themselves only concern color quantization and are skipped unless
+// dparams.quantize_colors is set.
+void SetScanDecompressParams(const DecompressParams& dparams,
+                             j_decompress_ptr cinfo, int scan_number) {
+  const ScanDecompressParams* sparams = nullptr;
+  for (const auto& sp : dparams.scan_params) {
+    if (scan_number <= sp.max_scan_number) {
+      sparams = &sp;
+      break;
+    }
+  }
+  if (sparams == nullptr) {
+    return;
+  }
+  if (dparams.quantize_colors) {
+    cinfo->dither_mode = (J_DITHER_MODE)sparams->dither_mode;
+    if (sparams->color_quant_mode == CQUANT_1PASS) {
+      cinfo->two_pass_quantize = FALSE;
+      cinfo->colormap = nullptr;
+    } else if (sparams->color_quant_mode == CQUANT_2PASS) {
+      JXL_CHECK(cinfo->out_color_space == JCS_RGB);
+      cinfo->two_pass_quantize = TRUE;
+      cinfo->colormap = nullptr;
+    } else if (sparams->color_quant_mode == CQUANT_EXTERNAL) {
+      JXL_CHECK(cinfo->out_color_space == JCS_RGB);
+      cinfo->two_pass_quantize = FALSE;
+      // Remember whether a colormap was already installed before it is
+      // replaced; only then is the library told about the change below.
+      bool have_colormap = cinfo->colormap != nullptr;
+      cinfo->actual_number_of_colors = kTestColorMapNumColors;
+      cinfo->colormap = (*cinfo->mem->alloc_sarray)(
+          reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE,
+          cinfo->actual_number_of_colors, 3);
+      jxl::msan::UnpoisonMemory(cinfo->colormap, 3 * sizeof(JSAMPROW));
+      // Unpack the 0xAABBCC entries into three rows, one per channel.
+      for (int i = 0; i < kTestColorMapNumColors; ++i) {
+        cinfo->colormap[0][i] = (kTestColorMap[i] >> 16) & 0xff;
+        cinfo->colormap[1][i] = (kTestColorMap[i] >> 8) & 0xff;
+        cinfo->colormap[2][i] = (kTestColorMap[i] >> 0) & 0xff;
+      }
+      if (have_colormap) {
+        JPEG_API_FN(new_colormap)(cinfo);
+      }
+    } else if (sparams->color_quant_mode == CQUANT_REUSE) {
+      JXL_CHECK(cinfo->out_color_space == JCS_RGB);
+      JXL_CHECK(cinfo->colormap);
+    }
+  }
+}
+
+// Copies the decoder settings of a test case into cinfo. In buffered-image
+// mode this also enables every color quantization mode that any scan will
+// need and applies the settings of scan 1; otherwise the settings that cover
+// kLastScan are applied.
+void SetDecompressParams(const DecompressParams& dparams,
+                         j_decompress_ptr cinfo) {
+  cinfo->do_block_smoothing = dparams.do_block_smoothing;
+  cinfo->do_fancy_upsampling = dparams.do_fancy_upsampling;
+  if (dparams.output_mode == RAW_DATA) {
+    cinfo->raw_data_out = TRUE;
+  }
+  if (dparams.set_out_color_space) {
+    cinfo->out_color_space = (J_COLOR_SPACE)dparams.out_color_space;
+    if (dparams.out_color_space == JCS_UNKNOWN) {
+      cinfo->jpeg_color_space = JCS_UNKNOWN;
+    }
+  }
+  cinfo->scale_num = dparams.scale_num;
+  cinfo->scale_denom = dparams.scale_denom;
+  cinfo->quantize_colors = dparams.quantize_colors;
+  cinfo->desired_number_of_colors = dparams.desired_number_of_colors;
+  if (!dparams.scan_params.empty()) {
+    if (cinfo->buffered_image) {
+      for (const auto& sparams : dparams.scan_params) {
+        if (sparams.color_quant_mode == CQUANT_1PASS) {
+          cinfo->enable_1pass_quant = TRUE;
+        } else if (sparams.color_quant_mode == CQUANT_2PASS) {
+          cinfo->enable_2pass_quant = TRUE;
+        } else if (sparams.color_quant_mode == CQUANT_EXTERNAL) {
+          cinfo->enable_external_quant = TRUE;
+        }
+      }
+      SetScanDecompressParams(dparams, cinfo, 1);
+    } else {
+      SetScanDecompressParams(dparams, cinfo, kLastScan);
+    }
+  }
+}
+
+// Checks that the decoder's saved marker list contains a marker of type
+// marker_type whose payload equals kMarkerData.
+void CheckMarkerPresent(j_decompress_ptr cinfo, uint8_t marker_type) {
+  bool marker_found = false;
+  for (jpeg_saved_marker_ptr marker = cinfo->marker_list; marker != nullptr;
+       marker = marker->next) {
+    jxl::msan::UnpoisonMemory(marker, sizeof(*marker));
+    jxl::msan::UnpoisonMemory(marker->data, marker->data_length);
+    if (marker->marker == marker_type &&
+        marker->data_length == sizeof(kMarkerData) &&
+        memcmp(marker->data, kMarkerData, sizeof(kMarkerData)) == 0) {
+      marker_found = true;
+    }
+  }
+  JXL_CHECK(marker_found);
+}
+
+// Checks the header fields that the decoder filled into cinfo against the
+// encoder settings that produced the stream. Only settings that were set
+// explicitly in jparams are compared; the component dimensions and the
+// maximum sampling factors are always checked for internal consistency.
+void VerifyHeader(const CompressParams& jparams, j_decompress_ptr cinfo) {
+  if (jparams.set_jpeg_colorspace) {
+    JXL_CHECK(cinfo->jpeg_color_space == jparams.jpeg_color_space);
+  }
+  if (jparams.override_JFIF >= 0) {
+    JXL_CHECK(cinfo->saw_JFIF_marker == jparams.override_JFIF);
+  }
+  if (jparams.override_Adobe >= 0) {
+    JXL_CHECK(cinfo->saw_Adobe_marker == jparams.override_Adobe);
+  }
+  if (jparams.add_marker) {
+    CheckMarkerPresent(cinfo, kSpecialMarker0);
+    CheckMarkerPresent(cinfo, kSpecialMarker1);
+  }
+  jxl::msan::UnpoisonMemory(
+      cinfo->comp_info, cinfo->num_components * sizeof(cinfo->comp_info[0]));
+  int max_h_samp_factor = 1;
+  int max_v_samp_factor = 1;
+  for (int i = 0; i < cinfo->num_components; ++i) {
+    jpeg_component_info* comp = &cinfo->comp_info[i];
+    if (!jparams.comp_ids.empty()) {
+      JXL_CHECK(comp->component_id == jparams.comp_ids[i]);
+    }
+    if (!jparams.h_sampling.empty()) {
+      JXL_CHECK(comp->h_samp_factor == jparams.h_sampling[i]);
+    }
+    if (!jparams.v_sampling.empty()) {
+      JXL_CHECK(comp->v_samp_factor == jparams.v_sampling[i]);
+    }
+    if (!jparams.quant_indexes.empty()) {
+      JXL_CHECK(comp->quant_tbl_no == jparams.quant_indexes[i]);
+    }
+    max_h_samp_factor = std::max(max_h_samp_factor, comp->h_samp_factor);
+    max_v_samp_factor = std::max(max_v_samp_factor, comp->v_samp_factor);
+  }
+  JXL_CHECK(max_h_samp_factor == cinfo->max_h_samp_factor);
+  JXL_CHECK(max_v_samp_factor == cinfo->max_v_samp_factor);
+  // Marks the quantization table slots that some component refers to.
+  int referenced_tables[NUM_QUANT_TBLS] = {};
+  for (int i = 0; i < cinfo->num_components; ++i) {
+    jpeg_component_info* comp = &cinfo->comp_info[i];
+    JXL_CHECK(comp->width_in_blocks ==
+              DivCeil(cinfo->image_width * comp->h_samp_factor,
+                      max_h_samp_factor * DCTSIZE));
+    JXL_CHECK(comp->height_in_blocks ==
+              DivCeil(cinfo->image_height * comp->v_samp_factor,
+                      max_v_samp_factor * DCTSIZE));
+    referenced_tables[comp->quant_tbl_no] = 1;
+  }
+  // Custom tables that are referenced must be present with the expected
+  // values; custom tables that no component refers to must be absent.
+  for (const auto& table : jparams.quant_tables) {
+    JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[table.slot_idx];
+    if (!referenced_tables[table.slot_idx]) {
+      JXL_CHECK(quant_table == nullptr);
+      continue;
+    }
+    JXL_CHECK(quant_table != nullptr);
+    jxl::msan::UnpoisonMemory(quant_table, sizeof(*quant_table));
+    for (int k = 0; k < DCTSIZE2; ++k) {
+      JXL_CHECK(quant_table->quantval[k] == table.quantval[k]);
+    }
+  }
+}
+
+void VerifyScanHeader(const CompressParams& jparams, j_decompress_ptr cinfo) {
+  JXL_CHECK(cinfo->input_scan_number > 0);
+  if (cinfo->progressive_mode) {
+    JXL_CHECK(cinfo->Ss != 0 || cinfo->Se != 63);
+  } else {
+    JXL_CHECK(cinfo->Ss == 0 && cinfo->Se == 63);
+  }
+  if (jparams.progressive_mode > 2) {
+    JXL_CHECK(jparams.progressive_mode < 3 + kNumTestScripts);
+    const ScanScript& script = kTestScript[jparams.progressive_mode - 3];
+    JXL_CHECK(cinfo->input_scan_number <= script.num_scans);
+    const jpeg_scan_info& scan = script.scans[cinfo->input_scan_number - 1];
+    JXL_CHECK(cinfo->comps_in_scan == scan.comps_in_scan);
+    for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+      JXL_CHECK(cinfo->cur_comp_info[i]->component_index ==
+                scan.component_index[i]);
+    }
+    JXL_CHECK(cinfo->Ss == scan.Ss);
+    JXL_CHECK(cinfo->Se == scan.Se);
+    JXL_CHECK(cinfo->Ah == scan.Ah);
+    JXL_CHECK(cinfo->Al == scan.Al);
+  }
+  if (jparams.restart_interval > 0) {
+    JXL_CHECK(cinfo->restart_interval == jparams.restart_interval);
+  } else if (jparams.restart_in_rows > 0) {
+
JXL_CHECK(cinfo->restart_interval == + jparams.restart_in_rows * cinfo->MCUs_per_row); + } + if (jparams.progressive_mode == 0 && jparams.optimize_coding == 0) { + if (cinfo->jpeg_color_space == JCS_RGB) { + JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 0); + } else if (cinfo->jpeg_color_space == JCS_YCbCr) { + JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 1); + JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 1); + JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 1); + JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 1); + } else if (cinfo->jpeg_color_space == JCS_CMYK) { + JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[3].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[3].ac_tbl_no == 0); + } else if (cinfo->jpeg_color_space == JCS_YCCK) { + JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 1); + JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 1); + JXL_CHECK(cinfo->comp_info[3].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 1); + JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 1); + JXL_CHECK(cinfo->comp_info[3].ac_tbl_no == 0); + } + if (jparams.use_flat_dc_luma_code) { + JHUFF_TBL* tbl = cinfo->dc_huff_tbl_ptrs[0]; + jxl::msan::UnpoisonMemory(tbl, sizeof(*tbl)); + for (int i = 0; i < 15; ++i) { + JXL_CHECK(tbl->huffval[i] == i); + } + } + } +} + +void UnmapColors(uint8_t* row, size_t 
xsize, int components, + JSAMPARRAY colormap, size_t num_colors) { + JXL_CHECK(colormap != nullptr); + std::vector<uint8_t> tmp(xsize * components); + for (size_t x = 0; x < xsize; ++x) { + JXL_CHECK(row[x] < num_colors); + for (int c = 0; c < components; ++c) { + tmp[x * components + c] = colormap[c][row[x]]; + } + } + memcpy(row, tmp.data(), tmp.size()); +} + +void CopyCoefficients(j_decompress_ptr cinfo, jvirt_barray_ptr* coef_arrays, + TestImage* output) { + output->xsize = cinfo->image_width; + output->ysize = cinfo->image_height; + output->components = cinfo->num_components; + output->color_space = cinfo->out_color_space; + j_common_ptr comptr = reinterpret_cast<j_common_ptr>(cinfo); + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + std::vector<JCOEF> coeffs(comp->width_in_blocks * comp->height_in_blocks * + DCTSIZE2); + for (size_t by = 0; by < comp->height_in_blocks; ++by) { + JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)(comptr, coef_arrays[c], + by, 1, true); + size_t stride = comp->width_in_blocks * sizeof(JBLOCK); + size_t offset = by * comp->width_in_blocks * DCTSIZE2; + memcpy(&coeffs[offset], ba[0], stride); + } + output->coeffs.emplace_back(std::move(coeffs)); + } +} diff --git a/lib/jpegli/test_utils.cc b/lib/jpegli/test_utils.cc new file mode 100644 index 0000000..232b937 --- /dev/null +++ b/lib/jpegli/test_utils.cc @@ -0,0 +1,787 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jpegli/test_utils.h" + +#include <cmath> +#include <cstdint> +#include <fstream> + +#include "lib/jpegli/decode.h" +#include "lib/jpegli/encode.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/sanitizers.h" + +#if !defined(TEST_DATA_PATH) +#include "tools/cpp/runfiles/runfiles.h" +#endif + +namespace jpegli { + +#define JPEG_API_FN(name) jpegli_##name +#include "lib/jpegli/test_utils-inl.h" +#undef JPEG_API_FN + +#if defined(TEST_DATA_PATH) +std::string GetTestDataPath(const std::string& filename) { + return std::string(TEST_DATA_PATH "/") + filename; +} +#else +using bazel::tools::cpp::runfiles::Runfiles; +const std::unique_ptr<Runfiles> kRunfiles(Runfiles::Create("")); +std::string GetTestDataPath(const std::string& filename) { + std::string root(JPEGXL_ROOT_PACKAGE "/testdata/"); + return kRunfiles->Rlocation(root + filename); +} +#endif + +std::vector<uint8_t> ReadTestData(const std::string& filename) { + std::string full_path = GetTestDataPath(filename); + fprintf(stderr, "ReadTestData %s\n", full_path.c_str()); + std::ifstream file(full_path, std::ios::binary); + std::vector<char> str((std::istreambuf_iterator<char>(file)), + std::istreambuf_iterator<char>()); + JXL_CHECK(file.good()); + const uint8_t* raw = reinterpret_cast<const uint8_t*>(str.data()); + std::vector<uint8_t> data(raw, raw + str.size()); + printf("Test data %s is %d bytes long.\n", filename.c_str(), + static_cast<int>(data.size())); + return data; +} + +void CustomQuantTable::Generate() { + basic_table.resize(DCTSIZE2); + quantval.resize(DCTSIZE2); + switch (table_type) { + case 0: { + for (int k = 0; k < DCTSIZE2; ++k) { + basic_table[k] = k + 1; + } + break; + } + default: + for (int k = 0; k < DCTSIZE2; ++k) { + basic_table[k] = table_type; + } + } + for (int k = 0; k < DCTSIZE2; ++k) { + quantval[k] = (basic_table[k] * scale_factor + 50U) / 100U; + quantval[k] = std::max(quantval[k], 
1U); + quantval[k] = std::min(quantval[k], 65535U); + if (!add_raw) { + quantval[k] = std::min(quantval[k], force_baseline ? 255U : 32767U); + } + } +} + +bool PNMParser::ParseHeader(const uint8_t** pos, size_t* xsize, size_t* ysize, + size_t* num_channels, size_t* bitdepth) { + if (pos_[0] != 'P' || (pos_[1] != '5' && pos_[1] != '6')) { + fprintf(stderr, "Invalid PNM header."); + return false; + } + *num_channels = (pos_[1] == '5' ? 1 : 3); + pos_ += 2; + + size_t maxval; + if (!SkipWhitespace() || !ParseUnsigned(xsize) || !SkipWhitespace() || + !ParseUnsigned(ysize) || !SkipWhitespace() || !ParseUnsigned(&maxval) || + !SkipWhitespace()) { + return false; + } + if (maxval == 0 || maxval >= 65536) { + fprintf(stderr, "Invalid maxval value.\n"); + return false; + } + bool found_bitdepth = false; + for (int bits = 1; bits <= 16; ++bits) { + if (maxval == (1u << bits) - 1) { + *bitdepth = bits; + found_bitdepth = true; + break; + } + } + if (!found_bitdepth) { + fprintf(stderr, "Invalid maxval value.\n"); + return false; + } + + *pos = pos_; + return true; +} + +bool PNMParser::ParseUnsigned(size_t* number) { + if (pos_ == end_ || *pos_ < '0' || *pos_ > '9') { + fprintf(stderr, "Expected unsigned number.\n"); + return false; + } + *number = 0; + while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') { + *number *= 10; + *number += *pos_ - '0'; + ++pos_; + } + + return true; +} + +bool PNMParser::SkipWhitespace() { + if (pos_ == end_ || !IsWhitespace(*pos_)) { + fprintf(stderr, "Expected whitespace.\n"); + return false; + } + while (pos_ < end_ && IsWhitespace(*pos_)) { + ++pos_; + } + return true; +} + +bool ReadPNM(const std::vector<uint8_t>& data, size_t* xsize, size_t* ysize, + size_t* num_channels, size_t* bitdepth, + std::vector<uint8_t>* pixels) { + if (data.size() < 2) { + fprintf(stderr, "PNM file too small.\n"); + return false; + } + PNMParser parser(data.data(), data.size()); + const uint8_t* pos = nullptr; + if (!parser.ParseHeader(&pos, xsize, ysize, 
num_channels, bitdepth)) { + return false; + } + pixels->resize(data.data() + data.size() - pos); + memcpy(&(*pixels)[0], pos, pixels->size()); + return true; +} + +std::string ColorSpaceName(J_COLOR_SPACE colorspace) { + switch (colorspace) { + case JCS_UNKNOWN: + return "UNKNOWN"; + case JCS_GRAYSCALE: + return "GRAYSCALE"; + case JCS_RGB: + return "RGB"; + case JCS_YCbCr: + return "YCbCr"; + case JCS_CMYK: + return "CMYK"; + case JCS_YCCK: + return "YCCK"; + default: + return ""; + } +} + +std::string IOMethodName(JpegliDataType data_type, + JpegliEndianness endianness) { + std::string retval; + if (data_type == JPEGLI_TYPE_UINT8) { + return ""; + } else if (data_type == JPEGLI_TYPE_UINT16) { + retval = "UINT16"; + } else if (data_type == JPEGLI_TYPE_FLOAT) { + retval = "FLOAT"; + } + if (endianness == JPEGLI_LITTLE_ENDIAN) { + retval += "LE"; + } else if (endianness == JPEGLI_BIG_ENDIAN) { + retval += "BE"; + } + return retval; +} + +std::string SamplingId(const CompressParams& jparams) { + std::stringstream os; + JXL_CHECK(jparams.h_sampling.size() == jparams.v_sampling.size()); + if (!jparams.h_sampling.empty()) { + size_t len = jparams.h_sampling.size(); + while (len > 1 && jparams.h_sampling[len - 1] == 1 && + jparams.v_sampling[len - 1] == 1) { + --len; + } + os << "SAMP"; + for (size_t i = 0; i < len; ++i) { + if (i > 0) os << "_"; + os << jparams.h_sampling[i] << "x" << jparams.v_sampling[i]; + } + } + return os.str(); +} + +std::ostream& operator<<(std::ostream& os, const TestImage& input) { + os << input.xsize << "x" << input.ysize; + os << IOMethodName(input.data_type, input.endianness); + if (input.color_space != JCS_RGB) { + os << "InputColor" << ColorSpaceName((J_COLOR_SPACE)input.color_space); + } + if (input.color_space == JCS_UNKNOWN) { + os << input.components; + } + return os; +} + +std::ostream& operator<<(std::ostream& os, const CompressParams& jparams) { + os << "Q" << jparams.quality; + os << SamplingId(jparams); + if 
(jparams.set_jpeg_colorspace) { + os << "JpegColor" + << ColorSpaceName((J_COLOR_SPACE)jparams.jpeg_color_space); + } + if (!jparams.comp_ids.empty()) { + os << "CID"; + for (size_t i = 0; i < jparams.comp_ids.size(); ++i) { + os << jparams.comp_ids[i]; + } + } + if (!jparams.quant_indexes.empty()) { + os << "QIDX"; + for (size_t i = 0; i < jparams.quant_indexes.size(); ++i) { + os << jparams.quant_indexes[i]; + } + for (const auto& table : jparams.quant_tables) { + os << "TABLE" << table.slot_idx << "T" << table.table_type << "F" + << table.scale_factor + << (table.add_raw ? "R" + : table.force_baseline ? "B" + : ""); + } + } + if (jparams.progressive_mode >= 0) { + os << "P" << jparams.progressive_mode; + } else if (jparams.simple_progression) { + os << "Psimple"; + } + if (jparams.optimize_coding == 1) { + os << "OptimizedCode"; + } else if (jparams.optimize_coding == 0) { + os << "FixedCode"; + if (jparams.use_flat_dc_luma_code) { + os << "FlatDCLuma"; + } else if (jparams.omit_standard_tables) { + os << "OmitDHT"; + } + } + if (!jparams.use_adaptive_quantization) { + os << "NoAQ"; + } + if (jparams.restart_interval > 0) { + os << "R" << jparams.restart_interval; + } + if (jparams.restart_in_rows > 0) { + os << "RR" << jparams.restart_in_rows; + } + if (jparams.xyb_mode) { + os << "XYB"; + } else if (jparams.libjpeg_mode) { + os << "Libjpeg"; + } + if (jparams.override_JFIF >= 0) { + os << (jparams.override_JFIF ? "AddJFIF" : "NoJFIF"); + } + if (jparams.override_Adobe >= 0) { + os << (jparams.override_Adobe ? 
"AddAdobe" : "NoAdobe"); + } + if (jparams.add_marker) { + os << "AddMarker"; + } + if (!jparams.icc.empty()) { + os << "ICCSize" << jparams.icc.size(); + } + if (jparams.smoothing_factor != 0) { + os << "SF" << jparams.smoothing_factor; + } + return os; +} + +void SetNumChannels(J_COLOR_SPACE colorspace, size_t* channels) { + if (colorspace == JCS_GRAYSCALE) { + *channels = 1; + } else if (colorspace == JCS_RGB || colorspace == JCS_YCbCr) { + *channels = 3; + } else if (colorspace == JCS_CMYK || colorspace == JCS_YCCK) { + *channels = 4; + } else if (colorspace == JCS_UNKNOWN) { + JXL_CHECK(*channels <= 4); + } else { + JXL_ABORT(); + } +} + +void RGBToYCbCr(float r, float g, float b, float* y, float* cb, float* cr) { + *y = 0.299f * r + 0.587f * g + 0.114f * b; + *cb = -0.168736f * r - 0.331264f * g + 0.5f * b + 0.5f; + *cr = 0.5f * r - 0.418688f * g - 0.081312f * b + 0.5f; +} + +void ConvertPixel(const uint8_t* input_rgb, uint8_t* out, + J_COLOR_SPACE colorspace, size_t num_channels, + JpegliDataType data_type = JPEGLI_TYPE_UINT8, + bool swap_endianness = JPEGLI_NATIVE_ENDIAN) { + const float kMul = 255.0f; + float r = input_rgb[0] / kMul; + float g = input_rgb[1] / kMul; + float b = input_rgb[2] / kMul; + uint8_t out8[MAX_COMPONENTS]; + if (colorspace == JCS_GRAYSCALE) { + const float Y = 0.299f * r + 0.587f * g + 0.114f * b; + out8[0] = static_cast<uint8_t>(std::round(Y * kMul)); + } else if (colorspace == JCS_RGB || colorspace == JCS_UNKNOWN) { + for (size_t c = 0; c < num_channels; ++c) { + out8[c] = input_rgb[std::min<size_t>(2, c)]; + } + } else if (colorspace == JCS_YCbCr) { + float Y, Cb, Cr; + RGBToYCbCr(r, g, b, &Y, &Cb, &Cr); + out8[0] = static_cast<uint8_t>(std::round(Y * kMul)); + out8[1] = static_cast<uint8_t>(std::round(Cb * kMul)); + out8[2] = static_cast<uint8_t>(std::round(Cr * kMul)); + } else if (colorspace == JCS_CMYK || colorspace == JCS_YCCK) { + float K = 1.0f - std::max(r, std::max(g, b)); + float scaleK = 1.0f / (1.0f - K); + r *= 
scaleK; + g *= scaleK; + b *= scaleK; + if (colorspace == JCS_CMYK) { + out8[0] = static_cast<uint8_t>(std::round((1.0f - r) * kMul)); + out8[1] = static_cast<uint8_t>(std::round((1.0f - g) * kMul)); + out8[2] = static_cast<uint8_t>(std::round((1.0f - b) * kMul)); + } else if (colorspace == JCS_YCCK) { + float Y, Cb, Cr; + RGBToYCbCr(r, g, b, &Y, &Cb, &Cr); + out8[0] = static_cast<uint8_t>(std::round(Y * kMul)); + out8[1] = static_cast<uint8_t>(std::round(Cb * kMul)); + out8[2] = static_cast<uint8_t>(std::round(Cr * kMul)); + } + out8[3] = static_cast<uint8_t>(std::round(K * kMul)); + } else { + JXL_ABORT("Colorspace %d not supported", colorspace); + } + if (data_type == JPEGLI_TYPE_UINT8) { + memcpy(out, out8, num_channels); + } else if (data_type == JPEGLI_TYPE_UINT16) { + for (size_t c = 0; c < num_channels; ++c) { + uint16_t val = (out8[c] << 8) + out8[c]; + val |= 0x40; // Make little-endian and big-endian asymmetric + if (swap_endianness) { + val = JXL_BSWAP16(val); + } + memcpy(&out[sizeof(val) * c], &val, sizeof(val)); + } + } else if (data_type == JPEGLI_TYPE_FLOAT) { + for (size_t c = 0; c < num_channels; ++c) { + float val = out8[c] / 255.0f; + if (swap_endianness) { + val = BSwapFloat(val); + } + memcpy(&out[sizeof(val) * c], &val, sizeof(val)); + } + } +} + +void ConvertToGrayscale(TestImage* img) { + if (img->color_space == JCS_GRAYSCALE) return; + JXL_CHECK(img->data_type == JPEGLI_TYPE_UINT8); + for (size_t i = 0; i < img->pixels.size(); i += 3) { + if (img->color_space == JCS_RGB) { + ConvertPixel(&img->pixels[i], &img->pixels[i / 3], JCS_GRAYSCALE, 1); + } else if (img->color_space == JCS_YCbCr) { + img->pixels[i / 3] = img->pixels[i]; + } + } + img->pixels.resize(img->pixels.size() / 3); + img->color_space = JCS_GRAYSCALE; + img->components = 1; +} + +void GeneratePixels(TestImage* img) { + const std::vector<uint8_t> imgdata = ReadTestData("jxl/flower/flower.pnm"); + size_t xsize, ysize, channels, bitdepth; + std::vector<uint8_t> pixels; + 
JXL_CHECK(ReadPNM(imgdata, &xsize, &ysize, &channels, &bitdepth, &pixels)); + if (img->xsize == 0) img->xsize = xsize; + if (img->ysize == 0) img->ysize = ysize; + JXL_CHECK(img->xsize <= xsize); + JXL_CHECK(img->ysize <= ysize); + JXL_CHECK(3 == channels); + JXL_CHECK(8 == bitdepth); + size_t in_bytes_per_pixel = channels; + size_t in_stride = xsize * in_bytes_per_pixel; + size_t x0 = (xsize - img->xsize) / 2; + size_t y0 = (ysize - img->ysize) / 2; + SetNumChannels((J_COLOR_SPACE)img->color_space, &img->components); + size_t out_bytes_per_pixel = + jpegli_bytes_per_sample(img->data_type) * img->components; + size_t out_stride = img->xsize * out_bytes_per_pixel; + bool swap_endianness = + (img->endianness == JPEGLI_LITTLE_ENDIAN && !IsLittleEndian()) || + (img->endianness == JPEGLI_BIG_ENDIAN && IsLittleEndian()); + img->pixels.resize(img->ysize * out_stride); + for (size_t iy = 0; iy < img->ysize; ++iy) { + size_t y = y0 + iy; + for (size_t ix = 0; ix < img->xsize; ++ix) { + size_t x = x0 + ix; + size_t idx_in = y * in_stride + x * in_bytes_per_pixel; + size_t idx_out = iy * out_stride + ix * out_bytes_per_pixel; + ConvertPixel(&pixels[idx_in], &img->pixels[idx_out], + (J_COLOR_SPACE)img->color_space, img->components, + img->data_type, swap_endianness); + } + } +} + +void GenerateRawData(const CompressParams& jparams, TestImage* img) { + for (size_t c = 0; c < img->components; ++c) { + size_t xsize = jparams.comp_width(*img, c); + size_t ysize = jparams.comp_height(*img, c); + size_t factor_y = jparams.max_v_sample() / jparams.v_samp(c); + size_t factor_x = jparams.max_h_sample() / jparams.h_samp(c); + size_t factor = factor_x * factor_y; + std::vector<uint8_t> plane(ysize * xsize); + size_t bytes_per_pixel = img->components; + for (size_t y = 0; y < ysize; ++y) { + for (size_t x = 0; x < xsize; ++x) { + int result = 0; + for (size_t iy = 0; iy < factor_y; ++iy) { + size_t yy = std::min(y * factor_y + iy, img->ysize - 1); + for (size_t ix = 0; ix < factor_x; 
++ix) { + size_t xx = std::min(x * factor_x + ix, img->xsize - 1); + size_t pixel_ix = (yy * img->xsize + xx) * bytes_per_pixel + c; + result += img->pixels[pixel_ix]; + } + } + result = static_cast<uint8_t>((result + factor / 2) / factor); + plane[y * xsize + x] = result; + } + } + img->raw_data.emplace_back(std::move(plane)); + } +} + +void GenerateCoeffs(const CompressParams& jparams, TestImage* img) { + for (size_t c = 0; c < img->components; ++c) { + int xsize_blocks = jparams.comp_width(*img, c) / DCTSIZE; + int ysize_blocks = jparams.comp_height(*img, c) / DCTSIZE; + std::vector<JCOEF> plane(ysize_blocks * xsize_blocks * DCTSIZE2); + for (int by = 0; by < ysize_blocks; ++by) { + for (int bx = 0; bx < xsize_blocks; ++bx) { + JCOEF* block = &plane[(by * xsize_blocks + bx) * DCTSIZE2]; + for (int k = 0; k < DCTSIZE2; ++k) { + block[k] = (bx - by) / (k + 1); + } + } + } + img->coeffs.emplace_back(std::move(plane)); + } +} + +void EncodeWithJpegli(const TestImage& input, const CompressParams& jparams, + j_compress_ptr cinfo) { + cinfo->image_width = input.xsize; + cinfo->image_height = input.ysize; + cinfo->input_components = input.components; + if (jparams.xyb_mode) { + jpegli_set_xyb_mode(cinfo); + } + if (jparams.libjpeg_mode) { + jpegli_enable_adaptive_quantization(cinfo, FALSE); + jpegli_use_standard_quant_tables(cinfo); + jpegli_set_progressive_level(cinfo, 0); + } + jpegli_set_defaults(cinfo); + cinfo->in_color_space = (J_COLOR_SPACE)input.color_space; + jpegli_default_colorspace(cinfo); + if (jparams.override_JFIF >= 0) { + cinfo->write_JFIF_header = jparams.override_JFIF; + } + if (jparams.override_Adobe >= 0) { + cinfo->write_Adobe_marker = jparams.override_Adobe; + } + if (jparams.set_jpeg_colorspace) { + jpegli_set_colorspace(cinfo, (J_COLOR_SPACE)jparams.jpeg_color_space); + } + if (!jparams.comp_ids.empty()) { + for (int c = 0; c < cinfo->num_components; ++c) { + cinfo->comp_info[c].component_id = jparams.comp_ids[c]; + } + } + if 
(!jparams.h_sampling.empty()) { + for (int c = 0; c < cinfo->num_components; ++c) { + cinfo->comp_info[c].h_samp_factor = jparams.h_sampling[c]; + cinfo->comp_info[c].v_samp_factor = jparams.v_sampling[c]; + } + } + jpegli_set_quality(cinfo, jparams.quality, TRUE); + if (!jparams.quant_indexes.empty()) { + for (int c = 0; c < cinfo->num_components; ++c) { + cinfo->comp_info[c].quant_tbl_no = jparams.quant_indexes[c]; + } + for (const auto& table : jparams.quant_tables) { + if (table.add_raw) { + cinfo->quant_tbl_ptrs[table.slot_idx] = + jpegli_alloc_quant_table((j_common_ptr)cinfo); + for (int k = 0; k < DCTSIZE2; ++k) { + cinfo->quant_tbl_ptrs[table.slot_idx]->quantval[k] = + table.quantval[k]; + } + cinfo->quant_tbl_ptrs[table.slot_idx]->sent_table = FALSE; + } else { + jpegli_add_quant_table(cinfo, table.slot_idx, &table.basic_table[0], + table.scale_factor, table.force_baseline); + } + } + } + if (jparams.simple_progression) { + jpegli_simple_progression(cinfo); + JXL_CHECK(jparams.progressive_mode == -1); + } + if (jparams.progressive_mode > 2) { + const ScanScript& script = kTestScript[jparams.progressive_mode - 3]; + cinfo->scan_info = script.scans; + cinfo->num_scans = script.num_scans; + } else if (jparams.progressive_mode >= 0) { + jpegli_set_progressive_level(cinfo, jparams.progressive_mode); + } + jpegli_set_input_format(cinfo, input.data_type, input.endianness); + jpegli_enable_adaptive_quantization(cinfo, jparams.use_adaptive_quantization); + cinfo->restart_interval = jparams.restart_interval; + cinfo->restart_in_rows = jparams.restart_in_rows; + cinfo->smoothing_factor = jparams.smoothing_factor; + if (jparams.optimize_coding == 1) { + cinfo->optimize_coding = TRUE; + } else if (jparams.optimize_coding == 0) { + cinfo->optimize_coding = FALSE; + } + cinfo->raw_data_in = !input.raw_data.empty(); + if (jparams.optimize_coding == 0 && jparams.use_flat_dc_luma_code) { + JHUFF_TBL* tbl = cinfo->dc_huff_tbl_ptrs[0]; + memset(tbl, 0, sizeof(*tbl)); + 
tbl->bits[4] = 15; + for (int i = 0; i < 15; ++i) tbl->huffval[i] = i; + } + if (input.coeffs.empty()) { + bool write_all_tables = TRUE; + if (jparams.optimize_coding == 0 && !jparams.use_flat_dc_luma_code && + jparams.omit_standard_tables) { + write_all_tables = FALSE; + cinfo->dc_huff_tbl_ptrs[0]->sent_table = TRUE; + cinfo->dc_huff_tbl_ptrs[1]->sent_table = TRUE; + cinfo->ac_huff_tbl_ptrs[0]->sent_table = TRUE; + cinfo->ac_huff_tbl_ptrs[1]->sent_table = TRUE; + } + jpegli_start_compress(cinfo, write_all_tables); + if (jparams.add_marker) { + jpegli_write_marker(cinfo, kSpecialMarker0, kMarkerData, + sizeof(kMarkerData)); + jpegli_write_m_header(cinfo, kSpecialMarker1, sizeof(kMarkerData)); + for (size_t p = 0; p < sizeof(kMarkerData); ++p) { + jpegli_write_m_byte(cinfo, kMarkerData[p]); + } + for (size_t i = 0; i < kMarkerSequenceLen; ++i) { + jpegli_write_marker(cinfo, kMarkerSequence[i], kMarkerData, + ((i + 2) % sizeof(kMarkerData))); + } + } + if (!jparams.icc.empty()) { + jpegli_write_icc_profile(cinfo, jparams.icc.data(), jparams.icc.size()); + } + } + if (cinfo->raw_data_in) { + // Need to copy because jpeg API requires non-const pointers. + std::vector<std::vector<uint8_t>> raw_data = input.raw_data; + size_t max_lines = jparams.max_v_sample() * DCTSIZE; + std::vector<std::vector<JSAMPROW>> rowdata(cinfo->num_components); + std::vector<JSAMPARRAY> data(cinfo->num_components); + for (int c = 0; c < cinfo->num_components; ++c) { + rowdata[c].resize(jparams.v_samp(c) * DCTSIZE); + data[c] = &rowdata[c][0]; + } + while (cinfo->next_scanline < cinfo->image_height) { + for (int c = 0; c < cinfo->num_components; ++c) { + size_t cwidth = cinfo->comp_info[c].width_in_blocks * DCTSIZE; + size_t cheight = cinfo->comp_info[c].height_in_blocks * DCTSIZE; + size_t num_lines = jparams.v_samp(c) * DCTSIZE; + size_t y0 = (cinfo->next_scanline / max_lines) * num_lines; + for (size_t i = 0; i < num_lines; ++i) { + rowdata[c][i] = + (y0 + i < cheight ? 
&raw_data[c][(y0 + i) * cwidth] : nullptr); + } + } + size_t num_lines = jpegli_write_raw_data(cinfo, &data[0], max_lines); + JXL_CHECK(num_lines == max_lines); + } + } else if (!input.coeffs.empty()) { + j_common_ptr comptr = reinterpret_cast<j_common_ptr>(cinfo); + jvirt_barray_ptr* coef_arrays = reinterpret_cast<jvirt_barray_ptr*>(( + *cinfo->mem->alloc_small)( + comptr, JPOOL_IMAGE, cinfo->num_components * sizeof(jvirt_barray_ptr))); + for (int c = 0; c < cinfo->num_components; ++c) { + size_t xsize_blocks = jparams.comp_width(input, c) / DCTSIZE; + size_t ysize_blocks = jparams.comp_height(input, c) / DCTSIZE; + coef_arrays[c] = (*cinfo->mem->request_virt_barray)( + comptr, JPOOL_IMAGE, FALSE, xsize_blocks, ysize_blocks, + cinfo->comp_info[c].v_samp_factor); + } + jpegli_write_coefficients(cinfo, coef_arrays); + if (jparams.add_marker) { + jpegli_write_marker(cinfo, kSpecialMarker0, kMarkerData, + sizeof(kMarkerData)); + jpegli_write_m_header(cinfo, kSpecialMarker1, sizeof(kMarkerData)); + for (size_t p = 0; p < sizeof(kMarkerData); ++p) { + jpegli_write_m_byte(cinfo, kMarkerData[p]); + } + } + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + for (size_t by = 0; by < comp->height_in_blocks; ++by) { + JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)( + comptr, coef_arrays[c], by, 1, true); + size_t stride = comp->width_in_blocks * sizeof(JBLOCK); + size_t offset = by * comp->width_in_blocks * DCTSIZE2; + memcpy(ba[0], &input.coeffs[c][offset], stride); + } + } + } else { + size_t stride = cinfo->image_width * cinfo->input_components * + jpegli_bytes_per_sample(input.data_type); + std::vector<uint8_t> row_bytes(stride); + for (size_t y = 0; y < cinfo->image_height; ++y) { + memcpy(&row_bytes[0], &input.pixels[y * stride], stride); + JSAMPROW row[] = {row_bytes.data()}; + jpegli_write_scanlines(cinfo, row, 1); + } + } + jpegli_finish_compress(cinfo); +} + +bool EncodeWithJpegli(const TestImage& input, const 
CompressParams& jparams, + std::vector<uint8_t>* compressed) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + EncodeWithJpegli(input, jparams, &cinfo); + return true; + }; + bool success = try_catch_block(); + jpegli_destroy_compress(&cinfo); + if (success) { + compressed->resize(buffer_size); + std::copy_n(buffer, buffer_size, compressed->data()); + } + if (buffer) std::free(buffer); + return success; +} + +int NumTestScanScripts() { return kNumTestScripts; } + +void DumpImage(const TestImage& image, const std::string fn) { + JXL_CHECK(image.components == 1 || image.components == 3); + size_t bytes_per_sample = jpegli_bytes_per_sample(image.data_type); + uint32_t maxval = (1u << (8 * bytes_per_sample)) - 1; + char type = image.components == 1 ? '5' : '6'; + std::ofstream out(fn.c_str(), std::ofstream::binary); + out << "P" << type << std::endl + << image.xsize << " " << image.ysize << std::endl + << maxval << std::endl; + out.write(reinterpret_cast<const char*>(image.pixels.data()), + image.pixels.size()); + out.close(); +} + +double DistanceRms(const TestImage& input, const TestImage& output, + size_t start_line, size_t num_lines, double* max_diff) { + size_t stride = input.xsize * input.components; + size_t start_offset = start_line * stride; + auto get_sample = [&](const TestImage& im, const std::vector<uint8_t>& data, + size_t idx) -> double { + size_t bytes_per_sample = jpegli_bytes_per_sample(im.data_type); + bool is_little_endian = + (im.endianness == JPEGLI_LITTLE_ENDIAN || + (im.endianness == JPEGLI_NATIVE_ENDIAN && IsLittleEndian())); + size_t offset = start_offset + idx * bytes_per_sample; + JXL_CHECK(offset < data.size()); + const uint8_t* p = &data[offset]; + if (im.data_type == JPEGLI_TYPE_UINT8) { + static const double mul8 = 1.0 / 255.0; + 
return p[0] * mul8; + } else if (im.data_type == JPEGLI_TYPE_UINT16) { + static const double mul16 = 1.0 / 65535.0; + return (is_little_endian ? LoadLE16(p) : LoadBE16(p)) * mul16; + } else if (im.data_type == JPEGLI_TYPE_FLOAT) { + return (is_little_endian ? LoadLEFloat(p) : LoadBEFloat(p)); + } + return 0.0; + }; + double diff2 = 0.0; + size_t num_samples = 0; + if (max_diff) *max_diff = 0.0; + if (!input.pixels.empty() && !output.pixels.empty()) { + num_samples = num_lines * stride; + for (size_t i = 0; i < num_samples; ++i) { + double sample_orig = get_sample(input, input.pixels, i); + double sample_output = get_sample(output, output.pixels, i); + double diff = sample_orig - sample_output; + if (max_diff) *max_diff = std::max(*max_diff, 255.0 * std::abs(diff)); + diff2 += diff * diff; + } + } else { + JXL_CHECK(!input.raw_data.empty()); + JXL_CHECK(!output.raw_data.empty()); + for (size_t c = 0; c < input.raw_data.size(); ++c) { + JXL_CHECK(c < output.raw_data.size()); + num_samples += input.raw_data[c].size(); + for (size_t i = 0; i < input.raw_data[c].size(); ++i) { + double sample_orig = get_sample(input, input.raw_data[c], i); + double sample_output = get_sample(output, output.raw_data[c], i); + double diff = sample_orig - sample_output; + if (max_diff) *max_diff = std::max(*max_diff, 255.0 * std::abs(diff)); + diff2 += diff * diff; + } + } + } + return std::sqrt(diff2 / num_samples) * 255.0; +} + +double DistanceRms(const TestImage& input, const TestImage& output, + double* max_diff) { + return DistanceRms(input, output, 0, output.ysize, max_diff); +} + +void VerifyOutputImage(const TestImage& input, const TestImage& output, + size_t start_line, size_t num_lines, double max_rms, + double max_diff) { + double max_d; + double rms = DistanceRms(input, output, start_line, num_lines, &max_d); + printf("rms: %f, max_rms: %f, max_d: %f, max_diff: %f\n", rms, max_rms, + max_d, max_diff); + JXL_CHECK(rms <= max_rms); + JXL_CHECK(max_d <= max_diff); +} + +void 
VerifyOutputImage(const TestImage& input, const TestImage& output, + double max_rms, double max_diff) { + JXL_CHECK(output.xsize == input.xsize); + JXL_CHECK(output.ysize == input.ysize); + JXL_CHECK(output.components == input.components); + JXL_CHECK(output.color_space == input.color_space); + if (!input.coeffs.empty()) { + JXL_CHECK(input.coeffs.size() == input.components); + JXL_CHECK(output.coeffs.size() == input.components); + for (size_t c = 0; c < input.components; ++c) { + JXL_CHECK(output.coeffs[c].size() == input.coeffs[c].size()); + JXL_CHECK(0 == memcmp(input.coeffs[c].data(), output.coeffs[c].data(), + input.coeffs[c].size())); + } + } else { + VerifyOutputImage(input, output, 0, output.ysize, max_rms, max_diff); + } +} + +} // namespace jpegli diff --git a/lib/jpegli/test_utils.h b/lib/jpegli/test_utils.h new file mode 100644 index 0000000..132cfd0 --- /dev/null +++ b/lib/jpegli/test_utils.h @@ -0,0 +1,130 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JPEGLI_TEST_UTILS_H_ +#define LIB_JPEGLI_TEST_UTILS_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> +#include <string> +#include <vector> + +/* clang-format off */ +#include <stdio.h> +#include <jpeglib.h> +#include <setjmp.h> +/* clang-format on */ + +#include "lib/jpegli/common.h" +#include "lib/jpegli/libjpeg_test_util.h" +#include "lib/jpegli/test_params.h" + +namespace jpegli { + +#define ERROR_HANDLER_SETUP(flavor) \ + jpeg_error_mgr jerr; \ + jmp_buf env; \ + cinfo.err = flavor##_std_error(&jerr); \ + if (setjmp(env)) { \ + return false; \ + } \ + cinfo.client_data = reinterpret_cast<void*>(&env); \ + cinfo.err->error_exit = [](j_common_ptr cinfo) { \ + (*cinfo->err->output_message)(cinfo); \ + jmp_buf* env = reinterpret_cast<jmp_buf*>(cinfo->client_data); \ + flavor##_destroy(cinfo); \ + longjmp(*env, 1); \ + }; + +std::string IOMethodName(JpegliDataType data_type, JpegliEndianness endianness); + +std::string ColorSpaceName(J_COLOR_SPACE colorspace); + +std::ostream& operator<<(std::ostream& os, const TestImage& input); + +std::ostream& operator<<(std::ostream& os, const CompressParams& jparams); + +int NumTestScanScripts(); + +void VerifyHeader(const CompressParams& jparams, j_decompress_ptr cinfo); +void VerifyScanHeader(const CompressParams& jparams, j_decompress_ptr cinfo); + +void SetDecompressParams(const DecompressParams& dparams, + j_decompress_ptr cinfo); + +void SetScanDecompressParams(const DecompressParams& dparams, + j_decompress_ptr cinfo, int scan_number); + +void CopyCoefficients(j_decompress_ptr cinfo, jvirt_barray_ptr* coef_arrays, + TestImage* output); + +void UnmapColors(uint8_t* row, size_t xsize, int components, + JSAMPARRAY colormap, size_t num_colors); + +std::string GetTestDataPath(const std::string& filename); +std::vector<uint8_t> ReadTestData(const std::string& filename); + +class PNMParser { + public: + explicit PNMParser(const uint8_t* data, const size_t len) + : pos_(data), end_(data + 
len) {} + + // Sets "pos" to the first non-header byte/pixel on success. + bool ParseHeader(const uint8_t** pos, size_t* xsize, size_t* ysize, + size_t* num_channels, size_t* bitdepth); + + private: + static bool IsLineBreak(const uint8_t c) { return c == '\r' || c == '\n'; } + static bool IsWhitespace(const uint8_t c) { + return IsLineBreak(c) || c == '\t' || c == ' '; + } + + bool ParseUnsigned(size_t* number); + + bool SkipWhitespace(); + + const uint8_t* pos_; + const uint8_t* const end_; +}; + +bool ReadPNM(const std::vector<uint8_t>& data, size_t* xsize, size_t* ysize, + size_t* num_channels, size_t* bitdepth, + std::vector<uint8_t>* pixels); + +void SetNumChannels(J_COLOR_SPACE colorspace, size_t* channels); + +void ConvertToGrayscale(TestImage* img); + +void GeneratePixels(TestImage* img); + +void GenerateRawData(const CompressParams& jparams, TestImage* img); + +void GenerateCoeffs(const CompressParams& jparams, TestImage* img); + +void EncodeWithJpegli(const TestImage& input, const CompressParams& jparams, + j_compress_ptr cinfo); + +bool EncodeWithJpegli(const TestImage& input, const CompressParams& jparams, + std::vector<uint8_t>* compressed); + +double DistanceRms(const TestImage& input, const TestImage& output, + size_t start_line, size_t num_lines, + double* max_diff = nullptr); + +double DistanceRms(const TestImage& input, const TestImage& output, + double* max_diff = nullptr); + +void VerifyOutputImage(const TestImage& input, const TestImage& output, + size_t start_line, size_t num_lines, double max_rms, + double max_diff = 255.0); + +void VerifyOutputImage(const TestImage& input, const TestImage& output, + double max_rms, double max_diff = 255.0); + +} // namespace jpegli + +#endif // LIB_JPEGLI_TEST_UTILS_H_ diff --git a/lib/jpegli/testing.h b/lib/jpegli/testing.h new file mode 100644 index 0000000..873a017 --- /dev/null +++ b/lib/jpegli/testing.h @@ -0,0 +1,35 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_TESTING_H_ +#define LIB_JPEGLI_TESTING_H_ + +// GTest/GMock specific macros / wrappers. + +// gmock unconditionally redefines those macros (to wrong values). +// Lets include it only here and mitigate the problem. +#pragma push_macro("PRIdS") +#pragma push_macro("PRIuS") +#include "gmock/gmock.h" +#pragma pop_macro("PRIuS") +#pragma pop_macro("PRIdS") + +#include "gtest/gtest.h" + +// googletest before 1.10 didn't define INSTANTIATE_TEST_SUITE_P() but instead +// used INSTANTIATE_TEST_CASE_P which is now deprecated. +#ifdef INSTANTIATE_TEST_SUITE_P +#define JPEGLI_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_SUITE_P +#else +#define JPEGLI_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_CASE_P +#endif + +// Ensures that we don't make our test bounds too lax, effectively disabling the +// tests. +MATCHER_P(IsSlightlyBelow, max, "") { + return max * 0.75 <= arg && arg <= max * 1.0; +} + +#endif // LIB_JPEGLI_TESTING_H_ diff --git a/lib/jpegli/transcode_api_test.cc b/lib/jpegli/transcode_api_test.cc new file mode 100644 index 0000000..1d99ce3 --- /dev/null +++ b/lib/jpegli/transcode_api_test.cc @@ -0,0 +1,133 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include <vector> + +#include "lib/jpegli/decode.h" +#include "lib/jpegli/encode.h" +#include "lib/jpegli/test_utils.h" +#include "lib/jpegli/testing.h" +#include "lib/jxl/base/status.h" + +namespace jpegli { +namespace { + +void TranscodeWithJpegli(const std::vector<uint8_t>& jpeg_input, + const CompressParams& jparams, + std::vector<uint8_t>* jpeg_output) { + jpeg_decompress_struct dinfo = {}; + jpeg_compress_struct cinfo = {}; + uint8_t* transcoded_data = nullptr; + unsigned long transcoded_size; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + dinfo.err = cinfo.err; + dinfo.client_data = cinfo.client_data; + jpegli_create_decompress(&dinfo); + jpegli_mem_src(&dinfo, jpeg_input.data(), jpeg_input.size()); + EXPECT_EQ(JPEG_REACHED_SOS, + jpegli_read_header(&dinfo, /*require_image=*/TRUE)); + jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&dinfo); + JXL_CHECK(coef_arrays != nullptr); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &transcoded_data, &transcoded_size); + jpegli_copy_critical_parameters(&dinfo, &cinfo); + jpegli_set_progressive_level(&cinfo, jparams.progressive_mode); + cinfo.optimize_coding = jparams.optimize_coding; + jpegli_write_coefficients(&cinfo, coef_arrays); + jpegli_finish_compress(&cinfo); + jpegli_finish_decompress(&dinfo); + return true; + }; + ASSERT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&dinfo); + jpegli_destroy_compress(&cinfo); + if (transcoded_data) { + jpeg_output->assign(transcoded_data, transcoded_data + transcoded_size); + free(transcoded_data); + } +} + +struct TestConfig { + TestImage input; + CompressParams jparams; +}; + +class TranscodeAPITestParam : public ::testing::TestWithParam<TestConfig> {}; + +TEST_P(TranscodeAPITestParam, TestAPI) { + TestConfig config = GetParam(); + CompressParams& jparams = config.jparams; + GeneratePixels(&config.input); + + // Start with sequential non-optimized jpeg. 
+ jparams.progressive_mode = 0; + jparams.optimize_coding = 0; + std::vector<uint8_t> compressed; + ASSERT_TRUE(EncodeWithJpegli(config.input, jparams, &compressed)); + TestImage output0; + DecodeWithLibjpeg(jparams, DecompressParams(), compressed, &output0); + + // Transcode first to a sequential optimized jpeg, and then further to + // a progressive jpeg. + for (int progr : {0, 2}) { + std::vector<uint8_t> transcoded; + jparams.progressive_mode = progr; + jparams.optimize_coding = 1; + TranscodeWithJpegli(compressed, jparams, &transcoded); + + // We expect a size reduction of at least 2%. + EXPECT_LT(transcoded.size(), compressed.size() * 0.98f); + + // Verify that transcoding is lossless. + TestImage output1; + DecodeWithLibjpeg(jparams, DecompressParams(), transcoded, &output1); + ASSERT_EQ(output0.pixels.size(), output1.pixels.size()); + EXPECT_EQ(0, memcmp(output0.pixels.data(), output1.pixels.data(), + output0.pixels.size())); + compressed = transcoded; + } +} + +std::vector<TestConfig> GenerateTests() { + std::vector<TestConfig> all_tests; + const size_t xsize0 = 1024; + const size_t ysize0 = 768; + for (int dxsize : {0, 1, 8, 9}) { + for (int dysize : {0, 1, 8, 9}) { + for (int h_sampling : {1, 2}) { + for (int v_sampling : {1, 2}) { + TestConfig config; + config.input.xsize = xsize0 + dxsize; + config.input.ysize = ysize0 + dysize; + config.jparams.h_sampling = {h_sampling, 1, 1}; + config.jparams.v_sampling = {v_sampling, 1, 1}; + all_tests.push_back(config); + } + } + } + } + return all_tests; +} + +std::ostream& operator<<(std::ostream& os, const TestConfig& c) { + os << c.input; + os << c.jparams; + return os; +} + +std::string TestDescription( + const testing::TestParamInfo<TranscodeAPITestParam::ParamType>& info) { + std::stringstream name; + name << info.param; + return name.str(); +} + +JPEGLI_INSTANTIATE_TEST_SUITE_P(TranscodeAPITest, TranscodeAPITestParam, + testing::ValuesIn(GenerateTests()), + TestDescription); + +} // namespace +} // 
namespace jpegli diff --git a/lib/jpegli/transpose-inl.h b/lib/jpegli/transpose-inl.h new file mode 100644 index 0000000..9fdd222 --- /dev/null +++ b/lib/jpegli/transpose-inl.h @@ -0,0 +1,111 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#if defined(LIB_JPEGLI_TRANSPOSE_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JPEGLI_TRANSPOSE_INL_H_ +#undef LIB_JPEGLI_TRANSPOSE_INL_H_ +#else +#define LIB_JPEGLI_TRANSPOSE_INL_H_ +#endif + +#include "lib/jxl/base/compiler_specific.h" + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { +namespace { + +#if HWY_CAP_GE256 +static JXL_INLINE void Transpose8x8Block(const float* JXL_RESTRICT from, + float* JXL_RESTRICT to) { + const HWY_CAPPED(float, 8) d; + auto i0 = Load(d, from); + auto i1 = Load(d, from + 1 * 8); + auto i2 = Load(d, from + 2 * 8); + auto i3 = Load(d, from + 3 * 8); + auto i4 = Load(d, from + 4 * 8); + auto i5 = Load(d, from + 5 * 8); + auto i6 = Load(d, from + 6 * 8); + auto i7 = Load(d, from + 7 * 8); + + const auto q0 = InterleaveLower(d, i0, i2); + const auto q1 = InterleaveLower(d, i1, i3); + const auto q2 = InterleaveUpper(d, i0, i2); + const auto q3 = InterleaveUpper(d, i1, i3); + const auto q4 = InterleaveLower(d, i4, i6); + const auto q5 = InterleaveLower(d, i5, i7); + const auto q6 = InterleaveUpper(d, i4, i6); + const auto q7 = InterleaveUpper(d, i5, i7); + + const auto r0 = InterleaveLower(d, q0, q1); + const auto r1 = InterleaveUpper(d, q0, q1); + const auto r2 = InterleaveLower(d, q2, q3); + const auto r3 = InterleaveUpper(d, q2, q3); + const auto r4 = InterleaveLower(d, q4, q5); + const auto r5 = InterleaveUpper(d, q4, q5); + const auto r6 = InterleaveLower(d, q6, q7); + const auto r7 = InterleaveUpper(d, q6, q7); + + i0 = ConcatLowerLower(d, r4, r0); + i1 = ConcatLowerLower(d, r5, r1); + i2 = ConcatLowerLower(d, r6, r2); + i3 = 
ConcatLowerLower(d, r7, r3); + i4 = ConcatUpperUpper(d, r4, r0); + i5 = ConcatUpperUpper(d, r5, r1); + i6 = ConcatUpperUpper(d, r6, r2); + i7 = ConcatUpperUpper(d, r7, r3); + + Store(i0, d, to); + Store(i1, d, to + 1 * 8); + Store(i2, d, to + 2 * 8); + Store(i3, d, to + 3 * 8); + Store(i4, d, to + 4 * 8); + Store(i5, d, to + 5 * 8); + Store(i6, d, to + 6 * 8); + Store(i7, d, to + 7 * 8); +} +#elif HWY_TARGET != HWY_SCALAR +static JXL_INLINE void Transpose8x8Block(const float* JXL_RESTRICT from, + float* JXL_RESTRICT to) { + const HWY_CAPPED(float, 4) d; + for (size_t n = 0; n < 8; n += 4) { + for (size_t m = 0; m < 8; m += 4) { + auto p0 = Load(d, from + n * 8 + m); + auto p1 = Load(d, from + (n + 1) * 8 + m); + auto p2 = Load(d, from + (n + 2) * 8 + m); + auto p3 = Load(d, from + (n + 3) * 8 + m); + const auto q0 = InterleaveLower(d, p0, p2); + const auto q1 = InterleaveLower(d, p1, p3); + const auto q2 = InterleaveUpper(d, p0, p2); + const auto q3 = InterleaveUpper(d, p1, p3); + + const auto r0 = InterleaveLower(d, q0, q1); + const auto r1 = InterleaveUpper(d, q0, q1); + const auto r2 = InterleaveLower(d, q2, q3); + const auto r3 = InterleaveUpper(d, q2, q3); + Store(r0, d, to + m * 8 + n); + Store(r1, d, to + (1 + m) * 8 + n); + Store(r2, d, to + (2 + m) * 8 + n); + Store(r3, d, to + (3 + m) * 8 + n); + } + } +} +#else +static JXL_INLINE void Transpose8x8Block(const float* JXL_RESTRICT from, + float* JXL_RESTRICT to) { + for (size_t n = 0; n < 8; ++n) { + for (size_t m = 0; m < 8; ++m) { + to[8 * n + m] = from[8 * m + n]; + } + } +} +#endif + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); +#endif // LIB_JPEGLI_TRANSPOSE_INL_H_ diff --git a/lib/jpegli/types.h b/lib/jpegli/types.h new file mode 100644 index 0000000..2f446b7 --- /dev/null +++ b/lib/jpegli/types.h @@ -0,0 +1,38 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_TYPES_H_ +#define LIB_JPEGLI_TYPES_H_ + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +// +// New API structs and functions that are not available in libjpeg +// +// NOTE: This part of the API is still experimental and will probably change in +// the future. +// + +typedef enum { + JPEGLI_TYPE_FLOAT = 0, + JPEGLI_TYPE_UINT8 = 2, + JPEGLI_TYPE_UINT16 = 3, +} JpegliDataType; + +typedef enum { + JPEGLI_NATIVE_ENDIAN = 0, + JPEGLI_LITTLE_ENDIAN = 1, + JPEGLI_BIG_ENDIAN = 2, +} JpegliEndianness; + +int jpegli_bytes_per_sample(JpegliDataType data_type); + +#if defined(__cplusplus) || defined(c_plusplus) +} // extern "C" +#endif + +#endif // LIB_JPEGLI_TYPES_H_ diff --git a/lib/jpegli/upsample.cc b/lib/jpegli/upsample.cc new file mode 100644 index 0000000..5559aa7 --- /dev/null +++ b/lib/jpegli/upsample.cc @@ -0,0 +1,137 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/upsample.h" + +#include <string.h> + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/upsample.cc" +#include <hwy/foreach_target.h> +#include <hwy/highway.h> + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::Vec; + +#if HWY_CAP_GE512 +using hwy::HWY_NAMESPACE::Half; +using hwy::HWY_NAMESPACE::Vec; +template <size_t i, class DF, class V> +HWY_INLINE Vec<Half<Half<DF>>> Quarter(const DF df, V v) { + using HF = Half<DF>; + using HHF = Half<HF>; + auto half = i >= 2 ? UpperHalf(HF(), v) : LowerHalf(HF(), v); + return i & 1 ? 
UpperHalf(HHF(), half) : LowerHalf(HHF(), half); +} + +template <class DF, class V> +HWY_INLINE Vec<DF> Concat4(const DF df, V v0, V v1, V v2, V v3) { + using HF = Half<DF>; + return Combine(DF(), Combine(HF(), v3, v2), Combine(HF(), v1, v0)); +} + +#endif + +// Stores v0[0], v1[0], v0[1], v1[1], ... to mem, in this order. Mem must be +// aligned. +template <class DF, class V, typename T> +void StoreInterleaved(const DF df, V v0, V v1, T* mem) { + static_assert(sizeof(T) == 4, "only use StoreInterleaved for 4-byte types"); +#if HWY_TARGET == HWY_SCALAR + Store(v0, df, mem); + Store(v1, df, mem + 1); +#elif !HWY_CAP_GE256 + Store(InterleaveLower(df, v0, v1), df, mem); + Store(InterleaveUpper(df, v0, v1), df, mem + Lanes(df)); +#else + if (!HWY_CAP_GE512 || Lanes(df) == 8) { + auto t0 = InterleaveLower(df, v0, v1); + auto t1 = InterleaveUpper(df, v0, v1); + Store(ConcatLowerLower(df, t1, t0), df, mem); + Store(ConcatUpperUpper(df, t1, t0), df, mem + Lanes(df)); + } else { +#if HWY_CAP_GE512 + auto t0 = InterleaveLower(df, v0, v1); + auto t1 = InterleaveUpper(df, v0, v1); + Store(Concat4(df, Quarter<0>(df, t0), Quarter<0>(df, t1), + Quarter<1>(df, t0), Quarter<1>(df, t1)), + df, mem); + Store(Concat4(df, Quarter<2>(df, t0), Quarter<2>(df, t1), + Quarter<3>(df, t0), Quarter<3>(df, t1)), + df, mem + Lanes(df)); +#endif + } +#endif +} + +void Upsample2Horizontal(float* JXL_RESTRICT row, + float* JXL_RESTRICT scratch_space, size_t len_out) { + HWY_FULL(float) df; + auto threefour = Set(df, 0.75f); + auto onefour = Set(df, 0.25f); + const size_t len_in = (len_out + 1) >> 1; + memcpy(scratch_space, row, len_in * sizeof(row[0])); + scratch_space[-1] = scratch_space[0]; + scratch_space[len_in] = scratch_space[len_in - 1]; + for (size_t x = 0; x < len_in; x += Lanes(df)) { + auto current = Mul(Load(df, scratch_space + x), threefour); + auto prev = LoadU(df, scratch_space + x - 1); + auto next = LoadU(df, scratch_space + x + 1); + auto left = MulAdd(onefour, prev, current); + 
auto right = MulAdd(onefour, next, current); + StoreInterleaved(df, left, right, row + x * 2); + } +} + +void Upsample2Vertical(const float* JXL_RESTRICT row_top, + const float* JXL_RESTRICT row_mid, + const float* JXL_RESTRICT row_bot, + float* JXL_RESTRICT row_out0, + float* JXL_RESTRICT row_out1, size_t len) { + HWY_FULL(float) df; + auto threefour = Set(df, 0.75f); + auto onefour = Set(df, 0.25f); + for (size_t x = 0; x < len; x += Lanes(df)) { + auto it = Load(df, row_top + x); + auto im = Load(df, row_mid + x); + auto ib = Load(df, row_bot + x); + auto im_scaled = Mul(im, threefour); + Store(MulAdd(it, onefour, im_scaled), df, row_out0 + x); + Store(MulAdd(ib, onefour, im_scaled), df, row_out1 + x); + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jpegli { + +HWY_EXPORT(Upsample2Horizontal); +HWY_EXPORT(Upsample2Vertical); + +void Upsample2Horizontal(float* JXL_RESTRICT row, + float* JXL_RESTRICT scratch_space, size_t len_out) { + return HWY_DYNAMIC_DISPATCH(Upsample2Horizontal)(row, scratch_space, len_out); +} + +void Upsample2Vertical(const float* JXL_RESTRICT row_top, + const float* JXL_RESTRICT row_mid, + const float* JXL_RESTRICT row_bot, + float* JXL_RESTRICT row_out0, + float* JXL_RESTRICT row_out1, size_t len) { + return HWY_DYNAMIC_DISPATCH(Upsample2Vertical)(row_top, row_mid, row_bot, + row_out0, row_out1, len); +} +} // namespace jpegli +#endif // HWY_ONCE diff --git a/lib/jpegli/upsample.h b/lib/jpegli/upsample.h new file mode 100644 index 0000000..1a05720 --- /dev/null +++ b/lib/jpegli/upsample.h @@ -0,0 +1,26 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JPEGLI_UPSAMPLE_H_ +#define LIB_JPEGLI_UPSAMPLE_H_ + +#include <stddef.h> + +#include "lib/jxl/base/compiler_specific.h" + +namespace jpegli { + +void Upsample2Horizontal(float* JXL_RESTRICT row, + float* JXL_RESTRICT scratch_space, size_t len_out); + +void Upsample2Vertical(const float* JXL_RESTRICT row_top, + const float* JXL_RESTRICT row_mid, + const float* JXL_RESTRICT row_bot, + float* JXL_RESTRICT row_out0, + float* JXL_RESTRICT row_out1, size_t len); + +} // namespace jpegli + +#endif // LIB_JPEGLI_UPSAMPLE_H_ |