summaryrefslogtreecommitdiff
path: root/lib/jpegli
diff options
context:
space:
mode:
Diffstat (limited to 'lib/jpegli')
-rw-r--r--lib/jpegli/README.md49
-rw-r--r--lib/jpegli/adaptive_quantization.cc562
-rw-r--r--lib/jpegli/adaptive_quantization.h17
-rw-r--r--lib/jpegli/bit_writer.cc60
-rw-r--r--lib/jpegli/bit_writer.h98
-rw-r--r--lib/jpegli/bitstream.cc452
-rw-r--r--lib/jpegli/bitstream.h44
-rw-r--r--lib/jpegli/color_quantize.cc533
-rw-r--r--lib/jpegli/color_quantize.h27
-rw-r--r--lib/jpegli/color_transform.cc281
-rw-r--r--lib/jpegli/color_transform.h20
-rw-r--r--lib/jpegli/common.cc59
-rw-r--r--lib/jpegli/common.h48
-rw-r--r--lib/jpegli/common_internal.h150
-rw-r--r--lib/jpegli/dct-inl.h258
-rw-r--r--lib/jpegli/decode.cc1028
-rw-r--r--lib/jpegli/decode.h106
-rw-r--r--lib/jpegli/decode_api_test.cc1304
-rw-r--r--lib/jpegli/decode_internal.h151
-rw-r--r--lib/jpegli/decode_marker.cc588
-rw-r--r--lib/jpegli/decode_marker.h32
-rw-r--r--lib/jpegli/decode_scan.cc566
-rw-r--r--lib/jpegli/decode_scan.h31
-rw-r--r--lib/jpegli/destination_manager.cc148
-rw-r--r--lib/jpegli/downsample.cc356
-rw-r--r--lib/jpegli/downsample.h21
-rw-r--r--lib/jpegli/encode.cc1253
-rw-r--r--lib/jpegli/encode.h158
-rw-r--r--lib/jpegli/encode_api_test.cc837
-rw-r--r--lib/jpegli/encode_finish.cc230
-rw-r--r--lib/jpegli/encode_finish.h17
-rw-r--r--lib/jpegli/encode_internal.h141
-rw-r--r--lib/jpegli/encode_streaming.cc259
-rw-r--r--lib/jpegli/encode_streaming.h21
-rw-r--r--lib/jpegli/entropy_coding-inl.h213
-rw-r--r--lib/jpegli/entropy_coding.cc837
-rw-r--r--lib/jpegli/entropy_coding.h28
-rw-r--r--lib/jpegli/error.cc102
-rw-r--r--lib/jpegli/error.h37
-rw-r--r--lib/jpegli/error_handling_test.cc1276
-rw-r--r--lib/jpegli/huffman.cc321
-rw-r--r--lib/jpegli/huffman.h50
-rw-r--r--lib/jpegli/idct.cc692
-rw-r--r--lib/jpegli/idct.h18
-rw-r--r--lib/jpegli/input.cc414
-rw-r--r--lib/jpegli/input.h17
-rw-r--r--lib/jpegli/input_suspension_test.cc612
-rw-r--r--lib/jpegli/jpeg.version.6211
-rw-r--r--lib/jpegli/jpeg.version.89
-rw-r--r--lib/jpegli/libjpeg_test_util.cc261
-rw-r--r--lib/jpegli/libjpeg_test_util.h37
-rw-r--r--lib/jpegli/libjpeg_wrapper.cc255
-rw-r--r--lib/jpegli/memory_manager.cc186
-rw-r--r--lib/jpegli/memory_manager.h45
-rw-r--r--lib/jpegli/output_suspension_test.cc219
-rw-r--r--lib/jpegli/quant.cc768
-rw-r--r--lib/jpegli/quant.h26
-rw-r--r--lib/jpegli/render.cc763
-rw-r--r--lib/jpegli/render.h24
-rw-r--r--lib/jpegli/simd.cc38
-rw-r--r--lib/jpegli/simd.h18
-rw-r--r--lib/jpegli/source_manager.cc90
-rw-r--r--lib/jpegli/source_manager_test.cc142
-rw-r--r--lib/jpegli/streaming_test.cc233
-rw-r--r--lib/jpegli/test_params.h163
-rw-r--r--lib/jpegli/test_utils-inl.h430
-rw-r--r--lib/jpegli/test_utils.cc787
-rw-r--r--lib/jpegli/test_utils.h130
-rw-r--r--lib/jpegli/testing.h35
-rw-r--r--lib/jpegli/transcode_api_test.cc133
-rw-r--r--lib/jpegli/transpose-inl.h111
-rw-r--r--lib/jpegli/types.h38
-rw-r--r--lib/jpegli/upsample.cc137
-rw-r--r--lib/jpegli/upsample.h26
74 files changed, 19637 insertions, 0 deletions
diff --git a/lib/jpegli/README.md b/lib/jpegli/README.md
new file mode 100644
index 0000000..72f13af
--- /dev/null
+++ b/lib/jpegli/README.md
@@ -0,0 +1,49 @@
+# Improved JPEG encoder and decoder implementation
+
+This subdirectory contains a JPEG encoder and decoder implementation that is
+API and ABI compatible with libjpeg62.
+
+## Building
+
+When building the parent libjxl project, two binaries, `tools/cjpegli` and
+`tools/djpegli`, will be built, as well as a
+`lib/jpegli/libjpeg.so.62.3.0` shared library that can be used as a drop-in
+replacement for the system library with the same name.
+
+## Encoder improvements
+
+Improvements and new features used by the encoder include:
+
+* Support for 16-bit unsigned and 32-bit floating point input buffers.
+
+* Color space conversions, chroma subsampling and DCT are all done in floating
+ point precision; the conversion to integers happens only when producing
+ the final quantized DCT coefficients.
+
+* The desired quality can be indicated by a distance parameter that is
+ analogous to the distance parameter of JPEG XL. The quantization tables
+ are chosen based on the distance and the chroma subsampling mode, with
+ different positions in the quantization matrix scaling differently, and the
+ red and blue chrominance channels have separate quantization tables.
+
+* Adaptive dead-zone quantization. On noisy parts of the image, quantization
+ thresholds for zero coefficients are higher than on smoother parts of the
+ image.
+
+* Support for more efficient compression of JPEGs with an ICC profile
+ representing the XYB colorspace. These JPEGs will not be converted to the
+ YCbCr colorspace, but specialized quantization tables will be chosen for
+ the original X, Y, B channels.
+
+## Decoder improvements
+
+* Support for 16-bit unsigned and 32-bit floating point output buffers.
+
+* Non-zero DCT coefficients are dequantized to the expectation value of their
+ respective quantization intervals assuming a Laplacian distribution of the
+ original unquantized DCT coefficients.
+
+* After dequantization, inverse DCT, chroma upsampling and color space
+ conversions are all done in floating point precision; the conversion to
+ integer samples happens only in the final output phase (unless output to
+ floating point was requested).
diff --git a/lib/jpegli/adaptive_quantization.cc b/lib/jpegli/adaptive_quantization.cc
new file mode 100644
index 0000000..6a8c4d3
--- /dev/null
+++ b/lib/jpegli/adaptive_quantization.cc
@@ -0,0 +1,562 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/adaptive_quantization.h"
+
+#include <stddef.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <string>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/adaptive_quantization.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::AbsDiff;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::And;
+using hwy::HWY_NAMESPACE::Div;
+using hwy::HWY_NAMESPACE::Floor;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::Min;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::NegMulAdd;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::ShiftLeft;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Sqrt;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::ZeroIfNegative;
+
+static constexpr float kInputScaling = 1.0f / 255.0f;
+
+// Primary template: default to actual division via the vector `/` operator.
+template <typename T, class V>
+struct FastDivision {
+ HWY_INLINE V operator()(const V n, const V d) const { return n / d; }
+};
+// Partial specialization for float vectors.
+template <class V>
+struct FastDivision<float, V> {
+ // Refines ApproximateReciprocal(x) with one Newton-Raphson iteration.
+ static HWY_INLINE V ReciprocalNR(const V x) {
+ const auto rcp = ApproximateReciprocal(x);
+ const auto sum = Add(rcp, rcp);
+ const auto x_rcp = Mul(x, rcp);
+ return NegMulAdd(x_rcp, rcp, sum); // 2 * rcp - x * rcp * rcp
+ }
+
+ V operator()(const V n, const V d) const {
+#if 1 // Faster on SKX; the #else branch below is compiled out.
+ return Div(n, d);
+#else
+ return n * ReciprocalNR(d);
+#endif
+ }
+};
+
+// Approximates smooth functions via rational polynomials, i.e. returns
+// p(x) / q(x). Evaluates polynomials via Horner's scheme, which is faster than
+// Clenshaw recurrence for Chebyshev polynomials. Coefficients are stored lowest
+// degree first and replicated 4x so LoadDup128 is independent of lane count.
+template <size_t NP, size_t NQ, class D, class V, typename T>
+HWY_INLINE HWY_MAYBE_UNUSED V EvalRationalPolynomial(const D d, const V x,
+ const T (&p)[NP],
+ const T (&q)[NQ]) {
+ constexpr size_t kDegP = NP / 4 - 1; // degree of p; degrees above 7 are not handled
+ constexpr size_t kDegQ = NQ / 4 - 1; // degree of q; degrees above 7 are not handled
+ auto yp = LoadDup128(d, &p[kDegP * 4]);
+ auto yq = LoadDup128(d, &q[kDegQ * 4]);
+ // We use pointer arithmetic to refer to &p[(kDegP - n) * 4] to avoid a
+ // compiler warning that the index is out of bounds since we are already
+ // checking that it is not out of bounds with (kDegP >= n) and the access
+ // will be optimized away. Similarly with q and kDegQ.
+ HWY_FENCE;
+ if (kDegP >= 1) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 1) * 4)));
+ if (kDegQ >= 1) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 1) * 4)));
+ HWY_FENCE;
+ if (kDegP >= 2) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 2) * 4)));
+ if (kDegQ >= 2) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 2) * 4)));
+ HWY_FENCE;
+ if (kDegP >= 3) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 3) * 4)));
+ if (kDegQ >= 3) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 3) * 4)));
+ HWY_FENCE;
+ if (kDegP >= 4) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 4) * 4)));
+ if (kDegQ >= 4) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 4) * 4)));
+ HWY_FENCE;
+ if (kDegP >= 5) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 5) * 4)));
+ if (kDegQ >= 5) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 5) * 4)));
+ HWY_FENCE;
+ if (kDegP >= 6) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 6) * 4)));
+ if (kDegQ >= 6) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 6) * 4)));
+ HWY_FENCE;
+ if (kDegP >= 7) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 7) * 4)));
+ if (kDegQ >= 7) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 7) * 4)));
+
+ return FastDivision<T, V>()(yp, yq);
+}
+
+// Computes base-2 logarithm like std::log2. Undefined if negative / NaN.
+// L1 error ~3.9E-6
+template <class DF, class V>
+V FastLog2f(const DF df, V x) {
+ // 2,2 rational polynomial approximation of std::log1p(x) / std::log(2).
+ HWY_ALIGN const float p[4 * (2 + 1)] = {HWY_REP4(-1.8503833400518310E-06f),
+ HWY_REP4(1.4287160470083755E+00f),
+ HWY_REP4(7.4245873327820566E-01f)};
+ HWY_ALIGN const float q[4 * (2 + 1)] = {HWY_REP4(9.9032814277590719E-01f),
+ HWY_REP4(1.0096718572241148E+00f),
+ HWY_REP4(1.7409343003366853E-01f)};
+
+ const Rebind<int32_t, DF> di;
+ const auto x_bits = BitCast(di, x);
+
+ // Range reduction to [-1/3, 1/3] - 3 integer, 2 float ops
+ const auto exp_bits = Sub(x_bits, Set(di, 0x3f2aaaab)); // = 2/3
+ // Shifted exponent = log2; also used to clear mantissa.
+ const auto exp_shifted = ShiftRight<23>(exp_bits);
+ const auto mantissa = BitCast(df, Sub(x_bits, ShiftLeft<23>(exp_shifted))); // x rescaled into [2/3, 4/3)
+ const auto exp_val = ConvertTo(df, exp_shifted); // integer part of the result
+ return Add(EvalRationalPolynomial(df, Sub(mantissa, Set(df, 1.0f)), p, q),
+ exp_val);
+}
+
+// Computes 2^x as 2^floor(x) * p(frac) / q(frac); max relative error ~3e-7
+template <class DF, class V>
+V FastPow2f(const DF df, V x) {
+  const Rebind<int32_t, DF> di;
+  const auto whole = Floor(x);
+  const auto frac = Sub(x, whole);
+  const auto biased = Add(ConvertTo(di, whole), Set(di, 127));
+  const auto pow2_whole = BitCast(df, ShiftLeft<23>(biased));
+  auto p = Add(frac, Set(df, 1.01749063e+01));
+  p = MulAdd(p, frac, Set(df, 4.88687798e+01));
+  p = MulAdd(p, frac, Set(df, 9.85506591e+01));
+  p = Mul(p, pow2_whole);
+  auto q = MulAdd(frac, Set(df, 2.10242958e-01), Set(df, -2.22328856e-02));
+  q = MulAdd(q, frac, Set(df, -1.94414990e+01));
+  q = MulAdd(q, frac, Set(df, 9.85506633e+01));
+  return Div(p, q);
+}
+
+inline float FastPow2f(float f) {
+  HWY_CAPPED(float, 1) d1;
+  const auto result = FastPow2f(d1, Set(d1, f));
+  return GetLane(result);
+}
+
+// The following functions modulate an exponent (out_val) and return the updated
+// value. Their descriptor is limited to 8 lanes for 8x8 blocks.
+
+template <class D, class V>
+V ComputeMask(const D d, const V out_val) {
+  const auto base = Set(d, -0.74174993f);
+  const auto weight_lin = Set(d, 12.906028311180409f);
+  const auto offset_lin = Set(d, 305.04035728311436f);
+  const auto weight_sq_a = Set(d, 5.0220313103171232f);
+  const auto offset_sq_a = Set(d, 2.1925739705298404f);
+  const auto weight_sq_b = Set(d, 3.2353257320940401f);
+  const auto offset_sq_b = Mul(Set(d, 0.25f), offset_sq_a);
+  const auto in_scale = Set(d, 0.74760422233706747f);
+  const auto one = Set(d, 1.0f);
+  // Avoid division by zero.
+  const auto t = Max(Mul(out_val, in_scale), Set(d, 1e-3f));
+  const auto term_lin = Div(one, Add(t, offset_lin));
+  const auto term_sq_a = Div(one, MulAdd(t, t, offset_sq_a));
+  const auto term_sq_b = Div(one, MulAdd(t, t, offset_sq_b));
+  // TODO(jyrki):
+  // A log or two here could make sense. In butteraugli we have effectively
+  // log(log(x + C)) for this kind of use, as a single log is used in
+  // saturating visual masking and here the modulation values are exponential,
+  // another log would counter that.
+  const auto inner = MulAdd(weight_lin, term_lin, Mul(weight_sq_a, term_sq_a));
+  return Add(base, MulAdd(weight_sq_b, term_sq_b, inner));
+}
+
+// mul and mul2 represent a scaling difference between jxl and butteraugli.
+static const float kSGmul = 226.0480446705883f;
+static const float kSGmul2 = 1.0f / 73.377132366608819f;
+static const float kLog2 = 0.693147181f; // natural logarithm of 2
+// Includes correction factor for std::log -> log2.
+static const float kSGRetMul = kSGmul2 * 18.6580932135f * kLog2;
+static const float kSGVOffset = 7.14672470003f;
+
+template <bool invert, typename D, typename V>
+V RatioOfDerivativesOfCubicRootToSimpleGamma(const D d, V v) {
+  // The opsin space in jxl is the cubic root of photons, i.e., v * v * v
+  // is related to the number of photons.
+  //
+  // SimpleGamma(v * v * v) is the psychovisual space in butteraugli.
+  // This ratio allows quantization to move from jxl's opsin space to
+  // butteraugli's log-gamma space. Negative inputs are treated as zero.
+  static const float kEpsilon = 1e-2;
+  static const float kNumOffset = kEpsilon / kInputScaling / kInputScaling;
+  static const float kNumMul = kSGRetMul * 3 * kSGmul;
+  static const float kVOffset = (kSGVOffset * kLog2 + kEpsilon) / kInputScaling;
+  static const float kDenMul = kLog2 * kSGmul * kInputScaling * kInputScaling;
+
+  const auto x = ZeroIfNegative(v);
+  const auto x_sq = Mul(x, x);
+  // numerator = kNumMul * x^2 + kNumOffset
+  const auto numerator = MulAdd(Set(d, kNumMul), x_sq, Set(d, kNumOffset));
+  // denominator = (kDenMul * x) * x^2 + kVOffset
+  const auto cubic_coeff = Mul(Set(d, kDenMul), x);
+  const auto denominator = MulAdd(cubic_coeff, x_sq, Set(d, kVOffset));
+  if (invert) {
+    return Div(numerator, denominator);
+  }
+  return Div(denominator, numerator);
+}
+
+template <bool invert = false>
+static float RatioOfDerivativesOfCubicRootToSimpleGamma(float v) {
+  // Scalar convenience wrapper: runs the vector version on a single lane.
+  using D1 = HWY_CAPPED(float, 1);
+  const auto in = Load(D1(), &v);
+  return GetLane(RatioOfDerivativesOfCubicRootToSimpleGamma<invert>(D1(), in));
+}
+
+// TODO(veluca): this function computes an approximation of the derivative of
+// SimpleGamma with (f(x+eps)-f(x))/eps. Consider two-sided approximation or
+// exact derivatives. For reference, SimpleGamma was:
+/*
+template <typename D, typename V>
+V SimpleGamma(const D d, V v) {
+ // A simple HDR compatible gamma function.
+ const auto mul = Set(d, kSGmul);
+ const auto kRetMul = Set(d, kSGRetMul);
+ const auto kRetAdd = Set(d, kSGmul2 * -20.2789020414f);
+ const auto kVOffset = Set(d, kSGVOffset);
+
+ v *= mul;
+
+ // This should happen rarely, but may lead to a NaN, which is rather
+ // undesirable. Since negative photons don't exist we solve the NaNs by
+ // clamping here.
+ // TODO(veluca): with FastLog2f, this no longer leads to NaNs.
+ v = ZeroIfNegative(v);
+ return kRetMul * FastLog2f(d, v + kVOffset) + kRetAdd;
+}
+*/
+
+template <class D, class V>
+V GammaModulation(const D d, const size_t x, const size_t y,
+ const RowBuffer<float>& input, const V out_val) {
+ static const float kBias = 0.16f / kInputScaling;
+ static const float kScale = kInputScaling / 64.0f;
+ auto overall_ratio = Zero(d);
+ const auto bias = Set(d, kBias);
+ const auto scale = Set(d, kScale);
+ const float* const JXL_RESTRICT block_start = input.Row(y) + x; // top-left sample of the 8x8 block
+ for (size_t dy = 0; dy < 8; ++dy) {
+ const float* const JXL_RESTRICT row_in = block_start + dy * input.stride();
+ for (size_t dx = 0; dx < 8; dx += Lanes(d)) {
+ const auto iny = Add(Load(d, row_in + dx), bias);
+ const auto ratio_g =
+ RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/true>(d, iny);
+ overall_ratio = Add(overall_ratio, ratio_g);
+ }
+ }
+ overall_ratio = Mul(SumOfLanes(d, overall_ratio), scale); // block sum scaled by kInputScaling / 64
+ // ideally -1.0, but likely optimal correction adds some entropy, so slightly
+ // less than that.
+ // ln(2) constant folded in because we want std::log but have FastLog2f.
+ const auto kGam = Set(d, -0.15526878023684174f * 0.693147180559945f);
+ return MulAdd(kGam, FastLog2f(d, overall_ratio), out_val); // out_val + kGam * log2(overall_ratio)
+}
+
+// Change precision in 8x8 blocks that have high frequency content.
+template <class D, class V>
+V HfModulation(const D d, const size_t x, const size_t y,
+ const RowBuffer<float>& input, const V out_val) {
+ // Zero out the invalid differences for the rightmost value per row.
+ const Rebind<uint32_t, D> du;
+ HWY_ALIGN constexpr uint32_t kMaskRight[8] = {~0u, ~0u, ~0u, ~0u,
+ ~0u, ~0u, ~0u, 0}; // column 7 has no right neighbor inside the block
+
+ auto sum = Zero(d); // sum of absolute differences with right and below
+ static const float kSumCoeff = -2.0052193233688884f * kInputScaling / 112.0; // 112: presumably 56 right + 56 below pairs
+ auto sumcoeff = Set(d, kSumCoeff);
+
+ const float* const JXL_RESTRICT block_start = input.Row(y) + x; // top-left sample of the 8x8 block
+ for (size_t dy = 0; dy < 8; ++dy) {
+ const float* JXL_RESTRICT row_in = block_start + dy * input.stride();
+ const float* JXL_RESTRICT row_in_next =
+ dy == 7 ? row_in : row_in + input.stride(); // last row is compared with itself (difference 0)
+
+ for (size_t dx = 0; dx < 8; dx += Lanes(d)) {
+ const auto p = Load(d, row_in + dx);
+ const auto pr = LoadU(d, row_in + dx + 1);
+ const auto mask = BitCast(d, Load(du, kMaskRight + dx));
+ sum = Add(sum, And(mask, AbsDiff(p, pr)));
+ const auto pd = Load(d, row_in_next + dx);
+ sum = Add(sum, AbsDiff(p, pd));
+ }
+ }
+
+ sum = SumOfLanes(d, sum);
+ return MulAdd(sum, sumcoeff, out_val); // out_val + sum * kSumCoeff
+}
+
+void PerBlockModulations(const float y_quant_01, const RowBuffer<float>& input,
+                         const size_t yb0, const size_t yblen,
+                         RowBuffer<float>* aq_map) {
+  // `strength` fades linearly from 1 to 0 as y_quant_01 goes from ramp_begin
+  // to ramp_end; at 0 every block gets the constant flat_level.
+  static const float kAcQuant = 0.841f;
+  const float flat_level = 0.48f * kAcQuant;
+  const float ramp_begin = 9.0f;
+  const float ramp_end = 65.0f;
+  float strength = 1.0f;
+  if (y_quant_01 >= ramp_begin) {
+    strength = 1.0f - ((y_quant_01 - ramp_begin) / (ramp_end - ramp_begin));
+    if (strength < 0) {
+      strength = 0;
+    }
+  }
+  const float gain = kAcQuant * strength;
+  const float bias = (1.0f - strength) * flat_level;
+  const HWY_CAPPED(float, 8) df;
+  for (size_t iy = 0; iy < yblen; iy++) {
+    const size_t block_y = yb0 + iy;
+    const size_t py = block_y * 8;
+    float* const JXL_RESTRICT row = aq_map->Row(block_y);
+    for (size_t block_x = 0; block_x < aq_map->xsize(); block_x++) {
+      const size_t px = block_x * 8;
+      auto exponent = ComputeMask(df, Set(df, row[block_x]));
+      exponent = HfModulation(df, px, py, input, exponent);
+      exponent = GammaModulation(df, px, py, input, exponent);
+      // We want multiplicative quantization field, so everything
+      // until this point has been modulating the exponent.
+      row[block_x] = FastPow2f(GetLane(exponent) * 1.442695041f) * gain + bias;
+    }
+  }
+}
+
+template <typename D, typename V>
+V MaskingSqrt(const D d, V v) {
+  static const float kMul = 211.50759899638012f;
+  static const float kLogOffset = 28;
+  const auto slope = Sqrt(Set(d, kMul * 1e8));
+  const auto scaled = MulAdd(v, slope, Set(d, kLogOffset));
+  return Mul(Set(d, 0.25f), Sqrt(scaled));
+}
+
+template <typename V>
+void Sort4(V& min0, V& min1, V& min2, V& min3) {
+  const auto lo01 = Min(min0, min1);  // order each input pair
+  const auto hi01 = Max(min0, min1);
+  const auto lo23 = Min(min2, min3);
+  const auto hi23 = Max(min2, min3);
+  const auto mid_a = Max(lo01, lo23);  // the two middle candidates
+  const auto mid_b = Min(hi01, hi23);
+  min0 = Min(lo01, lo23);  // overall smallest
+  min1 = Min(mid_a, mid_b);
+  min2 = Max(mid_a, mid_b);
+  min3 = Max(hi01, hi23);  // overall largest
+}
+
+template <typename V>
+void UpdateMin4(const V v, V& min0, V& min1, V& min2, V& min3) {
+  const auto bumped0 = Max(min0, v);  // value pushed out of slot 0
+  const auto bumped1 = Max(min1, bumped0);  // value pushed out of slot 1
+  const auto bumped2 = Max(min2, bumped1);  // value pushed out of slot 2
+  min0 = Min(min0, v);
+  min1 = Min(min1, bumped0);
+  min2 = Min(min2, bumped1);
+  min3 = Min(min3, bumped2);
+}
+
+// Computes a linear combination of the 4 lowest values of the 3x3 neighborhood
+// of each pixel. Output is downsampled 2x.
+void FuzzyErosion(const RowBuffer<float>& pre_erosion, const size_t yb0,
+ const size_t yblen, RowBuffer<float>* tmp,
+ RowBuffer<float>* aq_map) {
+ int xsize_blocks = aq_map->xsize();
+ int xsize = pre_erosion.xsize();
+ HWY_FULL(float) d;
+ const auto mul0 = Set(d, 0.125f); // weight of the smallest value
+ const auto mul1 = Set(d, 0.075f);
+ const auto mul2 = Set(d, 0.06f);
+ const auto mul3 = Set(d, 0.05f); // weight of the 4th smallest value
+ for (size_t iy = 0; iy < 2 * yblen; ++iy) { // two pre_erosion rows per aq_map row
+ size_t y = 2 * yb0 + iy;
+ const float* JXL_RESTRICT rowt = pre_erosion.Row(y - 1);
+ const float* JXL_RESTRICT rowm = pre_erosion.Row(y);
+ const float* JXL_RESTRICT rowb = pre_erosion.Row(y + 1);
+ float* row_out = tmp->Row(y);
+ for (int x = 0; x < xsize; x += Lanes(d)) {
+ int xm1 = x - 1;
+ int xp1 = x + 1;
+ auto min0 = LoadU(d, rowm + x);
+ auto min1 = LoadU(d, rowm + xm1);
+ auto min2 = LoadU(d, rowm + xp1);
+ auto min3 = LoadU(d, rowt + xm1);
+ Sort4(min0, min1, min2, min3); // now min0 <= min1 <= min2 <= min3
+ UpdateMin4(LoadU(d, rowt + x), min0, min1, min2, min3); // fold in the remaining 5 neighbors
+ UpdateMin4(LoadU(d, rowt + xp1), min0, min1, min2, min3);
+ UpdateMin4(LoadU(d, rowb + xm1), min0, min1, min2, min3);
+ UpdateMin4(LoadU(d, rowb + x), min0, min1, min2, min3);
+ UpdateMin4(LoadU(d, rowb + xp1), min0, min1, min2, min3);
+ const auto v = Add(Add(Mul(mul0, min0), Mul(mul1, min1)),
+ Add(Mul(mul2, min2), Mul(mul3, min3)));
+ Store(v, d, row_out + x);
+ }
+ if (iy % 2 == 1) { // a pair of rows is ready: sum each 2x2 group into aq_map
+ const float* JXL_RESTRICT row_out0 = tmp->Row(y - 1);
+ float* JXL_RESTRICT aq_out = aq_map->Row(yb0 + iy / 2);
+ for (int bx = 0, x = 0; bx < xsize_blocks; ++bx, x += 2) {
+ aq_out[bx] =
+ (row_out[x] + row_out[x + 1] + row_out0[x] + row_out0[x + 1]);
+ }
+ }
+ }
+}
+
+void ComputePreErosion(const RowBuffer<float>& input, const size_t xsize,
+ const size_t y0, const size_t ylen, int border,
+ float* diff_buffer, RowBuffer<float>* pre_erosion) {
+ const size_t xsize_out = xsize / 4; // output is subsampled 4x horizontally
+ const size_t y0_out = y0 / 4; // ... and 4x vertically
+
+ // The XYB gamma is 3.0 to be able to decode faster with two muls.
+ // Butteraugli's gamma is matching the gamma of human eye, around 2.6.
+ // We approximate the gamma difference by adding one cubic root into
+ // the adaptive quantization. This gives us a total gamma of 2.6666
+ // for quantization uses.
+ static const float match_gamma_offset = 0.019 / kInputScaling;
+
+ const HWY_CAPPED(float, 8) df;
+
+ static const float limit = 0.2f; // upper clamp applied to the squared difference
+ // Computes image (padded to multiple of 8x8) of local pixel differences.
+ // Subsample both directions by 4.
+ for (size_t iy = 0; iy < ylen; ++iy) {
+ size_t y = y0 + iy;
+ const float* row_in = input.Row(y);
+ const float* row_in1 = input.Row(y + 1); // next row
+ const float* row_in2 = input.Row(y - 1); // previous row
+ float* JXL_RESTRICT row_out = diff_buffer;
+ const auto match_gamma_offset_v = Set(df, match_gamma_offset);
+ const auto quarter = Set(df, 0.25f);
+ for (size_t x = 0; x < xsize; x += Lanes(df)) {
+ const auto in = LoadU(df, row_in + x);
+ const auto in_r = LoadU(df, row_in + x + 1);
+ const auto in_l = LoadU(df, row_in + x - 1);
+ const auto in_t = LoadU(df, row_in2 + x);
+ const auto in_b = LoadU(df, row_in1 + x);
+ const auto base = Mul(quarter, Add(Add(in_r, in_l), Add(in_t, in_b))); // mean of the 4 direct neighbors
+ const auto gammacv =
+ RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/false>(
+ df, Add(in, match_gamma_offset_v));
+ auto diff = Mul(gammacv, Sub(in, base));
+ diff = Mul(diff, diff);
+ diff = Min(diff, Set(df, limit));
+ diff = MaskingSqrt(df, diff);
+ if ((iy & 3) != 0) { // rows 1..3 of a group add to the sums started by row 0
+ diff = Add(diff, LoadU(df, row_out + x));
+ }
+ StoreU(diff, df, row_out + x);
+ }
+ if (iy % 4 == 3) { // group of 4 rows complete: reduce each run of 4 columns to one sample
+ size_t y_out = y0_out + iy / 4;
+ float* row_dout = pre_erosion->Row(y_out);
+ for (size_t x = 0; x < xsize_out; x++) {
+ row_dout[x] = (row_out[x * 4] + row_out[x * 4 + 1] +
+ row_out[x * 4 + 2] + row_out[x * 4 + 3]) *
+ 0.25f;
+ }
+ pre_erosion->PadRow(y_out, xsize_out, border);
+ }
+ }
+}
+
+} // namespace
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+HWY_EXPORT(ComputePreErosion);
+HWY_EXPORT(FuzzyErosion);
+HWY_EXPORT(PerBlockModulations);
+
+namespace {
+
+static constexpr int kPreErosionBorder = 1;
+
+} // namespace
+
+void ComputeAdaptiveQuantField(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ if (!m->use_adaptive_quantization) { // nothing to do when the feature is disabled
+ return;
+ }
+ int y_channel = cinfo->jpeg_color_space == JCS_RGB ? 1 : 0; // component that drives the field: index 1 for JCS_RGB, else 0
+ jpeg_component_info* y_comp = &cinfo->comp_info[y_channel];
+ int y_quant_01 = cinfo->quant_tbl_ptrs[y_comp->quant_tbl_no]->quantval[1];
+ if (m->next_iMCU_row == 0) {
+ m->input_buffer[y_channel].CopyRow(-1, 0, 1); // presumably duplicates row 0 into row -1 as top padding
+ }
+ if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) {
+ size_t last_row = m->ysize_blocks * DCTSIZE - 1;
+ m->input_buffer[y_channel].CopyRow(last_row + 1, last_row, 1); // presumably bottom padding, likewise
+ }
+ const RowBuffer<float>& input = m->input_buffer[y_channel];
+ const size_t xsize_blocks = y_comp->width_in_blocks;
+ const size_t xsize = xsize_blocks * DCTSIZE;
+ const size_t yb0 = m->next_iMCU_row * cinfo->max_v_samp_factor; // first block row of this iMCU row
+ const size_t yblen = cinfo->max_v_samp_factor; // block rows per iMCU row
+ size_t y0 = yb0 * DCTSIZE;
+ size_t ylen = cinfo->max_v_samp_factor * DCTSIZE;
+ if (y0 == 0) { // first iMCU row: pre-erosion also covers 4 extra rows
+ ylen += 4;
+ } else { // later iMCU rows: the pre-erosion window starts 4 rows further down
+ y0 += 4;
+ }
+ if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) { // last iMCU row: window is 4 rows shorter
+ ylen -= 4;
+ }
+ HWY_DYNAMIC_DISPATCH(ComputePreErosion)
+ (input, xsize, y0, ylen, kPreErosionBorder, m->diff_buffer, &m->pre_erosion);
+ if (y0 == 0) {
+ m->pre_erosion.CopyRow(-1, 0, kPreErosionBorder);
+ }
+ if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) {
+ size_t last_row = m->ysize_blocks * 2 - 1; // pre_erosion has 2 rows per block row
+ m->pre_erosion.CopyRow(last_row + 1, last_row, kPreErosionBorder);
+ }
+ HWY_DYNAMIC_DISPATCH(FuzzyErosion)
+ (m->pre_erosion, yb0, yblen, &m->fuzzy_erosion_tmp, &m->quant_field);
+ HWY_DYNAMIC_DISPATCH(PerBlockModulations)
+ (y_quant_01, input, yb0, yblen, &m->quant_field);
+ for (int y = 0; y < cinfo->max_v_samp_factor; ++y) {
+ float* row = m->quant_field.Row(yb0 + y);
+ for (size_t x = 0; x < xsize_blocks; ++x) {
+ row[x] = std::max(0.0f, (0.6f / row[x]) - 1.0f); // final value: max(0, 0.6 / q - 1)
+ }
+ }
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/lib/jpegli/adaptive_quantization.h b/lib/jpegli/adaptive_quantization.h
new file mode 100644
index 0000000..d8537e8
--- /dev/null
+++ b/lib/jpegli/adaptive_quantization.h
@@ -0,0 +1,17 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ADAPTIVE_QUANTIZATION_H_
+#define LIB_JPEGLI_ADAPTIVE_QUANTIZATION_H_
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+// Fills cinfo->master->quant_field for the current iMCU row (if enabled).
+void ComputeAdaptiveQuantField(j_compress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_ADAPTIVE_QUANTIZATION_H_
diff --git a/lib/jpegli/bit_writer.cc b/lib/jpegli/bit_writer.cc
new file mode 100644
index 0000000..9788f35
--- /dev/null
+++ b/lib/jpegli/bit_writer.cc
@@ -0,0 +1,60 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/bit_writer.h"
+
+#include "lib/jpegli/encode_internal.h"
+
+namespace jpegli {
+
+void JpegBitWriterInit(j_compress_ptr cinfo) {
+  JpegBitWriter* writer = &cinfo->master->bw;
+  const size_t capacity =
+      cinfo->master->blocks_per_iMCU_row * (DCTSIZE2 * 16 + 8) + (1 << 16);
+  writer->cinfo = cinfo;
+  writer->data = Allocate<uint8_t>(cinfo, capacity, JPOOL_IMAGE);
+  writer->len = capacity;
+  writer->pos = 0;
+  writer->output_pos = 0;
+  writer->put_buffer = 0;
+  writer->free_bits = 64;  // the 64-bit accumulator starts out empty
+  writer->healthy = true;
+}
+
+bool EmptyBitWriterBuffer(JpegBitWriter* bw) {
+  // Copies pending bytes to the destination; returns false if it suspends.
+  while (bw->output_pos < bw->pos) {
+    j_compress_ptr cinfo = bw->cinfo;
+    const bool dest_full = cinfo->dest->free_in_buffer == 0;
+    if (dest_full && !(*cinfo->dest->empty_output_buffer)(cinfo)) return false;
+    auto* dest = cinfo->dest;
+    const size_t pending = bw->pos - bw->output_pos;
+    const size_t chunk = std::min<size_t>(dest->free_in_buffer, pending);
+    memcpy(dest->next_output_byte, bw->data + bw->output_pos, chunk);
+    dest->next_output_byte += chunk;
+    dest->free_in_buffer -= chunk;
+    bw->output_pos += chunk;
+  }
+  bw->pos = bw->output_pos = 0;
+  return true;
+}
+
+void JumpToByteBoundary(JpegBitWriter* bw) {
+  // Pads the current byte with 1-bits, then emits all buffered whole bytes.
+  size_t n_bits = bw->free_bits & 7u;
+  if (n_bits > 0) WriteBits(bw, n_bits, (1u << n_bits) - 1);
+  // Left-align the pending bits. An empty buffer has free_bits == 64, and
+  // shifting a 64-bit value by 64 is undefined behavior, so skip that case.
+  if (bw->free_bits < 64) bw->put_buffer <<= bw->free_bits;
+  while (bw->free_bits <= 56) {
+    EmitByte(bw, (bw->put_buffer >> 56) & 0xFF);
+    bw->put_buffer <<= 8;
+    bw->free_bits += 8;
+  }
+  bw->put_buffer = 0;
+  bw->free_bits = 64;
+}
+
+} // namespace jpegli
diff --git a/lib/jpegli/bit_writer.h b/lib/jpegli/bit_writer.h
new file mode 100644
index 0000000..3adf1ea
--- /dev/null
+++ b/lib/jpegli/bit_writer.h
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_BIT_WRITER_H_
+#define LIB_JPEGLI_BIT_WRITER_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#include "lib/jpegli/common.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jpegli {
+
+// Handles the packing of bits into output bytes.
+struct JpegBitWriter {
+ j_compress_ptr cinfo; // back-pointer, used to reach cinfo->dest when flushing
+ uint8_t* data; // byte buffer allocated in JpegBitWriterInit
+ size_t len; // size of data in bytes
+ size_t pos; // index in data where the next byte is written
+ size_t output_pos; // bytes of data already copied to the destination
+ uint64_t put_buffer; // bit accumulator; pending bits sit at the low end
+ int free_bits; // number of unused bits in put_buffer (64 when empty)
+ bool healthy; // set to false when WriteBits is called with nbits == 0
+};
+
+void JpegBitWriterInit(j_compress_ptr cinfo);
+
+bool EmptyBitWriterBuffer(JpegBitWriter* bw);
+
+void JumpToByteBoundary(JpegBitWriter* bw);
+
+// Returns non-zero if and only if at least one of the eight bytes of x is
+// zero, i.e. one of x & 0xff, x & 0xff00, ..., x & 0xff00000000000000 is zero.
+static JXL_INLINE uint64_t HasZeroByte(uint64_t x) {
+  return ~x & (x - 0x0101010101010101ULL) & 0x8080808080808080ULL;
+}
+
+/**
+ * Appends |byte| to the output, followed by a stuffed zero if it is 0xFF.
+ * This method is "careless" - caller must make sure that there is enough
+ * space in the output buffer. Emits up to 2 bytes to buffer.
+ */
+static JXL_INLINE void EmitByte(JpegBitWriter* bw, int byte) {
+  bw->data[bw->pos++] = byte;
+  if (byte != 0xFF) return;
+  bw->data[bw->pos++] = 0;
+}
+
+// Writes all 64 bits of put_buffer to the output buffer, most significant
+// byte first. Like EmitByte this is "careless": the caller must make sure
+// that there is enough space in the output buffer (up to 16 bytes).
+static JXL_INLINE void DischargeBitBuffer(JpegBitWriter* bw) {
+  // At this point we are ready to emit the bytes of put_buffer to the output.
+  // The JPEG format requires that after every 0xff byte in the entropy
+  // coded section, there is a zero byte, therefore we first check if any of
+  // the bytes of put_buffer is 0xFF.
+  const uint64_t word = bw->put_buffer;
+  if (!HasZeroByte(~word)) {
+    // We don't have any 0xFF bytes, output all 8 bytes without checking.
+    StoreBE64(word, bw->data + bw->pos);
+    bw->pos += 8;
+    return;
+  }
+  // We have a 0xFF byte somewhere, examine each byte and append a zero
+  // byte if necessary.
+  for (int shift = 56; shift >= 0; shift -= 8) {
+    EmitByte(bw, (word >> shift) & 0xFF);
+  }
+}
+
+
+static JXL_INLINE void WriteBits(JpegBitWriter* bw, int nbits, uint64_t bits) {
+ // This is an optimization; if everything goes well,
+ // then |nbits| is positive; if non-existing Huffman symbol is going to be
+ // encoded, its length should be zero; later encoder could check the
+ // "health" of JpegBitWriter.
+ if (nbits == 0) {
+ bw->healthy = false;
+ return;
+ }
+ bw->free_bits -= nbits;
+ if (bw->free_bits < 0) { // not enough room: fill the buffer, flush it, keep the rest
+ bw->put_buffer <<= (bw->free_bits + nbits); // shift amount equals the previous free_bits
+ bw->put_buffer |= (bits >> -bw->free_bits); // leading part of |bits| that still fits
+ DischargeBitBuffer(bw);
+ bw->free_bits += 64;
+ bw->put_buffer = nbits; // NOTE(review): value looks arbitrary; verify these bits are never emitted
+ }
+ bw->put_buffer <<= nbits;
+ bw->put_buffer |= bits;
+}
+
+} // namespace jpegli
+#endif // LIB_JPEGLI_BIT_WRITER_H_
diff --git a/lib/jpegli/bitstream.cc b/lib/jpegli/bitstream.cc
new file mode 100644
index 0000000..3448367
--- /dev/null
+++ b/lib/jpegli/bitstream.cc
@@ -0,0 +1,452 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/bitstream.h"
+
+#include <cmath>
+
+#include "lib/jpegli/bit_writer.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+
+namespace jpegli {
+
+// Copies |bufsize| bytes from |buf| into the destination manager's output
+// buffer, asking the manager for a fresh buffer whenever the current one is
+// full. Raises an error if the manager's empty_output_buffer callback
+// returns false.
+void WriteOutput(j_compress_ptr cinfo, const uint8_t* buf, size_t bufsize) {
+  for (size_t written = 0; written < bufsize;) {
+    if (cinfo->dest->free_in_buffer == 0) {
+      if (!(*cinfo->dest->empty_output_buffer)(cinfo)) {
+        JPEGLI_ERROR("Destination suspension is not supported in markers.");
+      }
+    }
+    // Copy as much as fits into the space that is currently available.
+    const size_t remaining = bufsize - written;
+    const size_t chunk =
+        std::min<size_t>(cinfo->dest->free_in_buffer, remaining);
+    memcpy(cinfo->dest->next_output_byte, buf + written, chunk);
+    cinfo->dest->next_output_byte += chunk;
+    cinfo->dest->free_in_buffer -= chunk;
+    written += chunk;
+  }
+}
+
+// Convenience overload: writes the full contents of |bytes|.
+void WriteOutput(j_compress_ptr cinfo, const std::vector<uint8_t>& bytes) {
+  const uint8_t* first = bytes.data();
+  const size_t count = bytes.size();
+  WriteOutput(cinfo, first, count);
+}
+
+// Convenience overload: writes the bytes of a braced list.
+void WriteOutput(j_compress_ptr cinfo, std::initializer_list<uint8_t> bytes) {
+  const uint8_t* first = bytes.begin();
+  const size_t count = bytes.size();
+  WriteOutput(cinfo, first, count);
+}
+
+// Writes the 18-byte JFIF APP0 marker segment: the "JFIF\0" tag, the JFIF
+// version, the density unit, the X and Y densities as big-endian 16-bit
+// values, and two trailing zero bytes.
+void EncodeAPP0(j_compress_ptr cinfo) {
+  const uint8_t x_density_hi = static_cast<uint8_t>(cinfo->X_density >> 8);
+  const uint8_t x_density_lo = static_cast<uint8_t>(cinfo->X_density & 0xff);
+  const uint8_t y_density_hi = static_cast<uint8_t>(cinfo->Y_density >> 8);
+  const uint8_t y_density_lo = static_cast<uint8_t>(cinfo->Y_density & 0xff);
+  WriteOutput(cinfo, {0xff, 0xe0, 0, 16, 'J', 'F', 'I', 'F', '\0',
+                      cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
+                      cinfo->density_unit, x_density_hi, x_density_lo,
+                      y_density_hi, y_density_lo, 0, 0});
+}
+
+// Writes the 16-byte Adobe APP14 marker segment. The last byte is the color
+// transform flag: 1 for JCS_YCbCr, 2 for JCS_YCCK, 0 for everything else.
+void EncodeAPP14(j_compress_ptr cinfo) {
+  uint8_t color_transform = 0;
+  if (cinfo->jpeg_color_space == JCS_YCbCr) {
+    color_transform = 1;
+  } else if (cinfo->jpeg_color_space == JCS_YCCK) {
+    color_transform = 2;
+  }
+  WriteOutput(cinfo, {0xff, 0xee, 0, 14, 'A', 'd', 'o', 'b', 'e', 0, 100, 0, 0,
+                      0, 0, color_transform});
+}
+
+// Writes the start of the file: the SOI marker, then the JFIF APP0 and Adobe
+// APP14 segments when the corresponding cinfo flags request them.
+void WriteFileHeader(j_compress_ptr cinfo) {
+  const uint8_t soi[2] = {0xFF, 0xD8};  // SOI
+  WriteOutput(cinfo, soi, sizeof(soi));
+  if (cinfo->write_JFIF_header) EncodeAPP0(cinfo);
+  if (cinfo->write_Adobe_marker) EncodeAPP14(cinfo);
+}
+
+// Writes a DQT marker segment with every selected quantization table that has
+// not been sent yet and marks those tables as sent. With |write_all_tables|
+// every defined table slot is selected; otherwise only the slots referenced
+// by the image components are. Nothing is written when no table needs to be
+// sent. Returns true if none of the selected tables (sent or not) contains a
+// value above 255, i.e. only 8-bit tables are in use.
+bool EncodeDQT(j_compress_ptr cinfo, bool write_all_tables) {
+  uint8_t data[4 + NUM_QUANT_TBLS * (1 + 2 * DCTSIZE2)];  // 520 bytes
+  data[0] = 0xFF;
+  data[1] = 0xDB;
+  // data[2] and data[3] hold the segment length, which is filled in last.
+  size_t pos = 4;
+
+  bool wanted[NUM_QUANT_TBLS] = {};
+  if (write_all_tables) {
+    for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+      wanted[i] = cinfo->quant_tbl_ptrs[i] != nullptr;
+    }
+  } else {
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      wanted[cinfo->comp_info[c].quant_tbl_no] = true;
+    }
+  }
+
+  bool is_baseline = true;
+  for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+    if (!wanted[i]) continue;
+    JQUANT_TBL* table = cinfo->quant_tbl_ptrs[i];
+    if (table == nullptr) {
+      JPEGLI_ERROR("Missing quant table %d", i);
+    }
+    // A value above 255 needs 16-bit entries. This is checked before the
+    // sent_table test so that already-sent tables still affect the result.
+    bool wide = false;
+    for (size_t k = 0; k < DCTSIZE2; ++k) {
+      if (table->quantval[k] > 255) wide = true;
+    }
+    if (wide) is_baseline = false;
+    if (table->sent_table) continue;
+
+    // Table header byte: precision flag in the high nibble, slot in the low.
+    data[pos++] = (wide ? 16 : 0) + i;
+    for (size_t k = 0; k < DCTSIZE2; ++k) {
+      const int value = table->quantval[kJPEGNaturalOrder[k]];
+      if (value == 0) {
+        JPEGLI_ERROR("Invalid quantval 0.");
+      }
+      if (wide) data[pos++] = value >> 8;
+      data[pos++] = value & 0xFFu;
+    }
+    table->sent_table = TRUE;
+  }
+
+  if (pos > 4) {
+    // The length field counts everything after the two marker bytes.
+    const size_t segment_len = pos - 2;
+    data[2] = segment_len >> 8u;
+    data[3] = segment_len & 0xFFu;
+    WriteOutput(cinfo, data, pos);
+  }
+  return is_baseline;
+}
+
+// Writes the frame header (SOF) segment: sample precision, image dimensions
+// and, per component, its id, sampling factors and quantization table index.
+// The marker is SOF2 in progressive mode, SOF0 when |is_baseline| is set and
+// SOF1 otherwise. Raises an error for an unsupported data precision or a
+// component that refers to a missing quantization table.
+void EncodeSOF(j_compress_ptr cinfo, bool is_baseline) {
+  if (cinfo->data_precision != kJpegPrecision) {
+    is_baseline = false;
+    JPEGLI_ERROR("Unsupported data precision %d", cinfo->data_precision);
+  }
+  uint8_t marker;
+  if (cinfo->progressive_mode) {
+    marker = 0xc2;
+  } else if (is_baseline) {
+    marker = 0xc0;
+  } else {
+    marker = 0xc1;
+  }
+  const size_t n_comps = cinfo->num_components;
+  const size_t marker_len = 8 + 3 * n_comps;
+
+  // The two marker bytes are not counted in marker_len.
+  std::vector<uint8_t> data;
+  data.reserve(marker_len + 2);
+  data.push_back(0xFF);
+  data.push_back(marker);
+  data.push_back(static_cast<uint8_t>(marker_len >> 8u));
+  data.push_back(static_cast<uint8_t>(marker_len & 0xFFu));
+  data.push_back(static_cast<uint8_t>(kJpegPrecision));
+  data.push_back(static_cast<uint8_t>(cinfo->image_height >> 8u));
+  data.push_back(static_cast<uint8_t>(cinfo->image_height & 0xFFu));
+  data.push_back(static_cast<uint8_t>(cinfo->image_width >> 8u));
+  data.push_back(static_cast<uint8_t>(cinfo->image_width & 0xFFu));
+  data.push_back(static_cast<uint8_t>(n_comps));
+  for (size_t i = 0; i < n_comps; ++i) {
+    const jpeg_component_info* comp = &cinfo->comp_info[i];
+    data.push_back(static_cast<uint8_t>(comp->component_id));
+    data.push_back(static_cast<uint8_t>((comp->h_samp_factor << 4u) |
+                                        (comp->v_samp_factor)));
+    const uint32_t quant_idx = comp->quant_tbl_no;
+    if (cinfo->quant_tbl_ptrs[quant_idx] == nullptr) {
+      JPEGLI_ERROR("Invalid component quant table index %u.", quant_idx);
+    }
+    data.push_back(static_cast<uint8_t>(quant_idx));
+  }
+  WriteOutput(cinfo, data);
+}
+
+// Writes the quantization tables used by the image components, followed by
+// the frame header. The frame is treated as baseline only if the tables are
+// 8-bit, the image is neither progressive nor arithmetic coded, the data
+// precision is 8, and every Huffman slot id is 0x00, 0x01, 0x10 or 0x11 (or
+// negative, which the range test below also lets through).
+void WriteFrameHeader(j_compress_ptr cinfo) {
+  jpeg_comp_master* m = cinfo->master;
+  bool is_baseline = EncodeDQT(cinfo, /*write_all_tables=*/false);
+  const bool extended_features = cinfo->progressive_mode ||
+                                 cinfo->arith_code ||
+                                 cinfo->data_precision != 8;
+  if (extended_features) {
+    is_baseline = false;
+  }
+  for (size_t i = 0; i < m->num_huffman_tables; ++i) {
+    const int slot_id = m->slot_id_map[i];
+    const bool baseline_slot =
+        slot_id <= 0x01 || (slot_id >= 0x10 && slot_id <= 0x11);
+    if (!baseline_slot) {
+      is_baseline = false;
+    }
+  }
+  EncodeSOF(cinfo, is_baseline);
+}
+
+// Writes the DRI marker segment carrying cinfo->restart_interval as a
+// big-endian 16-bit value.
+void EncodeDRI(j_compress_ptr cinfo) {
+  const uint8_t interval_hi =
+      static_cast<uint8_t>(cinfo->restart_interval >> 8);
+  const uint8_t interval_lo =
+      static_cast<uint8_t>(cinfo->restart_interval & 0xFF);
+  WriteOutput(cinfo, {0xFF, 0xDD, 0, 4, interval_hi, interval_lo});
+}
+
+// Writes one DHT marker segment containing the |num| Huffman tables starting
+// at index |offset| in the master's table list, skipping tables that are
+// flagged as already sent. Nothing is written if every table is skipped.
+void EncodeDHT(j_compress_ptr cinfo, size_t offset, size_t num) {
+  jpeg_comp_master* m = cinfo->master;
+
+  // First pass: compute the segment length (starts at 2 for the length field
+  // itself).
+  size_t marker_len = 2;
+  for (size_t t = 0; t < num; ++t) {
+    const JHUFF_TBL& table = m->huffman_tables[offset + t];
+    if (table.sent_table) continue;
+    size_t num_values = 0;
+    for (size_t len = 0; len <= kJpegHuffmanMaxBitLength; ++len) {
+      num_values += table.bits[len];
+    }
+    marker_len += kJpegHuffmanMaxBitLength + 1 + num_values;
+  }
+
+  // Second pass: serialize slot id, per-length counts and symbol values.
+  std::vector<uint8_t> data(marker_len + 2);
+  size_t pos = 0;
+  data[pos++] = 0xFF;
+  data[pos++] = 0xC4;
+  data[pos++] = marker_len >> 8u;
+  data[pos++] = marker_len & 0xFFu;
+  for (size_t t = 0; t < num; ++t) {
+    const JHUFF_TBL& table = m->huffman_tables[offset + t];
+    if (table.sent_table) continue;
+    size_t num_values = 0;
+    for (size_t len = 0; len <= kJpegHuffmanMaxBitLength; ++len) {
+      num_values += table.bits[len];
+    }
+    data[pos++] = m->slot_id_map[offset + t];
+    // bits[0] is not written; the counts start at code length 1.
+    for (size_t len = 1; len <= kJpegHuffmanMaxBitLength; ++len) {
+      data[pos++] = table.bits[len];
+    }
+    for (size_t v = 0; v < num_values; ++v) {
+      data[pos++] = table.huffval[v];
+    }
+  }
+  if (marker_len > 2) {
+    WriteOutput(cinfo, data);
+  }
+}
+
+// Writes the scan header (SOS) segment of scan |scan_index|: for each
+// component in the scan its id and the DC/AC Huffman slot selectors, followed
+// by the spectral selection (Ss, Se) and successive approximation (Ah, Al)
+// parameters.
+void EncodeSOS(j_compress_ptr cinfo, int scan_index) {
+  jpeg_comp_master* m = cinfo->master;
+  const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
+  const int comps_in_scan = scan_info->comps_in_scan;
+  const size_t marker_len = 6 + 2 * comps_in_scan;
+  std::vector<uint8_t> data(marker_len + 2);
+  uint8_t* out = data.data();
+  *out++ = 0xFF;
+  *out++ = 0xDA;
+  *out++ = marker_len >> 8u;
+  *out++ = marker_len & 0xFFu;
+  *out++ = comps_in_scan;
+  for (int i = 0; i < comps_in_scan; ++i) {
+    const int comp_idx = scan_info->component_index[i];
+    *out++ = cinfo->comp_info[comp_idx].component_id;
+    // The DC context of a component is its index; the AC contexts of a scan
+    // start at ac_ctx_offset[scan_index].
+    const int dc_slot_id = m->slot_id_map[m->context_map[comp_idx]];
+    const int ac_context = m->ac_ctx_offset[scan_index] + i;
+    const int ac_slot_id = m->slot_id_map[m->context_map[ac_context]];
+    // 16 is subtracted from the AC slot id before it is stored in the low
+    // nibble.
+    *out++ = (dc_slot_id << 4u) + (ac_slot_id - 16);
+  }
+  *out++ = scan_info->Ss;
+  *out++ = scan_info->Se;
+  *out++ = ((scan_info->Ah << 4u) | (scan_info->Al));
+  WriteOutput(cinfo, data);
+}
+
+// Writes everything that precedes the entropy-coded data of scan
+// |scan_index|: a DRI segment if the restart interval changed since the last
+// scan, the Huffman tables that become necessary with this scan, and the SOS
+// segment.
+void WriteScanHeader(j_compress_ptr cinfo, int scan_index) {
+  jpeg_comp_master* m = cinfo->master;
+  const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
+
+  cinfo->restart_interval = m->scan_token_info[scan_index].restart_interval;
+  if (cinfo->restart_interval != m->last_restart_interval) {
+    EncodeDRI(cinfo);
+    m->last_restart_interval = cinfo->restart_interval;
+  }
+
+  size_t num_dht = 0;
+  if (scan_index == 0) {
+    // For the first scan we emit all DC and at most 4 AC Huffman codes.
+    size_t num_ac = 0;
+    for (size_t i = 0; i < m->num_huffman_tables; ++i) {
+      const bool is_ac = m->slot_id_map[i] >= 16;
+      if (is_ac) {
+        if (num_ac >= 4) break;
+        ++num_ac;
+      }
+      ++num_dht;
+    }
+  } else if (scan_info->Ss > 0) {
+    // For multi-scan sequential and progressive DC scans we have already
+    // emitted all Huffman codes that we need before the first scan. For
+    // progressive AC scans we only need at most one new Huffman code.
+    const bool needs_next_table =
+        m->context_map[m->ac_ctx_offset[scan_index]] == m->next_dht_index;
+    if (needs_next_table) {
+      num_dht = 1;
+    }
+  }
+  if (num_dht > 0) {
+    EncodeDHT(cinfo, m->next_dht_index, num_dht);
+    m->next_dht_index += num_dht;
+  }
+  EncodeSOS(cinfo, scan_index);
+}
+
+// Writes one block: symbols[0] with |dc_code|, the remaining
+// |num_nonzeros| - 1 symbols with |ac_code|, and finally the AC symbol 0 if
+// |emit_eob| is set. Each symbol is followed by its extra bits, which are
+// OR-ed into the code word. An AC symbol above 255 is preceded by one 0xf0
+// code per 256 of excess, at most three of them.
+void WriteBlock(const int32_t* JXL_RESTRICT symbols,
+                const int32_t* JXL_RESTRICT extra_bits, const int num_nonzeros,
+                const bool emit_eob,
+                const HuffmanCodeTable* JXL_RESTRICT dc_code,
+                const HuffmanCodeTable* JXL_RESTRICT ac_code,
+                JpegBitWriter* JXL_RESTRICT bw) {
+  const int dc_symbol = symbols[0];
+  WriteBits(bw, dc_code->depth[dc_symbol],
+            dc_code->code[dc_symbol] | extra_bits[0]);
+  for (int k = 1; k < num_nonzeros; ++k) {
+    int ac_symbol = symbols[k];
+    for (int prefix = 0; prefix < 3 && ac_symbol > 255; ++prefix) {
+      WriteBits(bw, ac_code->depth[0xf0], ac_code->code[0xf0]);
+      ac_symbol -= 256;
+    }
+    WriteBits(bw, ac_code->depth[ac_symbol],
+              ac_code->code[ac_symbol] | extra_bits[k]);
+  }
+  if (!emit_eob) return;
+  WriteBits(bw, ac_code->depth[0], ac_code->code[0]);
+}
+
+namespace {
+
+// Appends the two-byte marker 0xFF |marker| to the bit writer's output. No
+// zero byte is stuffed after the 0xFF here.
+static JXL_INLINE void EmitMarker(JpegBitWriter* bw, int marker) {
+  bw->data[bw->pos] = 0xFF;
+  bw->data[bw->pos + 1] = marker;
+  bw->pos += 2;
+}
+
+// Writes the Huffman-coded tokens of scan |scan_index|. The scan owns the
+// sti.num_tokens tokens that start at global index sti.token_offset, counted
+// across all of the master's token arrays in order. A restart marker is
+// inserted before each token whose global index is listed in sti.restarts.
+// Raises an error if the bit writer buffer cannot be emptied.
+void WriteTokens(j_compress_ptr cinfo, int scan_index, JpegBitWriter* bw) {
+ jpeg_comp_master* m = cinfo->master;
+ HuffmanCodeTable* coding_tables = &m->coding_tables[0];
+ int next_restart_marker = 0;
+ const ScanTokenInfo& sti = m->scan_token_info[scan_index];
+ size_t num_token_arrays = m->cur_token_array + 1;
+ // Number of tokens in the arrays visited so far, i.e. the global index of
+ // the first token of the current array.
+ size_t total_tokens = 0;
+ size_t restart_idx = 0;
+ size_t next_restart = sti.restarts[restart_idx];
+ uint8_t* context_map = m->context_map;
+ for (size_t i = 0; i < num_token_arrays; ++i) {
+ Token* tokens = m->token_arrays[i].tokens;
+ size_t num_tokens = m->token_arrays[i].num_tokens;
+ // Only process arrays whose token range overlaps the scan's token range.
+ if (sti.token_offset < total_tokens + num_tokens &&
+ total_tokens < sti.token_offset + sti.num_tokens) {
+ // Clip the scan's range to this array (indices local to the array).
+ size_t start_ix =
+ total_tokens < sti.token_offset ? sti.token_offset - total_tokens : 0;
+ size_t end_ix = std::min(sti.token_offset + sti.num_tokens - total_tokens,
+ num_tokens);
+ // The bit writer buffer is emptied after every cycle_len tokens; the
+ // counter restarts for each token array.
+ // NOTE(review): presumably bw->len / 8 is chosen so the buffer cannot
+ // overflow between two flushes -- confirm against the buffer sizing.
+ size_t cycle_len = bw->len / 8;
+ size_t next_cycle = cycle_len;
+ // Note: this |i| shadows the token-array index of the outer loop.
+ for (size_t i = start_ix; i < end_ix; ++i) {
+ if (total_tokens + i == next_restart) {
+ JumpToByteBoundary(bw);
+ // Restart markers cycle through 0xD0..0xD7.
+ EmitMarker(bw, 0xD0 + next_restart_marker);
+ next_restart_marker += 1;
+ next_restart_marker &= 0x7;
+ // NOTE(review): reads the entry after the last used restart position;
+ // presumably the array has a trailing sentinel -- confirm.
+ next_restart = sti.restarts[++restart_idx];
+ }
+ Token t = tokens[i];
+ const HuffmanCodeTable* code = &coding_tables[context_map[t.context]];
+ WriteBits(bw, code->depth[t.symbol], code->code[t.symbol] | t.bits);
+ if (--next_cycle == 0) {
+ if (!EmptyBitWriterBuffer(bw)) {
+ JPEGLI_ERROR(
+ "Output suspension is not supported in "
+ "finish_compress");
+ }
+ next_cycle = cycle_len;
+ }
+ }
+ }
+ total_tokens += num_tokens;
+ }
+}
+
+// Writes the tokens of the AC refinement scan |scan_index|. Each token yields
+// one Huffman-coded symbol, optionally combined with extra bits, followed by
+// t.refbits single refinement bits taken in order from sti.refbits. A restart
+// marker is inserted before each token whose index is listed in sti.restarts.
+// Raises an error if the bit writer buffer cannot be emptied.
+void WriteACRefinementTokens(j_compress_ptr cinfo, int scan_index,
+ JpegBitWriter* bw) {
+ jpeg_comp_master* m = cinfo->master;
+ const ScanTokenInfo& sti = m->scan_token_info[scan_index];
+ // All tokens of the scan use the Huffman code of the scan's first AC context.
+ const uint8_t context = m->ac_ctx_offset[scan_index];
+ const HuffmanCodeTable* code = &m->coding_tables[m->context_map[context]];
+ // The bit writer buffer is emptied after every cycle_len tokens.
+ // NOTE(review): presumably bw->len / 64 accounts for the refinement bits a
+ // single token can carry -- confirm against the buffer sizing.
+ size_t cycle_len = bw->len / 64;
+ size_t next_cycle = cycle_len;
+ size_t refbit_idx = 0;
+ size_t eobrun_idx = 0;
+ size_t restart_idx = 0;
+ size_t next_restart = sti.restarts[restart_idx];
+ int next_restart_marker = 0;
+ for (size_t i = 0; i < sti.num_tokens; ++i) {
+ if (i == next_restart) {
+ JumpToByteBoundary(bw);
+ // Restart markers cycle through 0xD0..0xD7.
+ EmitMarker(bw, 0xD0 + next_restart_marker);
+ next_restart_marker += 1;
+ next_restart_marker &= 0x7;
+ // NOTE(review): reads the entry after the last used restart position;
+ // presumably the array has a trailing sentinel -- confirm.
+ next_restart = sti.restarts[++restart_idx];
+ }
+ RefToken t = sti.tokens[i];
+ // 253 == 0xFD: bit 1 of the stored symbol is masked off to obtain the
+ // Huffman symbol; for odd symbols that bit is emitted as the extra bit.
+ int symbol = t.symbol & 253;
+ uint16_t bits = 0;
+ if ((symbol & 1) == 0) {
+ // Even symbol: when its upper nibble is in 1..14, extra bits are taken
+ // from sti.eobruns (presumably EOB-run symbols -- confirm).
+ int r = symbol >> 4;
+ if (r > 0 && r < 15) {
+ bits = sti.eobruns[eobrun_idx++];
+ }
+ } else {
+ bits = (t.symbol >> 1) & 1;
+ }
+ WriteBits(bw, code->depth[symbol], code->code[symbol] | bits);
+ for (int j = 0; j < t.refbits; ++j) {
+ WriteBits(bw, 1, sti.refbits[refbit_idx++]);
+ }
+ if (--next_cycle == 0) {
+ if (!EmptyBitWriterBuffer(bw)) {
+ JPEGLI_ERROR("Output suspension is not supported in finish_compress");
+ }
+ next_cycle = cycle_len;
+ }
+ }
+}
+
+// Writes the DC refinement scan |scan_index|: one bit from sti.refbits per
+// token, with a restart marker before each token whose index is listed in
+// sti.restarts. Raises an error if the bit writer buffer cannot be emptied.
+void WriteDCRefinementBits(j_compress_ptr cinfo, int scan_index,
+                           JpegBitWriter* bw) {
+  const ScanTokenInfo& sti = cinfo->master->scan_token_info[scan_index];
+  // The bit writer buffer is emptied after every flush_period tokens.
+  const size_t flush_period = bw->len * 4;
+  size_t tokens_until_flush = flush_period;
+  size_t restart_idx = 0;
+  size_t next_restart = sti.restarts[restart_idx];
+  int restart_marker = 0;
+  // Exactly one refinement bit is consumed per token, so the token index is
+  // also the index into sti.refbits.
+  for (size_t i = 0; i < sti.num_tokens; ++i) {
+    if (i == next_restart) {
+      JumpToByteBoundary(bw);
+      // Restart markers cycle through 0xD0..0xD7.
+      EmitMarker(bw, 0xD0 + restart_marker);
+      restart_marker = (restart_marker + 1) & 0x7;
+      restart_idx += 1;
+      next_restart = sti.restarts[restart_idx];
+    }
+    WriteBits(bw, 1, sti.refbits[i]);
+    tokens_until_flush -= 1;
+    if (tokens_until_flush == 0) {
+      if (!EmptyBitWriterBuffer(bw)) {
+        JPEGLI_ERROR(
+            "Output suspension is not supported in "
+            "finish_compress");
+      }
+      tokens_until_flush = flush_period;
+    }
+  }
+}
+
+} // namespace
+
+// Writes the entropy-coded data of scan |scan_index|, choosing the writer by
+// scan type: Ah == 0 is a regular scan, Ah != 0 with Ss > 0 an AC refinement
+// scan, anything else a DC refinement scan. Afterwards the output is padded
+// to a byte boundary and flushed. Raises an error if a symbol without a
+// Huffman code was encountered or if the buffer cannot be emptied.
+void WriteScanData(j_compress_ptr cinfo, int scan_index) {
+  const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
+  JpegBitWriter* bw = &cinfo->master->bw;
+  const bool is_refinement = scan_info->Ah != 0;
+  const bool is_ac_scan = scan_info->Ss > 0;
+  if (!is_refinement) {
+    WriteTokens(cinfo, scan_index, bw);
+  } else if (is_ac_scan) {
+    WriteACRefinementTokens(cinfo, scan_index, bw);
+  } else {
+    WriteDCRefinementBits(cinfo, scan_index, bw);
+  }
+  if (!bw->healthy) {
+    JPEGLI_ERROR("Unknown Huffman coded symbol found in scan %d", scan_index);
+  }
+  JumpToByteBoundary(bw);
+  if (!EmptyBitWriterBuffer(bw)) {
+    JPEGLI_ERROR("Output suspension is not supported in finish_compress");
+  }
+}
+
+} // namespace jpegli
diff --git a/lib/jpegli/bitstream.h b/lib/jpegli/bitstream.h
new file mode 100644
index 0000000..aa54c73
--- /dev/null
+++ b/lib/jpegli/bitstream.h
@@ -0,0 +1,44 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_BITSTREAM_H_
+#define LIB_JPEGLI_BITSTREAM_H_
+
+#include <initializer_list>
+#include <vector>
+
+#include "lib/jpegli/encode_internal.h"
+
+namespace jpegli {
+
+// Copies the given bytes to the destination manager of |cinfo|, requesting a
+// new output buffer whenever the current one is full. Raises an error if the
+// destination manager cannot provide one.
+void WriteOutput(j_compress_ptr cinfo, const uint8_t* buf, size_t bufsize);
+void WriteOutput(j_compress_ptr cinfo, const std::vector<uint8_t>& bytes);
+void WriteOutput(j_compress_ptr cinfo, std::initializer_list<uint8_t> bytes);
+
+// Writes the JFIF APP0 marker segment.
+void EncodeAPP0(j_compress_ptr cinfo);
+// Writes the Adobe APP14 marker segment.
+void EncodeAPP14(j_compress_ptr cinfo);
+// Writes the SOI marker followed by the APP0 / APP14 segments that are
+// enabled in |cinfo|.
+void WriteFileHeader(j_compress_ptr cinfo);
+
+// Writes the quantization tables that have not been sent yet: all defined
+// tables if |write_all_tables| is set, otherwise the ones used by the image
+// components.
+// Returns true if only baseline 8-bit tables are used.
+bool EncodeDQT(j_compress_ptr cinfo, bool write_all_tables);
+// Writes the frame header; |is_baseline| selects between the SOF0 and SOF1
+// markers for non-progressive images.
+void EncodeSOF(j_compress_ptr cinfo, bool is_baseline);
+// Writes the quantization tables used by the components and the frame header.
+void WriteFrameHeader(j_compress_ptr cinfo);
+
+// Writes the DRI marker segment with the current restart interval.
+void EncodeDRI(j_compress_ptr cinfo);
+// Writes the not yet sent Huffman tables among the |num| tables starting at
+// table index |offset|.
+void EncodeDHT(j_compress_ptr cinfo, size_t offset, size_t num);
+// Writes the scan header of the scan with the given index.
+void EncodeSOS(j_compress_ptr cinfo, int scan_index);
+// Writes the DRI (if the restart interval changed), DHT (if new tables are
+// needed) and SOS segments that precede the data of the given scan.
+void WriteScanHeader(j_compress_ptr cinfo, int scan_index);
+
+// Writes the Huffman-coded symbols and extra bits of one block to |bw|:
+// symbols[0] with |dc_code|, symbols[1..num_nonzeros-1] with |ac_code|, and
+// the AC symbol 0 at the end if |emit_eob| is set.
+void WriteBlock(const int32_t* JXL_RESTRICT symbols,
+ const int32_t* JXL_RESTRICT extra_bits, const int num_nonzeros,
+ const bool emit_eob,
+ const HuffmanCodeTable* JXL_RESTRICT dc_code,
+ const HuffmanCodeTable* JXL_RESTRICT ac_code,
+ JpegBitWriter* JXL_RESTRICT bw);
+// Writes the entropy-coded data of the given scan and flushes the bit writer.
+void WriteScanData(j_compress_ptr cinfo, int scan_index);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_BITSTREAM_H_
diff --git a/lib/jpegli/color_quantize.cc b/lib/jpegli/color_quantize.cc
new file mode 100644
index 0000000..e8357e2
--- /dev/null
+++ b/lib/jpegli/color_quantize.cc
@@ -0,0 +1,533 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/color_quantize.h"
+
+#include <cmath>
+#include <limits>
+#include <unordered_map>
+
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/error.h"
+
+namespace jpegli {
+
+namespace {
+
+// Number of leading bits of each component's 8-bit value that select the
+// inverse colormap cell (cell coordinate = value >> (8 - bits)).
+static constexpr int kNumColorCellBits[kMaxComponents] = {3, 4, 3, 3};
+// Per-component weights applied to component differences before squaring in
+// the colormap distance computations.
+static constexpr int kCompW[kMaxComponents] = {2, 3, 1, 1};
+
+// Returns a raised to the power b by repeated multiplication; the result is
+// 1 when b <= 0.
+int Pow(int a, int b) {
+  int result = 1;
+  while (b-- > 0) {
+    result *= a;
+  }
+  return result;
+}
+
+// Returns the component that gets the i-th chance to receive an extra color
+// level. With three output components the first two are swapped (1, 0, 2);
+// otherwise the order is the identity.
+int ComponentOrder(j_decompress_ptr cinfo, int i) {
+  const bool swap_first_two = cinfo->out_color_components == 3 && i < 2;
+  if (swap_first_two) {
+    return 1 - i;
+  }
+  return i;
+}
+
+// Returns the i-th of N evenly spaced sample values in [0, 255], rounded to
+// the nearest integer. N must be at least 2.
+int GetColorComponent(int i, int N) {
+  const int steps = N - 1;
+  const int rounding = steps / 2;
+  return (i * 255 + rounding) / steps;
+}
+
+} // namespace
+
+// Builds a regular-grid colormap with at most min(desired_number_of_colors,
+// 256) entries. Stores the number of levels per component in the master,
+// allocates and fills cinfo->colormap, sets cinfo->actual_number_of_colors
+// and fills the per-component lookup table that maps a sample value to its
+// contribution to the colormap index. Raises an error if the budget does not
+// allow at least two levels per component.
+void ChooseColorMap1Pass(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  const int components = cinfo->out_color_components;
+  const int desired = std::min(cinfo->desired_number_of_colors, 256);
+
+  // Largest uniform number of levels per component that fits the budget.
+  int levels = 1;
+  while (Pow(levels + 1, components) <= desired) {
+    ++levels;
+  }
+  if (levels == 1) {
+    JPEGLI_ERROR("Too few colors (%d) in requested colormap", desired);
+  }
+  for (int c = 0; c < components; ++c) {
+    m->num_colors_[c] = levels;
+  }
+  int actual = Pow(levels, components);
+
+  // Hand out additional levels one component at a time, in ComponentOrder,
+  // until a full round adds nothing.
+  while (actual < desired) {
+    const int before_round = actual;
+    for (int i = 0; i < components; ++i) {
+      const int c = ComponentOrder(cinfo, i);
+      const int grown = (actual / m->num_colors_[c]) * (m->num_colors_[c] + 1);
+      if (grown <= desired) {
+        m->num_colors_[c] += 1;
+        actual = grown;
+      }
+    }
+    if (actual == before_round) {
+      break;
+    }
+  }
+
+  cinfo->actual_number_of_colors = actual;
+  cinfo->colormap = (*cinfo->mem->alloc_sarray)(
+      reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, actual, components);
+
+  // Enumerate all level combinations; the last component varies fastest.
+  int level[kMaxComponents] = {0};
+  for (int entry = 0; entry < actual; ++entry) {
+    for (int c = 0; c < components; ++c) {
+      cinfo->colormap[c][entry] =
+          GetColorComponent(level[c], m->num_colors_[c]);
+    }
+    int c = components - 1;
+    for (; c > 0 && level[c] + 1 == m->num_colors_[c]; --c) {
+      level[c] = 0;
+    }
+    ++level[c];
+  }
+
+  if (!m->colormap_lut_) {
+    m->colormap_lut_ = Allocate<uint8_t>(cinfo, components * 256, JPOOL_IMAGE);
+  }
+  // colormap_lut_[c * 256 + v] is the index contribution of sample value v in
+  // component c; summing the contributions of all components gives the index.
+  int stride = actual;
+  for (int c = 0; c < components; ++c) {
+    const int N = m->num_colors_[c];
+    stride /= N;
+    uint8_t* lut = m->colormap_lut_ + c * 256;
+    for (int v = 0; v < 256; ++v) {
+      const int nearest_level = ((2 * v - 1) * (N - 1) + 254) / 510;
+      lut[v] = nearest_level * stride;
+    }
+  }
+}
+
+namespace {
+
+// 2^13 priority levels for the PQ seems to be a good compromise between
+// accuracy, running time and stack space usage.
+static const int kMaxPriority = 1 << 13;
+// Number of levels in the multi-resolution density grid; each level drops 3
+// more bits of the interlaced color key.
+static const int kMaxLevel = 3;
+
+// This function is used in the multi-resolution grid to be able to compute
+// the keys for the different resolutions by just shifting the first key.
+// Interleaves the bits of r, g and b into a 24-bit key, most significant bits
+// first: each group of three key bits holds one bit of r, g and b in that
+// order. Shifting the key right by a multiple of 3 therefore gives the key of
+// the same color at a coarser resolution.
+inline int InterlaceBitsRGB(uint8_t r, uint8_t g, uint8_t b) {
+  int key = 0;
+  for (int bit = 7; bit >= 0; --bit) {
+    const int triple = (((r >> bit) & 1) << 2) | (((g >> bit) & 1) << 1) |
+                       ((b >> bit) & 1);
+    key = (key << 3) | triple;
+  }
+  return key;
+}
+
+// This function will compute the actual priorities of the colors based on
+// the current distance from the palette, the population count and the signals
+// from the multi-resolution grid.
+// Computes the priority of a color from its (scaled) distance |d| to the
+// current palette, its pixel count |n| and the per-level |density| / |radius|
+// values of the multi-resolution grid. The result is capped at
+// kMaxPriority - 1.
+inline int Priority(int d, int n, const int* density, const int* radius) {
+  int score = d * n;
+  for (int level = 0; level < kMaxLevel; ++level) {
+    // A level only contributes when the color lies beyond its radius.
+    if (d > radius[level]) {
+      const int excess = d - radius[level];
+      score += density[level] * excess;
+    }
+  }
+  const int scaled = score >> 4;
+  return std::min(kMaxPriority - 1, scaled);
+}
+
+// Returns a squared distance between two RGB colors: the sum of the squared
+// channel differences plus the squared difference of a weighted intensity
+// (2 * r + 5 * g + b) / 8.
+inline int ColorIntQuadDistanceRGB(uint8_t r1, uint8_t g1, uint8_t b1,
+                                   uint8_t r2, uint8_t g2, uint8_t b2) {
+  // weights for the intensity calculation
+  static constexpr int kRedWeight = 2;
+  static constexpr int kGreenWeight = 5;
+  static constexpr int kBlueWeight = 1;
+  // normalization factor for the intensity calculation (2^kIntensityShift)
+  static constexpr int kIntensityShift = 3;
+  const int dr = r1 - r2;
+  const int dg = g1 - g2;
+  const int db = b1 - b2;
+  const int channel_term = dr * dr + dg * dg + db * db;
+  const int di = kRedWeight * dr + kGreenWeight * dg + kBlueWeight * db;
+  const int intensity_term = (di * di) >> (2 * kIntensityShift);
+  return channel_term + intensity_term;
+}
+
+// Converts a squared distance into a linear one: returns sqrt(d / 4) rounded
+// to the nearest integer.
+inline int ScaleQuadDistanceRGB(int d) {
+  const double half_root = sqrt(d * 0.25);
+  return static_cast<int>(half_root + 0.5);
+}
+
+// The function updates the minimal distances, the clustering and the
+// quantization error after the insertion of the new color into the palette.
+// Inserts color |index| into the palette as entry |k| and updates, for every
+// color that is now closer to the new entry than to any previous one, its
+// minimal distance, its cluster assignment and the total quantization error.
+void AddToRGBPalette(const uint8_t* red, const uint8_t* green,
+                     const uint8_t* blue,
+                     const int* count,  // histogram of colors
+                     const int index,   // index of color to be added
+                     const int k,       // size of current palette
+                     const int n,       // number of colors
+                     int* dist,         // array of distances from palette
+                     int* cluster,      // mapping of color indices to palette
+                     int* center,       // the inverse mapping
+                     int64_t* error) {  // measure of the quantization error
+  center[k] = index;
+  cluster[index] = k;
+  // The new palette color no longer contributes any error.
+  const int64_t removed_error =
+      static_cast<int64_t>(dist[index]) * static_cast<int64_t>(count[index]);
+  *error -= removed_error;
+  dist[index] = 0;
+
+  const uint8_t new_r = red[index];
+  const uint8_t new_g = green[index];
+  const uint8_t new_b = blue[index];
+  for (int j = 0; j < n; ++j) {
+    // Colors at distance 0 cannot get any closer.
+    if (dist[j] <= 0) continue;
+    const int d =
+        ColorIntQuadDistanceRGB(new_r, new_g, new_b, red[j], green[j], blue[j]);
+    if (d >= dist[j]) continue;
+    *error += static_cast<int64_t>((d - dist[j])) *
+              static_cast<int64_t>(count[j]);
+    dist[j] = d;
+    cluster[j] = k;
+  }
+}
+
+struct RGBPixelHasher {
+  // A quick but good-enough hash to get 24 bits of RGB into the lower 12 bits.
+  size_t operator()(uint32_t a) const {
+    const uint32_t folded = a ^ (a >> 12);
+    return folded * 0x9e3779b9;
+  }
+};
+
+struct WangHasher {
+  // Thomas Wang's Hash. Nearly perfect and still quite fast. Above (for
+  // pixels) we use a simpler hash because the number of hash calls is
+  // proportional to the number of pixels and that hash dominates; we want the
+  // cost to be minimal and we start with a large table. We can use a better
+  // hash for the histogram since the number of hash calls is proportional to
+  // the number of unique colors in the image, which is hopefully much smaller.
+  // Note that the difference is slight; e.g. replacing RGBPixelHasher with
+  // WangHasher only slows things down by 5% on an Opteron.
+  size_t operator()(uint32_t a) const {
+    a ^= 61 ^ (a >> 16);
+    a += a << 3;
+    a ^= a >> 4;
+    a *= 0x27d4eb2d;
+    a ^= a >> 15;
+    return a;
+  }
+};
+
+// Build an index of all the different colors in the input
+// image. To do this we map the 24 bit RGB representation of the colors
+// to a unique integer index assigned to the different colors in order of
+// appearance in the image. Return the number of unique colors found.
+// The colors are pre-quantized to 3 * 6 bits precision.
+// Scans |num_pixels| interleaved RGB pixels of |image|, reduces every channel
+// to its upper 6 bits (recentred by adding 2) and assigns each distinct
+// reduced color an index in order of first appearance. For index i the color
+// is stored in red[i] / green[i] / blue[i] and count[i] is incremented once
+// per matching pixel. Returns the number of distinct colors.
+static int BuildRGBColorIndex(const uint8_t* const image, int const num_pixels,
+                              int* const count, uint8_t* const red,
+                              uint8_t* const green, uint8_t* const blue) {
+  // Impossible because rgb are in the low 24 bits, and the upper 8 bits is 0.
+  const uint32_t no_pixel = 0x10000000;
+  std::unordered_map<uint32_t, int, RGBPixelHasher> seen(1 << 12);
+  const uint8_t* src = image;
+  uint32_t last_pixel = no_pixel;
+  int last_index = 0;
+  int num_colors = 0;
+  for (int i = 0; i < num_pixels; ++i, src += 3) {
+    const uint8_t r = (src[0] & 0xfc) + 2;
+    const uint8_t g = (src[1] & 0xfc) + 2;
+    const uint8_t b = (src[2] & 0xfc) + 2;
+    const uint32_t pixel = (b << 16) | (g << 8) | r;
+    // Runs of identical pixels reuse the previous lookup result.
+    if (pixel != last_pixel) {
+      last_pixel = pixel;
+      const auto found = seen.find(pixel);
+      if (found == seen.end()) {
+        last_index = num_colors++;
+        seen[pixel] = last_index;
+        red[last_index] = r;
+        green[last_index] = g;
+        blue[last_index] = b;
+      } else {
+        last_index = found->second;
+      }
+    }
+    ++count[last_index];
+  }
+  return num_colors;
+}
+
+} // namespace
+
+// Builds an image-adapted RGB palette from the pixels stored in the decoder
+// master and stores it in cinfo->colormap / cinfo->actual_number_of_colors.
+//
+// Colors covering a large share of the image are added first; the remaining
+// colors are then added in order of a priority derived from their distance to
+// the current palette, their pixel count and a multi-resolution density grid,
+// until the palette is full or the quantization error has been low for long
+// enough.
+//
+// Raises an error if the output color space is not RGB or if fewer than one
+// color is requested. Requests for more than 256 colors are clamped to 256.
+void ChooseColorMap2Pass(j_decompress_ptr cinfo) {
+  if (cinfo->out_color_space != JCS_RGB) {
+    JPEGLI_ERROR("Two-pass quantizer must use RGB output color space.");
+  }
+  // Guards the division by the palette size below.
+  if (cinfo->desired_number_of_colors < 1) {
+    JPEGLI_ERROR("Too few colors (%d) in requested colormap",
+                 cinfo->desired_number_of_colors);
+  }
+  jpeg_decomp_master* m = cinfo->master;
+  const size_t num_pixels = cinfo->output_width * cinfo->output_height;
+  const int max_color_count = std::max<size_t>(num_pixels, 1u << 18);
+  // center[] below holds at most 256 entries, so the palette must not grow
+  // beyond that (same limit as in ChooseColorMap1Pass).
+  const int max_palette_size = std::min(cinfo->desired_number_of_colors, 256);
+  std::unique_ptr<uint8_t[]> red(new uint8_t[max_color_count]);
+  std::unique_ptr<uint8_t[]> green(new uint8_t[max_color_count]);
+  std::unique_ptr<uint8_t[]> blue(new uint8_t[max_color_count]);
+  std::vector<int> count(max_color_count, 0);
+  // number of colors
+  int n = BuildRGBColorIndex(m->pixels_, num_pixels, &count[0], &red[0],
+                             &green[0], &blue[0]);
+
+  std::vector<int> dist(n, std::numeric_limits<int>::max());
+  std::vector<int> cluster(n);
+  std::vector<bool> in_palette(n, false);
+  int center[256];
+  int k = 0;  // palette size
+  const int count_threshold = (num_pixels * 4) / max_palette_size;
+  static constexpr int kAveragePixelErrorThreshold = 1;
+  const int64_t error_threshold = num_pixels * kAveragePixelErrorThreshold;
+  int64_t error = 0;  // quantization error
+
+  // Seed the palette with every color that covers more than
+  // 4 / max_palette_size of the image; at most max_palette_size / 4 colors
+  // can qualify. If none does, start with the most frequent color.
+  int max_count = 0;
+  int winner = 0;
+  for (int i = 0; i < n; ++i) {
+    if (count[i] > max_count) {
+      max_count = count[i];
+      winner = i;
+    }
+    if (!in_palette[i] && count[i] > count_threshold) {
+      AddToRGBPalette(&red[0], &green[0], &blue[0], &count[0], i, k++, n,
+                      &dist[0], &cluster[0], &center[0], &error);
+      in_palette[i] = true;
+    }
+  }
+  if (k == 0) {
+    AddToRGBPalette(&red[0], &green[0], &blue[0], &count[0], winner, k++, n,
+                    &dist[0], &cluster[0], &center[0], &error);
+    in_palette[winner] = true;
+  }
+
+  // Calculation of the multi-resolution density grid.
+  std::vector<int> density(n * kMaxLevel);
+  std::vector<int> radius(n * kMaxLevel);
+  std::unordered_map<uint32_t, int, WangHasher> histogram[kMaxLevel];
+
+  for (int i = 0; i < n; ++i) {
+    if (!in_palette[i]) {
+      const int key = InterlaceBitsRGB(red[i], green[i], blue[i]) >> 6;
+      for (int level = 0; level < kMaxLevel; ++level) {
+        histogram[level][key >> (3 * level)] += count[i];
+      }
+    }
+  }
+  for (int i = 0; i < n; ++i) {
+    if (!in_palette[i]) {
+      for (int level = 0; level < kMaxLevel; ++level) {
+        const int mask = (4 << level) - 1;
+        const int rd = std::max(red[i] & mask, mask - (red[i] & mask));
+        const int gd = std::max(green[i] & mask, mask - (green[i] & mask));
+        const int bd = std::max(blue[i] & mask, mask - (blue[i] & mask));
+        radius[i * kMaxLevel + level] =
+            ScaleQuadDistanceRGB(ColorIntQuadDistanceRGB(0, 0, 0, rd, gd, bd));
+      }
+      const int key = InterlaceBitsRGB(red[i], green[i], blue[i]) >> 6;
+      if (kMaxLevel > 0) {
+        density[i * kMaxLevel] = histogram[0][key] - count[i];
+      }
+      for (int level = 1; level < kMaxLevel; ++level) {
+        density[i * kMaxLevel + level] =
+            (histogram[level][key >> (3 * level)] -
+             histogram[level - 1][key >> (3 * level - 3)]);
+      }
+    }
+  }
+
+  // Calculate the initial error now that the palette has been initialized.
+  error = 0;
+  for (int i = 0; i < n; ++i) {
+    error += static_cast<int64_t>(dist[i]) * static_cast<int64_t>(count[i]);
+  }
+
+  // Bucket-based priority queue: bucket_array[p] holds the colors whose last
+  // computed priority was p.
+  std::unique_ptr<std::vector<int>[]> bucket_array(
+      new std::vector<int>[kMaxPriority]);
+  int top_priority = -1;
+  for (int i = 0; i < n; ++i) {
+    if (!in_palette[i]) {
+      int priority = Priority(ScaleQuadDistanceRGB(dist[i]), count[i],
+                              &density[i * kMaxLevel], &radius[i * kMaxLevel]);
+      bucket_array[priority].push_back(i);
+      top_priority = std::max(priority, top_priority);
+    }
+  }
+  double error_accum = 0;
+  while (top_priority >= 0 && k < max_palette_size) {
+    if (error < error_threshold) {
+      error_accum += std::min(error_threshold, error_threshold - error);
+      if (error_accum >= 10 * error_threshold) {
+        break;
+      }
+    }
+    // Priorities are refreshed lazily: a color whose priority dropped since it
+    // was queued is moved to its new bucket instead of being added.
+    int i = bucket_array[top_priority].back();
+    int priority = Priority(ScaleQuadDistanceRGB(dist[i]), count[i],
+                            &density[i * kMaxLevel], &radius[i * kMaxLevel]);
+    if (priority < top_priority) {
+      bucket_array[priority].push_back(i);
+    } else {
+      AddToRGBPalette(&red[0], &green[0], &blue[0], &count[0], i, k++, n,
+                      &dist[0], &cluster[0], &center[0], &error);
+    }
+    bucket_array[top_priority].pop_back();
+    while (top_priority >= 0 && bucket_array[top_priority].empty()) {
+      --top_priority;
+    }
+  }
+
+  cinfo->actual_number_of_colors = k;
+  cinfo->colormap = (*cinfo->mem->alloc_sarray)(
+      reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, k, 3);
+  for (int i = 0; i < k; ++i) {
+    int index = center[i];
+    cinfo->colormap[0][i] = red[index];
+    cinfo->colormap[1][i] = green[index];
+    cinfo->colormap[2][i] = blue[index];
+  }
+}
+
+namespace {
+
+// Appends to |candidates| the indices of all colormap entries that could be
+// the nearest palette color for some point of the given color cell: the
+// entries whose minimal weighted squared distance to the cell is below the
+// smallest maximal distance of any entry to the cell.
+void FindCandidatesForCell(j_decompress_ptr cinfo, int ncomp, int cell[],
+                           std::vector<uint8_t>* candidates) {
+  int cell_min[kMaxComponents];
+  int cell_max[kMaxComponents];
+  int cell_center[kMaxComponents];
+  for (int c = 0; c < ncomp; ++c) {
+    const int shift = 8 - kNumColorCellBits[c];
+    cell_min[c] = cell[c] << shift;
+    cell_max[c] = cell_min[c] + (1 << shift) - 1;
+    cell_center[c] = (cell_min[c] + cell_max[c]) >> 1;
+  }
+
+  const int num_colors = cinfo->actual_number_of_colors;
+  int best_far = std::numeric_limits<int>::max();
+  int near_dist[256];
+  for (int i = 0; i < num_colors; ++i) {
+    int near_sum = 0;
+    int far_sum = 0;
+    for (int c = 0; c < ncomp; ++c) {
+      const int value = cinfo->colormap[c][i];
+      // Distance along component c to the nearest and farthest point of the
+      // cell; the nearest distance is 0 when the value lies inside the cell.
+      int near = 0;
+      int far;
+      if (value < cell_min[c]) {
+        near = cell_min[c] - value;
+        far = cell_max[c] - value;
+      } else if (value > cell_max[c]) {
+        near = value - cell_max[c];
+        far = value - cell_min[c];
+      } else {
+        far = value > cell_center[c] ? value - cell_min[c]
+                                     : cell_max[c] - value;
+      }
+      near *= kCompW[c];
+      far *= kCompW[c];
+      near_sum += near * near;
+      far_sum += far * far;
+    }
+    near_dist[i] = near_sum;
+    best_far = std::min(far_sum, best_far);
+  }
+  for (int i = 0; i < num_colors; ++i) {
+    if (near_dist[i] < best_far) {
+      candidates->push_back(i);
+    }
+  }
+}
+
+} // namespace
+
+// Rebuilds the per-cell candidate lists used to map pixels to colormap
+// indices and clears the master's regenerate flag. The color space is divided
+// into 2^kNumColorCellBits[c] cells along each output component.
+void CreateInverseColorMap(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  const int ncomp = cinfo->out_color_components;
+  int num_cells = 1;
+  for (int c = 0; c < ncomp; ++c) {
+    num_cells <<= kNumColorCellBits[c];
+  }
+  m->candidate_lists_.resize(num_cells);
+
+  // Walk all cells; the coordinate of the last component varies fastest.
+  int cell[kMaxComponents] = {0};
+  for (int i = 0; i < num_cells; ++i) {
+    auto& list = m->candidate_lists_[i];
+    list.clear();
+    FindCandidatesForCell(cinfo, ncomp, cell, &list);
+    int c = ncomp - 1;
+    for (; c > 0 && cell[c] + 1 == (1 << kNumColorCellBits[c]); --c) {
+      cell[c] = 0;
+    }
+    ++cell[c];
+  }
+  m->regenerate_inverse_colormap_ = false;
+}
+
+// Returns the colormap index for |pixel|. In quantization mode 1 the index is
+// the sum of the per-component lookup table entries; otherwise it is the
+// nearest (weighted squared distance) entry among the candidates of the
+// pixel's color cell.
+int LookupColorIndex(j_decompress_ptr cinfo, JSAMPLE* pixel) {
+  jpeg_decomp_master* m = cinfo->master;
+  const int num_channels = cinfo->out_color_components;
+  int index = 0;
+  if (m->quant_mode_ == 1) {
+    for (int c = 0; c < num_channels; ++c) {
+      index += m->colormap_lut_[c * 256 + pixel[c]];
+    }
+  } else {
+    // Flatten the cell coordinates; the last component has stride 1.
+    size_t cell_idx = 0;
+    size_t stride = 1;
+    for (int c = num_channels; c-- > 0;) {
+      cell_idx += (pixel[c] >> (8 - kNumColorCellBits[c])) * stride;
+      stride <<= kNumColorCellBits[c];
+    }
+    JXL_ASSERT(cell_idx < m->candidate_lists_.size());
+    int best_dist = std::numeric_limits<int>::max();
+    for (uint8_t candidate : m->candidate_lists_[cell_idx]) {
+      int dist = 0;
+      for (int c = 0; c < num_channels; ++c) {
+        const int diff = (cinfo->colormap[c][candidate] - pixel[c]) * kCompW[c];
+        dist += diff * diff;
+      }
+      if (dist < best_dist) {
+        best_dist = dist;
+        index = candidate;
+      }
+    }
+  }
+  JXL_ASSERT(index < cinfo->actual_number_of_colors);
+  return index;
+}
+
+// Fills the 4x4 ordered-dither table of every output component, allocating
+// it on first use. The base matrix entries (0..15) are scaled so that the
+// offsets span one quantization step of the component (1 / (levels - 1)) and
+// shifted down by half a step.
+void CreateOrderedDitherTables(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  static constexpr size_t kDitherSize = 4;
+  static constexpr size_t kDitherMask = kDitherSize - 1;
+  static constexpr float kBaseDitherMatrix[] = {
+      0,  8,  2,  10,  //
+      12, 4,  14, 6,   //
+      3,  11, 1,  9,   //
+      15, 7,  13, 5,   //
+  };
+  m->dither_size_ = kDitherSize;
+  m->dither_mask_ = kDitherMask;
+  const size_t ncells = m->dither_size_ * m->dither_size_;
+  const int num_components = cinfo->out_color_components;
+  for (int c = 0; c < num_components; ++c) {
+    if (m->dither_[c] == nullptr) {
+      m->dither_[c] = Allocate<float>(cinfo, ncells, JPOOL_IMAGE_ALIGNED);
+    }
+    const float spread = 1.0f / (m->num_colors_[c] - 1);
+    const float mul = spread / ncells;
+    const float offset = 0.5f * spread;
+    float* table = m->dither_[c];
+    for (size_t idx = 0; idx < ncells; ++idx) {
+      table[idx] = kBaseDitherMatrix[idx] * mul - offset;
+    }
+  }
+}
+
+void InitFSDitherState(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ for (int c = 0; c < cinfo->out_color_components; ++c) {
+ if (m->error_row_[c] == nullptr) {
+ m->error_row_[c] =
+ Allocate<float>(cinfo, cinfo->output_width, JPOOL_IMAGE_ALIGNED);
+ m->error_row_[c + kMaxComponents] =
+ Allocate<float>(cinfo, cinfo->output_width, JPOOL_IMAGE_ALIGNED);
+ }
+ memset(m->error_row_[c], 0.0, cinfo->output_width * sizeof(float));
+ memset(m->error_row_[c + kMaxComponents], 0.0,
+ cinfo->output_width * sizeof(float));
+ }
+}
+
+} // namespace jpegli
diff --git a/lib/jpegli/color_quantize.h b/lib/jpegli/color_quantize.h
new file mode 100644
index 0000000..3dda1d8
--- /dev/null
+++ b/lib/jpegli/color_quantize.h
@@ -0,0 +1,27 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_COLOR_QUANTIZE_H_
+#define LIB_JPEGLI_COLOR_QUANTIZE_H_
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+void ChooseColorMap1Pass(j_decompress_ptr cinfo);
+
+void ChooseColorMap2Pass(j_decompress_ptr cinfo);
+
+void CreateInverseColorMap(j_decompress_ptr cinfo);
+
+void CreateOrderedDitherTables(j_decompress_ptr cinfo);
+
+void InitFSDitherState(j_decompress_ptr cinfo);
+
+int LookupColorIndex(j_decompress_ptr cinfo, JSAMPLE* pixel);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_COLOR_QUANTIZE_H_
diff --git a/lib/jpegli/color_transform.cc b/lib/jpegli/color_transform.cc
new file mode 100644
index 0000000..020a6fd
--- /dev/null
+++ b/lib/jpegli/color_transform.cc
@@ -0,0 +1,281 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/color_transform.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/color_transform.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Div;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Sub;
+
+// Converts one row from YCbCr to RGB in place: on entry row[0..2] hold Y, Cb
+// and Cr, on return they hold R, G and B. No offset is applied to the chroma
+// inputs here. Data is processed in whole vectors of Lanes(df) floats, so the
+// final vector may reach past xsize.
+void YCbCrToRGB(float* row[kMaxComponents], size_t xsize) {
+  const HWY_CAPPED(float, 8) df;
+  float* JXL_RESTRICT plane0 = row[0];  // Y in, R out
+  float* JXL_RESTRICT plane1 = row[1];  // Cb in, G out
+  float* JXL_RESTRICT plane2 = row[2];  // Cr in, B out
+
+  // Full-range BT.601 as defined by JFIF Clause 7:
+  // https://www.itu.int/rec/T-REC-T.871-201105-I/en
+  const auto r_from_cr = Set(df, 1.402f);
+  const auto g_from_cb = Set(df, -0.114f * 1.772f / 0.587f);
+  const auto g_from_cr = Set(df, -0.299f * 1.402f / 0.587f);
+  const auto b_from_cb = Set(df, 1.772f);
+
+  const size_t lanes = Lanes(df);
+  for (size_t pos = 0; pos < xsize; pos += lanes) {
+    const auto luma = Load(df, plane0 + pos);
+    const auto cb = Load(df, plane1 + pos);
+    const auto cr = Load(df, plane2 + pos);
+    // Green accumulates the Cb term first, then the Cr term.
+    const auto g_partial = MulAdd(g_from_cb, cb, luma);
+    Store(MulAdd(r_from_cr, cr, luma), df, plane0 + pos);
+    Store(MulAdd(g_from_cr, cr, g_partial), df, plane1 + pos);
+    Store(MulAdd(b_from_cb, cb, luma), df, plane2 + pos);
+  }
+}
+
+// Converts one row from YCCK to CMYK in place. The first three channels are
+// run through YCbCrToRGB and each result v is then replaced by
+// (-1/255 - v). row[3] is not touched.
+void YCCKToCMYK(float* row[kMaxComponents], size_t xsize) {
+  const HWY_CAPPED(float, 8) df;
+  float* const planes[3] = {row[0], row[1], row[2]};
+  YCbCrToRGB(row, xsize);
+  const auto minuend = Set(df, -1.0f / 255.0f);
+  const size_t lanes = Lanes(df);
+  for (size_t pos = 0; pos < xsize; pos += lanes) {
+    for (float* plane : planes) {
+      Store(Sub(minuend, Load(df, plane + pos)), df, plane + pos);
+    }
+  }
+}
+
+// Converts one row from RGB to YCbCr in place: on entry row[0..2] hold R, G
+// and B, on return they hold Y, Cb and Cr, with both chroma channels offset
+// by +128. Data is processed in whole vectors of Lanes(df) floats.
+void RGBToYCbCr(float* row[kMaxComponents], size_t xsize) {
+  const HWY_CAPPED(float, 8) df;
+  float* JXL_RESTRICT plane0 = row[0];  // R in, Y out
+  float* JXL_RESTRICT plane1 = row[1];  // G in, Cb out
+  float* JXL_RESTRICT plane2 = row[2];  // B in, Cr out
+  // Full-range BT.601 as defined by JFIF Clause 7:
+  // https://www.itu.int/rec/T-REC-T.871-201105-I/en
+  const auto chroma_center = Set(df, 128.0f);
+  const auto weight_r = Set(df, 0.299f);  // NTSC luma
+  const auto weight_g = Set(df, 0.587f);
+  const auto weight_b = Set(df, 0.114f);
+  const auto amp_r = Set(df, 0.701f);
+  const auto amp_b = Set(df, 0.886f);
+  const auto diff_r = Add(amp_r, weight_r);
+  const auto diff_b = Add(amp_b, weight_b);
+  const auto norm_r = Div(Set(df, 1.0f), (Add(amp_r, Add(weight_g, weight_b))));
+  const auto norm_b = Div(Set(df, 1.0f), (Add(weight_r, Add(weight_g, amp_b))));
+
+  const size_t lanes = Lanes(df);
+  for (size_t pos = 0; pos < xsize; pos += lanes) {
+    const auto red = Load(df, plane0 + pos);
+    const auto green = Load(df, plane1 + pos);
+    const auto blue = Load(df, plane2 + pos);
+    // Luma is the weighted sum R*wr + (G*wg + B*wb).
+    const auto luma =
+        Add(Mul(red, weight_r), Add(Mul(green, weight_g), Mul(blue, weight_b)));
+    // Chroma is the scaled color difference, normalized and re-centered.
+    const auto cb =
+        MulAdd(Sub(Mul(blue, diff_b), luma), norm_b, chroma_center);
+    const auto cr = MulAdd(Sub(Mul(red, diff_r), luma), norm_r, chroma_center);
+    Store(luma, df, plane0 + pos);
+    Store(cb, df, plane1 + pos);
+    Store(cr, df, plane2 + pos);
+  }
+}
+
+// Converts one row from CMYK to YCCK in place. Each value v of the first
+// three channels is replaced by (255 - v) and the result is then run through
+// RGBToYCbCr. row[3] is not touched.
+void CMYKToYCCK(float* row[kMaxComponents], size_t xsize) {
+  const HWY_CAPPED(float, 8) df;
+  float* const planes[3] = {row[0], row[1], row[2]};
+  const auto full_scale = Set(df, 255.0f);
+  const size_t lanes = Lanes(df);
+  for (size_t pos = 0; pos < xsize; pos += lanes) {
+    for (float* plane : planes) {
+      Store(Sub(full_scale, Load(df, plane + pos)), df, plane + pos);
+    }
+  }
+  RGBToYCbCr(row, xsize);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+HWY_EXPORT(CMYKToYCCK);
+HWY_EXPORT(YCCKToCMYK);
+HWY_EXPORT(YCbCrToRGB);
+HWY_EXPORT(RGBToYCbCr);
+
+// Returns true when `num_components` is a valid channel count for
+// `colorspace`: 1 for grayscale, 3 for the RGB/YCbCr family, 4 for CMYK, YCCK
+// and the four-channel extended RGB layouts. Any other colorspace accepts
+// every channel count.
+bool CheckColorSpaceComponents(int num_components, J_COLOR_SPACE colorspace) {
+  int required = 0;
+  switch (colorspace) {
+    case JCS_GRAYSCALE:
+      required = 1;
+      break;
+    case JCS_RGB:
+    case JCS_YCbCr:
+    case JCS_EXT_RGB:
+    case JCS_EXT_BGR:
+      required = 3;
+      break;
+    case JCS_CMYK:
+    case JCS_YCCK:
+    case JCS_EXT_RGBX:
+    case JCS_EXT_BGRX:
+    case JCS_EXT_XBGR:
+    case JCS_EXT_XRGB:
+    case JCS_EXT_RGBA:
+    case JCS_EXT_BGRA:
+    case JCS_EXT_ABGR:
+    case JCS_EXT_ARGB:
+      required = 4;
+      break;
+    default:
+      // Unrecognized colorspaces can have any number of channels, since no
+      // color transform will be performed on them.
+      return true;
+  }
+  return num_components == required;
+}
+
+// Identity color transform: leaves every row exactly as it is.
+void NullTransform(float* row[kMaxComponents], size_t len) {
+  // Intentionally empty.
+}
+
+// Expands a grayscale row to RGB by copying the `len` samples of row[0] into
+// row[1] and row[2]; row[0] itself is left unchanged.
+void GrayscaleToRGB(float* row[kMaxComponents], size_t len) {
+  const float* gray = row[0];
+  const size_t num_bytes = len * sizeof(*gray);
+  for (int c = 1; c <= 2; ++c) {
+    memcpy(row[c], gray, num_bytes);
+  }
+}
+
+// Turns a grayscale row into YCbCr by keeping row[0] as luma and zero-filling
+// the first `len` samples of both chroma rows.
+// NOTE(review): RGBToYCbCr in this file centers chroma at 128, whereas this
+// writes 0 -- confirm which neutral chroma value the encoder expects here.
+void GrayscaleToYCbCr(float* row[kMaxComponents], size_t len) {
+  const size_t num_bytes = len * sizeof(row[1][0]);
+  memset(row[1], 0, num_bytes);
+  memset(row[2], 0, num_bytes);
+}
+
+// Selects the encoder's per-row color transform and stores it in
+// cinfo->master->color_transform.
+//
+// An error is reported through JPEGLI_ERROR when a component count does not
+// fit its colorspace, when a conversion is requested for JCS_RGB input in XYB
+// mode, or when the requested pair is not supported. Supported pairs
+// (in_color_space -> jpeg_color_space):
+//   identical colorspaces   : NullTransform
+//   RGB -> GRAYSCALE        : RGBToYCbCr
+//   YCbCr/YCCK -> GRAYSCALE : NullTransform
+//   GRAYSCALE -> RGB        : GrayscaleToRGB
+//   RGB -> YCbCr            : RGBToYCbCr
+//   GRAYSCALE -> YCbCr      : GrayscaleToYCbCr
+//   CMYK -> YCCK            : CMYKToYCCK
+void ChooseColorTransform(j_compress_ptr cinfo) {
+  jpeg_comp_master* m = cinfo->master;
+  const bool input_ok = CheckColorSpaceComponents(cinfo->input_components,
+                                                  cinfo->in_color_space);
+  if (!input_ok) {
+    JPEGLI_ERROR("Invalid number of input components %d for colorspace %d",
+                 cinfo->input_components, cinfo->in_color_space);
+  }
+  const bool coded_ok = CheckColorSpaceComponents(cinfo->num_components,
+                                                  cinfo->jpeg_color_space);
+  if (!coded_ok) {
+    JPEGLI_ERROR("Invalid number of components %d for colorspace %d",
+                 cinfo->num_components, cinfo->jpeg_color_space);
+  }
+
+  const J_COLOR_SPACE src = cinfo->in_color_space;
+  const J_COLOR_SPACE dst = cinfo->jpeg_color_space;
+  if (dst == src) {
+    if (cinfo->num_components != cinfo->input_components) {
+      JPEGLI_ERROR("Input/output components mismatch:  %d vs %d",
+                   cinfo->input_components, cinfo->num_components);
+    }
+    // No color transform requested.
+    m->color_transform = NullTransform;
+    return;
+  }
+
+  if (src == JCS_RGB && m->xyb_mode) {
+    JPEGLI_ERROR("Color transform on XYB colorspace is not supported.");
+  }
+
+  m->color_transform = nullptr;
+  switch (dst) {
+    case JCS_GRAYSCALE:
+      if (src == JCS_RGB) {
+        m->color_transform = HWY_DYNAMIC_DISPATCH(RGBToYCbCr);
+      } else if (src == JCS_YCbCr || src == JCS_YCCK) {
+        // Since the first luminance channel is the grayscale version of the
+        // image, nothing to do here
+        m->color_transform = NullTransform;
+      }
+      break;
+    case JCS_RGB:
+      if (src == JCS_GRAYSCALE) {
+        m->color_transform = GrayscaleToRGB;
+      }
+      break;
+    case JCS_YCbCr:
+      if (src == JCS_RGB) {
+        m->color_transform = HWY_DYNAMIC_DISPATCH(RGBToYCbCr);
+      } else if (src == JCS_GRAYSCALE) {
+        m->color_transform = GrayscaleToYCbCr;
+      }
+      break;
+    case JCS_YCCK:
+      if (src == JCS_CMYK) {
+        m->color_transform = HWY_DYNAMIC_DISPATCH(CMYKToYCCK);
+      }
+      break;
+    default:
+      break;
+  }
+
+  if (m->color_transform == nullptr) {
+    // TODO(szabadka) Support more color transforms.
+    JPEGLI_ERROR("Unsupported color transform %d -> %d", cinfo->in_color_space,
+                 cinfo->jpeg_color_space);
+  }
+}
+
+// Selects the decoder's per-row color transform and stores it in
+// cinfo->master->color_transform.
+//
+// An error is reported through JPEGLI_ERROR when a component count does not
+// fit its colorspace or when the requested pair is not supported. Supported
+// pairs (jpeg_color_space -> out_color_space):
+//   identical colorspaces : NullTransform
+//   GRAYSCALE -> RGB      : GrayscaleToRGB
+//   RGB -> GRAYSCALE      : RGBToYCbCr
+//   YCbCr -> RGB          : YCbCrToRGB
+//   YCbCr -> GRAYSCALE    : NullTransform
+//   YCCK -> CMYK          : YCCKToCMYK
+void ChooseColorTransform(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  const bool output_ok = CheckColorSpaceComponents(cinfo->out_color_components,
+                                                   cinfo->out_color_space);
+  if (!output_ok) {
+    JPEGLI_ERROR("Invalid number of output components %d for colorspace %d",
+                 cinfo->out_color_components, cinfo->out_color_space);
+  }
+  const bool coded_ok = CheckColorSpaceComponents(cinfo->num_components,
+                                                  cinfo->jpeg_color_space);
+  if (!coded_ok) {
+    JPEGLI_ERROR("Invalid number of components %d for colorspace %d",
+                 cinfo->num_components, cinfo->jpeg_color_space);
+  }
+
+  const J_COLOR_SPACE src = cinfo->jpeg_color_space;
+  const J_COLOR_SPACE dst = cinfo->out_color_space;
+  if (src == dst) {
+    if (cinfo->num_components != cinfo->out_color_components) {
+      JPEGLI_ERROR("Input/output components mismatch:  %d vs %d",
+                   cinfo->num_components, cinfo->out_color_components);
+    }
+    // No color transform requested.
+    m->color_transform = NullTransform;
+    return;
+  }
+
+  m->color_transform = nullptr;
+  switch (src) {
+    case JCS_GRAYSCALE:
+      if (dst == JCS_RGB) {
+        m->color_transform = GrayscaleToRGB;
+      }
+      break;
+    case JCS_RGB:
+      if (dst == JCS_GRAYSCALE) {
+        m->color_transform = HWY_DYNAMIC_DISPATCH(RGBToYCbCr);
+      }
+      break;
+    case JCS_YCbCr:
+      if (dst == JCS_RGB) {
+        m->color_transform = HWY_DYNAMIC_DISPATCH(YCbCrToRGB);
+      } else if (dst == JCS_GRAYSCALE) {
+        m->color_transform = NullTransform;
+      }
+      break;
+    case JCS_YCCK:
+      if (dst == JCS_CMYK) {
+        m->color_transform = HWY_DYNAMIC_DISPATCH(YCCKToCMYK);
+      }
+      break;
+    default:
+      break;
+  }
+
+  if (m->color_transform == nullptr) {
+    // TODO(szabadka) Support more color transforms.
+    JPEGLI_ERROR("Unsupported color transform %d -> %d",
+                 cinfo->jpeg_color_space, cinfo->out_color_space);
+  }
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/lib/jpegli/color_transform.h b/lib/jpegli/color_transform.h
new file mode 100644
index 0000000..8d58f88
--- /dev/null
+++ b/lib/jpegli/color_transform.h
@@ -0,0 +1,20 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_COLOR_TRANSFORM_H_
+#define LIB_JPEGLI_COLOR_TRANSFORM_H_
+
+#include "lib/jpegli/common.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jpegli {
+
+void ChooseColorTransform(j_compress_ptr cinfo);
+
+void ChooseColorTransform(j_decompress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_COLOR_TRANSFORM_H_
diff --git a/lib/jpegli/common.cc b/lib/jpegli/common.cc
new file mode 100644
index 0000000..5f34372
--- /dev/null
+++ b/lib/jpegli/common.cc
@@ -0,0 +1,59 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/common.h"
+
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/memory_manager.h"
+
+// Aborts the current operation on `cinfo`: frees every memory pool except
+// JPOOL_PERMANENT and puts the object back into its start state (kDecStart
+// for a decompressor, kEncStart otherwise). Does nothing when no memory
+// manager is attached.
+void jpegli_abort(j_common_ptr cinfo) {
+  if (cinfo->mem == nullptr) return;
+  for (int pool_id = 0; pool_id < JPOOL_NUMPOOLS; ++pool_id) {
+    if (pool_id != JPOOL_PERMANENT) {
+      (*cinfo->mem->free_pool)(cinfo, pool_id);
+    }
+  }
+  cinfo->global_state =
+      cinfo->is_decompressor ? jpegli::kDecStart : jpegli::kEncStart;
+}
+
+// Destroys `cinfo`: invokes the memory manager's self_destruct hook and moves
+// the object to its null state (kDecNull for a decompressor, kEncNull
+// otherwise). For a decompressor the master object is deleted as well and the
+// pointer is cleared so that it is not left dangling. Does nothing when no
+// memory manager is attached.
+void jpegli_destroy(j_common_ptr cinfo) {
+  if (cinfo->mem == nullptr) return;
+  (*cinfo->mem->self_destruct)(cinfo);
+  if (cinfo->is_decompressor) {
+    cinfo->global_state = jpegli::kDecNull;
+    j_decompress_ptr dinfo = reinterpret_cast<j_decompress_ptr>(cinfo);
+    delete dinfo->master;
+    // Avoid leaving a dangling pointer behind in the caller's struct.
+    dinfo->master = nullptr;
+  } else {
+    cinfo->global_state = jpegli::kEncNull;
+  }
+}
+
+// Allocates one JQUANT_TBL through jpegli::Allocate and returns it with its
+// sent_table flag cleared. No other field is initialized here.
+JQUANT_TBL* jpegli_alloc_quant_table(j_common_ptr cinfo) {
+  JQUANT_TBL* const quant = jpegli::Allocate<JQUANT_TBL>(cinfo, 1);
+  quant->sent_table = FALSE;
+  return quant;
+}
+
+// Allocates one JHUFF_TBL through jpegli::Allocate and returns it with its
+// sent_table flag cleared. No other field is initialized here.
+JHUFF_TBL* jpegli_alloc_huff_table(j_common_ptr cinfo) {
+  JHUFF_TBL* const huff = jpegli::Allocate<JHUFF_TBL>(cinfo, 1);
+  huff->sent_table = FALSE;
+  return huff;
+}
+
+// Returns the size in bytes of one sample of `data_type`: 1 for UINT8, 2 for
+// UINT16, 4 for FLOAT, and 0 for any other value.
+int jpegli_bytes_per_sample(JpegliDataType data_type) {
+  if (data_type == JPEGLI_TYPE_UINT8) {
+    return 1;
+  }
+  if (data_type == JPEGLI_TYPE_UINT16) {
+    return 2;
+  }
+  if (data_type == JPEGLI_TYPE_FLOAT) {
+    return 4;
+  }
+  return 0;
+}
diff --git a/lib/jpegli/common.h b/lib/jpegli/common.h
new file mode 100644
index 0000000..42487f2
--- /dev/null
+++ b/lib/jpegli/common.h
@@ -0,0 +1,48 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// This file contains the C API of the common encoder/decoder part of libjpegli
+// library, which is based on the C API of libjpeg, with the function names
+// changed from jpeg_* to jpegli_*, while compressor and decompressor object
+// definitions are included directly from jpeglib.h
+//
+// Applications can use the libjpegli library in one of the following ways:
+//
+// (1) Include jpegli/encode.h and/or jpegli/decode.h, update the function
+// names of the API and link against libjpegli.
+//
+// (2) Leave the application code unchanged, but replace the libjpeg.so library
+// with the one built by this project that is API- and ABI-compatible with
+// libjpeg-turbo's version of libjpeg.so.
+
+#ifndef LIB_JPEGLI_COMMON_H_
+#define LIB_JPEGLI_COMMON_H_
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+/* clang-format on */
+
+#include "lib/jpegli/types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+struct jpeg_error_mgr* jpegli_std_error(struct jpeg_error_mgr* err);
+
+void jpegli_abort(j_common_ptr cinfo);
+
+void jpegli_destroy(j_common_ptr cinfo);
+
+JQUANT_TBL* jpegli_alloc_quant_table(j_common_ptr cinfo);
+
+JHUFF_TBL* jpegli_alloc_huff_table(j_common_ptr cinfo);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif // LIB_JPEGLI_COMMON_H_
diff --git a/lib/jpegli/common_internal.h b/lib/jpegli/common_internal.h
new file mode 100644
index 0000000..248d315
--- /dev/null
+++ b/lib/jpegli/common_internal.h
@@ -0,0 +1,150 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_COMMON_INTERNAL_H_
+#define LIB_JPEGLI_COMMON_INTERNAL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <hwy/aligned_allocator.h>
+
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jpegli/simd.h"
+#include "lib/jxl/base/compiler_specific.h" // for ssize_t
+#include "lib/jxl/base/status.h" // for JXL_CHECK
+
+namespace jpegli {
+
+// Lifecycle states stored in cinfo->global_state. The kDec* values belong to
+// decompressor objects and the kEnc* values to compressor objects.
+// jpegli_abort() resets an object to kDecStart / kEncStart and
+// jpegli_destroy() leaves it in kDecNull / kEncNull.
+enum State {
+ kDecNull,
+ kDecStart,
+ kDecInHeader,
+ kDecHeaderDone,
+ kDecProcessMarkers,
+ kDecProcessScan,
+ kEncNull,
+ kEncStart,
+ kEncHeader,
+ kEncReadImage,
+ kEncWriteCoeffs,
+};
+
+// Returns (a + b - 1) / b converted to T1. For integer types with a >= 0 and
+// b > 0 this is a / b rounded up. The intermediate sum a + b - 1 is not
+// guarded against overflow.
+template <typename T1, typename T2>
+constexpr inline T1 DivCeil(T1 a, T2 b) {
+ return (a + b - 1) / b;
+}
+
+// Rounds `a` up to a multiple of `b`, computed as DivCeil(a, b) * b and
+// converted to T1. The same input restrictions as for DivCeil apply.
+template <typename T1, typename T2>
+constexpr inline T1 RoundUpTo(T1 a, T2 b) {
+ return DivCeil(a, b) * b;
+}
+
+// Number of entries in one block (the zigzag table below has the same size).
+constexpr size_t kDCTBlockSize = 64;
+// This is set to the same value as MAX_COMPS_IN_SCAN, because that is the
+// maximum number of channels the libjpeg-turbo decoder can decode.
+constexpr int kMaxComponents = 4;
+constexpr int kMaxQuantTables = 4;
+constexpr int kJpegPrecision = 8;
+constexpr int kMaxHuffmanTables = 4;
+constexpr size_t kJpegHuffmanMaxBitLength = 16;
+constexpr int kJpegHuffmanAlphabetSize = 256;
+constexpr int kJpegDCAlphabetSize = 12;
+constexpr int kMaxDHTMarkers = 512;
+constexpr int kMaxDimPixels = 65535;
+constexpr uint8_t kApp1 = 0xE1;
+constexpr uint8_t kApp2 = 0xE2;
+// The array sizes below include the terminating NUL of each string literal.
+// kExifTag spells an explicit '\0' in addition to the implicit terminator, so
+// its six bytes are "Exif" followed by two NUL bytes.
+const uint8_t kIccProfileTag[12] = "ICC_PROFILE";
+const uint8_t kExifTag[6] = "Exif\0";
+const uint8_t kXMPTag[29] = "http://ns.adobe.com/xap/1.0/";
+
+/* clang-format off */
+// Maps a position in zigzag scan order to the row-major index
+// (row * 8 + column) of that coefficient within the 8x8 block; for example
+// entry 2 is 8, i.e. row 1, column 0. The table is padded with 16 extra
+// entries that all point at the last coefficient (63).
+constexpr uint32_t kJPEGNaturalOrder[80] = {
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63,
+ // extra entries for safety in decoder
+ 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+// Inverse of the first 64 entries of kJPEGNaturalOrder: maps the row-major
+// index (row * 8 + column) of a coefficient to its position in zigzag scan
+// order; for example entry 8 (row 1, column 0) is 2.
+constexpr uint32_t kJPEGZigZagOrder[64] = {
+ 0, 1, 5, 6, 14, 15, 27, 28,
+ 2, 4, 7, 13, 16, 26, 29, 42,
+ 3, 8, 12, 17, 25, 30, 41, 43,
+ 9, 11, 18, 24, 31, 40, 44, 53,
+ 10, 19, 23, 32, 39, 45, 52, 54,
+ 20, 22, 33, 38, 46, 51, 55, 60,
+ 21, 34, 37, 47, 50, 56, 59, 61,
+ 35, 36, 48, 49, 57, 58, 62, 63
+};
+/* clang-format on */
+
+// A buffer of `ysize` rows of `T` samples with padding on both sides of every
+// row. Rows are addressed as a ring: Row(y) wraps y modulo the row count.
+// The object does not own or free its storage; memory comes from
+// ::jpegli::Allocate with JPOOL_IMAGE_ALIGNED. All members are unset until
+// Allocate() has been called.
+template <typename T>
+class RowBuffer {
+ public:
+  // Allocates storage for `num_rows` rows of `rowsize` samples each.
+  //
+  // Every row occupies `stride()` samples. Its first sample sits `alignment`
+  // bytes into that span, which leaves room for negative column indices
+  // (left border), and at least one vector of slack follows the last sample.
+  template <typename CInfoType>
+  void Allocate(CInfoType cinfo, size_t num_rows, size_t rowsize) {
+    size_t vec_size = std::max(VectorSize(), sizeof(T));
+    JXL_CHECK(vec_size % sizeof(T) == 0);
+    size_t alignment = std::max<size_t>(HWY_ALIGNMENT, vec_size);
+    size_t min_memstride = alignment + rowsize * sizeof(T) + vec_size;
+    size_t memstride = RoundUpTo(min_memstride, alignment);
+    xsize_ = rowsize;
+    ysize_ = num_rows;
+    stride_ = memstride / sizeof(T);
+    offset_ = alignment / sizeof(T);
+    data_ = ::jpegli::Allocate<T>(cinfo, ysize_ * stride_, JPOOL_IMAGE_ALIGNED);
+  }
+
+  // Returns a pointer to column 0 of row (ysize + y) % ysize. The sum is
+  // evaluated in unsigned arithmetic, so negative y is only guaranteed to
+  // wrap correctly down to -ysize.
+  T* Row(ssize_t y) const {
+    return &data_[((ysize_ + y) % ysize_) * stride_ + offset_];
+  }
+
+  size_t xsize() const { return xsize_; }
+  size_t ysize() const { return ysize_; }
+  size_t stride() const { return stride_; }
+
+  // Replicates edge samples of row `y`: columns [-border, 0) receive the
+  // value of column 0, and columns [from, xsize + border) receive the value
+  // of column from - 1. Requires from >= 1.
+  void PadRow(size_t y, size_t from, int border) {
+    // Use the element type T rather than float so that the method compiles
+    // for every instantiation of the buffer.
+    T* row = Row(y);
+    for (int offset = -border; offset < 0; ++offset) {
+      row[offset] = row[0];
+    }
+    const T last_val = row[from - 1];
+    for (size_t x = from; x < xsize_ + border; ++x) {
+      row[x] = last_val;
+    }
+  }
+
+  // Copies row `src_row` into row `dst_row`, including `border` samples on
+  // each side. The two rows must be distinct because memcpy is used.
+  void CopyRow(ssize_t dst_row, ssize_t src_row, int border) {
+    memcpy(Row(dst_row) - border, Row(src_row) - border,
+           (xsize_ + 2 * border) * sizeof(T));
+  }
+
+  // Sets the first `len` samples of row `y` to `val`.
+  void FillRow(ssize_t y, T val, size_t len) {
+    T* row = Row(y);
+    for (size_t x = 0; x < len; ++x) {
+      row[x] = val;
+    }
+  }
+
+ private:
+  size_t xsize_;   // samples per row, excluding padding
+  size_t ysize_;   // number of rows in the ring
+  size_t stride_;  // distance between consecutive rows, in samples
+  size_t offset_;  // position of column 0 within a row's span, in samples
+  T* data_;        // start of the allocation
+};
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_COMMON_INTERNAL_H_
diff --git a/lib/jpegli/dct-inl.h b/lib/jpegli/dct-inl.h
new file mode 100644
index 0000000..1cbe704
--- /dev/null
+++ b/lib/jpegli/dct-inl.h
@@ -0,0 +1,258 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JPEGLI_DCT_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JPEGLI_DCT_INL_H_
+#undef LIB_JPEGLI_DCT_INL_H_
+#else
+#define LIB_JPEGLI_DCT_INL_H_
+#endif
+
+#include "lib/jpegli/transpose-inl.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::DemoteTo;
+using hwy::HWY_NAMESPACE::Ge;
+using hwy::HWY_NAMESPACE::IfThenElseZero;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::Round;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::Vec;
+
+using D = HWY_FULL(float);
+using DI = HWY_FULL(int32_t);
+
+// For N rows of 8 floats each: aout[i] = ain1[i] + ain2[N - 1 - i], i.e. the
+// second input is traversed in reverse row order.
+template <size_t N>
+void AddReverse(const float* JXL_RESTRICT ain1, const float* JXL_RESTRICT ain2,
+                float* JXL_RESTRICT aout) {
+  HWY_CAPPED(float, 8) d8;
+  for (size_t row = 0; row < N; ++row) {
+    const size_t mirrored = N - row - 1;
+    const auto forward = Load(d8, ain1 + row * 8);
+    const auto backward = Load(d8, ain2 + mirrored * 8);
+    Store(Add(forward, backward), d8, aout + row * 8);
+  }
+}
+
+// For N rows of 8 floats each: aout[i] = ain1[i] - ain2[N - 1 - i], i.e. the
+// second input is traversed in reverse row order.
+template <size_t N>
+void SubReverse(const float* JXL_RESTRICT ain1, const float* JXL_RESTRICT ain2,
+                float* JXL_RESTRICT aout) {
+  HWY_CAPPED(float, 8) d8;
+  for (size_t row = 0; row < N; ++row) {
+    const size_t mirrored = N - row - 1;
+    const auto forward = Load(d8, ain1 + row * 8);
+    const auto backward = Load(d8, ain2 + mirrored * 8);
+    Store(Sub(forward, backward), d8, aout + row * 8);
+  }
+}
+
+// In-place pass over N rows of 8 floats: row 0 becomes
+// row0 * sqrt(2) + row1, each row i in [1, N - 2] becomes row_i + row_{i+1},
+// and the last row is left unchanged.
+template <size_t N>
+void B(float* JXL_RESTRICT coeff) {
+  HWY_CAPPED(float, 8) d8;
+  constexpr float kSqrt2 = 1.41421356237f;
+  const auto root2 = Set(d8, kSqrt2);
+  const auto head = Load(d8, coeff);
+  const auto head_next = Load(d8, coeff + 8);
+  Store(MulAdd(head, root2, head_next), d8, coeff);
+  for (size_t row = 1; row + 1 < N; ++row) {
+    float* cur = coeff + row * 8;
+    const auto here = Load(d8, cur);
+    const auto below = Load(d8, cur + 8);
+    Store(Add(here, below), d8, cur);
+  }
+}
+
+// Interleaves the two halves of `ain` (rows of 8 floats) into `aout`: rows
+// [0, N/2) go to the even output rows and rows [N/2, N) go to the odd output
+// rows. Pure data movement, ideally optimized away by the compiler.
+template <size_t N>
+void InverseEvenOdd(const float* JXL_RESTRICT ain, float* JXL_RESTRICT aout) {
+  HWY_CAPPED(float, 8) d8;
+  constexpr size_t kHalf = N / 2;
+  for (size_t k = 0; k < kHalf; ++k) {
+    const auto even_src = Load(d8, ain + k * 8);
+    Store(even_src, d8, aout + (2 * k) * 8);
+  }
+  for (size_t k = 0; kHalf + k < N; ++k) {
+    const auto odd_src = Load(d8, ain + (kHalf + k) * 8);
+    Store(odd_src, d8, aout + (2 * k + 1) * 8);
+  }
+}
+
+// Constants for DCT implementation. Generated by the following snippet:
+// for i in range(N // 2):
+// print(1.0 / (2 * math.cos((i + 0.5) * math.pi / N)), end=", ")
+// Primary template is only declared; tables exist for N = 4 and N = 8.
+template <size_t N>
+struct WcMultipliers;
+
+// N = 4: the N / 2 = 2 values 1 / (2 * cos((i + 0.5) * pi / 4)).
+template <>
+struct WcMultipliers<4> {
+ static constexpr float kMultipliers[] = {
+ 0.541196100146197,
+ 1.3065629648763764,
+ };
+};
+
+// N = 8: the N / 2 = 4 values 1 / (2 * cos((i + 0.5) * pi / 8)).
+template <>
+struct WcMultipliers<8> {
+ static constexpr float kMultipliers[] = {
+ 0.5097955791041592,
+ 0.6013448869350453,
+ 0.8999762231364156,
+ 2.5629154477415055,
+ };
+};
+
+// Namespace-scope definitions of the static members, required before C++17
+// when the arrays are odr-used (they are indexed at run time in Multiply).
+constexpr float WcMultipliers<4>::kMultipliers[];
+constexpr float WcMultipliers<8>::kMultipliers[];
+
+// Invoked on full vector.
+// Scales the upper half of `coeff` in place: row N/2 + i (8 floats) is
+// multiplied by WcMultipliers<N>::kMultipliers[i]. The lower half is not
+// modified.
+template <size_t N>
+void Multiply(float* JXL_RESTRICT coeff) {
+  HWY_CAPPED(float, 8) d8;
+  constexpr size_t kHalf = N / 2;
+  for (size_t k = 0; k < kHalf; ++k) {
+    float* target = coeff + (kHalf + k) * 8;
+    const auto values = Load(d8, target);
+    const auto factor = Set(d8, WcMultipliers<N>::kMultipliers[k]);
+    Store(Mul(values, factor), d8, target);
+  }
+}
+
+// Copies an 8-row tile into `coeff`, which is laid out as 8 rows of 8 floats.
+// Row r is read (unaligned) starting at pixels[r * pixels_stride + off] and
+// written (aligned) to coeff + r * 8.
+void LoadFromBlock(const float* JXL_RESTRICT pixels, size_t pixels_stride,
+                   size_t off, float* JXL_RESTRICT coeff) {
+  HWY_CAPPED(float, 8) d8;
+  for (size_t r = 0; r < 8; ++r) {
+    const auto line = LoadU(d8, pixels + r * pixels_stride + off);
+    Store(line, d8, coeff + r * 8);
+  }
+}
+
+// Writes the 8 rows of `coeff` (8 floats apart) to `output`, multiplying
+// every value by 1/8. Row r is stored (unaligned) at output + r * 8 + off.
+void StoreToBlockAndScale(const float* JXL_RESTRICT coeff, float* output,
+                          size_t off) {
+  HWY_CAPPED(float, 8) d8;
+  const auto one_eighth = Set(d8, 1.0f / 8);
+  for (size_t r = 0; r < 8; ++r) {
+    const auto line = Load(d8, coeff + r * 8);
+    StoreU(Mul(one_eighth, line), d8, output + r * 8 + off);
+  }
+}
+
+// Recursive in-place transform over N rows of 8 floats stored in `mem`.
+// N = 1 and N = 2 are the base cases; larger N split the problem in half.
+template <size_t N>
+struct DCT1DImpl;
+
+// One row: nothing to do.
+template <>
+struct DCT1DImpl<1> {
+  JXL_INLINE void operator()(float* JXL_RESTRICT mem) {}
+};
+
+// Two rows: replaced by their sum and their difference.
+template <>
+struct DCT1DImpl<2> {
+  JXL_INLINE void operator()(float* JXL_RESTRICT mem) {
+    HWY_CAPPED(float, 8) d8;
+    const auto first = Load(d8, mem);
+    const auto second = Load(d8, mem + 8);
+    Store(Add(first, second), d8, mem);
+    Store(Sub(first, second), d8, mem + 8);
+  }
+};
+
+// General case. The lower half of `tmp` receives row_i + row_{N-1-i} and the
+// upper half row_i - row_{N-1-i}; each half is transformed recursively (the
+// upper half after Multiply and followed by B), and InverseEvenOdd finally
+// interleaves the two halves back into `mem`.
+template <size_t N>
+struct DCT1DImpl {
+  void operator()(float* JXL_RESTRICT mem) {
+    // Offset, in floats, of the second half: (N / 2) rows of 8 floats.
+    constexpr size_t kHalfOffset = N * 4;
+    HWY_ALIGN float tmp[N * 8];
+    float* const sums = tmp;
+    float* const diffs = tmp + kHalfOffset;
+    AddReverse<N / 2>(mem, mem + kHalfOffset, sums);
+    DCT1DImpl<N / 2>()(sums);
+    SubReverse<N / 2>(mem, mem + kHalfOffset, diffs);
+    Multiply<N>(tmp);
+    DCT1DImpl<N / 2>()(diffs);
+    B<N / 2>(diffs);
+    InverseEvenOdd<N>(tmp, mem);
+  }
+};
+
+// Applies the 8-point transform down the columns of an 8x8 tile. Columns are
+// handled Lanes(d8) at a time: each group is copied into an aligned scratch
+// buffer, transformed, scaled by 1/8 and written to `output`, which uses a
+// row stride of 8 floats.
+void DCT1D(const float* JXL_RESTRICT pixels, size_t pixels_stride,
+           float* JXL_RESTRICT output) {
+  HWY_CAPPED(float, 8) d8;
+  HWY_ALIGN float scratch[64];
+  const size_t lanes = Lanes(d8);
+  for (size_t col = 0; col < 8; col += lanes) {
+    // TODO(veluca): consider removing the temporary memory here (as is done in
+    // IDCT), if it turns out that some compilers don't optimize away the loads
+    // and this is performance-critical.
+    LoadFromBlock(pixels, pixels_stride, col, scratch);
+    DCT1DImpl<8>()(scratch);
+    StoreToBlockAndScale(scratch, output, col);
+  }
+}
+
+// 2D transform of one 8x8 tile of `pixels` into `coefficients`, built from
+// two column passes with a transpose after each. `scratch_space` must hold at
+// least 64 floats and is overwritten.
+static JXL_INLINE JXL_MAYBE_UNUSED void TransformFromPixels(
+    const float* JXL_RESTRICT pixels, size_t pixels_stride,
+    float* JXL_RESTRICT coefficients, float* JXL_RESTRICT scratch_space) {
+  // Intermediate data is stored as a dense 8x8 block.
+  constexpr size_t kBlockStride = 8;
+  // First pass: transform along one dimension, then transpose.
+  DCT1D(pixels, pixels_stride, scratch_space);
+  Transpose8x8Block(scratch_space, coefficients);
+  // Second pass: the same along the other dimension, then transpose back.
+  DCT1D(coefficients, kBlockStride, scratch_space);
+  Transpose8x8Block(scratch_space, coefficients);
+}
+
+// Narrows a vector of int32 values to int16 with DemoteTo and stores it at
+// `out`.
+static JXL_INLINE JXL_MAYBE_UNUSED void StoreQuantizedValue(const Vec<DI>& ival,
+                                                            int16_t* out) {
+  Rebind<int16_t, DI> d16;
+  const auto narrowed = DemoteTo(d16, ival);
+  Store(narrowed, d16, out);
+}
+
+// Stores a vector of int32 values at `out` without any conversion.
+static JXL_INLINE JXL_MAYBE_UNUSED void StoreQuantizedValue(const Vec<DI>& ival,
+                                                            int32_t* out) {
+  DI d32;
+  Store(ival, d32, out);
+}
+
+// Quantizes the DCTSIZE2 coefficients in `dct` into `block`.
+//
+// Each coefficient is multiplied by the matching entry of `qmc`. When the
+// magnitude of the product is at least
+// zero_bias_offset[k] + zero_bias_mul[k] * aq_strength, the rounded product
+// is stored; otherwise zero is stored.
+template <typename T>
+void QuantizeBlock(const float* dct, const float* qmc, float aq_strength,
+                   const float* zero_bias_offset, const float* zero_bias_mul,
+                   T* block) {
+  D df;
+  DI di;
+  const auto aq = Set(df, aq_strength);
+  const size_t lanes = Lanes(df);
+  for (size_t pos = 0; pos < DCTSIZE2; pos += lanes) {
+    const auto scaled = Mul(Load(df, dct + pos), Load(df, qmc + pos));
+    const auto bias_offset = Load(df, zero_bias_offset + pos);
+    const auto bias_mul = Load(df, zero_bias_mul + pos);
+    const auto cutoff = Add(bias_offset, Mul(bias_mul, aq));
+    const auto keep = Ge(Abs(scaled), cutoff);
+    const auto rounded = IfThenElseZero(keep, Round(scaled));
+    StoreQuantizedValue(ConvertTo(di, rounded), block + pos);
+  }
+}
+
+// Transforms one 8x8 tile of `pixels` and quantizes it into `block`.
+//
+// `tmp` provides scratch memory: the first DCTSIZE2 floats receive the
+// transformed coefficients and the floats after that are handed to
+// TransformFromPixels as its scratch space. After the generic quantization
+// the DC entry is recomputed separately: 128 is subtracted before scaling,
+// and when the result lies within the zero-bias threshold of `last_dc_coeff`
+// the previous DC value is reused instead of the rounded one.
+template <typename T>
+void ComputeCoefficientBlock(const float* JXL_RESTRICT pixels, size_t stride,
+                             const float* JXL_RESTRICT qmc,
+                             int16_t last_dc_coeff, float aq_strength,
+                             const float* zero_bias_offset,
+                             const float* zero_bias_mul,
+                             float* JXL_RESTRICT tmp, T* block) {
+  float* JXL_RESTRICT coeffs = tmp;
+  float* JXL_RESTRICT scratch = tmp + DCTSIZE2;
+  TransformFromPixels(pixels, stride, coeffs, scratch);
+  QuantizeBlock(coeffs, qmc, aq_strength, zero_bias_offset, zero_bias_mul,
+                block);
+  // Center DC values around zero.
+  static constexpr float kDCBias = 128.0f;
+  const float dc = (coeffs[0] - kDCBias) * qmc[0];
+  const float dc_threshold =
+      zero_bias_offset[0] + aq_strength * zero_bias_mul[0];
+  const bool reuse_last_dc = std::abs(dc - last_dc_coeff) < dc_threshold;
+  if (reuse_last_dc) {
+    block[0] = last_dc_coeff;
+  } else {
+    block[0] = std::round(dc);
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+#endif // LIB_JPEGLI_DCT_INL_H_
diff --git a/lib/jpegli/decode.cc b/lib/jpegli/decode.cc
new file mode 100644
index 0000000..758babe
--- /dev/null
+++ b/lib/jpegli/decode.cc
@@ -0,0 +1,1028 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode.h"
+
+#include <string.h>
+
+#include <vector>
+
+#include "lib/jpegli/color_quantize.h"
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/decode_marker.h"
+#include "lib/jpegli/decode_scan.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jpegli/render.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/status.h"
+
+namespace jpegli {
+
+// Resets the per-image state of the decompress object: first the public
+// fields that describe the current image and the decoding position, then the
+// private fields kept in cinfo->master.
+void InitializeImage(j_decompress_ptr cinfo) {
+  // Values that are normally filled in from markers.
+  cinfo->restart_interval = 0;
+  cinfo->saw_JFIF_marker = FALSE;
+  cinfo->JFIF_major_version = 1;
+  cinfo->JFIF_minor_version = 1;
+  cinfo->density_unit = 0;
+  cinfo->X_density = 1;
+  cinfo->Y_density = 1;
+  cinfo->saw_Adobe_marker = FALSE;
+  cinfo->Adobe_transform = 0;
+  cinfo->CCIR601_sampling = FALSE;  // not used
+  cinfo->marker_list = nullptr;
+  cinfo->comp_info = nullptr;
+  cinfo->coef_bits = nullptr;
+  cinfo->unread_marker = 0;
+  // Input and output positions.
+  cinfo->input_scan_number = 0;
+  cinfo->input_iMCU_row = 0;
+  cinfo->output_scan_number = 0;
+  cinfo->output_iMCU_row = 0;
+  cinfo->output_scanline = 0;
+  // We set all these to zero since we don't yet support arithmetic coding.
+  memset(cinfo->arith_dc_L, 0, sizeof(cinfo->arith_dc_L));
+  memset(cinfo->arith_dc_U, 0, sizeof(cinfo->arith_dc_U));
+  memset(cinfo->arith_ac_K, 0, sizeof(cinfo->arith_ac_K));
+  // Private decoder state.
+  jpeg_decomp_master* master = cinfo->master;
+  master->input_buffer_.clear();
+  master->input_buffer_pos_ = 0;
+  master->codestream_bits_ahead_ = 0;
+  master->is_multiscan_ = false;
+  master->found_soi_ = false;
+  master->found_dri_ = false;
+  master->found_sof_ = false;
+  master->found_eoi_ = false;
+  master->icc_index_ = 0;
+  master->icc_total_ = 0;
+  master->icc_profile_.clear();
+  memset(master->dc_huff_lut_, 0, sizeof(master->dc_huff_lut_));
+  memset(master->ac_huff_lut_, 0, sizeof(master->ac_huff_lut_));
+  // Every lookup table entry starts out as an invalid symbol, so that such
+  // entries can be recognized when reading the bit stream using a Huffman code
+  // with space > 0.
+  for (size_t idx = 0; idx < kAllHuffLutSize; ++idx) {
+    master->dc_huff_lut_[idx].bits = 0;
+    master->dc_huff_lut_[idx].value = 0xffff;
+    master->ac_huff_lut_[idx].bits = 0;
+    master->ac_huff_lut_[idx].value = 0xffff;
+  }
+  master->colormap_lut_ = nullptr;
+  master->pixels_ = nullptr;
+  master->scanlines_ = nullptr;
+  master->regenerate_inverse_colormap_ = true;
+  for (int c = 0; c < kMaxComponents; ++c) {
+    master->dither_[c] = nullptr;
+    master->error_row_[c] = nullptr;
+  }
+  master->output_passes_done_ = 0;
+  master->xoffset_ = 0;
+  master->dequant_ = nullptr;
+}
+
+// Sets the decompression parameter fields of cinfo to their default values.
+void InitializeDecompressParams(j_decompress_ptr cinfo) {
+  // Color spaces, scaling and gamma.
+  cinfo->jpeg_color_space = JCS_UNKNOWN;
+  cinfo->out_color_space = JCS_UNKNOWN;
+  cinfo->scale_num = 1;
+  cinfo->scale_denom = 1;
+  cinfo->output_gamma = 0.0f;
+  // Decoding modes.
+  cinfo->buffered_image = FALSE;
+  cinfo->raw_data_out = FALSE;
+  cinfo->dct_method = JDCT_DEFAULT;
+  cinfo->do_fancy_upsampling = TRUE;
+  cinfo->do_block_smoothing = TRUE;
+  // Color quantization.
+  cinfo->quantize_colors = FALSE;
+  cinfo->dither_mode = JDITHER_FS;
+  cinfo->two_pass_quantize = TRUE;
+  cinfo->desired_number_of_colors = 256;
+  cinfo->enable_1pass_quant = FALSE;
+  cinfo->enable_external_quant = FALSE;
+  cinfo->enable_2pass_quant = FALSE;
+  cinfo->actual_number_of_colors = 0;
+  cinfo->colormap = nullptr;
+}
+
+// Initializes the progress monitor, if one is installed, at the start of
+// decoding. pass_limit is estimated from the expected number of scans and
+// total_passes from the number of input and output passes; when coef_only is
+// true a single pass is reported.
+void InitProgressMonitor(j_decompress_ptr cinfo, bool coef_only) {
+  auto* progress = cinfo->progress;
+  if (progress == nullptr) return;
+  const jpeg_decomp_master* m = cinfo->master;
+  const int nc = cinfo->num_components;
+  int scans_estimate = 1;
+  if (cinfo->progressive_mode) {
+    scans_estimate = 2 + 3 * nc;
+  } else if (m->is_multiscan_) {
+    scans_estimate = nc;
+  }
+  progress->pass_limit = cinfo->total_iMCU_rows * scans_estimate;
+  progress->pass_counter = 0;
+  int passes = 1;
+  if (!coef_only) {
+    const bool two_pass_quant = cinfo->quantize_colors && !cinfo->colormap &&
+                                cinfo->two_pass_quantize &&
+                                cinfo->enable_2pass_quant;
+    if (two_pass_quant) passes = 2;
+    // Reading a multi-scan image counts as one more pass unless buffered
+    // image mode is used.
+    if (!cinfo->buffered_image && m->is_multiscan_) ++passes;
+  }
+  progress->total_passes = passes;
+  progress->completed_passes = 0;
+}
+
+// Updates the pass counts of the progress monitor, if one is installed, at the
+// start of an output pass. Two more outputs are assumed while the input has
+// not reached EOI in buffered image mode, otherwise one.
+void InitProgressMonitorForOutput(j_decompress_ptr cinfo) {
+  auto* progress = cinfo->progress;
+  if (progress == nullptr) return;
+  const jpeg_decomp_master* m = cinfo->master;
+  const int passes_per_output = cinfo->enable_2pass_quant ? 2 : 1;
+  const bool more_input_expected = cinfo->buffered_image && !m->found_eoi_;
+  const int outputs_left = more_input_expected ? 2 : 1;
+  progress->total_passes =
+      m->output_passes_done_ + passes_per_output * outputs_left;
+  progress->completed_passes = m->output_passes_done_;
+}
+
+// Reports input progress to the progress monitor, if one is installed. The
+// counter is the number of iMCU rows read so far over all scans; pass_limit is
+// raised when the counter exceeds the current estimate.
+void ProgressMonitorInputPass(j_decompress_ptr cinfo) {
+  auto* progress = cinfo->progress;
+  if (progress == nullptr) return;
+  progress->pass_counter =
+      ((cinfo->input_scan_number - 1) * cinfo->total_iMCU_rows +
+       cinfo->input_iMCU_row);
+  if (progress->pass_limit < progress->pass_counter) {
+    progress->pass_limit = cinfo->input_scan_number * cinfo->total_iMCU_rows;
+  }
+  j_common_ptr common = reinterpret_cast<j_common_ptr>(cinfo);
+  (*progress->progress_monitor)(common);
+}
+
+// Reports output progress to the progress monitor, if one is installed, in
+// units of output scanlines.
+void ProgressMonitorOutputPass(j_decompress_ptr cinfo) {
+  auto* progress = cinfo->progress;
+  if (progress == nullptr) return;
+  const jpeg_decomp_master* m = cinfo->master;
+  // A multi-scan image read outside of buffered image mode has one input pass
+  // that precedes the output passes.
+  const bool has_input_pass = !cinfo->buffered_image && m->is_multiscan_;
+  progress->pass_counter = cinfo->output_scanline;
+  progress->pass_limit = cinfo->output_height;
+  progress->completed_passes = (has_input_pass ? 1 : 0) + m->output_passes_done_;
+  j_common_ptr common = reinterpret_cast<j_common_ptr>(cinfo);
+  (*progress->progress_monitor)(common);
+}
+
+// Builds the decoder lookup table huff_lut from the code length counts
+// (table->bits) and the symbol values (table->huffval) of a Huffman table.
+// Reports an error through JPEGLI_ERROR if the table lists more than
+// kJpegHuffmanAlphabetSize values, contains a duplicate value, or has code
+// lengths that do not fit in the available code space.
+void BuildHuffmanLookupTable(j_decompress_ptr cinfo, JHUFF_TBL* table,
+                             HuffmanTableEntry* huff_lut) {
+  uint32_t counts[kJpegHuffmanMaxBitLength + 1] = {};
+  int total_count = 0;
+  int space = 1 << kJpegHuffmanMaxBitLength;
+  int max_depth = 1;
+  for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
+    int count = table->bits[i];
+    if (count != 0) {
+      max_depth = i;
+    }
+    counts[i] = count;
+    total_count += count;
+    space -= count * (1 << (kJpegHuffmanMaxBitLength - i));
+  }
+  // values[] holds at most kJpegHuffmanAlphabetSize symbols plus the invalid
+  // symbol appended below, so reject larger tables before total_count is used
+  // to index huffval[] and values[].
+  if (total_count > static_cast<int>(kJpegHuffmanAlphabetSize)) {
+    return JPEGLI_ERROR("Too many Huffman code values: %d", total_count);
+  }
+  uint32_t values[kJpegHuffmanAlphabetSize + 1] = {};
+  uint8_t values_seen[256] = {0};
+  for (int i = 0; i < total_count; ++i) {
+    int value = table->huffval[i];
+    if (values_seen[value]) {
+      return JPEGLI_ERROR("Duplicate Huffman code value %d", value);
+    }
+    values_seen[value] = 1;
+    values[i] = value;
+  }
+  // Add an invalid symbol that will have the all 1 code.
+  ++counts[max_depth];
+  values[total_count] = kJpegHuffmanAlphabetSize;
+  space -= (1 << (kJpegHuffmanMaxBitLength - max_depth));
+  if (space < 0) {
+    return JPEGLI_ERROR("Invalid Huffman code lengths.");
+  }
+  if (space > 0 && huff_lut[0].value != 0xffff) {
+    // Re-initialize the values to an invalid symbol so that we can recognize
+    // it when reading the bit stream using a Huffman code with space > 0.
+    for (int i = 0; i < kJpegHuffmanLutSize; ++i) {
+      huff_lut[i].bits = 0;
+      huff_lut[i].value = 0xffff;
+    }
+  }
+  BuildJpegHuffmanTable(&counts[0], &values[0], huff_lut);
+}
+
+// Prepares the decoder for the scan described by cinfo->cur_comp_info, Ss, Se
+// and Al: records the successive approximation state in cinfo->coef_bits,
+// builds the Huffman lookup tables and copies the quantization tables needed
+// by the components of the scan, computes the MCU layout and resets the
+// per-scan decoding state. Reports an error through JPEGLI_ERROR when a
+// required Huffman or quantization table is missing or the MCU is too large.
+// On success cinfo->global_state is set to kDecProcessScan.
+void PrepareForScan(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+    int comp_idx = cinfo->cur_comp_info[i]->component_index;
+    int* prev_coef_bits = cinfo->coef_bits[comp_idx + cinfo->num_components];
+    // The saved range always includes coefficients 1..9, whatever the spectral
+    // selection of this scan is.
+    for (int k = std::min(cinfo->Ss, 1); k <= std::max(cinfo->Se, 9); k++) {
+      prev_coef_bits[k] =
+          (cinfo->input_scan_number > 0) ? cinfo->coef_bits[comp_idx][k] : 0;
+    }
+    for (int k = cinfo->Ss; k <= cinfo->Se; ++k) {
+      cinfo->coef_bits[comp_idx][k] = cinfo->Al;
+    }
+  }
+  AddStandardHuffmanTables(reinterpret_cast<j_common_ptr>(cinfo),
+                           /*is_dc=*/false);
+  AddStandardHuffmanTables(reinterpret_cast<j_common_ptr>(cinfo),
+                           /*is_dc=*/true);
+  // Check that all the Huffman tables needed for this scan are defined and
+  // build derived lookup tables.
+  for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+    if (cinfo->Ss == 0) {
+      int dc_tbl_idx = cinfo->cur_comp_info[i]->dc_tbl_no;
+      JHUFF_TBL* table = cinfo->dc_huff_tbl_ptrs[dc_tbl_idx];
+      HuffmanTableEntry* huff_lut =
+          &m->dc_huff_lut_[dc_tbl_idx * kJpegHuffmanLutSize];
+      if (!table) {
+        return JPEGLI_ERROR("DC Huffman table %d not found", dc_tbl_idx);
+      }
+      BuildHuffmanLookupTable(cinfo, table, huff_lut);
+    }
+    if (cinfo->Se > 0) {
+      int ac_tbl_idx = cinfo->cur_comp_info[i]->ac_tbl_no;
+      JHUFF_TBL* table = cinfo->ac_huff_tbl_ptrs[ac_tbl_idx];
+      HuffmanTableEntry* huff_lut =
+          &m->ac_huff_lut_[ac_tbl_idx * kJpegHuffmanLutSize];
+      if (!table) {
+        return JPEGLI_ERROR("AC Huffman table %d not found", ac_tbl_idx);
+      }
+      BuildHuffmanLookupTable(cinfo, table, huff_lut);
+    }
+  }
+  // Copy quantization tables into comp_info.
+  for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+    jpeg_component_info* comp = cinfo->cur_comp_info[i];
+    if (comp->quant_table == nullptr) {
+      int quant_tbl_idx = comp->quant_tbl_no;
+      JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[quant_tbl_idx];
+      // A component may reference a table that was never defined; report it
+      // instead of copying from a null pointer.
+      if (!quant_table) {
+        return JPEGLI_ERROR("Quantization table with index %d not found",
+                            quant_tbl_idx);
+      }
+      comp->quant_table = Allocate<JQUANT_TBL>(cinfo, 1, JPOOL_IMAGE);
+      memcpy(comp->quant_table, quant_table, sizeof(JQUANT_TBL));
+    }
+  }
+  if (cinfo->comps_in_scan == 1) {
+    // Non-interleaved scan: the MCU layout follows the component's own block
+    // dimensions.
+    const auto& comp = *cinfo->cur_comp_info[0];
+    cinfo->MCUs_per_row = DivCeil(cinfo->image_width * comp.h_samp_factor,
+                                  cinfo->max_h_samp_factor * DCTSIZE);
+    cinfo->MCU_rows_in_scan = DivCeil(cinfo->image_height * comp.v_samp_factor,
+                                      cinfo->max_v_samp_factor * DCTSIZE);
+    m->mcu_rows_per_iMCU_row_ = cinfo->cur_comp_info[0]->v_samp_factor;
+  } else {
+    cinfo->MCU_rows_in_scan = cinfo->total_iMCU_rows;
+    cinfo->MCUs_per_row = m->iMCU_cols_;
+    m->mcu_rows_per_iMCU_row_ = 1;
+    size_t mcu_size = 0;
+    for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+      jpeg_component_info* comp = cinfo->cur_comp_info[i];
+      mcu_size += comp->h_samp_factor * comp->v_samp_factor;
+    }
+    if (mcu_size > D_MAX_BLOCKS_IN_MCU) {
+      JPEGLI_ERROR("MCU size too big");
+    }
+  }
+  // Reset the per-scan decoding state.
+  memset(m->last_dc_coeff_, 0, sizeof(m->last_dc_coeff_));
+  m->restarts_to_go_ = cinfo->restart_interval;
+  m->next_restart_marker_ = 0;
+  m->eobrun_ = -1;
+  m->scan_mcu_row_ = 0;
+  m->scan_mcu_col_ = 0;
+  m->codestream_bits_ahead_ = 0;
+  ++cinfo->input_scan_number;
+  cinfo->input_iMCU_row = 0;
+  PrepareForiMCURow(cinfo);
+  cinfo->global_state = kDecProcessScan;
+}
+
+int ConsumeInput(j_decompress_ptr cinfo) {  // Feeds input to ProcessScan()/ProcessMarkers() until they return a status other than kNeedMoreInput; returns that status, or JPEG_SUSPENDED.
+ jpeg_decomp_master* m = cinfo->master;
+ if (cinfo->global_state == kDecProcessScan && m->streaming_mode_ &&
+ cinfo->input_iMCU_row > cinfo->output_iMCU_row) {
+ // Prevent input from getting ahead of output in streaming mode.
+ return JPEG_SUSPENDED;
+ }
+ jpeg_source_mgr* src = cinfo->src;
+ int status;
+ for (;;) {
+ const uint8_t* data;
+ size_t len;
+ if (m->input_buffer_.empty()) {  // Nothing carried over: parse directly from the source manager's buffer.
+ data = cinfo->src->next_input_byte;
+ len = cinfo->src->bytes_in_buffer;
+ } else {  // Otherwise parse the unread part of the internal buffer.
+ data = &m->input_buffer_[m->input_buffer_pos_];
+ len = m->input_buffer_.size() - m->input_buffer_pos_;
+ }
+ size_t pos = 0;  // Receives the number of bytes consumed by the call below.
+ if (cinfo->global_state == kDecProcessScan) {
+ status = ProcessScan(cinfo, data, len, &pos, &m->codestream_bits_ahead_);
+ } else {
+ status = ProcessMarkers(cinfo, data, len, &pos);
+ }
+ if (m->input_buffer_.empty()) {
+ cinfo->src->next_input_byte += pos;
+ cinfo->src->bytes_in_buffer -= pos;
+ } else {
+ m->input_buffer_pos_ += pos;
+ size_t bytes_left = m->input_buffer_.size() - m->input_buffer_pos_;
+ if (bytes_left <= src->bytes_in_buffer) {  // The unread tail fits in the source's current buffer: position the source there and drop the internal copy.
+ src->next_input_byte += (src->bytes_in_buffer - bytes_left);
+ src->bytes_in_buffer = bytes_left;
+ m->input_buffer_.clear();
+ m->input_buffer_pos_ = 0;
+ }
+ }
+ if (status == kHandleRestart) {
+ JXL_DASSERT(m->input_buffer_.size() <=
+ m->input_buffer_pos_ + src->bytes_in_buffer);
+ m->input_buffer_.clear();
+ m->input_buffer_pos_ = 0;
+ if (cinfo->unread_marker == 0xd0 + m->next_restart_marker_) {  // The expected restart marker was read.
+ cinfo->unread_marker = 0;
+ } else {
+ if (!(*cinfo->src->resync_to_restart)(cinfo, m->next_restart_marker_)) {
+ return JPEG_SUSPENDED;
+ }
+ }
+ m->next_restart_marker_ += 1;
+ m->next_restart_marker_ &= 0x7;  // The restart marker index wraps around after 7.
+ m->restarts_to_go_ = cinfo->restart_interval;
+ if (cinfo->unread_marker != 0) {
+ JPEGLI_WARN("Failed to resync to next restart marker, skipping scan.");
+ return JPEG_SCAN_COMPLETED;
+ }
+ continue;
+ }
+ if (status == kHandleMarkerProcessor) {
+ JXL_DASSERT(m->input_buffer_.size() <=
+ m->input_buffer_pos_ + src->bytes_in_buffer);
+ m->input_buffer_.clear();
+ m->input_buffer_pos_ = 0;
+ if (!(*GetMarkerProcessor(cinfo))(cinfo)) {
+ return JPEG_SUSPENDED;
+ }
+ cinfo->unread_marker = 0;
+ continue;
+ }
+ if (status != kNeedMoreInput) {
+ break;
+ }
+ if (m->input_buffer_.empty()) {  // Keep a copy of the unconsumed source bytes before the source buffer is refilled.
+ JXL_DASSERT(m->input_buffer_pos_ == 0);
+ m->input_buffer_.assign(src->next_input_byte,
+ src->next_input_byte + src->bytes_in_buffer);
+ }
+ if (!(*cinfo->src->fill_input_buffer)(cinfo)) {
+ m->input_buffer_.clear();
+ m->input_buffer_pos_ = 0;
+ return JPEG_SUSPENDED;
+ }
+ if (src->bytes_in_buffer == 0) {
+ JPEGLI_ERROR("Empty input.");
+ }
+ m->input_buffer_.insert(m->input_buffer_.end(), src->next_input_byte,
+ src->next_input_byte + src->bytes_in_buffer);  // Append the newly supplied bytes to the saved ones.
+ }
+ if (status == JPEG_SCAN_COMPLETED) {  // After a scan, go back to processing markers.
+ cinfo->global_state = kDecProcessMarkers;
+ } else if (status == JPEG_REACHED_SOS) {
+ if (cinfo->global_state == kDecInHeader) {
+ cinfo->global_state = kDecHeaderDone;
+ } else {
+ PrepareForScan(cinfo);
+ }
+ }
+ return status;
+}
+
+// Returns true when enough input has been consumed to produce the next part
+// of the output: the input reached EOI, the input is in a later scan than the
+// output, the current scan was read completely, or the input is sufficiently
+// many iMCU rows ahead of the output.
+bool IsInputReady(j_decompress_ptr cinfo) {
+  const jpeg_decomp_master* m = cinfo->master;
+  if (m->found_eoi_) {
+    return true;
+  }
+  if (cinfo->input_scan_number != cinfo->output_scan_number) {
+    return cinfo->input_scan_number > cinfo->output_scan_number;
+  }
+  if (cinfo->input_iMCU_row == cinfo->total_iMCU_rows) {
+    return true;
+  }
+  // Outside of streaming mode the input must be more than two iMCU rows ahead.
+  const int required_lead = m->streaming_mode_ ? 0 : 2;
+  return cinfo->input_iMCU_row > cinfo->output_iMCU_row + required_lead;
+}
+
+// Renders the whole image into an internal 8-bit pixel buffer, allocating the
+// buffer and its row pointers on first use. Returns false if the input source
+// suspended before all rows were produced; otherwise rewinds the output
+// position and returns true.
+bool ReadOutputPass(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  if (m->pixels_ == nullptr) {
+    const size_t row_stride = cinfo->out_color_components * cinfo->output_width;
+    const size_t total_samples = cinfo->output_height * row_stride;
+    m->pixels_ = Allocate<uint8_t>(cinfo, total_samples, JPOOL_IMAGE);
+    m->scanlines_ =
+        Allocate<JSAMPROW>(cinfo, cinfo->output_height, JPOOL_IMAGE);
+    for (size_t y = 0; y < cinfo->output_height; ++y) {
+      m->scanlines_[y] = m->pixels_ + y * row_stride;
+    }
+  }
+  size_t rows_done = 0;
+  while (rows_done < cinfo->output_height) {
+    if (!IsInputReady(cinfo)) {
+      if (ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+        return false;
+      }
+      continue;
+    }
+    ProgressMonitorOutputPass(cinfo);
+    ProcessOutput(cinfo, &rows_done, m->scanlines_, cinfo->output_height);
+  }
+  cinfo->output_scanline = 0;
+  cinfo->output_iMCU_row = 0;
+  return true;
+}
+
+// Sets up color quantization for the next output pass: selects the
+// quantization mode, chooses or reuses the color map, builds the inverse color
+// map when needed and initializes dithering. Returns FALSE if the extra output
+// pass of two-pass quantization was suspended, TRUE otherwise.
+boolean PrepareQuantizedOutput(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  if (cinfo->raw_data_out) {
+    JPEGLI_ERROR("Color quantization is not supported in raw data mode.");
+  }
+  if (m->output_data_type_ != JPEGLI_TYPE_UINT8) {
+    JPEGLI_ERROR("Color quantization must use 8-bit mode.");
+  }
+  // Mode 3: a colormap is already set on cinfo, mode 2: two-pass
+  // quantization, mode 1: one-pass quantization.
+  const bool have_colormap = cinfo->colormap != nullptr;
+  const bool use_two_pass =
+      cinfo->two_pass_quantize && cinfo->enable_2pass_quant;
+  if (have_colormap) {
+    m->quant_mode_ = 3;
+  } else if (use_two_pass) {
+    m->quant_mode_ = 2;
+  } else if (cinfo->enable_1pass_quant) {
+    m->quant_mode_ = 1;
+  } else {
+    JPEGLI_ERROR("Invalid quantization mode change");
+  }
+  // Ordered dithering is replaced by Floyd-Steinberg in modes 2 and 3.
+  if (cinfo->dither_mode == JDITHER_ORDERED && m->quant_mode_ > 1) {
+    cinfo->dither_mode = JDITHER_FS;
+  }
+  if (m->quant_mode_ == 1) {
+    ChooseColorMap1Pass(cinfo);
+  } else if (m->quant_mode_ == 2) {
+    m->quant_pass_ = 0;
+    if (!ReadOutputPass(cinfo)) {
+      return FALSE;
+    }
+    ChooseColorMap2Pass(cinfo);
+  }
+  const bool need_inverse_map =
+      m->quant_mode_ == 2 ||
+      (m->quant_mode_ == 3 && m->regenerate_inverse_colormap_);
+  if (need_inverse_map) {
+    CreateInverseColorMap(cinfo);
+  }
+  switch (cinfo->dither_mode) {
+    case JDITHER_ORDERED:
+      CreateOrderedDitherTables(cinfo);
+      break;
+    case JDITHER_FS:
+      InitFSDitherState(cinfo);
+      break;
+    default:
+      break;
+  }
+  m->quant_pass_ = 1;
+  return TRUE;
+}
+
+// Requests one virtual block array per component from the memory manager and
+// stores the array of handles in cinfo->master->coef_arrays. In streaming mode
+// each array holds only v_samp_factor block rows, otherwise all block rows of
+// the component.
+void AllocateCoefficientBuffer(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  j_common_ptr common = reinterpret_cast<j_common_ptr>(cinfo);
+  jvirt_barray_ptr* arrays = jpegli::Allocate<jvirt_barray_ptr>(
+      cinfo, cinfo->num_components, JPOOL_IMAGE);
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    const jpeg_component_info& comp = cinfo->comp_info[c];
+    const size_t block_rows =
+        m->streaming_mode_ ? comp.v_samp_factor : comp.height_in_blocks;
+    arrays[c] = (*cinfo->mem->request_virt_barray)(
+        common, JPOOL_IMAGE, TRUE, comp.width_in_blocks, block_rows,
+        comp.v_samp_factor);
+  }
+  cinfo->master->coef_arrays = arrays;
+  (*cinfo->mem->realize_virt_arrays)(common);
+}
+
+// Allocates the buffers used while producing output: the per-component raw
+// output rows, the rendered output rows and the scratch areas for the IDCT,
+// upsampling, output conversion, block smoothing and dequantization
+// statistics. Also decides whether context rows are needed, which is the case
+// when fancy upsampling is enabled and some component has a vertical factor
+// of 2.
+void AllocateOutputBuffers(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  const int ncomp = cinfo->num_components;
+  size_t iMCU_width = cinfo->max_h_samp_factor * m->min_scaled_dct_size;
+  size_t output_stride = m->iMCU_cols_ * iMCU_width;
+  bool need_context = false;
+  for (int c = 0; c < ncomp; ++c) {
+    if (cinfo->do_fancy_upsampling && m->v_factor[c] == 2) {
+      need_context = true;
+    }
+  }
+  m->need_context_rows_ = need_context;
+  for (int c = 0; c < ncomp; ++c) {
+    const auto& comp = cinfo->comp_info[c];
+    const size_t rows_per_iMCU = comp.v_samp_factor * m->scaled_dct_size[c];
+    int downsampled_width = output_stride / m->h_factor[c];
+    m->raw_height_[c] = cinfo->total_iMCU_rows * rows_per_iMCU;
+    // With context rows the buffer holds three times as many rows.
+    const size_t buffer_rows =
+        m->need_context_rows_ ? rows_per_iMCU * 3 : rows_per_iMCU;
+    m->raw_output_[c].Allocate(cinfo, buffer_rows, downsampled_width);
+  }
+  const int num_all_components =
+      std::max(cinfo->out_color_components, cinfo->num_components);
+  for (int c = 0; c < num_all_components; ++c) {
+    m->render_output_[c].Allocate(cinfo, cinfo->max_v_samp_factor,
+                                  output_stride);
+  }
+  m->idct_scratch_ = Allocate<float>(cinfo, 5 * DCTSIZE2, JPOOL_IMAGE_ALIGNED);
+  // Padding for horizontal chroma upsampling.
+  constexpr size_t kPaddingLeft = 64;
+  constexpr size_t kPaddingRight = 64;
+  m->upsample_scratch_ = Allocate<float>(
+      cinfo, output_stride + kPaddingLeft + kPaddingRight, JPOOL_IMAGE_ALIGNED);
+  const size_t bytes_per_sample = jpegli_bytes_per_sample(m->output_data_type_);
+  const size_t bytes_per_pixel = cinfo->out_color_components * bytes_per_sample;
+  const size_t scratch_stride = RoundUpTo(output_stride, HWY_ALIGNMENT);
+  m->output_scratch_ = Allocate<uint8_t>(
+      cinfo, bytes_per_pixel * scratch_stride, JPOOL_IMAGE_ALIGNED);
+  m->smoothing_scratch_ =
+      Allocate<int16_t>(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED);
+  // One entry per coefficient position for every component.
+  const size_t num_coeffs = cinfo->num_components * DCTSIZE2;
+  m->nonzeros_ = Allocate<int>(cinfo, num_coeffs, JPOOL_IMAGE_ALIGNED);
+  m->sumabs_ = Allocate<int>(cinfo, num_coeffs, JPOOL_IMAGE_ALIGNED);
+  m->biases_ = Allocate<float>(cinfo, num_coeffs, JPOOL_IMAGE_ALIGNED);
+  m->dequant_ = Allocate<float>(cinfo, num_coeffs, JPOOL_IMAGE_ALIGNED);
+  memset(m->dequant_, 0, num_coeffs * sizeof(float));
+}
+
+} // namespace jpegli
+
+void jpegli_CreateDecompress(j_decompress_ptr cinfo, int version,
+ size_t structsize) {
+ cinfo->mem = nullptr;
+ if (structsize != sizeof(*cinfo)) {
+ JPEGLI_ERROR("jpeg_decompress_struct has wrong size.");
+ }
+ jpegli::InitMemoryManager(reinterpret_cast<j_common_ptr>(cinfo));
+ cinfo->is_decompressor = TRUE;
+ cinfo->progress = nullptr;
+ cinfo->src = nullptr;
+ for (int i = 0; i < NUM_QUANT_TBLS; i++) {
+ cinfo->quant_tbl_ptrs[i] = nullptr;
+ }
+ for (int i = 0; i < NUM_HUFF_TBLS; i++) {
+ cinfo->dc_huff_tbl_ptrs[i] = nullptr;
+ cinfo->ac_huff_tbl_ptrs[i] = nullptr;
+ }
+ cinfo->global_state = jpegli::kDecStart;
+ cinfo->sample_range_limit = nullptr; // not used
+ cinfo->rec_outbuf_height = 1; // output works with any buffer height
+ cinfo->master = new jpeg_decomp_master;
+ jpeg_decomp_master* m = cinfo->master;
+ for (int i = 0; i < 16; ++i) {
+ m->app_marker_parsers[i] = nullptr;
+ }
+ m->com_marker_parser = nullptr;
+ memset(m->markers_to_save_, 0, sizeof(m->markers_to_save_));
+ jpegli::InitializeDecompressParams(cinfo);
+ jpegli::InitializeImage(cinfo);
+}
+
+// Destroys a decompress object by forwarding to jpegli_destroy().
+void jpegli_destroy_decompress(j_decompress_ptr cinfo) {
+  j_common_ptr common = reinterpret_cast<j_common_ptr>(cinfo);
+  jpegli_destroy(common);
+}
+
+// Aborts the current decompression by forwarding to jpegli_abort().
+void jpegli_abort_decompress(j_decompress_ptr cinfo) {
+  j_common_ptr common = reinterpret_cast<j_common_ptr>(cinfo);
+  jpegli_abort(common);
+}
+
+// Flags markers with the given code to be saved. The flag is stored in
+// markers_to_save_ at index marker_code - 0xe0; codes below 0xe0 or beyond the
+// end of that table are reported as an error. length_limit is currently
+// ignored.
+void jpegli_save_markers(j_decompress_ptr cinfo, int marker_code,
+                         unsigned int length_limit) {
+  // TODO(szabadka) Limit our memory usage by taking into account length_limit.
+  jpeg_decomp_master* m = cinfo->master;
+  const size_t num_slots =
+      sizeof(m->markers_to_save_) / sizeof(m->markers_to_save_[0]);
+  // Check both ends of the range so that the store below stays inside the
+  // table.
+  if (marker_code < 0xe0 ||
+      static_cast<size_t>(marker_code - 0xe0) >= num_slots) {
+    return JPEGLI_ERROR("jpegli_save_markers: invalid marker code %d",
+                        marker_code);
+  }
+  m->markers_to_save_[marker_code - 0xe0] = 1;
+}
+
+// Installs routine as the parser for the given marker code. Only the COM
+// marker (0xfe) and the APP0..APP15 markers (0xe0..0xef) are accepted; any
+// other code is reported as an error.
+void jpegli_set_marker_processor(j_decompress_ptr cinfo, int marker_code,
+                                 jpeg_marker_parser_method routine) {
+  jpeg_decomp_master* m = cinfo->master;
+  if (marker_code == 0xfe) {
+    m->com_marker_parser = routine;
+    return;
+  }
+  const bool is_app_marker = 0xe0 <= marker_code && marker_code <= 0xef;
+  if (is_app_marker) {
+    m->app_marker_parsers[marker_code - 0xe0] = routine;
+    return;
+  }
+  JPEGLI_ERROR("jpegli_set_marker_processor: invalid marker code %d",
+               marker_code);
+}
+
+int jpegli_consume_input(j_decompress_ptr cinfo) {
+ if (cinfo->global_state == jpegli::kDecStart) {
+ (*cinfo->err->reset_error_mgr)(reinterpret_cast<j_common_ptr>(cinfo));
+ (*cinfo->src->init_source)(cinfo);
+ jpegli::InitializeDecompressParams(cinfo);
+ jpegli::InitializeImage(cinfo);
+ cinfo->global_state = jpegli::kDecInHeader;
+ }
+ if (cinfo->global_state == jpegli::kDecHeaderDone) {
+ return JPEG_REACHED_SOS;
+ }
+ if (cinfo->master->found_eoi_) {
+ return JPEG_REACHED_EOI;
+ }
+ if (cinfo->global_state == jpegli::kDecInHeader ||
+ cinfo->global_state == jpegli::kDecProcessMarkers ||
+ cinfo->global_state == jpegli::kDecProcessScan) {
+ return jpegli::ConsumeInput(cinfo);
+ }
+ JPEGLI_ERROR("Unexpected state %d", cinfo->global_state);
+ return JPEG_REACHED_EOI; // return value does not matter
+}
+
+// Reads the input up to the first SOS marker. Returns JPEG_SUSPENDED if the
+// source ran out of data, JPEG_HEADER_OK when the SOS marker was reached, and
+// JPEG_HEADER_TABLES_ONLY when EOI was reached first and require_image is
+// false; with require_image set, an early EOI is reported as an error.
+int jpegli_read_header(j_decompress_ptr cinfo, boolean require_image) {
+  const bool state_ok = cinfo->global_state == jpegli::kDecStart ||
+                        cinfo->global_state == jpegli::kDecInHeader;
+  if (!state_ok) {
+    JPEGLI_ERROR("jpegli_read_header: unexpected state %d",
+                 cinfo->global_state);
+  }
+  if (cinfo->src == nullptr) {
+    JPEGLI_ERROR("Missing source.");
+  }
+  for (;;) {
+    const int status = jpegli_consume_input(cinfo);
+    if (status == JPEG_SUSPENDED) {
+      return status;
+    }
+    if (status == JPEG_REACHED_SOS) {
+      return JPEG_HEADER_OK;
+    }
+    if (status == JPEG_REACHED_EOI) {
+      if (require_image) {
+        JPEGLI_ERROR("jpegli_read_header: unexpected EOI marker.");
+      }
+      jpegli_abort_decompress(cinfo);
+      return JPEG_HEADER_TABLES_ONLY;
+    }
+  }
+}
+
+// Copies the ICC profile collected by the decoder into a newly malloc()-ed
+// buffer. Returns FALSE and sets the outputs to nullptr / 0 when there is no
+// profile; otherwise returns TRUE with *icc_data_ptr pointing to the copy and
+// *icc_data_len holding its size. Must not be called before the header has
+// been read.
+boolean jpegli_read_icc_profile(j_decompress_ptr cinfo, JOCTET** icc_data_ptr,
+                                unsigned int* icc_data_len) {
+  const bool header_pending = cinfo->global_state == jpegli::kDecStart ||
+                              cinfo->global_state == jpegli::kDecInHeader;
+  if (header_pending) {
+    JPEGLI_ERROR("jpegli_read_icc_profile: unexpected state %d",
+                 cinfo->global_state);
+  }
+  if (icc_data_ptr == nullptr || icc_data_len == nullptr) {
+    JPEGLI_ERROR("jpegli_read_icc_profile: invalid output buffer");
+  }
+  const jpeg_decomp_master* m = cinfo->master;
+  if (m->icc_profile_.empty()) {
+    *icc_data_ptr = nullptr;
+    *icc_data_len = 0;
+    return FALSE;
+  }
+  *icc_data_len = m->icc_profile_.size();
+  JOCTET* copy = static_cast<JOCTET*>(malloc(*icc_data_len));
+  *icc_data_ptr = copy;
+  if (copy == nullptr) {
+    JPEGLI_ERROR("jpegli_read_icc_profile: Out of memory");
+  }
+  memcpy(copy, m->icc_profile_.data(), *icc_data_len);
+  return TRUE;
+}
+
+// Computes cinfo->output_width / output_height and the scaled DCT size of
+// every component from scale_num / scale_denom. Without scaling the output
+// has the image dimensions and the DCT size is DCTSIZE. With scaling, the
+// smallest DCT size in 1..16 whose ratio to DCTSIZE is at least
+// scale_num / scale_denom is used. Requires that the SOF marker was seen;
+// scaling is not supported in raw data mode.
+void jpegli_core_output_dimensions(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  if (!m->found_sof_) {
+    JPEGLI_ERROR("No SOF marker found.");
+  }
+  if (cinfo->raw_data_out) {
+    if (cinfo->scale_num != 1 || cinfo->scale_denom != 1) {
+      JPEGLI_ERROR("Output scaling is not supported in raw output mode");
+    }
+  }
+  if (cinfo->scale_num != 1 || cinfo->scale_denom != 1) {
+    int dctsize = 16;
+    // Stop at 1: without the lower bound a scale_num of 0 keeps the condition
+    // true and dctsize would run below 1 and never settle.
+    while (dctsize > 1 &&
+           cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * (dctsize - 1)) {
+      --dctsize;
+    }
+    m->min_scaled_dct_size = dctsize;
+    cinfo->output_width =
+        jpegli::DivCeil(cinfo->image_width * dctsize, DCTSIZE);
+    cinfo->output_height =
+        jpegli::DivCeil(cinfo->image_height * dctsize, DCTSIZE);
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      m->scaled_dct_size[c] = m->min_scaled_dct_size;
+    }
+  } else {
+    cinfo->output_width = cinfo->image_width;
+    cinfo->output_height = cinfo->image_height;
+    m->min_scaled_dct_size = DCTSIZE;
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      m->scaled_dct_size[c] = DCTSIZE;
+    }
+  }
+}
+
+// Computes the output dimensions, the per-component upsampling factors and
+// the number of output color components. When scaling is active, a 2x
+// upsampling in both directions is traded for a larger scaled IDCT as long as
+// the component's DCT size is below DCTSIZE.
+void jpegli_calc_output_dimensions(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  jpegli_core_output_dimensions(cinfo);
+  const int ncomp = cinfo->num_components;
+  for (int c = 0; c < ncomp; ++c) {
+    const jpeg_component_info& comp = cinfo->comp_info[c];
+    m->h_factor[c] = cinfo->max_h_samp_factor / comp.h_samp_factor;
+    m->v_factor[c] = cinfo->max_v_samp_factor / comp.v_samp_factor;
+  }
+  const bool scaling = cinfo->scale_num != 1 || cinfo->scale_denom != 1;
+  if (scaling) {
+    for (int c = 0; c < ncomp; ++c) {
+      // Prefer IDCT scaling over 2x upsampling.
+      while (m->scaled_dct_size[c] < DCTSIZE && (m->v_factor[c] % 2) == 0 &&
+             (m->h_factor[c] % 2) == 0) {
+        m->scaled_dct_size[c] *= 2;
+        m->v_factor[c] /= 2;
+        m->h_factor[c] /= 2;
+      }
+    }
+  }
+  switch (cinfo->out_color_space) {
+    case JCS_GRAYSCALE:
+      cinfo->out_color_components = 1;
+      break;
+    case JCS_RGB:
+    case JCS_YCbCr:
+      cinfo->out_color_components = 3;
+      break;
+    case JCS_CMYK:
+    case JCS_YCCK:
+      cinfo->out_color_components = 4;
+      break;
+    default:
+      cinfo->out_color_components = cinfo->num_components;
+      break;
+  }
+  // Quantized output has a single component per pixel.
+  cinfo->output_components =
+      cinfo->quantize_colors ? 1 : cinfo->out_color_components;
+  cinfo->rec_outbuf_height = 1;
+}
+
+boolean jpegli_has_multiple_scans(j_decompress_ptr cinfo) {
+ if (cinfo->input_scan_number == 0) {
+ JPEGLI_ERROR("No SOS marker found.");
+ }
+ return cinfo->master->is_multiscan_;
+}
+
+// Returns whether the EOI marker has been reached.
+boolean jpegli_input_complete(j_decompress_ptr cinfo) {
+  const jpeg_decomp_master* m = cinfo->master;
+  return m->found_eoi_;
+}
+
+boolean jpegli_start_decompress(j_decompress_ptr cinfo) {  // Sets up decoding once the header is read; returns FALSE when the input source suspends before setup completes.
+ jpeg_decomp_master* m = cinfo->master;
+ if (cinfo->global_state == jpegli::kDecHeaderDone) {
+ m->streaming_mode_ = !m->is_multiscan_ && !cinfo->buffered_image &&
+ (!cinfo->quantize_colors || !cinfo->two_pass_quantize);  // Streaming only for single-scan, non-buffered decoding without two-pass quantization.
+ jpegli::AllocateCoefficientBuffer(cinfo);
+ jpegli_calc_output_dimensions(cinfo);
+ jpegli::PrepareForScan(cinfo);
+ if (cinfo->quantize_colors) {  // Enable exactly one of the three color quantizers.
+ if (cinfo->colormap != nullptr) {
+ cinfo->enable_external_quant = TRUE;
+ } else if (cinfo->two_pass_quantize &&
+ cinfo->out_color_space == JCS_RGB) {
+ cinfo->enable_2pass_quant = TRUE;
+ } else {
+ cinfo->enable_1pass_quant = TRUE;
+ }
+ }
+ jpegli::InitProgressMonitor(cinfo, /*coef_only=*/false);
+ jpegli::AllocateOutputBuffers(cinfo);
+ if (cinfo->buffered_image == TRUE) {  // In buffered image mode output is set up later by jpegli_start_output().
+ cinfo->output_scan_number = 0;
+ return TRUE;
+ }
+ } else if (!m->is_multiscan_) {  // Any other entry state is accepted only for multi-scan images (presumably a call resumed after suspension).
+ JPEGLI_ERROR("jpegli_start_decompress: unexpected state %d",
+ cinfo->global_state);
+ }
+ if (m->is_multiscan_) {
+ if (cinfo->global_state != jpegli::kDecProcessScan &&
+ cinfo->global_state != jpegli::kDecProcessMarkers) {
+ JPEGLI_ERROR("jpegli_start_decompress: unexpected state %d",
+ cinfo->global_state);
+ }
+ while (!m->found_eoi_) {  // Read all scans up to EOI before producing output.
+ jpegli::ProgressMonitorInputPass(cinfo);
+ if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+ return FALSE;
+ }
+ }
+ }
+ cinfo->output_scan_number = cinfo->input_scan_number;
+ jpegli::PrepareForOutput(cinfo);
+ if (cinfo->quantize_colors) {
+ return jpegli::PrepareQuantizedOutput(cinfo);
+ } else {
+ return TRUE;
+ }
+}
+
+// Starts an output pass in buffered image mode. The requested scan number is
+// raised to at least 1 and, once EOI has been seen, lowered to the last input
+// scan. Returns FALSE only when preparing quantized output was suspended.
+boolean jpegli_start_output(j_decompress_ptr cinfo, int scan_number) {
+  jpeg_decomp_master* m = cinfo->master;
+  if (!cinfo->buffered_image) {
+    JPEGLI_ERROR("jpegli_start_output: buffered image mode was not set");
+  }
+  const bool state_ok = cinfo->global_state == jpegli::kDecProcessScan ||
+                        cinfo->global_state == jpegli::kDecProcessMarkers;
+  if (!state_ok) {
+    JPEGLI_ERROR("jpegli_start_output: unexpected state %d",
+                 cinfo->global_state);
+  }
+  int target_scan = scan_number < 1 ? 1 : scan_number;
+  if (m->found_eoi_ && cinfo->input_scan_number < target_scan) {
+    target_scan = cinfo->input_scan_number;
+  }
+  cinfo->output_scan_number = target_scan;
+  jpegli::InitProgressMonitorForOutput(cinfo);
+  jpegli::PrepareForOutput(cinfo);
+  if (!cinfo->quantize_colors) {
+    return TRUE;
+  }
+  return jpegli::PrepareQuantizedOutput(cinfo);
+}
+
+// Finishes an output pass in buffered image mode by consuming input until the
+// input scan number is past the output scan number or EOI is reached. Returns
+// FALSE if the input source suspended first.
+boolean jpegli_finish_output(j_decompress_ptr cinfo) {
+  if (!cinfo->buffered_image) {
+    JPEGLI_ERROR("jpegli_finish_output: buffered image mode was not set");
+  }
+  const bool state_ok = cinfo->global_state == jpegli::kDecProcessScan ||
+                        cinfo->global_state == jpegli::kDecProcessMarkers;
+  if (!state_ok) {
+    JPEGLI_ERROR("jpegli_finish_output: unexpected state %d",
+                 cinfo->global_state);
+  }
+  // Advance input to the start of the next scan, or to the end of input.
+  for (;;) {
+    if (cinfo->input_scan_number > cinfo->output_scan_number) break;
+    if (cinfo->master->found_eoi_) break;
+    if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+      return FALSE;
+    }
+  }
+  return TRUE;
+}
+
+// Produces up to max_lines output rows into scanlines and returns the number
+// of rows actually produced, which is smaller than requested when the end of
+// the image is reached or the input source suspends. In buffered image mode
+// jpegli_start_output() must have been called; for multi-scan images outside
+// of buffered image mode jpegli_start_decompress() must have finished.
+JDIMENSION jpegli_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines,
+                                 JDIMENSION max_lines) {
+  jpeg_decomp_master* m = cinfo->master;
+  if (cinfo->global_state != jpegli::kDecProcessScan &&
+      cinfo->global_state != jpegli::kDecProcessMarkers) {
+    JPEGLI_ERROR("jpegli_read_scanlines: unexpected state %d",
+                 cinfo->global_state);
+  }
+  if (cinfo->buffered_image) {
+    if (cinfo->output_scan_number == 0) {
+      JPEGLI_ERROR(
+          "jpegli_read_scanlines: "
+          "jpegli_start_output() was not called");
+    }
+  } else if (m->is_multiscan_ && !m->found_eoi_) {
+    JPEGLI_ERROR(
+        "jpegli_read_scanlines: "
+        "jpegli_start_decompress() did not finish");
+  }
+  // Clamp the request to the rows that are left. The remaining row count is
+  // obtained by subtraction, because output_scanline + max_lines could wrap
+  // around for a very large max_lines and leave the request unclamped.
+  const JDIMENSION rows_left =
+      cinfo->output_scanline < cinfo->output_height
+          ? cinfo->output_height - cinfo->output_scanline
+          : 0;
+  if (max_lines > rows_left) {
+    max_lines = rows_left;
+  }
+  jpegli::ProgressMonitorOutputPass(cinfo);
+  size_t num_output_rows = 0;
+  while (num_output_rows < max_lines) {
+    if (jpegli::IsInputReady(cinfo)) {
+      jpegli::ProcessOutput(cinfo, &num_output_rows, scanlines, max_lines);
+    } else if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+      break;
+    }
+  }
+  return num_output_rows;
+}
+
+// Skips num_lines output rows by reading them with a null output array.
+// Returns the number of rows skipped.
+JDIMENSION jpegli_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) {
+  // TODO(szabadka) Skip the IDCT for skipped over blocks.
+  const JDIMENSION skipped = jpegli_read_scanlines(cinfo, nullptr, num_lines);
+  return skipped;
+}
+
+// Restricts the output to a horizontal range of columns. On input *xoffset
+// and *width give the requested range, which must be non-empty and lie within
+// the output width. On return *xoffset has been rounded down to a multiple of
+// the iMCU width and *width enlarged so that the range still ends at the
+// requested right edge; cinfo->output_width is set to the new width. Must be
+// called before any scanline has been output and not in raw data mode.
+void jpegli_crop_scanline(j_decompress_ptr cinfo, JDIMENSION* xoffset,
+                          JDIMENSION* width) {
+  jpeg_decomp_master* m = cinfo->master;
+  if ((cinfo->global_state != jpegli::kDecProcessScan &&
+       cinfo->global_state != jpegli::kDecProcessMarkers) ||
+      cinfo->output_scanline != 0) {
+    JPEGLI_ERROR("jpegli_crop_scanline: unexpected state %d",
+                 cinfo->global_state);
+  }
+  if (cinfo->raw_data_out) {
+    JPEGLI_ERROR("Output cropping is not supported in raw data mode");
+  }
+  // The range is checked against the remaining width rather than by computing
+  // *xoffset + *width, which could wrap around for large arguments and pass
+  // the check.
+  if (xoffset == nullptr || width == nullptr || *width == 0 ||
+      *xoffset > cinfo->output_width ||
+      *width > cinfo->output_width - *xoffset) {
+    JPEGLI_ERROR("jpegli_crop_scanline: Invalid arguments");
+  }
+  // TODO(szabadka) Skip the IDCT for skipped over blocks.
+  size_t xend = *xoffset + *width;
+  size_t iMCU_width = m->min_scaled_dct_size * cinfo->max_h_samp_factor;
+  *xoffset = (*xoffset / iMCU_width) * iMCU_width;
+  *width = xend - *xoffset;
+  cinfo->master->xoffset_ = *xoffset;
+  cinfo->output_width = *width;
+}
+
+// Outputs one iMCU row of raw component data into data. max_lines must be at
+// least the iMCU height (max_v_samp_factor * DCTSIZE). Returns the iMCU height
+// when a row was produced, and 0 when the input source suspended or all iMCU
+// rows have already been output.
+JDIMENSION jpegli_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data,
+                                JDIMENSION max_lines) {
+  const bool state_ok = cinfo->global_state == jpegli::kDecProcessScan ||
+                        cinfo->global_state == jpegli::kDecProcessMarkers;
+  if (!state_ok || !cinfo->raw_data_out) {
+    JPEGLI_ERROR("jpegli_read_raw_data: unexpected state %d",
+                 cinfo->global_state);
+  }
+  const size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE;
+  if (max_lines < iMCU_height) {
+    JPEGLI_ERROR("jpegli_read_raw_data: output buffer too small");
+  }
+  jpegli::ProgressMonitorOutputPass(cinfo);
+  while (!jpegli::IsInputReady(cinfo)) {
+    if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+      return 0;
+    }
+  }
+  if (cinfo->output_iMCU_row >= cinfo->total_iMCU_rows) {
+    return 0;
+  }
+  jpegli::ProcessRawOutput(cinfo, data);
+  return iMCU_height;
+}
+
+// Returns the coefficient arrays of the image. Outside of buffered image mode
+// the decoder is set up on the first call after the header and the whole
+// input is read up to EOI; nullptr is returned if the input source suspends
+// before that. Streaming mode is always turned off.
+jvirt_barray_ptr* jpegli_read_coefficients(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  m->streaming_mode_ = false;
+  const bool needs_setup =
+      !cinfo->buffered_image && cinfo->global_state == jpegli::kDecHeaderDone;
+  if (needs_setup) {
+    jpegli::AllocateCoefficientBuffer(cinfo);
+    jpegli_calc_output_dimensions(cinfo);
+    jpegli::InitProgressMonitor(cinfo, /*coef_only=*/true);
+    jpegli::PrepareForScan(cinfo);
+  }
+  const bool state_ok = cinfo->global_state == jpegli::kDecProcessScan ||
+                        cinfo->global_state == jpegli::kDecProcessMarkers;
+  if (!state_ok) {
+    JPEGLI_ERROR("jpegli_read_coefficients: unexpected state %d",
+                 cinfo->global_state);
+  }
+  if (cinfo->buffered_image) {
+    return m->coef_arrays;
+  }
+  while (!m->found_eoi_) {
+    jpegli::ProgressMonitorInputPass(cinfo);
+    if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+      return nullptr;
+    }
+  }
+  // Mark the output as complete.
+  cinfo->output_scanline = cinfo->output_height;
+  return m->coef_arrays;
+}
+
+// Finishes decompression: reads the remaining input up to EOI, terminates the
+// source and aborts the decompress object. Returns FALSE if the input source
+// suspended before EOI. Outside of buffered image mode it is an error to call
+// this before all scanlines were output.
+boolean jpegli_finish_decompress(j_decompress_ptr cinfo) {
+  const bool state_ok = cinfo->global_state == jpegli::kDecProcessScan ||
+                        cinfo->global_state == jpegli::kDecProcessMarkers;
+  if (!state_ok) {
+    JPEGLI_ERROR("jpegli_finish_decompress: unexpected state %d",
+                 cinfo->global_state);
+  }
+  const bool output_incomplete = cinfo->output_scanline < cinfo->output_height;
+  if (!cinfo->buffered_image && output_incomplete) {
+    JPEGLI_ERROR("Incomplete output");
+  }
+  for (;;) {
+    if (cinfo->master->found_eoi_) break;
+    if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+      return FALSE;
+    }
+  }
+  (*cinfo->src->term_source)(cinfo);
+  jpegli_abort_decompress(cinfo);
+  return TRUE;
+}
+
+// Default restart resynchronization routine: emits a warning about the
+// unexpected marker and returns TRUE without consuming any input.
+boolean jpegli_resync_to_restart(j_decompress_ptr cinfo, int desired) {
+  JPEGLI_WARN("Invalid restart marker found: 0x%02x vs 0x%02x.",
+              cinfo->unread_marker, 0xd0 + desired);
+  // Nothing is resynchronized here; the decoder skips the rest of the scan
+  // and attempts to render the partial input.
+  return TRUE;
+}
+
+// Requests that the inverse color map be regenerated for the next output
+// pass. Only valid in buffered image mode with the external colormap
+// quantizer enabled and a colormap set.
+void jpegli_new_colormap(j_decompress_ptr cinfo) {
+  const bool state_ok = cinfo->global_state == jpegli::kDecProcessScan ||
+                        cinfo->global_state == jpegli::kDecProcessMarkers;
+  if (!state_ok) {
+    JPEGLI_ERROR("jpegli_new_colormap: unexpected state %d",
+                 cinfo->global_state);
+  }
+  if (!cinfo->buffered_image) {
+    JPEGLI_ERROR("jpegli_new_colormap: not in buffered image mode");
+  }
+  if (!cinfo->enable_external_quant) {
+    JPEGLI_ERROR("external colormap quantizer was not enabled");
+  }
+  const bool external_colormap_mode =
+      cinfo->quantize_colors && cinfo->colormap != nullptr;
+  if (!external_colormap_mode) {
+    JPEGLI_ERROR("jpegli_new_colormap: not in external colormap mode");
+  }
+  jpeg_decomp_master* m = cinfo->master;
+  m->regenerate_inverse_colormap_ = true;
+}
+
+// Selects the sample type and byte order of the output. Supported types are
+// 8-bit, 16-bit and float; byte swapping is enabled when the requested
+// endianness differs from the native one. Unknown values are reported as
+// errors.
+void jpegli_set_output_format(j_decompress_ptr cinfo, JpegliDataType data_type,
+                              JpegliEndianness endianness) {
+  jpeg_decomp_master* m = cinfo->master;
+  const bool type_supported = data_type == JPEGLI_TYPE_UINT8 ||
+                              data_type == JPEGLI_TYPE_UINT16 ||
+                              data_type == JPEGLI_TYPE_FLOAT;
+  if (type_supported) {
+    m->output_data_type_ = data_type;
+  } else {
+    JPEGLI_ERROR("Unsupported data type %d", data_type);
+  }
+  if (endianness == JPEGLI_NATIVE_ENDIAN) {
+    m->swap_endianness_ = false;
+  } else if (endianness == JPEGLI_LITTLE_ENDIAN) {
+    m->swap_endianness_ = !IsLittleEndian();
+  } else if (endianness == JPEGLI_BIG_ENDIAN) {
+    m->swap_endianness_ = IsLittleEndian();
+  } else {
+    JPEGLI_ERROR("Unsupported endianness %d", endianness);
+  }
+}
diff --git a/lib/jpegli/decode.h b/lib/jpegli/decode.h
new file mode 100644
index 0000000..9800ebf
--- /dev/null
+++ b/lib/jpegli/decode.h
@@ -0,0 +1,106 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// This file contains the C API of the decoder part of the libjpegli library,
+// which is based on the C API of libjpeg, with the function names changed from
+// jpeg_* to jpegli_*, while decompressor object definitions are included
+// directly from jpeglib.h.
+//
+// Applications can use the libjpegli library in one of the following ways:
+//
+// (1) Include jpegli/encode.h and/or jpegli/decode.h, update the function
+// names of the API and link against libjpegli.
+//
+// (2) Leave the application code unchanged, but replace the libjpeg.so library
+// with the one built by this project that is API- and ABI-compatible with
+// libjpeg-turbo's version of libjpeg.so.
+
+#ifndef LIB_JPEGLI_DECODE_H_
+#define LIB_JPEGLI_DECODE_H_
+
+#include "lib/jpegli/common.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// Convenience wrapper around jpegli_CreateDecompress() that passes the
+// JPEG_LIB_VERSION and the size of struct jpeg_decompress_struct that the
+// calling code was compiled with.
+#define jpegli_create_decompress(cinfo) \
+  jpegli_CreateDecompress((cinfo), JPEG_LIB_VERSION, \
+                          (size_t)sizeof(struct jpeg_decompress_struct))
+
+void jpegli_CreateDecompress(j_decompress_ptr cinfo, int version,
+ size_t structsize);
+
+void jpegli_stdio_src(j_decompress_ptr cinfo, FILE *infile);
+
+void jpegli_mem_src(j_decompress_ptr cinfo, const unsigned char *inbuffer,
+ unsigned long insize);
+
+int jpegli_read_header(j_decompress_ptr cinfo, boolean require_image);
+
+boolean jpegli_start_decompress(j_decompress_ptr cinfo);
+
+JDIMENSION jpegli_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines,
+ JDIMENSION max_lines);
+
+JDIMENSION jpegli_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines);
+
+void jpegli_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
+ JDIMENSION *width);
+
+boolean jpegli_finish_decompress(j_decompress_ptr cinfo);
+
+JDIMENSION jpegli_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data,
+ JDIMENSION max_lines);
+
+jvirt_barray_ptr *jpegli_read_coefficients(j_decompress_ptr cinfo);
+
+boolean jpegli_has_multiple_scans(j_decompress_ptr cinfo);
+
+boolean jpegli_start_output(j_decompress_ptr cinfo, int scan_number);
+
+boolean jpegli_finish_output(j_decompress_ptr cinfo);
+
+boolean jpegli_input_complete(j_decompress_ptr cinfo);
+
+int jpegli_consume_input(j_decompress_ptr cinfo);
+
+#if JPEG_LIB_VERSION >= 80
+void jpegli_core_output_dimensions(j_decompress_ptr cinfo);
+#endif
+void jpegli_calc_output_dimensions(j_decompress_ptr cinfo);
+
+void jpegli_save_markers(j_decompress_ptr cinfo, int marker_code,
+ unsigned int length_limit);
+
+void jpegli_set_marker_processor(j_decompress_ptr cinfo, int marker_code,
+ jpeg_marker_parser_method routine);
+
+boolean jpegli_resync_to_restart(j_decompress_ptr cinfo, int desired);
+
+boolean jpegli_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr,
+ unsigned int *icc_data_len);
+
+void jpegli_abort_decompress(j_decompress_ptr cinfo);
+
+void jpegli_destroy_decompress(j_decompress_ptr cinfo);
+
+void jpegli_new_colormap(j_decompress_ptr cinfo);
+
+//
+// New API functions that are not available in libjpeg
+//
+// NOTE: This part of the API is still experimental and will probably change in
+// the future.
+//
+
+// Sets the sample type and byte order of the decoder output. data_type must be
+// one of JPEGLI_TYPE_UINT8, JPEGLI_TYPE_UINT16 or JPEGLI_TYPE_FLOAT and
+// endianness one of JPEGLI_NATIVE_ENDIAN, JPEGLI_LITTLE_ENDIAN or
+// JPEGLI_BIG_ENDIAN; any other value is reported as an error.
+void jpegli_set_output_format(j_decompress_ptr cinfo, JpegliDataType data_type,
+                              JpegliEndianness endianness);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif // LIB_JPEGLI_DECODE_H_
diff --git a/lib/jpegli/decode_api_test.cc b/lib/jpegli/decode_api_test.cc
new file mode 100644
index 0000000..c48b937
--- /dev/null
+++ b/lib/jpegli/decode_api_test.cc
@@ -0,0 +1,1304 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <cmath>
+#include <cstdint>
+#include <vector>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jpegli {
+namespace {
+
+// Two-byte sequence 0xff 0xd9 that SourceManager hands to the decoder once the
+// real input has been used up.
+static constexpr uint8_t kFakeEoiMarker[2] = {0xff, 0xd9};
+// Number of chunk buffers that SourceManager rotates through.
+static constexpr size_t kNumSourceBuffers = 4;
+
+// Custom source manager that hands the input to the decoder in chunks of at
+// most max_chunk_size bytes, simulating a file reader with a fixed buffer
+// size.
+class SourceManager {
+ public:
+  // Serves the len bytes starting at data. A max_chunk_size of 0 selects a
+  // single chunk that is as large as the whole input.
+  SourceManager(const uint8_t* data, size_t len, size_t max_chunk_size)
+      : data_(data),
+        len_(len),
+        max_chunk_size_(max_chunk_size == 0 ? len : max_chunk_size) {
+    pub_.init_source = init_source;
+    pub_.fill_input_buffer = fill_input_buffer;
+    pub_.skip_input_data = skip_input_data;
+    pub_.resync_to_restart = jpegli_resync_to_restart;
+    pub_.term_source = term_source;
+    buffers_.resize(kNumSourceBuffers, std::vector<uint8_t>(max_chunk_size_));
+    Reset();
+  }
+
+  // Rewinds to the beginning of the input and empties the current buffer.
+  void Reset() {
+    pos_ = 0;
+    chunk_idx_ = 0;
+    pub_.bytes_in_buffer = 0;
+    pub_.next_input_byte = nullptr;
+  }
+
+  // Checks that nothing is left in the current buffer and that the read
+  // position has reached the end of the input.
+  ~SourceManager() {
+    EXPECT_EQ(0, pub_.bytes_in_buffer);
+    EXPECT_EQ(len_, pos_);
+  }
+
+ private:
+  // Has to remain the first data member: the tests store this object in
+  // cinfo->src and the callbacks cast that pointer back to SourceManager*.
+  jpeg_source_mgr pub_;
+  const uint8_t* data_;
+  // Input length; grows by 2 every time the fake EOI marker is served.
+  size_t len_;
+  // Number of chunks served so far, selects the buffer to fill next.
+  size_t chunk_idx_;
+  // Number of input bytes handed out or skipped so far.
+  size_t pos_;
+  size_t max_chunk_size_;
+  std::vector<std::vector<uint8_t>> buffers_;
+
+  static void init_source(j_decompress_ptr cinfo) {}
+
+  // Copies the next chunk of the input into the next buffer of the rotation,
+  // or serves a fake EOI marker when the input is exhausted.
+  static boolean fill_input_buffer(j_decompress_ptr cinfo) {
+    auto* self = reinterpret_cast<SourceManager*>(cinfo->src);
+    if (self->pos_ >= self->len_) {
+      self->pub_.next_input_byte = kFakeEoiMarker;
+      self->pub_.bytes_in_buffer = 2;
+      self->len_ += 2;
+    } else {
+      const size_t remaining = self->len_ - self->pos_;
+      const size_t chunk_size = std::min(remaining, self->max_chunk_size_);
+      self->chunk_idx_ += 1;
+      std::vector<uint8_t>& chunk =
+          self->buffers_[self->chunk_idx_ % kNumSourceBuffers];
+      memcpy(chunk.data(), self->data_ + self->pos_, chunk_size);
+      self->pub_.next_input_byte = chunk.data();
+      self->pub_.bytes_in_buffer = chunk_size;
+    }
+    self->pos_ += self->pub_.bytes_in_buffer;
+    return TRUE;
+  }
+
+  // Skips num_bytes of input. A skip that reaches past the current buffer
+  // drops the buffer and advances the read position by the remainder.
+  static void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {
+    if (num_bytes <= 0) {
+      return;
+    }
+    auto* self = reinterpret_cast<SourceManager*>(cinfo->src);
+    const size_t to_skip = static_cast<size_t>(num_bytes);
+    if (to_skip <= self->pub_.bytes_in_buffer) {
+      self->pub_.bytes_in_buffer -= to_skip;
+      self->pub_.next_input_byte += to_skip;
+    } else {
+      self->pos_ += to_skip - self->pub_.bytes_in_buffer;
+      self->pub_.bytes_in_buffer = 0;
+    }
+  }
+
+  static void term_source(j_decompress_ptr cinfo) {}
+};
+
+// Marker codes recorded by test_marker_processor(), in the order of the calls.
+uint8_t markers_seen[kMarkerSequenceLen];
+// Number of markers that test_marker_processor() has recorded so far.
+size_t num_markers_seen = 0;
+
+// Returns the next input byte of the decoder's source manager, asking it to
+// refill its buffer first when the buffer is empty.
+uint8_t get_next_byte(j_decompress_ptr cinfo) {
+  jpeg_source_mgr* src = cinfo->src;
+  if (src->bytes_in_buffer == 0) {
+    (*src->fill_input_buffer)(cinfo);
+  }
+  const uint8_t value = *src->next_input_byte;
+  ++src->next_input_byte;
+  --src->bytes_in_buffer;
+  return value;
+}
+
+// Marker callback installed by the tests. Records the code of the marker,
+// reads its two-byte big-endian length field, checks the length against
+// 2 + ((num_markers_seen + 2) % sizeof(kMarkerData)) and skips the payload.
+// Always returns TRUE.
+boolean test_marker_processor(j_decompress_ptr cinfo) {
+  // Never write past the end of markers_seen. The counter below is advanced
+  // regardless, so a surplus marker still shows up in the caller's check of
+  // the number of markers.
+  if (num_markers_seen < kMarkerSequenceLen) {
+    markers_seen[num_markers_seen] = cinfo->unread_marker;
+  }
+  // The two bytes are read in separate statements: the operands of a single
+  // '+' expression may be evaluated in either order, which could swap the
+  // high and the low byte of the length.
+  const size_t len_hi = get_next_byte(cinfo);
+  const size_t len_lo = get_next_byte(cinfo);
+  const size_t marker_len = (len_hi << 8) + len_lo;
+  EXPECT_EQ(2 + ((num_markers_seen + 2) % sizeof(kMarkerData)), marker_len);
+  // The length field counts its own two bytes, which were consumed above.
+  if (marker_len > 2) {
+    (*cinfo->src->skip_input_data)(cinfo, marker_len - 2);
+  }
+  ++num_markers_seen;
+  return TRUE;
+}
+
+// Reads the decoded image from cinfo into *output, either as raw
+// per-component data (when cinfo->raw_data_out is set) or as interleaved
+// pixels, and checks that every read or skip call returns exactly the number
+// of lines that was asked for.
+void ReadOutputImage(const DecompressParams& dparams, j_decompress_ptr cinfo,
+                     TestImage* output) {
+  JDIMENSION xoffset = 0;
+  JDIMENSION yoffset = 0;
+  JDIMENSION xsize_cropped = cinfo->output_width;
+  JDIMENSION ysize_cropped = cinfo->output_height;
+  if (dparams.crop_output) {
+    // Request a window that starts at one third of the width/height and is one
+    // third of the width/height large. The horizontal values are passed by
+    // pointer, so jpegli_crop_scanline() may adjust them.
+    xoffset = xsize_cropped = cinfo->output_width / 3;
+    yoffset = ysize_cropped = cinfo->output_height / 3;
+    jpegli_crop_scanline(cinfo, &xoffset, &xsize_cropped);
+  }
+  output->ysize = ysize_cropped;
+  // NOTE(review): presumably jpegli_crop_scanline() has updated
+  // cinfo->output_width to the cropped width at this point — confirm.
+  output->xsize = cinfo->output_width;
+  output->components = cinfo->out_color_components;
+  output->data_type = dparams.data_type;
+  output->endianness = dparams.endianness;
+  size_t bytes_per_sample = jpegli_bytes_per_sample(dparams.data_type);
+  if (cinfo->raw_data_out) {
+    output->color_space = cinfo->jpeg_color_space;
+    // One plane per component, sized in whole DCT blocks.
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+      size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+      std::vector<uint8_t> plane(ysize * xsize * bytes_per_sample);
+      output->raw_data.emplace_back(std::move(plane));
+    }
+  } else {
+    output->color_space = cinfo->out_color_space;
+    output->AllocatePixels();
+  }
+  size_t total_output_lines = 0;
+  while (cinfo->output_scanline < cinfo->output_height) {
+    size_t max_lines;
+    size_t num_output_lines;
+    if (cinfo->raw_data_out) {
+      // Raw data is read one iMCU row at a time.
+      size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE;
+      EXPECT_EQ(cinfo->output_scanline, cinfo->output_iMCU_row * iMCU_height);
+      max_lines = iMCU_height;
+      std::vector<std::vector<JSAMPROW>> rowdata(cinfo->num_components);
+      std::vector<JSAMPARRAY> data(cinfo->num_components);
+      for (int c = 0; c < cinfo->num_components; ++c) {
+        size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+        size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+        size_t num_lines = cinfo->comp_info[c].v_samp_factor * DCTSIZE;
+        rowdata[c].resize(num_lines);
+        size_t y0 = cinfo->output_iMCU_row * num_lines;
+        // Rows that lie beyond the height of the plane get a null pointer.
+        for (size_t i = 0; i < num_lines; ++i) {
+          rowdata[c][i] =
+              y0 + i < ysize ? &output->raw_data[c][(y0 + i) * xsize] : nullptr;
+        }
+        data[c] = &rowdata[c][0];
+      }
+      num_output_lines = jpegli_read_raw_data(cinfo, &data[0], max_lines);
+    } else {
+      // A max_output_lines of 0 means no limit on the lines read per call.
+      size_t max_output_lines = dparams.max_output_lines;
+      if (max_output_lines == 0) max_output_lines = cinfo->output_height;
+      if (cinfo->output_scanline < yoffset) {
+        // Above the crop window: skip up to its first line.
+        max_lines = yoffset - cinfo->output_scanline;
+        num_output_lines = jpegli_skip_scanlines(cinfo, max_lines);
+      } else if (cinfo->output_scanline >= yoffset + ysize_cropped) {
+        // Below the crop window: skip the rest of the image.
+        max_lines = cinfo->output_height - cinfo->output_scanline;
+        num_output_lines = jpegli_skip_scanlines(cinfo, max_lines);
+      } else {
+        size_t lines_left = yoffset + ysize_cropped - cinfo->output_scanline;
+        max_lines = std::min<size_t>(max_output_lines, lines_left);
+        size_t stride = cinfo->output_width * cinfo->out_color_components *
+                        bytes_per_sample;
+        std::vector<JSAMPROW> scanlines(max_lines);
+        // Row indices in output->pixels are relative to the crop window.
+        for (size_t i = 0; i < max_lines; ++i) {
+          size_t yidx = cinfo->output_scanline - yoffset + i;
+          scanlines[i] = &output->pixels[yidx * stride];
+        }
+        num_output_lines =
+            jpegli_read_scanlines(cinfo, &scanlines[0], max_lines);
+        if (cinfo->quantize_colors) {
+          // NOTE(review): presumably UnmapColors() replaces colormap indices
+          // by the colors they stand for, in place — confirm.
+          for (size_t i = 0; i < num_output_lines; ++i) {
+            UnmapColors(scanlines[i], cinfo->output_width,
+                        cinfo->out_color_components, cinfo->colormap,
+                        cinfo->actual_number_of_colors);
+          }
+        }
+      }
+    }
+    total_output_lines += num_output_lines;
+    EXPECT_EQ(total_output_lines, cinfo->output_scanline);
+    EXPECT_EQ(num_output_lines, max_lines);
+  }
+  EXPECT_EQ(cinfo->total_iMCU_rows,
+            DivCeil(cinfo->image_height, cinfo->max_v_samp_factor * DCTSIZE));
+}
+
+// Parameters of one parameterized decoder API test case.
+struct TestConfig {
+  // Test data file holding the JPEG to decode. When empty, the JPEG is
+  // produced by encoding `input` with `jparams`.
+  std::string fn;
+  // Short label that goes with fn.
+  std::string fn_desc;
+  TestImage input;
+  CompressParams jparams;
+  DecompressParams dparams;
+  // When true, the jpegli and the libjpeg outputs are both compared with
+  // `input` instead of with each other.
+  bool compare_to_orig = false;
+  // With compare_to_orig: upper bound on the jpegli RMS distance, as a factor
+  // of the libjpeg RMS distance.
+  float max_tolerance_factor = 1.01f;
+  // Without compare_to_orig: limits handed to VerifyOutputImage().
+  float max_rms_dist = 1.0f;
+  float max_diff = 35.0f;
+};
+
+// Returns the JPEG bytes for a test case: the contents of the file config.fn
+// when a file name is given, otherwise the result of encoding freshly
+// generated pixels of config.input with jpegli. A dparams.size_factor below 1
+// truncates the data to that fraction of its size.
+std::vector<uint8_t> GetTestJpegData(TestConfig& config) {
+  std::vector<uint8_t> jpeg_bytes;
+  if (config.fn.empty()) {
+    GeneratePixels(&config.input);
+    JXL_CHECK(EncodeWithJpegli(config.input, config.jparams, &jpeg_bytes));
+  } else {
+    jpeg_bytes = ReadTestData(config.fn.c_str());
+  }
+  const bool truncate_input = config.dparams.size_factor < 1.0f;
+  if (truncate_input) {
+    jpeg_bytes.resize(jpeg_bytes.size() * config.dparams.size_factor);
+  }
+  return jpeg_bytes;
+}
+
+// Decodes one image from the source attached to cinfo in non-buffered mode
+// and stores the result in *output, as pixels, raw data or coefficients
+// depending on dparams.output_mode. On the way, checks the custom markers and
+// the ICC profile requested by jparams, the header fields and the output
+// width against expected_output.
+void TestAPINonBuffered(const CompressParams& jparams,
+                        const DecompressParams& dparams,
+                        const TestImage& expected_output,
+                        j_decompress_ptr cinfo, TestImage* output) {
+  if (jparams.add_marker) {
+    jpegli_save_markers(cinfo, kSpecialMarker0, 0xffff);
+    jpegli_save_markers(cinfo, kSpecialMarker1, 0xffff);
+    num_markers_seen = 0;
+    jpegli_set_marker_processor(cinfo, 0xe6, test_marker_processor);
+    jpegli_set_marker_processor(cinfo, 0xe7, test_marker_processor);
+    jpegli_set_marker_processor(cinfo, 0xe8, test_marker_processor);
+  }
+  if (!jparams.icc.empty()) {
+    jpegli_save_markers(cinfo, JPEG_APP0 + 2, 0xffff);
+  }
+  jpegli_read_header(cinfo, /*require_image=*/TRUE);
+  if (jparams.add_marker) {
+    EXPECT_EQ(num_markers_seen, kMarkerSequenceLen);
+    // Compare only as many bytes as both arrays hold; a wrong marker count has
+    // already been reported by the check above.
+    const size_t num_to_compare =
+        std::min<size_t>(num_markers_seen, kMarkerSequenceLen);
+    EXPECT_EQ(0, memcmp(markers_seen, kMarkerSequence, num_to_compare));
+  }
+  if (!jparams.icc.empty()) {
+    uint8_t* icc_data = nullptr;
+    unsigned int icc_len = 0;
+    JXL_CHECK(jpegli_read_icc_profile(cinfo, &icc_data, &icc_len));
+    JXL_CHECK(icc_data);
+    // The profile has to come back with its original size; the contents are
+    // only compared when the sizes agree, so that memcmp never reads past the
+    // end of the shorter buffer.
+    EXPECT_EQ(jparams.icc.size(), icc_len);
+    if (icc_len == jparams.icc.size()) {
+      EXPECT_EQ(0, memcmp(jparams.icc.data(), icc_data, icc_len));
+    }
+    free(icc_data);
+  }
+  // Check that jpegli_calc_output_dimensions can be called multiple times
+  // even with different parameters.
+  if (!cinfo->raw_data_out) {
+    cinfo->scale_num = 1;
+    cinfo->scale_denom = 2;
+  }
+  jpegli_calc_output_dimensions(cinfo);
+  SetDecompressParams(dparams, cinfo);
+  jpegli_set_output_format(cinfo, dparams.data_type, dparams.endianness);
+  VerifyHeader(jparams, cinfo);
+  jpegli_calc_output_dimensions(cinfo);
+  EXPECT_LE(expected_output.xsize, cinfo->output_width);
+  if (!dparams.crop_output) {
+    EXPECT_EQ(expected_output.xsize, cinfo->output_width);
+  }
+  if (dparams.output_mode == COEFFICIENTS) {
+    jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(cinfo);
+    JXL_CHECK(coef_arrays != nullptr);
+    CopyCoefficients(cinfo, coef_arrays, output);
+  } else {
+    jpegli_start_decompress(cinfo);
+    VerifyScanHeader(jparams, cinfo);
+    ReadOutputImage(dparams, cinfo, output);
+  }
+  jpegli_finish_decompress(cinfo);
+}
+
+// Decodes the image from the source attached to cinfo in buffered image mode
+// and appends one TestImage per output pass to *output_progression, checking
+// the input and output scan counters of cinfo along the way. With
+// dparams.skip_scans, scans with an even input scan number are consumed
+// without producing an output pass.
+void TestAPIBuffered(const CompressParams& jparams,
+                     const DecompressParams& dparams, j_decompress_ptr cinfo,
+                     std::vector<TestImage>* output_progression) {
+  EXPECT_EQ(JPEG_REACHED_SOS,
+            jpegli_read_header(cinfo, /*require_image=*/TRUE));
+  cinfo->buffered_image = TRUE;
+  SetDecompressParams(dparams, cinfo);
+  jpegli_set_output_format(cinfo, dparams.data_type, dparams.endianness);
+  VerifyHeader(jparams, cinfo);
+  EXPECT_TRUE(jpegli_start_decompress(cinfo));
+  // start decompress should not read the whole input in buffered image mode
+  EXPECT_FALSE(jpegli_input_complete(cinfo));
+  bool has_multiple_scans = jpegli_has_multiple_scans(cinfo);
+  EXPECT_EQ(0, cinfo->output_scan_number);
+  int sos_marker_cnt = 1;  // read_header reads the first SOS marker
+  while (!jpegli_input_complete(cinfo)) {
+    EXPECT_EQ(cinfo->input_scan_number, sos_marker_cnt);
+    if (dparams.skip_scans && (cinfo->input_scan_number % 2) != 1) {
+      // Consume this scan up to the next SOS or EOI marker without starting
+      // an output pass for it.
+      int result = JPEG_SUSPENDED;
+      while (result != JPEG_REACHED_SOS && result != JPEG_REACHED_EOI) {
+        result = jpegli_consume_input(cinfo);
+      }
+      if (result == JPEG_REACHED_SOS) ++sos_marker_cnt;
+      continue;
+    }
+    SetScanDecompressParams(dparams, cinfo, cinfo->input_scan_number);
+    EXPECT_TRUE(jpegli_start_output(cinfo, cinfo->input_scan_number));
+    // start output sets output_scan_number, but does not change
+    // input_scan_number
+    EXPECT_EQ(cinfo->output_scan_number, cinfo->input_scan_number);
+    EXPECT_EQ(cinfo->input_scan_number, sos_marker_cnt);
+    VerifyScanHeader(jparams, cinfo);
+    TestImage output;
+    ReadOutputImage(dparams, cinfo, &output);
+    output_progression->emplace_back(std::move(output));
+    // read scanlines/read raw data does not change input/output scan number
+    EXPECT_EQ(cinfo->input_scan_number, sos_marker_cnt);
+    EXPECT_EQ(cinfo->output_scan_number, cinfo->input_scan_number);
+    EXPECT_TRUE(jpegli_finish_output(cinfo));
+    ++sos_marker_cnt;  // finish output reads the next SOS marker or EOI
+    if (dparams.output_mode == COEFFICIENTS) {
+      // The coefficients are stored in the image of the pass just finished.
+      jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(cinfo);
+      JXL_CHECK(coef_arrays != nullptr);
+      CopyCoefficients(cinfo, coef_arrays, &output_progression->back());
+    }
+  }
+  jpegli_finish_decompress(cinfo);
+  // The scan count is only checked for input that was not truncated.
+  if (dparams.size_factor == 1.0f) {
+    EXPECT_EQ(has_multiple_scans, cinfo->input_scan_number > 1);
+  }
+}
+
+// Uses a single decompress object for a long series of decodes with different
+// chroma subsampling, progressive levels, output modes and output scalings.
+// Every input is decoded in non-buffered and then in buffered image mode, and
+// each result is compared with the output of libjpeg.
+TEST(DecodeAPITest, ReuseCinfo) {
+  TestImage input, output, expected;
+  std::vector<TestImage> output_progression, expected_output_progression;
+  CompressParams jparams;
+  DecompressParams dparams;
+  std::vector<uint8_t> compressed;
+  jpeg_decompress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    input.xsize = 129;
+    input.ysize = 73;
+    GeneratePixels(&input);
+    for (int h_samp : {2, 1}) {
+      for (int v_samp : {2, 1}) {
+        for (int progr : {0, 2}) {
+          jparams.h_sampling = {h_samp, 1, 1};
+          jparams.v_sampling = {v_samp, 1, 1};
+          jparams.progressive_mode = progr;
+          printf(
+              "Generating input with %dx%d chroma subsampling "
+              "progressive level %d\n",
+              h_samp, v_samp, progr);
+          JXL_CHECK(EncodeWithJpegli(input, jparams, &compressed));
+          for (JpegIOMode output_mode : {PIXELS, RAW_DATA, COEFFICIENTS}) {
+            // NOTE(review): `crop` only affects the `continue` below and the
+            // log line; it is never copied into dparams.crop_output in this
+            // test — confirm whether that is intended.
+            for (bool crop : {true, false}) {
+              if (crop && output_mode != PIXELS) continue;
+              for (int scale_num : {1, 2, 3, 4, 7, 8, 13, 16}) {
+                // Scaled output is only tested for pixel output.
+                if (scale_num != 8 && output_mode != PIXELS) continue;
+                // Reduce the fraction scale_num / 8 to lowest terms.
+                int scale_denom = 8;
+                while (scale_num % 2 == 0 && scale_denom % 2 == 0) {
+                  scale_num /= 2;
+                  scale_denom /= 2;
+                }
+                printf("Decoding with output mode %d output scaling %d/%d %s\n",
+                       output_mode, scale_num, scale_denom,
+                       crop ? "with cropped output" : "");
+                dparams.output_mode = output_mode;
+                dparams.scale_num = scale_num;
+                dparams.scale_denom = scale_denom;
+                expected.Clear();
+                DecodeWithLibjpeg(jparams, dparams, compressed, &expected);
+                output.Clear();
+                // Reset the fields that a previous iteration may have changed.
+                cinfo.buffered_image = false;
+                cinfo.raw_data_out = false;
+                cinfo.scale_num = cinfo.scale_denom = 1;
+                SourceManager src(compressed.data(), compressed.size(),
+                                  1u << 12);
+                cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+                // Read the header and abort, then decode the same input from
+                // the start with the same decompress object.
+                jpegli_read_header(&cinfo, /*require_image=*/TRUE);
+                jpegli_abort_decompress(&cinfo);
+                src.Reset();
+                TestAPINonBuffered(jparams, dparams, expected, &cinfo, &output);
+                float max_rms = output_mode == COEFFICIENTS ? 0.0f : 1.0f;
+                if (scale_num == 1 && scale_denom == 8 && h_samp != v_samp) {
+                  max_rms = 5.0f;  // libjpeg does not do fancy upsampling
+                }
+                VerifyOutputImage(expected, output, max_rms);
+                printf("Decoding in buffered image mode\n");
+                expected_output_progression.clear();
+                DecodeAllScansWithLibjpeg(jparams, dparams, compressed,
+                                          &expected_output_progression);
+                output_progression.clear();
+                src.Reset();
+                TestAPIBuffered(jparams, dparams, &cinfo, &output_progression);
+                JXL_CHECK(output_progression.size() ==
+                          expected_output_progression.size());
+                for (size_t i = 0; i < output_progression.size(); ++i) {
+                  const TestImage& output = output_progression[i];
+                  const TestImage& expected = expected_output_progression[i];
+                  VerifyOutputImage(expected, output, max_rms);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    return true;
+  };
+  ASSERT_TRUE(try_catch_block());
+  jpegli_destroy_decompress(&cinfo);
+}
+
+// Builds four test cases with generated pixels: chroma sampling factor 1 or 2
+// combined with progressive mode 0 or 2. The image dimensions depend on the
+// combination, so that every case has a different size.
+std::vector<TestConfig> GenerateBasicConfigs() {
+  std::vector<TestConfig> configs;
+  for (int samp : {1, 2}) {
+    for (int progr : {0, 2}) {
+      configs.emplace_back();
+      TestConfig& cfg = configs.back();
+      cfg.input.xsize = 257 + samp * 37;
+      cfg.input.ysize = 265 + (progr / 2) * 17;
+      cfg.jparams.h_sampling = {samp, 1, 1};
+      cfg.jparams.v_sampling = {samp, 1, 1};
+      cfg.jparams.progressive_mode = progr;
+      GeneratePixels(&cfg.input);
+    }
+  }
+  return configs;
+}
+
+// Encodes all basic test images back to back with one compress object and
+// one memory destination, then decodes them one after the other with a single
+// decompress object reading from one memory source.
+TEST(DecodeAPITest, ReuseCinfoSameMemSource) {
+  std::vector<TestConfig> all_configs = GenerateBasicConfigs();
+  // Filled in by the memory destination; released with free() at the end.
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  {
+    jpeg_compress_struct cinfo;
+    const auto try_catch_block = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);
+      jpegli_create_compress(&cinfo);
+      jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+      for (const TestConfig& config : all_configs) {
+        EncodeWithJpegli(config.input, config.jparams, &cinfo);
+      }
+      return true;
+    };
+    EXPECT_TRUE(try_catch_block());
+    jpegli_destroy_compress(&cinfo);
+  }
+  std::vector<TestImage> all_outputs(all_configs.size());
+  {
+    jpeg_decompress_struct cinfo;
+    const auto try_catch_block = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);
+      jpegli_create_decompress(&cinfo);
+      jpegli_mem_src(&cinfo, buffer, buffer_size);
+      for (size_t i = 0; i < all_configs.size(); ++i) {
+        TestAPINonBuffered(all_configs[i].jparams, DecompressParams(),
+                           all_configs[i].input, &cinfo, &all_outputs[i]);
+      }
+      return true;
+    };
+    EXPECT_TRUE(try_catch_block());
+    jpegli_destroy_decompress(&cinfo);
+  }
+  for (size_t i = 0; i < all_configs.size(); ++i) {
+    VerifyOutputImage(all_configs[i].input, all_outputs[i], 2.35f);
+  }
+  // free() accepts a null pointer, so no guard is needed.
+  free(buffer);
+}
+
+// Same as ReuseCinfoSameMemSource, but the images are written to and read
+// back from a temporary file through the stdio destination and source.
+TEST(DecodeAPITest, ReuseCinfoSameStdSource) {
+  std::vector<TestConfig> configs = GenerateBasicConfigs();
+  FILE* scratch_file = tmpfile();
+  JXL_CHECK(scratch_file);
+  {
+    // Write all images back to back into the temporary file.
+    jpeg_compress_struct cinfo;
+    const auto encode_all = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);
+      jpegli_create_compress(&cinfo);
+      jpegli_stdio_dest(&cinfo, scratch_file);
+      for (const TestConfig& cfg : configs) {
+        EncodeWithJpegli(cfg.input, cfg.jparams, &cinfo);
+      }
+      return true;
+    };
+    EXPECT_TRUE(encode_all());
+    jpegli_destroy_compress(&cinfo);
+  }
+  rewind(scratch_file);
+  std::vector<TestImage> outputs(configs.size());
+  {
+    // Decode the images one after the other from the same stdio source.
+    jpeg_decompress_struct cinfo;
+    const auto decode_all = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);
+      jpegli_create_decompress(&cinfo);
+      jpegli_stdio_src(&cinfo, scratch_file);
+      for (size_t idx = 0; idx < configs.size(); ++idx) {
+        const TestConfig& cfg = configs[idx];
+        TestAPINonBuffered(cfg.jparams, DecompressParams(), cfg.input, &cinfo,
+                           &outputs[idx]);
+      }
+      return true;
+    };
+    EXPECT_TRUE(decode_all());
+    jpegli_destroy_decompress(&cinfo);
+  }
+  for (size_t idx = 0; idx < configs.size(); ++idx) {
+    VerifyOutputImage(configs[idx].input, outputs[idx], 2.35f);
+  }
+  fclose(scratch_file);
+}
+
+// Writes a tables-only stream followed by an image stream for a 1x1 RGB
+// image, then checks that a decompress object that has read the tables-only
+// stream first can decode the image stream.
+TEST(DecodeAPITest, AbbreviatedStreams) {
+  // Both buffers are filled in by the memory destination and released with
+  // free() at the end of the test.
+  uint8_t* table_stream = nullptr;
+  unsigned long table_stream_size = 0;
+  uint8_t* data_stream = nullptr;
+  unsigned long data_stream_size = 0;
+  {
+    jpeg_compress_struct cinfo;
+    const auto try_catch_block = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);
+      jpegli_create_compress(&cinfo);
+      jpegli_mem_dest(&cinfo, &table_stream, &table_stream_size);
+      cinfo.input_components = 3;
+      cinfo.in_color_space = JCS_RGB;
+      jpegli_set_defaults(&cinfo);
+      jpegli_write_tables(&cinfo);
+      jpegli_mem_dest(&cinfo, &data_stream, &data_stream_size);
+      cinfo.image_width = 1;
+      cinfo.image_height = 1;
+      cinfo.optimize_coding = FALSE;
+      jpegli_set_progressive_level(&cinfo, 0);
+      jpegli_start_compress(&cinfo, FALSE);
+      JSAMPLE image[3] = {0};
+      JSAMPROW row[] = {image};
+      jpegli_write_scanlines(&cinfo, row, 1);
+      jpegli_finish_compress(&cinfo);
+      return true;
+    };
+    EXPECT_TRUE(try_catch_block());
+    // Unsigned literal: the size is an unsigned long.
+    EXPECT_LT(data_stream_size, 50u);
+    jpegli_destroy_compress(&cinfo);
+  }
+  {
+    jpeg_decompress_struct cinfo = {};
+    const auto try_catch_block = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);
+      jpegli_create_decompress(&cinfo);
+      jpegli_mem_src(&cinfo, table_stream, table_stream_size);
+      jpegli_read_header(&cinfo, FALSE);
+      jpegli_mem_src(&cinfo, data_stream, data_stream_size);
+      jpegli_read_header(&cinfo, TRUE);
+      EXPECT_EQ(1u, cinfo.image_width);
+      EXPECT_EQ(1u, cinfo.image_height);
+      EXPECT_EQ(3, cinfo.num_components);
+      jpegli_start_decompress(&cinfo);
+      JSAMPLE image[3] = {0};
+      JSAMPROW row[] = {image};
+      jpegli_read_scanlines(&cinfo, row, 1);
+      EXPECT_EQ(0, image[0]);
+      EXPECT_EQ(0, image[1]);
+      EXPECT_EQ(0, image[2]);
+      jpegli_finish_decompress(&cinfo);
+      return true;
+    };
+    EXPECT_TRUE(try_catch_block());
+    jpegli_destroy_decompress(&cinfo);
+  }
+  // free() accepts a null pointer, so no guards are needed.
+  free(table_stream);
+  free(data_stream);
+}
+
+// Fixture of the parameterized test of the non-buffered decoder API.
+class DecodeAPITestParam : public ::testing::TestWithParam<TestConfig> {};
+
+// Decodes the JPEG of the test case with libjpeg and with jpegli and compares
+// the results, either with each other or, when compare_to_orig is set, by
+// their RMS distance from the original image. Test cases with skip_scans are
+// not run by this test.
+TEST_P(DecodeAPITestParam, TestAPI) {
+  TestConfig config = GetParam();
+  const DecompressParams& dparams = config.dparams;
+  if (dparams.skip_scans) return;
+  const std::vector<uint8_t> jpeg_bytes = GetTestJpegData(config);
+  SourceManager src(jpeg_bytes.data(), jpeg_bytes.size(), dparams.chunk_size);
+
+  TestImage libjpeg_output;
+  DecodeWithLibjpeg(config.jparams, dparams, jpeg_bytes, &libjpeg_output);
+
+  TestImage jpegli_output;
+  jpeg_decompress_struct cinfo;
+  const auto decode_with_jpegli = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+    TestAPINonBuffered(config.jparams, dparams, libjpeg_output, &cinfo,
+                       &jpegli_output);
+    return true;
+  };
+  ASSERT_TRUE(decode_with_jpegli());
+  jpegli_destroy_decompress(&cinfo);
+
+  if (!config.compare_to_orig) {
+    VerifyOutputImage(jpegli_output, libjpeg_output, config.max_rms_dist,
+                      config.max_diff);
+    return;
+  }
+  // jpegli may be at most max_tolerance_factor times as far from the original
+  // as libjpeg.
+  double rms_jpegli = DistanceRms(config.input, jpegli_output);
+  double rms_libjpeg = DistanceRms(config.input, libjpeg_output);
+  printf("rms: %f vs %f\n", rms_jpegli, rms_libjpeg);
+  EXPECT_LE(rms_jpegli, rms_libjpeg * config.max_tolerance_factor);
+}
+
+// Fixture of the parameterized test of the buffered image mode decoder API.
+class DecodeAPITestParamBuffered : public ::testing::TestWithParam<TestConfig> {
+};
+
+// Decodes every output pass of the JPEG of the test case with libjpeg and
+// with jpegli in buffered image mode and compares the passes pairwise, either
+// with each other or, when compare_to_orig is set, by their RMS distance from
+// the original image.
+TEST_P(DecodeAPITestParamBuffered, TestAPI) {
+  TestConfig config = GetParam();
+  const DecompressParams& dparams = config.dparams;
+  const std::vector<uint8_t> jpeg_bytes = GetTestJpegData(config);
+  SourceManager src(jpeg_bytes.data(), jpeg_bytes.size(), dparams.chunk_size);
+
+  std::vector<TestImage> libjpeg_passes;
+  DecodeAllScansWithLibjpeg(config.jparams, dparams, jpeg_bytes,
+                            &libjpeg_passes);
+
+  std::vector<TestImage> jpegli_passes;
+  jpeg_decompress_struct cinfo;
+  const auto decode_with_jpegli = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_decompress(&cinfo);
+    cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+    TestAPIBuffered(config.jparams, dparams, &cinfo, &jpegli_passes);
+    return true;
+  };
+  ASSERT_TRUE(decode_with_jpegli());
+  jpegli_destroy_decompress(&cinfo);
+
+  ASSERT_EQ(jpegli_passes.size(), libjpeg_passes.size());
+  for (size_t pass = 0; pass < jpegli_passes.size(); ++pass) {
+    const TestImage& actual = jpegli_passes[pass];
+    const TestImage& reference = libjpeg_passes[pass];
+    if (!config.compare_to_orig) {
+      VerifyOutputImage(reference, actual, config.max_rms_dist,
+                        config.max_diff);
+      continue;
+    }
+    // jpegli may be at most max_tolerance_factor times as far from the
+    // original as libjpeg.
+    double rms_jpegli = DistanceRms(config.input, actual);
+    double rms_libjpeg = DistanceRms(config.input, reference);
+    printf("rms: %f vs %f\n", rms_jpegli, rms_libjpeg);
+    EXPECT_LE(rms_jpegli, rms_libjpeg * config.max_tolerance_factor);
+  }
+}
+
+std::vector<TestConfig> GenerateTests(bool buffered) {
+ std::vector<TestConfig> all_tests;
+ {
+ std::vector<std::pair<std::string, std::string>> testfiles({
+ {"jxl/flower/flower.png.im_q85_420_progr.jpg", "Q85YUV420PROGR"},
+ {"jxl/flower/flower.png.im_q85_420_R13B.jpg", "Q85YUV420R13B"},
+ {"jxl/flower/flower.png.im_q85_444.jpg", "Q85YUV444"},
+ });
+ for (size_t i = 0; i < (buffered ? 1u : testfiles.size()); ++i) {
+ TestConfig config;
+ config.fn = testfiles[i].first;
+ config.fn_desc = testfiles[i].second;
+ for (size_t chunk_size : {0, 1, 64, 65536}) {
+ config.dparams.chunk_size = chunk_size;
+ for (size_t max_output_lines : {0, 1, 8, 16}) {
+ config.dparams.max_output_lines = max_output_lines;
+ config.dparams.output_mode = PIXELS;
+ all_tests.push_back(config);
+ }
+ {
+ config.dparams.max_output_lines = 16;
+ config.dparams.output_mode = RAW_DATA;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+
+ {
+ std::vector<std::pair<std::string, std::string>> testfiles({
+ {"jxl/flower/flower_small.q85_444_non_interleaved.jpg",
+ "Q85YUV444NonInterleaved"},
+ {"jxl/flower/flower_small.q85_420_non_interleaved.jpg",
+ "Q85YUV420NonInterleaved"},
+ {"jxl/flower/flower_small.q85_444_partially_interleaved.jpg",
+ "Q85YUV444PartiallyInterleaved"},
+ {"jxl/flower/flower_small.q85_420_partially_interleaved.jpg",
+ "Q85YUV420PartiallyInterleaved"},
+ {"jxl/flower/flower.png.im_q85_422.jpg", "Q85YUV422"},
+ {"jxl/flower/flower.png.im_q85_440.jpg", "Q85YUV440"},
+ {"jxl/flower/flower.png.im_q85_444_1x2.jpg", "Q85YUV444_1x2"},
+ {"jxl/flower/flower.png.im_q85_asymmetric.jpg", "Q85Asymmetric"},
+ {"jxl/flower/flower.png.im_q85_gray.jpg", "Q85Gray"},
+ {"jxl/flower/flower.png.im_q85_luma_subsample.jpg", "Q85LumaSubsample"},
+ {"jxl/flower/flower.png.im_q85_rgb.jpg", "Q85RGB"},
+ {"jxl/flower/flower.png.im_q85_rgb_subsample_blue.jpg",
+ "Q85RGBSubsampleBlue"},
+ {"jxl/flower/flower_small.cmyk.jpg", "CMYK"},
+ });
+ for (size_t i = 0; i < (buffered ? 4u : testfiles.size()); ++i) {
+ for (JpegIOMode output_mode : {PIXELS, RAW_DATA}) {
+ TestConfig config;
+ config.fn = testfiles[i].first;
+ config.fn_desc = testfiles[i].second;
+ config.dparams.output_mode = output_mode;
+ all_tests.push_back(config);
+ }
+ }
+ }
+
+ // Tests for common chroma subsampling and output modes.
+ for (JpegIOMode output_mode : {PIXELS, RAW_DATA, COEFFICIENTS}) {
+ for (int h_samp : {1, 2}) {
+ for (int v_samp : {1, 2}) {
+ for (bool fancy : {true, false}) {
+ if (!fancy && (output_mode != PIXELS || h_samp * v_samp == 1)) {
+ continue;
+ }
+ TestConfig config;
+ config.dparams.output_mode = output_mode;
+ config.dparams.do_fancy_upsampling = fancy;
+ config.jparams.progressive_mode = 2;
+ config.jparams.h_sampling = {h_samp, 1, 1};
+ config.jparams.v_sampling = {v_samp, 1, 1};
+ if (output_mode == COEFFICIENTS) {
+ config.max_rms_dist = 0.0f;
+ }
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+
+ // Tests for partial input.
+ for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f}) {
+ for (int progr : {0, 1, 3}) {
+ for (int samp : {1, 2}) {
+ for (bool skip_scans : {false, true}) {
+ if (skip_scans && (progr != 1 || size_factor < 0.5f)) continue;
+ for (JpegIOMode output_mode : {PIXELS, RAW_DATA}) {
+ TestConfig config;
+ config.input.xsize = 517;
+ config.input.ysize = 523;
+ config.jparams.h_sampling = {samp, 1, 1};
+ config.jparams.v_sampling = {samp, 1, 1};
+ config.jparams.progressive_mode = progr;
+ config.dparams.size_factor = size_factor;
+ config.dparams.output_mode = output_mode;
+ config.dparams.skip_scans = skip_scans;
+ // The last partially available block can behave differently.
+ // TODO(szabadka) Figure out if we can make the behaviour more
+ // similar.
+ config.max_rms_dist = samp == 1 ? 1.75f : 3.0f;
+ config.max_diff = 255.0f;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ }
+
+ // Tests for block smoothing.
+ for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f, 1.0f}) {
+ for (int samp : {1, 2}) {
+ for (bool skip_scans : {false, true}) {
+ if (skip_scans && size_factor < 0.3f) continue;
+ TestConfig config;
+ config.input.xsize = 517;
+ config.input.ysize = 523;
+ config.jparams.h_sampling = {samp, 1, 1};
+ config.jparams.v_sampling = {samp, 1, 1};
+ config.jparams.progressive_mode = 2;
+ config.dparams.size_factor = size_factor;
+ config.dparams.do_block_smoothing = true;
+ config.dparams.skip_scans = skip_scans;
+ // libjpeg does smoothing for incomplete scans differently at
+ // the border between current and previous scans.
+ config.max_rms_dist = 8.0f;
+ config.max_diff = 255.0f;
+ all_tests.push_back(config);
+ }
+ }
+ }
+
+ // Test for switching output color quantization modes between scans.
+ if (buffered) {
+ TestConfig config;
+ config.jparams.progressive_mode = 2;
+ config.dparams.quantize_colors = true;
+ config.dparams.scan_params = {
+ {3, JDITHER_NONE, CQUANT_1PASS}, {4, JDITHER_ORDERED, CQUANT_1PASS},
+ {5, JDITHER_FS, CQUANT_1PASS}, {6, JDITHER_NONE, CQUANT_EXTERNAL},
+ {8, JDITHER_NONE, CQUANT_REUSE}, {9, JDITHER_NONE, CQUANT_EXTERNAL},
+ {10, JDITHER_NONE, CQUANT_2PASS}, {11, JDITHER_NONE, CQUANT_REUSE},
+ {12, JDITHER_NONE, CQUANT_2PASS}, {13, JDITHER_FS, CQUANT_2PASS},
+ };
+ config.compare_to_orig = true;
+ config.max_tolerance_factor = 1.04f;
+ all_tests.push_back(config);
+ }
+
+ if (buffered) {
+ return all_tests;
+ }
+
+ // Tests for output color quantization.
+ for (int num_colors : {8, 64, 256}) {
+ for (ColorQuantMode mode : {CQUANT_1PASS, CQUANT_EXTERNAL, CQUANT_2PASS}) {
+ if (mode == CQUANT_EXTERNAL && num_colors != 256) continue;
+ for (J_DITHER_MODE dither : {JDITHER_NONE, JDITHER_ORDERED, JDITHER_FS}) {
+ if (mode == CQUANT_EXTERNAL && dither != JDITHER_NONE) continue;
+ if (mode != CQUANT_1PASS && dither == JDITHER_ORDERED) continue;
+ for (bool crop : {false, true}) {
+ for (bool scale : {false, true}) {
+ for (bool samp : {false, true}) {
+ if ((num_colors != 256) && (crop || scale || samp)) {
+ continue;
+ }
+ if (mode == CQUANT_2PASS && crop) continue;
+ TestConfig config;
+ config.input.xsize = 1024;
+ config.input.ysize = 768;
+ config.dparams.quantize_colors = true;
+ config.dparams.desired_number_of_colors = num_colors;
+ config.dparams.scan_params = {{kLastScan, dither, mode}};
+ config.dparams.crop_output = crop;
+ if (scale) {
+ config.dparams.scale_num = 7;
+ config.dparams.scale_denom = 8;
+ }
+ if (samp) {
+ config.jparams.h_sampling = {2, 1, 1};
+ config.jparams.v_sampling = {2, 1, 1};
+ }
+ if (!scale && !crop) {
+ config.compare_to_orig = true;
+ if (dither != JDITHER_NONE) {
+ config.max_tolerance_factor = 1.05f;
+ }
+ if (mode == CQUANT_2PASS &&
+ (num_colors == 8 || dither == JDITHER_FS)) {
+ // TODO(szabadka) Lower this bound.
+ config.max_tolerance_factor = 1.5f;
+ }
+ } else {
+ // We only test for buffer overflows, etc.
+ config.max_rms_dist = 100.0f;
+ config.max_diff = 255.0f;
+ }
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Tests for output formats.
+ for (JpegliDataType type :
+ {JPEGLI_TYPE_UINT8, JPEGLI_TYPE_UINT16, JPEGLI_TYPE_FLOAT}) {
+ for (JpegliEndianness endianness :
+ {JPEGLI_NATIVE_ENDIAN, JPEGLI_LITTLE_ENDIAN, JPEGLI_BIG_ENDIAN}) {
+ if (type == JPEGLI_TYPE_UINT8 && endianness != JPEGLI_NATIVE_ENDIAN) {
+ continue;
+ }
+ for (int channels = 1; channels <= 4; ++channels) {
+ TestConfig config;
+ config.dparams.data_type = type;
+ config.dparams.endianness = endianness;
+ config.input.color_space = JCS_UNKNOWN;
+ config.input.components = channels;
+ config.dparams.set_out_color_space = true;
+ config.dparams.out_color_space = JCS_UNKNOWN;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ // Test for output cropping.
+ {
+ TestConfig config;
+ config.dparams.crop_output = true;
+ all_tests.push_back(config);
+ }
+ // Tests for color transforms.
+ for (J_COLOR_SPACE out_color_space : {JCS_RGB, JCS_GRAYSCALE}) {
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.input.color_space = JCS_GRAYSCALE;
+ config.dparams.set_out_color_space = true;
+ config.dparams.out_color_space = out_color_space;
+ all_tests.push_back(config);
+ }
+ for (J_COLOR_SPACE jpeg_color_space : {JCS_RGB, JCS_YCbCr}) {
+ for (J_COLOR_SPACE out_color_space : {JCS_RGB, JCS_YCbCr, JCS_GRAYSCALE}) {
+ if (jpeg_color_space == JCS_RGB && out_color_space == JCS_YCbCr) continue;
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.jparams.set_jpeg_colorspace = true;
+ config.jparams.jpeg_color_space = jpeg_color_space;
+ config.dparams.set_out_color_space = true;
+ config.dparams.out_color_space = out_color_space;
+ all_tests.push_back(config);
+ }
+ }
+ for (J_COLOR_SPACE jpeg_color_space : {JCS_CMYK, JCS_YCCK}) {
+ for (J_COLOR_SPACE out_color_space : {JCS_CMYK, JCS_YCCK}) {
+ if (jpeg_color_space == JCS_CMYK && out_color_space == JCS_YCCK) continue;
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.input.color_space = JCS_CMYK;
+ config.jparams.set_jpeg_colorspace = true;
+ config.jparams.jpeg_color_space = jpeg_color_space;
+ config.dparams.set_out_color_space = true;
+ config.dparams.out_color_space = out_color_space;
+ all_tests.push_back(config);
+ }
+ }
+ // Tests for progressive levels.
+ for (int p = 0; p < 3 + NumTestScanScripts(); ++p) {
+ TestConfig config;
+ config.jparams.progressive_mode = p;
+ all_tests.push_back(config);
+ }
+ // Tests for RST markers.
+ for (size_t r : {1, 17, 1024}) {
+ for (size_t chunk_size : {1, 65536}) {
+ for (int progr : {0, 2}) {
+ TestConfig config;
+ config.dparams.chunk_size = chunk_size;
+ config.jparams.progressive_mode = progr;
+ config.jparams.restart_interval = r;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ for (size_t rr : {1, 3, 8, 100}) {
+ TestConfig config;
+ config.jparams.restart_in_rows = rr;
+ all_tests.push_back(config);
+ }
+ // Tests for custom quantization tables.
+ for (int type : {0, 1, 10, 100, 10000}) {
+ for (int scale : {1, 50, 100, 200, 500}) {
+ for (bool add_raw : {false, true}) {
+ for (bool baseline : {true, false}) {
+ if (!baseline && (add_raw || type * scale < 25500)) continue;
+ TestConfig config;
+ config.input.xsize = 64;
+ config.input.ysize = 64;
+ CustomQuantTable table;
+ table.table_type = type;
+ table.scale_factor = scale;
+ table.force_baseline = baseline;
+ table.add_raw = add_raw;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ config.jparams.quant_indexes = {0, 0, 0};
+ config.compare_to_orig = true;
+ config.max_tolerance_factor = 1.02;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ for (int qidx = 0; qidx < 8; ++qidx) {
+ if (qidx == 3) continue;
+ TestConfig config;
+ config.input.xsize = 256;
+ config.input.ysize = 256;
+ config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+ (qidx >> 0) & 1};
+ all_tests.push_back(config);
+ }
+ for (int qidx = 0; qidx < 8; ++qidx) {
+ for (int slot_idx = 0; slot_idx < 2; ++slot_idx) {
+ if (qidx == 0 && slot_idx == 0) continue;
+ TestConfig config;
+ config.input.xsize = 256;
+ config.input.ysize = 256;
+ config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+ (qidx >> 0) & 1};
+ CustomQuantTable table;
+ table.slot_idx = slot_idx;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ all_tests.push_back(config);
+ }
+ }
+ for (int qidx = 0; qidx < 8; ++qidx) {
+ for (bool xyb : {false, true}) {
+ TestConfig config;
+ config.input.xsize = 256;
+ config.input.ysize = 256;
+ config.jparams.xyb_mode = xyb;
+ config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+ (qidx >> 0) & 1};
+ {
+ CustomQuantTable table;
+ table.slot_idx = 0;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ {
+ CustomQuantTable table;
+ table.slot_idx = 1;
+ table.table_type = 20;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ config.compare_to_orig = true;
+ all_tests.push_back(config);
+ }
+ }
+ for (bool xyb : {false, true}) {
+ TestConfig config;
+ config.input.xsize = 256;
+ config.input.ysize = 256;
+ config.jparams.xyb_mode = xyb;
+ config.jparams.quant_indexes = {0, 1, 2};
+ {
+ CustomQuantTable table;
+ table.slot_idx = 0;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ {
+ CustomQuantTable table;
+ table.slot_idx = 1;
+ table.table_type = 20;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ {
+ CustomQuantTable table;
+ table.slot_idx = 2;
+ table.table_type = 30;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ config.compare_to_orig = true;
+ all_tests.push_back(config);
+ }
+ // Tests for fixed (and custom) prefix codes.
+ for (J_COLOR_SPACE jpeg_color_space : {JCS_RGB, JCS_YCbCr}) {
+ for (bool flat_dc_luma : {false, true}) {
+ TestConfig config;
+ config.jparams.set_jpeg_colorspace = true;
+ config.jparams.jpeg_color_space = jpeg_color_space;
+ config.jparams.progressive_mode = 0;
+ config.jparams.optimize_coding = 0;
+ config.jparams.use_flat_dc_luma_code = flat_dc_luma;
+ all_tests.push_back(config);
+ }
+ }
+ for (J_COLOR_SPACE jpeg_color_space : {JCS_CMYK, JCS_YCCK}) {
+ for (bool flat_dc_luma : {false, true}) {
+ TestConfig config;
+ config.input.color_space = JCS_CMYK;
+ config.jparams.set_jpeg_colorspace = true;
+ config.jparams.jpeg_color_space = jpeg_color_space;
+ config.jparams.progressive_mode = 0;
+ config.jparams.optimize_coding = 0;
+ config.jparams.use_flat_dc_luma_code = flat_dc_luma;
+ all_tests.push_back(config);
+ }
+ }
+ // Test for jpeg without DHT marker.
+ {
+ TestConfig config;
+ config.jparams.progressive_mode = 0;
+ config.jparams.optimize_coding = 0;
+ config.jparams.omit_standard_tables = true;
+ all_tests.push_back(config);
+ }
+ // Test for custom component ids.
+ {
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 128;
+ config.jparams.comp_ids = {7, 17, 177};
+ all_tests.push_back(config);
+ }
+ // Tests for JFIF/Adobe markers.
+ for (int override_JFIF : {-1, 0, 1}) {
+ for (int override_Adobe : {-1, 0, 1}) {
+ if (override_JFIF == -1 && override_Adobe == -1) continue;
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 128;
+ config.jparams.override_JFIF = override_JFIF;
+ config.jparams.override_Adobe = override_Adobe;
+ all_tests.push_back(config);
+ }
+ }
+ // Tests for small images.
+ for (int xsize : {1, 7, 8, 9, 15, 16, 17}) {
+ for (int ysize : {1, 7, 8, 9, 15, 16, 17}) {
+ TestConfig config;
+ config.input.xsize = xsize;
+ config.input.ysize = ysize;
+ all_tests.push_back(config);
+ }
+ }
+ // Tests for custom marker processor.
+ for (size_t chunk_size : {0, 1, 64, 65536}) {
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.dparams.chunk_size = chunk_size;
+ config.jparams.add_marker = true;
+ all_tests.push_back(config);
+ }
+ // Tests for icc profile decoding.
+ for (size_t icc_size : {728, 70000, 1000000}) {
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.jparams.icc.resize(icc_size);
+ for (size_t i = 0; i < icc_size; ++i) {
+ config.jparams.icc[i] = (i * 17) & 0xff;
+ }
+ all_tests.push_back(config);
+ }
+ // Tests for unusual sampling factors.
+ for (int h0_samp : {1, 2, 3, 4}) {
+ for (int v0_samp : {1, 2, 3, 4}) {
+ for (int dxb = 0; dxb < h0_samp; ++dxb) {
+ for (int dyb = 0; dyb < v0_samp; ++dyb) {
+ for (int dx = 0; dx < 2; ++dx) {
+ for (int dy = 0; dy < 2; ++dy) {
+ TestConfig config;
+ config.input.xsize = 128 + dyb * 8 + dy;
+ config.input.ysize = 256 + dxb * 8 + dx;
+ config.jparams.progressive_mode = 2;
+ config.jparams.h_sampling = {h0_samp, 1, 1};
+ config.jparams.v_sampling = {v0_samp, 1, 1};
+ config.compare_to_orig = true;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ }
+ }
+ for (int h0_samp : {1, 2, 4}) {
+ for (int v0_samp : {1, 2, 4}) {
+ for (int h2_samp : {1, 2, 4}) {
+ for (int v2_samp : {1, 2, 4}) {
+ TestConfig config;
+ config.input.xsize = 137;
+ config.input.ysize = 75;
+ config.jparams.progressive_mode = 2;
+ config.jparams.h_sampling = {h0_samp, 1, h2_samp};
+ config.jparams.v_sampling = {v0_samp, 1, v2_samp};
+ config.compare_to_orig = true;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ for (int h0_samp : {1, 3}) {
+ for (int v0_samp : {1, 3}) {
+ for (int h2_samp : {1, 3}) {
+ for (int v2_samp : {1, 3}) {
+ TestConfig config;
+ config.input.xsize = 205;
+ config.input.ysize = 99;
+ config.jparams.progressive_mode = 2;
+ config.jparams.h_sampling = {h0_samp, 1, h2_samp};
+ config.jparams.v_sampling = {v0_samp, 1, v2_samp};
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ // Tests for output scaling.
+ for (int scale_num = 1; scale_num <= 16; ++scale_num) {
+ if (scale_num == 8) continue;
+ for (bool crop : {false, true}) {
+ for (int samp : {1, 2}) {
+ for (int progr : {0, 2}) {
+ TestConfig config;
+ config.jparams.h_sampling = {samp, 1, 1};
+ config.jparams.v_sampling = {samp, 1, 1};
+ config.jparams.progressive_mode = progr;
+ config.dparams.scale_num = scale_num;
+ config.dparams.scale_denom = 8;
+ config.dparams.crop_output = crop;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ return all_tests;
+}
+
+ // Maps a color quantization mode to the fragment used in test names. Values
+ // that are not listed yield an empty string.
+ std::string QuantMode(ColorQuantMode mode) {
+   if (mode == CQUANT_1PASS) return "1pass";
+   if (mode == CQUANT_EXTERNAL) return "External";
+   if (mode == CQUANT_2PASS) return "2pass";
+   if (mode == CQUANT_REUSE) return "Reuse";
+   return "";
+ }
+
+ // Maps a dither mode to the fragment used in test names. Values that are not
+ // listed yield an empty string.
+ std::string DitherMode(J_DITHER_MODE mode) {
+   if (mode == JDITHER_NONE) return "No";
+   if (mode == JDITHER_ORDERED) return "Ordered";
+   if (mode == JDITHER_FS) return "FS";
+   return "";
+ }
+
+ // Streams a test-name fragment describing the decompression parameters:
+ // input chunking, partial input, output batching, output mode, sample format
+ // and the optional rendering settings, in that order.
+ std::ostream& operator<<(std::ostream& os, const DecompressParams& dparams) {
+   // Input side.
+   if (dparams.chunk_size != 0) {
+     os << "InputChunks" << dparams.chunk_size;
+   } else {
+     os << "CompleteInput";
+   }
+   if (dparams.size_factor < 1.0f) {
+     const int percent = static_cast<int>(dparams.size_factor * 100);
+     os << "Partial" << percent << "p";
+   }
+
+   // Output side.
+   if (dparams.max_output_lines != 0) {
+     os << "OutputLines" << dparams.max_output_lines;
+   } else {
+     os << "CompleteOutput";
+   }
+   if (dparams.output_mode == COEFFICIENTS) {
+     os << "CoeffsOut";
+   } else if (dparams.output_mode == RAW_DATA) {
+     os << "RawDataOut";
+   }
+   os << IOMethodName(dparams.data_type, dparams.endianness);
+
+   // Optional settings, each one only mentioned when it is active.
+   if (dparams.set_out_color_space) {
+     const J_COLOR_SPACE out_space =
+         static_cast<J_COLOR_SPACE>(dparams.out_color_space);
+     os << "OutColor" << ColorSpaceName(out_space);
+   }
+   if (dparams.crop_output) os << "Crop";
+   if (dparams.do_block_smoothing) os << "BlockSmoothing";
+   if (!dparams.do_fancy_upsampling) os << "NoFancyUpsampling";
+   if (!(dparams.scale_num == 1 && dparams.scale_denom == 1)) {
+     os << "Scale" << dparams.scale_num << "_" << dparams.scale_denom;
+   }
+   if (dparams.quantize_colors) {
+     os << "Quant" << dparams.desired_number_of_colors << "colors";
+     // One "<mode><dither>Dither" entry per scan, joined with underscores.
+     bool first = true;
+     for (const auto& sparam : dparams.scan_params) {
+       if (!first) os << "_";
+       first = false;
+       os << QuantMode(sparam.color_quant_mode);
+       os << DitherMode(static_cast<J_DITHER_MODE>(sparam.dither_mode))
+          << "Dither";
+     }
+   }
+   if (dparams.skip_scans) os << "SkipScans";
+   return os;
+ }
+
+ // Streams the full test name for a configuration: the input description (the
+ // file description when a file name is set), then the compression and the
+ // decompression parameters.
+ std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+   if (c.fn.empty()) {
+     os << c.input;
+   } else {
+     os << c.fn_desc;
+   }
+   os << c.jparams;
+   return os << c.dparams;
+ }
+
+ // Name generator for the parameterized suites: the streamed form of the
+ // test's TestConfig.
+ std::string TestDescription(const testing::TestParamInfo<TestConfig>& info) {
+   std::ostringstream description;
+   description << info.param;
+   return description.str();
+ }
+
+ JPEGLI_INSTANTIATE_TEST_SUITE_P(DecodeAPITest, DecodeAPITestParam,
+ testing::ValuesIn(GenerateTests(false)),  // complete configuration list
+ TestDescription);  // test names are the streamed TestConfig
+
+ JPEGLI_INSTANTIATE_TEST_SUITE_P(DecodeAPITestBuffered,
+ DecodeAPITestParamBuffered,
+ testing::ValuesIn(GenerateTests(true)),  // GenerateTests returns early when buffered, before the color quantization tests
+ TestDescription);
+
+} // namespace
+} // namespace jpegli
diff --git a/lib/jpegli/decode_internal.h b/lib/jpegli/decode_internal.h
new file mode 100644
index 0000000..ed7baa3
--- /dev/null
+++ b/lib/jpegli/decode_internal.h
@@ -0,0 +1,151 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_DECODE_INTERNAL_H_
+#define LIB_JPEGLI_DECODE_INTERNAL_H_
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <vector>
+
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/common_internal.h"
+#include "lib/jpegli/huffman.h"
+
+namespace jpegli {
+
+ static constexpr int kNeedMoreInput = 100;  // NOTE(review): these four look like internal status codes; their users are not in this header - confirm
+ static constexpr int kHandleRestart = 101;
+ static constexpr int kHandleMarkerProcessor = 102;
+ static constexpr int kProcessNextMarker = 103;
+ static constexpr size_t kAllHuffLutSize = NUM_HUFF_TBLS * kJpegHuffmanLutSize;  // element count of dc_huff_lut_ / ac_huff_lut_ in jpeg_decomp_master
+
+ typedef int16_t coeff_t;  // coefficient type used by MCUCodingState and last_dc_coeff_
+
+ // State of the decoder that has to be saved before decoding one MCU in case
+ // we run out of the bitstream.
+ struct MCUCodingState {
+ coeff_t last_dc_coeff[kMaxComponents];  // one entry per component
+ int eobrun;  // NOTE(review): presumably the pending end-of-band run of a progressive scan - confirm
+ coeff_t coeffs[D_MAX_BLOCKS_IN_MCU * DCTSIZE2];  // room for D_MAX_BLOCKS_IN_MCU blocks of DCTSIZE2 coefficients
+ };
+
+} // namespace jpegli
+
+ // Use this forward-declared libjpeg struct to hold all our private variables.
+ // TODO(szabadka) Remove variables that have a corresponding version in cinfo.
+ struct jpeg_decomp_master {
+ //
+ // Input handling state.
+ //
+ std::vector<uint8_t> input_buffer_;
+ size_t input_buffer_pos_;
+ // Number of bits after codestream_pos_ that were already processed.
+ size_t codestream_bits_ahead_;
+ bool streaming_mode_;
+
+ // Coefficient buffers
+ jvirt_barray_ptr* coef_arrays;
+ JBLOCKARRAY coeff_rows[jpegli::kMaxComponents];
+
+ //
+ // Marker data processing state.
+ //
+ bool found_soi_;
+ bool found_dri_;  // set by ProcessDRI; a second DRI marker is rejected
+ bool found_sof_;  // set by ProcessSOF; required by ProcessSOS, and ProcessDQT rejects tables once it is set
+ bool found_eoi_;
+ size_t icc_index_;  // ICC chunks seen so far; must equal the index byte of each new chunk
+ size_t icc_total_;  // chunk total announced by the ICC chunks (0 until the first one is seen)
+ std::vector<uint8_t> icc_profile_;  // concatenated payloads of the ICC chunks
+ jpegli::HuffmanTableEntry dc_huff_lut_[jpegli::kAllHuffLutSize];
+ jpegli::HuffmanTableEntry ac_huff_lut_[jpegli::kAllHuffLutSize];
+ uint8_t markers_to_save_[32];
+ jpeg_marker_parser_method app_marker_parsers[16];
+ jpeg_marker_parser_method com_marker_parser;
+ // Whether this jpeg has multiple scans (progressive or non-interleaved
+ // sequential).
+ bool is_multiscan_;
+
+ // Fields defined by SOF marker.
+ size_t iMCU_cols_;  // DivCeil(image_width, max_h_samp_factor * DCTSIZE)
+ int h_factor[jpegli::kMaxComponents];  // max_h_samp_factor / h_samp_factor of the component
+ int v_factor[jpegli::kMaxComponents];  // max_v_samp_factor / v_samp_factor of the component
+
+ // Initialized at start of frame.
+ uint16_t scan_progression_[jpegli::kMaxComponents][DCTSIZE2];  // per coefficient: bitmask of the bit positions already covered by scans
+
+ //
+ // Per scan state.
+ //
+ size_t scan_mcu_row_;
+ size_t scan_mcu_col_;
+ size_t mcu_rows_per_iMCU_row_;
+ jpegli::coeff_t last_dc_coeff_[jpegli::kMaxComponents];
+ int eobrun_;
+ int restarts_to_go_;
+ int next_restart_marker_;
+
+ jpegli::MCUCodingState mcu_;
+
+ //
+ // Rendering state.
+ //
+ int output_passes_done_;
+ JpegliDataType output_data_type_ = JPEGLI_TYPE_UINT8;
+ bool swap_endianness_ = false;
+ size_t xoffset_;
+ bool need_context_rows_;
+
+ int min_scaled_dct_size;
+ int scaled_dct_size[jpegli::kMaxComponents];
+
+ size_t raw_height_[jpegli::kMaxComponents];
+ jpegli::RowBuffer<float> raw_output_[jpegli::kMaxComponents];
+ jpegli::RowBuffer<float> render_output_[jpegli::kMaxComponents];
+
+ void (*inverse_transform[jpegli::kMaxComponents])(
+ const int16_t* JXL_RESTRICT qblock, const float* JXL_RESTRICT dequant,
+ const float* JXL_RESTRICT biases, float* JXL_RESTRICT scratch_space,
+ float* JXL_RESTRICT output, size_t output_stride, size_t dctsize);
+
+ void (*color_transform)(float* row[jpegli::kMaxComponents], size_t len);
+
+ float* idct_scratch_;
+ float* upsample_scratch_;
+ uint8_t* output_scratch_;
+ int16_t* smoothing_scratch_;
+ float* dequant_;
+ // 1 = 1pass, 2 = 2pass, 3 = external
+ int quant_mode_;
+ int quant_pass_;
+ int num_colors_[jpegli::kMaxComponents];
+ uint8_t* colormap_lut_;
+ uint8_t* pixels_;
+ JSAMPARRAY scanlines_;
+ std::vector<std::vector<uint8_t>> candidate_lists_;
+ bool regenerate_inverse_colormap_;
+ float* dither_[jpegli::kMaxComponents];
+ float* error_row_[2 * jpegli::kMaxComponents];
+ size_t dither_size_;
+ size_t dither_mask_;
+
+ // Per channel and per frequency statistics about the number of nonzeros and
+ // the sum of coefficient absolute values, used in dequantization bias
+ // computation.
+ int* nonzeros_;
+ int* sumabs_;
+ size_t num_processed_blocks_[jpegli::kMaxComponents];
+ float* biases_;
+ #define SAVED_COEFS 10
+ // This holds the coef_bits of the scan before the current scan,
+ // i.e. the bottom half when rendering incomplete scans.
+ int (*coef_bits_latch)[SAVED_COEFS];  // allocated in ProcessSOS, one row per component, entries start at -1
+ int (*prev_coef_bits_latch)[SAVED_COEFS];  // allocated in ProcessSOS, one row per component
+ bool apply_smoothing;
+ };
+
+#endif // LIB_JPEGLI_DECODE_INTERNAL_H_
diff --git a/lib/jpegli/decode_marker.cc b/lib/jpegli/decode_marker.cc
new file mode 100644
index 0000000..c5c5790
--- /dev/null
+++ b/lib/jpegli/decode_marker.cc
@@ -0,0 +1,588 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode_marker.h"
+
+#include <string.h>
+
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/huffman.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jxl/base/printf_macros.h"
+
+namespace jpegli {
+namespace {
+
+ constexpr int kMaxDimPixels = 65535;  // upper bound accepted for image_width / image_height in ProcessSOF
+ constexpr uint8_t kIccProfileTag[12] = "ICC_PROFILE";  // 11 characters plus the terminating NUL; ProcessAPP compares all 12 bytes
+
+ // Macros for commonly used error conditions. Each expands to an `if` that
+ // returns JPEGLI_ERROR(...); the LEN and MARKER_END ones use locals pos/len.
+ #define JPEG_VERIFY_LEN(n) \
+ if (pos + (n) > len) { \
+ return JPEGLI_ERROR("Unexpected end of marker: pos=%" PRIuS \
+ " need=%d len=%" PRIuS, \
+ pos, static_cast<int>(n), len); \
+ }
+
+ #define JPEG_VERIFY_INPUT(var, low, high) \
+ if ((var) < (low) || (var) > (high)) { \
+ return JPEGLI_ERROR("Invalid " #var ": %d", static_cast<int>(var)); \
+ }
+
+ #define JPEG_VERIFY_MARKER_END() \
+ if (pos != len) { \
+ return JPEGLI_ERROR("Invalid marker length: declared=%" PRIuS \
+ " actual=%" PRIuS, \
+ len, pos); \
+ }
+
+ // Returns the byte at data[*pos] and advances *pos by one. No bounds check is
+ // done here.
+ inline int ReadUint8(const uint8_t* data, size_t* pos) {
+   const size_t at = *pos;
+   *pos = at + 1;
+   return data[at];
+ }
+
+ // Returns the big-endian 16-bit value stored at data[*pos], data[*pos + 1]
+ // and advances *pos by two. No bounds check is done here.
+ inline int ReadUint16(const uint8_t* data, size_t* pos) {
+   const uint8_t* bytes = data + *pos;
+   const int high = bytes[0];
+   const int low = bytes[1];
+   *pos += 2;
+   return (high << 8) | low;
+ }
+
+ // Parses a Start Of Frame segment. Parsing starts at offset 2 of `data` and
+ // must consume exactly `len` bytes. On success the frame-level fields of
+ // `cinfo` are filled in: precision, dimensions, the per-component sampling
+ // factors and quantization table indexes, the JPEG and default output color
+ // spaces, and the derived iMCU / block dimensions. Every validation failure
+ // is reported through JPEGLI_ERROR and leaves the function immediately, so no
+ // further state is derived from a rejected header.
+ void ProcessSOF(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+   jpeg_decomp_master* m = cinfo->master;
+   if (!m->found_soi_) {
+     return JPEGLI_ERROR("Unexpected SOF marker.");
+   }
+   if (m->found_sof_) {
+     return JPEGLI_ERROR("Duplicate SOF marker.");
+   }
+   m->found_sof_ = true;
+   // Marker 0xc2 selects progressive mode.
+   cinfo->progressive_mode = (cinfo->unread_marker == 0xc2);
+   cinfo->arith_code = 0;
+   size_t pos = 2;
+   // precision (1) + height (2) + width (2) + number of components (1).
+   JPEG_VERIFY_LEN(6);
+   cinfo->data_precision = ReadUint8(data, &pos);
+   cinfo->image_height = ReadUint16(data, &pos);
+   cinfo->image_width = ReadUint16(data, &pos);
+   cinfo->num_components = ReadUint8(data, &pos);
+   JPEG_VERIFY_INPUT(cinfo->data_precision, kJpegPrecision, kJpegPrecision);
+   JPEG_VERIFY_INPUT(cinfo->image_height, 1, kMaxDimPixels);
+   JPEG_VERIFY_INPUT(cinfo->image_width, 1, kMaxDimPixels);
+   JPEG_VERIFY_INPUT(cinfo->num_components, 1, kMaxComponents);
+   // Three bytes per component: id, sampling factors, quant table index.
+   JPEG_VERIFY_LEN(3 * cinfo->num_components);
+   cinfo->comp_info = jpegli::Allocate<jpeg_component_info>(
+       cinfo, cinfo->num_components, JPOOL_IMAGE);
+
+   // Read sampling factors and quant table index for each component.
+   uint8_t ids_seen[256] = {0};
+   cinfo->max_h_samp_factor = 1;
+   cinfo->max_v_samp_factor = 1;
+   for (int i = 0; i < cinfo->num_components; ++i) {
+     jpeg_component_info* comp = &cinfo->comp_info[i];
+     comp->component_index = i;
+     const int id = ReadUint8(data, &pos);
+     if (ids_seen[id]) {  // (cf. section B.2.2, syntax of Ci)
+       return JPEGLI_ERROR("Duplicate ID %d in SOF.", id);
+     }
+     ids_seen[id] = 1;
+     comp->component_id = id;
+     // High nibble: horizontal factor, low nibble: vertical factor.
+     int factor = ReadUint8(data, &pos);
+     int h_samp_factor = factor >> 4;
+     int v_samp_factor = factor & 0xf;
+     JPEG_VERIFY_INPUT(h_samp_factor, 1, MAX_SAMP_FACTOR);
+     JPEG_VERIFY_INPUT(v_samp_factor, 1, MAX_SAMP_FACTOR);
+     comp->h_samp_factor = h_samp_factor;
+     comp->v_samp_factor = v_samp_factor;
+     cinfo->max_h_samp_factor =
+         std::max(cinfo->max_h_samp_factor, h_samp_factor);
+     cinfo->max_v_samp_factor =
+         std::max(cinfo->max_v_samp_factor, v_samp_factor);
+     int quant_tbl_idx = ReadUint8(data, &pos);
+     JPEG_VERIFY_INPUT(quant_tbl_idx, 0, NUM_QUANT_TBLS - 1);
+     comp->quant_tbl_no = quant_tbl_idx;
+     // The referenced table must already have been defined.
+     if (cinfo->quant_tbl_ptrs[quant_tbl_idx] == nullptr) {
+       return JPEGLI_ERROR("Quantization table with index %u not found",
+                           quant_tbl_idx);
+     }
+     comp->quant_table = nullptr;  // will be allocated after SOS marker
+   }
+   JPEG_VERIFY_MARKER_END();
+
+   // Set the input colorspace based on the markers we have seen and set
+   // default output colorspace. Two-component frames leave both fields
+   // untouched.
+   if (cinfo->num_components == 1) {
+     cinfo->jpeg_color_space = JCS_GRAYSCALE;
+     cinfo->out_color_space = JCS_GRAYSCALE;
+   } else if (cinfo->num_components == 3) {
+     if (cinfo->saw_JFIF_marker) {
+       cinfo->jpeg_color_space = JCS_YCbCr;
+     } else if (cinfo->saw_Adobe_marker) {
+       cinfo->jpeg_color_space =
+           cinfo->Adobe_transform == 0 ? JCS_RGB : JCS_YCbCr;
+     } else {
+       // Without JFIF/Adobe markers, component ids 'R','G','B' select RGB.
+       cinfo->jpeg_color_space = JCS_YCbCr;
+       if (cinfo->comp_info[0].component_id == 'R' &&  //
+           cinfo->comp_info[1].component_id == 'G' &&  //
+           cinfo->comp_info[2].component_id == 'B') {
+         cinfo->jpeg_color_space = JCS_RGB;
+       }
+     }
+     cinfo->out_color_space = JCS_RGB;
+   } else if (cinfo->num_components == 4) {
+     if (cinfo->saw_Adobe_marker) {
+       cinfo->jpeg_color_space =
+           cinfo->Adobe_transform == 0 ? JCS_CMYK : JCS_YCCK;
+     } else {
+       cinfo->jpeg_color_space = JCS_CMYK;
+     }
+     cinfo->out_color_space = JCS_CMYK;
+   }
+
+   // We have checked above that none of the sampling factors are 0, so the max
+   // sampling factors can not be 0.
+   cinfo->total_iMCU_rows =
+       DivCeil(cinfo->image_height, cinfo->max_v_samp_factor * DCTSIZE);
+   m->iMCU_cols_ =
+       DivCeil(cinfo->image_width, cinfo->max_h_samp_factor * DCTSIZE);
+   // Compute the block dimensions for each component.
+   for (int i = 0; i < cinfo->num_components; ++i) {
+     jpeg_component_info* comp = &cinfo->comp_info[i];
+     if (cinfo->max_h_samp_factor % comp->h_samp_factor != 0 ||
+         cinfo->max_v_samp_factor % comp->v_samp_factor != 0) {
+       return JPEGLI_ERROR("Non-integral subsampling ratios.");
+     }
+     m->h_factor[i] = cinfo->max_h_samp_factor / comp->h_samp_factor;
+     m->v_factor[i] = cinfo->max_v_samp_factor / comp->v_samp_factor;
+     comp->downsampled_width = DivCeil(cinfo->image_width, m->h_factor[i]);
+     comp->downsampled_height = DivCeil(cinfo->image_height, m->v_factor[i]);
+     comp->width_in_blocks = DivCeil(comp->downsampled_width, DCTSIZE);
+     comp->height_in_blocks = DivCeil(comp->downsampled_height, DCTSIZE);
+   }
+   // No scan has contributed any coefficient bits yet.
+   memset(m->scan_progression_, 0, sizeof(m->scan_progression_));
+ }
+
+ // Parses a Start Of Scan segment. Parsing starts at offset 2 of `data` and
+ // must consume exactly `len` bytes. It resolves the components of the scan,
+ // records their DC/AC Huffman table slots and MCU layout, reads the spectral
+ // selection (Ss, Se) and successive approximation (Ah, Al) parameters, and
+ // checks them against the bits already covered by earlier scans
+ // (m->scan_progression_). Every validation failure is reported through
+ // JPEGLI_ERROR and leaves the function immediately.
+ void ProcessSOS(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+   jpeg_decomp_master* m = cinfo->master;
+   if (!m->found_sof_) {
+     return JPEGLI_ERROR("Unexpected SOS marker.");
+   }
+   size_t pos = 2;
+   JPEG_VERIFY_LEN(1);
+   cinfo->comps_in_scan = ReadUint8(data, &pos);
+   JPEG_VERIFY_INPUT(cinfo->comps_in_scan, 1, cinfo->num_components);
+   JPEG_VERIFY_INPUT(cinfo->comps_in_scan, 1, MAX_COMPS_IN_SCAN);
+
+   // Two bytes per scan component: component id, Huffman table selectors.
+   JPEG_VERIFY_LEN(2 * cinfo->comps_in_scan);
+   bool is_interleaved = (cinfo->comps_in_scan > 1);
+   uint8_t ids_seen[256] = {0};
+   cinfo->blocks_in_MCU = 0;
+   for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+     int id = ReadUint8(data, &pos);
+     if (ids_seen[id]) {  // (cf. section B.2.3, regarding CSj)
+       return JPEGLI_ERROR("Duplicate ID %d in SOS.", id);
+     }
+     ids_seen[id] = 1;
+     // Look up the frame component that carries this id.
+     jpeg_component_info* comp = nullptr;
+     for (int j = 0; j < cinfo->num_components; ++j) {
+       if (cinfo->comp_info[j].component_id == id) {
+         comp = &cinfo->comp_info[j];
+         cinfo->cur_comp_info[i] = comp;
+       }
+     }
+     if (!comp) {
+       return JPEGLI_ERROR("SOS marker: Could not find component with id %d",
+                           id);
+     }
+     // High nibble: DC table slot, low nibble: AC table slot.
+     int c = ReadUint8(data, &pos);
+     comp->dc_tbl_no = c >> 4;
+     comp->ac_tbl_no = c & 0xf;
+     JPEG_VERIFY_INPUT(comp->dc_tbl_no, 0, 3);
+     JPEG_VERIFY_INPUT(comp->ac_tbl_no, 0, 3);
+     // A single-component scan always has one block per MCU.
+     comp->MCU_width = is_interleaved ? comp->h_samp_factor : 1;
+     comp->MCU_height = is_interleaved ? comp->v_samp_factor : 1;
+     comp->MCU_blocks = comp->MCU_width * comp->MCU_height;
+     // Must stop here: the loop below writes one MCU_membership entry per
+     // block.
+     if (cinfo->blocks_in_MCU + comp->MCU_blocks > D_MAX_BLOCKS_IN_MCU) {
+       return JPEGLI_ERROR("Too many blocks in MCU.");
+     }
+     for (int j = 0; j < comp->MCU_blocks; ++j) {
+       cinfo->MCU_membership[cinfo->blocks_in_MCU++] = i;
+     }
+   }
+   JPEG_VERIFY_LEN(3);
+   cinfo->Ss = ReadUint8(data, &pos);
+   cinfo->Se = ReadUint8(data, &pos);
+   JPEG_VERIFY_INPUT(cinfo->Ss, 0, 63);
+   JPEG_VERIFY_INPUT(cinfo->Se, cinfo->Ss, 63);
+   int c = ReadUint8(data, &pos);
+   cinfo->Ah = c >> 4;
+   cinfo->Al = c & 0xf;
+   JPEG_VERIFY_MARKER_END();
+
+   // The multiscan decision is taken once, on the first scan.
+   if (cinfo->input_scan_number == 0) {
+     m->is_multiscan_ = (cinfo->comps_in_scan < cinfo->num_components ||
+                         cinfo->progressive_mode);
+   }
+   if (cinfo->Ah != 0 && cinfo->Al != cinfo->Ah - 1) {
+     // section G.1.1.1.2 : Successive approximation control only improves
+     // by one bit at a time.
+     return JPEGLI_ERROR("Invalid progressive parameters: Al=%d Ah=%d",
+                         cinfo->Al, cinfo->Ah);
+   }
+   // Non-progressive scans always cover all coefficients at full precision.
+   if (!cinfo->progressive_mode) {
+     cinfo->Ss = 0;
+     cinfo->Se = 63;
+     cinfo->Ah = 0;
+     cinfo->Al = 0;
+   }
+   // Bits delivered by this scan: everything from bit Al upwards for a first
+   // pass (Ah == 0), only bit Al for a refinement pass.
+   const uint16_t scan_bitmask =
+       cinfo->Ah == 0 ? (0xffff << cinfo->Al) : (1u << cinfo->Al);
+   // Bits below Al; none of them may have been sent before this scan.
+   const uint16_t refinement_bitmask = (1 << cinfo->Al) - 1;
+   if (!cinfo->coef_bits) {
+     cinfo->coef_bits =
+         Allocate<int[DCTSIZE2]>(cinfo, cinfo->num_components * 2, JPOOL_IMAGE);
+     m->coef_bits_latch =
+         Allocate<int[SAVED_COEFS]>(cinfo, cinfo->num_components, JPOOL_IMAGE);
+     m->prev_coef_bits_latch =
+         Allocate<int[SAVED_COEFS]>(cinfo, cinfo->num_components, JPOOL_IMAGE);
+
+     for (int c = 0; c < cinfo->num_components; ++c) {
+       for (int i = 0; i < DCTSIZE2; ++i) {
+         cinfo->coef_bits[c][i] = -1;
+         if (i < SAVED_COEFS) {
+           m->coef_bits_latch[c][i] = -1;
+         }
+       }
+     }
+   }
+
+   for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+     int comp_idx = cinfo->cur_comp_info[i]->component_index;
+     for (int k = cinfo->Ss; k <= cinfo->Se; ++k) {
+       // The messages report the mask that was tested, i.e. the one of the
+       // frame component (comp_idx), not the one at scan position i.
+       if (m->scan_progression_[comp_idx][k] & scan_bitmask) {
+         return JPEGLI_ERROR(
+             "Overlapping scans: component=%d k=%d prev_mask: %u cur_mask %u",
+             comp_idx, k, m->scan_progression_[comp_idx][k], scan_bitmask);
+       }
+       if (m->scan_progression_[comp_idx][k] & refinement_bitmask) {
+         return JPEGLI_ERROR(
+             "Invalid scan order, a more refined scan was already done: "
+             "component=%d k=%d prev_mask=%u cur_mask=%u",
+             comp_idx, k, m->scan_progression_[comp_idx][k], scan_bitmask);
+       }
+       m->scan_progression_[comp_idx][k] |= scan_bitmask;
+     }
+   }
+   if (cinfo->Al > 10) {
+     return JPEGLI_ERROR("Scan parameter Al=%d is not supported.", cinfo->Al);
+   }
+ }
+
+ // Reads a Define Huffman Table (DHT) marker segment, which may hold several
+ // tables. Each table is stored in the JHUFF_TBL of the addressed slot in
+ // cinfo->dc_huff_tbl_ptrs or cinfo->ac_huff_tbl_ptrs, depending on the type
+ // and slot_id of the Huffman code being read; the JHUFF_TBL is allocated on
+ // first use.
+ void ProcessDHT(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+   size_t pos = 2;
+   if (pos == len) {
+     return JPEGLI_ERROR("DHT marker: no Huffman table found");
+   }
+   while (pos < len) {
+     // Slot byte plus one count byte per code length.
+     JPEG_VERIFY_LEN(1 + kJpegHuffmanMaxBitLength);
+     // Bit 0x10 of the slot byte marks an AC table; the remaining value is the
+     // index of the table within its class.
+     const int slot_id = ReadUint8(data, &pos);
+     const int is_ac_table = (slot_id & 0x10) != 0;
+     const int huffman_index = slot_id - (is_ac_table ? 0x10 : 0);
+     JPEG_VERIFY_INPUT(huffman_index, 0, NUM_HUFF_TBLS - 1);
+     JHUFF_TBL** table = is_ac_table ? &cinfo->ac_huff_tbl_ptrs[huffman_index]
+                                     : &cinfo->dc_huff_tbl_ptrs[huffman_index];
+     if (*table == nullptr) {
+       *table = jpegli_alloc_huff_table(reinterpret_cast<j_common_ptr>(cinfo));
+     }
+     JHUFF_TBL* huff = *table;
+
+     // Number of codes of each bit length; bits[0] is not written.
+     int total_count = 0;
+     for (size_t nbits = 1; nbits <= kJpegHuffmanMaxBitLength; ++nbits) {
+       const int count = ReadUint8(data, &pos);
+       huff->bits[nbits] = count;
+       total_count += count;
+     }
+     if (!is_ac_table) {
+       // Allow symbols up to 15 here, we check later whether any invalid symbols
+       // are actually decoded.
+       // TODO(szabadka) Make sure decoder works (does not crash) with up to
+       // 15-nbits DC symbols and then increase kJpegDCAlphabetSize.
+       JPEG_VERIFY_INPUT(total_count, 0, 16);
+     } else {
+       JPEG_VERIFY_INPUT(total_count, 0, kJpegHuffmanAlphabetSize);
+     }
+
+     // Symbol values, followed by zero padding up to the alphabet size.
+     JPEG_VERIFY_LEN(total_count);
+     int next = 0;
+     while (next < total_count) {
+       int value = ReadUint8(data, &pos);
+       if (!is_ac_table) {
+         JPEG_VERIFY_INPUT(value, 0, 15);
+       }
+       huff->huffval[next] = value;
+       ++next;
+     }
+     while (next < kJpegHuffmanAlphabetSize) {
+       huff->huffval[next] = 0;
+       ++next;
+     }
+   }
+   JPEG_VERIFY_MARKER_END();
+ }
+
+ // Parses a Define Quantization Table segment, which may hold several tables.
+ // Parsing starts at offset 2 of `data` and must consume exactly `len` bytes.
+ // Each table is stored in cinfo->quant_tbl_ptrs[index] (allocated on first
+ // use). Tables are rejected once a SOF marker has been seen; that error now
+ // leaves the function immediately, like every other error here, so no
+ // existing table is overwritten afterwards.
+ void ProcessDQT(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+   jpeg_decomp_master* m = cinfo->master;
+   if (m->found_sof_) {
+     return JPEGLI_ERROR(
+         "Updating quant tables between scans is not supported.");
+   }
+   size_t pos = 2;
+   if (pos == len) {
+     return JPEGLI_ERROR("DQT marker: no quantization table found");
+   }
+   while (pos < len) {
+     JPEG_VERIFY_LEN(1);
+     // High nibble: precision (0 = one byte per value, 1 = two bytes), low
+     // nibble: table index.
+     int quant_table_index = ReadUint8(data, &pos);
+     int precision = quant_table_index >> 4;
+     JPEG_VERIFY_INPUT(precision, 0, 1);
+     quant_table_index &= 0xf;
+     JPEG_VERIFY_INPUT(quant_table_index, 0, NUM_QUANT_TBLS - 1);
+     JPEG_VERIFY_LEN((precision + 1) * DCTSIZE2);
+
+     if (cinfo->quant_tbl_ptrs[quant_table_index] == nullptr) {
+       cinfo->quant_tbl_ptrs[quant_table_index] =
+           jpegli_alloc_quant_table(reinterpret_cast<j_common_ptr>(cinfo));
+     }
+     JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[quant_table_index];
+
+     // The i-th value read is stored at position kJPEGNaturalOrder[i].
+     for (size_t i = 0; i < DCTSIZE2; ++i) {
+       int quant_val =
+           precision ? ReadUint16(data, &pos) : ReadUint8(data, &pos);
+       JPEG_VERIFY_INPUT(quant_val, 1, 65535);
+       quant_table->quantval[kJPEGNaturalOrder[i]] = quant_val;
+     }
+   }
+   JPEG_VERIFY_MARKER_END();
+ }
+
+ // Handler for the DNL marker: the segment is accepted and has no effect.
+ void ProcessDNL(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+   // Ignore marker.
+   (void)cinfo;
+   (void)data;
+   (void)len;
+ }
+
+// Parses a DRI marker segment and stores the 16-bit value it carries in
+// cinfo->restart_interval. A second DRI marker in the same stream is reported
+// as an error.
+void ProcessDRI(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+ jpeg_decomp_master* m = cinfo->master;
+ if (m->found_dri_) {
+ return JPEGLI_ERROR("Duplicate DRI marker.");
+ }
+ m->found_dri_ = true;
+ // Skip the two-byte segment length field.
+ size_t pos = 2;
+ // NOTE(review): JPEG_VERIFY_LEN / JPEG_VERIFY_MARKER_END are defined outside
+ // this chunk; presumably they check `pos` against `len` -- confirm.
+ JPEG_VERIFY_LEN(2);
+ cinfo->restart_interval = ReadUint16(data, &pos);
+ JPEG_VERIFY_MARKER_END();
+}
+
+// Extracts the information the decoder keeps from application segments:
+//  * APP0 starting with "JFIF": version, density unit and densities;
+//  * APP14 starting with "Adobe": the transform byte;
+//  * APP2 starting with kIccProfileTag: one chunk of the ICC profile, which is
+//    appended to m->icc_profile_ after its index/total bytes are validated.
+// Every other application segment is left untouched.
+void ProcessAPP(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+  const uint8_t marker = cinfo->unread_marker;
+  // The payload follows the two-byte segment length field.
+  const uint8_t* payload = data + 2;
+  const size_t payload_size = len - 2;
+
+  switch (marker) {
+    case 0xE0: {
+      if (payload_size < 14 || memcmp(payload, "JFIF", 4) != 0) {
+        return;
+      }
+      cinfo->saw_JFIF_marker = TRUE;
+      cinfo->JFIF_major_version = payload[5];
+      cinfo->JFIF_minor_version = payload[6];
+      cinfo->density_unit = payload[7];
+      // Densities are stored as 16-bit big-endian values.
+      cinfo->X_density = (payload[8] << 8) | payload[9];
+      cinfo->Y_density = (payload[10] << 8) | payload[11];
+      return;
+    }
+    case 0xEE: {
+      if (payload_size < 12 || memcmp(payload, "Adobe", 5) != 0) {
+        return;
+      }
+      cinfo->saw_Adobe_marker = TRUE;
+      cinfo->Adobe_transform = payload[11];
+      return;
+    }
+    case 0xE2:
+      break;
+    default:
+      return;
+  }
+
+  // APP2: only segments carrying the ICC tag are of interest.
+  if (payload_size < sizeof(kIccProfileTag) ||
+      memcmp(payload, kIccProfileTag, sizeof(kIccProfileTag)) != 0) {
+    return;
+  }
+  const uint8_t* chunk = payload + sizeof(kIccProfileTag);
+  const size_t chunk_size = payload_size - sizeof(kIccProfileTag);
+  if (chunk_size < 2) {
+    return JPEGLI_ERROR("ICC chunk is too small.");
+  }
+  const uint8_t index = chunk[0];
+  const uint8_t total = chunk[1];
+
+  jpeg_decomp_master* m = cinfo->master;
+  // Chunks have to arrive in order, so the running counter must match the
+  // index stored in the chunk.
+  ++m->icc_index_;
+  if (m->icc_index_ != index) {
+    return JPEGLI_ERROR("Invalid ICC chunk order.");
+  }
+  if (total == 0) {
+    return JPEGLI_ERROR("Invalid ICC chunk total.");
+  }
+  // The first chunk fixes the total; all later chunks must repeat it.
+  if (m->icc_total_ == 0) {
+    m->icc_total_ = total;
+  } else if (m->icc_total_ != total) {
+    return JPEGLI_ERROR("Invalid ICC chunk total.");
+  }
+  if (m->icc_index_ > m->icc_total_) {
+    return JPEGLI_ERROR("Invalid ICC chunk index.");
+  }
+  m->icc_profile_.insert(m->icc_profile_.end(), chunk + 2,
+                         chunk + chunk_size);
+}
+
+// COM (comment) segments are accepted but their content is not used here.
+void ProcessCOM(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+  // Intentionally empty: nothing from this segment is stored.
+  (void)cinfo;
+  (void)data;
+  (void)len;
+}
+
+void ProcessSOI(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+ jpeg_decomp_master* m = cinfo->master;
+ if (m->found_soi_) {
+ JPEGLI_ERROR("Duplicate SOI marker");
+ }
+ m->found_soi_ = true;
+}
+
+// Records that the EOI marker was seen.
+void ProcessEOI(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+  jpeg_decomp_master* const master = cinfo->master;
+  master->found_eoi_ = true;
+}
+
+// Copies the payload of the current marker segment (everything after the
+// two-byte length field) into a new jpeg_marker_struct and makes that struct
+// the new head of cinfo->marker_list. Both the struct and the payload copy are
+// allocated with JPOOL_IMAGE.
+void SaveMarker(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+  const uint8_t marker = cinfo->unread_marker;
+  const uint8_t* const payload = data + 2;
+  const size_t payload_size = len - 2;
+
+  // Link the new entry in front of the existing list.
+  jpeg_saved_marker_ptr const old_head = cinfo->marker_list;
+  jpeg_marker_struct* const saved =
+      jpegli::Allocate<jpeg_marker_struct>(cinfo, 1, JPOOL_IMAGE);
+  cinfo->marker_list = saved;
+  saved->next = old_head;
+  saved->marker = marker;
+  saved->original_length = payload_size;
+  saved->data_length = payload_size;
+  saved->data = jpegli::Allocate<uint8_t>(cinfo, payload_size, JPOOL_IMAGE);
+  memcpy(saved->data, payload, payload_size);
+}
+
+// Processes a single marker found at or after data[*pos].
+//
+// If cinfo->unread_marker is 0, the input is first scanned for the next valid
+// 0xff/marker byte pair, which is consumed and remembered in
+// cinfo->unread_marker. Then, unless GetMarkerProcessor() reports a registered
+// callback for the marker, the segment length is read, the segment is
+// optionally saved, the matching Process*() function is called, *pos is moved
+// past the segment and cinfo->unread_marker is cleared.
+//
+// Returns:
+//  * kNeedMoreInput if the buffer ends before the marker or its whole segment
+//    is available (an already consumed marker stays in cinfo->unread_marker);
+//  * kHandleMarkerProcessor if a callback is registered for the marker;
+//  * JPEG_REACHED_SOS / JPEG_REACHED_EOI after an SOS / EOI marker;
+//  * kProcessNextMarker after any other marker.
+uint8_t ProcessNextMarker(j_decompress_ptr cinfo, const uint8_t* const data,
+ const size_t len, size_t* pos) {
+ jpeg_decomp_master* m = cinfo->master;
+ size_t num_skipped = 0;
+ uint8_t marker = cinfo->unread_marker;
+ if (marker == 0) {
+ // kIsValidMarker[i] == 1 means (0xc0 + i) is a valid marker.
+ static const uint8_t kIsValidMarker[] = {
+ 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+ };
+ // Skip bytes between markers.
+ while (*pos + 1 < len && (data[*pos] != 0xff || data[*pos + 1] < 0xc0 ||
+ !kIsValidMarker[data[*pos + 1] - 0xc0])) {
+ ++(*pos);
+ ++num_skipped;
+ }
+ // Fewer than two bytes left: the marker bytes are not available yet.
+ if (*pos + 2 > len) {
+ return kNeedMoreInput;
+ }
+ marker = data[*pos + 1];
+ if (num_skipped > 0) {
+ // Garbage between markers is tolerated with a warning, but garbage before
+ // the very first marker is an error.
+ if (m->found_soi_) {
+ JPEGLI_WARN("Skipped %d bytes before marker 0x%02x", (int)num_skipped,
+ marker);
+ } else {
+ JPEGLI_ERROR("Did not find SOI marker.");
+ }
+ }
+ *pos += 2;
+ // Remember the consumed marker so that a later call can resume with it.
+ cinfo->unread_marker = marker;
+ }
+ // The first marker of the stream has to be SOI (0xd8).
+ if (!m->found_soi_ && marker != 0xd8) {
+ JPEGLI_ERROR("Did not find SOI marker.");
+ }
+ // A registered callback takes over; the caller has to invoke it.
+ if (GetMarkerProcessor(cinfo)) {
+ return kHandleMarkerProcessor;
+ }
+ const uint8_t* marker_data = &data[*pos];
+ size_t marker_len = 0;
+ // SOI (0xd8) and EOI (0xd9) have no length field and no payload.
+ if (marker != 0xd8 && marker != 0xd9) {
+ if (*pos + 2 > len) {
+ return kNeedMoreInput;
+ }
+ // Big-endian segment length; it includes the two length bytes themselves.
+ marker_len += (data[*pos] << 8) + data[*pos + 1];
+ if (marker_len < 2) {
+ JPEGLI_ERROR("Invalid marker length");
+ }
+ if (*pos + marker_len > len) {
+ // TODO(szabadka) Limit our memory usage by using the skip_input_data
+ // source manager callback on APP markers that are not saved.
+ return kNeedMoreInput;
+ }
+ if (marker >= 0xe0 && m->markers_to_save_[marker - 0xe0]) {
+ SaveMarker(cinfo, marker_data, marker_len);
+ }
+ }
+ // Dispatch on the marker code.
+ if (marker == 0xc0 || marker == 0xc1 || marker == 0xc2) {
+ ProcessSOF(cinfo, marker_data, marker_len);
+ } else if (marker == 0xc4) {
+ ProcessDHT(cinfo, marker_data, marker_len);
+ } else if (marker == 0xda) {
+ ProcessSOS(cinfo, marker_data, marker_len);
+ } else if (marker == 0xdb) {
+ ProcessDQT(cinfo, marker_data, marker_len);
+ } else if (marker == 0xdc) {
+ ProcessDNL(cinfo, marker_data, marker_len);
+ } else if (marker == 0xdd) {
+ ProcessDRI(cinfo, marker_data, marker_len);
+ } else if (marker >= 0xe0 && marker <= 0xef) {
+ ProcessAPP(cinfo, marker_data, marker_len);
+ } else if (marker == 0xfe) {
+ ProcessCOM(cinfo, marker_data, marker_len);
+ } else if (marker == 0xd8) {
+ ProcessSOI(cinfo, marker_data, marker_len);
+ } else if (marker == 0xd9) {
+ ProcessEOI(cinfo, marker_data, marker_len);
+ } else {
+ JPEGLI_ERROR("Unexpected marker 0x%x", marker);
+ }
+ // The segment is fully handled: step over it and forget the marker.
+ *pos += marker_len;
+ cinfo->unread_marker = 0;
+ if (marker == 0xda) {
+ return JPEG_REACHED_SOS;
+ } else if (marker == 0xd9) {
+ return JPEG_REACHED_EOI;
+ }
+ return kProcessNextMarker;
+}
+
+} // namespace
+
+// Returns the callback stored for the marker in cinfo->unread_marker: the
+// matching entry of app_marker_parsers for APP0..APP15, com_marker_parser for
+// COM, and nullptr for every other marker.
+jpeg_marker_parser_method GetMarkerProcessor(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* master = cinfo->master;
+  const uint8_t marker = cinfo->unread_marker;
+  if (marker == 0xfe) {
+    return master->com_marker_parser;
+  }
+  const bool is_app_marker = marker >= 0xe0 && marker <= 0xef;
+  if (is_app_marker) {
+    return master->app_marker_parsers[marker - 0xe0];
+  }
+  return nullptr;
+}
+
+// Keeps processing markers until ProcessNextMarker() reports anything other
+// than kProcessNextMarker, and returns that status.
+int ProcessMarkers(j_decompress_ptr cinfo, const uint8_t* const data,
+                   const size_t len, size_t* pos) {
+  int status;
+  do {
+    status = ProcessNextMarker(cinfo, data, len, pos);
+  } while (status == kProcessNextMarker);
+  return status;
+}
+
+} // namespace jpegli
diff --git a/lib/jpegli/decode_marker.h b/lib/jpegli/decode_marker.h
new file mode 100644
index 0000000..fb24b3e
--- /dev/null
+++ b/lib/jpegli/decode_marker.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_DECODE_MARKER_H_
+#define LIB_JPEGLI_DECODE_MARKER_H_
+
+#include <stdint.h>
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+// Reads the available input in the source manager's input buffer until either
+// the end of the next SOS marker or the end of the input.
+// The corresponding fields of cinfo are updated with the processed input data.
+// Upon return, the input buffer will be at the start or at the end of a marker
+// data segment (inter-marker data is allowed).
+// Return value is one of:
+// * JPEG_SUSPENDED, if the current input buffer ends before the next SOS or
+// EOI marker. Input buffer refill is handled by the caller;
+// * JPEG_REACHED_SOS, if the next SOS marker is found;
+// * JPEG_REACHED_EOI, if the EOI marker at the end of the input is found.
+int ProcessMarkers(j_decompress_ptr cinfo, const uint8_t* const data,
+ const size_t len, size_t* pos);
+
+jpeg_marker_parser_method GetMarkerProcessor(j_decompress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_DECODE_MARKER_H_
diff --git a/lib/jpegli/decode_scan.cc b/lib/jpegli/decode_scan.cc
new file mode 100644
index 0000000..05b1f37
--- /dev/null
+++ b/lib/jpegli/decode_scan.cc
@@ -0,0 +1,566 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode_scan.h"
+
+#include <string.h>
+
+#include <hwy/base.h>
+
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jxl/base/status.h"
+
+namespace jpegli {
+namespace {
+
+// Max 14 blocks per MCU (when 1 channel is subsampled)
+// Max 64 nonzero coefficients per block
+// Max 16 symbol bits plus 11 extra bits per nonzero symbol
+// Max 2 bytes per 8 bits (worst case is all bytes are escaped 0xff)
+constexpr int kMaxMCUByteSize = 6048;
+
+// Helper structure to read bits from the entropy coded data segment.
+//
+// Bytes are pulled from data_[pos_...] into the 64-bit window val_, most
+// recent byte in the low bits; bits_left_ counts the not yet consumed bits at
+// the low end of the window. Once a marker is detected (or the buffer ends),
+// the reader keeps delivering zero bytes so that decoding can run to the end
+// of the current MCU; FinishStream() then tells whether real data ran out.
+struct BitReaderState {
+ // Starts reading at data[pos]; the window is filled right away.
+ BitReaderState(const uint8_t* data, const size_t len, size_t pos)
+ : data_(data), len_(len), start_pos_(pos) {
+ Reset(pos);
+ }
+
+ // Restarts reading at `pos` with an empty window and no known marker.
+ void Reset(size_t pos) {
+ pos_ = pos;
+ val_ = 0;
+ bits_left_ = 0;
+ next_marker_pos_ = len_;
+ FillBitWindow();
+ }
+
+ // Returns the next byte and skips the 0xff/0x00 escape sequences.
+ uint8_t GetNextByte() {
+ // Past the marker / end of the buffer: deliver zeros, but keep counting in
+ // pos_ so that FinishStream() can see how far reading went.
+ if (pos_ >= next_marker_pos_) {
+ ++pos_;
+ return 0;
+ }
+ uint8_t c = data_[pos_++];
+ if (c == 0xff) {
+ // A 0xff as the very last byte of the buffer is handled like an escaped
+ // 0xff.
+ uint8_t escape = pos_ < len_ ? data_[pos_] : 0;
+ if (escape == 0) {
+ ++pos_;
+ } else {
+ // 0xff was followed by a non-zero byte, which means that we found the
+ // start of the next marker segment.
+ next_marker_pos_ = pos_ - 1;
+ }
+ }
+ return c;
+ }
+
+ // Refills the window only when 16 or fewer bits are left, and then loads
+ // whole bytes until more than 56 bits are available.
+ void FillBitWindow() {
+ if (bits_left_ <= 16) {
+ while (bits_left_ <= 56) {
+ val_ <<= 8;
+ val_ |= (uint64_t)GetNextByte();
+ bits_left_ += 8;
+ }
+ }
+ }
+
+ // Consumes and returns the next `nbits` bits, first bit read in the highest
+ // position of the result.
+ int ReadBits(int nbits) {
+ FillBitWindow();
+ uint64_t val = (val_ >> (bits_left_ - nbits)) & ((1ULL << nbits) - 1);
+ bits_left_ -= nbits;
+ return val;
+ }
+
+ // Sets *pos to the next stream position, and *bit_pos to the bit position
+ // within the next byte where parsing should continue.
+ // Returns false if the stream ended too early.
+ bool FinishStream(size_t* pos, size_t* bit_pos) {
+ // Number of bits already consumed from the byte that is only partly used.
+ *bit_pos = (8 - (bits_left_ & 7)) & 7;
+ // Give back some bytes that we did not use.
+ int unused_bytes_left = DivCeil(bits_left_, 8);
+ while (unused_bytes_left-- > 0) {
+ --pos_;
+ // If we give back a 0 byte, we need to check if it was a 0xff/0x00 escape
+ // sequence, and if yes, we need to give back one more byte.
+ if (((pos_ == len_ && pos_ == next_marker_pos_) ||
+ (pos_ > 0 && pos_ < next_marker_pos_ && data_[pos_] == 0)) &&
+ (data_[pos_ - 1] == 0xff)) {
+ --pos_;
+ }
+ }
+ if (pos_ >= next_marker_pos_) {
+ *pos = next_marker_pos_;
+ if (pos_ > next_marker_pos_ || *bit_pos > 0) {
+ // Data ran out before the scan was complete.
+ return false;
+ }
+ }
+ *pos = pos_;
+ return true;
+ }
+
+ // Input buffer and its size.
+ const uint8_t* data_;
+ const size_t len_;
+ // Position of the next byte to load into the window.
+ size_t pos_;
+ // Bit window and the number of unconsumed bits in it.
+ uint64_t val_;
+ int bits_left_;
+ // Position of the 0xff that starts the next marker, or len_ if none was seen.
+ size_t next_marker_pos_;
+ // Position given to the constructor; not read by any code visible in this
+ // file.
+ size_t start_pos_;
+};
+
+// Decodes and returns the next Huffman-coded symbol using a two-level lookup
+// table: the next 8 bits select a first-level entry; if that entry describes a
+// code longer than 8 bits, its value is the offset of a second-level table
+// that is indexed with the following (bits - 8) bits.
+int ReadSymbol(const HuffmanTableEntry* table, BitReaderState* br) {
+  br->FillBitWindow();
+  const int first_index = (br->val_ >> (br->bits_left_ - 8)) & 0xff;
+  const HuffmanTableEntry* entry = table + first_index;
+  const int extra_bits = entry->bits - 8;
+  if (extra_bits > 0) {
+    // Long code: consume the first 8 bits and continue in the second level.
+    br->bits_left_ -= 8;
+    entry += entry->value;
+    const int second_index =
+        (br->val_ >> (br->bits_left_ - extra_bits)) & ((1 << extra_bits) - 1);
+    entry += second_index;
+  }
+  // Only the bits that belong to the decoded code are consumed.
+  br->bits_left_ -= entry->bits;
+  return entry->value;
+}
+
+/**
+ * Returns the DC diff or AC value for extra bits value x and prefix code s.
+ *
+ * CCITT Rec. T.81 (1992 E)
+ * Table F.1 – Difference magnitude categories for DC coding
+ * SSSS | DIFF values
+ * ------+--------------------------
+ * 0 | 0
+ * 1 | –1, 1
+ * 2 | –3, –2, 2, 3
+ * 3 | –7..–4, 4..7
+ * ......|..........................
+ * 11 | –2047..–1024, 1024..2047
+ *
+ * CCITT Rec. T.81 (1992 E)
+ * Table F.2 – Categories assigned to coefficient values
+ * [ Same as Table F.1, but does not include SSSS equal to 0 and 11]
+ *
+ *
+ * CCITT Rec. T.81 (1992 E)
+ * F.1.2.1.1 Structure of DC code table
+ * For each category,... additional bits... appended... to uniquely identify
+ * which difference... occurred... When DIFF is positive... SSSS... bits of DIFF
+ * are appended. When DIFF is negative... SSSS... bits of (DIFF – 1) are
+ * appended... Most significant bit... is 0 for negative differences and 1 for
+ * positive differences.
+ *
+ * In other words the upper half of extra bits range represents DIFF as is.
+ * The lower half represents the negative DIFFs with an offset.
+ */
+int HuffExtend(int x, int s) {
+  JXL_DASSERT(s >= 1);
+  const int half = 1 << (s - 1);
+  // Lower half of the range: negative value, stored with an offset.
+  if (x < half) {
+    return x - (1 << s) + 1;
+  }
+  // Upper half of the range: the value is stored as is.
+  JXL_DASSERT(x < (1 << s));
+  return x;
+}
+
+// Decodes one 8x8 block of DCT coefficients from the bit stream.
+//
+// Used for scans that are not refinement scans (ProcessScan calls it when
+// cinfo->Ah == 0). Coefficients with zig-zag index in [Ss, Se] are decoded,
+// multiplied by 1 << Al and written to `coeffs` at the position given by
+// kJPEGNaturalOrder. `*last_dc_coeff` is the DC predictor and `*eobrun` the
+// end-of-band run counter; both are updated.
+// Returns false if an invalid symbol or coefficient is encountered.
+bool DecodeDCTBlock(const HuffmanTableEntry* dc_huff,
+ const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al,
+ int* eobrun, BitReaderState* br, coeff_t* last_dc_coeff,
+ coeff_t* coeffs) {
+ // Nowadays multiplication is even faster than variable shift.
+ int Am = 1 << Al;
+ // End-of-band runs longer than one block are only accepted when the scan
+ // does not start at the DC coefficient.
+ bool eobrun_allowed = Ss > 0;
+ if (Ss == 0) {
+ // DC coefficient: the symbol is the number of extra bits of the difference
+ // to the previous DC value.
+ int s = ReadSymbol(dc_huff, br);
+ if (s >= kJpegDCAlphabetSize) {
+ return false;
+ }
+ int diff = 0;
+ if (s > 0) {
+ int bits = br->ReadBits(s);
+ diff = HuffExtend(bits, s);
+ }
+ int coeff = diff + *last_dc_coeff;
+ const int dc_coeff = coeff * Am;
+ coeffs[0] = dc_coeff;
+ // TODO(eustas): is there a more elegant / explicit way to check this?
+ // The value must survive the conversion to coeff_t unchanged.
+ if (dc_coeff != coeffs[0]) {
+ return false;
+ }
+ *last_dc_coeff = coeff;
+ ++Ss;
+ }
+ // DC-only scan: nothing else to decode.
+ if (Ss > Se) {
+ return true;
+ }
+ // This block is covered by a pending end-of-band run.
+ if (*eobrun > 0) {
+ --(*eobrun);
+ return true;
+ }
+ for (int k = Ss; k <= Se; k++) {
+ int sr = ReadSymbol(ac_huff, br);
+ if (sr >= kJpegHuffmanAlphabetSize) {
+ return false;
+ }
+ // High nibble: run length, low nibble: number of extra bits.
+ int r = sr >> 4;
+ int s = sr & 15;
+ if (s > 0) {
+ // Skip r coefficients, then decode a non-zero one.
+ k += r;
+ if (k > Se) {
+ return false;
+ }
+ if (s + Al >= kJpegDCAlphabetSize) {
+ return false;
+ }
+ int bits = br->ReadBits(s);
+ int coeff = HuffExtend(bits, s);
+ coeffs[kJPEGNaturalOrder[k]] = coeff * Am;
+ } else if (r == 15) {
+ // Together with the loop increment this skips 16 coefficients.
+ k += 15;
+ } else {
+ // End of band: the run covers 1 << r blocks plus the value of r extra
+ // bits, this block included.
+ *eobrun = 1 << r;
+ if (r > 0) {
+ if (!eobrun_allowed) {
+ return false;
+ }
+ *eobrun += br->ReadBits(r);
+ }
+ break;
+ }
+ }
+ // Account for the current block.
+ --(*eobrun);
+ return true;
+}
+
+// Decodes one block of a refinement scan (ProcessScan calls it when
+// cinfo->Ah != 0): each coefficient with zig-zag index in [Ss, Se] can gain
+// one more bit of precision at bit position Al. Coefficients that are already
+// non-zero receive a correction bit; coefficients that become non-zero in this
+// scan are set to +/-(1 << Al). `*eobrun` is the end-of-band run counter.
+// Returns false if the bit stream is invalid.
+bool RefineDCTBlock(const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al,
+ int* eobrun, BitReaderState* br, coeff_t* coeffs) {
+ // Nowadays multiplication is even faster than variable shift.
+ int Am = 1 << Al;
+ bool eobrun_allowed = Ss > 0;
+ if (Ss == 0) {
+ // DC refinement: a single bit that is OR-ed in at position Al.
+ int s = br->ReadBits(1);
+ coeff_t dc_coeff = coeffs[0];
+ dc_coeff |= s * Am;
+ coeffs[0] = dc_coeff;
+ ++Ss;
+ }
+ if (Ss > Se) {
+ return true;
+ }
+ // Values of a newly non-zero coefficient / of a correction, by sign.
+ int p1 = Am;
+ int m1 = -Am;
+ int k = Ss;
+ int r;
+ int s;
+ // True while the most recently decoded symbol was a run of 16 zeros.
+ bool in_zero_run = false;
+ if (*eobrun <= 0) {
+ for (; k <= Se; k++) {
+ s = ReadSymbol(ac_huff, br);
+ if (s >= kJpegHuffmanAlphabetSize) {
+ return false;
+ }
+ // High nibble: run length, low nibble: size (only 0 or 1 is valid here).
+ r = s >> 4;
+ s &= 15;
+ if (s) {
+ if (s != 1) {
+ return false;
+ }
+ // New non-zero coefficient; the next bit is its sign.
+ s = br->ReadBits(1) ? p1 : m1;
+ in_zero_run = false;
+ } else {
+ if (r != 15) {
+ // End of band: the run covers 1 << r blocks plus the value of r extra
+ // bits, this block included.
+ *eobrun = 1 << r;
+ if (r > 0) {
+ if (!eobrun_allowed) {
+ return false;
+ }
+ *eobrun += br->ReadBits(r);
+ }
+ break;
+ }
+ in_zero_run = true;
+ }
+ // Walk forward: already non-zero coefficients each consume a correction
+ // bit, zero coefficients are counted against the run length r. The walk
+ // stops at the zero coefficient that follows r skipped zeros.
+ do {
+ coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]];
+ if (thiscoef != 0) {
+ if (br->ReadBits(1)) {
+ // Apply the correction only if this bit is not set yet, moving the
+ // value away from zero.
+ if ((thiscoef & p1) == 0) {
+ if (thiscoef >= 0) {
+ thiscoef += p1;
+ } else {
+ thiscoef += m1;
+ }
+ }
+ }
+ coeffs[kJPEGNaturalOrder[k]] = thiscoef;
+ } else {
+ if (--r < 0) {
+ break;
+ }
+ }
+ k++;
+ } while (k <= Se);
+ if (s) {
+ // The new coefficient must land inside the band.
+ if (k > Se) {
+ return false;
+ }
+ coeffs[kJPEGNaturalOrder[k]] = s;
+ }
+ }
+ }
+ // The band must not end with a run of 16 zeros.
+ if (in_zero_run) {
+ return false;
+ }
+ if (*eobrun > 0) {
+ // Inside an end-of-band run: the remaining coefficients can only receive
+ // correction bits.
+ for (; k <= Se; k++) {
+ coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]];
+ if (thiscoef != 0) {
+ if (br->ReadBits(1)) {
+ if ((thiscoef & p1) == 0) {
+ if (thiscoef >= 0) {
+ thiscoef += p1;
+ } else {
+ thiscoef += m1;
+ }
+ }
+ }
+ coeffs[kJPEGNaturalOrder[k]] = thiscoef;
+ }
+ }
+ }
+ // Account for the current block.
+ --(*eobrun);
+ return true;
+}
+
+// Takes a snapshot of everything decoding the current MCU can modify: the DC
+// predictors, the end-of-band run counter and the coefficients of all blocks
+// of the MCU that lie inside the image. The snapshot is stored in m->mcu_,
+// with the coefficients packed back to back.
+void SaveMCUCodingState(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  m->mcu_.eobrun = m->eobrun_;
+  memcpy(m->mcu_.last_dc_coeff, m->last_dc_coeff_, sizeof(m->last_dc_coeff_));
+  size_t cursor = 0;
+  for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+    const jpeg_component_info* comp = cinfo->cur_comp_info[i];
+    const int c = comp->component_index;
+    const size_t block_x = m->scan_mcu_col_ * comp->MCU_width;
+    for (int iy = 0; iy < comp->MCU_height; ++iy) {
+      const size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy;
+      if (block_y < comp->height_in_blocks) {
+        // Row index inside the currently accessible rows of this component.
+        const size_t biy = block_y % comp->v_samp_factor;
+        // The MCU may stick out of the image on the right.
+        const size_t nblocks = std::min<size_t>(
+            comp->MCU_width, comp->width_in_blocks - block_x);
+        const size_t ncoeffs = nblocks * DCTSIZE2;
+        const coeff_t* src = &m->coeff_rows[c][biy][block_x][0];
+        memcpy(&m->mcu_.coeffs[cursor], src, ncoeffs * sizeof(src[0]));
+        cursor += ncoeffs;
+      }
+    }
+  }
+}
+
+// Rolls the decoder back to the snapshot stored in m->mcu_: the DC predictors,
+// the end-of-band run counter and the coefficients of all blocks of the
+// current MCU that lie inside the image are overwritten with the saved values.
+void RestoreMCUCodingState(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  m->eobrun_ = m->mcu_.eobrun;
+  memcpy(m->last_dc_coeff_, m->mcu_.last_dc_coeff, sizeof(m->last_dc_coeff_));
+  size_t cursor = 0;
+  for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+    const jpeg_component_info* comp = cinfo->cur_comp_info[i];
+    const int c = comp->component_index;
+    const size_t block_x = m->scan_mcu_col_ * comp->MCU_width;
+    for (int iy = 0; iy < comp->MCU_height; ++iy) {
+      const size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy;
+      if (block_y < comp->height_in_blocks) {
+        // Row index inside the currently accessible rows of this component.
+        const size_t biy = block_y % comp->v_samp_factor;
+        // The MCU may stick out of the image on the right.
+        const size_t nblocks = std::min<size_t>(
+            comp->MCU_width, comp->width_in_blocks - block_x);
+        const size_t ncoeffs = nblocks * DCTSIZE2;
+        coeff_t* dst = &m->coeff_rows[c][biy][block_x][0];
+        memcpy(dst, &m->mcu_.coeffs[cursor], ncoeffs * sizeof(dst[0]));
+        cursor += ncoeffs;
+      }
+    }
+  }
+}
+
+// Resets the entropy decoder state at the end of a scan or restart interval:
+// a still pending end-of-band run is an error, the run counter is set to -1,
+// the DC predictors are cleared and, if reading stopped in the middle of a
+// byte, *pos is moved to the next byte boundary (stepping over the stuffed
+// zero byte after a 0xff) and *bit_pos is cleared.
+// Always returns true.
+bool FinishScan(j_decompress_ptr cinfo, const uint8_t* data, const size_t len,
+                size_t* pos, size_t* bit_pos) {
+  jpeg_decomp_master* m = cinfo->master;
+  if (m->eobrun_ > 0) {
+    JPEGLI_ERROR("End-of-block run too long.");
+  }
+  m->eobrun_ = -1;
+  memset(m->last_dc_coeff_, 0, sizeof(m->last_dc_coeff_));
+  if (*bit_pos != 0) {
+    // Drop the rest of the partially consumed byte.
+    if (data[*pos] != 0xff) {
+      *pos += 1;
+    } else {
+      // After last br.FinishStream we checked that there is at least 2 bytes
+      // in the buffer.
+      JXL_DASSERT(*pos + 1 < len);
+      // br.FinishStream would have detected an early marker.
+      JXL_DASSERT(data[*pos + 1] == 0);
+      *pos += 2;
+    }
+    *bit_pos = 0;
+  }
+  return true;
+}
+
+} // namespace
+
+// Makes m->coeff_rows[c] point at the coefficient rows of the iMCU row
+// cinfo->input_iMCU_row for every component of the current scan. The rows are
+// requested from the virtual block arrays in m->coef_arrays; in streaming mode
+// the request always starts at row 0 of the array.
+void PrepareForiMCURow(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* m = cinfo->master;
+  for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+    const jpeg_component_info* comp = cinfo->cur_comp_info[i];
+    const int c = comp->component_index;
+    // First block row of this iMCU row and the number of rows left from there.
+    const int by0 = cinfo->input_iMCU_row * comp->v_samp_factor;
+    const int block_rows_left = comp->height_in_blocks - by0;
+    // The last iMCU row may contain fewer block rows than v_samp_factor.
+    const int num_rows = block_rows_left < comp->v_samp_factor
+                             ? block_rows_left
+                             : comp->v_samp_factor;
+    const int first_row = m->streaming_mode_ ? 0 : by0;
+    m->coeff_rows[c] = cinfo->mem->access_virt_barray(
+        reinterpret_cast<j_common_ptr>(cinfo), m->coef_arrays[c], first_row,
+        num_rows, true);
+  }
+}
+
+// Decodes MCUs of the current scan from data[*pos...] (continuing at bit
+// *bit_pos of that byte) until an iMCU row or the scan is finished, a restart
+// marker is due, or the input runs out. *pos and *bit_pos are only advanced
+// past MCUs that were decoded completely.
+//
+// Returns:
+//  * kNeedMoreInput if the buffer ends before the current MCU (or the restart
+//    marker) is complete;
+//  * kHandleRestart after consuming the marker that follows a restart
+//    interval (the marker is left in cinfo->unread_marker);
+//  * JPEG_ROW_COMPLETED if an iMCU row was finished and more rows follow;
+//  * JPEG_SCAN_COMPLETED if the last iMCU row was finished, or if a marker was
+//    hit in the middle of an MCU.
+int ProcessScan(j_decompress_ptr cinfo, const uint8_t* const data,
+ const size_t len, size_t* pos, size_t* bit_pos) {
+ if (len == 0) {
+ return kNeedMoreInput;
+ }
+ jpeg_decomp_master* m = cinfo->master;
+ for (;;) {
+ // Handle the restart intervals.
+ if (cinfo->restart_interval > 0 && m->restarts_to_go_ == 0) {
+ if (!FinishScan(cinfo, data, len, pos, bit_pos)) {
+ return kNeedMoreInput;
+ }
+ // Go to the next marker, warn if we had to skip any data.
+ size_t num_skipped = 0;
+ while (*pos + 1 < len && (data[*pos] != 0xff || data[*pos + 1] == 0 ||
+ data[*pos + 1] == 0xff)) {
+ ++(*pos);
+ ++num_skipped;
+ }
+ if (num_skipped > 0) {
+ JPEGLI_WARN("Skipped %d bytes before restart marker", (int)num_skipped);
+ }
+ if (*pos + 2 > len) {
+ return kNeedMoreInput;
+ }
+ // Hand the marker over to the caller.
+ cinfo->unread_marker = data[*pos + 1];
+ *pos += 2;
+ return kHandleRestart;
+ }
+
+ size_t start_pos = *pos;
+ BitReaderState br(data, len, start_pos);
+ // Skip the bits of the first byte that earlier MCUs already consumed.
+ if (*bit_pos > 0) {
+ br.ReadBits(*bit_pos);
+ }
+ // The buffer may end inside this MCU: take a snapshot so that the MCU can
+ // be decoded again once more input is available.
+ if (start_pos + kMaxMCUByteSize > len) {
+ SaveMCUCodingState(cinfo);
+ }
+
+ // Decode one MCU.
+ HWY_ALIGN_MAX coeff_t sink_block[DCTSIZE2];
+ bool scan_ok = true;
+ for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+ const jpeg_component_info* comp = cinfo->cur_comp_info[i];
+ int c = comp->component_index;
+ const HuffmanTableEntry* dc_lut =
+ &m->dc_huff_lut_[comp->dc_tbl_no * kJpegHuffmanLutSize];
+ const HuffmanTableEntry* ac_lut =
+ &m->ac_huff_lut_[comp->ac_tbl_no * kJpegHuffmanLutSize];
+ for (int iy = 0; iy < comp->MCU_height; ++iy) {
+ size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy;
+ int biy = block_y % comp->v_samp_factor;
+ for (int ix = 0; ix < comp->MCU_width; ++ix) {
+ size_t block_x = m->scan_mcu_col_ * comp->MCU_width + ix;
+ coeff_t* coeffs;
+ // Blocks outside of the image still have to be parsed, but their
+ // coefficients are discarded.
+ if (block_x >= comp->width_in_blocks ||
+ block_y >= comp->height_in_blocks) {
+ // Note that it is OK that sink_block is uninitialized because
+ // it will never be used in any branches, even in the RefineDCTBlock
+ // case, because only DC scans can be interleaved and we don't use
+ // the zero-ness of the DC coeff in the DC refinement code-path.
+ coeffs = sink_block;
+ } else {
+ coeffs = &m->coeff_rows[c][biy][block_x][0];
+ }
+ // Ah == 0: first pass over these coefficients, otherwise refinement.
+ if (cinfo->Ah == 0) {
+ if (!DecodeDCTBlock(dc_lut, ac_lut, cinfo->Ss, cinfo->Se, cinfo->Al,
+ &m->eobrun_, &br,
+ &m->last_dc_coeff_[comp->component_index],
+ coeffs)) {
+ scan_ok = false;
+ }
+ } else {
+ if (!RefineDCTBlock(ac_lut, cinfo->Ss, cinfo->Se, cinfo->Al,
+ &m->eobrun_, &br, coeffs)) {
+ scan_ok = false;
+ }
+ }
+ }
+ }
+ }
+ size_t new_pos;
+ size_t new_bit_pos;
+ bool stream_ok = br.FinishStream(&new_pos, &new_bit_pos);
+ if (new_pos + 2 > len) {
+ // If reading stopped within the last two bytes, we have to request more
+ // input even if FinishStream() returned true, since the Huffman code
+ // reader could have peeked ahead some bits past the current input chunk
+ // and thus the last prefix code length could have been wrong. We can do
+ // this because a valid JPEG bit stream has two extra bytes at the end.
+ RestoreMCUCodingState(cinfo);
+ return kNeedMoreInput;
+ }
+ *pos = new_pos;
+ *bit_pos = new_bit_pos;
+ if (!stream_ok) {
+ // We hit a marker during parsing.
+ JXL_DASSERT(data[*pos] == 0xff);
+ JXL_DASSERT(data[*pos + 1] != 0);
+ // NOTE(review): the snapshot is only taken above when
+ // start_pos + kMaxMCUByteSize > len; confirm that restoring here cannot
+ // use a snapshot that belongs to an earlier MCU.
+ RestoreMCUCodingState(cinfo);
+ JPEGLI_WARN("Incomplete scan detected.");
+ return JPEG_SCAN_COMPLETED;
+ }
+ if (!scan_ok) {
+ JPEGLI_ERROR("Failed to decode DCT block");
+ }
+ // The MCU is accepted: advance the restart counter and the MCU position.
+ if (m->restarts_to_go_ > 0) {
+ --m->restarts_to_go_;
+ }
+ ++m->scan_mcu_col_;
+ if (m->scan_mcu_col_ == cinfo->MCUs_per_row) {
+ ++m->scan_mcu_row_;
+ m->scan_mcu_col_ = 0;
+ if (m->scan_mcu_row_ == cinfo->MCU_rows_in_scan) {
+ // Last MCU of the scan.
+ if (!FinishScan(cinfo, data, len, pos, bit_pos)) {
+ return kNeedMoreInput;
+ }
+ break;
+ } else if ((m->scan_mcu_row_ % m->mcu_rows_per_iMCU_row_) == 0) {
+ // Current iMCU row is done.
+ break;
+ }
+ }
+ }
+ ++cinfo->input_iMCU_row;
+ if (cinfo->input_iMCU_row < cinfo->total_iMCU_rows) {
+ PrepareForiMCURow(cinfo);
+ return JPEG_ROW_COMPLETED;
+ }
+ return JPEG_SCAN_COMPLETED;
+}
+
+} // namespace jpegli
diff --git a/lib/jpegli/decode_scan.h b/lib/jpegli/decode_scan.h
new file mode 100644
index 0000000..1d7b18f
--- /dev/null
+++ b/lib/jpegli/decode_scan.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_DECODE_SCAN_H_
+#define LIB_JPEGLI_DECODE_SCAN_H_
+
+#include <stdint.h>
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+// Reads the available input in the source manager's input buffer until the end
+// of the next iMCU row.
+// The corresponding fields of cinfo are updated with the processed input data.
+// Upon return, the input buffer will be at the start of an MCU, or at the end
+// of the scan.
+// Return value is one of:
+// * JPEG_SUSPENDED, if the input buffer ends before the end of an iMCU row;
+// * JPEG_ROW_COMPLETED, if the next iMCU row (but not the scan) is reached;
+// * JPEG_SCAN_COMPLETED, if the end of the scan is reached.
+int ProcessScan(j_decompress_ptr cinfo, const uint8_t* const data,
+ const size_t len, size_t* pos, size_t* bit_pos);
+
+void PrepareForiMCURow(j_decompress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_DECODE_SCAN_H_
diff --git a/lib/jpegli/destination_manager.cc b/lib/jpegli/destination_manager.cc
new file mode 100644
index 0000000..9bc269f
--- /dev/null
+++ b/lib/jpegli/destination_manager.cc
@@ -0,0 +1,148 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <string.h>
+
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+
+namespace jpegli {
+
+constexpr size_t kDestBufferSize = 64 << 10;
+
+// Destination manager that sends the compressed data to a stdio stream through
+// a buffer of kDestBufferSize bytes.
+struct StdioDestinationManager {
+  jpeg_destination_mgr pub;
+  FILE* f;
+  uint8_t* buffer;
+
+  // Makes the whole buffer available for writing.
+  static void init_destination(j_compress_ptr cinfo) {
+    auto self = reinterpret_cast<StdioDestinationManager*>(cinfo->dest);
+    self->pub.free_in_buffer = kDestBufferSize;
+    self->pub.next_output_byte = self->buffer;
+  }
+
+  // Writes the complete buffer to the stream and makes it available again.
+  // A short write is reported as an error. Returns TRUE.
+  static boolean empty_output_buffer(j_compress_ptr cinfo) {
+    auto self = reinterpret_cast<StdioDestinationManager*>(cinfo->dest);
+    const size_t written = fwrite(self->buffer, 1, kDestBufferSize, self->f);
+    if (written != kDestBufferSize) {
+      JPEGLI_ERROR("Failed to write to output stream.");
+    }
+    self->pub.free_in_buffer = kDestBufferSize;
+    self->pub.next_output_byte = self->buffer;
+    return TRUE;
+  }
+
+  // Writes the bytes that are still in the buffer and flushes the stream.
+  // A short write or a stream in error state is reported as an error.
+  static void term_destination(j_compress_ptr cinfo) {
+    auto self = reinterpret_cast<StdioDestinationManager*>(cinfo->dest);
+    const size_t pending = kDestBufferSize - self->pub.free_in_buffer;
+    if (pending != 0) {
+      const size_t written = fwrite(self->buffer, 1, pending, self->f);
+      if (written != pending) {
+        JPEGLI_ERROR("Failed to write to output stream.");
+      }
+    }
+    fflush(self->f);
+    if (ferror(self->f)) {
+      JPEGLI_ERROR("Failed to write to output stream.");
+    }
+  }
+};
+
+// Destination manager that collects the compressed data in memory. Writing
+// starts in current_buffer; whenever it is full, a malloc()-ed buffer of twice
+// the size takes over and *output is updated to point at it.
+struct MemoryDestinationManager {
+  jpeg_destination_mgr pub;
+  // Output buffer supplied by the application
+  uint8_t** output;
+  unsigned long* output_size;
+  // Output buffer allocated by us.
+  uint8_t* temp_buffer;
+  // Current output buffer (either application supplied or allocated by us).
+  uint8_t* current_buffer;
+  size_t buffer_size;
+
+  // Nothing to do: the buffer is set up when the manager is installed.
+  static void init_destination(j_compress_ptr cinfo) {}
+
+  // Called when the current buffer is full: moves the data into a buffer of
+  // twice the size and continues writing behind the copied bytes.
+  // Reports an error (and returns FALSE if the error handler returns) when the
+  // larger buffer cannot be allocated; otherwise returns TRUE.
+  static boolean empty_output_buffer(j_compress_ptr cinfo) {
+    auto dest = reinterpret_cast<MemoryDestinationManager*>(cinfo->dest);
+    uint8_t* next_buffer =
+        reinterpret_cast<uint8_t*>(malloc(dest->buffer_size * 2));
+    if (next_buffer == nullptr) {
+      // Without this check the memcpy() below would write through a null
+      // pointer.
+      JPEGLI_ERROR("Failed to allocate memory for the output buffer.");
+      return FALSE;
+    }
+    memcpy(next_buffer, dest->current_buffer, dest->buffer_size);
+    // Only a buffer allocated by us is released here (free(nullptr) is a
+    // no-op); an application supplied buffer is left alone.
+    free(dest->temp_buffer);
+    dest->temp_buffer = next_buffer;
+    dest->current_buffer = next_buffer;
+    *dest->output = next_buffer;
+    *dest->output_size = dest->buffer_size;
+    // The first half of the new buffer holds the data written so far.
+    dest->pub.next_output_byte = next_buffer + dest->buffer_size;
+    dest->pub.free_in_buffer = dest->buffer_size;
+    dest->buffer_size *= 2;
+    return TRUE;
+  }
+
+  // Reports the number of bytes written to the application.
+  static void term_destination(j_compress_ptr cinfo) {
+    auto dest = reinterpret_cast<MemoryDestinationManager*>(cinfo->dest);
+    *dest->output_size = dest->buffer_size - dest->pub.free_in_buffer;
+  }
+};
+
+} // namespace jpegli
+
+// Installs a destination manager that writes the compressed data to `outfile`
+// through a buffer of jpegli::kDestBufferSize bytes.
+//
+// Reports an error if `outfile` is null or if cinfo already has a destination
+// manager that was not installed by this function. Calling the function again
+// on the same cinfo only retargets the existing manager to the new stream.
+void jpegli_stdio_dest(j_compress_ptr cinfo, FILE* outfile) {
+  if (outfile == nullptr) {
+    JPEGLI_ERROR("jpegli_stdio_dest: Invalid destination.");
+  }
+  if (cinfo->dest && cinfo->dest->init_destination !=
+                         jpegli::StdioDestinationManager::init_destination) {
+    JPEGLI_ERROR("jpegli_stdio_dest: a different dest manager was already set");
+  }
+  if (!cinfo->dest) {
+    auto created = jpegli::Allocate<jpegli::StdioDestinationManager>(cinfo, 1);
+    // The buffer is allocated together with the manager, so a repeated call
+    // reuses it instead of requesting another kDestBufferSize bytes each time.
+    created->buffer =
+        jpegli::Allocate<uint8_t>(cinfo, jpegli::kDestBufferSize);
+    cinfo->dest = reinterpret_cast<jpeg_destination_mgr*>(created);
+  }
+  auto dest = reinterpret_cast<jpegli::StdioDestinationManager*>(cinfo->dest);
+  dest->f = outfile;
+  dest->pub.next_output_byte = dest->buffer;
+  dest->pub.free_in_buffer = jpegli::kDestBufferSize;
+  dest->pub.init_destination =
+      jpegli::StdioDestinationManager::init_destination;
+  dest->pub.empty_output_buffer =
+      jpegli::StdioDestinationManager::empty_output_buffer;
+  dest->pub.term_destination =
+      jpegli::StdioDestinationManager::term_destination;
+}
+
+// Installs a destination manager that collects the compressed data in memory.
+//
+// If *outbuffer is non-null and *outsize is non-zero, writing starts in that
+// application supplied buffer; otherwise a buffer of jpegli::kDestBufferSize
+// bytes is malloc()-ed and returned through *outbuffer / *outsize. When the
+// buffer fills up, the manager switches to a larger malloc()-ed buffer and
+// updates *outbuffer; at the end *outsize holds the number of bytes written.
+//
+// Reports an error if either pointer is null, if cinfo already has a
+// destination manager that was not installed by this function, or if the
+// initial buffer cannot be allocated.
+void jpegli_mem_dest(j_compress_ptr cinfo, unsigned char** outbuffer,
+                     unsigned long* outsize) {
+  if (outbuffer == nullptr || outsize == nullptr) {
+    JPEGLI_ERROR("jpegli_mem_dest: Invalid destination.");
+  }
+  if (cinfo->dest && cinfo->dest->init_destination !=
+                         jpegli::MemoryDestinationManager::init_destination) {
+    JPEGLI_ERROR("jpegli_mem_dest: a different dest manager was already set");
+  }
+  if (!cinfo->dest) {
+    auto created = jpegli::Allocate<jpegli::MemoryDestinationManager>(cinfo, 1);
+    cinfo->dest = reinterpret_cast<jpeg_destination_mgr*>(created);
+  }
+  auto dest = reinterpret_cast<jpegli::MemoryDestinationManager*>(cinfo->dest);
+  dest->pub.init_destination =
+      jpegli::MemoryDestinationManager::init_destination;
+  dest->pub.empty_output_buffer =
+      jpegli::MemoryDestinationManager::empty_output_buffer;
+  dest->pub.term_destination =
+      jpegli::MemoryDestinationManager::term_destination;
+  dest->output = outbuffer;
+  dest->output_size = outsize;
+  // A buffer allocated during an earlier use of this manager was handed to the
+  // application through *outbuffer. Forget it on every call, so that growing
+  // the output later never frees memory the application may have released or
+  // replaced in the meantime.
+  dest->temp_buffer = nullptr;
+  if (*outbuffer == nullptr || *outsize == 0) {
+    dest->temp_buffer =
+        reinterpret_cast<uint8_t*>(malloc(jpegli::kDestBufferSize));
+    if (dest->temp_buffer == nullptr) {
+      JPEGLI_ERROR("jpegli_mem_dest: Failed to allocate the output buffer.");
+    }
+    *outbuffer = dest->temp_buffer;
+    *outsize = jpegli::kDestBufferSize;
+  }
+  dest->current_buffer = *outbuffer;
+  dest->buffer_size = *outsize;
+  dest->pub.next_output_byte = dest->current_buffer;
+  dest->pub.free_in_buffer = dest->buffer_size;
+}
diff --git a/lib/jpegli/downsample.cc b/lib/jpegli/downsample.cc
new file mode 100644
index 0000000..df2c156
--- /dev/null
+++ b/lib/jpegli/downsample.cc
@@ -0,0 +1,356 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/downsample.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/downsample.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/error.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::Vec;
+
+using D = HWY_CAPPED(float, 8);
+constexpr D d;
+
+// Writes len / 2 samples to `row_out`, each the mean of two consecutive
+// samples of `row_in`. Input is consumed before the matching output is
+// stored, and callers in this file pass the same pointer for both rows.
+// NOTE(review): the loop advances by whole vectors, so it can touch up to
+// Lanes(d) - 1 elements past len / 2; presumably rows are padded - confirm.
+void DownsampleRow2x1(const float* row_in, size_t len, float* row_out) {
+  const size_t step = Lanes(d);
+  const size_t num_out = len / 2;
+  const auto half = Set(d, 0.5f);
+  for (size_t i = 0; i < num_out; i += step) {
+    Vec<D> even, odd;
+    LoadInterleaved2(d, row_in + 2 * i, even, odd);
+    const auto sum = Add(even, odd);
+    Store(Mul(half, sum), d, row_out + i);
+  }
+}
+
+// Writes len / 3 samples to `row_out`, each the mean of three consecutive
+// samples of `row_in`. Input is consumed before the matching output is
+// stored, and callers in this file pass the same pointer for both rows.
+void DownsampleRow3x1(const float* row_in, size_t len, float* row_out) {
+  const size_t step = Lanes(d);
+  const size_t num_out = len / 3;
+  const auto third = Set(d, 1.0f / 3);
+  for (size_t i = 0; i < num_out; i += step) {
+    Vec<D> a, b, c;
+    LoadInterleaved3(d, row_in + 3 * i, a, b, c);
+    const auto sum = Add(Add(a, b), c);
+    Store(Mul(third, sum), d, row_out + i);
+  }
+}
+
+// Writes len / 4 samples to `row_out`, each the mean of four consecutive
+// samples of `row_in`. Input is consumed before the matching output is
+// stored, and callers in this file pass the same pointer for both rows.
+void DownsampleRow4x1(const float* row_in, size_t len, float* row_out) {
+  const size_t step = Lanes(d);
+  const size_t num_out = len / 4;
+  const auto quarter = Set(d, 0.25f);
+  for (size_t i = 0; i < num_out; i += step) {
+    Vec<D> a, b, c, e;
+    LoadInterleaved4(d, row_in + 4 * i, a, b, c, e);
+    const auto sum = Add(Add(a, b), Add(c, e));
+    Store(Mul(quarter, sum), d, row_out + i);
+  }
+}
+
+// 2:1 horizontal downsampling with no vertical reduction; only rows_in[0]
+// is read.
+void Downsample2x1(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  const float* src = rows_in[0];
+  DownsampleRow2x1(src, len, row_out);
+}
+
+// 3:1 horizontal downsampling with no vertical reduction; only rows_in[0]
+// is read.
+void Downsample3x1(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  const float* src = rows_in[0];
+  DownsampleRow3x1(src, len, row_out);
+}
+
+// 4:1 horizontal downsampling with no vertical reduction; only rows_in[0]
+// is read.
+void Downsample4x1(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  const float* src = rows_in[0];
+  DownsampleRow4x1(src, len, row_out);
+}
+
+// 2:1 vertical downsampling: row_out[x] is the mean of rows_in[0][x] and
+// rows_in[1][x] for x in [0, len), processed one vector at a time.
+void Downsample1x2(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  const size_t step = Lanes(d);
+  const auto half = Set(d, 0.5f);
+  const float* top = rows_in[0];
+  const float* bottom = rows_in[1];
+  for (size_t i = 0; i < len; i += step) {
+    const auto sum = Add(Load(d, top + i), Load(d, bottom + i));
+    Store(Mul(half, sum), d, row_out + i);
+  }
+}
+
+// 2x2 box downsampling: each of the len / 2 outputs is the mean of a 2x2
+// block taken from rows_in[0] and rows_in[1].
+void Downsample2x2(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  const size_t step = Lanes(d);
+  const size_t num_out = len / 2;
+  const auto quarter = Set(d, 0.25f);
+  const float* top = rows_in[0];
+  const float* bottom = rows_in[1];
+  for (size_t i = 0; i < num_out; i += step) {
+    Vec<D> t0, t1, b0, b1;
+    LoadInterleaved2(d, top + 2 * i, t0, t1);
+    LoadInterleaved2(d, bottom + 2 * i, b0, b1);
+    const auto sum = Add(Add(t0, t1), Add(b0, b1));
+    Store(Mul(quarter, sum), d, row_out + i);
+  }
+}
+
+// 3x2 downsampling in two steps: each of the two input rows is first reduced
+// 3:1 horizontally in place (the input rows are overwritten), then the two
+// reduced rows are averaged vertically into `row_out`.
+void Downsample3x2(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  for (int r = 0; r < 2; ++r) {
+    float* row = rows_in[r];
+    DownsampleRow3x1(row, len, row);
+  }
+  Downsample1x2(rows_in, len / 3, row_out);
+}
+
+// 4x2 downsampling in two steps: each of the two input rows is first reduced
+// 4:1 horizontally in place (the input rows are overwritten), then the two
+// reduced rows are averaged vertically into `row_out`.
+void Downsample4x2(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  for (int r = 0; r < 2; ++r) {
+    float* row = rows_in[r];
+    DownsampleRow4x1(row, len, row);
+  }
+  Downsample1x2(rows_in, len / 4, row_out);
+}
+
+// 3:1 vertical downsampling: row_out[x] is the mean of the three input rows
+// at column x, for x in [0, len), processed one vector at a time.
+void Downsample1x3(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  const size_t step = Lanes(d);
+  const auto third = Set(d, 1.0f / 3);
+  const float* r0 = rows_in[0];
+  const float* r1 = rows_in[1];
+  const float* r2 = rows_in[2];
+  for (size_t i = 0; i < len; i += step) {
+    const auto top = Load(d, r0 + i);
+    const auto mid = Load(d, r1 + i);
+    const auto bot = Load(d, r2 + i);
+    const auto sum = Add(Add(top, mid), bot);
+    Store(Mul(third, sum), d, row_out + i);
+  }
+}
+
+// 2x3 downsampling in two steps: each of the three input rows is first
+// reduced 2:1 horizontally in place (the input rows are overwritten), then
+// the three reduced rows are averaged vertically into `row_out`.
+void Downsample2x3(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  for (int r = 0; r < 3; ++r) {
+    float* row = rows_in[r];
+    DownsampleRow2x1(row, len, row);
+  }
+  Downsample1x3(rows_in, len / 2, row_out);
+}
+
+// 3x3 downsampling in two steps: each of the three input rows is first
+// reduced 3:1 horizontally in place (the input rows are overwritten), then
+// the three reduced rows are averaged vertically into `row_out`.
+void Downsample3x3(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  for (int r = 0; r < 3; ++r) {
+    float* row = rows_in[r];
+    DownsampleRow3x1(row, len, row);
+  }
+  Downsample1x3(rows_in, len / 3, row_out);
+}
+
+// 4x3 downsampling in two steps: each of the three input rows is first
+// reduced 4:1 horizontally in place (the input rows are overwritten), then
+// the three reduced rows are averaged vertically into `row_out`.
+void Downsample4x3(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  for (int r = 0; r < 3; ++r) {
+    float* row = rows_in[r];
+    DownsampleRow4x1(row, len, row);
+  }
+  Downsample1x3(rows_in, len / 4, row_out);
+}
+
+// 4:1 vertical downsampling: row_out[x] is the mean of the four input rows
+// at column x, for x in [0, len), processed one vector at a time.
+void Downsample1x4(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  const size_t step = Lanes(d);
+  const auto quarter = Set(d, 0.25f);
+  const float* r0 = rows_in[0];
+  const float* r1 = rows_in[1];
+  const float* r2 = rows_in[2];
+  const float* r3 = rows_in[3];
+  for (size_t i = 0; i < len; i += step) {
+    const auto upper = Add(Load(d, r0 + i), Load(d, r1 + i));
+    const auto lower = Add(Load(d, r2 + i), Load(d, r3 + i));
+    Store(Mul(quarter, Add(upper, lower)), d, row_out + i);
+  }
+}
+
+// 2x4 downsampling in two steps: each of the four input rows is first
+// reduced 2:1 horizontally in place (the input rows are overwritten), then
+// the four reduced rows are averaged vertically into `row_out`.
+void Downsample2x4(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  for (int r = 0; r < 4; ++r) {
+    float* row = rows_in[r];
+    DownsampleRow2x1(row, len, row);
+  }
+  Downsample1x4(rows_in, len / 2, row_out);
+}
+
+// 3x4 downsampling in two steps: each of the four input rows is first
+// reduced 3:1 horizontally in place (the input rows are overwritten), then
+// the four reduced rows are averaged vertically into `row_out`.
+void Downsample3x4(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  for (int r = 0; r < 4; ++r) {
+    float* row = rows_in[r];
+    DownsampleRow3x1(row, len, row);
+  }
+  Downsample1x4(rows_in, len / 3, row_out);
+}
+
+// 4x4 downsampling in two steps: each of the four input rows is first
+// reduced 4:1 horizontally in place (the input rows are overwritten), then
+// the four reduced rows are averaged vertically into `row_out`.
+void Downsample4x4(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                   float* row_out) {
+  for (int r = 0; r < 4; ++r) {
+    float* row = rows_in[r];
+    DownsampleRow4x1(row, len, row);
+  }
+  Downsample1x4(rows_in, len / 4, row_out);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+HWY_EXPORT(Downsample1x2);
+HWY_EXPORT(Downsample1x3);
+HWY_EXPORT(Downsample1x4);
+HWY_EXPORT(Downsample2x1);
+HWY_EXPORT(Downsample2x2);
+HWY_EXPORT(Downsample2x3);
+HWY_EXPORT(Downsample2x4);
+HWY_EXPORT(Downsample3x1);
+HWY_EXPORT(Downsample3x2);
+HWY_EXPORT(Downsample3x3);
+HWY_EXPORT(Downsample3x4);
+HWY_EXPORT(Downsample4x1);
+HWY_EXPORT(Downsample4x2);
+HWY_EXPORT(Downsample4x3);
+HWY_EXPORT(Downsample4x4);
+
+// Placeholder kernel installed for components that need no downsampling
+// (1x1 ratio); it touches neither its inputs nor `row_out`.
+void NullDownsample(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+                    float* row_out) {
+  // Intentionally empty.
+}
+
+// Selects, for every component, the downsampling kernel that matches its
+// horizontal (h_factor) and vertical (v_factor) reduction relative to the
+// largest sampling factors of the image, and stores it in
+// cinfo->master->downsample_method[c].
+//
+// Kernels are named Downsample<h>x<v>. The vertical factor of the selected
+// kernel must equal v_factor, because DownsampleInputBuffer() hands the
+// kernel exactly v_factor input rows per output row.
+//
+// Reports a JPEGLI_ERROR for any ratio outside 1..4 in either direction.
+void ChooseDownsampleMethods(j_compress_ptr cinfo) {
+  jpeg_comp_master* m = cinfo->master;
+  for (int c = 0; c < cinfo->num_components; c++) {
+    m->downsample_method[c] = nullptr;
+    jpeg_component_info* comp = &cinfo->comp_info[c];
+    const int h_factor = cinfo->max_h_samp_factor / comp->h_samp_factor;
+    const int v_factor = cinfo->max_v_samp_factor / comp->v_samp_factor;
+    if (v_factor == 1) {
+      if (h_factor == 1) {
+        m->downsample_method[c] = NullDownsample;
+      } else if (h_factor == 2) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x1);
+      } else if (h_factor == 3) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x1);
+      } else if (h_factor == 4) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x1);
+      }
+    } else if (v_factor == 2) {
+      if (h_factor == 1) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample1x2);
+      } else if (h_factor == 2) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x2);
+      } else if (h_factor == 3) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x2);
+      } else if (h_factor == 4) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x2);
+      }
+    } else if (v_factor == 3) {
+      // Three input rows per output row: use the Nx3 kernels so that all
+      // three rows contribute and the sum is scaled accordingly.
+      if (h_factor == 1) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample1x3);
+      } else if (h_factor == 2) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x3);
+      } else if (h_factor == 3) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x3);
+      } else if (h_factor == 4) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x3);
+      }
+    } else if (v_factor == 4) {
+      if (h_factor == 1) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample1x4);
+      } else if (h_factor == 2) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x4);
+      } else if (h_factor == 3) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x4);
+      } else if (h_factor == 4) {
+        m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x4);
+      }
+    }
+    if (m->downsample_method[c] == nullptr) {
+      JPEGLI_ERROR("Unsupported downsampling ratio %dx%d", h_factor, v_factor);
+    }
+  }
+}
+
+// Runs the per-component downsampling kernels over the rows of the current
+// iMCU row (m->next_iMCU_row), reading from m->smooth_input[c] and writing
+// to m->raw_data[c]. Components with a 1x1 ratio are left untouched, and the
+// whole function is a no-op when no component is subsampled.
+void DownsampleInputBuffer(j_compress_ptr cinfo) {
+  const bool any_subsampling =
+      cinfo->max_h_samp_factor != 1 || cinfo->max_v_samp_factor != 1;
+  if (!any_subsampling) {
+    return;
+  }
+  jpeg_comp_master* m = cinfo->master;
+  const size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor;
+  const size_t y_begin = m->next_iMCU_row * iMCU_height;
+  const size_t y_end = y_begin + iMCU_height;
+  const size_t row_len = m->xsize_blocks * DCTSIZE;
+  for (int c = 0; c < cinfo->num_components; c++) {
+    jpeg_component_info* comp = &cinfo->comp_info[c];
+    const int h_factor = cinfo->max_h_samp_factor / comp->h_samp_factor;
+    const int v_factor = cinfo->max_v_samp_factor / comp->v_samp_factor;
+    if (h_factor == 1 && v_factor == 1) {
+      continue;
+    }
+    auto& src = *m->smooth_input[c];
+    auto& dst = *m->raw_data[c];
+    float* rows_in[MAX_SAMP_FACTOR];
+    size_t y_out = y_begin / v_factor;
+    // Each output row consumes v_factor consecutive input rows.
+    for (size_t y_in = y_begin; y_in < y_end; y_in += v_factor) {
+      for (int k = 0; k < v_factor; ++k) {
+        rows_in[k] = src.Row(y_in + k);
+      }
+      float* row_out = dst.Row(y_out);
+      (*m->downsample_method[c])(rows_in, row_len, row_out);
+      ++y_out;
+    }
+  }
+}
+
+// Applies a 3x3 smoothing filter to the rows of the current iMCU row of
+// every component, reading m->input_buffer[c] and writing
+// m->smooth_input[c]. The centre sample is weighted by 1 - 8 * w and each of
+// the eight neighbours by w, where w = smoothing_factor / 1024. Does nothing
+// when cinfo->smoothing_factor is zero.
+void ApplyInputSmoothing(j_compress_ptr cinfo) {
+  if (!cinfo->smoothing_factor) {
+    return;
+  }
+  jpeg_comp_master* m = cinfo->master;
+  const float kW1 = cinfo->smoothing_factor / 1024.0;
+  const float kW0 = 1.0f - 8.0f * kW1;
+  const size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor;
+  const ssize_t y_begin = m->next_iMCU_row * iMCU_height;
+  const ssize_t y_end = y_begin + iMCU_height;
+  const ssize_t width = m->xsize_blocks * DCTSIZE;
+  const bool is_first_iMCU_row = m->next_iMCU_row == 0;
+  const bool is_last_iMCU_row =
+      m->next_iMCU_row + 1 == cinfo->total_iMCU_rows;
+  for (int c = 0; c < cinfo->num_components; c++) {
+    auto& src = m->input_buffer[c];
+    auto& dst = *m->smooth_input[c];
+    // NOTE(review): these CopyRow calls look like they replicate the first
+    // and last image rows into the rows just outside the image so the filter
+    // has a border to read - confirm against RowBuffer::CopyRow.
+    if (is_first_iMCU_row) {
+      src.CopyRow(-1, 0, 1);
+    }
+    if (is_last_iMCU_row) {
+      size_t last_row = m->ysize_blocks * DCTSIZE - 1;
+      src.CopyRow(last_row + 1, last_row, 1);
+    }
+    // TODO(szabadka) SIMDify this.
+    for (ssize_t y = y_begin; y < y_end; ++y) {
+      const float* above = src.Row(y - 1);
+      const float* here = src.Row(y);
+      const float* below = src.Row(y + 1);
+      float* out = dst.Row(y);
+      for (ssize_t x = 0; x < width; ++x) {
+        // Summation order is kept left-to-right, top-to-bottom.
+        const float neighbours =
+            (above[x - 1] + above[x] + above[x + 1] + here[x - 1] +
+             here[x + 1] + below[x - 1] + below[x] + below[x + 1]);
+        const float centre = here[x];
+        out[x] = centre * kW0 + neighbours * kW1;
+      }
+    }
+  }
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/lib/jpegli/downsample.h b/lib/jpegli/downsample.h
new file mode 100644
index 0000000..3ccf069
--- /dev/null
+++ b/lib/jpegli/downsample.h
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_DOWNSAMPLE_H_
+#define LIB_JPEGLI_DOWNSAMPLE_H_
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+void ChooseDownsampleMethods(j_compress_ptr cinfo);
+
+void DownsampleInputBuffer(j_compress_ptr cinfo);
+
+void ApplyInputSmoothing(j_compress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_DOWNSAMPLE_H_
diff --git a/lib/jpegli/encode.cc b/lib/jpegli/encode.cc
new file mode 100644
index 0000000..8a106e2
--- /dev/null
+++ b/lib/jpegli/encode.cc
@@ -0,0 +1,1253 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/encode.h"
+
+#include <cmath>
+#include <initializer_list>
+#include <vector>
+
+#include "lib/jpegli/adaptive_quantization.h"
+#include "lib/jpegli/bit_writer.h"
+#include "lib/jpegli/bitstream.h"
+#include "lib/jpegli/color_transform.h"
+#include "lib/jpegli/downsample.h"
+#include "lib/jpegli/encode_finish.h"
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/encode_streaming.h"
+#include "lib/jpegli/entropy_coding.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/huffman.h"
+#include "lib/jpegli/input.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jpegli/quant.h"
+
+namespace jpegli {
+
+constexpr size_t kMaxBytesInMarker = 65533;
+
+// Reports a JPEGLI_ERROR unless the compressor is in exactly `state`.
+void CheckState(j_compress_ptr cinfo, int state) {
+  if (cinfo->global_state == state) {
+    return;
+  }
+  JPEGLI_ERROR("Unexpected global state %d [expected %d]",
+               cinfo->global_state, state);
+}
+
+// Reports a JPEGLI_ERROR unless the compressor is in `state1` or `state2`.
+void CheckState(j_compress_ptr cinfo, int state1, int state2) {
+  const bool state_ok =
+      cinfo->global_state == state1 || cinfo->global_state == state2;
+  if (state_ok) {
+    return;
+  }
+  JPEGLI_ERROR("Unexpected global state %d [expected %d or %d]",
+               cinfo->global_state, state1, state2);
+}
+
+//
+// Parameter setup
+//
+
+// Resets every compression parameter that does not depend on the input
+// image to its default value. Shared between jpegli_CreateCompress() and
+// jpegli_set_defaults().
+void InitializeCompressParams(j_compress_ptr cinfo) {
+  // Coding mode.
+  cinfo->data_precision = 8;
+  cinfo->raw_data_in = FALSE;
+  cinfo->arith_code = FALSE;
+  cinfo->optimize_coding = FALSE;
+  cinfo->CCIR601_sampling = FALSE;
+  cinfo->smoothing_factor = 0;
+  cinfo->dct_method = JDCT_FLOAT;
+
+  // Scan script and restart markers.
+  cinfo->num_scans = 0;
+  cinfo->scan_info = nullptr;
+  cinfo->restart_interval = 0;
+  cinfo->restart_in_rows = 0;
+
+  // JFIF header fields.
+  cinfo->write_JFIF_header = FALSE;
+  cinfo->JFIF_major_version = 1;
+  cinfo->JFIF_minor_version = 1;
+  cinfo->density_unit = 0;
+  cinfo->X_density = 1;
+  cinfo->Y_density = 1;
+#if JPEG_LIB_VERSION >= 70
+  cinfo->scale_num = 1;
+  cinfo->scale_denom = 1;
+  cinfo->do_fancy_downsampling = FALSE;
+  cinfo->min_DCT_h_scaled_size = DCTSIZE;
+  cinfo->min_DCT_v_scaled_size = DCTSIZE;
+#endif
+
+  // jpegli-specific quality search settings.
+  jpeg_comp_master* m = cinfo->master;
+  m->psnr_target = 0.0f;
+  m->psnr_tolerance = 0.01f;
+  m->min_distance = 0.1f;
+  m->max_distance = 25.0f;
+}
+
+// Converts a linear quantization scale factor into a distance. The factor is
+// clamped to [0, 5000], mapped to a quality value (100 - factor / 2 below
+// 100, 5000 / factor otherwise) and passed to jpegli_quality_to_distance().
+float LinearQualityToDistance(int scale_factor) {
+  if (scale_factor < 0) {
+    scale_factor = 0;
+  } else if (scale_factor > 5000) {
+    scale_factor = 5000;
+  }
+  int quality;
+  if (scale_factor < 100) {
+    quality = 100 - scale_factor / 2;
+  } else {
+    quality = 5000 / scale_factor;
+  }
+  return jpegli_quality_to_distance(quality);
+}
+
+// Sets `sent_table` to `val` on every non-null entry among the first `num`
+// pointers of `table_ptrs`.
+template <typename T>
+void SetSentTableFlag(T** table_ptrs, size_t num, boolean val) {
+  for (size_t idx = 0; idx < num; ++idx) {
+    T* table = table_ptrs[idx];
+    if (!table) {
+      continue;
+    }
+    table->sent_table = val;
+  }
+}
+
+//
+// Compressor initialization
+//
+
+// One pass of the default scan script. Member order matters: instances are
+// created with brace initialization as {Ss, Se, Ah, Al, interleaved}.
+struct ProgressiveScan {
+  int Ss;  // copied to jpeg_scan_info::Ss
+  int Se;  // copied to jpeg_scan_info::Se
+  int Ah;  // copied to jpeg_scan_info::Ah
+  int Al;  // copied to jpeg_scan_info::Al
+  // When true, up to MAX_COMPS_IN_SCAN components share one scan; otherwise
+  // every component gets a scan of its own.
+  bool interleaved;
+};
+
+// Builds the default scan script for cinfo->master->progressive_level and
+// installs it in cinfo->script_space / cinfo->scan_info / cinfo->num_scans.
+//   level 0:  one scan covering all coefficients;
+//   level 1:  a DC scan followed by two AC passes (Al = 1, then refinement);
+//   other:    a DC scan, coefficients 1..2, then three passes over 3..63.
+// The DC scan is interleaved only when no component is subsampled.
+void SetDefaultScanScript(j_compress_ptr cinfo) {
+  const int level = cinfo->master->progressive_level;
+  const bool interleave_dc =
+      (cinfo->max_h_samp_factor == 1 && cinfo->max_v_samp_factor == 1);
+  std::vector<ProgressiveScan> passes;
+  auto add_pass = [&passes](int Ss, int Se, int Ah, int Al,
+                            bool interleaved) {
+    passes.push_back({Ss, Se, Ah, Al, interleaved});
+  };
+  if (level == 0) {
+    add_pass(0, 63, 0, 0, true);
+  } else if (level == 1) {
+    add_pass(0, 0, 0, 0, interleave_dc);
+    add_pass(1, 63, 0, 1, false);
+    add_pass(1, 63, 1, 0, false);
+  } else {
+    add_pass(0, 0, 0, 0, interleave_dc);
+    add_pass(1, 2, 0, 0, false);
+    add_pass(3, 63, 0, 2, false);
+    add_pass(3, 63, 2, 1, false);
+    add_pass(3, 63, 1, 0, false);
+  }
+
+  // First pass: count how many jpeg_scan_info entries are needed.
+  cinfo->script_space_size = 0;
+  for (const ProgressiveScan& pass : passes) {
+    const int group = pass.interleaved ? MAX_COMPS_IN_SCAN : 1;
+    cinfo->script_space_size += DivCeil(cinfo->num_components, group);
+  }
+  cinfo->script_space =
+      Allocate<jpeg_scan_info>(cinfo, cinfo->script_space_size);
+
+  // Second pass: emit one entry per group of components for every pass.
+  jpeg_scan_info* out = cinfo->script_space;
+  for (const ProgressiveScan& pass : passes) {
+    const int group = pass.interleaved ? MAX_COMPS_IN_SCAN : 1;
+    for (int first = 0; first < cinfo->num_components; first += group) {
+      out->Ss = pass.Ss;
+      out->Se = pass.Se;
+      out->Ah = pass.Ah;
+      out->Al = pass.Al;
+      out->comps_in_scan = std::min(group, cinfo->num_components - first);
+      for (int k = 0; k < out->comps_in_scan; ++k) {
+        out->component_index[k] = first + k;
+      }
+      ++out;
+    }
+  }
+  JXL_ASSERT(out - cinfo->script_space == cinfo->script_space_size);
+  cinfo->scan_info = cinfo->script_space;
+  cinfo->num_scans = cinfo->script_space_size;
+}
+
+// Checks that cinfo->scan_info[0 .. num_scans) is a well-formed scan script
+// and reports a JPEGLI_ERROR on the first violation found:
+//   - component lists must be non-empty, in range and strictly increasing;
+//   - spectral ranges and successive-approximation bits must be in range;
+//   - sequential mode allows only full 0..63 scans without approximation;
+//   - progressive mode forbids mixing DC and AC, and interleaved AC scans;
+//   - every coefficient must be sent first with Ah == 0 and then refined one
+//     bit at a time, and all bits of all coefficients must be covered;
+//   - an interleaved MCU may hold at most C_MAX_BLOCKS_IN_MCU blocks.
+void ValidateScanScript(j_compress_ptr cinfo) {
+  // Mask of coefficient bits defined by the scan script, for each component
+  // and coefficient index.
+  uint16_t comp_mask[kMaxComponents][DCTSIZE2] = {};
+  static constexpr int kMaxRefinementBit = 10;
+
+  for (int i = 0; i < cinfo->num_scans; ++i) {
+    const jpeg_scan_info& si = cinfo->scan_info[i];
+    if (si.comps_in_scan < 1 || si.comps_in_scan > MAX_COMPS_IN_SCAN) {
+      JPEGLI_ERROR("Invalid number of components in scan %d", si.comps_in_scan);
+    }
+    int last_ci = -1;
+    for (int j = 0; j < si.comps_in_scan; ++j) {
+      int ci = si.component_index[j];
+      if (ci < 0 || ci >= cinfo->num_components) {
+        JPEGLI_ERROR("Invalid component index %d in scan", ci);
+      } else if (ci == last_ci) {
+        JPEGLI_ERROR("Duplicate component index %d in scan", ci);
+      } else if (ci < last_ci) {
+        JPEGLI_ERROR("Out of order component index %d in scan", ci);
+      }
+      last_ci = ci;
+    }
+    if (si.Ss < 0 || si.Se < si.Ss || si.Se >= DCTSIZE2) {
+      JPEGLI_ERROR("Invalid spectral range %d .. %d in scan", si.Ss, si.Se);
+    }
+    // Ah is used as a shift count below, so it needs an upper bound as well.
+    // A valid refinement scan has Ah equal to the Al of an earlier scan, so
+    // bounding Ah by the same limit rejects no valid script.
+    if (si.Ah < 0 || si.Ah > kMaxRefinementBit || si.Al < 0 ||
+        si.Al > kMaxRefinementBit) {
+      JPEGLI_ERROR("Invalid refinement bits %d/%d", si.Ah, si.Al);
+    }
+    if (!cinfo->progressive_mode) {
+      if (si.Ss != 0 || si.Se != DCTSIZE2 - 1 || si.Ah != 0 || si.Al != 0) {
+        JPEGLI_ERROR("Invalid scan for sequential mode");
+      }
+    } else {
+      if (si.Ss == 0 && si.Se != 0) {
+        JPEGLI_ERROR("DC and AC together in progressive scan");
+      }
+    }
+    if (si.Ss != 0 && si.comps_in_scan != 1) {
+      JPEGLI_ERROR("Interleaved AC only scan.");
+    }
+    for (int j = 0; j < si.comps_in_scan; ++j) {
+      int ci = si.component_index[j];
+      if (si.Ss != 0 && comp_mask[ci][0] == 0) {
+        JPEGLI_ERROR("AC before DC in component %d of scan", ci);
+      }
+      for (int k = si.Ss; k <= si.Se; ++k) {
+        if (comp_mask[ci][k] == 0) {
+          // First time this coefficient is sent: bits Al and above.
+          if (si.Ah != 0) {
+            JPEGLI_ERROR("Invalid first scan refinement bit");
+          }
+          comp_mask[ci][k] = ((0xffff << si.Al) & 0xffff);
+        } else {
+          // Refinement: must add exactly the next lower bit.
+          if (comp_mask[ci][k] != ((0xffff << si.Ah) & 0xffff) ||
+              si.Al != si.Ah - 1) {
+            JPEGLI_ERROR("Invalid refinement bit progression.");
+          }
+          comp_mask[ci][k] |= 1 << si.Al;
+        }
+      }
+    }
+    if (si.comps_in_scan > 1) {
+      size_t mcu_size = 0;
+      for (int j = 0; j < si.comps_in_scan; ++j) {
+        int ci = si.component_index[j];
+        jpeg_component_info* comp = &cinfo->comp_info[ci];
+        mcu_size += comp->h_samp_factor * comp->v_samp_factor;
+      }
+      if (mcu_size > C_MAX_BLOCKS_IN_MCU) {
+        JPEGLI_ERROR("MCU size too big");
+      }
+    }
+  }
+  // Every bit of every coefficient must have been sent by some scan.
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    for (int k = 0; k < DCTSIZE2; ++k) {
+      if (comp_mask[c][k] != 0xffff) {
+        JPEGLI_ERROR("Incomplete scan of component %d and frequency %d", c, k);
+      }
+    }
+  }
+}
+
+// Validates the compression parameters stored in `cinfo` and derives the
+// per-image, per-component and per-scan geometry from them:
+//   - rejects missing destinations, empty or oversized images, unsupported
+//     precision / arithmetic coding / CCIR601 sampling, and out-of-range
+//     restart intervals, smoothing factors and sampling factors;
+//   - computes the maximum sampling factors, the iMCU grid and each
+//     component's downsampled size in samples and blocks;
+//   - turns adaptive quantization off when the luma channel is subsampled;
+//   - installs the default scan script when none was supplied, validates the
+//     script, and fills m->scan_token_info and m->ac_ctx_offset per scan.
+// Any violation is reported through JPEGLI_ERROR.
+void ProcessCompressionParams(j_compress_ptr cinfo) {
+  if (cinfo->dest == nullptr) {
+    JPEGLI_ERROR("Missing destination.");
+  }
+  if (cinfo->image_width < 1 || cinfo->image_height < 1 ||
+      cinfo->input_components < 1) {
+    JPEGLI_ERROR("Empty input image.");
+  }
+  if (cinfo->image_width > static_cast<int>(JPEG_MAX_DIMENSION) ||
+      cinfo->image_height > static_cast<int>(JPEG_MAX_DIMENSION) ||
+      cinfo->input_components > static_cast<int>(kMaxComponents)) {
+    JPEGLI_ERROR("Input image too big.");
+  }
+  if (cinfo->num_components < 1 ||
+      cinfo->num_components > static_cast<int>(kMaxComponents)) {
+    JPEGLI_ERROR("Invalid number of components.");
+  }
+  if (cinfo->data_precision != kJpegPrecision) {
+    JPEGLI_ERROR("Invalid data precision");
+  }
+  if (cinfo->arith_code) {
+    JPEGLI_ERROR("Arithmetic coding is not implemented.");
+  }
+  if (cinfo->CCIR601_sampling) {
+    JPEGLI_ERROR("CCIR601 sampling is not implemented.");
+  }
+  if (cinfo->restart_interval > 65535u) {
+    JPEGLI_ERROR("Restart interval too big");
+  }
+  if (cinfo->smoothing_factor < 0 || cinfo->smoothing_factor > 100) {
+    JPEGLI_ERROR("Invalid smoothing factor %d", cinfo->smoothing_factor);
+  }
+  jpeg_comp_master* m = cinfo->master;
+  // Validate each component and find the largest sampling factors.
+  cinfo->max_h_samp_factor = cinfo->max_v_samp_factor = 1;
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    jpeg_component_info* comp = &cinfo->comp_info[c];
+    if (comp->component_index != c) {
+      JPEGLI_ERROR("Invalid component index");
+    }
+    for (int j = 0; j < c; ++j) {
+      if (cinfo->comp_info[j].component_id == comp->component_id) {
+        JPEGLI_ERROR("Duplicate component id %d", comp->component_id);
+      }
+    }
+    if (comp->h_samp_factor <= 0 || comp->v_samp_factor <= 0 ||
+        comp->h_samp_factor > MAX_SAMP_FACTOR ||
+        comp->v_samp_factor > MAX_SAMP_FACTOR) {
+      JPEGLI_ERROR("Invalid sampling factor %d x %d", comp->h_samp_factor,
+                   comp->v_samp_factor);
+    }
+    cinfo->max_h_samp_factor =
+        std::max(comp->h_samp_factor, cinfo->max_h_samp_factor);
+    cinfo->max_v_samp_factor =
+        std::max(comp->v_samp_factor, cinfo->max_v_samp_factor);
+  }
+  if (cinfo->num_components == 1 &&
+      (cinfo->max_h_samp_factor != 1 || cinfo->max_v_samp_factor != 1)) {
+    JPEGLI_ERROR("Sampling is not supported for single component image.");
+  }
+  // Image size in iMCUs and in 8x8 blocks at full resolution.
+  size_t iMCU_width = DCTSIZE * cinfo->max_h_samp_factor;
+  size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor;
+  size_t total_iMCU_cols = DivCeil(cinfo->image_width, iMCU_width);
+  cinfo->total_iMCU_rows = DivCeil(cinfo->image_height, iMCU_height);
+  m->xsize_blocks = total_iMCU_cols * cinfo->max_h_samp_factor;
+  m->ysize_blocks = cinfo->total_iMCU_rows * cinfo->max_v_samp_factor;
+
+  // Per-component downsampling ratios and downsampled dimensions.
+  size_t blocks_per_iMCU = 0;
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    jpeg_component_info* comp = &cinfo->comp_info[c];
+    if (cinfo->max_h_samp_factor % comp->h_samp_factor != 0 ||
+        cinfo->max_v_samp_factor % comp->v_samp_factor != 0) {
+      JPEGLI_ERROR("Non-integral sampling ratios are not supported.");
+    }
+    m->h_factor[c] = cinfo->max_h_samp_factor / comp->h_samp_factor;
+    m->v_factor[c] = cinfo->max_v_samp_factor / comp->v_samp_factor;
+    comp->downsampled_width = DivCeil(cinfo->image_width, m->h_factor[c]);
+    comp->downsampled_height = DivCeil(cinfo->image_height, m->v_factor[c]);
+    comp->width_in_blocks = DivCeil(comp->downsampled_width, DCTSIZE);
+    comp->height_in_blocks = DivCeil(comp->downsampled_height, DCTSIZE);
+    blocks_per_iMCU += comp->h_samp_factor * comp->v_samp_factor;
+  }
+  m->blocks_per_iMCU_row = total_iMCU_cols * blocks_per_iMCU;
+  // Disable adaptive quantization for subsampled luma channel.
+  int y_channel = cinfo->jpeg_color_space == JCS_RGB ? 1 : 0;
+  jpeg_component_info* y_comp = &cinfo->comp_info[y_channel];
+  if (y_comp->h_samp_factor != cinfo->max_h_samp_factor ||
+      y_comp->v_samp_factor != cinfo->max_v_samp_factor) {
+    m->use_adaptive_quantization = false;
+  }
+  if (cinfo->scan_info == nullptr) {
+    SetDefaultScanScript(cinfo);
+  }
+  // The script is progressive unless its first scan covers all coefficients.
+  cinfo->progressive_mode =
+      cinfo->scan_info->Ss != 0 || cinfo->scan_info->Se != DCTSIZE2 - 1;
+  ValidateScanScript(cinfo);
+  m->scan_token_info =
+      Allocate<ScanTokenInfo>(cinfo, cinfo->num_scans, JPOOL_IMAGE);
+  memset(m->scan_token_info, 0, cinfo->num_scans * sizeof(ScanTokenInfo));
+  m->ac_ctx_offset = Allocate<uint8_t>(cinfo, cinfo->num_scans, JPOOL_IMAGE);
+  // Context ids 0..3 are reserved; AC contexts are numbered from 4 upwards,
+  // one per component of every scan that contains AC coefficients.
+  size_t num_ac_contexts = 0;
+  for (int i = 0; i < cinfo->num_scans; ++i) {
+    const jpeg_scan_info* scan_info = &cinfo->scan_info[i];
+    m->ac_ctx_offset[i] = 4 + num_ac_contexts;
+    if (scan_info->Se > 0) {
+      num_ac_contexts += scan_info->comps_in_scan;
+    }
+    if (num_ac_contexts > 252) {
+      JPEGLI_ERROR("Too many AC scans in image");
+    }
+    ScanTokenInfo* sti = &m->scan_token_info[i];
+    if (scan_info->comps_in_scan == 1) {
+      // Non-interleaved scan: one block per MCU, sized by the component.
+      int comp_idx = scan_info->component_index[0];
+      jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+      sti->MCUs_per_row = comp->width_in_blocks;
+      sti->MCU_rows_in_scan = comp->height_in_blocks;
+      sti->blocks_in_MCU = 1;
+    } else {
+      // Interleaved scan: MCUs follow the iMCU grid of the image.
+      sti->MCUs_per_row =
+          DivCeil(cinfo->image_width, DCTSIZE * cinfo->max_h_samp_factor);
+      sti->MCU_rows_in_scan =
+          DivCeil(cinfo->image_height, DCTSIZE * cinfo->max_v_samp_factor);
+      sti->blocks_in_MCU = 0;
+      for (int j = 0; j < scan_info->comps_in_scan; ++j) {
+        int comp_idx = scan_info->component_index[j];
+        jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+        sti->blocks_in_MCU += comp->h_samp_factor * comp->v_samp_factor;
+      }
+    }
+    size_t num_MCUs = sti->MCU_rows_in_scan * sti->MCUs_per_row;
+    sti->num_blocks = num_MCUs * sti->blocks_in_MCU;
+    // restart_in_rows, when positive, overrides restart_interval.
+    if (cinfo->restart_in_rows <= 0) {
+      sti->restart_interval = cinfo->restart_interval;
+    } else {
+      sti->restart_interval =
+          std::min<size_t>(sti->MCUs_per_row * cinfo->restart_in_rows, 65535u);
+    }
+    sti->num_restarts = sti->restart_interval > 0
+                            ? DivCeil(num_MCUs, sti->restart_interval)
+                            : 1;
+    sti->restarts = Allocate<size_t>(cinfo, sti->num_restarts, JPOOL_IMAGE);
+  }
+  m->num_contexts = 4 + num_ac_contexts;
+}
+
+// Returns true when none of the following holds: the compressor is in the
+// kEncWriteCoeffs state, restart markers are requested, the scan script has
+// more than one scan, or a PSNR target is set.
+bool IsStreamingSupported(j_compress_ptr cinfo) {
+  // TODO(szabadka) Remove the restriction on restart markers.
+  return cinfo->global_state != kEncWriteCoeffs &&
+         !(cinfo->restart_interval > 0 || cinfo->restart_in_rows > 0) &&
+         !(cinfo->num_scans > 1) && !(cinfo->master->psnr_target > 0);
+}
+
+// Allocates the working buffers of the encoder for the current image:
+// token arrays (when streaming output is not possible or Huffman
+// optimization is on), per-component input / smoothed / downsampled row
+// buffers, DCT scratch space, virtual coefficient arrays (non-streaming
+// only), adaptive quantization buffers and zero-bias tables. When the
+// compressor is in the kEncWriteCoeffs state only the token arrays are set
+// up.
+void AllocateBuffers(j_compress_ptr cinfo) {
+  jpeg_comp_master* m = cinfo->master;
+  const bool streaming = IsStreamingSupported(cinfo);
+  memset(m->last_dc_coeff, 0, sizeof(m->last_dc_coeff));
+  if (!streaming || cinfo->optimize_coding) {
+    int block_rows = DivCeil(cinfo->image_height, DCTSIZE);
+    int num_arrays = cinfo->num_scans * block_rows;
+    m->token_arrays = Allocate<TokenArray>(cinfo, num_arrays, JPOOL_IMAGE);
+    memset(m->token_arrays, 0, num_arrays * sizeof(TokenArray));
+    m->cur_token_array = 0;
+    m->num_tokens = 0;
+    m->total_num_tokens = 0;
+  }
+  if (cinfo->global_state == kEncWriteCoeffs) {
+    return;
+  }
+
+  // Row buffers hold three iMCU rows of the image.
+  size_t iMCU_width = DCTSIZE * cinfo->max_h_samp_factor;
+  size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor;
+  size_t total_iMCU_cols = DivCeil(cinfo->image_width, iMCU_width);
+  size_t full_width = total_iMCU_cols * iMCU_width;
+  size_t full_height = 3 * iMCU_height;
+  if (!cinfo->raw_data_in) {
+    int num_all_components =
+        std::max(cinfo->input_components, cinfo->num_components);
+    for (int c = 0; c < num_all_components; ++c) {
+      m->input_buffer[c].Allocate(cinfo, full_height, full_width);
+    }
+  }
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    jpeg_component_info* comp = &cinfo->comp_info[c];
+    size_t comp_width = total_iMCU_cols * comp->h_samp_factor * DCTSIZE;
+    size_t comp_height = 3 * comp->v_samp_factor * DCTSIZE;
+    if (cinfo->raw_data_in) {
+      m->input_buffer[c].Allocate(cinfo, comp_height, comp_width);
+    }
+    // The smoothed and downsampled views alias the previous stage unless
+    // that stage actually runs for this component.
+    m->smooth_input[c] = &m->input_buffer[c];
+    if (!cinfo->raw_data_in && cinfo->smoothing_factor) {
+      m->smooth_input[c] = Allocate<RowBuffer<float>>(cinfo, 1, JPOOL_IMAGE);
+      m->smooth_input[c]->Allocate(cinfo, full_height, full_width);
+    }
+    m->raw_data[c] = m->smooth_input[c];
+    if (!cinfo->raw_data_in && (m->h_factor[c] > 1 || m->v_factor[c] > 1)) {
+      m->raw_data[c] = Allocate<RowBuffer<float>>(cinfo, 1, JPOOL_IMAGE);
+      m->raw_data[c]->Allocate(cinfo, comp_height, comp_width);
+    }
+    m->quant_mul[c] = Allocate<float>(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED);
+  }
+  m->dct_buffer = Allocate<float>(cinfo, 2 * DCTSIZE2, JPOOL_IMAGE_ALIGNED);
+  m->block_tmp = Allocate<int32_t>(cinfo, DCTSIZE2 * 4, JPOOL_IMAGE_ALIGNED);
+  if (!streaming) {
+    m->coeff_buffers =
+        Allocate<jvirt_barray_ptr>(cinfo, cinfo->num_components, JPOOL_IMAGE);
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      jpeg_component_info* comp = &cinfo->comp_info[c];
+      const size_t blocks_wide = comp->width_in_blocks;
+      const size_t blocks_high = comp->height_in_blocks;
+      m->coeff_buffers[c] = (*cinfo->mem->request_virt_barray)(
+          reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE,
+          /*pre_zero=*/false, blocks_wide, blocks_high, comp->v_samp_factor);
+    }
+  }
+  if (!m->use_adaptive_quantization) {
+    m->quant_field.Allocate(cinfo, 1, m->xsize_blocks);
+    m->quant_field.FillRow(0, 0, m->xsize_blocks);
+  } else {
+    int y_channel = cinfo->jpeg_color_space == JCS_RGB ? 1 : 0;
+    jpeg_component_info* y_comp = &cinfo->comp_info[y_channel];
+    const size_t blocks_wide = y_comp->width_in_blocks;
+    const size_t vecsize = VectorSize();
+    const size_t padded_width = DivCeil(2 * blocks_wide, vecsize) * vecsize;
+    m->diff_buffer =
+        Allocate<float>(cinfo, blocks_wide * DCTSIZE + 8, JPOOL_IMAGE_ALIGNED);
+    m->fuzzy_erosion_tmp.Allocate(cinfo, 2, padded_width);
+    m->pre_erosion.Allocate(cinfo, 6 * cinfo->max_v_samp_factor, padded_width);
+    // With a PSNR target the quant field is kept for the whole image.
+    size_t qf_height = cinfo->max_v_samp_factor;
+    if (m->psnr_target > 0) {
+      qf_height *= cinfo->total_iMCU_rows;
+    }
+    m->quant_field.Allocate(cinfo, qf_height, blocks_wide);
+  }
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    m->zero_bias_offset[c] =
+        Allocate<float>(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED);
+    m->zero_bias_mul[c] = Allocate<float>(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED);
+    memset(m->zero_bias_offset[c], 0, DCTSIZE2 * sizeof(float));
+    memset(m->zero_bias_mul[c], 0, DCTSIZE2 * sizeof(float));
+  }
+}
+
+// Publishes the expected number of passes to the optional progress monitor:
+// a single input pass when streaming is possible, otherwise one input pass
+// plus a histogram pass and an encode pass for each scan.
+void InitProgressMonitor(j_compress_ptr cinfo) {
+  if (cinfo->progress != nullptr) {
+    cinfo->progress->total_passes =
+        IsStreamingSupported(cinfo) ? 1 : 1 + 2 * cinfo->num_scans;
+  }
+}
+
+// Setup shared by the streaming and the transcoding code paths; called from
+// both jpegli_start_compress() and jpegli_write_coefficients(). Resets the
+// error manager, validates parameters, allocates buffers, selects the input,
+// color transform, downsampling and quantization methods (skipped in the
+// kEncWriteCoeffs state), then initializes the destination and writes the
+// file header.
+void InitCompress(j_compress_ptr cinfo, boolean write_all_tables) {
+  jpeg_comp_master* m = cinfo->master;
+  (*cinfo->err->reset_error_mgr)(reinterpret_cast<j_common_ptr>(cinfo));
+  ProcessCompressionParams(cinfo);
+  InitProgressMonitor(cinfo);
+  AllocateBuffers(cinfo);
+  if (cinfo->global_state != kEncWriteCoeffs) {
+    ChooseInputMethod(cinfo);
+    if (!cinfo->raw_data_in) {
+      ChooseColorTransform(cinfo);
+      ChooseDownsampleMethods(cinfo);
+    }
+    QuantPass pass = QuantPass::NO_SEARCH;
+    if (m->psnr_target > 0) {
+      pass = QuantPass::SEARCH_FIRST_PASS;
+    }
+    InitQuantizer(cinfo, pass);
+  }
+  if (write_all_tables) {
+    jpegli_suppress_tables(cinfo, FALSE);
+  }
+  const bool fixed_huffman_tables =
+      !cinfo->optimize_coding && !cinfo->progressive_mode;
+  if (fixed_huffman_tables) {
+    CopyHuffmanTables(cinfo);
+    InitEntropyCoder(cinfo);
+  }
+  (*cinfo->dest->init_destination)(cinfo);
+  WriteFileHeader(cinfo);
+  JpegBitWriterInit(cinfo);
+  m->next_dht_index = 0;
+  m->last_restart_interval = 0;
+  m->next_iMCU_row = 0;
+}
+
+//
+// Input streaming
+//
+
+// Reports input-pass progress (scanlines consumed out of image_height) to
+// the progress monitor, if one is installed.
+void ProgressMonitorInputPass(j_compress_ptr cinfo) {
+  auto* progress = cinfo->progress;
+  if (progress == nullptr) {
+    return;
+  }
+  progress->completed_passes = 0;
+  progress->pass_counter = cinfo->next_scanline;
+  progress->pass_limit = cinfo->image_height;
+  (*progress->progress_monitor)(reinterpret_cast<j_common_ptr>(cinfo));
+}
+
+ // Points row[c] at the next row of each component's input buffer, advances
+ // next_input_row, and fills the rows from `scanline` via the selected input
+ // method. A null scanline zero-fills the first input_components rows instead.
+ void ReadInputRow(j_compress_ptr cinfo, const uint8_t* scanline,
+                   float* row[kMaxComponents]) {
+   jpeg_comp_master* master = cinfo->master;
+   const int total_components =
+       std::max(cinfo->input_components, cinfo->num_components);
+   for (int comp = 0; comp < total_components; ++comp) {
+     row[comp] = master->input_buffer[comp].Row(master->next_input_row);
+   }
+   ++master->next_input_row;
+   if (scanline != nullptr) {
+     (*master->input_method)(scanline, cinfo->image_width, row);
+     return;
+   }
+   const size_t row_bytes = cinfo->image_width * sizeof(float);
+   for (int comp = 0; comp < cinfo->input_components; ++comp) {
+     memset(row[comp], 0, row_bytes);
+   }
+ }
+
+ // Pads the rows just read, horizontally and (after the last image row)
+ // vertically. Each row gets its last sample repeated up to and including
+ // index xsize_blocks * DCTSIZE, and its first sample copied to index -1.
+ // Once next_input_row equals image_height, the padded row is replicated into
+ // the remaining rows up to ysize_blocks * DCTSIZE.
+ void PadInputBuffer(j_compress_ptr cinfo, float* row[kMaxComponents]) {
+ jpeg_comp_master* m = cinfo->master;
+ const size_t len0 = cinfo->image_width;
+ const size_t len1 = m->xsize_blocks * DCTSIZE;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ // Pad row to a multiple of the iMCU width, plus create a border of 1
+ // repeated pixel for adaptive quant field calculation.
+ float last_val = row[c][len0 - 1];
+ // Note the inclusive bound: index len1 is the right border sample.
+ for (size_t x = len0; x <= len1; ++x) {
+ row[c][x] = last_val;
+ }
+ // Left border sample.
+ row[c][-1] = row[c][0];
+ }
+ if (m->next_input_row == cinfo->image_height) {
+ size_t num_rows = m->ysize_blocks * DCTSIZE - cinfo->image_height;
+ for (size_t i = 0; i < num_rows; ++i) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ // Copy starts at index -1 and spans len1 + 2 samples so that both border
+ // samples are replicated together with the row.
+ float* dest = m->input_buffer[c].Row(m->next_input_row) - 1;
+ memcpy(dest, row[c] - 1, (len1 + 2) * sizeof(dest[0]));
+ }
+ ++m->next_input_row;
+ }
+ }
+ }
+
+ // Processes the iMCU row at index next_iMCU_row and advances that index.
+ // Pixel input is smoothed and downsampled first (skipped for raw data input),
+ // then the adaptive quant field is computed. Finally the row is written
+ // directly, tokenized, or stored as coefficients, depending on whether
+ // streaming is supported and whether optimize_coding is set.
+ void ProcessiMCURow(j_compress_ptr cinfo) {
+   JXL_ASSERT(cinfo->master->next_iMCU_row < cinfo->total_iMCU_rows);
+   if (!cinfo->raw_data_in) {
+     ApplyInputSmoothing(cinfo);
+     DownsampleInputBuffer(cinfo);
+   }
+   ComputeAdaptiveQuantField(cinfo);
+   if (!IsStreamingSupported(cinfo)) {
+     ComputeCoefficientsForiMCURow(cinfo);
+   } else if (cinfo->optimize_coding) {
+     ComputeTokensForiMCURow(cinfo);
+   } else {
+     WriteiMCURow(cinfo);
+   }
+   ++cinfo->master->next_iMCU_row;
+ }
+
+ // Decides, from the number of input rows buffered so far, whether one or two
+ // iMCU rows can be processed now, and processes them.
+ void ProcessiMCURows(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor;
+ // To have context rows both above and below the current iMCU row, we delay
+ // processing the first iMCU row and process two iMCU rows after we receive
+ // the last input row.
+ // The strict '>' is what skips processing when only the first iMCU row has
+ // been buffered.
+ if (m->next_input_row % iMCU_height == 0 && m->next_input_row > iMCU_height) {
+ ProcessiMCURow(cinfo);
+ }
+ // All input rows are available: process the delayed final iMCU row.
+ if (m->next_input_row >= cinfo->image_height) {
+ ProcessiMCURow(cinfo);
+ }
+ }
+
+//
+// Non-streaming part
+//
+
+ // Permutes the coefficients of every block of every component in place, so
+ // that position k holds the coefficient previously at kJPEGNaturalOrder[k].
+ void ZigZagShuffleBlocks(j_compress_ptr cinfo) {
+   for (int comp_idx = 0; comp_idx < cinfo->num_components; ++comp_idx) {
+     const jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+     for (JDIMENSION row = 0; row < comp->height_in_blocks; ++row) {
+       JBLOCKARRAY block_rows = GetBlockRow(cinfo, comp_idx, row);
+       for (JDIMENSION col = 0; col < comp->width_in_blocks; ++col) {
+         JCOEF* coeffs = &block_rows[0][col][0];
+         // Build the permuted block in a scratch array because the permutation
+         // cannot be applied in place.
+         JCOEF shuffled[DCTSIZE2];
+         for (int k = 0; k < DCTSIZE2; ++k) {
+           shuffled[k] = coeffs[kJPEGNaturalOrder[k]];
+         }
+         memcpy(coeffs, shuffled, sizeof(shuffled));
+       }
+     }
+   }
+ }
+
+} // namespace jpegli
+
+//
+// Parameter setup
+//
+
+ // Initializes a compressor object: verifies the caller's struct size, sets up
+ // the memory manager, resets the public fields to empty/unknown values,
+ // allocates the jpeg_comp_master and sets jpegli-specific defaults on it.
+ // The `version` argument is not used in this function body.
+ void jpegli_CreateCompress(j_compress_ptr cinfo, int version,
+ size_t structsize) {
+ // mem is cleared before the size check, i.e. before any error can be raised.
+ cinfo->mem = nullptr;
+ if (structsize != sizeof(*cinfo)) {
+ JPEGLI_ERROR("jpegli_compress_struct has wrong size.");
+ }
+ jpegli::InitMemoryManager(reinterpret_cast<j_common_ptr>(cinfo));
+ cinfo->progress = nullptr;
+ cinfo->is_decompressor = FALSE;
+ cinfo->global_state = jpegli::kEncStart;
+ cinfo->dest = nullptr;
+ cinfo->image_width = 0;
+ cinfo->image_height = 0;
+ cinfo->input_components = 0;
+ cinfo->in_color_space = JCS_UNKNOWN;
+ cinfo->input_gamma = 1.0f;
+ cinfo->num_components = 0;
+ cinfo->jpeg_color_space = JCS_UNKNOWN;
+ cinfo->comp_info = nullptr;
+ for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+ cinfo->quant_tbl_ptrs[i] = nullptr;
+ }
+ for (int i = 0; i < NUM_HUFF_TBLS; ++i) {
+ cinfo->dc_huff_tbl_ptrs[i] = nullptr;
+ cinfo->ac_huff_tbl_ptrs[i] = nullptr;
+ }
+ memset(cinfo->arith_dc_L, 0, sizeof(cinfo->arith_dc_L));
+ memset(cinfo->arith_dc_U, 0, sizeof(cinfo->arith_dc_U));
+ memset(cinfo->arith_ac_K, 0, sizeof(cinfo->arith_ac_K));
+ cinfo->write_Adobe_marker = false;
+ // The master struct is allocated through the memory manager set up above.
+ cinfo->master = jpegli::Allocate<jpeg_comp_master>(cinfo, 1);
+ jpegli::InitializeCompressParams(cinfo);
+ // jpegli-specific defaults; these can be changed by the jpegli_set_*() /
+ // jpegli_enable_*() / jpegli_use_*() functions below.
+ cinfo->master->force_baseline = true;
+ cinfo->master->xyb_mode = false;
+ cinfo->master->cicp_transfer_function = 2; // unknown transfer function code
+ cinfo->master->use_std_tables = false;
+ cinfo->master->use_adaptive_quantization = true;
+ cinfo->master->progressive_level = jpegli::kDefaultProgressiveLevel;
+ cinfo->master->data_type = JPEGLI_TYPE_UINT8;
+ cinfo->master->endianness = JPEGLI_NATIVE_ENDIAN;
+ cinfo->master->coeff_buffers = nullptr;
+ }
+
+ // Turns on XYB mode. Only allowed in the kEncStart state.
+ void jpegli_set_xyb_mode(j_compress_ptr cinfo) {
+   CheckState(cinfo, jpegli::kEncStart);
+   jpeg_comp_master* master = cinfo->master;
+   master->xyb_mode = true;
+ }
+
+ // Records the CICP transfer function code of the input pixels. Only allowed
+ // in the kEncStart state. The code is stored as given, without validation.
+ void jpegli_set_cicp_transfer_function(j_compress_ptr cinfo, int code) {
+   CheckState(cinfo, jpegli::kEncStart);
+   jpeg_comp_master* master = cinfo->master;
+   master->cicp_transfer_function = code;
+ }
+
+ // Resets the compression parameters to their defaults: the default jpeg
+ // colorspace for the current in_color_space, quality 90 with force_baseline,
+ // the default progressive level, and the standard AC and DC Huffman tables.
+ void jpegli_set_defaults(j_compress_ptr cinfo) {
+   CheckState(cinfo, jpegli::kEncStart);
+   jpegli::InitializeCompressParams(cinfo);
+   jpegli_default_colorspace(cinfo);
+   jpegli_set_quality(cinfo, 90, TRUE);
+   jpegli_set_progressive_level(cinfo, jpegli::kDefaultProgressiveLevel);
+   j_common_ptr common = reinterpret_cast<j_common_ptr>(cinfo);
+   // AC tables are added first, then DC tables.
+   jpegli::AddStandardHuffmanTables(common, /*is_dc=*/false);
+   jpegli::AddStandardHuffmanTables(common, /*is_dc=*/true);
+ }
+
+ // Selects the jpeg colorspace that corresponds to cinfo->in_color_space.
+ // Every supported input colorspace maps to itself, except JCS_RGB, which maps
+ // to JCS_YCbCr unless XYB mode is enabled. Other values raise an error.
+ void jpegli_default_colorspace(j_compress_ptr cinfo) {
+   CheckState(cinfo, jpegli::kEncStart);
+   J_COLOR_SPACE target;
+   switch (cinfo->in_color_space) {
+     case JCS_GRAYSCALE:
+     case JCS_YCbCr:
+     case JCS_CMYK:
+     case JCS_YCCK:
+     case JCS_UNKNOWN:
+       target = cinfo->in_color_space;
+       break;
+     case JCS_RGB:
+       target = cinfo->master->xyb_mode ? JCS_RGB : JCS_YCbCr;
+       break;
+     default:
+       JPEGLI_ERROR("Unsupported input colorspace %d", cinfo->in_color_space);
+       return;
+   }
+   jpegli_set_colorspace(cinfo, target);
+ }
+
+ // Sets the jpeg colorspace and (re)initializes the per-component parameters:
+ // component count, ids, sampling factors and table indexes. Raises an error
+ // for colorspaces not listed in the switch below.
+ void jpegli_set_colorspace(j_compress_ptr cinfo, J_COLOR_SPACE colorspace) {
+ CheckState(cinfo, jpegli::kEncStart);
+ cinfo->jpeg_color_space = colorspace;
+ switch (colorspace) {
+ case JCS_GRAYSCALE:
+ cinfo->num_components = 1;
+ break;
+ case JCS_RGB:
+ case JCS_YCbCr:
+ cinfo->num_components = 3;
+ break;
+ case JCS_CMYK:
+ case JCS_YCCK:
+ cinfo->num_components = 4;
+ break;
+ case JCS_UNKNOWN:
+ // Component count follows the input, capped at kMaxComponents.
+ cinfo->num_components =
+ std::min<int>(jpegli::kMaxComponents, cinfo->input_components);
+ break;
+ default:
+ JPEGLI_ERROR("Unsupported jpeg colorspace %d", colorspace);
+ }
+ // Adobe marker is only needed to distinguish CMYK and YCCK JPEGs.
+ cinfo->write_Adobe_marker = (cinfo->jpeg_color_space == JCS_YCCK);
+ // comp_info is allocated once and reused on later calls.
+ // NOTE(review): the allocation is sized by MAX_COMPONENTS while the memset
+ // below is sized by jpegli::kMaxComponents -- confirm this is intended.
+ if (cinfo->comp_info == nullptr) {
+ cinfo->comp_info =
+ jpegli::Allocate<jpeg_component_info>(cinfo, MAX_COMPONENTS);
+ }
+ memset(cinfo->comp_info, 0,
+ jpegli::kMaxComponents * sizeof(jpeg_component_info));
+ // Baseline per-component defaults: ids 1..n, no subsampling, table 0.
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ comp->component_index = c;
+ comp->component_id = c + 1;
+ comp->h_samp_factor = 1;
+ comp->v_samp_factor = 1;
+ comp->quant_tbl_no = 0;
+ comp->dc_tbl_no = 0;
+ comp->ac_tbl_no = 0;
+ }
+ // Colorspace-specific overrides of the defaults above.
+ if (colorspace == JCS_RGB) {
+ cinfo->comp_info[0].component_id = 'R';
+ cinfo->comp_info[1].component_id = 'G';
+ cinfo->comp_info[2].component_id = 'B';
+ if (cinfo->master->xyb_mode) {
+ // Subsample blue channel.
+ cinfo->comp_info[0].h_samp_factor = cinfo->comp_info[0].v_samp_factor = 2;
+ cinfo->comp_info[1].h_samp_factor = cinfo->comp_info[1].v_samp_factor = 2;
+ cinfo->comp_info[2].h_samp_factor = cinfo->comp_info[2].v_samp_factor = 1;
+ // Use separate quantization tables for each component
+ cinfo->comp_info[1].quant_tbl_no = 1;
+ cinfo->comp_info[2].quant_tbl_no = 2;
+ }
+ } else if (colorspace == JCS_CMYK) {
+ cinfo->comp_info[0].component_id = 'C';
+ cinfo->comp_info[1].component_id = 'M';
+ cinfo->comp_info[2].component_id = 'Y';
+ cinfo->comp_info[3].component_id = 'K';
+ } else if (colorspace == JCS_YCbCr || colorspace == JCS_YCCK) {
+ // Use separate quantization and Huffman tables for luma and chroma
+ cinfo->comp_info[1].quant_tbl_no = 1;
+ cinfo->comp_info[2].quant_tbl_no = 1;
+ cinfo->comp_info[1].dc_tbl_no = cinfo->comp_info[1].ac_tbl_no = 1;
+ cinfo->comp_info[2].dc_tbl_no = cinfo->comp_info[2].ac_tbl_no = 1;
+ }
+ }
+
+ // Sets the quantization matrices from a target distance, requesting two
+ // chroma tables. The same distance is used for the first three table slots;
+ // any remaining slot is left at zero.
+ void jpegli_set_distance(j_compress_ptr cinfo, float distance,
+                          boolean force_baseline) {
+   CheckState(cinfo, jpegli::kEncStart);
+   cinfo->master->force_baseline = force_baseline;
+   float per_table[NUM_QUANT_TBLS] = {};
+   for (int i = 0; i < 3; ++i) {
+     per_table[i] = distance;
+   }
+   jpegli::SetQuantMatrices(cinfo, per_table, /*add_two_chroma_tables=*/true);
+ }
+
+ // Maps a quality value to a distance:
+ //   quality >= 100 : 0.01
+ //   30 <= quality  : linear, 0.1 + 0.09 * (100 - quality)
+ //   quality < 30   : quadratic in quality
+ float jpegli_quality_to_distance(int quality) {
+   if (quality >= 100) {
+     return 0.01f;
+   }
+   if (quality >= 30) {
+     return 0.1f + (100 - quality) * 0.09f;
+   }
+   return 53.0f / 3000.0f * quality * quality - 23.0f / 20.0f * quality + 25.0f;
+ }
+
+ // Stores the PSNR target together with its tolerance and the distance bounds.
+ // A positive psnr_target makes InitCompress() select the search quantizer
+ // pass and makes jpegli_finish_compress() call QuantizetoPSNR().
+ void jpegli_set_psnr(j_compress_ptr cinfo, float psnr, float tolerance,
+                      float min_distance, float max_distance) {
+   CheckState(cinfo, jpegli::kEncStart);
+   jpeg_comp_master* master = cinfo->master;
+   master->psnr_target = psnr;
+   master->psnr_tolerance = tolerance;
+   master->min_distance = min_distance;
+   master->max_distance = max_distance;
+ }
+
+ // Sets the quantization matrices from a quality value, converted with
+ // jpegli_quality_to_distance(). The same distance is used for the first
+ // three table slots; any remaining slot is left at zero.
+ void jpegli_set_quality(j_compress_ptr cinfo, int quality,
+                         boolean force_baseline) {
+   CheckState(cinfo, jpegli::kEncStart);
+   cinfo->master->force_baseline = force_baseline;
+   const float distance = jpegli_quality_to_distance(quality);
+   float per_table[NUM_QUANT_TBLS] = {};
+   for (int i = 0; i < 3; ++i) {
+     per_table[i] = distance;
+   }
+   jpegli::SetQuantMatrices(cinfo, per_table, /*add_two_chroma_tables=*/false);
+ }
+
+ // Sets the quantization matrices from a linear scale factor, converted with
+ // jpegli::LinearQualityToDistance(). The same distance is used for the first
+ // three table slots; any remaining slot is left at zero.
+ void jpegli_set_linear_quality(j_compress_ptr cinfo, int scale_factor,
+                                boolean force_baseline) {
+   CheckState(cinfo, jpegli::kEncStart);
+   cinfo->master->force_baseline = force_baseline;
+   const float distance = jpegli::LinearQualityToDistance(scale_factor);
+   float per_table[NUM_QUANT_TBLS] = {};
+   for (int i = 0; i < 3; ++i) {
+     per_table[i] = distance;
+   }
+   jpegli::SetQuantMatrices(cinfo, per_table, /*add_two_chroma_tables=*/false);
+ }
+
+#if JPEG_LIB_VERSION >= 70
+ // Sets the quantization matrices from the per-table cinfo->q_scale_factor[]
+ // values, each converted with jpegli::LinearQualityToDistance().
+ void jpegli_default_qtables(j_compress_ptr cinfo, boolean force_baseline) {
+   CheckState(cinfo, jpegli::kEncStart);
+   cinfo->master->force_baseline = force_baseline;
+   float per_table[NUM_QUANT_TBLS];
+   for (int tbl = 0; tbl < NUM_QUANT_TBLS; ++tbl) {
+     const int scale = cinfo->q_scale_factor[tbl];
+     per_table[tbl] = jpegli::LinearQualityToDistance(scale);
+   }
+   jpegli::SetQuantMatrices(cinfo, per_table, /*add_two_chroma_tables=*/false);
+ }
+#endif
+
+ // Converts a quality rating to a percentage scale factor. The quality is
+ // clamped to [1, 100]; below 50 the result is 5000 / quality, otherwise it is
+ // 200 - 2 * quality.
+ int jpegli_quality_scaling(int quality) {
+   if (quality < 1) {
+     quality = 1;
+   } else if (quality > 100) {
+     quality = 100;
+   }
+   if (quality < 50) {
+     return 5000 / quality;
+   }
+   return 200 - 2 * quality;
+ }
+
+ // Sets the use_std_tables flag on the master struct. Only allowed in the
+ // kEncStart state.
+ void jpegli_use_standard_quant_tables(j_compress_ptr cinfo) {
+   CheckState(cinfo, jpegli::kEncStart);
+   jpeg_comp_master* master = cinfo->master;
+   master->use_std_tables = true;
+ }
+
+ // Installs a custom quantization table in slot `which_tbl`, allocating the
+ // slot on first use.
+ //
+ // Each of the DCTSIZE2 entries is computed as
+ // (basic_table[k] * scale_factor + 50) / 100 and clamped to [1, max], where
+ // max is 255 when force_baseline is set and 32767 otherwise. The table is
+ // marked as not yet sent. An out-of-range slot index raises an error.
+ void jpegli_add_quant_table(j_compress_ptr cinfo, int which_tbl,
+                             const unsigned int* basic_table, int scale_factor,
+                             boolean force_baseline) {
+   CheckState(cinfo, jpegli::kEncStart);
+   // Valid slots are [0, NUM_QUANT_TBLS). Accepting which_tbl == NUM_QUANT_TBLS
+   // would index one past the end of quant_tbl_ptrs.
+   if (which_tbl < 0 || which_tbl >= NUM_QUANT_TBLS) {
+     JPEGLI_ERROR("Invalid quant table index %d", which_tbl);
+   }
+   if (cinfo->quant_tbl_ptrs[which_tbl] == nullptr) {
+     cinfo->quant_tbl_ptrs[which_tbl] =
+         jpegli_alloc_quant_table(reinterpret_cast<j_common_ptr>(cinfo));
+   }
+   const int max_qval = force_baseline ? 255 : 32767;
+   JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[which_tbl];
+   for (int k = 0; k < DCTSIZE2; ++k) {
+     // "+ 50" rounds the percentage scaling to the nearest integer.
+     int qval = (basic_table[k] * scale_factor + 50) / 100;
+     qval = std::max(1, std::min(qval, max_qval));
+     quant_table->quantval[k] = qval;
+   }
+   quant_table->sent_table = FALSE;
+ }
+
+ // Stores `value` in the use_adaptive_quantization flag. Only allowed in the
+ // kEncStart state.
+ void jpegli_enable_adaptive_quantization(j_compress_ptr cinfo, boolean value) {
+   CheckState(cinfo, jpegli::kEncStart);
+   jpeg_comp_master* master = cinfo->master;
+   master->use_adaptive_quantization = value;
+ }
+
+ // Selects progressive level 2 via jpegli_set_progressive_level().
+ void jpegli_simple_progression(j_compress_ptr cinfo) {
+   CheckState(cinfo, jpegli::kEncStart);
+   constexpr int kSimpleProgressionLevel = 2;
+   jpegli_set_progressive_level(cinfo, kSimpleProgressionLevel);
+ }
+
+ // Stores the progressive level on the master struct. A negative level raises
+ // an error. Only allowed in the kEncStart state.
+ void jpegli_set_progressive_level(j_compress_ptr cinfo, int level) {
+   CheckState(cinfo, jpegli::kEncStart);
+   const bool valid = level >= 0;
+   if (!valid) {
+     JPEGLI_ERROR("Invalid progressive level %d", level);
+   }
+   jpeg_comp_master* master = cinfo->master;
+   master->progressive_level = level;
+ }
+
+ // Stores the sample data type and endianness of the input on the master
+ // struct. Accepted data types are UINT8, UINT16 and FLOAT; accepted
+ // endianness values are NATIVE, LITTLE and BIG. Anything else raises an error.
+ void jpegli_set_input_format(j_compress_ptr cinfo, JpegliDataType data_type,
+                              JpegliEndianness endianness) {
+   CheckState(cinfo, jpegli::kEncStart);
+   jpeg_comp_master* master = cinfo->master;
+   const bool type_ok = data_type == JPEGLI_TYPE_UINT8 ||
+                        data_type == JPEGLI_TYPE_UINT16 ||
+                        data_type == JPEGLI_TYPE_FLOAT;
+   if (type_ok) {
+     master->data_type = data_type;
+   } else {
+     JPEGLI_ERROR("Unsupported data type %d", data_type);
+   }
+   const bool endian_ok = endianness == JPEGLI_NATIVE_ENDIAN ||
+                          endianness == JPEGLI_LITTLE_ENDIAN ||
+                          endianness == JPEGLI_BIG_ENDIAN;
+   if (endian_ok) {
+     master->endianness = endianness;
+   } else {
+     JPEGLI_ERROR("Unsupported endianness %d", endianness);
+   }
+ }
+
+#if JPEG_LIB_VERSION >= 70
+ // Sets cinfo->jpeg_width / jpeg_height to the input image dimensions.
+ void jpegli_calc_jpeg_dimensions(j_compress_ptr cinfo) {
+ // Since input scaling is not supported, we just copy the image dimensions.
+ cinfo->jpeg_width = cinfo->image_width;
+ cinfo->jpeg_height = cinfo->image_height;
+ }
+#endif
+
+ // Copies the parameters needed for transcoding from a decompressor to a
+ // compressor: image dimensions and colorspace, JFIF/density fields,
+ // per-component ids, sampling factors and quant table indexes, and the
+ // quantization tables. Defaults are applied to dstinfo first, so any
+ // previously set compression parameters on dstinfo are reset.
+ void jpegli_copy_critical_parameters(j_decompress_ptr srcinfo,
+ j_compress_ptr dstinfo) {
+ CheckState(dstinfo, jpegli::kEncStart);
+ // Image parameters.
+ dstinfo->image_width = srcinfo->image_width;
+ dstinfo->image_height = srcinfo->image_height;
+ dstinfo->input_components = srcinfo->num_components;
+ dstinfo->in_color_space = srcinfo->jpeg_color_space;
+ dstinfo->input_gamma = srcinfo->output_gamma;
+ // Compression parameters.
+ jpegli_set_defaults(dstinfo);
+ jpegli_set_colorspace(dstinfo, srcinfo->jpeg_color_space);
+ if (dstinfo->num_components != srcinfo->num_components) {
+ // NOTE(review): the alias presumably exists because JPEGLI_ERROR refers to
+ // a variable named `cinfo` -- confirm against the macro definition.
+ const auto& cinfo = dstinfo;
+ return JPEGLI_ERROR("Mismatch between src colorspace and components");
+ }
+ dstinfo->data_precision = srcinfo->data_precision;
+ dstinfo->CCIR601_sampling = srcinfo->CCIR601_sampling;
+ dstinfo->JFIF_major_version = srcinfo->JFIF_major_version;
+ dstinfo->JFIF_minor_version = srcinfo->JFIF_minor_version;
+ dstinfo->density_unit = srcinfo->density_unit;
+ dstinfo->X_density = srcinfo->X_density;
+ dstinfo->Y_density = srcinfo->Y_density;
+ // Huffman table indexes (dc_tbl_no / ac_tbl_no) are not copied here; they
+ // keep the values assigned by jpegli_set_colorspace() above.
+ for (int c = 0; c < dstinfo->num_components; ++c) {
+ jpeg_component_info* srccomp = &srcinfo->comp_info[c];
+ jpeg_component_info* dstcomp = &dstinfo->comp_info[c];
+ dstcomp->component_id = srccomp->component_id;
+ dstcomp->h_samp_factor = srccomp->h_samp_factor;
+ dstcomp->v_samp_factor = srccomp->v_samp_factor;
+ dstcomp->quant_tbl_no = srccomp->quant_tbl_no;
+ }
+ // Copy every quantization table present in the source and mark it unsent.
+ for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+ if (!srcinfo->quant_tbl_ptrs[i]) continue;
+ if (dstinfo->quant_tbl_ptrs[i] == nullptr) {
+ dstinfo->quant_tbl_ptrs[i] = jpegli::Allocate<JQUANT_TBL>(dstinfo, 1);
+ }
+ memcpy(dstinfo->quant_tbl_ptrs[i], srcinfo->quant_tbl_ptrs[i],
+ sizeof(JQUANT_TBL));
+ dstinfo->quant_tbl_ptrs[i]->sent_table = FALSE;
+ }
+ }
+
+ // Applies `suppress` to the quantization, DC Huffman and AC Huffman table
+ // slots through jpegli::SetSentTableFlag(). No state check is performed.
+ void jpegli_suppress_tables(j_compress_ptr cinfo, boolean suppress) {
+ jpegli::SetSentTableFlag(cinfo->quant_tbl_ptrs, NUM_QUANT_TBLS, suppress);
+ jpegli::SetSentTableFlag(cinfo->dc_huff_tbl_ptrs, NUM_HUFF_TBLS, suppress);
+ jpegli::SetSentTableFlag(cinfo->ac_huff_tbl_ptrs, NUM_HUFF_TBLS, suppress);
+ }
+
+//
+// Compressor initialization
+//
+
+ // Starts compression from pixel or raw data input: moves the state machine
+ // to kEncHeader, runs the common setup and resets the input row counters.
+ void jpegli_start_compress(j_compress_ptr cinfo, boolean write_all_tables) {
+ CheckState(cinfo, jpegli::kEncStart);
+ // global_state must be set before InitCompress(), which reads it.
+ cinfo->global_state = jpegli::kEncHeader;
+ jpegli::InitCompress(cinfo, write_all_tables);
+ cinfo->next_scanline = 0;
+ cinfo->master->next_input_row = 0;
+ }
+
+ // Starts compression from pre-computed coefficient arrays (transcoding):
+ // moves the state machine to kEncWriteCoeffs, runs the common setup with all
+ // tables written, and stores the coefficient arrays on the master struct.
+ void jpegli_write_coefficients(j_compress_ptr cinfo,
+ jvirt_barray_ptr* coef_arrays) {
+ CheckState(cinfo, jpegli::kEncStart);
+ // global_state must be set before InitCompress(), which reads it.
+ cinfo->global_state = jpegli::kEncWriteCoeffs;
+ jpegli::InitCompress(cinfo, /*write_all_tables=*/true);
+ cinfo->master->coeff_buffers = coef_arrays;
+ // Mark all input rows as consumed so that jpegli_finish_compress() does not
+ // report an incomplete image.
+ cinfo->next_scanline = cinfo->image_height;
+ cinfo->master->next_input_row = cinfo->image_height;
+ }
+
+ // Writes a tables-only datastream: SOI, the quantization tables, the Huffman
+ // tables, then EOI. Afterwards all tables are marked as sent. Requires a
+ // destination to be set; raises an error otherwise.
+ void jpegli_write_tables(j_compress_ptr cinfo) {
+ CheckState(cinfo, jpegli::kEncStart);
+ if (cinfo->dest == nullptr) {
+ JPEGLI_ERROR("Missing destination.");
+ }
+ jpeg_comp_master* m = cinfo->master;
+ (*cinfo->err->reset_error_mgr)(reinterpret_cast<j_common_ptr>(cinfo));
+ (*cinfo->dest->init_destination)(cinfo);
+ jpegli::WriteOutput(cinfo, {0xFF, 0xD8}); // SOI
+ jpegli::EncodeDQT(cinfo, /*write_all_tables=*/true);
+ jpegli::CopyHuffmanTables(cinfo);
+ jpegli::EncodeDHT(cinfo, 0, m->num_huffman_tables);
+ jpegli::WriteOutput(cinfo, {0xFF, 0xD9}); // EOI
+ (*cinfo->dest->term_destination)(cinfo);
+ // Mark every table as already sent.
+ jpegli_suppress_tables(cinfo, TRUE);
+ }
+
+//
+// Marker writing
+//
+
+ // Writes the 4-byte header of an APPn (0xe0..0xef) or COM (0xfe) marker
+ // segment: 0xff, the marker code, and the big-endian segment length, which
+ // is `datalen` plus the two length bytes. The payload itself must follow via
+ // jpegli_write_m_byte(). Raises an error for other marker codes or when
+ // datalen exceeds jpegli::kMaxBytesInMarker.
+ void jpegli_write_m_header(j_compress_ptr cinfo, int marker,
+                            unsigned int datalen) {
+   CheckState(cinfo, jpegli::kEncHeader, jpegli::kEncWriteCoeffs);
+   if (datalen > jpegli::kMaxBytesInMarker) {
+     JPEGLI_ERROR("Invalid marker length %u", datalen);
+   }
+   if (marker != 0xfe && (marker < 0xe0 || marker > 0xef)) {
+     JPEGLI_ERROR(
+         "jpegli_write_m_header: Only APP and COM markers are supported.");
+   }
+   // Only the header is emitted here, so a fixed 4-byte buffer is enough; no
+   // payload-sized allocation is needed.
+   const unsigned int segment_len = datalen + 2;
+   const uint8_t marker_data[4] = {
+       0xff, static_cast<uint8_t>(marker),
+       static_cast<uint8_t>(segment_len >> 8),
+       static_cast<uint8_t>(segment_len & 0xff)};
+   jpegli::WriteOutput(cinfo, marker_data, 4);
+ }
+
+ // Writes the low 8 bits of `val` to the output as a single byte.
+ void jpegli_write_m_byte(j_compress_ptr cinfo, int val) {
+   const uint8_t byte = static_cast<uint8_t>(val);
+   jpegli::WriteOutput(cinfo, &byte, 1);
+ }
+
+ // Writes a complete marker segment: the header via jpegli_write_m_header()
+ // (which validates state, marker code and length), followed by `datalen`
+ // bytes from `dataptr`.
+ void jpegli_write_marker(j_compress_ptr cinfo, int marker,
+ const JOCTET* dataptr, unsigned int datalen) {
+ jpegli_write_m_header(cinfo, marker, datalen);
+ jpegli::WriteOutput(cinfo, dataptr, datalen);
+ }
+
+ // Writes an ICC profile as a sequence of marker segments. Each segment holds
+ // jpegli::kICCSignature, a 1-based chunk index, the total chunk count, and
+ // as many profile bytes as fit in the remaining marker capacity. Nothing is
+ // written for an empty profile.
+ void jpegli_write_icc_profile(j_compress_ptr cinfo, const JOCTET* icc_data_ptr,
+                               unsigned int icc_data_len) {
+   // Payload capacity left once the signature and the two counter bytes are
+   // accounted for.
+   constexpr size_t kChunkCapacity =
+       jpegli::kMaxBytesInMarker - sizeof jpegli::kICCSignature - 2;
+   const int total_chunks =
+       static_cast<int>(jpegli::DivCeil(icc_data_len, kChunkCapacity));
+   size_t offset = 0;
+   int chunk_index = 0;
+   while (chunk_index < total_chunks) {
+     const size_t remaining = icc_data_len - offset;
+     const size_t chunk_len = std::min(kChunkCapacity, remaining);
+     jpegli_write_m_header(
+         cinfo, jpegli::kICCMarker,
+         static_cast<unsigned int>(chunk_len + sizeof jpegli::kICCSignature +
+                                   2));
+     for (const unsigned char sig_byte : jpegli::kICCSignature) {
+       jpegli_write_m_byte(cinfo, sig_byte);
+     }
+     ++chunk_index;  // Chunk numbers in the stream are 1-based.
+     jpegli_write_m_byte(cinfo, chunk_index);
+     jpegli_write_m_byte(cinfo, total_chunks);
+     const size_t chunk_end = offset + chunk_len;
+     for (; offset < chunk_end; ++offset) {
+       jpegli_write_m_byte(cinfo, icc_data_ptr[offset]);
+     }
+   }
+ }
+
+//
+// Input streaming
+//
+
+ // Feeds up to `num_lines` scanlines into the encoder and returns how many
+ // were consumed. The count is clipped to the rows remaining in the image.
+ //
+ // The return value can be smaller than requested when the bit writer buffer
+ // cannot be emptied. In that case some rows may already have been read into
+ // the input buffer without being counted in next_scanline ("input lag"); on
+ // the next call those rows are skipped rather than read again, and the
+ // caller must pass at least that many lines.
+ //
+ // Raises an error in raw data mode.
+ JDIMENSION jpegli_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines,
+                                   JDIMENSION num_lines) {
+   CheckState(cinfo, jpegli::kEncHeader, jpegli::kEncReadImage);
+   if (cinfo->raw_data_in) {
+     JPEGLI_ERROR("jpegli_write_raw_data() must be called for raw data mode.");
+   }
+   jpegli::ProgressMonitorInputPass(cinfo);
+   // On the first call in streaming mode without optimized coding, the frame
+   // and scan headers are written before any image data.
+   if (cinfo->global_state == jpegli::kEncHeader &&
+       jpegli::IsStreamingSupported(cinfo) && !cinfo->optimize_coding) {
+     jpegli::WriteFrameHeader(cinfo);
+     jpegli::WriteScanHeader(cinfo, 0);
+   }
+   cinfo->global_state = jpegli::kEncReadImage;
+   jpeg_comp_master* m = cinfo->master;
+   if (num_lines + cinfo->next_scanline > cinfo->image_height) {
+     num_lines = cinfo->image_height - cinfo->next_scanline;
+   }
+   JDIMENSION prev_scanline = cinfo->next_scanline;
+   // Rows already read into the input buffer but not yet reported as consumed.
+   size_t input_lag = (std::min<size_t>(cinfo->image_height, m->next_input_row) -
+                       cinfo->next_scanline);
+   if (input_lag > num_lines) {
+     // The "%u" conversion expects an unsigned int, so the size_t value is
+     // converted explicitly. It is bounded by image_height, a JDIMENSION.
+     JPEGLI_ERROR("Need at least %u lines to continue",
+                  static_cast<unsigned int>(input_lag));
+   }
+   if (input_lag > 0) {
+     if (!jpegli::EmptyBitWriterBuffer(&m->bw)) {
+       return 0;
+     }
+     cinfo->next_scanline += input_lag;
+   }
+   float* rows[jpegli::kMaxComponents];
+   for (size_t i = input_lag; i < num_lines; ++i) {
+     jpegli::ReadInputRow(cinfo, scanlines[i], rows);
+     (*m->color_transform)(rows, cinfo->image_width);
+     jpegli::PadInputBuffer(cinfo, rows);
+     jpegli::ProcessiMCURows(cinfo);
+     // Stop without counting this row if the output could not be flushed.
+     if (!jpegli::EmptyBitWriterBuffer(&m->bw)) {
+       break;
+     }
+     ++cinfo->next_scanline;
+   }
+   return cinfo->next_scanline - prev_scanline;
+ }
+
+ // Feeds one iMCU row of raw (already downsampled) component data into the
+ // encoder. Returns the iMCU height in image rows on success. Returns 0 when
+ // the image is already complete or the bit writer buffer cannot be emptied.
+ //
+ // `data[c]` supplies v_samp_factor * DCTSIZE rows for component c; a null row
+ // pointer is treated as a row of zeros. `num_lines` must be at least the
+ // iMCU height. Raises an error when raw data mode is not set.
+ JDIMENSION jpegli_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data,
+                                  JDIMENSION num_lines) {
+   CheckState(cinfo, jpegli::kEncHeader, jpegli::kEncReadImage);
+   if (!cinfo->raw_data_in) {
+     JPEGLI_ERROR("jpegli_write_raw_data(): raw data mode was not set");
+   }
+   jpegli::ProgressMonitorInputPass(cinfo);
+   // On the first call in streaming mode without optimized coding, the frame
+   // and scan headers are written before any image data.
+   if (cinfo->global_state == jpegli::kEncHeader &&
+       jpegli::IsStreamingSupported(cinfo) && !cinfo->optimize_coding) {
+     jpegli::WriteFrameHeader(cinfo);
+     jpegli::WriteScanHeader(cinfo, 0);
+   }
+   cinfo->global_state = jpegli::kEncReadImage;
+   jpeg_comp_master* m = cinfo->master;
+   if (cinfo->next_scanline >= cinfo->image_height) {
+     return 0;
+   }
+   size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor;
+   if (num_lines < iMCU_height) {
+     // The "%u" conversion expects an unsigned int, so the size_t value is
+     // converted explicitly.
+     JPEGLI_ERROR("Missing input lines, minimum is %u",
+                  static_cast<unsigned int>(iMCU_height));
+   }
+   // A previous call read an iMCU row but could not flush the output. Retry
+   // the flush and only then report the row as consumed.
+   if (cinfo->next_scanline < m->next_input_row) {
+     JXL_ASSERT(m->next_input_row - cinfo->next_scanline == iMCU_height);
+     if (!jpegli::EmptyBitWriterBuffer(&m->bw)) {
+       return 0;
+     }
+     cinfo->next_scanline = m->next_input_row;
+     return iMCU_height;
+   }
+   size_t iMCU_y = m->next_input_row / iMCU_height;
+   float* rows[jpegli::kMaxComponents];
+   for (int c = 0; c < cinfo->num_components; ++c) {
+     JSAMPARRAY plane = data[c];
+     jpeg_component_info* comp = &cinfo->comp_info[c];
+     size_t xsize = comp->width_in_blocks * DCTSIZE;
+     size_t ysize = comp->v_samp_factor * DCTSIZE;
+     size_t y0 = iMCU_y * ysize;
+     auto& buffer = m->input_buffer[c];
+     for (size_t i = 0; i < ysize; ++i) {
+       // Only rows[0] is set: the input method is invoked once per component
+       // row here.
+       rows[0] = buffer.Row(y0 + i);
+       if (plane[i] == nullptr) {
+         memset(rows[0], 0, xsize * sizeof(rows[0][0]));
+       } else {
+         (*m->input_method)(plane[i], xsize, rows);
+       }
+       // We need a border of 1 repeated pixel for adaptive quant field.
+       buffer.PadRow(y0 + i, xsize, /*border=*/1);
+     }
+   }
+   m->next_input_row += iMCU_height;
+   jpegli::ProcessiMCURows(cinfo);
+   if (!jpegli::EmptyBitWriterBuffer(&m->bw)) {
+     return 0;
+   }
+   cinfo->next_scanline += iMCU_height;
+   return iMCU_height;
+ }
+
+//
+// Non-streaming part
+//
+
+ // Completes the compression: performs whatever encoding work was not already
+ // done while the input was streamed in, writes EOI, terminates the
+ // destination and resets the compressor through jpegli_abort_compress().
+ // Raises an error if fewer than image_height rows have been supplied.
+ void jpegli_finish_compress(j_compress_ptr cinfo) {
+ CheckState(cinfo, jpegli::kEncReadImage, jpegli::kEncWriteCoeffs);
+ jpeg_comp_master* m = cinfo->master;
+ if (cinfo->next_scanline < cinfo->image_height) {
+ JPEGLI_ERROR("Incomplete image, expected %d rows, got %d",
+ cinfo->image_height, cinfo->next_scanline);
+ }
+
+ if (cinfo->global_state == jpegli::kEncWriteCoeffs) {
+ // Zig-zag shuffle all the blocks. For non-transcoding case it was already
+ // done in EncodeiMCURow().
+ jpegli::ZigZagShuffleBlocks(cinfo);
+ }
+
+ if (m->psnr_target > 0) {
+ jpegli::QuantizetoPSNR(cinfo);
+ }
+
+ // In streaming mode tokens were produced row by row during input; without
+ // optimize_coding the bitstream itself was written then as well.
+ const bool tokens_done = jpegli::IsStreamingSupported(cinfo);
+ const bool bitstream_done = tokens_done && !cinfo->optimize_coding;
+
+ if (!tokens_done) {
+ jpegli::TokenizeJpeg(cinfo);
+ }
+
+ // Mirrors the condition in InitCompress(): the entropy coder is set up here
+ // exactly when it was not set up there.
+ if (cinfo->optimize_coding || cinfo->progressive_mode) {
+ jpegli::OptimizeHuffmanCodes(cinfo);
+ jpegli::InitEntropyCoder(cinfo);
+ }
+
+ if (!bitstream_done) {
+ jpegli::WriteFrameHeader(cinfo);
+ for (int i = 0; i < cinfo->num_scans; ++i) {
+ jpegli::WriteScanHeader(cinfo, i);
+ jpegli::WriteScanData(cinfo, i);
+ }
+ } else {
+ // The scan data is already in the bit writer: flush the pending bits.
+ JumpToByteBoundary(&m->bw);
+ if (!EmptyBitWriterBuffer(&m->bw)) {
+ JPEGLI_ERROR("Output suspension is not supported in finish_compress");
+ }
+ }
+
+ jpegli::WriteOutput(cinfo, {0xFF, 0xD9}); // EOI
+ (*cinfo->dest->term_destination)(cinfo);
+
+ // Release memory and reset global state.
+ jpegli_abort_compress(cinfo);
+ }
+
+ // Aborts the current compression by forwarding to the common jpegli_abort().
+ void jpegli_abort_compress(j_compress_ptr cinfo) {
+ jpegli_abort(reinterpret_cast<j_common_ptr>(cinfo));
+ }
+
+ // Destroys the compressor by forwarding to the common jpegli_destroy().
+ void jpegli_destroy_compress(j_compress_ptr cinfo) {
+ jpegli_destroy(reinterpret_cast<j_common_ptr>(cinfo));
+ }
diff --git a/lib/jpegli/encode.h b/lib/jpegli/encode.h
new file mode 100644
index 0000000..320dfaa
--- /dev/null
+++ b/lib/jpegli/encode.h
@@ -0,0 +1,158 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// This file contains the C API of the encoder part of the libjpegli library,
+// which is based on the C API of libjpeg, with the function names changed from
+// jpeg_* to jpegli_*, while compressor object definitions are included directly
+// from jpeglib.h
+//
+// Applications can use the libjpegli library in one of the following ways:
+//
+// (1) Include jpegli/encode.h and/or jpegli/decode.h, update the function
+// names of the API and link against libjpegli.
+//
+// (2) Leave the application code unchanged, but replace the libjpeg.so library
+// with the one built by this project that is API- and ABI-compatible with
+// libjpeg-turbo's version of libjpeg.so.
+
+#ifndef LIB_JPEGLI_ENCODE_H_
+#define LIB_JPEGLI_ENCODE_H_
+
+#include "lib/jpegli/common.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+ // Initializes a compressor object. The macro passes the library version and
+ // the caller's struct size; a size mismatch raises an error.
+ #define jpegli_create_compress(cinfo) \
+ jpegli_CreateCompress((cinfo), JPEG_LIB_VERSION, \
+ (size_t)sizeof(struct jpeg_compress_struct))
+ void jpegli_CreateCompress(j_compress_ptr cinfo, int version,
+ size_t structsize);
+
+ // Output destination setup.
+ // NOTE(review): presumably the counterparts of libjpeg's jpeg_stdio_dest() and
+ // jpeg_mem_dest(); confirm against the destination manager implementation.
+ void jpegli_stdio_dest(j_compress_ptr cinfo, FILE* outfile);
+
+ void jpegli_mem_dest(j_compress_ptr cinfo, unsigned char** outbuffer,
+ unsigned long* outsize);
+
+ // Resets compression parameters to defaults (default colorspace, quality 90,
+ // default progressive level, standard Huffman tables).
+ void jpegli_set_defaults(j_compress_ptr cinfo);
+
+ // Selects the jpeg colorspace matching cinfo->in_color_space.
+ void jpegli_default_colorspace(j_compress_ptr cinfo);
+
+ // Sets the jpeg colorspace and reinitializes the per-component parameters.
+ void jpegli_set_colorspace(j_compress_ptr cinfo, J_COLOR_SPACE colorspace);
+
+ // Sets the quantization tables from a quality value.
+ void jpegli_set_quality(j_compress_ptr cinfo, int quality,
+ boolean force_baseline);
+
+ // Sets the quantization tables from a linear scale factor.
+ void jpegli_set_linear_quality(j_compress_ptr cinfo, int scale_factor,
+ boolean force_baseline);
+
+ #if JPEG_LIB_VERSION >= 70
+ // Sets the quantization tables from cinfo->q_scale_factor[].
+ void jpegli_default_qtables(j_compress_ptr cinfo, boolean force_baseline);
+ #endif
+
+ // Converts a quality value (clamped to [1, 100]) to a percentage scale factor.
+ int jpegli_quality_scaling(int quality);
+
+ // Installs a custom quantization table, scaled by scale_factor percent.
+ void jpegli_add_quant_table(j_compress_ptr cinfo, int which_tbl,
+ const unsigned int* basic_table, int scale_factor,
+ boolean force_baseline);
+
+ // Selects progressive level 2.
+ void jpegli_simple_progression(j_compress_ptr cinfo);
+
+ // Sets the sent_table flag of all quantization and Huffman tables.
+ void jpegli_suppress_tables(j_compress_ptr cinfo, boolean suppress);
+
+ #if JPEG_LIB_VERSION >= 70
+ // Copies image_width/image_height to jpeg_width/jpeg_height.
+ void jpegli_calc_jpeg_dimensions(j_compress_ptr cinfo);
+ #endif
+
+ // Copies the parameters needed for transcoding from a decompressor object.
+ void jpegli_copy_critical_parameters(j_decompress_ptr srcinfo,
+ j_compress_ptr dstinfo);
+
+ // Marker writing. Only APPn and COM markers are accepted. These may be called
+ // after jpegli_start_compress() or jpegli_write_coefficients(), before any
+ // image data has been written.
+ void jpegli_write_m_header(j_compress_ptr cinfo, int marker,
+ unsigned int datalen);
+
+ void jpegli_write_m_byte(j_compress_ptr cinfo, int val);
+
+ void jpegli_write_marker(j_compress_ptr cinfo, int marker,
+ const JOCTET* dataptr, unsigned int datalen);
+
+ // Writes an ICC profile, split across as many marker segments as needed.
+ void jpegli_write_icc_profile(j_compress_ptr cinfo, const JOCTET* icc_data_ptr,
+ unsigned int icc_data_len);
+
+ // Starts compression for pixel or raw data input.
+ void jpegli_start_compress(j_compress_ptr cinfo, boolean write_all_tables);
+
+ // Writes a tables-only datastream (SOI, DQT, DHT, EOI).
+ void jpegli_write_tables(j_compress_ptr cinfo);
+
+ // Supplies input rows; returns the number of rows consumed.
+ JDIMENSION jpegli_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines,
+ JDIMENSION num_lines);
+
+ // Supplies one iMCU row of raw component data; returns the number of image
+ // rows consumed (0 or the iMCU height).
+ JDIMENSION jpegli_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data,
+ JDIMENSION num_lines);
+
+ // Starts compression from pre-computed coefficient arrays.
+ void jpegli_write_coefficients(j_compress_ptr cinfo,
+ jvirt_barray_ptr* coef_arrays);
+
+ // Finishes the datastream, terminates the destination and resets the object.
+ void jpegli_finish_compress(j_compress_ptr cinfo);
+
+ void jpegli_abort_compress(j_compress_ptr cinfo);
+
+ void jpegli_destroy_compress(j_compress_ptr cinfo);
+
+//
+// New API functions that are not available in libjpeg
+//
+// NOTE: This part of the API is still experimental and will probably change in
+// the future.
+//
+
+// Sets the butteraugli target distance for the compressor. This may override
+// the default quantization table indexes based on jpeg colorspace, therefore
+// it must be called after jpegli_set_defaults() or after the last
+// jpegli_set_colorspace() or jpegli_default_colorspace() calls.
+void jpegli_set_distance(j_compress_ptr cinfo, float distance,
+ boolean force_baseline);
+
+// Returns the butteraugli target distance for the given quality parameter.
+float jpegli_quality_to_distance(int quality);
+
+// Enables distance parameter search to meet the given psnr target.
+void jpegli_set_psnr(j_compress_ptr cinfo, float psnr, float tolerance,
+ float min_distance, float max_distance);
+
+// Changes the default behaviour of the encoder in the selection of quantization
+// matrices and chroma subsampling. Must be called before jpegli_set_defaults()
+ // because some default settings depend on the XYB mode.
+void jpegli_set_xyb_mode(j_compress_ptr cinfo);
+
+// Signals to the encoder that the pixel data that will be provided later
+// through jpegli_write_scanlines() has this transfer function. This must be
+// called before jpegli_set_defaults() because it changes the default
+// quantization tables.
+void jpegli_set_cicp_transfer_function(j_compress_ptr cinfo, int code);
+
+ // Sets the sample data type and endianness of the input. Unsupported values
+ // raise an error. The defaults are JPEGLI_TYPE_UINT8 and
+ // JPEGLI_NATIVE_ENDIAN.
+ void jpegli_set_input_format(j_compress_ptr cinfo, JpegliDataType data_type,
+ JpegliEndianness endianness);
+
+// Sets whether or not the encoder uses adaptive quantization for creating more
+// zero coefficients based on the local properties of the image.
+// Enabled by default.
+void jpegli_enable_adaptive_quantization(j_compress_ptr cinfo, boolean value);
+
+// Sets the default progression parameters, where level 0 is sequential, and
+// greater level value means more progression steps. Default is 2.
+void jpegli_set_progressive_level(j_compress_ptr cinfo, int level);
+
+// If this function is called before starting compression, the quality and
+// linear quality parameters will be used to scale the standard quantization
+// tables from Annex K of the JPEG standard. By default jpegli uses a different
+ // set of quantization tables and uses different scaling parameters for DC and
+// AC coefficients. Must be called before jpegli_set_defaults().
+void jpegli_use_standard_quant_tables(j_compress_ptr cinfo);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif // LIB_JPEGLI_ENCODE_H_
diff --git a/lib/jpegli/encode_api_test.cc b/lib/jpegli/encode_api_test.cc
new file mode 100644
index 0000000..8d53557
--- /dev/null
+++ b/lib/jpegli/encode_api_test.cc
@@ -0,0 +1,837 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jpegli {
+namespace {
+
+struct TestConfig {  // One parameterized encode -> decode round-trip case.
+ TestImage input;
+ CompressParams jparams;
+ JpegIOMode input_mode = PIXELS;
+ double max_bpp = 0.0;  // TestAPI bpp bound; not every config sets it.
+ double max_dist = 0.0;  // Tolerance passed to VerifyOutputImage().
+};
+
+class EncodeAPITestParam : public ::testing::TestWithParam<TestConfig> {};
+
+void GenerateInput(JpegIOMode input_mode, const CompressParams& jparams,
+ TestImage* input) {
+ GeneratePixels(input);
+ if (input_mode == RAW_DATA) {
+ GenerateRawData(jparams, input);
+ } else if (input_mode == COEFFICIENTS) {
+ GenerateCoeffs(jparams, input);
+ }
+}
+
+TEST_P(EncodeAPITestParam, TestAPI) {
+ TestConfig config = GetParam();
+ GenerateInput(config.input_mode, config.jparams, &config.input);
+ std::vector<uint8_t> compressed;
+ ASSERT_TRUE(EncodeWithJpegli(config.input, config.jparams, &compressed));
+ if (config.jparams.icc.empty()) {
+ double bpp =
+ compressed.size() * 8.0 / (config.input.xsize * config.input.ysize);
+ printf("bpp: %f\n", bpp);
+ EXPECT_LT(bpp, config.max_bpp);
+ }
+ DecompressParams dparams;
+ dparams.output_mode =
+ config.input_mode == COEFFICIENTS ? COEFFICIENTS : PIXELS;
+ if (config.jparams.set_jpeg_colorspace &&
+ config.jparams.jpeg_color_space == JCS_GRAYSCALE) {
+ ConvertToGrayscale(&config.input);
+ } else {
+ dparams.set_out_color_space = true;
+ dparams.out_color_space = config.input.color_space;
+ }
+ TestImage output;
+ DecodeWithLibjpeg(config.jparams, dparams, compressed, &output);
+ VerifyOutputImage(config.input, output, config.max_dist);
+}
+
+TEST(EncodeAPITest, ReuseCinfoSameImageTwice) {
+ TestImage input;
+ input.xsize = 129;
+ input.ysize = 73;
+ CompressParams jparams;
+ GenerateInput(PIXELS, jparams, &input);
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ std::vector<uint8_t> compressed0;
+ std::vector<uint8_t> compressed1;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ EncodeWithJpegli(input, jparams, &cinfo);
+ compressed0.assign(buffer, buffer + buffer_size);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ EncodeWithJpegli(input, jparams, &cinfo);
+ compressed1.assign(buffer, buffer + buffer_size);
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+ ASSERT_EQ(compressed0.size(), compressed1.size());
+ EXPECT_EQ(0,
+ memcmp(compressed0.data(), compressed1.data(), compressed0.size()));
+}
+
+std::vector<TestConfig> GenerateBasicConfigs() {
+ std::vector<TestConfig> all_configs;
+ for (int samp : {1, 2}) {
+ for (int progr : {0, 2}) {
+ for (int optimize : {0, 1}) {
+ if (progr && optimize) continue;
+ TestConfig config;
+ config.input.xsize = 257 + samp * 37;
+ config.input.ysize = 265 + optimize * 17;
+ config.jparams.h_sampling = {samp, 1, 1};
+ config.jparams.v_sampling = {samp, 1, 1};
+ config.jparams.progressive_mode = progr;
+ config.jparams.optimize_coding = optimize;
+ config.max_dist = 2.4f;
+ GeneratePixels(&config.input);
+ all_configs.push_back(config);
+ }
+ }
+ }
+ return all_configs;
+}
+
+TEST(EncodeAPITest, ReuseCinfoSameMemOutput) {
+ std::vector<TestConfig> all_configs = GenerateBasicConfigs();
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ for (const TestConfig& config : all_configs) {
+ EncodeWithJpegli(config.input, config.jparams, &cinfo);
+ }
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ }
+ size_t pos = 0;
+ for (size_t i = 0; i < all_configs.size(); ++i) {
+ TestImage output;
+ pos +=
+ DecodeWithLibjpeg(all_configs[i].jparams, DecompressParams(), nullptr,
+ 0, buffer + pos, buffer_size - pos, &output);
+ VerifyOutputImage(all_configs[i].input, output, all_configs[i].max_dist);
+ }
+ if (buffer) free(buffer);
+}
+
+TEST(EncodeAPITest, ReuseCinfoSameStdOutput) {
+ std::vector<TestConfig> all_configs = GenerateBasicConfigs();
+ FILE* tmpf = tmpfile();
+ JXL_CHECK(tmpf);
+ {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_stdio_dest(&cinfo, tmpf);
+ for (const TestConfig& config : all_configs) {
+ EncodeWithJpegli(config.input, config.jparams, &cinfo);
+ }
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ }
+ size_t total_size = ftell(tmpf);
+ rewind(tmpf);
+ std::vector<uint8_t> compressed(total_size);
+ JXL_CHECK(total_size == fread(&compressed[0], 1, total_size, tmpf));
+ fclose(tmpf);
+ size_t pos = 0;
+ for (size_t i = 0; i < all_configs.size(); ++i) {
+ TestImage output;
+ pos += DecodeWithLibjpeg(all_configs[i].jparams, DecompressParams(),
+ nullptr, 0, &compressed[pos],
+ compressed.size() - pos, &output);
+ VerifyOutputImage(all_configs[i].input, output, all_configs[i].max_dist);
+ }
+}
+
+TEST(EncodeAPITest, ReuseCinfoChangeParams) {
+ TestImage input, output;
+ CompressParams jparams;
+ DecompressParams dparams;
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ std::vector<uint8_t> compressed;
+ jpeg_compress_struct cinfo;
+ const auto max_rms = [](int q, int hs, int vs) {
+ if (hs == 1 && vs == 1) return q == 90 ? 2.2 : 0.6;
+ if (hs == 2 && vs == 2) return q == 90 ? 2.8 : 1.2;
+ return q == 90 ? 2.4 : 1.0;
+ };
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ input.xsize = 129;
+ input.ysize = 73;
+ dparams.set_out_color_space = true;
+ for (JpegIOMode input_mode : {PIXELS, RAW_DATA, PIXELS, COEFFICIENTS}) {
+ for (int h_samp : {2, 1}) {
+ for (int v_samp : {2, 1}) {
+ for (int progr : {0, 2}) {
+ for (int quality : {90, 100}) {
+ input.Clear();
+ input.color_space =
+ (input_mode == RAW_DATA ? JCS_YCbCr : JCS_RGB);
+ jparams.quality = quality;
+ jparams.h_sampling = {h_samp, 1, 1};
+ jparams.v_sampling = {v_samp, 1, 1};
+ jparams.progressive_mode = progr;
+ printf(
+ "Generating input with quality %d chroma subsampling %dx%d "
+ "input mode %d progressive_mode %d\n",
+ quality, h_samp, v_samp, input_mode, progr);
+ GenerateInput(input_mode, jparams, &input);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ if (input_mode != COEFFICIENTS) {
+ cinfo.image_width = input.xsize;
+ cinfo.image_height = input.ysize;
+ cinfo.input_components = input.components;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ jpegli_abort_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ }
+ EncodeWithJpegli(input, jparams, &cinfo);
+ compressed.resize(buffer_size);
+ std::copy_n(buffer, buffer_size, compressed.data());
+ dparams.output_mode =
+ input_mode == COEFFICIENTS ? COEFFICIENTS : PIXELS;
+ dparams.out_color_space = input.color_space;
+ output.Clear();
+ DecodeWithLibjpeg(jparams, dparams, compressed, &output);
+ VerifyOutputImage(input, output,
+ max_rms(quality, h_samp, v_samp));
+ }
+ }
+ }
+ }
+ }
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncodeAPITest, AbbreviatedStreams) {
+ uint8_t* table_stream = nullptr;
+ unsigned long table_stream_size = 0;
+ uint8_t* data_stream = nullptr;
+ unsigned long data_stream_size = 0;
+ {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &table_stream, &table_stream_size);
+ cinfo.input_components = 3;
+ cinfo.in_color_space = JCS_RGB;
+ jpegli_set_defaults(&cinfo);
+ jpegli_write_tables(&cinfo);
+ jpegli_mem_dest(&cinfo, &data_stream, &data_stream_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.optimize_coding = FALSE;
+ jpegli_set_progressive_level(&cinfo, 0);
+ jpegli_start_compress(&cinfo, FALSE);
+ JSAMPLE image[3] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ EXPECT_LT(data_stream_size, 50);
+ jpegli_destroy_compress(&cinfo);
+ }
+ TestImage output;
+ DecodeWithLibjpeg(CompressParams(), DecompressParams(), table_stream,
+ table_stream_size, data_stream, data_stream_size, &output);
+ EXPECT_EQ(1, output.xsize);
+ EXPECT_EQ(1, output.ysize);
+ EXPECT_EQ(3, output.components);
+ EXPECT_EQ(0, output.pixels[0]);
+ EXPECT_EQ(0, output.pixels[1]);
+ EXPECT_EQ(0, output.pixels[2]);
+ if (table_stream) free(table_stream);
+ if (data_stream) free(data_stream);
+}
+
+void CopyQuantTables(j_compress_ptr cinfo, uint16_t* quant_tables) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ int quant_idx = cinfo->comp_info[c].quant_tbl_no;
+ JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[quant_idx];
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ quant_tables[c * DCTSIZE2 + k] = quant_table->quantval[k];
+ }
+ }
+}
+
+TEST(EncodeAPITest, QualitySettings) {
+ // Test that jpegli_set_quality, jpegli_set_linear_quality and
+ // jpegli_quality_scaling are consistent with each other.
+ uint16_t quant_tables0[3 * DCTSIZE2];
+ uint16_t quant_tables1[3 * DCTSIZE2];
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ cinfo.input_components = 3;
+ cinfo.in_color_space = JCS_RGB;
+ jpegli_set_defaults(&cinfo);
+ for (boolean baseline : {FALSE, TRUE}) {
+ for (int q = 1; q <= 100; ++q) {
+ jpegli_set_quality(&cinfo, q, baseline);
+ CopyQuantTables(&cinfo, quant_tables0);
+ jpegli_set_linear_quality(&cinfo, jpegli_quality_scaling(q), baseline);
+ CopyQuantTables(&cinfo, quant_tables1);
+ EXPECT_EQ(0,
+ memcmp(quant_tables0, quant_tables1, sizeof(quant_tables0)));
+#if JPEG_LIB_VERSION >= 70
+ for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+ cinfo.q_scale_factor[i] = jpegli_quality_scaling(q);
+ }
+ jpegli_default_qtables(&cinfo, baseline);
+ CopyQuantTables(&cinfo, quant_tables1);
+ EXPECT_EQ(0,
+ memcmp(quant_tables0, quant_tables1, sizeof(quant_tables0)));
+#endif
+ }
+ }
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ // Test jpegli_quality_scaling for some specific values.
+ EXPECT_EQ(5000, jpegli_quality_scaling(-1));
+ EXPECT_EQ(5000, jpegli_quality_scaling(0));
+ EXPECT_EQ(5000, jpegli_quality_scaling(1));
+ EXPECT_EQ(100, jpegli_quality_scaling(50));
+ EXPECT_EQ(50, jpegli_quality_scaling(75));
+ EXPECT_EQ(20, jpegli_quality_scaling(90));
+ EXPECT_EQ(0, jpegli_quality_scaling(100));
+ EXPECT_EQ(0, jpegli_quality_scaling(101));
+}
+
+std::vector<TestConfig> GenerateTests() {
+ std::vector<TestConfig> all_tests;
+ for (int h_samp : {1, 2}) {
+ for (int v_samp : {1, 2}) {
+ for (int progr : {0, 2}) {
+ for (int optimize : {0, 1}) {
+ if (progr && optimize) continue;
+ TestConfig config;
+ config.jparams.h_sampling = {h_samp, 1, 1};
+ config.jparams.v_sampling = {v_samp, 1, 1};
+ config.jparams.progressive_mode = progr;
+ if (!progr) {
+ config.jparams.optimize_coding = optimize;
+ }
+ const float kMaxBpp[4] = {1.55, 1.4, 1.4, 1.32};
+ const float kMaxDist[4] = {1.95, 2.2, 2.2, 2.0};
+ const int idx = v_samp * 2 + h_samp - 3;
+ config.max_bpp =
+ kMaxBpp[idx] * (optimize ? 0.97 : 1.0) * (progr ? 0.97 : 1.0);
+ config.max_dist = kMaxDist[idx];
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ {
+ TestConfig config;
+ config.jparams.quality = 100;
+ config.max_bpp = 6.6;
+ config.max_dist = 0.6;
+ all_tests.push_back(config);
+ }
+ {
+ TestConfig config;
+ config.jparams.quality = 80;
+ config.max_bpp = 1.05;
+ config.max_dist = 2.7;
+ all_tests.push_back(config);
+ }
+ for (int samp : {1, 2}) {
+ for (int progr : {0, 2}) {
+ for (int optimize : {0, 1}) {
+ if (progr && optimize) continue;
+ TestConfig config;
+ config.input.xsize = 257;
+ config.input.ysize = 265;
+ config.jparams.h_sampling = {samp, 1, 1};
+ config.jparams.v_sampling = {samp, 1, 1};
+ config.jparams.progressive_mode = progr;
+ if (!progr) {
+ config.jparams.optimize_coding = optimize;
+ }
+ config.jparams.use_adaptive_quantization = false;
+ config.max_bpp = 2.05f;
+ config.max_dist = 2.3f;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ for (int h0_samp : {1, 2, 4}) {
+ for (int v0_samp : {1, 2, 4}) {
+ for (int h2_samp : {1, 2, 4}) {
+ for (int v2_samp : {1, 2, 4}) {
+ TestConfig config;
+ config.input.xsize = 137;
+ config.input.ysize = 75;
+ config.jparams.progressive_mode = 2;
+ config.jparams.h_sampling = {h0_samp, 1, h2_samp};
+ config.jparams.v_sampling = {v0_samp, 1, v2_samp};
+ config.max_bpp = 2.5;
+ config.max_dist = 12.0;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ for (int h0_samp : {1, 3}) {
+ for (int v0_samp : {1, 3}) {
+ for (int h2_samp : {1, 3}) {
+ for (int v2_samp : {1, 3}) {
+ TestConfig config;
+ config.input.xsize = 205;
+ config.input.ysize = 99;
+ config.jparams.progressive_mode = 2;
+ config.jparams.h_sampling = {h0_samp, 1, h2_samp};
+ config.jparams.v_sampling = {v0_samp, 1, v2_samp};
+ config.max_bpp = 2.5;
+ config.max_dist = 10.0;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ for (int h0_samp : {1, 2, 3, 4}) {
+ for (int v0_samp : {1, 2, 3, 4}) {
+ TestConfig config;
+ config.input.xsize = 217;
+ config.input.ysize = 129;
+ config.jparams.progressive_mode = 2;
+ config.jparams.h_sampling = {h0_samp, 1, 1};
+ config.jparams.v_sampling = {v0_samp, 1, 1};
+ config.max_bpp = 2.0;
+ config.max_dist = 5.5;
+ all_tests.push_back(config);
+ }
+ }
+ for (int p = 0; p < 3 + NumTestScanScripts(); ++p) {
+ for (int samp : {1, 2}) {
+ for (int quality : {100, 90, 1}) {
+ for (int r : {0, 1024, 1}) {
+ for (int optimize : {0, 1}) {
+ bool progressive = p == 1 || p == 2 || p > 4;
+ if (progressive && !optimize) continue;
+ TestConfig config;
+ config.input.xsize = 273;
+ config.input.ysize = 265;
+ config.jparams.progressive_mode = p;
+ if (!progressive) {
+ config.jparams.optimize_coding = optimize;
+ }
+ config.jparams.h_sampling = {samp, 1, 1};
+ config.jparams.v_sampling = {samp, 1, 1};
+ config.jparams.quality = quality;
+ config.jparams.restart_interval = r;
+ config.max_bpp = quality == 100 ? 8.0 : 1.9;
+ if (r == 1) {
+ config.max_bpp += 10.0;
+ }
+ config.max_dist = quality == 1 ? 20.0 : 2.1;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ }
+ {
+ TestConfig config;
+ config.jparams.simple_progression = true;
+ config.max_bpp = 1.48;
+ config.max_dist = 2.0;
+ all_tests.push_back(config);
+ }
+ {
+ TestConfig config;
+ config.input_mode = COEFFICIENTS;
+ config.jparams.h_sampling = {2, 1, 1};
+ config.jparams.v_sampling = {2, 1, 1};
+ config.jparams.progressive_mode = 0;
+ config.jparams.optimize_coding = 0;
+ config.max_bpp = 16;
+ config.max_dist = 0.0;
+ all_tests.push_back(config);
+ }
+ {
+ TestConfig config;
+ config.jparams.xyb_mode = true;
+ config.jparams.progressive_mode = 2;
+ config.max_bpp = 1.5;
+ config.max_dist = 3.5;
+ all_tests.push_back(config);
+ }
+ {
+ TestConfig config;
+ config.jparams.libjpeg_mode = true;
+ config.max_bpp = 2.1;
+ config.max_dist = 1.7;
+ all_tests.push_back(config);
+ }
+
+ for (J_COLOR_SPACE in_color_space : {JCS_RGB, JCS_YCbCr, JCS_GRAYSCALE}) {
+ for (J_COLOR_SPACE jpeg_color_space : {JCS_RGB, JCS_YCbCr, JCS_GRAYSCALE}) {
+ if (jpeg_color_space == JCS_RGB && in_color_space == JCS_YCbCr) continue;
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.input.color_space = in_color_space;
+ config.jparams.set_jpeg_colorspace = true;
+ config.jparams.jpeg_color_space = jpeg_color_space;
+ config.max_bpp = jpeg_color_space == JCS_RGB ? 4.5 : 1.85;
+ config.max_dist = jpeg_color_space == JCS_RGB ? 1.4 : 2.05;
+ all_tests.push_back(config);
+ }
+ }
+ for (J_COLOR_SPACE in_color_space : {JCS_CMYK, JCS_YCCK}) {
+ for (J_COLOR_SPACE jpeg_color_space : {JCS_CMYK, JCS_YCCK}) {
+ if (jpeg_color_space == JCS_CMYK && in_color_space == JCS_YCCK) continue;
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.input.color_space = in_color_space;
+ if (in_color_space != jpeg_color_space) {
+ config.jparams.set_jpeg_colorspace = true;
+ config.jparams.jpeg_color_space = jpeg_color_space;
+ }
+ config.max_bpp = jpeg_color_space == JCS_CMYK ? 4.0 : 3.6;
+ config.max_dist = jpeg_color_space == JCS_CMYK ? 1.2 : 1.5;
+ all_tests.push_back(config);
+ }
+ }
+ {
+ TestConfig config;
+ config.input.color_space = JCS_YCbCr;
+ config.max_bpp = 1.6;
+ config.max_dist = 1.35;
+ all_tests.push_back(config);
+ }
+ for (bool xyb : {false, true}) {
+ TestConfig config;
+ config.input.color_space = JCS_GRAYSCALE;
+ config.jparams.xyb_mode = xyb;
+ config.max_bpp = 1.35;
+ config.max_dist = 1.4;
+ all_tests.push_back(config);
+ }
+ for (int channels = 1; channels <= 4; ++channels) {
+ TestConfig config;
+ config.input.color_space = JCS_UNKNOWN;
+ config.input.components = channels;
+ config.max_bpp = 1.35 * channels;
+ config.max_dist = 1.4;
+ all_tests.push_back(config);
+ }
+ for (size_t r : {1, 3, 17, 1024}) {
+ for (int progr : {0, 2}) {
+ TestConfig config;
+ config.jparams.restart_interval = r;
+ config.jparams.progressive_mode = progr;
+ config.max_bpp = 1.58 + 5.5 / r;
+ config.max_dist = 2.2;
+ all_tests.push_back(config);
+ }
+ }
+ for (size_t rr : {1, 3, 8, 100}) {
+ TestConfig config;
+ config.jparams.restart_in_rows = rr;
+ config.max_bpp = 1.6;
+ config.max_dist = 2.2;
+ all_tests.push_back(config);
+ }
+ for (int type : {0, 1, 10, 100, 10000}) {
+ for (int scale : {1, 50, 100, 200, 500}) {
+ for (bool add_raw : {false, true}) {
+ for (bool baseline : {true, false}) {
+ if (!baseline && (add_raw || type * scale < 25500)) continue;
+ TestConfig config;
+ config.input.xsize = 64;
+ config.input.ysize = 64;
+ CustomQuantTable table;
+ table.table_type = type;
+ table.scale_factor = scale;
+ table.force_baseline = baseline;
+ table.add_raw = add_raw;
+ table.Generate();
+ config.jparams.optimize_coding = 1;
+ config.jparams.quant_tables.push_back(table);
+ config.jparams.quant_indexes = {0, 0, 0};
+ float q = (type == 0 ? 16 : type) * scale * 0.01f;
+ if (baseline && !add_raw) q = std::max(1.0f, std::min(255.0f, q));
+ config.max_bpp = 1.5f + 25.0f / q;
+ config.max_dist = 0.6f + 0.25f * q;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ for (int qidx = 0; qidx < 8; ++qidx) {
+ if (qidx == 3) continue;
+ TestConfig config;
+ config.input.xsize = 256;
+ config.input.ysize = 256;
+ config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+ (qidx >> 0) & 1};
+ config.max_bpp = 2.25;
+ config.max_dist = 2.8;
+ all_tests.push_back(config);
+ }
+ for (int qidx = 0; qidx < 8; ++qidx) {
+ for (int slot_idx = 0; slot_idx < 2; ++slot_idx) {
+ if (qidx == 0 && slot_idx == 0) continue;
+ TestConfig config;
+ config.input.xsize = 256;
+ config.input.ysize = 256;
+ config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+ (qidx >> 0) & 1};
+ CustomQuantTable table;
+ table.slot_idx = slot_idx;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ config.max_bpp = 2.3;
+ config.max_dist = 2.9;
+ all_tests.push_back(config);
+ }
+ }
+ for (int qidx = 0; qidx < 8; ++qidx) {
+ for (bool xyb : {false, true}) {
+ TestConfig config;
+ config.input.xsize = 256;
+ config.input.ysize = 256;
+ config.jparams.xyb_mode = xyb;
+ config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+ (qidx >> 0) & 1};
+ {
+ CustomQuantTable table;
+ table.slot_idx = 0;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ {
+ CustomQuantTable table;
+ table.slot_idx = 1;
+ table.table_type = 20;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ config.max_bpp = 2.0;
+ config.max_dist = 3.85;
+ all_tests.push_back(config);
+ }
+ }
+ for (bool xyb : {false, true}) {
+ TestConfig config;
+ config.input.xsize = 256;
+ config.input.ysize = 256;
+ config.jparams.xyb_mode = xyb;
+ config.jparams.quant_indexes = {0, 1, 2};
+ {
+ CustomQuantTable table;
+ table.slot_idx = 0;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ {
+ CustomQuantTable table;
+ table.slot_idx = 1;
+ table.table_type = 20;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ {
+ CustomQuantTable table;
+ table.slot_idx = 2;
+ table.table_type = 30;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ config.max_bpp = 1.5;
+ config.max_dist = 3.75;
+ all_tests.push_back(config);
+ }
+ {
+ TestConfig config;
+ config.jparams.comp_ids = {7, 17, 177};
+ config.input.xsize = config.input.ysize = 128;
+ config.max_bpp = 2.25;
+ config.max_dist = 2.4;
+ all_tests.push_back(config);
+ }
+ for (int override_JFIF : {-1, 0, 1}) {
+ for (int override_Adobe : {-1, 0, 1}) {
+ if (override_JFIF == -1 && override_Adobe == -1) continue;
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 128;
+ config.jparams.override_JFIF = override_JFIF;
+ config.jparams.override_Adobe = override_Adobe;
+ config.max_bpp = 2.25;
+ config.max_dist = 2.4;
+ all_tests.push_back(config);
+ }
+ }
+ {
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.max_bpp = 1.85;
+ config.max_dist = 2.05;
+ config.jparams.add_marker = true;
+ all_tests.push_back(config);
+ }
+ for (size_t icc_size : {728, 70000, 1000000}) {
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.max_dist = 2.05;
+ config.jparams.icc.resize(icc_size);
+ for (size_t i = 0; i < icc_size; ++i) {
+ config.jparams.icc[i] = (i * 17) & 0xff;
+ }
+ all_tests.push_back(config);
+ }
+ for (JpegIOMode input_mode : {PIXELS, RAW_DATA, COEFFICIENTS}) {
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.input_mode = input_mode;
+ if (input_mode == RAW_DATA) {
+ config.input.color_space = JCS_YCbCr;
+ }
+ config.jparams.progressive_mode = 0;
+ config.jparams.optimize_coding = 0;
+ config.max_bpp = 1.85;
+ config.max_dist = 2.05;
+ if (input_mode == COEFFICIENTS) {
+ config.max_bpp = 3.5;
+ config.max_dist = 0.0;
+ }
+ all_tests.push_back(config);
+ config.jparams.use_flat_dc_luma_code = true;
+ all_tests.push_back(config);
+ }
+ for (int xsize : {640, 641, 648, 649}) {
+ for (int ysize : {640, 641, 648, 649}) {
+ for (int h_sampling : {1, 2}) {
+ for (int v_sampling : {1, 2}) {
+ if (h_sampling == 1 && v_sampling == 1) continue;
+ for (int progr : {0, 2}) {
+ TestConfig config;
+ config.input.xsize = xsize;
+ config.input.ysize = ysize;
+ config.input.color_space = JCS_YCbCr;
+ config.jparams.h_sampling = {h_sampling, 1, 1};
+ config.jparams.v_sampling = {v_sampling, 1, 1};
+ config.jparams.progressive_mode = progr;
+ config.input_mode = RAW_DATA;
+ config.max_bpp = 1.75;
+ config.max_dist = 2.0;
+ all_tests.push_back(config);
+ config.input_mode = COEFFICIENTS;
+ if (xsize & 1) {
+ config.jparams.add_marker = true;
+ }
+ config.max_bpp = 24.0;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ }
+ for (JpegliDataType data_type : {JPEGLI_TYPE_UINT16, JPEGLI_TYPE_FLOAT}) {
+ for (JpegliEndianness endianness :
+ {JPEGLI_LITTLE_ENDIAN, JPEGLI_BIG_ENDIAN, JPEGLI_NATIVE_ENDIAN}) {
+ J_COLOR_SPACE colorspace[4] = {JCS_GRAYSCALE, JCS_UNKNOWN, JCS_RGB,
+ JCS_CMYK};
+ float max_bpp[4] = {1.32, 2.7, 1.6, 4.0};
+ for (int channels = 1; channels <= 4; ++channels) {
+ TestConfig config;
+ config.input.data_type = data_type;
+ config.input.endianness = endianness;
+ config.input.components = channels;
+ config.input.color_space = colorspace[channels - 1];
+ config.max_bpp = max_bpp[channels - 1];
+ config.max_dist = 2.2;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ for (int smoothing : {1, 5, 50, 100}) {
+ for (int h_sampling : {1, 2}) {
+ for (int v_sampling : {1, 2}) {
+ TestConfig config;
+ config.input.xsize = 257;
+ config.input.ysize = 265;
+ config.jparams.smoothing_factor = smoothing;
+ config.jparams.h_sampling = {h_sampling, 1, 1};
+ config.jparams.v_sampling = {v_sampling, 1, 1};
+ config.max_bpp = 1.85;
+ config.max_dist = 3.05f;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ return all_tests;
+};
+
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+ os << c.input;
+ os << c.jparams;
+ if (c.input_mode == RAW_DATA) {
+ os << "RawDataIn";
+ } else if (c.input_mode == COEFFICIENTS) {
+ os << "WriteCoeffs";
+ }
+ return os;
+}
+
+std::string TestDescription(
+ const testing::TestParamInfo<EncodeAPITestParam::ParamType>& info) {
+ std::stringstream name;
+ name << info.param;
+ return name.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(EncodeAPITest, EncodeAPITestParam,
+ testing::ValuesIn(GenerateTests()),
+ TestDescription);
+} // namespace
+} // namespace jpegli
diff --git a/lib/jpegli/encode_finish.cc b/lib/jpegli/encode_finish.cc
new file mode 100644
index 0000000..955676b
--- /dev/null
+++ b/lib/jpegli/encode_finish.cc
@@ -0,0 +1,230 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/encode_finish.h"
+
+#include <cmath>
+#include <limits>
+
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jpegli/quant.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/encode_finish.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/dct-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::GetLane;
+
+using D = HWY_FULL(float);
+using DI = HWY_FULL(int32_t);
+using DI16 = Rebind<int16_t, HWY_FULL(int32_t)>;
+
+void ReQuantizeBlock(int16_t* block, const float* qmc, float aq_strength,
+ const float* zero_bias_offset,
+ const float* zero_bias_mul) {
+ D d;
+ DI di;
+ DI16 di16;
+ const auto aq_mul = Set(d, aq_strength);
+ for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) {
+ const auto in = Load(di16, block + k);
+ const auto val = ConvertTo(d, PromoteTo(di, in));
+ const auto q = Load(d, qmc + k);
+ const auto qval = Mul(val, q);
+ const auto zb_offset = Load(d, zero_bias_offset + k);
+ const auto zb_mul = Load(d, zero_bias_mul + k);
+ const auto threshold = Add(zb_offset, Mul(zb_mul, aq_mul));
+ const auto nzero_mask = Ge(Abs(qval), threshold);
+ const auto iqval = IfThenElseZero(nzero_mask, Round(qval));
+ Store(DemoteTo(di16, ConvertTo(di, iqval)), di16, block + k);
+ }
+}
+
+float BlockError(const int16_t* block, const float* qmc, const float* iqmc,
+ const float aq_strength, const float* zero_bias_offset,
+ const float* zero_bias_mul) {
+ D d;
+ DI di;
+ DI16 di16;
+ auto err = Zero(d);
+ const auto scale = Set(d, 1.0 / 16);
+ const auto aq_mul = Set(d, aq_strength);
+ for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) {
+ const auto in = Load(di16, block + k);
+ const auto val = ConvertTo(d, PromoteTo(di, in));
+ const auto q = Load(d, qmc + k);
+ const auto qval = Mul(val, q);
+ const auto zb_offset = Load(d, zero_bias_offset + k);
+ const auto zb_mul = Load(d, zero_bias_mul + k);
+ const auto threshold = Add(zb_offset, Mul(zb_mul, aq_mul));
+ const auto nzero_mask = Ge(Abs(qval), threshold);
+ const auto iqval = IfThenElseZero(nzero_mask, Round(qval));
+ const auto invq = Load(d, iqmc + k);
+ const auto rval = Mul(iqval, invq);
+ const auto diff = Mul(Sub(val, rval), scale);
+ err = Add(err, Mul(diff, diff));
+ }
+ return GetLane(SumOfLanes(d, err));
+}
+
+void ComputeInverseWeights(const float* qmc, float* iqmc) {
+ for (int k = 0; k < 64; ++k) {
+ iqmc[k] = 1.0f / qmc[k];
+ }
+}
+
+float ComputePSNR(j_compress_ptr cinfo, int sampling) {
+ jpeg_comp_master* m = cinfo->master;
+ InitQuantizer(cinfo, QuantPass::SEARCH_SECOND_PASS);
+ double error = 0.0;
+ size_t num = 0;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ const float* qmc = m->quant_mul[c];
+ const int h_factor = m->h_factor[c];
+ const int v_factor = m->v_factor[c];
+ const float* zero_bias_offset = m->zero_bias_offset[c];
+ const float* zero_bias_mul = m->zero_bias_mul[c];
+ HWY_ALIGN float iqmc[64];
+ ComputeInverseWeights(qmc, iqmc);
+ for (JDIMENSION by = 0; by < comp->height_in_blocks; by += sampling) {
+ JBLOCKARRAY ba = GetBlockRow(cinfo, c, by);
+ const float* qf = m->quant_field.Row(by * v_factor);
+ for (JDIMENSION bx = 0; bx < comp->width_in_blocks; bx += sampling) {
+ error += BlockError(&ba[0][bx][0], qmc, iqmc, qf[bx * h_factor],
+ zero_bias_offset, zero_bias_mul);
+ num += DCTSIZE2;
+ }
+ }
+ }
+ return 4.3429448f * log(num / (error / 255. / 255.));
+}
+
+void ReQuantizeCoeffs(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ InitQuantizer(cinfo, QuantPass::SEARCH_SECOND_PASS);
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ const float* qmc = m->quant_mul[c];
+ const int h_factor = m->h_factor[c];
+ const int v_factor = m->v_factor[c];
+ const float* zero_bias_offset = m->zero_bias_offset[c];
+ const float* zero_bias_mul = m->zero_bias_mul[c];
+ for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) {
+ JBLOCKARRAY ba = GetBlockRow(cinfo, c, by);
+ const float* qf = m->quant_field.Row(by * v_factor);
+ for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) {
+ ReQuantizeBlock(&ba[0][bx][0], qmc, qf[bx * h_factor], zero_bias_offset,
+ zero_bias_mul);
+ }
+ }
+ }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+namespace {
+HWY_EXPORT(ComputePSNR);
+HWY_EXPORT(ReQuantizeCoeffs);
+
+void ReQuantizeCoeffs(j_compress_ptr cinfo) {
+ HWY_DYNAMIC_DISPATCH(ReQuantizeCoeffs)(cinfo);
+}
+
+float ComputePSNR(j_compress_ptr cinfo, int sampling) {
+ return HWY_DYNAMIC_DISPATCH(ComputePSNR)(cinfo, sampling);
+}
+
+void UpdateDistance(j_compress_ptr cinfo, float distance) {
+ float distances[NUM_QUANT_TBLS] = {distance, distance, distance};
+ SetQuantMatrices(cinfo, distances, /*add_two_chroma_tables=*/true);
+}
+
+float Clamp(float val, float minval, float maxval) {
+ return std::max(minval, std::min(maxval, val));
+}
+
+#define PSNR_SEARCH_DBG 0
+
+float FindDistanceForPSNR(j_compress_ptr cinfo) {
+ constexpr int kMaxIters = 20;
+ const float psnr_target = cinfo->master->psnr_target;
+ const float tolerance = cinfo->master->psnr_tolerance;
+ const float min_dist = cinfo->master->min_distance;
+ const float max_dist = cinfo->master->max_distance;
+ float d = Clamp(1.0f, min_dist, max_dist);
+ for (int sampling : {4, 1}) {
+ float best_diff = std::numeric_limits<float>::max();
+ float best_distance = 0.0f;
+ float best_psnr = 0.0;
+ float dmin = min_dist;
+ float dmax = max_dist;
+ bool found_lower_bound = false;
+ bool found_upper_bound = false;
+ for (int i = 0; i < kMaxIters; ++i) {
+ UpdateDistance(cinfo, d);
+ float psnr = ComputePSNR(cinfo, sampling);
+ if (psnr > psnr_target) {
+ dmin = d;
+ found_lower_bound = true;
+ } else {
+ dmax = d;
+ found_upper_bound = true;
+ }
+#if (PSNR_SEARCH_DBG > 1)
+ printf("sampling %d iter %2d d %7.4f psnr %.2f", sampling, i, d, psnr);
+ if (found_upper_bound && found_lower_bound) {
+ printf(" d-interval: [ %7.4f .. %7.4f ]", dmin, dmax);
+ }
+ printf("\n");
+#endif
+ float diff = std::abs(psnr - psnr_target);
+ if (diff < best_diff) {
+ best_diff = diff;
+ best_distance = d;
+ best_psnr = psnr;
+ }
+ if (diff < tolerance * psnr_target || dmin == dmax) {
+ break;
+ }
+ if (!found_lower_bound || !found_upper_bound) {
+ d *= std::exp(0.15f * (psnr - psnr_target));
+ } else {
+ d = 0.5f * (dmin + dmax);
+ }
+ d = Clamp(d, min_dist, max_dist);
+ }
+ d = best_distance;
+ if (sampling == 1 && PSNR_SEARCH_DBG) {
+ printf("Final PSNR %.2f at distance %.4f\n", best_psnr, d);
+ }
+ }
+ return d;
+}
+
+} // namespace
+
+void QuantizetoPSNR(j_compress_ptr cinfo) {
+ float distance = FindDistanceForPSNR(cinfo);
+ UpdateDistance(cinfo, distance);
+ ReQuantizeCoeffs(cinfo);
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/lib/jpegli/encode_finish.h b/lib/jpegli/encode_finish.h
new file mode 100644
index 0000000..f6862de
--- /dev/null
+++ b/lib/jpegli/encode_finish.h
@@ -0,0 +1,17 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ENCODE_FINISH_H_
+#define LIB_JPEGLI_ENCODE_FINISH_H_
+
+#include "lib/jpegli/encode_internal.h"
+
+namespace jpegli {
+
+void QuantizetoPSNR(j_compress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_ENCODE_FINISH_H_
diff --git a/lib/jpegli/encode_internal.h b/lib/jpegli/encode_internal.h
new file mode 100644
index 0000000..4dbef97
--- /dev/null
+++ b/lib/jpegli/encode_internal.h
@@ -0,0 +1,141 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ENCODE_INTERNAL_H_
+#define LIB_JPEGLI_ENCODE_INTERNAL_H_
+
+#include <stdint.h>
+
+#include "lib/jpegli/bit_writer.h"
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/common_internal.h"
+#include "lib/jpegli/encode.h"
+
+namespace jpegli {
+
+// Signature bytes spelling the ASCII string "ICC_PROFILE" followed by a
+// terminating zero byte.
+constexpr unsigned char kICCSignature[12] = {
+ 0x49, 0x43, 0x43, 0x5F, 0x50, 0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x00};
+// Marker code that goes with the signature above: JPEG_APP0 + 2.
+constexpr int kICCMarker = JPEG_APP0 + 2;
+
+constexpr int kDefaultProgressiveLevel = 0;
+
+// Integer type used for stored coefficients.
+typedef int16_t coeff_t;
+
+// Encoder-side Huffman table with one entry per symbol value. The streaming
+// writer passes depth[s] and code[s] together to WriteBits(), i.e. depth is
+// the bit length and code the bit pattern of symbol s.
+struct HuffmanCodeTable {
+ int depth[256];
+ int code[256];
+};
+
+// One entropy-coded unit: a symbol, its extra bits and the context id it was
+// produced under. The constructor takes ints and narrows them to the field
+// widths.
+struct Token {
+ uint8_t context;
+ uint8_t symbol;
+ uint16_t bits;
+ Token(int c, int s, int b) : context(c), symbol(s), bits(b) {}
+};
+
+// A buffer of tokens together with the number of tokens stored in it.
+struct TokenArray {
+ Token* tokens;
+ size_t num_tokens;
+};
+
+// Token of an AC refinement scan: a symbol plus the number of refinement bits
+// that were collected for it (see TokenizeACRefinementScan).
+struct RefToken {
+ uint8_t symbol;
+ uint8_t refbits;
+};
+
+// Per-scan bookkeeping filled in while a scan is tokenized.
+struct ScanTokenInfo {
+ // Refinement-scan outputs: RefToken stream, refinement bits and EOB-run
+ // values.
+ RefToken* tokens;
+ size_t num_tokens;
+ uint8_t* refbits;
+ uint16_t* eobruns;
+ // Token counts recorded at each restart boundary and at the end of the scan.
+ size_t* restarts;
+ size_t num_restarts;
+ size_t num_nonzeros;
+ // Coefficients that are nonzero but become zero after the scan's Al shift
+ // (counted in TokenizeACProgressiveScan).
+ size_t num_future_nonzeros;
+ // Number of tokens that were already emitted when this scan's tokens start.
+ size_t token_offset;
+ size_t restart_interval;
+ size_t MCUs_per_row;
+ size_t MCU_rows_in_scan;
+ size_t blocks_in_MCU;
+ size_t num_blocks;
+};
+
+} // namespace jpegli
+
+// Private encoder state, reached through cinfo->master.
+struct jpeg_comp_master {
+ jpegli::RowBuffer<float> input_buffer[jpegli::kMaxComponents];
+ jpegli::RowBuffer<float>* smooth_input[jpegli::kMaxComponents];
+ jpegli::RowBuffer<float>* raw_data[jpegli::kMaxComponents];
+ bool force_baseline;
+ bool xyb_mode;
+ uint8_t cicp_transfer_function;
+ bool use_std_tables;
+ bool use_adaptive_quantization;
+ int progressive_level;
+ size_t xsize_blocks;
+ size_t ysize_blocks;
+ size_t blocks_per_iMCU_row;
+ jpegli::ScanTokenInfo* scan_token_info;
+ JpegliDataType data_type;
+ JpegliEndianness endianness;
+ void (*input_method)(const uint8_t* row_in, size_t len,
+ float* row_out[jpegli::kMaxComponents]);
+ void (*color_transform)(float* row[jpegli::kMaxComponents], size_t len);
+ void (*downsample_method[jpegli::kMaxComponents])(
+ float* rows_in[MAX_SAMP_FACTOR], size_t len, float* row_out);
+ // Per-component quantization parameters handed to ComputeCoefficientBlock().
+ float* quant_mul[jpegli::kMaxComponents];
+ float* zero_bias_offset[jpegli::kMaxComponents];
+ float* zero_bias_mul[jpegli::kMaxComponents];
+ int h_factor[jpegli::kMaxComponents];
+ int v_factor[jpegli::kMaxComponents];
+ // Array of Huffman tables that will be encoded in one or more DHT segments.
+ // In progressive mode we compute all Huffman tables that will be used in any
+ // of the scans, thus we can have more than 4 tables here.
+ JHUFF_TBL* huffman_tables;
+ size_t num_huffman_tables;
+ // Array of num_huffman_tables slot ids, where the ith element is the slot id
+ // of the ith Huffman table, as it appears in the DHT segment. The range of
+ // the slot ids is 0..3 for DC and 16..19 for AC Huffman codes.
+ uint8_t* slot_id_map;
+ // Maps context ids to an index in the huffman_tables array. Each component in
+ // each scan has a DC and AC context id, which are defined as follows:
+ // - DC context id is the component index (relative to cinfo->comp_info) of
+ // the scan component
+ // - AC context ids start at 4 and are increased for each component of each
+ // scan that have AC components (i.e. Se > 0)
+ uint8_t* context_map;
+ size_t num_contexts;
+ // Array of cinfo->num_scans context ids, where the ith element is the context
+ // id of the first AC component of the ith scan.
+ uint8_t* ac_ctx_offset;
+ // Array of num_huffman tables derived coding tables.
+ jpegli::HuffmanCodeTable* coding_tables;
+ float* diff_buffer;
+ jpegli::RowBuffer<float> fuzzy_erosion_tmp;
+ jpegli::RowBuffer<float> pre_erosion;
+ jpegli::RowBuffer<float> quant_field;
+ jvirt_barray_ptr* coeff_buffers;
+ size_t next_input_row;
+ size_t next_iMCU_row;
+ size_t next_dht_index;
+ size_t last_restart_interval;
+ JCOEF last_dc_coeff[MAX_COMPS_IN_SCAN];
+ jpegli::JpegBitWriter bw;
+ float* dct_buffer;
+ // Integer scratch area. ProcessiMCURow() uses offset 0 for the coefficient
+ // block, offset DCTSIZE2 for symbols and offset 3 * DCTSIZE2 for the indexes
+ // of the nonzero coefficients.
+ int32_t* block_tmp;
+ // Token storage. Tokens are appended through next_token to
+ // token_arrays[cur_token_array]; num_tokens is the allocated capacity of that
+ // array and total_num_tokens counts the tokens of the arrays already left
+ // behind.
+ jpegli::TokenArray* token_arrays;
+ size_t cur_token_array;
+ jpegli::Token* next_token;
+ size_t num_tokens;
+ size_t total_num_tokens;
+ // Write positions used by TokenizeACRefinementScan().
+ jpegli::RefToken* next_refinement_token;
+ uint8_t* next_refinement_bit;
+ // Parameters of the PSNR-targeted distance search (FindDistanceForPSNR).
+ // ProcessiMCURow() only uses adaptive quantization when psnr_target == 0.
+ float psnr_target;
+ float psnr_tolerance;
+ float min_distance;
+ float max_distance;
+};
+
+#endif // LIB_JPEGLI_ENCODE_INTERNAL_H_
diff --git a/lib/jpegli/encode_streaming.cc b/lib/jpegli/encode_streaming.cc
new file mode 100644
index 0000000..89dbd81
--- /dev/null
+++ b/lib/jpegli/encode_streaming.cc
@@ -0,0 +1,259 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/encode_streaming.h"
+
+#include <cmath>
+
+#include "lib/jpegli/bit_writer.h"
+#include "lib/jpegli/bitstream.h"
+#include "lib/jpegli/entropy_coding.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jxl/base/bits.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/encode_streaming.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/dct-inl.h"
+#include "lib/jpegli/entropy_coding-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// Template argument values for ProcessiMCURow(), selecting what is done with
+// each computed block: store its coefficients, turn it into tokens, or write
+// its bits directly.
+static const int kStreamingModeCoefficients = 0;
+static const int kStreamingModeTokens = 1;
+static const int kStreamingModeBits = 2;
+
+namespace {
+// Reorders the DCTSIZE2 values of block in place: after the call, position i
+// holds the value that was at position kSourceIndex[i] before it.
+void ZigZagShuffle(int32_t* JXL_RESTRICT block) {
+  // TODO(szabadka) SIMDify this.
+  static constexpr uint8_t kSourceIndex[DCTSIZE2] = {
+      0,  1,  8,  16, 9,  2,  3,  10,  //
+      17, 24, 32, 25, 18, 11, 4,  5,   //
+      12, 19, 26, 33, 40, 48, 41, 34,  //
+      27, 20, 13, 6,  7,  14, 21, 28,  //
+      35, 42, 49, 56, 57, 50, 43, 36,  //
+      29, 22, 15, 23, 30, 37, 44, 51,  //
+      58, 59, 52, 45, 38, 31, 39, 46,  //
+      53, 60, 61, 54, 47, 55, 62, 63,  //
+  };
+  // Gather into a scratch copy first, since source and destination overlap.
+  int32_t shuffled[DCTSIZE2];
+  for (int i = 0; i < DCTSIZE2; ++i) {
+    shuffled[i] = block[kSourceIndex[i]];
+  }
+  memcpy(block, shuffled, DCTSIZE2 * sizeof(shuffled[0]));
+}
+} // namespace
+
+// Processes the iMCU row m->next_iMCU_row: for every block of every component
+// it computes the quantized coefficients from m->raw_data and then, depending
+// on kMode, stores them in the coefficient buffers
+// (kStreamingModeCoefficients), appends tokens (kStreamingModeTokens) or
+// writes the entropy-coded bits to m->bw (kStreamingModeBits).
+template <int kMode>
+void ProcessiMCURow(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ JpegBitWriter* bw = &m->bw;
+ int xsize_mcus = DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor);
+ int ysize_mcus = DivCeil(cinfo->image_height, 8 * cinfo->max_v_samp_factor);
+ int mcu_y = m->next_iMCU_row;
+ // Three scratch areas carved out of m->block_tmp.
+ int32_t* block = m->block_tmp;
+ int32_t* symbols = m->block_tmp + DCTSIZE2;
+ int32_t* nonzero_idx = m->block_tmp + 3 * DCTSIZE2;
+ coeff_t* JXL_RESTRICT last_dc_coeff = m->last_dc_coeff;
+ // Adaptive quantization is only used when no PSNR target is set.
+ bool adaptive_quant = m->use_adaptive_quantization && m->psnr_target == 0;
+ JBLOCKARRAY ba[kMaxComponents];
+ if (kMode == kStreamingModeCoefficients) {
+ // Fetch the block rows of this iMCU row from each component's virtual
+ // coefficient array (last argument true; in the libjpeg API this requests
+ // write access).
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ int by0 = mcu_y * comp->v_samp_factor;
+ int block_rows_left = comp->height_in_blocks - by0;
+ int max_block_rows = std::min(comp->v_samp_factor, block_rows_left);
+ ba[c] = (*cinfo->mem->access_virt_barray)(
+ reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[c], by0,
+ max_block_rows, true);
+ }
+ }
+ if (kMode == kStreamingModeTokens) {
+ // Make sure the current token array can take the worst-case number of
+ // tokens of one MCU row; otherwise move on to the next token array and
+ // allocate it with an estimated size.
+ TokenArray* ta = &m->token_arrays[m->cur_token_array];
+ int max_tokens_per_mcu_row = MaxNumTokensPerMCURow(cinfo);
+ if (ta->num_tokens + max_tokens_per_mcu_row > m->num_tokens) {
+ if (ta->tokens) {
+ m->total_num_tokens += ta->num_tokens;
+ ++m->cur_token_array;
+ ta = &m->token_arrays[m->cur_token_array];
+ }
+ m->num_tokens =
+ EstimateNumTokens(cinfo, mcu_y, ysize_mcus, m->total_num_tokens,
+ max_tokens_per_mcu_row);
+ ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
+ m->next_token = ta->tokens;
+ }
+ }
+ // First input row of this iMCU row for each component.
+ const float* imcu_start[kMaxComponents];
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ imcu_start[c] = m->raw_data[c]->Row(mcu_y * comp->v_samp_factor * DCTSIZE);
+ }
+ const float* qf = nullptr;
+ if (adaptive_quant) {
+ qf = m->quant_field.Row(0);
+ }
+ HuffmanCodeTable* dc_code = nullptr;
+ HuffmanCodeTable* ac_code = nullptr;
+ const size_t qf_stride = m->quant_field.stride();
+ for (int mcu_x = 0; mcu_x < xsize_mcus; ++mcu_x) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ if (kMode == kStreamingModeBits) {
+ // Context c selects the DC table and context c + 4 the AC table.
+ dc_code = &m->coding_tables[m->context_map[c]];
+ ac_code = &m->coding_tables[m->context_map[c + 4]];
+ }
+ float* JXL_RESTRICT qmc = m->quant_mul[c];
+ const size_t stride = m->raw_data[c]->stride();
+ const int h_factor = m->h_factor[c];
+ const float* zero_bias_offset = m->zero_bias_offset[c];
+ const float* zero_bias_mul = m->zero_bias_mul[c];
+ float aq_strength = 0.0f;
+ for (int iy = 0; iy < comp->v_samp_factor; ++iy) {
+ for (int ix = 0; ix < comp->h_samp_factor; ++ix) {
+ size_t by = mcu_y * comp->v_samp_factor + iy;
+ size_t bx = mcu_x * comp->h_samp_factor + ix;
+ if (bx >= comp->width_in_blocks || by >= comp->height_in_blocks) {
+ // Block position outside the component: emit symbol 0 for both the
+ // DC and the AC context instead of computing a block.
+ if (kMode == kStreamingModeTokens) {
+ *m->next_token++ = Token(c, 0, 0);
+ *m->next_token++ = Token(c + 4, 0, 0);
+ } else if (kMode == kStreamingModeBits) {
+ WriteBits(bw, dc_code->depth[0], dc_code->code[0]);
+ WriteBits(bw, ac_code->depth[0], ac_code->code[0]);
+ }
+ continue;
+ }
+ if (adaptive_quant) {
+ aq_strength = qf[iy * qf_stride + bx * h_factor];
+ }
+ const float* pixels = imcu_start[c] + (iy * stride + bx) * DCTSIZE;
+ ComputeCoefficientBlock(pixels, stride, qmc, last_dc_coeff[c],
+ aq_strength, zero_bias_offset, zero_bias_mul,
+ m->dct_buffer, block);
+ if (kMode == kStreamingModeCoefficients) {
+ // Store the block reordered through kJPEGNaturalOrder.
+ JCOEF* cblock = &ba[c][iy][bx][0];
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ cblock[k] = block[kJPEGNaturalOrder[k]];
+ }
+ }
+ // Replace the DC value by its difference to the previous block of this
+ // component and update the predictor to the new DC value.
+ block[0] -= last_dc_coeff[c];
+ last_dc_coeff[c] += block[0];
+ if (kMode == kStreamingModeTokens) {
+ ComputeTokensForBlock<int32_t, false>(block, 0, c, c + 4,
+ &m->next_token);
+ } else if (kMode == kStreamingModeBits) {
+ ZigZagShuffle(block);
+ const int num_nonzeros = CompactBlock(block, nonzero_idx);
+ // nonzero_idx holds coefficient positions multiplied by 16, so a last
+ // entry below 1008 (= 63 * 16) means that coefficient 63 is zero.
+ const bool emit_eob = nonzero_idx[num_nonzeros - 1] < 1008;
+ ComputeSymbols(num_nonzeros, nonzero_idx, block, symbols);
+ WriteBlock(symbols, block, num_nonzeros, emit_eob, dc_code, ac_code,
+ bw);
+ }
+ }
+ }
+ }
+ }
+ if (kMode == kStreamingModeTokens) {
+ // Publish the new token count of the current array and of scan 0.
+ TokenArray* ta = &m->token_arrays[m->cur_token_array];
+ ta->num_tokens = m->next_token - ta->tokens;
+ ScanTokenInfo* sti = &m->scan_token_info[0];
+ sti->num_tokens = m->total_num_tokens + ta->num_tokens;
+ sti->restarts[0] = sti->num_tokens;
+ }
+}
+
+// Per-target entry point: runs ProcessiMCURow in coefficient-storing mode.
+void ComputeCoefficientsForiMCURow(j_compress_ptr cinfo) {
+ ProcessiMCURow<kStreamingModeCoefficients>(cinfo);
+}
+
+// Per-target entry point: runs ProcessiMCURow in token-producing mode.
+void ComputeTokensForiMCURow(j_compress_ptr cinfo) {
+ ProcessiMCURow<kStreamingModeTokens>(cinfo);
+}
+
+// Per-target entry point: runs ProcessiMCURow in bit-writing mode.
+void WriteiMCURow(j_compress_ptr cinfo) {
+ ProcessiMCURow<kStreamingModeBits>(cinfo);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+// Register the per-target implementations, then define the functions declared
+// in encode_streaming.h; each one forwards to the implementation chosen by
+// HWY_DYNAMIC_DISPATCH.
+HWY_EXPORT(ComputeCoefficientsForiMCURow);
+HWY_EXPORT(ComputeTokensForiMCURow);
+HWY_EXPORT(WriteiMCURow);
+
+void ComputeCoefficientsForiMCURow(j_compress_ptr cinfo) {
+ HWY_DYNAMIC_DISPATCH(ComputeCoefficientsForiMCURow)(cinfo);
+}
+
+void ComputeTokensForiMCURow(j_compress_ptr cinfo) {
+ HWY_DYNAMIC_DISPATCH(ComputeTokensForiMCURow)(cinfo);
+}
+
+void WriteiMCURow(j_compress_ptr cinfo) {
+ HWY_DYNAMIC_DISPATCH(WriteiMCURow)(cinfo);
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/lib/jpegli/encode_streaming.h b/lib/jpegli/encode_streaming.h
new file mode 100644
index 0000000..69acff4
--- /dev/null
+++ b/lib/jpegli/encode_streaming.h
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ENCODE_STREAMING_H_
+#define LIB_JPEGLI_ENCODE_STREAMING_H_
+
+#include "lib/jpegli/encode_internal.h"
+
+namespace jpegli {
+
+void ComputeCoefficientsForiMCURow(j_compress_ptr cinfo);
+
+void ComputeTokensForiMCURow(j_compress_ptr cinfo);
+
+void WriteiMCURow(j_compress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_ENCODE_STREAMING_H_
diff --git a/lib/jpegli/entropy_coding-inl.h b/lib/jpegli/entropy_coding-inl.h
new file mode 100644
index 0000000..bfb436d
--- /dev/null
+++ b/lib/jpegli/entropy_coding-inl.h
@@ -0,0 +1,213 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JPEGLI_ENTROPY_CODING_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JPEGLI_ENTROPY_CODING_INL_H_
+#undef LIB_JPEGLI_ENTROPY_CODING_INL_H_
+#else
+#define LIB_JPEGLI_ENTROPY_CODING_INL_H_
+#endif
+
+#include "lib/jxl/base/compiler_specific.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::And;
+using hwy::HWY_NAMESPACE::AndNot;
+using hwy::HWY_NAMESPACE::Compress;
+using hwy::HWY_NAMESPACE::CountTrue;
+using hwy::HWY_NAMESPACE::Eq;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::MaskFromVec;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::Not;
+using hwy::HWY_NAMESPACE::Or;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Shl;
+using hwy::HWY_NAMESPACE::Sub;
+
+using DI = HWY_FULL(int32_t);
+constexpr DI di;
+
+// Returns, per lane, the bit length of x (0 for x == 0, 12 for values whose
+// highest set bit is 2048). Only bits 0..11 of x are examined.
+//
+// Each b<k> is k if bit k-1 of x is set and a value of at most 0 otherwise
+// (e.g. b5 is 16 - 11 = 5 or 0 - 11 = -11), so the maximum over all of them is
+// the position of the highest set bit plus one.
+template <typename DI, class V>
+JXL_INLINE V NumBits(DI di, const V x) {
+ // TODO(szabadka) Add faster implementations for some specific architectures.
+ const auto b1 = And(x, Set(di, 1));
+ const auto b2 = And(x, Set(di, 2));
+ const auto b3 = Sub((And(x, Set(di, 4))), Set(di, 1));
+ const auto b4 = Sub((And(x, Set(di, 8))), Set(di, 4));
+ const auto b5 = Sub((And(x, Set(di, 16))), Set(di, 11));
+ const auto b6 = Sub((And(x, Set(di, 32))), Set(di, 26));
+ const auto b7 = Sub((And(x, Set(di, 64))), Set(di, 57));
+ const auto b8 = Sub((And(x, Set(di, 128))), Set(di, 120));
+ const auto b9 = Sub((And(x, Set(di, 256))), Set(di, 247));
+ const auto b10 = Sub((And(x, Set(di, 512))), Set(di, 502));
+ const auto b11 = Sub((And(x, Set(di, 1024))), Set(di, 1013));
+ const auto b12 = Sub((And(x, Set(di, 2048))), Set(di, 2036));
+ return Max(Max(Max(Max(b1, b2), Max(b3, b4)), Max(Max(b5, b6), Max(b7, b8))),
+ Max(Max(b9, b10), Max(b11, b12)));
+}
+
+// Coefficient indexes pre-multiplied by 16 for the symbol calculation, i.e.
+// kIndexes[k] == 16 * k for k in 0..63 (the last entry is 63 * 16 == 1008).
+HWY_ALIGN constexpr int32_t kIndexes[64] = {
+ 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192,
+ 208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400,
+ 416, 432, 448, 464, 480, 496, 512, 528, 544, 560, 576, 592, 608,
+ 624, 640, 656, 672, 688, 704, 720, 736, 752, 768, 784, 800, 816,
+ 832, 848, 864, 880, 896, 912, 928, 944, 960, 976, 992, 1008,
+};
+
+// Compacts block in place so that the kept coefficients are contiguous at its
+// start, and writes the matching kIndexes entries (position * 16) to
+// nonzero_idx. Element 0 is always kept, all other elements only if nonzero.
+// Returns the number of kept elements, which is therefore at least 1.
+//
+// NOTE(review): each step stores a whole vector at the current output
+// position, so lanes past the returned count are overwritten as well —
+// confirm that callers size both buffers accordingly.
+JXL_INLINE int CompactBlock(int32_t* JXL_RESTRICT block,
+ int32_t* JXL_RESTRICT nonzero_idx) {
+ const auto zero = Zero(di);
+ // Only lane 0 is set: forces element 0 of the first vector to be kept.
+ HWY_ALIGN constexpr int32_t dc_mask_lanes[HWY_LANES(DI)] = {-1};
+ const auto dc_mask = MaskFromVec(Load(di, dc_mask_lanes));
+ int num_nonzeros = 0;
+ int k = 0;
+ {
+ // First vector: same as the loop below, except for the forced lane 0.
+ const auto coef = Load(di, block);
+ const auto idx = Load(di, kIndexes);
+ const auto nonzero_mask = Or(dc_mask, Not(Eq(coef, zero)));
+ const auto nzero_coef = Compress(coef, nonzero_mask);
+ const auto nzero_idx = Compress(idx, nonzero_mask);
+ StoreU(nzero_coef, di, &block[num_nonzeros]);
+ StoreU(nzero_idx, di, &nonzero_idx[num_nonzeros]);
+ num_nonzeros += CountTrue(di, nonzero_mask);
+ k += Lanes(di);
+ }
+ for (; k < DCTSIZE2; k += Lanes(di)) {
+ const auto coef = Load(di, &block[k]);
+ const auto idx = Load(di, &kIndexes[k]);
+ const auto nonzero_mask = Not(Eq(coef, zero));
+ const auto nzero_coef = Compress(coef, nonzero_mask);
+ const auto nzero_idx = Compress(idx, nonzero_mask);
+ StoreU(nzero_coef, di, &block[num_nonzeros]);
+ StoreU(nzero_idx, di, &nonzero_idx[num_nonzeros]);
+ num_nonzeros += CountTrue(di, nonzero_mask);
+ }
+ return num_nonzeros;
+}
+
+// For the first num_nonzeros entries of a compacted block (see CompactBlock)
+// computes the symbol of each coefficient into symbols[] and replaces the
+// coefficient in block[] by its extra bits.
+//
+// With idx values being positions multiplied by 16, the symbol is
+//   nbits + idx - prev_idx - 16 == 16 * (number of skipped positions) + nbits.
+// The element nonzero_idx[-1] is written, so the caller must provide one
+// writable element in front of nonzero_idx.
+JXL_INLINE void ComputeSymbols(const int num_nonzeros,
+ int32_t* JXL_RESTRICT nonzero_idx,
+ int32_t* JXL_RESTRICT block,
+ int32_t* JXL_RESTRICT symbols) {
+ // Sentinel predecessor, so that the first entry gets a zero-length run.
+ nonzero_idx[-1] = -16;
+ const auto one = Set(di, 1);
+ const auto offset = Set(di, 16);
+ for (int i = 0; i < num_nonzeros; i += Lanes(di)) {
+ const auto idx = Load(di, &nonzero_idx[i]);
+ const auto prev_idx = LoadU(di, &nonzero_idx[i - 1]);
+ const auto coeff = Load(di, &block[i]);
+ const auto nbits = NumBits(di, Abs(coeff));
+ // mask is -1 for negative coefficients and 0 otherwise, so the extra bits
+ // are the low nbits bits of coeff - 1 for negative values and of coeff
+ // itself otherwise.
+ const auto mask = ShiftRight<8 * sizeof(int32_t) - 1>(coeff);
+ const auto bits = And(Add(coeff, mask), Sub(Shl(one, nbits), one));
+ const auto symbol = Sub(Add(nbits, idx), Add(prev_idx, offset));
+ Store(symbol, di, symbols + i);
+ Store(bits, di, block + i);
+ }
+}
+
+// Returns the number of nonzero values among the 64 elements of block, not
+// counting element 0 (the DC coefficient).
+template <typename T>
+int NumNonZero8x8ExceptDC(const T* block) {
+ const HWY_CAPPED(T, 8) di;
+
+ const auto zero = Zero(di);
+ // Add FFFF for every zero coefficient, negate to get #zeros.
+ auto neg_sum_zero = zero;
+ {
+ // First row has DC, so mask
+ const size_t y = 0;
+ // Only the first element is set, the remaining seven are zero.
+ HWY_ALIGN const T dc_mask_lanes[8] = {-1};
+
+ for (size_t x = 0; x < 8; x += Lanes(di)) {
+ const auto dc_mask = Load(di, dc_mask_lanes + x);
+
+ // DC counts as zero so we don't include it in nzeros.
+ const auto coef = AndNot(dc_mask, Load(di, &block[y * 8 + x]));
+
+ neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
+ }
+ }
+ // Remaining rows: no mask
+ for (size_t y = 1; y < 8; y++) {
+ for (size_t x = 0; x < 8; x += Lanes(di)) {
+ const auto coef = Load(di, &block[y * 8 + x]);
+ neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
+ }
+ }
+
+ // We want 64 - sum_zero, add because neg_sum_zero is already negated.
+ return kDCTBlockSize + GetLane(SumOfLanes(di, neg_sum_zero));
+}
+
+// Appends the tokens of one block at *tokens_ptr and advances the pointer.
+//
+// block:         64 coefficients; read at index k directly when zig_zag_order
+//                is true and at kJPEGNaturalOrder[k] otherwise.
+// last_dc:       value subtracted from block[0] to form the DC token.
+// dc_ctx/ac_ctx: context ids stored in the DC token and in the AC tokens.
+//
+// Output is one DC token, then one token per nonzero AC coefficient (preceded
+// by a 0xf0 token for every 16 zeros skipped), then a symbol-0 token if the
+// nonzero coefficients run out before position 63 has been consumed.
+template <typename T, bool zig_zag_order>
+void ComputeTokensForBlock(const T* block, int last_dc, int dc_ctx, int ac_ctx,
+ Token** tokens_ptr) {
+ Token* next_token = *tokens_ptr;
+ coeff_t temp2;
+ coeff_t temp;
+ temp = block[0] - last_dc;
+ if (temp == 0) {
+ *next_token++ = Token(dc_ctx, 0, 0);
+ } else {
+ // temp becomes the magnitude; temp2 keeps the value, decremented by one
+ // for negative values, from which the low dc_nbits bits are emitted.
+ temp2 = temp;
+ if (temp < 0) {
+ temp = -temp;
+ temp2--;
+ }
+ int dc_nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1;
+ int dc_mask = (1 << dc_nbits) - 1;
+ *next_token++ = Token(dc_ctx, dc_nbits, temp2 & dc_mask);
+ }
+ int num_nonzeros = NumNonZero8x8ExceptDC(block);
+ for (int k = 1; k < 64; ++k) {
+ if (num_nonzeros == 0) {
+ *next_token++ = Token(ac_ctx, 0, 0);
+ break;
+ }
+ // Skip zeros. These loops do not check k against 64; they rely on
+ // num_nonzeros > 0 meaning that a nonzero coefficient is still ahead.
+ int r = 0;
+ if (zig_zag_order) {
+ while ((temp = block[k]) == 0) {
+ r++;
+ k++;
+ }
+ } else {
+ while ((temp = block[kJPEGNaturalOrder[k]]) == 0) {
+ r++;
+ k++;
+ }
+ }
+ --num_nonzeros;
+ if (temp < 0) {
+ temp = -temp;
+ temp2 = ~temp;
+ } else {
+ temp2 = temp;
+ }
+ // Runs longer than 15 are split off as 0xf0 tokens, 16 zeros each.
+ while (r > 15) {
+ *next_token++ = Token(ac_ctx, 0xf0, 0);
+ r -= 16;
+ }
+ int ac_nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1;
+ int ac_mask = (1 << ac_nbits) - 1;
+ int symbol = (r << 4u) + ac_nbits;
+ *next_token++ = Token(ac_ctx, symbol, temp2 & ac_mask);
+ }
+ *tokens_ptr = next_token;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+#endif // LIB_JPEGLI_ENTROPY_CODING_INL_H_
diff --git a/lib/jpegli/entropy_coding.cc b/lib/jpegli/entropy_coding.cc
new file mode 100644
index 0000000..7e50bbc
--- /dev/null
+++ b/lib/jpegli/entropy_coding.cc
@@ -0,0 +1,837 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/entropy_coding.h"
+
+#include <vector>
+
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/huffman.h"
+#include "lib/jxl/base/bits.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/entropy_coding.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/entropy_coding-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// Per-target wrapper around ComputeTokensForBlock for coeff_t blocks that are
+// read in zigzag order (zig_zag_order == true).
+void ComputeTokensSequential(const coeff_t* block, int last_dc, int dc_ctx,
+ int ac_ctx, Token** tokens_ptr) {
+ ComputeTokensForBlock<coeff_t, true>(block, last_dc, dc_ctx, ac_ctx,
+ tokens_ptr);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+// Upper bound used when reserving token space for one MCU row: kDCTBlockSize
+// tokens for every block of every component in each MCU of the row.
+size_t MaxNumTokensPerMCURow(j_compress_ptr cinfo) {
+  size_t blocks_in_one_mcu = 0;
+  for (int ci = 0; ci < cinfo->num_components; ++ci) {
+    const jpeg_component_info& info = cinfo->comp_info[ci];
+    blocks_in_one_mcu += info.h_samp_factor * info.v_samp_factor;
+  }
+  const int mcus_in_row =
+      DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor);
+  return kDCTBlockSize * blocks_in_one_mcu * mcus_in_row;
+}
+
+// Suggests how many tokens to allocate for the rows from mcu_y onwards.
+//
+// On the first row the projected total is 16 worst-case rows. Later rows
+// extrapolate the num_tokens seen so far to all ysize_mcus rows and add a
+// third as headroom. The result is the projected remainder, but never less
+// than one worst-case row and never more than the worst case of all rows
+// that are left. cinfo is not used.
+//
+// NOTE(review): the subtraction is unsigned. If num_tokens exceeds the
+// projected total (possible when mcu_y == 0 and num_tokens > 16 * max_per_row)
+// it wraps around, and the result becomes the worst case for all remaining
+// rows.
+size_t EstimateNumTokens(j_compress_ptr cinfo, size_t mcu_y, size_t ysize_mcus,
+                         size_t num_tokens, size_t max_per_row) {
+  const size_t projected_total =
+      (mcu_y == 0) ? 16 * max_per_row
+                   : (4 * ysize_mcus * num_tokens) / (3 * mcu_y);
+  const size_t still_needed = projected_total - num_tokens;
+  const size_t worst_case_remaining = (ysize_mcus - mcu_y) * max_per_row;
+  return std::min(worst_case_remaining, std::max(max_per_row, still_needed));
+}
+
+namespace {
+HWY_EXPORT(ComputeTokensSequential);
+
+// Emits the single token of a DC coefficient in a progressive scan: coeffs[0]
+// is shifted right by Al, the difference to *last_dc_coeff is tokenized, and
+// *last_dc_coeff is updated to the shifted value. The token is written at
+// **next_token and *next_token is advanced.
+void TokenizeProgressiveDC(const coeff_t* coeffs, int context, int Al,
+ coeff_t* last_dc_coeff, Token** next_token) {
+ coeff_t temp2;
+ coeff_t temp;
+ temp2 = coeffs[0] >> Al;
+ temp = temp2 - *last_dc_coeff;
+ *last_dc_coeff = temp2;
+ // temp becomes the magnitude of the difference; temp2 keeps the difference,
+ // decremented by one when negative, from which the low nbits bits are taken.
+ temp2 = temp;
+ if (temp < 0) {
+ temp = -temp;
+ temp2--;
+ }
+ int nbits = (temp == 0) ? 0 : (jxl::FloorLog2Nonzero<uint32_t>(temp) + 1);
+ int bits = temp2 & ((1 << nbits) - 1);
+ *(*next_token)++ = Token(context, nbits, bits);
+}
+
+// Tokenizes the coefficients Ss..Se of the single component
+// (component_index[0]) of scan scan_index, after shifting their magnitudes
+// right by Al. TokenizeScan calls this for scans with Ss > 0 and Ah == 0.
+//
+// Tokens are appended to the master's token arrays under the given context.
+// Consecutive blocks that end in zeros are merged into end-of-block runs.
+// sti receives the token offset and count of the scan, the token count at each
+// restart boundary, and the nonzero / future-nonzero statistics.
+void TokenizeACProgressiveScan(j_compress_ptr cinfo, int scan_index,
+ int context, ScanTokenInfo* sti) {
+ jpeg_comp_master* m = cinfo->master;
+ const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
+ const int comp_idx = scan_info->component_index[0];
+ const jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+ const int Al = scan_info->Al;
+ const int Ss = scan_info->Ss;
+ const int Se = scan_info->Se;
+ const size_t restart_interval = sti->restart_interval;
+ // Counted in blocks: decremented once per processed block below.
+ int restarts_to_go = restart_interval;
+ size_t num_blocks = comp->height_in_blocks * comp->width_in_blocks;
+ size_t num_restarts =
+ restart_interval > 0 ? DivCeil(num_blocks, restart_interval) : 1;
+ size_t restart_idx = 0;
+ int eob_run = 0;
+ TokenArray* ta = &m->token_arrays[m->cur_token_array];
+ sti->token_offset = m->total_num_tokens + ta->num_tokens;
+ sti->restarts = Allocate<size_t>(cinfo, num_restarts, JPOOL_IMAGE);
+ for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) {
+ JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)(
+ reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx], by,
+ 1, false);
+ // Each coefficient can appear in at most one token, but we have to reserve
+ // one extra EOBrun token that was rolled over from the previous block-row
+ // and has to be flushed at the end.
+ int max_tokens_per_row = 1 + comp->width_in_blocks * (Se - Ss + 1);
+ // Move on to a fresh token array if this block row might not fit.
+ if (ta->num_tokens + max_tokens_per_row > m->num_tokens) {
+ if (ta->tokens) {
+ m->total_num_tokens += ta->num_tokens;
+ ++m->cur_token_array;
+ ta = &m->token_arrays[m->cur_token_array];
+ }
+ m->num_tokens =
+ EstimateNumTokens(cinfo, by, comp->height_in_blocks,
+ m->total_num_tokens, max_tokens_per_row);
+ ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
+ m->next_token = ta->tokens;
+ }
+ for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) {
+ if (restart_interval > 0 && restarts_to_go == 0) {
+ // Restart boundary: flush a pending EOB run and record the number of
+ // tokens emitted so far.
+ if (eob_run > 0) {
+ int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run);
+ int symbol = nbits << 4u;
+ *m->next_token++ =
+ Token(context, symbol, eob_run & ((1 << nbits) - 1));
+ eob_run = 0;
+ }
+ ta->num_tokens = m->next_token - ta->tokens;
+ sti->restarts[restart_idx++] = m->total_num_tokens + ta->num_tokens;
+ restarts_to_go = restart_interval;
+ }
+ const coeff_t* block = &ba[0][bx][0];
+ coeff_t temp2;
+ coeff_t temp;
+ int r = 0;
+ int num_nzeros = 0;
+ int num_future_nzeros = 0;
+ for (int k = Ss; k <= Se; ++k) {
+ if ((temp = block[k]) == 0) {
+ r++;
+ continue;
+ }
+ // temp becomes the magnitude shifted by Al; temp2 is the value whose low
+ // nbits bits are emitted (the complement of temp for negative inputs).
+ if (temp < 0) {
+ temp = -temp;
+ temp >>= Al;
+ temp2 = ~temp;
+ } else {
+ temp >>= Al;
+ temp2 = temp;
+ }
+ // Nonzero coefficient that vanishes at this precision: it extends the
+ // zero run and is counted as a future nonzero.
+ if (temp == 0) {
+ r++;
+ num_future_nzeros++;
+ continue;
+ }
+ // A pending EOB run has to be emitted before this block's first token.
+ if (eob_run > 0) {
+ int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run);
+ int symbol = nbits << 4u;
+ *m->next_token++ =
+ Token(context, symbol, eob_run & ((1 << nbits) - 1));
+ eob_run = 0;
+ }
+ while (r > 15) {
+ *m->next_token++ = Token(context, 0xf0, 0);
+ r -= 16;
+ }
+ int nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1;
+ int symbol = (r << 4u) + nbits;
+ *m->next_token++ = Token(context, symbol, temp2 & ((1 << nbits) - 1));
+ ++num_nzeros;
+ r = 0;
+ }
+ // The block ended with zeros: extend the EOB run, flushing it when it
+ // reaches 0x7FFF.
+ if (r > 0) {
+ ++eob_run;
+ if (eob_run == 0x7FFF) {
+ int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run);
+ int symbol = nbits << 4u;
+ *m->next_token++ =
+ Token(context, symbol, eob_run & ((1 << nbits) - 1));
+ eob_run = 0;
+ }
+ }
+ sti->num_nonzeros += num_nzeros;
+ sti->num_future_nonzeros += num_future_nzeros;
+ --restarts_to_go;
+ }
+ ta->num_tokens = m->next_token - ta->tokens;
+ }
+ // Flush the EOB run that is still pending after the last block.
+ if (eob_run > 0) {
+ int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run);
+ int symbol = nbits << 4u;
+ *m->next_token++ = Token(context, symbol, eob_run & ((1 << nbits) - 1));
+ ++ta->num_tokens;
+ eob_run = 0;
+ }
+ sti->num_tokens = m->total_num_tokens + ta->num_tokens - sti->token_offset;
+ sti->restarts[restart_idx++] = m->total_num_tokens + ta->num_tokens;
+}
+
+// Tokenizes the coefficients Ss..Se of the single component
+// (component_index[0]) of scan scan_index as a refinement pass at bit position
+// Al. TokenizeScan calls this for scans with Ss > 0 and Ah != 0.
+//
+// Output goes to three streams: RefTokens starting at
+// m->next_refinement_token, refinement bits starting at m->next_refinement_bit
+// and EOB-run values in a newly allocated sti->eobruns. The two master write
+// positions are advanced past what was written. sti also receives the token
+// count, the token count at each restart boundary and the nonzero count.
+void TokenizeACRefinementScan(j_compress_ptr cinfo, int scan_index,
+ ScanTokenInfo* sti) {
+ jpeg_comp_master* m = cinfo->master;
+ const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
+ const int comp_idx = scan_info->component_index[0];
+ const jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+ const int Al = scan_info->Al;
+ const int Ss = scan_info->Ss;
+ const int Se = scan_info->Se;
+ const size_t restart_interval = sti->restart_interval;
+ int restarts_to_go = restart_interval;
+ RefToken token;
+ int eob_run = 0;
+ int eob_refbits = 0;
+ size_t num_blocks = comp->height_in_blocks * comp->width_in_blocks;
+ size_t num_restarts =
+ restart_interval > 0 ? DivCeil(num_blocks, restart_interval) : 1;
+ sti->tokens = m->next_refinement_token;
+ sti->refbits = m->next_refinement_bit;
+ // A new eobruns entry is only started when a run reaches length 2, hence
+ // num_blocks / 2 entries.
+ sti->eobruns = Allocate<uint16_t>(cinfo, num_blocks / 2, JPOOL_IMAGE);
+ sti->restarts = Allocate<size_t>(cinfo, num_restarts, JPOOL_IMAGE);
+ RefToken* next_token = sti->tokens;
+ // Position of the token that represents the EOB run currently being built.
+ RefToken* next_eob_token = next_token;
+ uint8_t* next_ref_bit = sti->refbits;
+ uint16_t* next_eobrun = sti->eobruns;
+ size_t restart_idx = 0;
+ for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) {
+ JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)(
+ reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx], by,
+ 1, false);
+ for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) {
+ if (restart_interval > 0 && restarts_to_go == 0) {
+ // Restart boundary: record the token count and end the current EOB run.
+ sti->restarts[restart_idx++] = next_token - sti->tokens;
+ restarts_to_go = restart_interval;
+ next_eob_token = next_token;
+ eob_run = eob_refbits = 0;
+ }
+ const coeff_t* block = &ba[0][bx][0];
+ int num_eob_refinement_bits = 0;
+ int num_refinement_bits = 0;
+ int num_nzeros = 0;
+ int r = 0;
+ for (int k = Ss; k <= Se; ++k) {
+ int absval = block[k];
+ if (absval == 0) {
+ r++;
+ continue;
+ }
+ // mask is -1 for negative values and 0 otherwise; the next two lines
+ // turn absval into the magnitude, which is then shifted by Al.
+ const int mask = absval >> (8 * sizeof(int) - 1);
+ absval += mask;
+ absval ^= mask;
+ absval >>= Al;
+ if (absval == 0) {
+ r++;
+ continue;
+ }
+ // Each 0xf0 token covers 16 zeros and carries the refinement bits
+ // collected so far.
+ while (r > 15) {
+ token.symbol = 0xf0;
+ token.refbits = num_refinement_bits;
+ *next_token++ = token;
+ r -= 16;
+ num_eob_refinement_bits += num_refinement_bits;
+ num_refinement_bits = 0;
+ }
+ // Shifted magnitude above 1: only its lowest bit is recorded as a
+ // refinement bit, no token is produced for it.
+ if (absval > 1) {
+ *next_ref_bit++ = absval & 1u;
+ ++num_refinement_bits;
+ continue;
+ }
+ // Shifted magnitude exactly 1: emit a token with the zero run in the
+ // upper nibble, bit 0 set, and bit 1 set for non-negative values.
+ int symbol = (r << 4u) + 1 + ((mask + 1) << 1);
+ token.symbol = symbol;
+ token.refbits = num_refinement_bits;
+ *next_token++ = token;
+ ++num_nzeros;
+ num_refinement_bits = 0;
+ num_eob_refinement_bits = 0;
+ r = 0;
+ next_eob_token = next_token;
+ eob_run = eob_refbits = 0;
+ }
+ // The rest of the block produced no token of its own: fold it into the
+ // current EOB run.
+ if (r > 0 || num_eob_refinement_bits + num_refinement_bits > 0) {
+ ++eob_run;
+ eob_refbits += num_eob_refinement_bits + num_refinement_bits;
+ // refbits is an 8-bit field: on overflow start a new EOB token right
+ // after the current one.
+ if (eob_refbits > 255) {
+ ++next_eob_token;
+ eob_refbits = num_eob_refinement_bits + num_refinement_bits;
+ eob_run = 1;
+ }
+ // Rewind to the EOB token, so tokens written for this block after it
+ // (such as 0xf0 tokens) are dropped in favour of the run.
+ next_token = next_eob_token;
+ next_token->refbits = eob_refbits;
+ // The symbol's upper nibble grows by one each time the run length
+ // reaches a power of two, and the eobruns entry restarts from 0;
+ // otherwise the entry just counts up.
+ if (eob_run == 1) {
+ next_token->symbol = 0;
+ } else if (eob_run == 2) {
+ next_token->symbol = 16;
+ *next_eobrun++ = 0;
+ } else if ((eob_run & (eob_run - 1)) == 0) {
+ next_token->symbol += 16;
+ next_eobrun[-1] = 0;
+ } else {
+ ++next_eobrun[-1];
+ }
+ ++next_token;
+ // Maximum run length reached: the next block starts a new run.
+ if (eob_run == 0x7fff) {
+ next_eob_token = next_token;
+ eob_run = eob_refbits = 0;
+ }
+ }
+ sti->num_nonzeros += num_nzeros;
+ --restarts_to_go;
+ }
+ }
+ sti->num_tokens = next_token - sti->tokens;
+ sti->restarts[restart_idx++] = sti->num_tokens;
+ m->next_refinement_token = next_token;
+ m->next_refinement_bit = next_ref_bit;
+}
+
+ // Converts the coefficients of one scan into tokens.
+ //
+ // Scans with Ss > 0 are forwarded to TokenizeACProgressiveScan() (Ah == 0) or
+ // TokenizeACRefinementScan() (Ah > 0). The remainder of this function handles
+ // scans that start at coefficient 0: sequential scans, progressive DC first
+ // scans (Ah == 0) and progressive DC refinement scans (Ah > 0).
+ //
+ // For DC refinement scans no Token is produced; instead one bit per block is
+ // stored in sti->refbits. For all other scans tokens are appended through
+ // m->next_token into the current entry of m->token_arrays, moving to a new
+ // token array when the current one does not have enough room.
+ //
+ // On return sti->token_offset, sti->num_tokens and sti->restarts are filled in.
+ void TokenizeScan(j_compress_ptr cinfo, size_t scan_index, int ac_ctx_offset,
+ ScanTokenInfo* sti) {
+ const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
+ if (scan_info->Ss > 0) {
+ if (scan_info->Ah == 0) {
+ TokenizeACProgressiveScan(cinfo, scan_index, ac_ctx_offset, sti);
+ } else {
+ TokenizeACRefinementScan(cinfo, scan_index, sti);
+ }
+ return;
+ }
+
+ jpeg_comp_master* m = cinfo->master;
+ size_t restart_interval = sti->restart_interval;
+ int restarts_to_go = restart_interval;
+ // DC predictor per scan component; reset to zero at every restart.
+ coeff_t last_dc_coeff[MAX_COMPS_IN_SCAN] = {0};
+
+ // "Non-interleaved" means color data comes in separate scans, in other words
+ // each scan can contain only one color component.
+ const bool is_interleaved = (scan_info->comps_in_scan > 1);
+ const bool is_progressive = cinfo->progressive_mode;
+ const int Ah = scan_info->Ah;
+ const int Al = scan_info->Al;
+ // All-zero block used in place of blocks outside the component's dimensions.
+ HWY_ALIGN constexpr coeff_t kSinkBlock[DCTSIZE2] = {0};
+
+ size_t restart_idx = 0;
+ TokenArray* ta = &m->token_arrays[m->cur_token_array];
+ // Offset of this scan's first token, counted over all token arrays.
+ sti->token_offset = Ah > 0 ? 0 : m->total_num_tokens + ta->num_tokens;
+
+ if (Ah > 0) {
+ // DC refinement: one refinement bit per block.
+ sti->refbits = Allocate<uint8_t>(cinfo, sti->num_blocks, JPOOL_IMAGE);
+ } else if (cinfo->progressive_mode) {
+ // Progressive DC first scan: room for sti->num_blocks tokens is required
+ // (the assertion at the end checks num_blocks == num_tokens).
+ if (ta->num_tokens + sti->num_blocks > m->num_tokens) {
+ if (ta->tokens) {
+ m->total_num_tokens += ta->num_tokens;
+ ++m->cur_token_array;
+ ta = &m->token_arrays[m->cur_token_array];
+ }
+ m->num_tokens = sti->num_blocks;
+ ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
+ m->next_token = ta->tokens;
+ }
+ }
+
+ JBLOCKARRAY ba[MAX_COMPS_IN_SCAN];
+ size_t block_idx = 0;
+ for (size_t mcu_y = 0; mcu_y < sti->MCU_rows_in_scan; ++mcu_y) {
+ // Fetch the block rows of every scan component that belong to this MCU row.
+ for (int i = 0; i < scan_info->comps_in_scan; ++i) {
+ int comp_idx = scan_info->component_index[i];
+ jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+ int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1;
+ int by0 = mcu_y * n_blocks_y;
+ int block_rows_left = comp->height_in_blocks - by0;
+ int max_block_rows = std::min(n_blocks_y, block_rows_left);
+ ba[i] = (*cinfo->mem->access_virt_barray)(
+ reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx],
+ by0, max_block_rows, false);
+ }
+ if (!cinfo->progressive_mode) {
+ // Sequential mode: make sure the current token array can hold one more
+ // MCU row; otherwise start a new array sized by EstimateNumTokens().
+ int max_tokens_per_mcu_row = MaxNumTokensPerMCURow(cinfo);
+ if (ta->num_tokens + max_tokens_per_mcu_row > m->num_tokens) {
+ if (ta->tokens) {
+ m->total_num_tokens += ta->num_tokens;
+ ++m->cur_token_array;
+ ta = &m->token_arrays[m->cur_token_array];
+ }
+ m->num_tokens =
+ EstimateNumTokens(cinfo, mcu_y, sti->MCU_rows_in_scan,
+ m->total_num_tokens, max_tokens_per_mcu_row);
+ ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
+ m->next_token = ta->tokens;
+ }
+ }
+ for (size_t mcu_x = 0; mcu_x < sti->MCUs_per_row; ++mcu_x) {
+ // Possibly emit a restart marker.
+ if (restart_interval > 0 && restarts_to_go == 0) {
+ restarts_to_go = restart_interval;
+ memset(last_dc_coeff, 0, sizeof(last_dc_coeff));
+ ta->num_tokens = m->next_token - ta->tokens;
+ // Record the restart position: a block index for DC refinement scans,
+ // a global token index otherwise.
+ sti->restarts[restart_idx++] =
+ Ah > 0 ? block_idx : m->total_num_tokens + ta->num_tokens;
+ }
+ // Encode one MCU
+ for (int i = 0; i < scan_info->comps_in_scan; ++i) {
+ int comp_idx = scan_info->component_index[i];
+ jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+ int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1;
+ int n_blocks_x = is_interleaved ? comp->h_samp_factor : 1;
+ for (int iy = 0; iy < n_blocks_y; ++iy) {
+ for (int ix = 0; ix < n_blocks_x; ++ix) {
+ size_t block_y = mcu_y * n_blocks_y + iy;
+ size_t block_x = mcu_x * n_blocks_x + ix;
+ const coeff_t* block;
+ if (block_x >= comp->width_in_blocks ||
+ block_y >= comp->height_in_blocks) {
+ block = kSinkBlock;
+ } else {
+ block = &ba[i][iy][block_x][0];
+ }
+ if (!is_progressive) {
+ HWY_DYNAMIC_DISPATCH(ComputeTokensSequential)
+ (block, last_dc_coeff[i], comp_idx, ac_ctx_offset + i,
+ &m->next_token);
+ last_dc_coeff[i] = block[0];
+ } else {
+ if (Ah == 0) {
+ TokenizeProgressiveDC(block, comp_idx, Al, last_dc_coeff + i,
+ &m->next_token);
+ } else {
+ // DC refinement: keep only bit Al of the DC coefficient.
+ sti->refbits[block_idx] = (block[0] >> Al) & 1;
+ }
+ }
+ ++block_idx;
+ }
+ }
+ }
+ --restarts_to_go;
+ }
+ ta->num_tokens = m->next_token - ta->tokens;
+ }
+ JXL_DASSERT(block_idx == sti->num_blocks);
+ sti->num_tokens =
+ Ah > 0 ? sti->num_blocks
+ : m->total_num_tokens + ta->num_tokens - sti->token_offset;
+ // The end of the scan is stored as the final entry of the restart list.
+ sti->restarts[restart_idx++] =
+ Ah > 0 ? sti->num_blocks : m->total_num_tokens + ta->num_tokens;
+ if (Ah == 0 && cinfo->progressive_mode) {
+ JXL_DASSERT(sti->num_blocks == sti->num_tokens);
+ }
+ }
+
+} // namespace
+
+ // Tokenizes every scan of the scan script, in three passes:
+ //  1. AC first scans with Al > 0 (Ss > 0, Ah == 0). While doing so, the sizes
+ //     of the refinement token buffer and of the first refinement bit buffer
+ //     are accumulated.
+ //  2. AC refinement scans (Ss > 0, Ah > 0), processed in rounds so that scans
+ //     with the highest Ah for their band come first; each round gets a freshly
+ //     allocated refinement bit buffer.
+ //  3. All scans not handled by the first two passes.
+ void TokenizeJpeg(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ // processed[i] != 0 once scan i has been tokenized.
+ std::vector<int> processed(cinfo->num_scans);
+ size_t max_refinement_tokens = 0;
+ size_t num_refinement_bits = 0;
+ // For each coefficient index k, the Al of the AC first scan covering k.
+ int num_refinement_scans[DCTSIZE2] = {};
+ int max_num_refinement_scans = 0;
+ for (int i = 0; i < cinfo->num_scans; ++i) {
+ const jpeg_scan_info* si = &cinfo->scan_info[i];
+ ScanTokenInfo* sti = &m->scan_token_info[i];
+ if (si->Ss > 0 && si->Ah == 0 && si->Al > 0) {
+ int offset = m->ac_ctx_offset[i];
+ TokenizeScan(cinfo, i, offset, sti);
+ processed[i] = 1;
+ max_refinement_tokens += sti->num_future_nonzeros;
+ for (int k = si->Ss; k <= si->Se; ++k) {
+ num_refinement_scans[k] = si->Al;
+ }
+ max_num_refinement_scans = std::max(max_num_refinement_scans, si->Al);
+ num_refinement_bits += sti->num_nonzeros;
+ }
+ if (si->Ss > 0 && si->Ah > 0) {
+ // Budget (1 + (Se - Ss) / 16) extra refinement tokens per block.
+ int comp_idx = si->component_index[0];
+ const jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+ size_t num_blocks = comp->width_in_blocks * comp->height_in_blocks;
+ max_refinement_tokens += (1 + (si->Se - si->Ss) / 16) * num_blocks;
+ }
+ }
+ if (max_refinement_tokens > 0) {
+ m->next_refinement_token =
+ Allocate<RefToken>(cinfo, max_refinement_tokens, JPOOL_IMAGE);
+ }
+ for (int j = 0; j < max_num_refinement_scans; ++j) {
+ uint8_t* refinement_bits =
+ Allocate<uint8_t>(cinfo, num_refinement_bits, JPOOL_IMAGE);
+ m->next_refinement_bit = refinement_bits;
+ size_t new_refinement_bits = 0;
+ for (int i = 0; i < cinfo->num_scans; ++i) {
+ const jpeg_scan_info* si = &cinfo->scan_info[i];
+ ScanTokenInfo* sti = &m->scan_token_info[i];
+ // Round j handles the refinement scans whose Ah is j below the Al of the
+ // first scan of their band.
+ if (si->Ss > 0 && si->Ah > 0 &&
+ si->Ah == num_refinement_scans[si->Ss] - j) {
+ int offset = m->ac_ctx_offset[i];
+ TokenizeScan(cinfo, i, offset, sti);
+ processed[i] = 1;
+ new_refinement_bits += sti->num_nonzeros;
+ }
+ }
+ // The round is expected to consume exactly the allocated bit buffer.
+ JXL_DASSERT(m->next_refinement_bit ==
+ refinement_bits + num_refinement_bits);
+ // Grow the bit budget for the next round by the nonzero counts reported
+ // by the scans tokenized in this round.
+ num_refinement_bits += new_refinement_bits;
+ }
+ for (int i = 0; i < cinfo->num_scans; ++i) {
+ if (processed[i]) {
+ continue;
+ }
+ int offset = m->ac_ctx_offset[i];
+ TokenizeScan(cinfo, i, offset, &m->scan_token_info[i]);
+ processed[i] = 1;
+ }
+ }
+
+namespace {
+
+ // Symbol occurrence counts for one entropy coding context; all counts start
+ // at zero.
+ struct Histogram {
+   int count[kJpegHuffmanAlphabetSize];
+   Histogram() : count() {}
+ };
+
+ // Accumulates symbol counts into `histograms`, which is indexed by context.
+ //
+ // Every token stored in the token arrays is counted under its own context.
+ // Tokens of AC refinement scans (Ss > 0, Ah > 0) are stored per scan and are
+ // counted under the scan's AC context after masking the symbol with 253.
+ void BuildHistograms(j_compress_ptr cinfo, Histogram* histograms) {
+   jpeg_comp_master* m = cinfo->master;
+   const size_t array_count = m->cur_token_array + 1;
+   for (size_t a = 0; a < array_count; ++a) {
+     const Token* array_tokens = m->token_arrays[a].tokens;
+     const size_t array_len = m->token_arrays[a].num_tokens;
+     for (size_t k = 0; k < array_len; ++k) {
+       const Token tok = array_tokens[k];
+       histograms[tok.context].count[tok.symbol] += 1;
+     }
+   }
+   for (int scan = 0; scan < cinfo->num_scans; ++scan) {
+     const jpeg_scan_info& info = cinfo->scan_info[scan];
+     if (info.Ss <= 0 || info.Ah <= 0) {
+       continue;
+     }
+     const ScanTokenInfo& scan_tokens = m->scan_token_info[scan];
+     int* counts = histograms[m->ac_ctx_offset[scan]].count;
+     for (size_t k = 0; k < scan_tokens.num_tokens; ++k) {
+       counts[scan_tokens.tokens[k].symbol & 253] += 1;
+     }
+   }
+ }
+
+ // Result of ClusterJpegHistograms().
+ struct JpegClusteredHistograms {
+ // The clustered (possibly merged) histograms, in creation order.
+ std::vector<Histogram> histograms;
+ // For each input histogram, the index of its cluster in `histograms`.
+ std::vector<uint32_t> histogram_indexes;
+ // For each entry of `histograms`, the slot number it was assigned to.
+ std::vector<uint32_t> slot_ids;
+ };
+
+ // Returns the estimated number of bits needed to code `histo` with its own
+ // Huffman table: table header bits plus count * code length for every symbol.
+ // One extra symbol with count 1 is appended before building the tree.
+ float HistogramCost(const Histogram& histo) {
+   std::vector<uint32_t> counts(histo.count,
+                                histo.count + kJpegHuffmanAlphabetSize);
+   counts.push_back(1);
+   std::vector<uint8_t> depths(counts.size());
+   CreateHuffmanTree(counts.data(), counts.size(), kJpegHuffmanMaxBitLength,
+                     depths.data());
+   size_t num_coded_symbols = 0;
+   size_t data_bits = 0;
+   for (size_t sym = 0; sym < kJpegHuffmanAlphabetSize; ++sym) {
+     if (depths[sym] == 0) {
+       continue;
+     }
+     ++num_coded_symbols;
+     data_bits += counts[sym] * depths[sym];
+   }
+   // One byte for the table id, one per bit length, one per coded symbol.
+   const size_t header_bits =
+       (1 + kJpegHuffmanMaxBitLength) * 8 + 8 * num_coded_symbols;
+   return header_bits + data_bits;
+ }
+
+ // Stores the element-wise sum of `a` and `b` in `*c`. `c` may point to `a`
+ // or `b`, since each output element depends only on the same input index.
+ void AddHistograms(const Histogram& a, const Histogram& b, Histogram* c) {
+   const int* lhs = a.count;
+   const int* rhs = b.count;
+   int* sum = c->count;
+   for (size_t k = 0; k != kJpegHuffmanAlphabetSize; ++k) {
+     sum[k] = lhs[k] + rhs[k];
+   }
+ }
+
+ // Returns true if every count in `histo` is zero.
+ bool IsEmptyHistogram(const Histogram& histo) {
+   const int* first = histo.count;
+   const int* last = histo.count + kJpegHuffmanAlphabetSize;
+   return std::none_of(first, last, [](int c) { return c != 0; });
+ }
+
+ // Greedily clusters `num` input histograms.
+ //
+ // Inputs are visited in order. Empty histograms are skipped (their entry in
+ // clusters->histogram_indexes keeps the value set by resize()). Each non-empty
+ // histogram is either merged into the histogram currently occupying one of
+ // the (at most 4) slots, or becomes a new cluster, whichever is estimated by
+ // HistogramCost() to be cheaper.
+ void ClusterJpegHistograms(const Histogram* histograms, size_t num,
+ JpegClusteredHistograms* clusters) {
+ clusters->histogram_indexes.resize(num);
+ // slot_histograms[j]: index into clusters->histograms of the histogram that
+ // currently occupies slot j; slot_costs[j]: its accumulated cost.
+ std::vector<uint32_t> slot_histograms;
+ std::vector<float> slot_costs;
+ for (size_t i = 0; i < num; ++i) {
+ const Histogram& cur = histograms[i];
+ if (IsEmptyHistogram(cur)) {
+ continue;
+ }
+ // Baseline: cost of giving `cur` its own table. best_slot equal to
+ // slot_histograms.size() means "create a new histogram".
+ float best_cost = HistogramCost(cur);
+ size_t best_slot = slot_histograms.size();
+ for (size_t j = 0; j < slot_histograms.size(); ++j) {
+ size_t prev_idx = slot_histograms[j];
+ const Histogram& prev = clusters->histograms[prev_idx];
+ Histogram combined;
+ AddHistograms(prev, cur, &combined);
+ float combined_cost = HistogramCost(combined);
+ // Incremental cost of merging `cur` into slot j.
+ float cost = combined_cost - slot_costs[j];
+ if (cost < best_cost) {
+ best_cost = cost;
+ best_slot = j;
+ }
+ }
+ if (best_slot == slot_histograms.size()) {
+ // Create new histogram.
+ size_t histogram_index = clusters->histograms.size();
+ clusters->histograms.push_back(cur);
+ clusters->histogram_indexes[i] = histogram_index;
+ if (best_slot < 4) {
+ // We have a free slot, so we put the new histogram there.
+ slot_histograms.push_back(histogram_index);
+ slot_costs.push_back(best_cost);
+ } else {
+ // TODO(szabadka) Find the best histogram to replace.
+ // For now the slots are reused round-robin.
+ best_slot = (clusters->slot_ids.back() + 1) % 4;
+ }
+ slot_histograms[best_slot] = histogram_index;
+ slot_costs[best_slot] = best_cost;
+ clusters->slot_ids.push_back(best_slot);
+ } else {
+ // Merge this histogram with a previous one.
+ size_t histogram_index = slot_histograms[best_slot];
+ const Histogram& prev = clusters->histograms[histogram_index];
+ AddHistograms(prev, cur, &clusters->histograms[histogram_index]);
+ clusters->histogram_indexes[i] = histogram_index;
+ JXL_ASSERT(clusters->slot_ids[histogram_index] == best_slot);
+ slot_costs[best_slot] += best_cost;
+ }
+ }
+ }
+
+ // Appends the DC or AC Huffman table number `index` of `cinfo` to
+ // `huffman_tables`, unless that table has been copied before.
+ //
+ // `inv_slot_map` maps (index + (is_dc ? 0 : NUM_HUFF_TBLS)) to the position of
+ // the copied table in `huffman_tables`, or -1 if it has not been copied yet.
+ // `slot_id_map` receives, for each copied table, `index` for DC tables and
+ // `index + 0x10` for AC tables. Reports an error through JPEGLI_ERROR if the
+ // index is out of range or the table is missing; the table is also passed
+ // through ValidateHuffmanTable().
+ void CopyHuffmanTable(j_compress_ptr cinfo, int index, bool is_dc,
+                       int* inv_slot_map, uint8_t* slot_id_map,
+                       JHUFF_TBL* huffman_tables, size_t* num_huffman_tables) {
+   const char* type = is_dc ? "DC" : "AC";
+   const bool index_in_range = index >= 0 && index < NUM_HUFF_TBLS;
+   if (!index_in_range) {
+     JPEGLI_ERROR("Invalid %s Huffman table index %d", type, index);
+   }
+   int* position = &inv_slot_map[is_dc ? index : index + NUM_HUFF_TBLS];
+   if (*position != -1) {
+     // This table is already in the list.
+     return;
+   }
+   const size_t dest = *num_huffman_tables;
+   *position = dest;
+   JHUFF_TBL* table = nullptr;
+   if (is_dc) {
+     table = cinfo->dc_huff_tbl_ptrs[index];
+   } else {
+     table = cinfo->ac_huff_tbl_ptrs[index];
+   }
+   if (table == nullptr) {
+     JPEGLI_ERROR("Missing %s Huffman table %d", type, index);
+   }
+   ValidateHuffmanTable(reinterpret_cast<j_common_ptr>(cinfo), table, is_dc);
+   slot_id_map[dest] = index + (is_dc ? 0 : 0x10);
+   memcpy(&huffman_tables[dest], table, sizeof(JHUFF_TBL));
+   *num_huffman_tables = dest + 1;
+ }
+
+ // Fills `*table` (bits[] and huffval[]) with a length-limited Huffman code for
+ // the symbol counts in `histo`. One extra symbol with count 1 is appended to
+ // the counts before building the tree; it is not written to the table.
+ void BuildJpegHuffmanTable(const Histogram& histo, JHUFF_TBL* table) {
+   std::vector<uint32_t> counts(histo.count,
+                                histo.count + kJpegHuffmanAlphabetSize);
+   counts.push_back(1);
+   std::vector<uint8_t> depths(counts.size());
+   CreateHuffmanTree(counts.data(), counts.size(), kJpegHuffmanMaxBitLength,
+                     depths.data());
+   memset(table, 0, sizeof(JHUFF_TBL));
+   // Number of symbols per code length.
+   for (size_t sym = 0; sym < kJpegHuffmanAlphabetSize; ++sym) {
+     const uint8_t len = depths[sym];
+     if (len > 0) {
+       ++table->bits[len];
+     }
+   }
+   // next_pos[len] is the position in huffval[] of the next symbol whose code
+   // length is len; symbols are grouped by increasing code length.
+   int next_pos[kJpegHuffmanMaxBitLength + 1] = {0};
+   int running = 0;
+   for (size_t len = 1; len <= kJpegHuffmanMaxBitLength; ++len) {
+     running += table->bits[len - 1];
+     next_pos[len] = running;
+   }
+   for (size_t sym = 0; sym < kJpegHuffmanAlphabetSize; ++sym) {
+     const uint8_t len = depths[sym];
+     if (len == 0) {
+       continue;
+     }
+     table->huffval[next_pos[len]] = sym;
+     ++next_pos[len];
+   }
+ }
+
+} // namespace
+
+ // Copies the Huffman tables referenced by the components of `cinfo` into
+ // m->huffman_tables (each distinct table once), records their slot ids in
+ // m->slot_id_map and builds m->context_map.
+ //
+ // Context map layout: entry c (c < num_components) is the DC context of
+ // component c; entries from index 4 onwards are AC contexts, one per component
+ // of every scan with Se > 0, in scan order. Each entry holds the position of
+ // the corresponding table in m->huffman_tables.
+ void CopyHuffmanTables(j_compress_ptr cinfo) {
+   jpeg_comp_master* m = cinfo->master;
+   size_t max_huff_tables = 2 * cinfo->num_components;
+   // Copy Huffman tables and save slot ids.
+   m->huffman_tables = Allocate<JHUFF_TBL>(cinfo, max_huff_tables, JPOOL_IMAGE);
+   m->slot_id_map = Allocate<uint8_t>(cinfo, max_huff_tables, JPOOL_IMAGE);
+   m->num_huffman_tables = 0;
+   int inv_slot_map[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+   for (int c = 0; c < cinfo->num_components; ++c) {
+     jpeg_component_info* comp = &cinfo->comp_info[c];
+     CopyHuffmanTable(cinfo, comp->dc_tbl_no, /*is_dc=*/true, &inv_slot_map[0],
+                      m->slot_id_map, m->huffman_tables, &m->num_huffman_tables);
+     CopyHuffmanTable(cinfo, comp->ac_tbl_no, /*is_dc=*/false, &inv_slot_map[0],
+                      m->slot_id_map, m->huffman_tables, &m->num_huffman_tables);
+   }
+   // Compute context map. The AC part needs one entry per component of every
+   // scan that contains AC coefficients, so size the map from the scan script
+   // (never smaller than the previous fixed size of 8) instead of assuming
+   // that at most four AC contexts exist.
+   size_t num_ac_contexts = 0;
+   for (int i = 0; i < cinfo->num_scans; ++i) {
+     const jpeg_scan_info* si = &cinfo->scan_info[i];
+     if (si->Se > 0) {
+       num_ac_contexts += si->comps_in_scan;
+     }
+   }
+   const size_t context_map_size = std::max<size_t>(8, 4 + num_ac_contexts);
+   m->context_map = Allocate<uint8_t>(cinfo, context_map_size, JPOOL_IMAGE);
+   memset(m->context_map, 0, context_map_size);
+   for (int c = 0; c < cinfo->num_components; ++c) {
+     m->context_map[c] = inv_slot_map[cinfo->comp_info[c].dc_tbl_no];
+   }
+   int ac_ctx = 4;
+   for (int i = 0; i < cinfo->num_scans; ++i) {
+     const jpeg_scan_info* si = &cinfo->scan_info[i];
+     if (si->Se > 0) {
+       for (int j = 0; j < si->comps_in_scan; ++j) {
+         int c = si->component_index[j];
+         jpeg_component_info* comp = &cinfo->comp_info[c];
+         m->context_map[ac_ctx++] = inv_slot_map[comp->ac_tbl_no + 4];
+       }
+     }
+   }
+ }
+
+ // Builds optimized Huffman tables from the token statistics.
+ //
+ // Histograms are collected per context, the DC contexts (first
+ // num_components entries) and the AC contexts (index 4 onwards) are clustered
+ // separately, and one Huffman table is built per cluster. DC tables come
+ // first in m->huffman_tables, followed by the AC tables. m->slot_id_map and
+ // m->context_map are filled in accordingly.
+ void OptimizeHuffmanCodes(j_compress_ptr cinfo) {
+   jpeg_comp_master* m = cinfo->master;
+   std::vector<Histogram> histograms(m->num_contexts);
+   BuildHistograms(cinfo, histograms.data());
+   JpegClusteredHistograms dc_clusters;
+   JpegClusteredHistograms ac_clusters;
+   ClusterJpegHistograms(histograms.data(), cinfo->num_components, &dc_clusters);
+   ClusterJpegHistograms(histograms.data() + 4, m->num_contexts - 4,
+                         &ac_clusters);
+   const size_t num_dc_huff = dc_clusters.histograms.size();
+   const size_t num_ac_huff = ac_clusters.histograms.size();
+   m->num_huffman_tables = num_dc_huff + num_ac_huff;
+   m->huffman_tables =
+       Allocate<JHUFF_TBL>(cinfo, m->num_huffman_tables, JPOOL_IMAGE);
+   m->slot_id_map = Allocate<uint8_t>(cinfo, m->num_huffman_tables, JPOOL_IMAGE);
+   // DC tables: the slot id is the table's own position.
+   for (size_t dc = 0; dc < num_dc_huff; ++dc) {
+     JHUFF_TBL huff_table = {};
+     m->slot_id_map[dc] = dc;
+     BuildJpegHuffmanTable(dc_clusters.histograms[dc], &huff_table);
+     memcpy(&m->huffman_tables[dc], &huff_table, sizeof(huff_table));
+   }
+   // AC tables: the slot id is 16 plus the slot chosen during clustering.
+   for (size_t ac = 0; ac < num_ac_huff; ++ac) {
+     const size_t table_pos = num_dc_huff + ac;
+     JHUFF_TBL huff_table = {};
+     m->slot_id_map[table_pos] = 16 + ac_clusters.slot_ids[ac];
+     BuildJpegHuffmanTable(ac_clusters.histograms[ac], &huff_table);
+     memcpy(&m->huffman_tables[table_pos], &huff_table, sizeof(huff_table));
+   }
+   // Context map: contexts that are neither a component's DC context nor at
+   // index >= 4 keep the value 0.
+   m->context_map = Allocate<uint8_t>(cinfo, m->num_contexts, JPOOL_IMAGE);
+   memset(m->context_map, 0, m->num_contexts);
+   const size_t num_contexts = m->num_contexts;
+   const size_t num_components = static_cast<size_t>(cinfo->num_components);
+   const size_t dc_end = std::min(num_components, num_contexts);
+   for (size_t ctx = 0; ctx < dc_end; ++ctx) {
+     m->context_map[ctx] = dc_clusters.histogram_indexes[ctx];
+   }
+   const size_t ac_begin = std::max<size_t>(num_components, 4);
+   for (size_t ctx = ac_begin; ctx < num_contexts; ++ctx) {
+     m->context_map[ctx] = num_dc_huff + ac_clusters.histogram_indexes[ctx - 4];
+   }
+ }
+
+namespace {
+
+ // Number of extra bits that follow each symbol, indexed by the symbol byte
+ // (one row per high nibble, one column per low nibble).
+ // For a non-zero low nibble the value equals the low nibble. For a zero low
+ // nibble the value equals the high nibble, except for the last row (symbol
+ // 0xF0), which has no extra bits.
+ // NOTE(review): presumably the zero-low-nibble entries are the progressive
+ // end-of-band run symbols and 0xF0 is the 16-zero run symbol - confirm.
+ constexpr uint8_t kNumExtraBits[256] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 11, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 13, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ };
+
+ // Derives the per-symbol encoder table `*code` from the bits[]/huffval[]
+ // description in `table`.
+ //
+ // For every symbol listed in table.huffval, code->depth is set to the Huffman
+ // code length plus the symbol's number of extra bits (kNumExtraBits), and
+ // code->code to the Huffman code shifted left by that number of extra bits.
+ // Entries of symbols not listed in the table are not written.
+ void BuildHuffmanCodeTable(const JHUFF_TBL& table, HuffmanCodeTable* code) {
+ int huff_code[kJpegHuffmanAlphabetSize];
+ // +1 for a sentinel element.
+ uint32_t huff_size[kJpegHuffmanAlphabetSize + 1];
+ // Expand bits[] into one code length per table position.
+ int p = 0;
+ for (size_t l = 1; l <= kJpegHuffmanMaxBitLength; ++l) {
+ int i = table.bits[l];
+ while (i--) huff_size[p++] = l;
+ }
+
+ // Reuse sentinel element.
+ int last_p = p;
+ huff_size[last_p] = 0;
+
+ // Assign consecutive code values within each length; when moving on to the
+ // next length the running code value is shifted left by one.
+ int next_code = 0;
+ uint32_t si = huff_size[0];
+ p = 0;
+ while (huff_size[p]) {
+ while ((huff_size[p]) == si) {
+ huff_code[p++] = next_code;
+ next_code++;
+ }
+ next_code <<= 1;
+ si++;
+ }
+ // Store length and code per symbol, leaving room for the extra bits in the
+ // low bits of the code.
+ for (p = 0; p < last_p; p++) {
+ int i = table.huffval[p];
+ int nbits = kNumExtraBits[i];
+ code->depth[i] = huff_size[p] + nbits;
+ code->code[i] = huff_code[p] << nbits;
+ }
+ }
+
+} // namespace
+
+ // Allocates m->coding_tables and fills one encoder code table for each of the
+ // m->num_huffman_tables entries of m->huffman_tables.
+ void InitEntropyCoder(j_compress_ptr cinfo) {
+   jpeg_comp_master* m = cinfo->master;
+   const size_t table_count = m->num_huffman_tables;
+   m->coding_tables = Allocate<HuffmanCodeTable>(cinfo, table_count, JPOOL_IMAGE);
+   for (size_t t = 0; t != table_count; ++t) {
+     const JHUFF_TBL& source = m->huffman_tables[t];
+     BuildHuffmanCodeTable(source, &m->coding_tables[t]);
+   }
+ }
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/lib/jpegli/entropy_coding.h b/lib/jpegli/entropy_coding.h
new file mode 100644
index 0000000..a552219
--- /dev/null
+++ b/lib/jpegli/entropy_coding.h
@@ -0,0 +1,28 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ENTROPY_CODING_H_
+#define LIB_JPEGLI_ENTROPY_CODING_H_
+
+#include "lib/jpegli/common.h"
+
+ namespace jpegli {
+
+ // NOTE(review): presumably an upper bound on the number of tokens that one MCU
+ // row can produce; it is used that way when sizing token arrays - confirm
+ // against the definition.
+ size_t MaxNumTokensPerMCURow(j_compress_ptr cinfo);
+
+ // NOTE(review): presumably returns the size to allocate for the next token
+ // array, given the current MCU row, the number of MCU rows, the tokens
+ // produced so far and the per-row maximum - confirm against the definition.
+ size_t EstimateNumTokens(j_compress_ptr cinfo, size_t mcu_y, size_t ysize_mcus,
+ size_t num_tokens, size_t max_per_row);
+
+ // Tokenizes all scans of the scan script of `cinfo`.
+ void TokenizeJpeg(j_compress_ptr cinfo);
+
+ // Copies the Huffman tables referenced by the components of `cinfo` into the
+ // encoder state and builds the context map for them.
+ void CopyHuffmanTables(j_compress_ptr cinfo);
+
+ // Builds Huffman tables and the context map from the token histograms.
+ void OptimizeHuffmanCodes(j_compress_ptr cinfo);
+
+ // Builds the encoder code tables from the Huffman tables in the encoder state.
+ void InitEntropyCoder(j_compress_ptr cinfo);
+
+ } // namespace jpegli
+
+#endif // LIB_JPEGLI_ENTROPY_CODING_H_
diff --git a/lib/jpegli/error.cc b/lib/jpegli/error.cc
new file mode 100644
index 0000000..2892616
--- /dev/null
+++ b/lib/jpegli/error.cc
@@ -0,0 +1,102 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/error.h"
+
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <string>
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+ // Single-entry message table. It is installed as jpeg_message_table by
+ // jpegli_std_error() and used by FormatMessage() as the fallback text.
+ const char* const kErrorMessageTable[] = {
+ "Message codes are not supported, error message is in msg_parm.s string",
+ };
+
+ // printf-style formatting into `buffer`, writing at most JMSG_STR_PARM_MAX
+ // bytes. Always returns false.
+ bool FormatString(char* buffer, const char* format, ...) {
+   va_list ap;
+   va_start(ap, format);
+   vsnprintf(buffer, JMSG_STR_PARM_MAX, format, ap);
+   va_end(ap);
+   return false;
+ }
+
+ // Default error_exit handler: outputs the pending message, destroys the
+ // jpegli object and terminates the process with EXIT_FAILURE.
+ void ExitWithAbort(j_common_ptr cinfo) {
+   jpeg_error_mgr* err = cinfo->err;
+   err->output_message(cinfo);
+   jpegli_destroy(cinfo);
+   exit(EXIT_FAILURE);
+ }
+
+ // Default emit_message handler.
+ //
+ // A negative `msg_level` denotes a warning: it is output while the warning
+ // counter is at most 5, or whenever trace_level >= 3, and the counter is
+ // incremented in either case. Any other message is output only if
+ // trace_level >= msg_level.
+ void EmitMessage(j_common_ptr cinfo, int msg_level) {
+   const bool is_warning = msg_level < 0;
+   bool should_output;
+   if (is_warning) {
+     should_output =
+         cinfo->err->num_warnings <= 5 || cinfo->err->trace_level >= 3;
+   } else {
+     should_output = cinfo->err->trace_level >= msg_level;
+   }
+   if (should_output) {
+     (*cinfo->err->output_message)(cinfo);
+   }
+   if (is_warning) {
+     ++cinfo->err->num_warnings;
+   }
+ }
+
+ // Default output_message handler: formats the pending message with the
+ // installed format_message callback and prints it on stderr, followed by a
+ // newline.
+ void OutputMessage(j_common_ptr cinfo) {
+   char text[JMSG_LENGTH_MAX];
+   cinfo->err->format_message(cinfo, text);
+   fprintf(stderr, "%s\n", text);
+ }
+
+ // Default format_message handler; writes the message text to `buffer`.
+ //
+ //  - msg_code == 0: the first JMSG_STR_PARM_MAX bytes of msg_parm.s are copied.
+ //  - msg_code within the add-on message range: the add-on format string is
+ //    expanded with msg_parm.s if it contains "%s", otherwise with the eight
+ //    integers of msg_parm.i.
+ //  - anything else: the fixed text of kErrorMessageTable[0].
+ void FormatMessage(j_common_ptr cinfo, char* buffer) {
+   jpeg_error_mgr* err = cinfo->err;
+   const int code = err->msg_code;
+   if (code == 0) {
+     memcpy(buffer, cinfo->err->msg_parm.s, JMSG_STR_PARM_MAX);
+     return;
+   }
+   const bool is_addon_code = err->addon_message_table != nullptr &&
+                              code >= err->first_addon_message &&
+                              code <= err->last_addon_message;
+   if (!is_addon_code) {
+     snprintf(buffer, JMSG_LENGTH_MAX, "%s", kErrorMessageTable[0]);
+     return;
+   }
+   std::string msg(err->addon_message_table[code - err->first_addon_message]);
+   const bool takes_string = msg.find("%s") != std::string::npos;
+   if (takes_string) {
+     snprintf(buffer, JMSG_LENGTH_MAX, msg.data(), err->msg_parm.s);
+     return;
+   }
+   const int* args = err->msg_parm.i;
+   snprintf(buffer, JMSG_LENGTH_MAX, msg.data(), args[0], args[1], args[2],
+            args[3], args[4], args[5], args[6], args[7]);
+ }
+
+ // Default reset_error_mgr handler: clears the message string, the message
+ // code and the warning counter.
+ void ResetErrorManager(j_common_ptr cinfo) {
+   jpeg_error_mgr* err = cinfo->err;
+   memset(err->msg_parm.s, 0, JMSG_STR_PARM_MAX);
+   err->msg_code = 0;
+   err->num_warnings = 0;
+ }
+
+} // namespace jpegli
+
+struct jpeg_error_mgr* jpegli_std_error(struct jpeg_error_mgr* err) {
+ err->error_exit = jpegli::ExitWithAbort;
+ err->emit_message = jpegli::EmitMessage;
+ err->output_message = jpegli::OutputMessage;
+ err->format_message = jpegli::FormatMessage;
+ err->reset_error_mgr = jpegli::ResetErrorManager;
+ memset(err->msg_parm.s, 0, JMSG_STR_PARM_MAX);
+ err->trace_level = 0;
+ err->num_warnings = 0;
+ // We don't support message codes and message table, but we define one here
+ // in case the application has a custom format_message and tries to access
+ // these fields there.
+ err->msg_code = 0;
+ err->jpeg_message_table = jpegli::kErrorMessageTable;
+ err->last_jpeg_message = 0;
+ err->addon_message_table = nullptr;
+ err->first_addon_message = 0;
+ err->last_addon_message = 0;
+ return err;
+}
diff --git a/lib/jpegli/error.h b/lib/jpegli/error.h
new file mode 100644
index 0000000..4451abd
--- /dev/null
+++ b/lib/jpegli/error.h
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ERROR_H_
+#define LIB_JPEGLI_ERROR_H_
+
+#include <stdarg.h>
+#include <stdint.h>
+
+#include "lib/jpegli/common.h"
+
+ namespace jpegli {
+
+ // printf-style formatting of `format` and its arguments into `buffer`.
+ // Used by the JPEGLI_ERROR, JPEGLI_WARN and JPEGLI_TRACE macros with
+ // cinfo->err->msg_parm.s as the buffer.
+ bool FormatString(char* buffer, const char* format, ...);
+
+ } // namespace jpegli
+
+ // Formats "<file>:<line>: <message>" into cinfo->err->msg_parm.s and then
+ // calls the error_exit callback. Requires a variable named `cinfo` in scope.
+ // Expands to a comma expression.
+ #define JPEGLI_ERROR(format, ...) \
+ jpegli::FormatString(cinfo->err->msg_parm.s, ("%s:%d: " format), __FILE__, \
+ __LINE__, ##__VA_ARGS__), \
+ (*cinfo->err->error_exit)(reinterpret_cast<j_common_ptr>(cinfo))
+
+ // Formats "<file>:<line>: <message>" into cinfo->err->msg_parm.s and then
+ // calls the emit_message callback with level -1. Requires a variable named
+ // `cinfo` in scope. Expands to a comma expression.
+ #define JPEGLI_WARN(format, ...) \
+ jpegli::FormatString(cinfo->err->msg_parm.s, ("%s:%d: " format), __FILE__, \
+ __LINE__, ##__VA_ARGS__), \
+ (*cinfo->err->emit_message)(reinterpret_cast<j_common_ptr>(cinfo), -1)
+
+#define JPEGLI_TRACE(level, format, ...) \
+ if (cinfo->err->trace_level >= (level)) \
+ jpegli::FormatString(cinfo->err->msg_parm.s, ("%s:%d: " format), __FILE__, \
+ __LINE__, ##__VA_ARGS__), \
+ (*cinfo->err->emit_message)(reinterpret_cast<j_common_ptr>(cinfo), \
+ (level))
+
+#endif // LIB_JPEGLI_ERROR_H_
diff --git a/lib/jpegli/error_handling_test.cc b/lib/jpegli/error_handling_test.cc
new file mode 100644
index 0000000..0d481c5
--- /dev/null
+++ b/lib/jpegli/error_handling_test.cc
@@ -0,0 +1,1276 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jpegli {
+namespace {
+
+ // Baseline: a complete, valid encode of a 1x1 single-component image must
+ // succeed, and the output must decode back to one pixel with value 0.
+ // NOTE(review): in all tests of this suite, ERROR_HANDLER_SETUP presumably
+ // makes the lambda return false when jpegli reports an error - confirm
+ // against its definition.
+ TEST(EncoderErrorHandlingTest, MinimalSuccess) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[1] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ }
+ TestImage output;
+ DecodeWithLibjpeg(CompressParams(), DecompressParams(), nullptr, 0, buffer,
+ buffer_size, &output);
+ EXPECT_EQ(1, output.xsize);
+ EXPECT_EQ(1, output.ysize);
+ EXPECT_EQ(1, output.components);
+ EXPECT_EQ(0, output.pixels[0]);
+ if (buffer) free(buffer);
+ }
+
+ // Starting compression without having set a destination must fail.
+ TEST(EncoderErrorHandlingTest, NoDestination) {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ }
+
+ // Starting compression without setting image_width / image_height must fail.
+ TEST(EncoderErrorHandlingTest, NoImageDimensions) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+ }
+
+ // An image width of 100000 must be rejected when compression is started.
+ TEST(EncoderErrorHandlingTest, ImageTooBig) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 100000;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+ }
+
+ // Leaving input_components unset must make the setup / start sequence fail.
+ TEST(EncoderErrorHandlingTest, NoInputComponents) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+ }
+
+ // An input_components value of 1000 must be rejected.
+ TEST(EncoderErrorHandlingTest, TooManyInputComponents) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1000;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+ }
+
+ // Skipping jpegli_set_defaults() must make the encode sequence fail.
+ TEST(EncoderErrorHandlingTest, NoSetDefaults) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[1] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+ }
+
+ // Writing scanlines without calling jpegli_start_compress() first must fail.
+ TEST(EncoderErrorHandlingTest, NoStartCompress) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ JSAMPLE image[1] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+ }
+
+ // Finishing compression without having written any scanline must fail.
+ TEST(EncoderErrorHandlingTest, NoWriteScanlines) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+ }
+
+ // Finishing compression after writing only one of the two image rows must
+ // fail.
+ TEST(EncoderErrorHandlingTest, NoWriteAllScanlines) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 2;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[1] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+ }
+
+ // A quantization table whose entries are all zero must be rejected.
+ TEST(EncoderErrorHandlingTest, InvalidQuantValue) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ // Replace quantization table 0 with one filled with zeros.
+ cinfo.quant_tbl_ptrs[0] = jpegli_alloc_quant_table((j_common_ptr)&cinfo);
+ for (size_t k = 0; k < DCTSIZE2; ++k) {
+ cinfo.quant_tbl_ptrs[0]->quantval[k] = 0;
+ }
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[1] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+ }
+
+ // Pointing the component at quantization table 3 must make the encode fail.
+ // NOTE(review): presumably no table is installed at index 3 after
+ // jpegli_set_defaults() - confirm.
+ TEST(EncoderErrorHandlingTest, InvalidQuantTableIndex) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ cinfo.comp_info[0].quant_tbl_no = 3;
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[1] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+ }
+
+ // Setting num_components to 100 after the defaults were derived from one
+ // input component must make jpegli_start_compress() fail.
+ TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch1) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ cinfo.num_components = 100;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+ }
+
+ // Setting num_components to 2 after the defaults were derived from one input
+ // component must make jpegli_start_compress() fail.
+ TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch2) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ cinfo.num_components = 2;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+ }
+
+// Like NumberOfComponentsMismatch2 (one input component, num_components forced
+// to 2), but additionally sets both sampling factors of comp_info[1] to 1 and
+// continues with the write / finish calls.
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch3) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ cinfo.num_components = 2;
+ cinfo.comp_info[1].h_samp_factor = cinfo.comp_info[1].v_samp_factor = 1;
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[1] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Declares one input component together with in_color_space = JCS_RGB, then
+// runs the defaults / start / write / finish sequence.
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch4) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ cinfo.in_color_space = JCS_RGB;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[1] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Declares three input components together with in_color_space =
+// JCS_GRAYSCALE, then runs the defaults / start / write / finish sequence.
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch5) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ cinfo.in_color_space = JCS_GRAYSCALE;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[3] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Declares three RGB input components, then overrides cinfo.num_components to
+// 2 after jpegli_set_defaults() and runs the start / write / finish sequence.
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch6) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ cinfo.in_color_space = JCS_RGB;
+ jpegli_set_defaults(&cinfo);
+ cinfo.num_components = 2;
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[3] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Declares YCbCr input (in_color_space = JCS_YCbCr) and, after
+// jpegli_set_defaults(), requests jpeg_color_space = JCS_RGB, then runs the
+// start / write / finish sequence.
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, InvalidColorTransform) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ cinfo.in_color_space = JCS_YCbCr;
+ jpegli_set_defaults(&cinfo);
+ cinfo.jpeg_color_space = JCS_RGB;
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[3] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Gives components 0 and 1 the same component_id (0) after
+// jpegli_set_defaults() on a three-component image, then calls
+// jpegli_start_compress().
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, DuplicateComponentIds) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ jpegli_set_defaults(&cinfo);
+ cinfo.comp_info[0].component_id = 0;
+ cinfo.comp_info[1].component_id = 0;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Overwrites comp_info[0].component_index with 17 after jpegli_set_defaults()
+// on a three-component image, then calls jpegli_start_compress().
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, InvalidComponentIndex) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ jpegli_set_defaults(&cinfo);
+ cinfo.comp_info[0].component_index = 17;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Sets cinfo.arith_code = TRUE after jpegli_set_defaults() and calls
+// jpegli_start_compress().
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, ArithmeticCoding) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ jpegli_set_defaults(&cinfo);
+ cinfo.arith_code = TRUE;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Sets cinfo.CCIR601_sampling = TRUE after jpegli_set_defaults() and calls
+// jpegli_start_compress().
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, CCIR601Sampling) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ jpegli_set_defaults(&cinfo);
+ cinfo.CCIR601_sampling = TRUE;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Installs a one-entry scan script but sets cinfo.num_scans to 0, then calls
+// jpegli_start_compress().
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, InvalidScanScript1) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {{1, {0}, 0, 63, 0, 0}}; //
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = 0;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Installs a scan script whose single scan lists two components ({0, 1})
+// while the image is configured with only one input component, then calls
+// jpegli_start_compress().
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, InvalidScanScript2) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {{2, {0, 1}, 0, 63, 0, 0}}; //
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Installs a scan script whose single scan claims 5 components in the scan
+// (first initializer field) on a one-component image, then calls
+// jpegli_start_compress().
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, InvalidScanScript3) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {{5, {0}, 0, 63, 0, 0}}; //
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Installs a two-component scan that lists component index 0 twice ({0, 0})
+// on a two-component image, then calls jpegli_start_compress().
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, InvalidScanScript4) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 2;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {{2, {0, 0}, 0, 63, 0, 0}}; //
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Installs a two-component scan that lists the component indexes in
+// descending order ({1, 0}) on a two-component image, then calls
+// jpegli_start_compress().
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, InvalidScanScript5) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 2;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {{2, {1, 0}, 0, 63, 0, 0}}; //
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Installs a single-component scan whose fourth initializer field is 64
+// (the spectral-selection end, Se, per libjpeg's jpeg_scan_info field order:
+// comps_in_scan, component_index, Ss, Se, Ah, Al), then calls
+// jpegli_start_compress().
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, InvalidScanScript6) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {{1, {0}, 0, 64, 0, 0}}; //
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Installs a single-component scan whose spectral range is reversed: the
+// third field (Ss) is 2 and the fourth (Se) is 1 (field order per libjpeg's
+// jpeg_scan_info: comps_in_scan, component_index, Ss, Se, Ah, Al). Then calls
+// jpegli_start_compress().
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, InvalidScanScript7) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {{1, {0}, 2, 1, 0, 0}}; //
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Installs a three-scan script on a two-component image: component 0 with
+// coefficients 0..63 in one scan, then component 1 split into a 0..0 scan and
+// a 1..63 scan. Then calls jpegli_start_compress().
+// NOTE(review): presumably rejected because one component is coded in a single
+// full-range scan while the other is split — confirm against the scan script
+// validation in the encoder.
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, InvalidScanScript8) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 2;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {
+ {1, {0}, 0, 63, 0, 0}, {1, {1}, 0, 0, 0, 0}, {1, {1}, 1, 63, 0, 0} //
+ };
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Installs a two-scan script for a single component where the first scan
+// covers coefficients 0..1 and the second covers 2..63, then calls
+// jpegli_start_compress().
+// NOTE(review): presumably rejected because the first scan mixes coefficient 0
+// with a higher coefficient — confirm against the encoder's validation.
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, InvalidScanScript9) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {
+ {1, {0}, 0, 1, 0, 0}, {1, {0}, 2, 63, 0, 0}, //
+ };
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Installs a two-scan script on a two-component image in which both scans
+// list both components: the first covers coefficient 0 only, the second
+// covers 1..63. Then calls jpegli_start_compress().
+// NOTE(review): presumably rejected because the 1..63 scan contains more than
+// one component — confirm against the encoder's validation.
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, InvalidScanScript10) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 2;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {
+ {2, {0, 1}, 0, 0, 0, 0}, {2, {0, 1}, 1, 63, 0, 0} //
+ };
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Installs a two-scan script for a single component in which the 1..63 scan
+// comes before the 0..0 scan, then calls jpegli_start_compress().
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, InvalidScanScript11) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {
+ {1, {0}, 1, 63, 0, 0}, {1, {0}, 0, 0, 0, 0} //
+ };
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Installs a three-scan script for a single component: two 0..0 scans whose
+// last two fields (Ah, Al per libjpeg's jpeg_scan_info field order) are
+// (10, 1) and (1, 0), followed by a 1..63 scan. Then calls
+// jpegli_start_compress().
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, InvalidScanScript12) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {
+ {1, {0}, 0, 0, 10, 1}, {1, {0}, 0, 0, 1, 0}, {1, {0}, 1, 63, 0, 0} //
+ };
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Installs a four-scan script for a single component: three 0..0 scans whose
+// last two fields (Ah, Al per libjpeg's jpeg_scan_info field order) are
+// (0, 2), (1, 0) and (2, 1), followed by a 1..63 scan. Then calls
+// jpegli_start_compress().
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, InvalidScanScript13) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {
+ {1, {0}, 0, 0, 0, 2},
+ {1, {0}, 0, 0, 1, 0},
+ {1, {0}, 0, 0, 2, 1}, //
+ {1, {0}, 1, 63, 0, 0} //
+ };
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// On a three-component image, calls jpegli_set_progressive_level(&cinfo, 0)
+// and raises both sampling factors of component 0 to 3 before
+// jpegli_start_compress().
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, MCUSizeTooBig) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ jpegli_set_defaults(&cinfo);
+ jpegli_set_progressive_level(&cinfo, 0);
+ cinfo.comp_info[0].h_samp_factor = 3;
+ cinfo.comp_info[0].v_samp_factor = 3;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Sets cinfo.restart_interval to 1000000 after jpegli_set_defaults() and
+// calls jpegli_start_compress().
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, RestartIntervalTooBig) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ cinfo.restart_interval = 1000000;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Sets the horizontal sampling factor of component 0 to 5 after
+// jpegli_set_defaults() on a three-component image, then calls
+// jpegli_start_compress().
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, SamplingFactorTooBig) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ jpegli_set_defaults(&cinfo);
+ cinfo.comp_info[0].h_samp_factor = 5;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Sets horizontal sampling factors 3 (component 0) and 2 (component 1), which
+// do not divide evenly, after jpegli_set_defaults() on a three-component
+// image, then calls jpegli_start_compress().
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, NonIntegralSamplingRatio) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ jpegli_set_defaults(&cinfo);
+ cinfo.comp_info[0].h_samp_factor = 3;
+ cinfo.comp_info[1].h_samp_factor = 2;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+// Three application-defined message templates that the AddOnTable* tests
+// install as cinfo.err->addon_message_table: one without a parameter, one
+// with a %d parameter and one with a %s parameter.
+constexpr const char* kAddOnTable[] = {"First message",
+ "Second message with int param %d",
+ "Third message with string param %s"};
+
+// Registers kAddOnTable for message codes 10000..10002, selects code 10000
+// (the first entry, which has no format parameter) and invokes the error
+// manager's error_exit callback directly.
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, AddOnTableNoParam) {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ cinfo.err->addon_message_table = kAddOnTable;
+ cinfo.err->first_addon_message = 10000;
+ cinfo.err->last_addon_message = 10002;
+ cinfo.err->msg_code = 10000;
+ (*cinfo.err->error_exit)(reinterpret_cast<j_common_ptr>(&cinfo));
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+}
+
+// Registers kAddOnTable for message codes 10000..10002, selects code 10001
+// (the entry with a %d parameter), stores 17 in msg_parm.i[0] and invokes the
+// error manager's error_exit callback directly.
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, AddOnTableIntParam) {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ cinfo.err->addon_message_table = kAddOnTable;
+ cinfo.err->first_addon_message = 10000;
+ cinfo.err->last_addon_message = 10002;
+ cinfo.err->msg_code = 10001;
+ cinfo.err->msg_parm.i[0] = 17;
+ (*cinfo.err->error_exit)(reinterpret_cast<j_common_ptr>(&cinfo));
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+}
+
+// Registers kAddOnTable for message codes 10000..10002, selects code 10002
+// (the entry with a %s parameter), copies "MESSAGE PARAM" including its
+// terminating NUL (14 bytes) into msg_parm.s and invokes the error manager's
+// error_exit callback directly.
+// NOTE(review): despite the "NoStringParam" name, this test does supply a
+// string parameter; the name looks like a leftover — confirm before renaming.
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(EncoderErrorHandlingTest, AddOnTableNoStringParam) {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ cinfo.err->addon_message_table = kAddOnTable;
+ cinfo.err->first_addon_message = 10000;
+ cinfo.err->last_addon_message = 10002;
+ cinfo.err->msg_code = 10002;
+ memcpy(cinfo.err->msg_parm.s, "MESSAGE PARAM", 14);
+ (*cinfo.err->error_exit)(reinterpret_cast<j_common_ptr>(&cinfo));
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+}
+
+// Reference JPEG byte stream used by the decoder tests. It consists of the
+// segments labelled below (SOI, DQT, SOF, DHT, SOS, entropy coded data, EOI).
+// The SOF payload declares 8-bit precision, height 1, width 1 and a single
+// component; the MinimalSuccess test decodes it to one sample of value 0.
+// The other decoder tests corrupt individual bytes of a copy of this array.
+static const uint8_t kCompressed0[] = {
+ // SOI
+ 0xff, 0xd8, //
+ // DQT
+ 0xff, 0xdb, 0x00, 0x43, 0x00, 0x03, 0x02, 0x02, 0x03, 0x02, //
+ 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x03, 0x03, 0x04, 0x05, //
+ 0x08, 0x05, 0x05, 0x04, 0x04, 0x05, 0x0a, 0x07, 0x07, 0x06, //
+ 0x08, 0x0c, 0x0a, 0x0c, 0x0c, 0x0b, 0x0a, 0x0b, 0x0b, 0x0d, //
+ 0x0e, 0x12, 0x10, 0x0d, 0x0e, 0x11, 0x0e, 0x0b, 0x0b, 0x10, //
+ 0x16, 0x10, 0x11, 0x13, 0x14, 0x15, 0x15, 0x15, 0x0c, 0x0f, //
+ 0x17, 0x18, 0x16, 0x14, 0x18, 0x12, 0x14, 0x15, 0x14, //
+ // SOF
+ 0xff, 0xc0, 0x00, 0x0b, 0x08, 0x00, 0x01, 0x00, 0x01, 0x01, //
+ 0x01, 0x11, 0x00, //
+ // DHT
+ 0xff, 0xc4, 0x00, 0xd2, 0x00, 0x00, 0x01, 0x05, 0x01, 0x01, //
+ 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, //
+ 0x09, 0x0a, 0x0b, 0x10, 0x00, 0x02, 0x01, 0x03, 0x03, 0x02, //
+ 0x04, 0x03, 0x05, 0x05, 0x04, 0x04, 0x00, 0x00, 0x01, 0x7d, //
+ 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, //
+ 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, //
+ 0x81, 0x91, 0xa1, 0x08, 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, //
+ 0xd1, 0xf0, 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, //
+ 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, //
+ 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45, //
+ 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, //
+ 0x58, 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, //
+ 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x83, //
+ 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, //
+ 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, //
+ 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, //
+ 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, //
+ 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, //
+ 0xd9, 0xda, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, //
+ 0xe9, 0xea, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, //
+ 0xf9, 0xfa, //
+ // SOS
+ 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x00, 0x3f, 0x00, //
+ // entropy coded data
+ 0xfc, 0xaa, 0xaf, //
+ // EOI
+ 0xff, 0xd9, //
+};
+// Total size of kCompressed0 in bytes.
+static const size_t kLen0 = sizeof(kCompressed0);
+
+// Byte offsets into kCompressed0 of the leading 0xff of each marker segment.
+// MinimalSuccess checks that each offset really points at a 0xff byte; the
+// Invalid* decoder tests add small deltas to these offsets to reach individual
+// header fields.
+static const size_t kDQTOffset = 2;
+static const size_t kSOFOffset = 71;
+static const size_t kDHTOffset = 84;
+static const size_t kSOSOffset = 296;
+
+// Baseline for the decoder tests: verifies that the marker offsets point at
+// 0xff bytes, then decodes the unmodified kCompressed0 through the complete
+// create / read_header / start / read_scanlines / finish sequence.
+// Expects a 1x1 image whose single sample is 0 and expects try_catch_block()
+// to return true, i.e. the lambda reaches its final return.
+TEST(DecoderErrorHandlingTest, MinimalSuccess) {
+ JXL_CHECK(kCompressed0[kDQTOffset] == 0xff);
+ JXL_CHECK(kCompressed0[kSOFOffset] == 0xff);
+ JXL_CHECK(kCompressed0[kDHTOffset] == 0xff);
+ JXL_CHECK(kCompressed0[kSOSOffset] == 0xff);
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_mem_src(&cinfo, kCompressed0, kLen0);
+ jpegli_read_header(&cinfo, TRUE);
+ EXPECT_EQ(1, cinfo.image_width);
+ EXPECT_EQ(1, cinfo.image_height);
+ jpegli_start_decompress(&cinfo);
+ JSAMPLE image[1];
+ JSAMPROW row[] = {image};
+ jpegli_read_scanlines(&cinfo, row, 1);
+ EXPECT_EQ(0, image[0]);
+ jpegli_finish_decompress(&cinfo);
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+}
+
+// Calls jpegli_read_header() on a freshly created decompress object without
+// installing any data source first.
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(DecoderErrorHandlingTest, NoSource) {
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_read_header(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+}
+
+// Installs kCompressed0 as the source and calls jpegli_start_decompress()
+// without calling jpegli_read_header() first.
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(DecoderErrorHandlingTest, NoReadHeader) {
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_mem_src(&cinfo, kCompressed0, kLen0);
+ jpegli_start_decompress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+}
+
+// Reads the header of kCompressed0 and then calls jpegli_read_scanlines()
+// without calling jpegli_start_decompress() in between.
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(DecoderErrorHandlingTest, NoStartDecompress) {
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_mem_src(&cinfo, kCompressed0, kLen0);
+ jpegli_read_header(&cinfo, TRUE);
+ EXPECT_EQ(1, cinfo.image_width);
+ EXPECT_EQ(1, cinfo.image_height);
+ JSAMPLE image[1];
+ JSAMPROW row[] = {image};
+ jpegli_read_scanlines(&cinfo, row, 1);
+ EXPECT_EQ(0, image[0]);
+ jpegli_finish_decompress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+}
+
+// Reads the header of kCompressed0, starts decompression and then calls
+// jpegli_finish_decompress() without reading any scanline.
+// Expects try_catch_block() to return false (the lambda's only visible return
+// is `true`).
+TEST(DecoderErrorHandlingTest, NoReadScanlines) {
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_mem_src(&cinfo, kCompressed0, kLen0);
+ jpegli_read_header(&cinfo, TRUE);
+ EXPECT_EQ(1, cinfo.image_width);
+ EXPECT_EQ(1, cinfo.image_height);
+ jpegli_start_decompress(&cinfo);
+ jpegli_finish_decompress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+}
+
+// Scratch output row shared by every ParseCompressed() call. It holds
+// MAX_COMPONENTS samples for each of kMaxImageWidth (0xffff) pixels; the
+// decoded contents are never inspected.
+static const size_t kMaxImageWidth = 0xffff;
+JSAMPLE kOutputBuffer[MAX_COMPONENTS * kMaxImageWidth];
+
+// Runs a complete decode of `compressed`: create, mem_src, read_header,
+// start_decompress, one jpegli_read_scanlines() call per output row (every
+// row is written into the shared kOutputBuffer) and finish_decompress.
+// Returns the result of the inner lambda, which is `true` when the whole
+// sequence reaches its final return; any other result must originate from
+// ERROR_HANDLER_SETUP, which is defined outside this chunk.
+// The decompress object is destroyed on every path before returning.
+bool ParseCompressed(const std::vector<uint8_t>& compressed) {
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_mem_src(&cinfo, compressed.data(), compressed.size());
+ jpegli_read_header(&cinfo, TRUE);
+ jpegli_start_decompress(&cinfo);
+ for (JDIMENSION i = 0; i < cinfo.output_height; ++i) {
+ JSAMPROW row[] = {kOutputBuffer};
+ jpegli_read_scanlines(&cinfo, row, 1);
+ }
+ jpegli_finish_decompress(&cinfo);
+ return true;
+ };
+ bool retval = try_catch_block();
+ jpegli_destroy_decompress(&cinfo);
+ return retval;
+}
+
+// Zeroes byte 0 or byte 1 of a copy of kCompressed0 (the two bytes labelled
+// SOI in that array) and expects ParseCompressed() to return false each time.
+TEST(DecoderErrorHandlingTest, NoSOI) {
+ for (int pos : {0, 1}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[pos] = 0;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+}
+
+// Corrupts one field of the DQT segment in a fresh copy of kCompressed0 per
+// iteration and expects ParseCompressed() to return false every time.
+// Offsets are relative to kDQTOffset: +3 is the low byte of the segment
+// length, +4 is the byte holding table precision and index, and +5 onwards
+// are the 64 quantization values.
+TEST(DecoderErrorHandlingTest, InvalidDQT) {
+ // Bad marker length
+ for (int diff : {-2, -1, 1, 2}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kDQTOffset + 3] += diff;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // invalid table index / precision
+ for (int val : {0x20, 0x05}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kDQTOffset + 4] = val;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // zero quant value
+ for (int k : {0, 1, 17, 63}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kDQTOffset + 5 + k] = 0;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+}
+
+// Corrupts one field of the SOF segment in a fresh copy of kCompressed0 per
+// iteration and expects ParseCompressed() to return false every time.
+// Offsets are relative to kSOFOffset: +3 low byte of the segment length,
+// +4 data precision, +6 low byte of the height, +8 low byte of the width,
+// +9 number of components, +11 sampling factors of the component and
+// +12 its quantization table index.
+TEST(DecoderErrorHandlingTest, InvalidSOF) {
+ // Bad marker length
+ for (int diff : {-2, -1, 1, 2}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kSOFOffset + 3] += diff;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // zero width, height or num_components
+ for (int pos : {6, 8, 9}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kSOFOffset + pos] = 0;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // invalid data precision
+ for (int val : {0, 1, 127}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kSOFOffset + 4] = val;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // too many num_components
+ for (int val : {5, 255}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kSOFOffset + 9] = val;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // invalid sampling factors
+ for (int val : {0x00, 0x01, 0x10, 0x15, 0x51}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kSOFOffset + 11] = val;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // invalid quant table index
+ for (int val : {5, 17}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kSOFOffset + 12] = val;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+}
+
+// Corrupts one field of the DHT segment in a fresh copy of kCompressed0 per
+// iteration and expects ParseCompressed() to return false every time.
+// Offsets are relative to kDHTOffset: +2 and +3 are the high and low bytes of
+// the segment length, +4 is the first table's class / slot byte.
+TEST(DecoderErrorHandlingTest, InvalidDHT) {
+ // Bad marker length
+ for (int diff : {-2, -1, 1, 2}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kDHTOffset + 3] += diff;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // Bad marker length (high byte of the length field increased by 17)
+ {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kDHTOffset + 2] += 17;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // invalid table slot_id
+ for (int val : {0x05, 0x15, 0x20}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kDHTOffset + 4] = val;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+}
+
+// Corrupts one field of the SOS segment in a fresh copy of kCompressed0 per
+// iteration and expects ParseCompressed() to return false every time.
+// Offsets are relative to kSOSOffset: +4 number of components in the scan,
+// +6 the Huffman table selector byte of the component, +7 and +8 the Ss and
+// Se bytes.
+TEST(DecoderErrorHandlingTest, InvalidSOS) {
+ // Invalid comps_in_scan
+ for (int val : {2, 5, 17}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kSOSOffset + 4] = val;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // invalid Huffman table indexes
+ for (int val : {0x05, 0x50, 0x15, 0x51}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kSOSOffset + 6] = val;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // invalid Ss/Se
+ for (int pos : {7, 8}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kSOSOffset + pos] = 64;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+}
+
+// For every byte position of kCompressed0, overwrites that one byte with each
+// of 0x00, 0x0f, 0xf0 and 0xff and runs ParseCompressed() on the result.
+// The return value is deliberately ignored: the test makes no claim about
+// whether a given mutation decodes, only that the call returns.
+TEST(DecoderErrorHandlingTest, MutateSingleBytes) {
+ for (size_t pos = 0; pos < kLen0; ++pos) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ for (int val : {0x00, 0x0f, 0xf0, 0xff}) {
+ compressed[pos] = val;
+ ParseCompressed(compressed);
+ }
+ }
+}
+
+} // namespace
+} // namespace jpegli
diff --git a/lib/jpegli/huffman.cc b/lib/jpegli/huffman.cc
new file mode 100644
index 0000000..1cf88a5
--- /dev/null
+++ b/lib/jpegli/huffman.cc
@@ -0,0 +1,321 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/huffman.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <limits>
+#include <vector>
+
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/error.h"
+
+namespace jpegli {
+
+// Computes the index width (in bits) of the next second-level lookup table.
+//   count: histogram of code lengths for the symbols not yet placed.
+//   len:   code length of the symbol that is about to be placed.
+// The returned width is relative to the root table, i.e. the final code
+// length reached minus kJpegHuffmanRootTableBits.
+static inline int NextTableBitSize(const int* count, int len) {
+  // Number of still-free slots in a sub-table indexed by the current width.
+  int free_slots = 1 << (len - kJpegHuffmanRootTableBits);
+  for (; len < static_cast<int>(kJpegHuffmanMaxBitLength);
+       ++len, free_slots <<= 1) {
+    free_slots -= count[len];
+    // All slots at this width are consumed: the table is exactly this wide.
+    if (free_slots <= 0) break;
+  }
+  return len - kJpegHuffmanRootTableBits;
+}
+
+// Builds a two-level lookup table for decoding a Huffman code.
+//
+// count:   histogram of code lengths; count[len] is read for len in
+//          [1, kJpegHuffmanMaxBitLength] (count[0] is ignored).
+// symbols: symbol values, consumed sequentially in order of increasing code
+//          length.
+// lut:     output. The first (1 << kJpegHuffmanRootTableBits) entries form
+//          the root table; second-level tables are appended right after it.
+//          The caller must provide enough space (size is not checked here).
+void BuildJpegHuffmanTable(const uint32_t* count, const uint32_t* symbols,
+ HuffmanTableEntry* lut) {
+ HuffmanTableEntry code; // current table entry
+ HuffmanTableEntry* table; // next available space in table
+ int len; // current code length
+ int idx; // symbol index
+ int key; // prefix code
+ int reps; // number of replicate key values in current table
+ int low; // low bits for current root entry
+ int table_bits; // key length of current table
+ int table_size; // size of current table
+
+ // Make a local copy of the input bit length histogram.
+ // The copy is consumed (decremented) while the tables are filled.
+ int tmp_count[kJpegHuffmanMaxBitLength + 1] = {0};
+ int total_count = 0;
+ for (len = 1; len <= static_cast<int>(kJpegHuffmanMaxBitLength); ++len) {
+ tmp_count[len] = count[len];
+ total_count += tmp_count[len];
+ }
+
+ table = lut;
+ table_bits = kJpegHuffmanRootTableBits;
+ table_size = 1 << table_bits;
+
+ // Special case code with only one value.
+ // Every root entry maps to symbols[0] and consumes zero bits.
+ if (total_count == 1) {
+ code.bits = 0;
+ code.value = symbols[0];
+ for (key = 0; key < table_size; ++key) {
+ table[key] = code;
+ }
+ return;
+ }
+
+ // Fill in root table.
+ // A code of length len occupies 2^(root_bits - len) consecutive entries,
+ // one for each possible value of the bits that follow the code.
+ key = 0;
+ idx = 0;
+ for (len = 1; len <= kJpegHuffmanRootTableBits; ++len) {
+ for (; tmp_count[len] > 0; --tmp_count[len]) {
+ code.bits = len;
+ code.value = symbols[idx++];
+ reps = 1 << (kJpegHuffmanRootTableBits - len);
+ while (reps--) {
+ table[key++] = code;
+ }
+ }
+ }
+
+ // Fill in 2nd level tables and add pointers to root table.
+ // `key` continues from where the root fill stopped; each new sub-table
+ // takes the next root entry.
+ table += table_size;
+ table_size = 0;
+ low = 0;
+ for (len = kJpegHuffmanRootTableBits + 1;
+ len <= static_cast<int>(kJpegHuffmanMaxBitLength); ++len) {
+ for (; tmp_count[len] > 0; --tmp_count[len]) {
+ // Start a new sub-table if the previous one is full.
+ if (low >= table_size) {
+ table += table_size;
+ table_bits = NextTableBitSize(tmp_count, len);
+ table_size = 1 << table_bits;
+ low = 0;
+ // Root entry for a sub-table: bits holds the total index width
+ // (root + sub-table), value holds the offset from this root entry to
+ // the start of the sub-table.
+ lut[key].bits = table_bits + kJpegHuffmanRootTableBits;
+ lut[key].value = (table - lut) - key;
+ ++key;
+ }
+ // Sub-table entries store the code length beyond the root bits.
+ code.bits = len - kJpegHuffmanRootTableBits;
+ code.value = symbols[idx++];
+ reps = 1 << (table_bits - code.bits);
+ while (reps--) {
+ table[low++] = code;
+ }
+ }
+ }
+}
+
+// One node of the Huffman tree built by CreateHuffmanTree.
+// A leaf has index_left < 0 and keeps its symbol in index_right_or_value;
+// an internal node keeps the pool indices of its two children in
+// index_left / index_right_or_value.
+struct HuffmanTree {
+  uint32_t total_count;          // population count of the subtree
+  int16_t index_left;            // left child index, negative for a leaf
+  int16_t index_right_or_value;  // right child index, or the leaf's symbol
+
+  HuffmanTree(uint32_t count, int16_t left, int16_t right)
+      : total_count(count), index_left(left), index_right_or_value(right) {}
+};
+
+// Recursively walks the subtree rooted at p and records, for every leaf, the
+// depth at which it sits: depth[symbol] = level. `pool` is the node array
+// that the child indices refer to; `level` is the depth of p itself.
+void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth,
+              uint8_t level) {
+  if (p.index_left < 0) {
+    // Leaf: index_right_or_value holds the symbol.
+    depth[p.index_right_or_value] = level;
+    return;
+  }
+  // Internal node: both children live one level deeper.
+  const uint8_t child_level = static_cast<uint8_t>(level + 1);
+  SetDepth(pool[p.index_left], pool, depth, child_level);
+  SetDepth(pool[p.index_right_or_value], pool, depth, child_level);
+}
+
+// Ordering predicate used to sort the root nodes: the node with the smaller
+// population count comes first.
+static JXL_INLINE bool Compare(const HuffmanTree& v0, const HuffmanTree& v1) {
+  return v1.total_count > v0.total_count;
+}
+
+// This function will create a Huffman tree.
+//
+// The catch here is that the tree cannot be arbitrarily deep.
+// Brotli specifies a maximum depth of 15 bits for "code trees"
+// and 7 bits for "code length code trees."
+//
+// count_limit is the value that is to be faked as the minimum value
+// and this minimum value is raised until the tree matches the
+// maximum length requirement.
+//
+// This algorithm is not of excellent performance for very long data blocks,
+// especially when population counts are longer than 2**tree_limit, but
+// we are not planning to use this with extremely long blocks.
+//
+// Parameters:
+//   data, length: population counts, one per symbol.
+//   tree_limit:   maximum allowed code length.
+//   depth:        output, depth[i] receives the code length of symbol i.
+//                 Only entries of symbols with a nonzero count are written,
+//                 but the final length check reads all of depth[0, length),
+//                 so the caller has to pass an initialized array.
+//
+// If no symbol has a nonzero count, depth is left untouched. If exactly one
+// symbol has a nonzero count, its depth is set to 1.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+void CreateHuffmanTree(const uint32_t* data, const size_t length,
+                       const int tree_limit, uint8_t* depth) {
+  // For block sizes below 64 kB, we never need to do a second iteration
+  // of this loop. Probably all of our block sizes will be smaller than
+  // that, so this loop is mostly of academic interest. If we actually
+  // would need this, we would be better off with the Katajainen algorithm.
+  for (uint32_t count_limit = 1;; count_limit *= 2) {
+    std::vector<HuffmanTree> tree;
+    tree.reserve(2 * length + 1);
+
+    // Collect the leaves in descending symbol order; counts below
+    // count_limit - 1 are raised to that value.
+    for (size_t i = length; i != 0;) {
+      --i;
+      if (data[i]) {
+        const uint32_t count = std::max(data[i], count_limit - 1);
+        tree.emplace_back(count, -1, static_cast<int16_t>(i));
+      }
+    }
+
+    const size_t n = tree.size();
+    if (n == 0) {
+      // Empty histogram: there is nothing to assign. Without this guard the
+      // merge loop below would start from `n - 1`, which wraps around for an
+      // unsigned n and walks off the end of the node array.
+      return;
+    }
+    if (n == 1) {
+      // Fake value; will be fixed on upper level.
+      depth[tree[0].index_right_or_value] = 1;
+      break;
+    }
+
+    std::stable_sort(tree.begin(), tree.end(), Compare);
+
+    // The nodes are:
+    // [0, n): the sorted leaf nodes that we start with.
+    // [n]: we add a sentinel here.
+    // [n + 1, 2n): new parent nodes are added here, starting from
+    //              (n+1). These are naturally in ascending order.
+    // [2n]: we add a sentinel at the end as well.
+    // There will be (2n+1) elements at the end.
+    const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
+    tree.push_back(sentinel);
+    tree.push_back(sentinel);
+
+    size_t i = 0;      // Points to the next leaf node.
+    size_t j = n + 1;  // Points to the next non-leaf node.
+    for (size_t k = n - 1; k != 0; --k) {
+      // Pick the two cheapest remaining nodes, each taken from the front of
+      // either the leaf queue or the parent queue.
+      size_t left, right;
+      if (tree[i].total_count <= tree[j].total_count) {
+        left = i;
+        ++i;
+      } else {
+        left = j;
+        ++j;
+      }
+      if (tree[i].total_count <= tree[j].total_count) {
+        right = i;
+        ++i;
+      } else {
+        right = j;
+        ++j;
+      }
+
+      // The sentinel node becomes the parent node.
+      size_t j_end = tree.size() - 1;
+      tree[j_end].total_count =
+          tree[left].total_count + tree[right].total_count;
+      tree[j_end].index_left = static_cast<int16_t>(left);
+      tree[j_end].index_right_or_value = static_cast<int16_t>(right);
+
+      // Add back the last sentinel node.
+      tree.push_back(sentinel);
+    }
+    JXL_DASSERT(tree.size() == 2 * n + 1);
+    // The last parent created (index 2n - 1) is the root of the tree.
+    SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
+
+    // We need to pack the Huffman tree in tree_limit bits.
+    // If this was not successful, add fake entities to the lowest values
+    // and retry.
+    if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
+      break;
+    }
+  }
+}
+
+// Checks that a JHUFF_TBL describes a usable Huffman code and reports the
+// first problem found through JPEGLI_ERROR:
+//   - the table has no symbols at all,
+//   - it has more than kJpegHuffmanAlphabetSize symbols,
+//   - the code lengths (plus one sentinel code at the longest used length)
+//     do not exactly fill the code space,
+//   - the same symbol value occurs twice among the listed symbols.
+// The is_dc argument is not used by the checks below.
+// NOTE(review): the symbol loop assumes JPEGLI_ERROR does not return, since
+// it indexes huffval with the unchecked symbol count -- confirm in error.h.
+void ValidateHuffmanTable(j_common_ptr cinfo, const JHUFF_TBL* table,
+                          bool is_dc) {
+  size_t num_symbols = 0;
+  size_t code_space = 0;  // in units of 2^-kJpegHuffmanMaxBitLength
+  size_t longest = 0;     // largest code length that is in use
+  for (size_t len = 1; len <= kJpegHuffmanMaxBitLength; ++len) {
+    const uint8_t num_codes = table->bits[len];
+    if (num_codes == 0) continue;
+    num_symbols += num_codes;
+    code_space += (1u << (kJpegHuffmanMaxBitLength - len)) * num_codes;
+    longest = len;
+  }
+  // Account for the sentinel symbol at the longest used code length.
+  code_space += 1u << (kJpegHuffmanMaxBitLength - longest);
+
+  if (num_symbols == 0) {
+    JPEGLI_ERROR("Empty Huffman table");
+  }
+  if (num_symbols > kJpegHuffmanAlphabetSize) {
+    JPEGLI_ERROR("Too many symbols in Huffman table");
+  }
+  if (code_space != (1u << kJpegHuffmanMaxBitLength)) {
+    JPEGLI_ERROR("Invalid bit length distribution");
+  }
+
+  // Every symbol value may appear at most once.
+  uint8_t already_used[kJpegHuffmanAlphabetSize] = {};
+  for (size_t pos = 0; pos < num_symbols; ++pos) {
+    const uint8_t symbol = table->huffval[pos];
+    if (already_used[symbol]) {
+      JPEGLI_ERROR("Duplicate symbol %d in Huffman table", symbol);
+    }
+    already_used[symbol] = 1;
+  }
+}
+
+// Installs the standard Huffman tables into table slots 0 and 1 of cinfo,
+// but only for slots that are still empty (nullptr). is_dc selects between
+// the DC and the AC tables; cinfo->is_decompressor selects whether cinfo is
+// treated as a decompress or a compress object. Each installed table is
+// freshly allocated, filled by copying the constant below, and validated.
+void AddStandardHuffmanTables(j_common_ptr cinfo, bool is_dc) {
+ // Huffman tables from the JPEG standard.
+ // Each initializer lists the per-length code counts first, then the symbol
+ // values, then a trailing FALSE flag.
+ static constexpr JHUFF_TBL kStandardDCTables[2] = {
+ // DC luma
+ {{0, 0, 1, 5, 1, 1, 1, 1, 1, 1},
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
+ FALSE},
+ // DC chroma
+ {{0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
+ FALSE}};
+ static constexpr JHUFF_TBL kStandardACTables[2] = {
+ // AC luma
+ {{0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125},
+ {0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06,
+ 0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+ 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, 0x24, 0x33, 0x62, 0x72,
+ 0x82, 0x09, 0x0a, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+ 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45,
+ 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+ 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x75,
+ 0x76, 0x77, 0x78, 0x79, 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+ 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3,
+ 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+ 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9,
+ 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+ 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf1, 0xf2, 0xf3, 0xf4,
+ 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa},
+ FALSE},
+ // AC chroma
+ {{0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119},
+ {0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41,
+ 0x51, 0x07, 0x61, 0x71, 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+ 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, 0x15, 0x62, 0x72, 0xd1,
+ 0x0a, 0x16, 0x24, 0x34, 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+ 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44,
+ 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+ 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74,
+ 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a,
+ 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+ 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+ 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+ 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf2, 0xf3, 0xf4,
+ 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa},
+ FALSE}};
+ const JHUFF_TBL* std_tables = is_dc ? kStandardDCTables : kStandardACTables;
+ // Locate the table-pointer array of the matching kind on the concrete
+ // (de)compress object behind the common pointer.
+ JHUFF_TBL** tables;
+ if (cinfo->is_decompressor) {
+ j_decompress_ptr cinfo_d = reinterpret_cast<j_decompress_ptr>(cinfo);
+ tables = is_dc ? cinfo_d->dc_huff_tbl_ptrs : cinfo_d->ac_huff_tbl_ptrs;
+ } else {
+ j_compress_ptr cinfo_c = reinterpret_cast<j_compress_ptr>(cinfo);
+ tables = is_dc ? cinfo_c->dc_huff_tbl_ptrs : cinfo_c->ac_huff_tbl_ptrs;
+ }
+ // Only the two slots that have a standard table are considered; slots that
+ // already hold a table are left as they are.
+ for (int i = 0; i < 2; ++i) {
+ if (tables[i] == nullptr) {
+ tables[i] = jpegli_alloc_huff_table(cinfo);
+ memcpy(tables[i], &std_tables[i], sizeof(JHUFF_TBL));
+ ValidateHuffmanTable(cinfo, tables[i], is_dc);
+ }
+ }
+}
+
+} // namespace jpegli
diff --git a/lib/jpegli/huffman.h b/lib/jpegli/huffman.h
new file mode 100644
index 0000000..f0e5e1d
--- /dev/null
+++ b/lib/jpegli/huffman.h
@@ -0,0 +1,50 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_HUFFMAN_H_
+#define LIB_JPEGLI_HUFFMAN_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "lib/jpegli/common_internal.h"
+
+namespace jpegli {
+
+// Number of bits used to index the first-level (root) Huffman lookup table;
+// the root table therefore has 1 << kJpegHuffmanRootTableBits entries.
+constexpr int kJpegHuffmanRootTableBits = 8;
+// Maximum huffman lookup table size.
+// According to zlib/examples/enough.c, 758 entries are always enough for
+// an alphabet of 257 symbols (256 + 1 special symbol for the all 1s code) and
+// max bit length 16 if the root table has 8 bits.
+constexpr int kJpegHuffmanLutSize = 758;
+
+// One entry of the Huffman lookup table filled by BuildJpegHuffmanTable.
+// Depending on the entry, `value` is either a decoded symbol or an offset to
+// a second-level table.
+struct HuffmanTableEntry {
+ uint8_t bits; // number of bits used for this symbol
+ uint16_t value; // symbol value or table offset
+};
+
+// Builds the lookup table used to decode a Huffman code.
+//
+// The count array is the histogram of code lengths (indexed by length).
+// The symbols array lists the symbol values in order of increasing length.
+// The lut receives the table entries; the caller provides the storage.
+void BuildJpegHuffmanTable(const uint32_t* count, const uint32_t* symbols,
+ HuffmanTableEntry* lut);
+
+// This function will create a Huffman tree.
+//
+// The (data,length) contains the population counts.
+// The tree_limit is the maximum bit depth of the Huffman codes.
+//
+// The depth contains the tree, i.e., how many bits are used for
+// the symbol.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+void CreateHuffmanTree(const uint32_t* data, size_t length, int tree_limit,
+ uint8_t* depth);
+
+// Checks the code-length counts and the symbol list of a Huffman table and
+// reports an error through cinfo if the table is empty, has too many
+// symbols, has an invalid code-length distribution or repeats a symbol.
+void ValidateHuffmanTable(j_common_ptr cinfo, const JHUFF_TBL* table,
+ bool is_dc);
+
+// Fills the first two DC (is_dc == true) or AC (is_dc == false) Huffman table
+// slots of cinfo with the standard JPEG tables, for slots that are unset.
+void AddStandardHuffmanTables(j_common_ptr cinfo, bool is_dc);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_HUFFMAN_H_
diff --git a/lib/jpegli/idct.cc b/lib/jpegli/idct.cc
new file mode 100644
index 0000000..4d10563
--- /dev/null
+++ b/lib/jpegli/idct.cc
@@ -0,0 +1,692 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/idct.h"
+
+#include <cmath>
+
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jxl/base/status.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/idct.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/transpose-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Gt;
+using hwy::HWY_NAMESPACE::IfThenElseZero;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::NegMulAdd;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::Vec;
+using hwy::HWY_NAMESPACE::Xor;
+
+// Highway vector descriptors ("tags") shared by the functions below.
+// D / DI describe full-width vectors of float / int32_t for the target that
+// is currently being compiled.
+using D = HWY_FULL(float);
+using DI = HWY_FULL(int32_t);
+constexpr D d;
+constexpr DI di;
+
+// D8 describes float vectors capped at 8 lanes; the IDCT helpers use it on
+// rows that are 8 floats apart.
+using D8 = HWY_CAPPED(float, 8);
+constexpr D8 d8;
+
+// Converts the 64 quantized int16 coefficients in qblock to floats.
+// For every coefficient q: the result is 0 when q == 0, otherwise
+// (q - b) * dequant[k], where b is biases[k] with the sign bit of q xor-ed in
+// (so a non-negative bias moves q towards zero before scaling).
+void DequantBlock(const int16_t* JXL_RESTRICT qblock,
+                  const float* JXL_RESTRICT dequant,
+                  const float* JXL_RESTRICT biases, float* JXL_RESTRICT block) {
+  const Rebind<int16_t, DI> di16;
+  const Rebind<float, DI> df;
+  for (size_t k = 0; k < 64; k += Lanes(d)) {
+    const auto scale = Load(d, dequant + k);
+    const auto bias = Load(d, biases + k);
+    // Widen int16 -> int32 -> float.
+    const Vec<DI> coeff_i = PromoteTo(di, Load(di16, qblock + k));
+    const auto coeff = ConvertTo(df, coeff_i);
+    const auto magnitude = Abs(coeff);
+    const auto is_nonzero = Gt(magnitude, Zero(df));
+    // x ^ |x| leaves only the bits in which x and |x| differ.
+    const auto sign_bits = Xor(coeff, magnitude);
+    const auto debiased = Sub(coeff, Xor(bias, sign_bits));
+    const auto result = IfThenElseZero(is_nonzero, Mul(debiased, scale));
+    Store(result, d, block + k);
+  }
+}
+
+// Reorders N input rows (ain_stride floats apart) into a dense buffer with a
+// row pitch of 8 floats: the even-indexed rows come first, followed by the
+// odd-indexed rows.
+template <size_t N>
+void ForwardEvenOdd(const float* JXL_RESTRICT ain, size_t ain_stride,
+                    float* JXL_RESTRICT aout) {
+  constexpr size_t kNumEven = N / 2;
+  // Input rows 0, 2, 4, ... -> output rows [0, N/2).
+  for (size_t e = 0; e < kNumEven; ++e) {
+    const auto row = LoadU(d8, ain + 2 * e * ain_stride);
+    Store(row, d8, aout + e * 8);
+  }
+  // Input rows 1, 3, 5, ... -> output rows [N/2, N).
+  for (size_t o = 0; kNumEven + o < N; ++o) {
+    const auto row = LoadU(d8, ain + (2 * o + 1) * ain_stride);
+    Store(row, d8, aout + (kNumEven + o) * 8);
+  }
+}
+
+// In-place update of N rows of 8 floats: going from the last row down to
+// row 1, each row gets the (still unmodified) previous row added to it;
+// finally row 0 is multiplied by sqrt(2).
+template <size_t N>
+void BTranspose(float* JXL_RESTRICT coeff) {
+  size_t row = N - 1;
+  while (row > 0) {
+    const auto current = Load(d8, coeff + row * 8);
+    const auto previous = Load(d8, coeff + (row - 1) * 8);
+    Store(Add(current, previous), d8, coeff + row * 8);
+    --row;
+  }
+  constexpr float kSqrt2 = 1.41421356237f;
+  const auto first = Load(d8, coeff);
+  Store(Mul(first, Set(d8, kSqrt2)), d8, coeff);
+}
+
+// Constants for DCT implementation. Generated by the following snippet:
+// for i in range(N // 2):
+// print(1.0 / (2 * math.cos((i + 0.5) * math.pi / N)), end=", ")
+//
+// Only the sizes 4 and 8 are specialized; each specialization holds N / 2
+// multipliers.
+template <size_t N>
+struct WcMultipliers;
+
+template <>
+struct WcMultipliers<4> {
+ static constexpr float kMultipliers[] = {
+ 0.541196100146197,
+ 1.3065629648763764,
+ };
+};
+
+template <>
+struct WcMultipliers<8> {
+ static constexpr float kMultipliers[] = {
+ 0.5097955791041592,
+ 0.6013448869350453,
+ 0.8999762231364156,
+ 2.5629154477415055,
+ };
+};
+
+// Namespace-scope definitions of the static constexpr arrays declared above
+// (required when the arrays are odr-used under pre-C++17 rules).
+constexpr float WcMultipliers<4>::kMultipliers[];
+constexpr float WcMultipliers<8>::kMultipliers[];
+
+// Final butterfly of the N-point transform. coeff holds N rows of 8 floats:
+// rows [0, N/2) are the first half, rows [N/2, N) the second half. For each
+// i in [0, N/2), with m = WcMultipliers<N>::kMultipliers[i]:
+//   out row i         = first[i] + m * second[i]
+//   out row N - 1 - i = first[i] - m * second[i]
+// Output rows are out_stride floats apart.
+template <size_t N>
+void MultiplyAndAdd(const float* JXL_RESTRICT coeff, float* JXL_RESTRICT out,
+                    size_t out_stride) {
+  constexpr size_t kHalf = N / 2;
+  for (size_t i = 0; i < kHalf; i++) {
+    const auto weight = Set(d8, WcMultipliers<N>::kMultipliers[i]);
+    const auto first = Load(d8, coeff + i * 8);
+    const auto second = Load(d8, coeff + (kHalf + i) * 8);
+    const auto sum = MulAdd(weight, second, first);
+    const auto difference = NegMulAdd(weight, second, first);
+    StoreU(sum, d8, out + i * out_stride);
+    StoreU(difference, d8, out + (N - i - 1) * out_stride);
+  }
+}
+
+// Column-wise 1-D transform of size N, applied to one d8-wide vector of
+// columns at a time. The primary template is defined further down.
+template <size_t N>
+struct IDCT1DImpl;
+
+// Size 1: the transform is the identity, so the single row is just copied.
+// The stride arguments are not used.
+template <>
+struct IDCT1DImpl<1> {
+  JXL_INLINE void operator()(const float* from, size_t from_stride, float* to,
+                             size_t to_stride) {
+    const auto row = LoadU(d8, from);
+    StoreU(row, d8, to);
+  }
+};
+
+// Size 2: a single butterfly. Output row 0 is the sum of the two input rows,
+// output row 1 is their difference. Both inputs are loaded before anything
+// is stored, so `to` may be the same buffer as `from`.
+template <>
+struct IDCT1DImpl<2> {
+  JXL_INLINE void operator()(const float* from, size_t from_stride, float* to,
+                             size_t to_stride) {
+    JXL_DASSERT(from_stride >= 8);
+    JXL_DASSERT(to_stride >= 8);
+    const auto row0 = LoadU(d8, from);
+    const auto row1 = LoadU(d8, from + from_stride);
+    const auto sum = Add(row0, row1);
+    const auto difference = Sub(row0, row1);
+    StoreU(sum, d8, to);
+    StoreU(difference, d8, to + to_stride);
+  }
+};
+
+// General size N: recursive split into two half-size transforms.
+//   1. Gather the even input rows, then the odd input rows, into tmp.
+//   2. Transform the even half in place with the size N/2 transform.
+//   3. Pre-process the odd half (BTranspose) and transform it in place.
+//   4. Combine both halves into the output with MultiplyAndAdd.
+template <size_t N>
+struct IDCT1DImpl {
+  void operator()(const float* from, size_t from_stride, float* to,
+                  size_t to_stride) {
+    JXL_DASSERT(from_stride >= 8);
+    JXL_DASSERT(to_stride >= 8);
+    HWY_ALIGN float tmp[64];
+    // The odd half starts after N/2 rows of 8 floats, i.e. at offset N * 4.
+    float* const even_half = tmp;
+    float* const odd_half = tmp + N * 4;
+    ForwardEvenOdd<N>(from, from_stride, tmp);
+    IDCT1DImpl<N / 2> half_transform;
+    half_transform(even_half, 8, even_half, 8);
+    BTranspose<N / 2>(odd_half);
+    half_transform(odd_half, 8, odd_half, 8);
+    MultiplyAndAdd<N>(tmp, to, to_stride);
+  }
+};
+
+// Runs the size-N column transform over all 8 columns of `from` (row pitch
+// 8 floats), Lanes(d8) columns per step, writing rows that are output_stride
+// floats apart.
+template <size_t N>
+void IDCT1D(float* JXL_RESTRICT from, float* JXL_RESTRICT output,
+            size_t output_stride) {
+  IDCT1DImpl<N> transform;
+  for (size_t col = 0; col < 8; col += Lanes(d8)) {
+    transform(from + col, 8, output + col, output_stride);
+  }
+}
+
+// 2-D 8x8 inverse transform built from two 1-D passes, each preceded by a
+// transpose. block0 holds the input coefficients and is overwritten by the
+// first pass; block1 is used as the second working buffer. The final rows
+// are written to output, output_stride floats apart.
+// NOTE(review): Transpose8x8Block comes from transpose-inl.h (not visible
+// here); presumably it reads its first argument and writes the transpose to
+// its second -- confirm.
+void ComputeScaledIDCT(float* JXL_RESTRICT block0, float* JXL_RESTRICT block1,
+ float* JXL_RESTRICT output, size_t output_stride) {
+ Transpose8x8Block(block0, block1);
+ // First 1-D pass: block1 -> block0 (row pitch 8).
+ IDCT1D<8>(block1, block0, 8);
+ Transpose8x8Block(block0, block1);
+ // Second 1-D pass: block1 -> caller's output buffer.
+ IDCT1D<8>(block1, output, output_stride);
+}
+
+// Dequantizes one 8x8 block of quantized coefficients and runs the 8x8
+// inverse transform on it, writing the result rows to output.
+// scratch_space must hold at least 2 * DCTSIZE2 floats: the first DCTSIZE2
+// receive the dequantized coefficients, the next DCTSIZE2 serve as the
+// transform's second working buffer. The dctsize argument is not used here.
+void InverseTransformBlock8x8(const int16_t* JXL_RESTRICT qblock,
+                              const float* JXL_RESTRICT dequant,
+                              const float* JXL_RESTRICT biases,
+                              float* JXL_RESTRICT scratch_space,
+                              float* JXL_RESTRICT output, size_t output_stride,
+                              size_t dctsize) {
+  float* JXL_RESTRICT coeffs = scratch_space;
+  float* JXL_RESTRICT work = scratch_space + DCTSIZE2;
+  DequantBlock(qblock, dequant, biases, coeffs);
+  ComputeScaledIDCT(coeffs, work, output, output_stride);
+}
+
+// Computes the N-point IDCT of in[], and stores the result in out[]. The in[]
+// array is at most 8 values long, values in[8:N-1] are assumed to be 0.
+void Compute1dIDCT(float* in, float* out, size_t N) {
+ switch (N) {
+ case 3: {
+ static constexpr float kC3[3] = {
+ 1.414213562373,
+ 1.224744871392,
+ 0.707106781187,
+ };
+ float even0 = in[0] + kC3[2] * in[2];
+ float even1 = in[0] - kC3[0] * in[2];
+ float odd0 = kC3[1] * in[1];
+ out[0] = even0 + odd0;
+ out[2] = even0 - odd0;
+ out[1] = even1;
+ break;
+ }
+ case 5: {
+ static constexpr float kC5[5] = {
+ 1.414213562373, 1.344997023928, 1.144122805635,
+ 0.831253875555, 0.437016024449,
+ };
+ float even0 = in[0] + kC5[2] * in[2] + kC5[4] * in[4];
+ float even1 = in[0] - kC5[4] * in[2] - kC5[2] * in[4];
+ float even2 = in[0] - kC5[0] * in[2] + kC5[0] * in[4];
+ float odd0 = kC5[1] * in[1] + kC5[3] * in[3];
+ float odd1 = kC5[3] * in[1] - kC5[1] * in[3];
+ out[0] = even0 + odd0;
+ out[4] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[3] = even1 - odd1;
+ out[2] = even2;
+ break;
+ }
+ case 6: {
+ static constexpr float kC6[6] = {
+ 1.414213562373, 1.366025403784, 1.224744871392,
+ 1.000000000000, 0.707106781187, 0.366025403784,
+ };
+ float even0 = in[0] + kC6[2] * in[2] + kC6[4] * in[4];
+ float even1 = in[0] - kC6[0] * in[4];
+ float even2 = in[0] - kC6[2] * in[2] + kC6[4] * in[4];
+ float odd0 = kC6[1] * in[1] + kC6[3] * in[3] + kC6[5] * in[5];
+ float odd1 = kC6[3] * in[1] - kC6[3] * in[3] - kC6[3] * in[5];
+ float odd2 = kC6[5] * in[1] - kC6[3] * in[3] + kC6[1] * in[5];
+ out[0] = even0 + odd0;
+ out[5] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[4] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[3] = even2 - odd2;
+ break;
+ }
+ case 7: {
+ static constexpr float kC7[7] = {
+ 1.414213562373, 1.378756275744, 1.274162392264, 1.105676685997,
+ 0.881747733790, 0.613604268353, 0.314692122713,
+ };
+ float even0 = in[0] + kC7[2] * in[2] + kC7[4] * in[4] + kC7[6] * in[6];
+ float even1 = in[0] + kC7[6] * in[2] - kC7[2] * in[4] - kC7[4] * in[6];
+ float even2 = in[0] - kC7[4] * in[2] - kC7[6] * in[4] + kC7[2] * in[6];
+ float even3 = in[0] - kC7[0] * in[2] + kC7[0] * in[4] - kC7[0] * in[6];
+ float odd0 = kC7[1] * in[1] + kC7[3] * in[3] + kC7[5] * in[5];
+ float odd1 = kC7[3] * in[1] - kC7[5] * in[3] - kC7[1] * in[5];
+ float odd2 = kC7[5] * in[1] - kC7[1] * in[3] + kC7[3] * in[5];
+ out[0] = even0 + odd0;
+ out[6] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[5] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[4] = even2 - odd2;
+ out[3] = even3;
+ break;
+ }
+ case 9: {
+ static constexpr float kC9[9] = {
+ 1.414213562373, 1.392728480640, 1.328926048777,
+ 1.224744871392, 1.083350440839, 0.909038955344,
+ 0.707106781187, 0.483689525296, 0.245575607938,
+ };
+ float even0 = in[0] + kC9[2] * in[2] + kC9[4] * in[4] + kC9[6] * in[6];
+ float even1 = in[0] + kC9[6] * in[2] - kC9[6] * in[4] - kC9[0] * in[6];
+ float even2 = in[0] - kC9[8] * in[2] - kC9[2] * in[4] + kC9[6] * in[6];
+ float even3 = in[0] - kC9[4] * in[2] + kC9[8] * in[4] + kC9[6] * in[6];
+ float even4 = in[0] - kC9[0] * in[2] + kC9[0] * in[4] - kC9[0] * in[6];
+ float odd0 =
+ kC9[1] * in[1] + kC9[3] * in[3] + kC9[5] * in[5] + kC9[7] * in[7];
+ float odd1 = kC9[3] * in[1] - kC9[3] * in[5] - kC9[3] * in[7];
+ float odd2 =
+ kC9[5] * in[1] - kC9[3] * in[3] - kC9[7] * in[5] + kC9[1] * in[7];
+ float odd3 =
+ kC9[7] * in[1] - kC9[3] * in[3] + kC9[1] * in[5] - kC9[5] * in[7];
+ out[0] = even0 + odd0;
+ out[8] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[7] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[6] = even2 - odd2;
+ out[3] = even3 + odd3;
+ out[5] = even3 - odd3;
+ out[4] = even4;
+ break;
+ }
+ case 10: {
+ static constexpr float kC10[10] = {
+ 1.414213562373, 1.396802246667, 1.344997023928, 1.260073510670,
+ 1.144122805635, 1.000000000000, 0.831253875555, 0.642039521920,
+ 0.437016024449, 0.221231742082,
+ };
+ float even0 = in[0] + kC10[2] * in[2] + kC10[4] * in[4] + kC10[6] * in[6];
+ float even1 = in[0] + kC10[6] * in[2] - kC10[8] * in[4] - kC10[2] * in[6];
+ float even2 = in[0] - kC10[0] * in[4];
+ float even3 = in[0] - kC10[6] * in[2] - kC10[8] * in[4] + kC10[2] * in[6];
+ float even4 = in[0] - kC10[2] * in[2] + kC10[4] * in[4] - kC10[6] * in[6];
+ float odd0 =
+ kC10[1] * in[1] + kC10[3] * in[3] + kC10[5] * in[5] + kC10[7] * in[7];
+ float odd1 =
+ kC10[3] * in[1] + kC10[9] * in[3] - kC10[5] * in[5] - kC10[1] * in[7];
+ float odd2 =
+ kC10[5] * in[1] - kC10[5] * in[3] - kC10[5] * in[5] + kC10[5] * in[7];
+ float odd3 =
+ kC10[7] * in[1] - kC10[1] * in[3] + kC10[5] * in[5] + kC10[9] * in[7];
+ float odd4 =
+ kC10[9] * in[1] - kC10[7] * in[3] + kC10[5] * in[5] - kC10[3] * in[7];
+ out[0] = even0 + odd0;
+ out[9] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[8] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[7] = even2 - odd2;
+ out[3] = even3 + odd3;
+ out[6] = even3 - odd3;
+ out[4] = even4 + odd4;
+ out[5] = even4 - odd4;
+ break;
+ }
+ case 11: {
+ static constexpr float kC11[11] = {
+ 1.414213562373, 1.399818907436, 1.356927976287, 1.286413904599,
+ 1.189712155524, 1.068791297809, 0.926112931411, 0.764581576418,
+ 0.587485545401, 0.398430002847, 0.201263574413,
+ };
+ float even0 = in[0] + kC11[2] * in[2] + kC11[4] * in[4] + kC11[6] * in[6];
+ float even1 =
+ in[0] + kC11[6] * in[2] - kC11[10] * in[4] - kC11[4] * in[6];
+ float even2 =
+ in[0] + kC11[10] * in[2] - kC11[2] * in[4] - kC11[8] * in[6];
+ float even3 = in[0] - kC11[8] * in[2] - kC11[6] * in[4] + kC11[2] * in[6];
+ float even4 =
+ in[0] - kC11[4] * in[2] + kC11[8] * in[4] + kC11[10] * in[6];
+ float even5 = in[0] - kC11[0] * in[2] + kC11[0] * in[4] - kC11[0] * in[6];
+ float odd0 =
+ kC11[1] * in[1] + kC11[3] * in[3] + kC11[5] * in[5] + kC11[7] * in[7];
+ float odd1 =
+ kC11[3] * in[1] + kC11[9] * in[3] - kC11[7] * in[5] - kC11[1] * in[7];
+ float odd2 =
+ kC11[5] * in[1] - kC11[7] * in[3] - kC11[3] * in[5] + kC11[9] * in[7];
+ float odd3 =
+ kC11[7] * in[1] - kC11[1] * in[3] + kC11[9] * in[5] + kC11[5] * in[7];
+ float odd4 =
+ kC11[9] * in[1] - kC11[5] * in[3] + kC11[1] * in[5] - kC11[3] * in[7];
+ out[0] = even0 + odd0;
+ out[10] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[9] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[8] = even2 - odd2;
+ out[3] = even3 + odd3;
+ out[7] = even3 - odd3;
+ out[4] = even4 + odd4;
+ out[6] = even4 - odd4;
+ out[5] = even5;
+ break;
+ }
+ case 12: {
+ static constexpr float kC12[12] = {
+ 1.414213562373, 1.402114769300, 1.366025403784, 1.306562964876,
+ 1.224744871392, 1.121971053594, 1.000000000000, 0.860918669154,
+ 0.707106781187, 0.541196100146, 0.366025403784, 0.184591911283,
+ };
+ float even0 = in[0] + kC12[2] * in[2] + kC12[4] * in[4] + kC12[6] * in[6];
+ float even1 = in[0] + kC12[6] * in[2] - kC12[6] * in[6];
+ float even2 =
+ in[0] + kC12[10] * in[2] - kC12[4] * in[4] - kC12[6] * in[6];
+ float even3 =
+ in[0] - kC12[10] * in[2] - kC12[4] * in[4] + kC12[6] * in[6];
+ float even4 = in[0] - kC12[6] * in[2] + kC12[6] * in[6];
+ float even5 = in[0] - kC12[2] * in[2] + kC12[4] * in[4] - kC12[6] * in[6];
+ float odd0 =
+ kC12[1] * in[1] + kC12[3] * in[3] + kC12[5] * in[5] + kC12[7] * in[7];
+ float odd1 =
+ kC12[3] * in[1] + kC12[9] * in[3] - kC12[9] * in[5] - kC12[3] * in[7];
+ float odd2 = kC12[5] * in[1] - kC12[9] * in[3] - kC12[1] * in[5] -
+ kC12[11] * in[7];
+ float odd3 = kC12[7] * in[1] - kC12[3] * in[3] - kC12[11] * in[5] +
+ kC12[1] * in[7];
+ float odd4 =
+ kC12[9] * in[1] - kC12[3] * in[3] + kC12[3] * in[5] - kC12[9] * in[7];
+ float odd5 = kC12[11] * in[1] - kC12[9] * in[3] + kC12[7] * in[5] -
+ kC12[5] * in[7];
+ out[0] = even0 + odd0;
+ out[11] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[10] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[9] = even2 - odd2;
+ out[3] = even3 + odd3;
+ out[8] = even3 - odd3;
+ out[4] = even4 + odd4;
+ out[7] = even4 - odd4;
+ out[5] = even5 + odd5;
+ out[6] = even5 - odd5;
+ break;
+ }
+ case 13: {
+ static constexpr float kC13[13] = {
+ 1.414213562373, 1.403902353238, 1.373119086479, 1.322312651445,
+ 1.252223920364, 1.163874944761, 1.058554051646, 0.937797056801,
+ 0.803364869133, 0.657217812653, 0.501487040539, 0.338443458124,
+ 0.170464607981,
+ };
+ float even0 = in[0] + kC13[2] * in[2] + kC13[4] * in[4] + kC13[6] * in[6];
+ float even1 =
+ in[0] + kC13[6] * in[2] + kC13[12] * in[4] - kC13[8] * in[6];
+ float even2 =
+ in[0] + kC13[10] * in[2] - kC13[6] * in[4] - kC13[4] * in[6];
+ float even3 =
+ in[0] - kC13[12] * in[2] - kC13[2] * in[4] + kC13[10] * in[6];
+ float even4 =
+ in[0] - kC13[8] * in[2] - kC13[10] * in[4] + kC13[2] * in[6];
+ float even5 =
+ in[0] - kC13[4] * in[2] + kC13[8] * in[4] - kC13[12] * in[6];
+ float even6 = in[0] - kC13[0] * in[2] + kC13[0] * in[4] - kC13[0] * in[6];
+ float odd0 =
+ kC13[1] * in[1] + kC13[3] * in[3] + kC13[5] * in[5] + kC13[7] * in[7];
+ float odd1 = kC13[3] * in[1] + kC13[9] * in[3] - kC13[11] * in[5] -
+ kC13[5] * in[7];
+ float odd2 = kC13[5] * in[1] - kC13[11] * in[3] - kC13[1] * in[5] -
+ kC13[9] * in[7];
+ float odd3 =
+ kC13[7] * in[1] - kC13[5] * in[3] - kC13[9] * in[5] + kC13[3] * in[7];
+ float odd4 = kC13[9] * in[1] - kC13[1] * in[3] + kC13[7] * in[5] +
+ kC13[11] * in[7];
+ float odd5 = kC13[11] * in[1] - kC13[7] * in[3] + kC13[3] * in[5] -
+ kC13[1] * in[7];
+ out[0] = even0 + odd0;
+ out[12] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[11] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[10] = even2 - odd2;
+ out[3] = even3 + odd3;
+ out[9] = even3 - odd3;
+ out[4] = even4 + odd4;
+ out[8] = even4 - odd4;
+ out[5] = even5 + odd5;
+ out[7] = even5 - odd5;
+ out[6] = even6;
+ break;
+ }
+ case 14: {
+ static constexpr float kC14[14] = {
+ 1.414213562373, 1.405321284327, 1.378756275744, 1.334852607020,
+ 1.274162392264, 1.197448846138, 1.105676685997, 1.000000000000,
+ 0.881747733790, 0.752406978226, 0.613604268353, 0.467085128785,
+ 0.314692122713, 0.158341680609,
+ };
+ float even0 = in[0] + kC14[2] * in[2] + kC14[4] * in[4] + kC14[6] * in[6];
+ float even1 =
+ in[0] + kC14[6] * in[2] + kC14[12] * in[4] - kC14[10] * in[6];
+ float even2 =
+ in[0] + kC14[10] * in[2] - kC14[8] * in[4] - kC14[2] * in[6];
+ float even3 = in[0] - kC14[0] * in[4];
+ float even4 =
+ in[0] - kC14[10] * in[2] - kC14[8] * in[4] + kC14[2] * in[6];
+ float even5 =
+ in[0] - kC14[6] * in[2] + kC14[12] * in[4] + kC14[10] * in[6];
+ float even6 = in[0] - kC14[2] * in[2] + kC14[4] * in[4] - kC14[6] * in[6];
+ float odd0 =
+ kC14[1] * in[1] + kC14[3] * in[3] + kC14[5] * in[5] + kC14[7] * in[7];
+ float odd1 = kC14[3] * in[1] + kC14[9] * in[3] - kC14[13] * in[5] -
+ kC14[7] * in[7];
+ float odd2 = kC14[5] * in[1] - kC14[13] * in[3] - kC14[3] * in[5] -
+ kC14[7] * in[7];
+ float odd3 =
+ kC14[7] * in[1] - kC14[7] * in[3] - kC14[7] * in[5] + kC14[7] * in[7];
+ float odd4 = kC14[9] * in[1] - kC14[1] * in[3] + kC14[11] * in[5] +
+ kC14[7] * in[7];
+ float odd5 = kC14[11] * in[1] - kC14[5] * in[3] + kC14[1] * in[5] -
+ kC14[7] * in[7];
+ float odd6 = kC14[13] * in[1] - kC14[11] * in[3] + kC14[9] * in[5] -
+ kC14[7] * in[7];
+ out[0] = even0 + odd0;
+ out[13] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[12] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[11] = even2 - odd2;
+ out[3] = even3 + odd3;
+ out[10] = even3 - odd3;
+ out[4] = even4 + odd4;
+ out[9] = even4 - odd4;
+ out[5] = even5 + odd5;
+ out[8] = even5 - odd5;
+ out[6] = even6 + odd6;
+ out[7] = even6 - odd6;
+ break;
+ }
+ case 15: {
+ static constexpr float kC15[15] = {
+ 1.414213562373, 1.406466352507, 1.383309602960, 1.344997023928,
+ 1.291948376043, 1.224744871392, 1.144122805635, 1.050965490998,
+ 0.946293578512, 0.831253875555, 0.707106781187, 0.575212476952,
+ 0.437016024449, 0.294031532930, 0.147825570407,
+ };
+ float even0 = in[0] + kC15[2] * in[2] + kC15[4] * in[4] + kC15[6] * in[6];
+ float even1 =
+ in[0] + kC15[6] * in[2] + kC15[12] * in[4] - kC15[12] * in[6];
+ float even2 =
+ in[0] + kC15[10] * in[2] - kC15[10] * in[4] - kC15[0] * in[6];
+ float even3 =
+ in[0] + kC15[14] * in[2] - kC15[2] * in[4] - kC15[12] * in[6];
+ float even4 =
+ in[0] - kC15[12] * in[2] - kC15[6] * in[4] + kC15[6] * in[6];
+ float even5 =
+ in[0] - kC15[8] * in[2] - kC15[14] * in[4] + kC15[6] * in[6];
+ float even6 =
+ in[0] - kC15[4] * in[2] + kC15[8] * in[4] - kC15[12] * in[6];
+ float even7 = in[0] - kC15[0] * in[2] + kC15[0] * in[4] - kC15[0] * in[6];
+ float odd0 =
+ kC15[1] * in[1] + kC15[3] * in[3] + kC15[5] * in[5] + kC15[7] * in[7];
+ float odd1 = kC15[3] * in[1] + kC15[9] * in[3] - kC15[9] * in[7];
+ float odd2 = kC15[5] * in[1] - kC15[5] * in[5] - kC15[5] * in[7];
+ float odd3 = kC15[7] * in[1] - kC15[9] * in[3] - kC15[5] * in[5] +
+ kC15[11] * in[7];
+ float odd4 = kC15[9] * in[1] - kC15[3] * in[3] + kC15[3] * in[7];
+ float odd5 = kC15[11] * in[1] - kC15[3] * in[3] + kC15[5] * in[5] -
+ kC15[13] * in[7];
+ float odd6 = kC15[13] * in[1] - kC15[9] * in[3] + kC15[5] * in[5] -
+ kC15[1] * in[7];
+ out[0] = even0 + odd0;
+ out[14] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[13] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[12] = even2 - odd2;
+ out[3] = even3 + odd3;
+ out[11] = even3 - odd3;
+ out[4] = even4 + odd4;
+ out[10] = even4 - odd4;
+ out[5] = even5 + odd5;
+ out[9] = even5 - odd5;
+ out[6] = even6 + odd6;
+ out[8] = even6 - odd6;
+ out[7] = even7;
+ break;
+ }
+ case 16: {
+ static constexpr float kC16[16] = {
+ 1.414213562373, 1.407403737526, 1.387039845322, 1.353318001174,
+ 1.306562964876, 1.247225012987, 1.175875602419, 1.093201867002,
+ 1.000000000000, 0.897167586343, 0.785694958387, 0.666655658478,
+ 0.541196100146, 0.410524527522, 0.275899379283, 0.138617169199,
+ };
+ float even0 = in[0] + kC16[2] * in[2] + kC16[4] * in[4] + kC16[6] * in[6];
+ float even1 =
+ in[0] + kC16[6] * in[2] + kC16[12] * in[4] - kC16[14] * in[6];
+ float even2 =
+ in[0] + kC16[10] * in[2] - kC16[12] * in[4] - kC16[2] * in[6];
+ float even3 =
+ in[0] + kC16[14] * in[2] - kC16[4] * in[4] - kC16[10] * in[6];
+ float even4 =
+ in[0] - kC16[14] * in[2] - kC16[4] * in[4] + kC16[10] * in[6];
+ float even5 =
+ in[0] - kC16[10] * in[2] - kC16[12] * in[4] + kC16[2] * in[6];
+ float even6 =
+ in[0] - kC16[6] * in[2] + kC16[12] * in[4] + kC16[14] * in[6];
+ float even7 = in[0] - kC16[2] * in[2] + kC16[4] * in[4] - kC16[6] * in[6];
+ float odd0 = (kC16[1] * in[1] + kC16[3] * in[3] + kC16[5] * in[5] +
+ kC16[7] * in[7]);
+ float odd1 = (kC16[3] * in[1] + kC16[9] * in[3] + kC16[15] * in[5] -
+ kC16[11] * in[7]);
+ float odd2 = (kC16[5] * in[1] + kC16[15] * in[3] - kC16[7] * in[5] -
+ kC16[3] * in[7]);
+ float odd3 = (kC16[7] * in[1] - kC16[11] * in[3] - kC16[3] * in[5] +
+ kC16[15] * in[7]);
+ float odd4 = (kC16[9] * in[1] - kC16[5] * in[3] - kC16[13] * in[5] +
+ kC16[1] * in[7]);
+ float odd5 = (kC16[11] * in[1] - kC16[1] * in[3] + kC16[9] * in[5] +
+ kC16[13] * in[7]);
+ float odd6 = (kC16[13] * in[1] - kC16[7] * in[3] + kC16[1] * in[5] -
+ kC16[5] * in[7]);
+ float odd7 = (kC16[15] * in[1] - kC16[13] * in[3] + kC16[11] * in[5] -
+ kC16[9] * in[7]);
+ out[0] = even0 + odd0;
+ out[15] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[14] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[13] = even2 - odd2;
+ out[3] = even3 + odd3;
+ out[12] = even3 - odd3;
+ out[4] = even4 + odd4;
+ out[11] = even4 - odd4;
+ out[5] = even5 + odd5;
+ out[10] = even5 - odd5;
+ out[6] = even6 + odd6;
+ out[9] = even6 - odd6;
+ out[7] = even7 + odd7;
+ out[8] = even7 - odd7;
+ break;
+ }
+ }
+}
+
+// Dequantizes one coefficient block and writes a dctsize x dctsize block of
+// samples to `output`, whose rows are `output_stride` floats apart.
+// `scratch_space` is carved into work buffers at offsets 0, DCTSIZE2 and
+// (for dctsize 2 and 4) 2 * DCTSIZE2.
+void InverseTransformBlockGeneric(const int16_t* JXL_RESTRICT qblock,
+ const float* JXL_RESTRICT dequant,
+ const float* JXL_RESTRICT biases,
+ float* JXL_RESTRICT scratch_space,
+ float* JXL_RESTRICT output,
+ size_t output_stride, size_t dctsize) {
+ float* JXL_RESTRICT block0 = scratch_space;
+ float* JXL_RESTRICT block1 = scratch_space + DCTSIZE2;
+ DequantBlock(qblock, dequant, biases, block0);
+ if (dctsize == 1) {
+ // Single output sample: the first dequantized value is used directly.
+ *output = *block0;
+ } else if (dctsize == 2 || dctsize == 4) {
+ float* JXL_RESTRICT block2 = scratch_space + 2 * DCTSIZE2;
+ // Result goes to block2 with a row stride of 8 (ComputeScaledIDCT is
+ // defined outside this view; presumably the full 8x8 inverse transform).
+ ComputeScaledIDCT(block0, block1, block2, 8);
+ if (dctsize == 4) {
+ // Each output sample is the mean of a 2x2 cell of block2.
+ for (size_t iy = 0; iy < 4; ++iy) {
+ for (size_t ix = 0; ix < 4; ++ix) {
+ float* block = &block2[16 * iy + 2 * ix];
+ output[iy * output_stride + ix] =
+ 0.25f * (block[0] + block[1] + block[8] + block[9]);
+ }
+ }
+ } else {
+ // Each output sample is the mean of a 4x4 cell of block2.
+ for (size_t iy = 0; iy < 2; ++iy) {
+ for (size_t ix = 0; ix < 2; ++ix) {
+ float* block = &block2[32 * iy + 4 * ix];
+ output[iy * output_stride + ix] =
+ 0.0625f *
+ (block[0] + block[1] + block[2] + block[3] + block[8] + block[9] +
+ block[10] + block[11] + block[16] + block[17] + block[18] +
+ block[19] + block[24] + block[25] + block[26] + block[27]);
+ }
+ }
+ }
+ } else {
+ float dctin[DCTSIZE];
+ float dctout[DCTSIZE * 2];
+ // At most DCTSIZE input coefficients per direction are available.
+ size_t insize = std::min<size_t>(dctsize, DCTSIZE);
+ // Column pass: gather column ix of block0 (row stride DCTSIZE), run the
+ // 1-D IDCT and store the dctsize results in block1 (row stride dctsize).
+ for (size_t ix = 0; ix < insize; ++ix) {
+ for (size_t iy = 0; iy < insize; ++iy) {
+ dctin[iy] = block0[iy * DCTSIZE + ix];
+ }
+ Compute1dIDCT(dctin, dctout, dctsize);
+ for (size_t iy = 0; iy < dctsize; ++iy) {
+ block1[iy * dctsize + ix] = dctout[iy];
+ }
+ }
+ // Row pass: transform each row of block1 straight into the output rows.
+ for (size_t iy = 0; iy < dctsize; ++iy) {
+ Compute1dIDCT(block1 + iy * dctsize, output + iy * output_stride,
+ dctsize);
+ }
+ }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+// Both transforms are selected through HWY_DYNAMIC_DISPATCH in
+// ChooseInverseTransform below.
+HWY_EXPORT(InverseTransformBlock8x8);
+HWY_EXPORT(InverseTransformBlockGeneric);
+
+// Picks the inverse transform for every component: the dedicated 8x8 routine
+// when the component's scaled DCT size equals DCTSIZE, the generic routine for
+// any other size.
+void ChooseInverseTransform(j_decompress_ptr cinfo) {
+  jpeg_decomp_master* master = cinfo->master;
+  const int num_comps = cinfo->num_components;
+  for (int comp = 0; comp < num_comps; ++comp) {
+    if (master->scaled_dct_size[comp] != DCTSIZE) {
+      master->inverse_transform[comp] =
+          HWY_DYNAMIC_DISPATCH(InverseTransformBlockGeneric);
+      continue;
+    }
+    master->inverse_transform[comp] =
+        HWY_DYNAMIC_DISPATCH(InverseTransformBlock8x8);
+  }
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/lib/jpegli/idct.h b/lib/jpegli/idct.h
new file mode 100644
index 0000000..c2ec6d1
--- /dev/null
+++ b/lib/jpegli/idct.h
@@ -0,0 +1,18 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_IDCT_H_
+#define LIB_JPEGLI_IDCT_H_
+
+#include "lib/jpegli/common.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jpegli {
+
+// Sets, for each component of `cinfo`, the inverse transform function stored
+// in the decompress master, based on the component's scaled DCT size.
+void ChooseInverseTransform(j_decompress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_IDCT_H_
diff --git a/lib/jpegli/input.cc b/lib/jpegli/input.cc
new file mode 100644
index 0000000..765bf98
--- /dev/null
+++ b/lib/jpegli/input.cc
@@ -0,0 +1,414 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/input.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/input.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::Vec;
+
+// Vector type tags: full float / uint32 vectors, plus uint8 / uint16 tags
+// rebound from the float tag.
+using D = HWY_FULL(float);
+using DU = HWY_FULL(uint32_t);
+using DU8 = Rebind<uint8_t, D>;
+using DU16 = Rebind<uint16_t, D>;
+
+constexpr D d;
+constexpr DU du;
+constexpr DU8 du8;
+constexpr DU16 du16;
+
+// Multiplier applied to 16-bit input samples (65535 / 257 == 255).
+static constexpr double kMul16 = 1.0 / 257.0;
+// Multiplier applied to float input samples.
+static constexpr double kMulFloat = 255.0;
+
+// Scalar conversion of 8-bit samples for pixels [x0, len) of an interleaved
+// row with C channels: channel c of pixel x is stored to row_out[c][x].
+template <size_t C>
+void ReadUint8Row(const uint8_t* row_in, size_t x0, size_t len,
+                  float* row_out[kMaxComponents]) {
+  size_t x = x0;
+  while (x < len) {
+    const size_t base = C * x;
+    for (size_t c = 0; c != C; ++c) {
+      row_out[c][x] = row_in[base + c];
+    }
+    ++x;
+  }
+}
+
+// Scalar conversion of 16-bit samples for pixels [x0, len) of an interleaved
+// row with C channels. Each sample is byte-swapped first when swap_endianness
+// is set, then multiplied by kMul16.
+template <size_t C, bool swap_endianness = false>
+void ReadUint16Row(const uint8_t* row_in, size_t x0, size_t len,
+                   float* row_out[kMaxComponents]) {
+  const uint16_t* samples = reinterpret_cast<const uint16_t*>(row_in);
+  for (size_t x = x0; x < len; ++x) {
+    const size_t base = C * x;
+    for (size_t c = 0; c < C; ++c) {
+      uint16_t sample = samples[base + c];
+      if (swap_endianness) {
+        sample = JXL_BSWAP16(sample);
+      }
+      row_out[c][x] = sample * kMul16;
+    }
+  }
+}
+
+// Scalar conversion of float samples for pixels [x0, len) of an interleaved
+// row with C channels. Each sample is byte-swapped first when swap_endianness
+// is set, then multiplied by kMulFloat.
+template <size_t C, bool swap_endianness = false>
+void ReadFloatRow(const uint8_t* row_in, size_t x0, size_t len,
+                  float* row_out[kMaxComponents]) {
+  const float* samples = reinterpret_cast<const float*>(row_in);
+  for (size_t x = x0; x < len; ++x) {
+    const size_t base = C * x;
+    for (size_t c = 0; c < C; ++c) {
+      float sample = samples[base + c];
+      if (swap_endianness) {
+        sample = BSwapFloat(sample);
+      }
+      row_out[c][x] = sample * kMulFloat;
+    }
+  }
+}
+
+// Converts a single-channel row of 8-bit samples to float. Whole vectors are
+// processed first; the remaining pixels go through scalar ReadUint8Row<1>.
+void ReadUint8RowSingle(const uint8_t* row_in, size_t len,
+                        float* row_out[kMaxComponents]) {
+  const size_t lanes = Lanes(d);
+  // len rounded down to a multiple of the lane count (assumed power of two).
+  const size_t vec_end = len & (~(lanes - 1));
+  float* JXL_RESTRICT const dst = row_out[0];
+  for (size_t pos = 0; pos < vec_end; pos += lanes) {
+    const auto bytes = LoadU(du8, row_in + pos);
+    const auto widened = PromoteTo(du, bytes);
+    Store(ConvertTo(d, widened), d, dst + pos);
+  }
+  ReadUint8Row<1>(row_in, vec_end, len, row_out);
+}
+
+// De-interleaves a two-channel row of 8-bit samples into two float rows.
+// Whole vectors are processed first; the remaining pixels go through scalar
+// ReadUint8Row<2>.
+void ReadUint8RowInterleaved2(const uint8_t* row_in, size_t len,
+                              float* row_out[kMaxComponents]) {
+  const size_t lanes = Lanes(d);
+  // len rounded down to a multiple of the lane count (assumed power of two).
+  const size_t vec_end = len & (~(lanes - 1));
+  float* JXL_RESTRICT const dst0 = row_out[0];
+  float* JXL_RESTRICT const dst1 = row_out[1];
+  Vec<DU8> ch0, ch1;
+  for (size_t pos = 0; pos < vec_end; pos += lanes) {
+    LoadInterleaved2(du8, row_in + 2 * pos, ch0, ch1);
+    const auto f0 = ConvertTo(d, PromoteTo(du, ch0));
+    const auto f1 = ConvertTo(d, PromoteTo(du, ch1));
+    Store(f0, d, dst0 + pos);
+    Store(f1, d, dst1 + pos);
+  }
+  ReadUint8Row<2>(row_in, vec_end, len, row_out);
+}
+
+// De-interleaves a three-channel row of 8-bit samples into three float rows.
+// Whole vectors are processed first; the remaining pixels go through scalar
+// ReadUint8Row<3>.
+void ReadUint8RowInterleaved3(const uint8_t* row_in, size_t len,
+                              float* row_out[kMaxComponents]) {
+  const size_t lanes = Lanes(d);
+  // len rounded down to a multiple of the lane count (assumed power of two).
+  const size_t vec_end = len & (~(lanes - 1));
+  float* JXL_RESTRICT const dst0 = row_out[0];
+  float* JXL_RESTRICT const dst1 = row_out[1];
+  float* JXL_RESTRICT const dst2 = row_out[2];
+  Vec<DU8> ch0, ch1, ch2;
+  for (size_t pos = 0; pos < vec_end; pos += lanes) {
+    LoadInterleaved3(du8, row_in + 3 * pos, ch0, ch1, ch2);
+    const auto f0 = ConvertTo(d, PromoteTo(du, ch0));
+    const auto f1 = ConvertTo(d, PromoteTo(du, ch1));
+    const auto f2 = ConvertTo(d, PromoteTo(du, ch2));
+    Store(f0, d, dst0 + pos);
+    Store(f1, d, dst1 + pos);
+    Store(f2, d, dst2 + pos);
+  }
+  ReadUint8Row<3>(row_in, vec_end, len, row_out);
+}
+
+// De-interleaves a four-channel row of 8-bit samples into four float rows.
+// Whole vectors are processed first; the remaining pixels go through scalar
+// ReadUint8Row<4>.
+void ReadUint8RowInterleaved4(const uint8_t* row_in, size_t len,
+                              float* row_out[kMaxComponents]) {
+  const size_t lanes = Lanes(d);
+  // len rounded down to a multiple of the lane count (assumed power of two).
+  const size_t vec_end = len & (~(lanes - 1));
+  float* JXL_RESTRICT const dst0 = row_out[0];
+  float* JXL_RESTRICT const dst1 = row_out[1];
+  float* JXL_RESTRICT const dst2 = row_out[2];
+  float* JXL_RESTRICT const dst3 = row_out[3];
+  Vec<DU8> ch0, ch1, ch2, ch3;
+  for (size_t pos = 0; pos < vec_end; pos += lanes) {
+    LoadInterleaved4(du8, row_in + 4 * pos, ch0, ch1, ch2, ch3);
+    const auto f0 = ConvertTo(d, PromoteTo(du, ch0));
+    const auto f1 = ConvertTo(d, PromoteTo(du, ch1));
+    const auto f2 = ConvertTo(d, PromoteTo(du, ch2));
+    const auto f3 = ConvertTo(d, PromoteTo(du, ch3));
+    Store(f0, d, dst0 + pos);
+    Store(f1, d, dst1 + pos);
+    Store(f2, d, dst2 + pos);
+    Store(f3, d, dst3 + pos);
+  }
+  ReadUint8Row<4>(row_in, vec_end, len, row_out);
+}
+
+// Converts a single-channel row of native-endian 16-bit samples to float,
+// scaling by kMul16. Whole vectors are processed first; the remaining pixels
+// go through scalar ReadUint16Row<1>.
+void ReadUint16RowSingle(const uint8_t* row_in, size_t len,
+                         float* row_out[kMaxComponents]) {
+  const size_t lanes = Lanes(d);
+  // len rounded down to a multiple of the lane count (assumed power of two).
+  const size_t vec_end = len & (~(lanes - 1));
+  const auto scale = Set(d, kMul16);
+  const uint16_t* JXL_RESTRICT const src =
+      reinterpret_cast<const uint16_t*>(row_in);
+  float* JXL_RESTRICT const dst = row_out[0];
+  for (size_t pos = 0; pos < vec_end; pos += lanes) {
+    const auto raw = LoadU(du16, src + pos);
+    const auto as_float = ConvertTo(d, PromoteTo(du, raw));
+    Store(Mul(scale, as_float), d, dst + pos);
+  }
+  ReadUint16Row<1>(row_in, vec_end, len, row_out);
+}
+
+// De-interleaves a two-channel row of native-endian 16-bit samples into two
+// float rows, scaling by kMul16. Whole vectors are processed first; the
+// remaining pixels go through scalar ReadUint16Row<2>.
+void ReadUint16RowInterleaved2(const uint8_t* row_in, size_t len,
+                               float* row_out[kMaxComponents]) {
+  const size_t lanes = Lanes(d);
+  // len rounded down to a multiple of the lane count (assumed power of two).
+  const size_t vec_end = len & (~(lanes - 1));
+  const auto scale = Set(d, kMul16);
+  const uint16_t* JXL_RESTRICT const src =
+      reinterpret_cast<const uint16_t*>(row_in);
+  float* JXL_RESTRICT const dst0 = row_out[0];
+  float* JXL_RESTRICT const dst1 = row_out[1];
+  Vec<DU16> ch0, ch1;
+  for (size_t pos = 0; pos < vec_end; pos += lanes) {
+    LoadInterleaved2(du16, src + 2 * pos, ch0, ch1);
+    const auto f0 = Mul(scale, ConvertTo(d, PromoteTo(du, ch0)));
+    const auto f1 = Mul(scale, ConvertTo(d, PromoteTo(du, ch1)));
+    Store(f0, d, dst0 + pos);
+    Store(f1, d, dst1 + pos);
+  }
+  ReadUint16Row<2>(row_in, vec_end, len, row_out);
+}
+
+// De-interleaves a three-channel row of native-endian 16-bit samples into
+// three float rows, scaling by kMul16. Whole vectors are processed first; the
+// remaining pixels go through scalar ReadUint16Row<3>.
+void ReadUint16RowInterleaved3(const uint8_t* row_in, size_t len,
+                               float* row_out[kMaxComponents]) {
+  const size_t lanes = Lanes(d);
+  // len rounded down to a multiple of the lane count (assumed power of two).
+  const size_t vec_end = len & (~(lanes - 1));
+  const auto scale = Set(d, kMul16);
+  const uint16_t* JXL_RESTRICT const src =
+      reinterpret_cast<const uint16_t*>(row_in);
+  float* JXL_RESTRICT const dst0 = row_out[0];
+  float* JXL_RESTRICT const dst1 = row_out[1];
+  float* JXL_RESTRICT const dst2 = row_out[2];
+  Vec<DU16> ch0, ch1, ch2;
+  for (size_t pos = 0; pos < vec_end; pos += lanes) {
+    LoadInterleaved3(du16, src + 3 * pos, ch0, ch1, ch2);
+    const auto f0 = Mul(scale, ConvertTo(d, PromoteTo(du, ch0)));
+    const auto f1 = Mul(scale, ConvertTo(d, PromoteTo(du, ch1)));
+    const auto f2 = Mul(scale, ConvertTo(d, PromoteTo(du, ch2)));
+    Store(f0, d, dst0 + pos);
+    Store(f1, d, dst1 + pos);
+    Store(f2, d, dst2 + pos);
+  }
+  ReadUint16Row<3>(row_in, vec_end, len, row_out);
+}
+
+// De-interleaves a four-channel row of native-endian 16-bit samples into four
+// float rows, scaling by kMul16. Whole vectors are processed first; the
+// remaining pixels go through scalar ReadUint16Row<4>.
+void ReadUint16RowInterleaved4(const uint8_t* row_in, size_t len,
+                               float* row_out[kMaxComponents]) {
+  const size_t lanes = Lanes(d);
+  // len rounded down to a multiple of the lane count (assumed power of two).
+  const size_t vec_end = len & (~(lanes - 1));
+  const auto scale = Set(d, kMul16);
+  const uint16_t* JXL_RESTRICT const src =
+      reinterpret_cast<const uint16_t*>(row_in);
+  float* JXL_RESTRICT const dst0 = row_out[0];
+  float* JXL_RESTRICT const dst1 = row_out[1];
+  float* JXL_RESTRICT const dst2 = row_out[2];
+  float* JXL_RESTRICT const dst3 = row_out[3];
+  Vec<DU16> ch0, ch1, ch2, ch3;
+  for (size_t pos = 0; pos < vec_end; pos += lanes) {
+    LoadInterleaved4(du16, src + 4 * pos, ch0, ch1, ch2, ch3);
+    const auto f0 = Mul(scale, ConvertTo(d, PromoteTo(du, ch0)));
+    const auto f1 = Mul(scale, ConvertTo(d, PromoteTo(du, ch1)));
+    const auto f2 = Mul(scale, ConvertTo(d, PromoteTo(du, ch2)));
+    const auto f3 = Mul(scale, ConvertTo(d, PromoteTo(du, ch3)));
+    Store(f0, d, dst0 + pos);
+    Store(f1, d, dst1 + pos);
+    Store(f2, d, dst2 + pos);
+    Store(f3, d, dst3 + pos);
+  }
+  ReadUint16Row<4>(row_in, vec_end, len, row_out);
+}
+
+// Byte-swapped single-channel 16-bit input: the whole row is converted by the
+// scalar reader.
+void ReadUint16RowSingleSwap(const uint8_t* row_in, size_t len,
+                             float* row_out[kMaxComponents]) {
+  constexpr size_t kFirstPixel = 0;
+  ReadUint16Row<1, true>(row_in, kFirstPixel, len, row_out);
+}
+
+// Byte-swapped two-channel 16-bit input: the whole row is converted by the
+// scalar reader.
+void ReadUint16RowInterleaved2Swap(const uint8_t* row_in, size_t len,
+                                   float* row_out[kMaxComponents]) {
+  constexpr size_t kFirstPixel = 0;
+  ReadUint16Row<2, true>(row_in, kFirstPixel, len, row_out);
+}
+
+// Byte-swapped three-channel 16-bit input: the whole row is converted by the
+// scalar reader.
+void ReadUint16RowInterleaved3Swap(const uint8_t* row_in, size_t len,
+                                   float* row_out[kMaxComponents]) {
+  constexpr size_t kFirstPixel = 0;
+  ReadUint16Row<3, true>(row_in, kFirstPixel, len, row_out);
+}
+
+// Byte-swapped four-channel 16-bit input: the whole row is converted by the
+// scalar reader.
+void ReadUint16RowInterleaved4Swap(const uint8_t* row_in, size_t len,
+                                   float* row_out[kMaxComponents]) {
+  constexpr size_t kFirstPixel = 0;
+  ReadUint16Row<4, true>(row_in, kFirstPixel, len, row_out);
+}
+
+// Scales a single-channel row of native-endian float samples by kMulFloat.
+// Whole vectors are processed first; the remaining pixels go through scalar
+// ReadFloatRow<1>.
+void ReadFloatRowSingle(const uint8_t* row_in, size_t len,
+                        float* row_out[kMaxComponents]) {
+  const size_t lanes = Lanes(d);
+  // len rounded down to a multiple of the lane count (assumed power of two).
+  const size_t vec_end = len & (~(lanes - 1));
+  const auto scale = Set(d, kMulFloat);
+  const float* JXL_RESTRICT const src = reinterpret_cast<const float*>(row_in);
+  float* JXL_RESTRICT const dst = row_out[0];
+  for (size_t pos = 0; pos < vec_end; pos += lanes) {
+    const auto raw = LoadU(d, src + pos);
+    Store(Mul(scale, raw), d, dst + pos);
+  }
+  ReadFloatRow<1>(row_in, vec_end, len, row_out);
+}
+
+// De-interleaves a two-channel row of native-endian float samples into two
+// rows, scaling by kMulFloat. Whole vectors are processed first; the remaining
+// pixels go through scalar ReadFloatRow<2>.
+void ReadFloatRowInterleaved2(const uint8_t* row_in, size_t len,
+                              float* row_out[kMaxComponents]) {
+  const size_t lanes = Lanes(d);
+  // len rounded down to a multiple of the lane count (assumed power of two).
+  const size_t vec_end = len & (~(lanes - 1));
+  const auto scale = Set(d, kMulFloat);
+  const float* JXL_RESTRICT const src = reinterpret_cast<const float*>(row_in);
+  float* JXL_RESTRICT const dst0 = row_out[0];
+  float* JXL_RESTRICT const dst1 = row_out[1];
+  Vec<D> ch0, ch1;
+  for (size_t pos = 0; pos < vec_end; pos += lanes) {
+    LoadInterleaved2(d, src + 2 * pos, ch0, ch1);
+    const auto f0 = Mul(scale, ch0);
+    const auto f1 = Mul(scale, ch1);
+    Store(f0, d, dst0 + pos);
+    Store(f1, d, dst1 + pos);
+  }
+  ReadFloatRow<2>(row_in, vec_end, len, row_out);
+}
+
+// De-interleaves a three-channel row of native-endian float samples into
+// three rows, scaling by kMulFloat. Whole vectors are processed first; the
+// remaining pixels go through scalar ReadFloatRow<3>.
+void ReadFloatRowInterleaved3(const uint8_t* row_in, size_t len,
+                              float* row_out[kMaxComponents]) {
+  const size_t lanes = Lanes(d);
+  // len rounded down to a multiple of the lane count (assumed power of two).
+  const size_t vec_end = len & (~(lanes - 1));
+  const auto scale = Set(d, kMulFloat);
+  const float* JXL_RESTRICT const src = reinterpret_cast<const float*>(row_in);
+  float* JXL_RESTRICT const dst0 = row_out[0];
+  float* JXL_RESTRICT const dst1 = row_out[1];
+  float* JXL_RESTRICT const dst2 = row_out[2];
+  Vec<D> ch0, ch1, ch2;
+  for (size_t pos = 0; pos < vec_end; pos += lanes) {
+    LoadInterleaved3(d, src + 3 * pos, ch0, ch1, ch2);
+    const auto f0 = Mul(scale, ch0);
+    const auto f1 = Mul(scale, ch1);
+    const auto f2 = Mul(scale, ch2);
+    Store(f0, d, dst0 + pos);
+    Store(f1, d, dst1 + pos);
+    Store(f2, d, dst2 + pos);
+  }
+  ReadFloatRow<3>(row_in, vec_end, len, row_out);
+}
+
+// De-interleaves a four-channel row of native-endian float samples into four
+// rows, scaling by kMulFloat. Whole vectors are processed first; the remaining
+// pixels go through scalar ReadFloatRow<4>.
+void ReadFloatRowInterleaved4(const uint8_t* row_in, size_t len,
+                              float* row_out[kMaxComponents]) {
+  const size_t lanes = Lanes(d);
+  // len rounded down to a multiple of the lane count (assumed power of two).
+  const size_t vec_end = len & (~(lanes - 1));
+  const auto scale = Set(d, kMulFloat);
+  const float* JXL_RESTRICT const src = reinterpret_cast<const float*>(row_in);
+  float* JXL_RESTRICT const dst0 = row_out[0];
+  float* JXL_RESTRICT const dst1 = row_out[1];
+  float* JXL_RESTRICT const dst2 = row_out[2];
+  float* JXL_RESTRICT const dst3 = row_out[3];
+  Vec<D> ch0, ch1, ch2, ch3;
+  for (size_t pos = 0; pos < vec_end; pos += lanes) {
+    LoadInterleaved4(d, src + 4 * pos, ch0, ch1, ch2, ch3);
+    const auto f0 = Mul(scale, ch0);
+    const auto f1 = Mul(scale, ch1);
+    const auto f2 = Mul(scale, ch2);
+    const auto f3 = Mul(scale, ch3);
+    Store(f0, d, dst0 + pos);
+    Store(f1, d, dst1 + pos);
+    Store(f2, d, dst2 + pos);
+    Store(f3, d, dst3 + pos);
+  }
+  ReadFloatRow<4>(row_in, vec_end, len, row_out);
+}
+
+// Byte-swapped single-channel float input: the whole row is converted by the
+// scalar reader.
+void ReadFloatRowSingleSwap(const uint8_t* row_in, size_t len,
+                            float* row_out[kMaxComponents]) {
+  constexpr size_t kFirstPixel = 0;
+  ReadFloatRow<1, true>(row_in, kFirstPixel, len, row_out);
+}
+
+// Byte-swapped two-channel float input: the whole row is converted by the
+// scalar reader.
+void ReadFloatRowInterleaved2Swap(const uint8_t* row_in, size_t len,
+                                  float* row_out[kMaxComponents]) {
+  constexpr size_t kFirstPixel = 0;
+  ReadFloatRow<2, true>(row_in, kFirstPixel, len, row_out);
+}
+
+// Byte-swapped three-channel float input: the whole row is converted by the
+// scalar reader.
+void ReadFloatRowInterleaved3Swap(const uint8_t* row_in, size_t len,
+                                  float* row_out[kMaxComponents]) {
+  constexpr size_t kFirstPixel = 0;
+  ReadFloatRow<3, true>(row_in, kFirstPixel, len, row_out);
+}
+
+// Byte-swapped four-channel float input: the whole row is converted by the
+// scalar reader.
+void ReadFloatRowInterleaved4Swap(const uint8_t* row_in, size_t len,
+                                  float* row_out[kMaxComponents]) {
+  constexpr size_t kFirstPixel = 0;
+  ReadFloatRow<4, true>(row_in, kFirstPixel, len, row_out);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+// One export per row reader; ChooseInputMethod below picks among them with
+// HWY_DYNAMIC_DISPATCH.
+HWY_EXPORT(ReadUint8RowSingle);
+HWY_EXPORT(ReadUint8RowInterleaved2);
+HWY_EXPORT(ReadUint8RowInterleaved3);
+HWY_EXPORT(ReadUint8RowInterleaved4);
+HWY_EXPORT(ReadUint16RowSingle);
+HWY_EXPORT(ReadUint16RowInterleaved2);
+HWY_EXPORT(ReadUint16RowInterleaved3);
+HWY_EXPORT(ReadUint16RowInterleaved4);
+HWY_EXPORT(ReadUint16RowSingleSwap);
+HWY_EXPORT(ReadUint16RowInterleaved2Swap);
+HWY_EXPORT(ReadUint16RowInterleaved3Swap);
+HWY_EXPORT(ReadUint16RowInterleaved4Swap);
+HWY_EXPORT(ReadFloatRowSingle);
+HWY_EXPORT(ReadFloatRowInterleaved2);
+HWY_EXPORT(ReadFloatRowInterleaved3);
+HWY_EXPORT(ReadFloatRowInterleaved4);
+HWY_EXPORT(ReadFloatRowSingleSwap);
+HWY_EXPORT(ReadFloatRowInterleaved2Swap);
+HWY_EXPORT(ReadFloatRowInterleaved3Swap);
+HWY_EXPORT(ReadFloatRowInterleaved4Swap);
+
+// Stores in the compress master the row reader matching the input data type,
+// the number of interleaved channels and whether samples must be byte-swapped.
+// Reports an error through JPEGLI_ERROR when no reader matches.
+void ChooseInputMethod(j_compress_ptr cinfo) {
+  jpeg_comp_master* m = cinfo->master;
+  const bool host_is_le = IsLittleEndian();
+  const bool swap_endianness =
+      (m->endianness == JPEGLI_LITTLE_ENDIAN && !host_is_le) ||
+      (m->endianness == JPEGLI_BIG_ENDIAN && host_is_le);
+  // Raw-data input and one-component input both use the non-interleaved
+  // ("Single") readers; otherwise the reader follows input_components.
+  const int num_channels =
+      (cinfo->raw_data_in || cinfo->input_components == 1)
+          ? 1
+          : cinfo->input_components;
+  m->input_method = nullptr;
+  if (m->data_type == JPEGLI_TYPE_UINT8) {
+    switch (num_channels) {
+      case 1:
+        m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowSingle);
+        break;
+      case 2:
+        m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowInterleaved2);
+        break;
+      case 3:
+        m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowInterleaved3);
+        break;
+      case 4:
+        m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowInterleaved4);
+        break;
+      default:
+        break;
+    }
+  } else if (m->data_type == JPEGLI_TYPE_UINT16) {
+    if (!swap_endianness) {
+      switch (num_channels) {
+        case 1:
+          m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowSingle);
+          break;
+        case 2:
+          m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved2);
+          break;
+        case 3:
+          m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved3);
+          break;
+        case 4:
+          m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved4);
+          break;
+        default:
+          break;
+      }
+    } else {
+      switch (num_channels) {
+        case 1:
+          m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowSingleSwap);
+          break;
+        case 2:
+          m->input_method =
+              HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved2Swap);
+          break;
+        case 3:
+          m->input_method =
+              HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved3Swap);
+          break;
+        case 4:
+          m->input_method =
+              HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved4Swap);
+          break;
+        default:
+          break;
+      }
+    }
+  } else if (m->data_type == JPEGLI_TYPE_FLOAT) {
+    if (!swap_endianness) {
+      switch (num_channels) {
+        case 1:
+          m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowSingle);
+          break;
+        case 2:
+          m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved2);
+          break;
+        case 3:
+          m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved3);
+          break;
+        case 4:
+          m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved4);
+          break;
+        default:
+          break;
+      }
+    } else {
+      switch (num_channels) {
+        case 1:
+          m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowSingleSwap);
+          break;
+        case 2:
+          m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved2Swap);
+          break;
+        case 3:
+          m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved3Swap);
+          break;
+        case 4:
+          m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved4Swap);
+          break;
+        default:
+          break;
+      }
+    }
+  }
+  if (m->input_method == nullptr) {
+    JPEGLI_ERROR("Could not find input method.");
+  }
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/lib/jpegli/input.h b/lib/jpegli/input.h
new file mode 100644
index 0000000..f54d0be
--- /dev/null
+++ b/lib/jpegli/input.h
@@ -0,0 +1,17 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_INPUT_H_
+#define LIB_JPEGLI_INPUT_H_
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+// Selects the input row reader for `cinfo` from its data type, endianness and
+// number of input components, and stores it in the compress master.
+void ChooseInputMethod(j_compress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_INPUT_H_
diff --git a/lib/jpegli/input_suspension_test.cc b/lib/jpegli/input_suspension_test.cc
new file mode 100644
index 0000000..09bafd9
--- /dev/null
+++ b/lib/jpegli/input_suspension_test.cc
@@ -0,0 +1,612 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <cmath>
+#include <cstdint>
+#include <vector>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jpegli {
+namespace {
+
+// Two bytes that SourceManager::LoadNextChunk supplies once the data of a
+// partial file is exhausted.
+static constexpr uint8_t kFakeEoiMarker[2] = {0xff, 0xd9};
+
+// Test source manager for input suspension: fill_input_buffer always returns
+// FALSE, and the test feeds data explicitly through LoadNextChunk() in pieces
+// of at most max_chunk_size bytes.
+struct SourceManager {
+ // A max_chunk_size of 0 means the whole input is delivered as one chunk.
+ SourceManager(const uint8_t* data, size_t len, size_t max_chunk_size,
+ bool is_partial_file)
+ : data_(data),
+ len_(len),
+ pos_(0),
+ max_chunk_size_(max_chunk_size),
+ is_partial_file_(is_partial_file) {
+ pub_.init_source = init_source;
+ pub_.fill_input_buffer = fill_input_buffer;
+ pub_.next_input_byte = nullptr;
+ pub_.bytes_in_buffer = 0;
+ pub_.skip_input_data = skip_input_data;
+ pub_.resync_to_restart = jpegli_resync_to_restart;
+ pub_.term_source = term_source;
+ if (max_chunk_size_ == 0) max_chunk_size_ = len;
+ }
+
+ // Checks that every buffered byte was consumed and, for complete files,
+ // that the read position reached the end of the input.
+ ~SourceManager() {
+ EXPECT_EQ(0, pub_.bytes_in_buffer);
+ if (!is_partial_file_) {
+ EXPECT_EQ(len_, pos_);
+ }
+ }
+
+ // Appends the next chunk of input behind any bytes still unconsumed in the
+ // buffer. Returns false when a complete file has no data left. For a partial
+ // file, each call made after the data has run out supplies kFakeEoiMarker.
+ bool LoadNextChunk() {
+ if (pos_ >= len_ && !is_partial_file_) {
+ return false;
+ }
+ if (pub_.bytes_in_buffer > 0) {
+ EXPECT_LE(pub_.bytes_in_buffer, buffer_.size());
+ // Move the unconsumed tail to the front of the buffer.
+ memmove(&buffer_[0], pub_.next_input_byte, pub_.bytes_in_buffer);
+ }
+ size_t chunk_size =
+ pos_ < len_ ? std::min(len_ - pos_, max_chunk_size_) : 2;
+ buffer_.resize(pub_.bytes_in_buffer + chunk_size);
+ memcpy(&buffer_[pub_.bytes_in_buffer],
+ pos_ < len_ ? data_ + pos_ : kFakeEoiMarker, chunk_size);
+ pub_.next_input_byte = &buffer_[0];
+ pub_.bytes_in_buffer += chunk_size;
+ pos_ += chunk_size;
+ return true;
+ }
+
+ private:
+ // First data member: the callbacks and the tests reinterpret_cast between
+ // jpeg_source_mgr* and SourceManager*.
+ jpeg_source_mgr pub_;
+ std::vector<uint8_t> buffer_;
+ const uint8_t* data_;
+ size_t len_;
+ size_t pos_;
+ size_t max_chunk_size_;
+ bool is_partial_file_;
+
+ static void init_source(j_decompress_ptr cinfo) {
+ auto src = reinterpret_cast<SourceManager*>(cinfo->src);
+ src->pub_.next_input_byte = nullptr;
+ src->pub_.bytes_in_buffer = 0;
+ }
+
+ // Never supplies data by itself; input only arrives via LoadNextChunk().
+ static boolean fill_input_buffer(j_decompress_ptr cinfo) { return FALSE; }
+
+ // Skips within the buffered bytes when possible; otherwise drops the buffer
+ // and advances pos_ by the part of the skip that lies beyond it.
+ static void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {
+ auto src = reinterpret_cast<SourceManager*>(cinfo->src);
+ if (num_bytes <= 0) {
+ return;
+ }
+ if (src->pub_.bytes_in_buffer >= static_cast<size_t>(num_bytes)) {
+ src->pub_.bytes_in_buffer -= num_bytes;
+ src->pub_.next_input_byte += num_bytes;
+ } else {
+ src->pos_ += num_bytes - src->pub_.bytes_in_buffer;
+ src->pub_.bytes_in_buffer = 0;
+ }
+ }
+
+ static void term_source(j_decompress_ptr cinfo) {}
+};
+
+// Marker codes recorded by test_marker_processor, and how many it has fully
+// processed so far.
+uint8_t markers_seen[kMarkerSequenceLen];
+size_t num_markers_seen = 0;
+
+// Consumes and returns one byte from the source manager's buffer. The caller
+// must make sure that at least one byte is buffered.
+uint8_t get_next_byte(j_decompress_ptr cinfo) {
+  auto* source = cinfo->src;
+  const uint8_t value = *source->next_input_byte;
+  ++source->next_input_byte;
+  --source->bytes_in_buffer;
+  return value;
+}
+
+// Marker processor used by the tests: records the marker code in
+// markers_seen, reads the two-byte big-endian length field, checks it against
+// the length expected at this position of the marker sequence and skips the
+// payload. Returns FALSE without counting the marker when fewer than two
+// bytes are buffered (presumably so that the decoder calls it again once more
+// input has been loaded).
+boolean test_marker_processor(j_decompress_ptr cinfo) {
+  // Never write past the end of the fixed-size array. The counter still
+  // advances below, so an unexpected extra marker remains visible in
+  // num_markers_seen.
+  if (num_markers_seen < sizeof(markers_seen)) {
+    markers_seen[num_markers_seen] = cinfo->unread_marker;
+  }
+  if (cinfo->src->bytes_in_buffer < 2) {
+    return FALSE;
+  }
+  // Read the two length bytes in separate statements: the operands of `+` may
+  // be evaluated in either order, which could swap the high and low byte.
+  const size_t len_hi = get_next_byte(cinfo);
+  const size_t len_lo = get_next_byte(cinfo);
+  const size_t marker_len = (len_hi << 8) + len_lo;
+  EXPECT_EQ(2 + ((num_markers_seen + 2) % sizeof(kMarkerData)), marker_len);
+  if (marker_len > 2) {
+    (*cinfo->src->skip_input_data)(cinfo, marker_len - 2);
+  }
+  ++num_markers_seen;
+  return TRUE;
+}
+
+// Reads all output lines of the current pass into *output, either as raw
+// component planes (cinfo->raw_data_out) or as interleaved scanlines.
+// Whenever the decoder returns zero lines, more input is loaded from `src`
+// and the read is retried.
+void ReadOutputImage(const DecompressParams& dparams, j_decompress_ptr cinfo,
+ SourceManager* src, TestImage* output) {
+ output->ysize = cinfo->output_height;
+ output->xsize = cinfo->output_width;
+ output->components = cinfo->num_components;
+ if (cinfo->raw_data_out) {
+ output->color_space = cinfo->jpeg_color_space;
+ // One plane per component, sized in whole blocks.
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+ size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+ std::vector<uint8_t> plane(ysize * xsize);
+ output->raw_data.emplace_back(std::move(plane));
+ }
+ } else {
+ output->color_space = cinfo->out_color_space;
+ output->AllocatePixels();
+ }
+ size_t total_output_lines = 0;
+ while (cinfo->output_scanline < cinfo->output_height) {
+ size_t max_lines;
+ size_t num_output_lines;
+ if (cinfo->raw_data_out) {
+ size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE;
+ EXPECT_EQ(cinfo->output_scanline, cinfo->output_iMCU_row * iMCU_height);
+ max_lines = iMCU_height;
+ std::vector<std::vector<JSAMPROW>> rowdata(cinfo->num_components);
+ std::vector<JSAMPARRAY> data(cinfo->num_components);
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+ size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+ size_t num_lines = cinfo->comp_info[c].v_samp_factor * DCTSIZE;
+ rowdata[c].resize(num_lines);
+ size_t y0 = cinfo->output_iMCU_row * num_lines;
+ // Rows that fall below the component's plane get a null row pointer.
+ for (size_t i = 0; i < num_lines; ++i) {
+ rowdata[c][i] =
+ y0 + i < ysize ? &output->raw_data[c][(y0 + i) * xsize] : nullptr;
+ }
+ data[c] = &rowdata[c][0];
+ }
+ // Zero lines returned: feed more input and retry.
+ while ((num_output_lines =
+ jpegli_read_raw_data(cinfo, &data[0], max_lines)) == 0) {
+ JXL_CHECK(src && src->LoadNextChunk());
+ }
+ } else {
+ // A max_output_lines of 0 requests all remaining lines in one call.
+ size_t max_output_lines = dparams.max_output_lines;
+ if (max_output_lines == 0) max_output_lines = cinfo->output_height;
+ size_t lines_left = cinfo->output_height - cinfo->output_scanline;
+ max_lines = std::min<size_t>(max_output_lines, lines_left);
+ size_t stride = cinfo->output_width * cinfo->num_components;
+ std::vector<JSAMPROW> scanlines(max_lines);
+ for (size_t i = 0; i < max_lines; ++i) {
+ size_t yidx = cinfo->output_scanline + i;
+ scanlines[i] = &output->pixels[yidx * stride];
+ }
+ // Zero lines returned: feed more input and retry.
+ while ((num_output_lines = jpegli_read_scanlines(cinfo, &scanlines[0],
+ max_lines)) == 0) {
+ JXL_CHECK(src && src->LoadNextChunk());
+ }
+ }
+ total_output_lines += num_output_lines;
+ EXPECT_EQ(total_output_lines, cinfo->output_scanline);
+ // Fewer lines than requested: load more input before the next read.
+ if (num_output_lines < max_lines) {
+ JXL_CHECK(src && src->LoadNextChunk());
+ }
+ }
+}
+
+// Parameters of one input-suspension test case.
+struct TestConfig {
+ // Test data file to decode; when empty, the JPEG is produced by encoding
+ // `input` with `jparams` (see GetTestJpegData).
+ std::string fn;
+ // Description of the file; IsSequential() searches it for "PROGR".
+ std::string fn_desc;
+ TestImage input;
+ CompressParams jparams;
+ DecompressParams dparams;
+ // Distance bound passed to VerifyOutputImage.
+ float max_rms_dist = 1.0f;
+};
+
+// Returns the JPEG bytes for a test case: the contents of the test data file
+// when config.fn is set, otherwise generated pixels (stored in config.input)
+// encoded with jpegli using config.jparams.
+std::vector<uint8_t> GetTestJpegData(TestConfig& config) {
+  const bool from_file = !config.fn.empty();
+  if (from_file) {
+    return ReadTestData(config.fn.c_str());
+  }
+  std::vector<uint8_t> jpeg_bytes;
+  GeneratePixels(&config.input);
+  JXL_CHECK(EncodeWithJpegli(config.input, config.jparams, &jpeg_bytes));
+  return jpeg_bytes;
+}
+
+// Returns whether the test JPEG is sequential: for file-based cases, the
+// description must not contain "PROGR"; for generated cases,
+// progressive_mode must be <= 0.
+bool IsSequential(const TestConfig& config) {
+  return config.fn.empty()
+             ? config.jparams.progressive_mode <= 0
+             : config.fn_desc.find("PROGR") == std::string::npos;
+}
+
+// Fixture for the input-suspension tests, parameterized by TestConfig.
+class InputSuspensionTestParam : public ::testing::TestWithParam<TestConfig> {};
+
+// Decodes with jpegli while feeding the input chunk by chunk: a new chunk is
+// loaded only when an API call reports that it could not make progress. The
+// result is compared with libjpeg's decode of the same bytes.
+TEST_P(InputSuspensionTestParam, InputOutputLockStepNonBuffered) {
+ TestConfig config = GetParam();
+ const DecompressParams& dparams = config.dparams;
+ std::vector<uint8_t> compressed = GetTestJpegData(config);
+ // A size_factor below 1 truncates the file to test partial input.
+ bool is_partial = config.dparams.size_factor < 1.0f;
+ if (is_partial) {
+ compressed.resize(compressed.size() * config.dparams.size_factor);
+ }
+ SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size,
+ is_partial);
+ TestImage output0;
+ jpeg_decompress_struct cinfo;
+ // NOTE(review): ERROR_HANDLER_SETUP is defined outside this view; presumably
+ // it makes this lambda return false when jpegli reports an error.
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+
+ if (config.jparams.add_marker) {
+ jpegli_save_markers(&cinfo, kSpecialMarker0, 0xffff);
+ jpegli_save_markers(&cinfo, kSpecialMarker1, 0xffff);
+ num_markers_seen = 0;
+ jpegli_set_marker_processor(&cinfo, 0xe6, test_marker_processor);
+ jpegli_set_marker_processor(&cinfo, 0xe7, test_marker_processor);
+ jpegli_set_marker_processor(&cinfo, 0xe8, test_marker_processor);
+ }
+ // Keep feeding input until the header has been read completely.
+ while (jpegli_read_header(&cinfo, TRUE) == JPEG_SUSPENDED) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ SetDecompressParams(dparams, &cinfo);
+ jpegli_set_output_format(&cinfo, dparams.data_type, dparams.endianness);
+ if (config.jparams.add_marker) {
+ EXPECT_EQ(num_markers_seen, kMarkerSequenceLen);
+ EXPECT_EQ(0, memcmp(markers_seen, kMarkerSequence, num_markers_seen));
+ }
+ VerifyHeader(config.jparams, &cinfo);
+ cinfo.raw_data_out = dparams.output_mode == RAW_DATA;
+
+ if (dparams.output_mode == COEFFICIENTS) {
+ jvirt_barray_ptr* coef_arrays;
+ while ((coef_arrays = jpegli_read_coefficients(&cinfo)) == nullptr) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ CopyCoefficients(&cinfo, coef_arrays, &output0);
+ } else {
+ while (!jpegli_start_decompress(&cinfo)) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ ReadOutputImage(dparams, &cinfo, &src, &output0);
+ }
+
+ while (!jpegli_finish_decompress(&cinfo)) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ return true;
+ };
+ ASSERT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+
+ // Reference decode of the same (possibly truncated) bytes.
+ TestImage output1;
+ DecodeWithLibjpeg(config.jparams, dparams, compressed, &output1);
+ VerifyOutputImage(output1, output0, config.max_rms_dist);
+}
+
+// Buffered-image variant: runs one output pass per input scan while feeding
+// the input chunk by chunk, and compares every pass with the corresponding
+// result of DecodeAllScansWithLibjpeg. Cases with add_marker set are skipped.
+TEST_P(InputSuspensionTestParam, InputOutputLockStepBuffered) {
+ TestConfig config = GetParam();
+ if (config.jparams.add_marker) return;
+ const DecompressParams& dparams = config.dparams;
+ std::vector<uint8_t> compressed = GetTestJpegData(config);
+ // A size_factor below 1 truncates the file to test partial input.
+ bool is_partial = config.dparams.size_factor < 1.0f;
+ if (is_partial) {
+ compressed.resize(compressed.size() * config.dparams.size_factor);
+ }
+ SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size,
+ is_partial);
+ std::vector<TestImage> output_progression0;
+ jpeg_decompress_struct cinfo;
+ // NOTE(review): ERROR_HANDLER_SETUP is defined outside this view; presumably
+ // it makes this lambda return false when jpegli reports an error.
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+
+ cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+
+ // Keep feeding input until the header has been read completely.
+ while (jpegli_read_header(&cinfo, TRUE) == JPEG_SUSPENDED) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ SetDecompressParams(dparams, &cinfo);
+ jpegli_set_output_format(&cinfo, dparams.data_type, dparams.endianness);
+
+ cinfo.buffered_image = TRUE;
+ cinfo.raw_data_out = dparams.output_mode == RAW_DATA;
+
+ EXPECT_TRUE(jpegli_start_decompress(&cinfo));
+ EXPECT_FALSE(jpegli_input_complete(&cinfo));
+ EXPECT_EQ(0, cinfo.output_scan_number);
+
+ int sos_marker_cnt = 1; // read_header reads the first SOS marker
+ while (!jpegli_input_complete(&cinfo)) {
+ EXPECT_EQ(cinfo.input_scan_number, sos_marker_cnt);
+ EXPECT_TRUE(jpegli_start_output(&cinfo, cinfo.input_scan_number));
+ // start output sets output_scan_number, but does not change
+ // input_scan_number
+ EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number);
+ EXPECT_EQ(cinfo.input_scan_number, sos_marker_cnt);
+ TestImage output;
+ ReadOutputImage(dparams, &cinfo, &src, &output);
+ output_progression0.emplace_back(std::move(output));
+ // read scanlines/read raw data does not change input/output scan number
+ EXPECT_EQ(cinfo.input_scan_number, sos_marker_cnt);
+ EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number);
+ while (!jpegli_finish_output(&cinfo)) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ ++sos_marker_cnt; // finish output reads the next SOS marker or EOI
+ if (dparams.output_mode == COEFFICIENTS) {
+ jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&cinfo);
+ JXL_CHECK(coef_arrays != nullptr);
+ CopyCoefficients(&cinfo, coef_arrays, &output_progression0.back());
+ }
+ }
+
+ EXPECT_TRUE(jpegli_finish_decompress(&cinfo));
+ return true;
+ };
+ ASSERT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+
+ // Reference: one image per scan decoded by libjpeg from the same bytes.
+ std::vector<TestImage> output_progression1;
+ DecodeAllScansWithLibjpeg(config.jparams, dparams, compressed,
+ &output_progression1);
+ ASSERT_EQ(output_progression0.size(), output_progression1.size());
+ for (size_t i = 0; i < output_progression0.size(); ++i) {
+ const TestImage& output = output_progression0[i];
+ const TestImage& expected = output_progression1[i];
+ VerifyOutputImage(expected, output, config.max_rms_dist);
+ }
+}
+
+// Buffered-image decoding in which the whole input is consumed with
+// jpegli_consume_input() before any output is requested. Only the final scan
+// is rendered and it is compared with the last image of the progression
+// produced by DecodeAllScansWithLibjpeg().
+TEST_P(InputSuspensionTestParam, PreConsumeInputBuffered) {
+ TestConfig config = GetParam();
+ // Configurations that add extra markers are not exercised by this test.
+ if (config.jparams.add_marker) return;
+ const DecompressParams& dparams = config.dparams;
+ std::vector<uint8_t> compressed = GetTestJpegData(config);
+ // A size_factor below 1 truncates the stream to simulate partial input.
+ bool is_partial = config.dparams.size_factor < 1.0f;
+ if (is_partial) {
+ compressed.resize(compressed.size() * config.dparams.size_factor);
+ }
+ // The reference progression is decoded first because its size is used in
+ // the scan-number expectations below.
+ std::vector<TestImage> output_progression1;
+ DecodeAllScansWithLibjpeg(config.jparams, dparams, compressed,
+ &output_progression1);
+ SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size,
+ is_partial);
+ TestImage output0;
+ jpeg_decompress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ // NOTE(review): ERROR_HANDLER_SETUP is defined outside this chunk;
+ // presumably it installs an error handler that makes this lambda return
+ // false on a decoder error - confirm.
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+
+ // Consume input until the first SOS marker, supplying a new chunk
+ // whenever the decoder suspends.
+ int status;
+ while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_SOS) {
+ if (status == JPEG_SUSPENDED) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ }
+ // A further call at this point is expected to report the SOS state again.
+ EXPECT_EQ(JPEG_REACHED_SOS, jpegli_consume_input(&cinfo));
+ cinfo.buffered_image = TRUE;
+ cinfo.raw_data_out = dparams.output_mode == RAW_DATA;
+ cinfo.do_block_smoothing = dparams.do_block_smoothing;
+
+ EXPECT_TRUE(jpegli_start_decompress(&cinfo));
+ EXPECT_FALSE(jpegli_input_complete(&cinfo));
+ EXPECT_EQ(1, cinfo.input_scan_number);
+ EXPECT_EQ(0, cinfo.output_scan_number);
+
+ // Consume the remaining scans up to EOI before producing any output.
+ while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_EOI) {
+ if (status == JPEG_SUSPENDED) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ }
+
+ EXPECT_TRUE(jpegli_input_complete(&cinfo));
+ EXPECT_EQ(output_progression1.size(), cinfo.input_scan_number);
+ EXPECT_EQ(0, cinfo.output_scan_number);
+
+ EXPECT_TRUE(jpegli_start_output(&cinfo, cinfo.input_scan_number));
+ EXPECT_EQ(output_progression1.size(), cinfo.input_scan_number);
+ EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number);
+
+ // No source manager is passed here: all input has already been consumed.
+ ReadOutputImage(dparams, &cinfo, nullptr, &output0);
+ EXPECT_EQ(output_progression1.size(), cinfo.input_scan_number);
+ EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number);
+
+ EXPECT_TRUE(jpegli_finish_output(&cinfo));
+ if (dparams.output_mode == COEFFICIENTS) {
+ jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&cinfo);
+ JXL_CHECK(coef_arrays != nullptr);
+ CopyCoefficients(&cinfo, coef_arrays, &output0);
+ }
+ EXPECT_TRUE(jpegli_finish_decompress(&cinfo));
+ return true;
+ };
+ ASSERT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+
+ VerifyOutputImage(output_progression1.back(), output0, config.max_rms_dist);
+}
+
+// Non-buffered decoding in which the remaining input is consumed with
+// jpegli_consume_input() after decompression has been started and before the
+// output is read. The result is compared with a plain libjpeg decode of the
+// same stream.
+TEST_P(InputSuspensionTestParam, PreConsumeInputNonBuffered) {
+ TestConfig config = GetParam();
+ // Skipped for configurations that add extra markers and for those that
+ // IsSequential() reports as sequential.
+ if (config.jparams.add_marker || IsSequential(config)) return;
+ const DecompressParams& dparams = config.dparams;
+ std::vector<uint8_t> compressed = GetTestJpegData(config);
+ // A size_factor below 1 truncates the stream to simulate partial input.
+ bool is_partial = config.dparams.size_factor < 1.0f;
+ if (is_partial) {
+ compressed.resize(compressed.size() * config.dparams.size_factor);
+ }
+ SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size,
+ is_partial);
+ TestImage output0;
+ jpeg_decompress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ // NOTE(review): ERROR_HANDLER_SETUP is defined outside this chunk;
+ // presumably it installs an error handler that makes this lambda return
+ // false on a decoder error - confirm.
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+
+ // Consume input until the first SOS marker, supplying a new chunk
+ // whenever the decoder suspends.
+ int status;
+ while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_SOS) {
+ if (status == JPEG_SUSPENDED) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ }
+ EXPECT_EQ(JPEG_REACHED_SOS, jpegli_consume_input(&cinfo));
+ cinfo.raw_data_out = dparams.output_mode == RAW_DATA;
+ cinfo.do_block_smoothing = dparams.do_block_smoothing;
+
+ if (dparams.output_mode == COEFFICIENTS) {
+ // The return value of this first call is deliberately not inspected;
+ // the coefficient arrays are fetched again once EOI has been reached.
+ jpegli_read_coefficients(&cinfo);
+ } else {
+ while (!jpegli_start_decompress(&cinfo)) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ }
+
+ // Consume the rest of the input up to EOI before reading any output.
+ while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_EOI) {
+ if (status == JPEG_SUSPENDED) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ }
+
+ if (dparams.output_mode == COEFFICIENTS) {
+ jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&cinfo);
+ JXL_CHECK(coef_arrays != nullptr);
+ CopyCoefficients(&cinfo, coef_arrays, &output0);
+ } else {
+ // No source manager is passed here: all input has already been consumed.
+ ReadOutputImage(dparams, &cinfo, nullptr, &output0);
+ }
+
+ EXPECT_TRUE(jpegli_finish_decompress(&cinfo));
+ return true;
+ };
+ ASSERT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+
+ TestImage output1;
+ DecodeWithLibjpeg(config.jparams, dparams, compressed, &output1);
+ VerifyOutputImage(output1, output0, config.max_rms_dist);
+}
+
+// Builds the list of configurations the parameterized input-suspension tests
+// are instantiated with. The order of the returned vector is the order in
+// which the configurations are appended below.
+std::vector<TestConfig> GenerateTests() {
+  std::vector<TestConfig> tests;
+
+  // Pre-encoded files, crossed with input chunk sizes and output batch sizes.
+  const std::vector<std::pair<std::string, std::string>> kTestFiles({
+      {"jxl/flower/flower.png.im_q85_444.jpg", "Q85YUV444"},
+      {"jxl/flower/flower.png.im_q85_420_R13B.jpg", "Q85YUV420R13B"},
+      {"jxl/flower/flower.png.im_q85_420_progr.jpg", "Q85YUV420PROGR"},
+  });
+  for (const auto& file : kTestFiles) {
+    for (size_t chunk_size : {1, 64, 65536}) {
+      for (size_t max_output_lines : {0, 1, 8, 16}) {
+        TestConfig config;
+        config.fn = file.first;
+        config.fn_desc = file.second;
+        config.dparams.chunk_size = chunk_size;
+        config.dparams.max_output_lines = max_output_lines;
+        tests.push_back(config);
+        if (max_output_lines != 16) continue;
+        // The non-pixel output modes are added for this batch size only.
+        for (JpegIOMode mode : {RAW_DATA, COEFFICIENTS}) {
+          config.dparams.output_mode = mode;
+          tests.push_back(config);
+        }
+      }
+    }
+  }
+
+  // Restart intervals.
+  for (size_t restart_interval : {1, 17, 1024}) {
+    for (size_t chunk_size : {1, 65536}) {
+      TestConfig config;
+      config.dparams.chunk_size = chunk_size;
+      config.jparams.progressive_mode = 2;
+      config.jparams.restart_interval = restart_interval;
+      tests.push_back(config);
+    }
+  }
+
+  // Streams with added markers.
+  for (size_t chunk_size : {1, 4, 1024}) {
+    TestConfig config;
+    config.input.xsize = 256;
+    config.input.ysize = 256;
+    config.dparams.chunk_size = chunk_size;
+    config.jparams.add_marker = true;
+    tests.push_back(config);
+  }
+
+  // Tests for partial input.
+  for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f}) {
+    for (int progr : {0, 1, 3}) {
+      for (int samp : {1, 2}) {
+        for (JpegIOMode output_mode : {PIXELS, RAW_DATA}) {
+          TestConfig config;
+          config.input.xsize = 517;
+          config.input.ysize = 523;
+          config.jparams.h_sampling = {samp, 1, 1};
+          config.jparams.v_sampling = {samp, 1, 1};
+          config.jparams.progressive_mode = progr;
+          config.dparams.size_factor = size_factor;
+          config.dparams.output_mode = output_mode;
+          // The last partially available block can behave differently.
+          // TODO(szabadka) Figure out if we can make the behaviour more
+          // similar.
+          if (samp == 1) {
+            config.max_rms_dist = 1.75f;
+          } else {
+            config.max_rms_dist = 3.0f;
+          }
+          tests.push_back(config);
+        }
+      }
+    }
+  }
+
+  // Tests for block smoothing.
+  for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f, 1.0f}) {
+    for (int samp : {1, 2}) {
+      TestConfig config;
+      config.input.xsize = 517;
+      config.input.ysize = 523;
+      config.jparams.h_sampling = {samp, 1, 1};
+      config.jparams.v_sampling = {samp, 1, 1};
+      config.jparams.progressive_mode = 2;
+      config.dparams.size_factor = size_factor;
+      config.dparams.do_block_smoothing = true;
+      // libjpeg does smoothing for incomplete scans differently at
+      // the border between current and previous scans.
+      config.max_rms_dist = 8.0f;
+      tests.push_back(config);
+    }
+  }
+  return tests;
+}
+
+// Streams a compact, separator-free description of a test configuration:
+// image source, compression parameters, input chunking, optional truncation,
+// output batching, output mode and block smoothing, in that order.
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+  const DecompressParams& dp = c.dparams;
+  if (c.fn.empty()) {
+    os << c.input;
+  } else {
+    os << c.fn_desc;
+  }
+  os << c.jparams;
+  if (dp.chunk_size != 0) {
+    os << "InputChunks" << dp.chunk_size;
+  } else {
+    os << "CompleteInput";
+  }
+  if (dp.size_factor < 1.0f) {
+    const int percent = static_cast<int>(dp.size_factor * 100);
+    os << "Partial" << percent << "p";
+  }
+  if (dp.max_output_lines != 0) {
+    os << "OutputLines" << dp.max_output_lines;
+  } else {
+    os << "CompleteOutput";
+  }
+  switch (dp.output_mode) {
+    case RAW_DATA:
+      os << "RawDataOut";
+      break;
+    case COEFFICIENTS:
+      os << "CoeffsOut";
+      break;
+    default:
+      // Any other output mode adds nothing to the description.
+      break;
+  }
+  if (dp.do_block_smoothing) {
+    os << "BlockSmoothing";
+  }
+  return os;
+}
+
+// Returns the streamed form of the test parameter; passed to the suite
+// instantiation below as the name generator.
+std::string TestDescription(
+    const testing::TestParamInfo<InputSuspensionTestParam::ParamType>& info) {
+  std::ostringstream description;
+  description << info.param;
+  return description.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(InputSuspensionTest, InputSuspensionTestParam,
+                                testing::ValuesIn(GenerateTests()),
+                                TestDescription);
+
+} // namespace
+} // namespace jpegli
diff --git a/lib/jpegli/jpeg.version.62 b/lib/jpegli/jpeg.version.62
new file mode 100644
index 0000000..3a8d1f5
--- /dev/null
+++ b/lib/jpegli/jpeg.version.62
@@ -0,0 +1,11 @@
+LIBJPEG_6.2 {
+ global:
+ jpeg*;
+};
+
+LIBJPEGTURBO_6.2 {
+ global:
+ jpeg_mem_src*;
+ jpeg_mem_dest*;
+ tj*;
+}; \ No newline at end of file
diff --git a/lib/jpegli/jpeg.version.8 b/lib/jpegli/jpeg.version.8
new file mode 100644
index 0000000..aa891f8
--- /dev/null
+++ b/lib/jpegli/jpeg.version.8
@@ -0,0 +1,9 @@
+LIBJPEG_8.0 {
+ global:
+ jpeg*;
+};
+
+LIBJPEGTURBO_8.0 {
+ global:
+ tj*;
+};
diff --git a/lib/jpegli/libjpeg_test_util.cc b/lib/jpegli/libjpeg_test_util.cc
new file mode 100644
index 0000000..de23037
--- /dev/null
+++ b/lib/jpegli/libjpeg_test_util.cc
@@ -0,0 +1,261 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/libjpeg_test_util.h"
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+#include <setjmp.h>
+/* clang-format on */
+
+#include "lib/jxl/sanitizers.h"
+
+namespace jpegli {
+
+namespace {
+
+#define JPEG_API_FN(name) jpeg_##name
+#include "lib/jpegli/test_utils-inl.h"
+#undef JPEG_API_FN
+
+// Reads one complete output pass from a libjpeg decompressor into `output`.
+// Depending on cinfo->raw_data_out the pass is read either as interleaved
+// scanlines (into output->pixels) or as raw per-component planes (into
+// output->raw_data). When dparams.crop_output is set, the crop starts at one
+// third of the output size; jpeg_crop_scanline() may adjust the horizontal
+// part.
+void ReadOutputPass(j_decompress_ptr cinfo, const DecompressParams& dparams,
+                    TestImage* output) {
+  JDIMENSION crop_x0 = 0;
+  JDIMENSION crop_y0 = 0;
+  JDIMENSION crop_width = cinfo->output_width;
+  JDIMENSION crop_height = cinfo->output_height;
+  if (dparams.crop_output) {
+    crop_x0 = crop_width = cinfo->output_width / 3;
+    crop_y0 = crop_height = cinfo->output_height / 3;
+    jpeg_crop_scanline(cinfo, &crop_x0, &crop_width);
+    JXL_CHECK(crop_width == cinfo->output_width);
+  }
+  output->xsize = crop_width;
+  output->ysize = crop_height;
+  output->components = cinfo->out_color_components;
+  if (cinfo->quantize_colors) {
+    // The colormap was written by libjpeg; mark it initialized for MSan.
+    jxl::msan::UnpoisonMemory(
+        cinfo->colormap,
+        cinfo->out_color_components * sizeof(cinfo->colormap[0]));
+    for (int c = 0; c < cinfo->out_color_components; ++c) {
+      jxl::msan::UnpoisonMemory(
+          cinfo->colormap[c],
+          cinfo->actual_number_of_colors * sizeof(cinfo->colormap[c][0]));
+    }
+  }
+  if (cinfo->raw_data_out) {
+    output->color_space = cinfo->jpeg_color_space;
+    // One plane per component, sized in whole DCT blocks.
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      size_t plane_width = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+      size_t plane_height = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+      output->raw_data.emplace_back(
+          std::vector<uint8_t>(plane_height * plane_width));
+    }
+    while (cinfo->output_scanline < cinfo->output_height) {
+      size_t imcu_lines = cinfo->max_v_samp_factor * DCTSIZE;
+      JXL_CHECK(cinfo->output_scanline == cinfo->output_iMCU_row * imcu_lines);
+      std::vector<std::vector<JSAMPROW>> row_ptrs(cinfo->num_components);
+      std::vector<JSAMPARRAY> planes(cinfo->num_components);
+      for (int c = 0; c < cinfo->num_components; ++c) {
+        size_t plane_width = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+        size_t plane_height = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+        size_t lines = cinfo->comp_info[c].v_samp_factor * DCTSIZE;
+        size_t first_line = cinfo->output_iMCU_row * lines;
+        row_ptrs[c].resize(lines);
+        for (size_t i = 0; i < lines; ++i) {
+          // Rows past the end of the plane get a null pointer.
+          JSAMPROW row = nullptr;
+          if (first_line + i < plane_height) {
+            row = &output->raw_data[c][(first_line + i) * plane_width];
+          }
+          row_ptrs[c][i] = row;
+        }
+        planes[c] = &row_ptrs[c][0];
+      }
+      JXL_CHECK(imcu_lines ==
+                jpeg_read_raw_data(cinfo, &planes[0], imcu_lines));
+    }
+  } else {
+    const size_t stride = output->xsize * output->components;
+    output->pixels.resize(output->ysize * stride);
+    output->color_space = cinfo->out_color_space;
+    if (crop_y0 > 0) {
+      jpeg_skip_scanlines(cinfo, crop_y0);
+    }
+    for (size_t y = 0; y < output->ysize; ++y) {
+      JSAMPROW row = reinterpret_cast<JSAMPLE*>(&output->pixels[y * stride]);
+      JXL_CHECK(1 == jpeg_read_scanlines(cinfo, &row, 1));
+      jxl::msan::UnpoisonMemory(
+          row, sizeof(JSAMPLE) * cinfo->output_components * output->xsize);
+      if (cinfo->quantize_colors) {
+        UnmapColors(row, cinfo->output_width, cinfo->out_color_components,
+                    cinfo->colormap, cinfo->actual_number_of_colors);
+      }
+    }
+    // Skip whatever is left below the cropped region so the pass completes.
+    if (cinfo->output_scanline < cinfo->output_height) {
+      jpeg_skip_scanlines(cinfo, cinfo->output_height - cinfo->output_scanline);
+    }
+  }
+  JXL_CHECK(cinfo->total_iMCU_rows ==
+            DivCeil(cinfo->image_height, cinfo->max_v_samp_factor * DCTSIZE));
+}
+
+// Decodes the stream already attached to `cinfo` with libjpeg and stores the
+// result in `output`: coefficients when dparams.output_mode is COEFFICIENTS,
+// otherwise one output pass read by ReadOutputPass(). Along the way it checks
+// the header against `jparams` and, when jparams.icc is non-empty, that the
+// embedded ICC profile is identical to it. Any mismatch aborts via JXL_CHECK.
+void DecodeWithLibjpeg(const CompressParams& jparams,
+                       const DecompressParams& dparams, j_decompress_ptr cinfo,
+                       TestImage* output) {
+  // Marker saving has to be requested before the header is read.
+  if (jparams.add_marker) {
+    jpeg_save_markers(cinfo, kSpecialMarker0, 0xffff);
+    jpeg_save_markers(cinfo, kSpecialMarker1, 0xffff);
+  }
+  if (!jparams.icc.empty()) {
+    jpeg_save_markers(cinfo, JPEG_APP0 + 2, 0xffff);
+  }
+  JXL_CHECK(JPEG_REACHED_SOS ==
+            jpeg_read_header(cinfo, /*require_image=*/TRUE));
+  if (!jparams.icc.empty()) {
+    uint8_t* icc_data = nullptr;
+    unsigned int icc_len = 0;
+    JXL_CHECK(jpeg_read_icc_profile(cinfo, &icc_data, &icc_len));
+    JXL_CHECK(icc_data);
+    // The lengths must agree before the contents are compared: otherwise a
+    // truncated profile would pass, and a longer one would make memcmp read
+    // past the end of jparams.icc.
+    JXL_CHECK(icc_len == jparams.icc.size());
+    jxl::msan::UnpoisonMemory(icc_data, icc_len);
+    JXL_CHECK(0 == memcmp(jparams.icc.data(), icc_data, icc_len));
+    // The profile buffer returned by jpeg_read_icc_profile is released here.
+    free(icc_data);
+  }
+  SetDecompressParams(dparams, cinfo);
+  VerifyHeader(jparams, cinfo);
+  if (dparams.output_mode == COEFFICIENTS) {
+    jvirt_barray_ptr* coef_arrays = jpeg_read_coefficients(cinfo);
+    JXL_CHECK(coef_arrays != nullptr);
+    CopyCoefficients(cinfo, coef_arrays, output);
+  } else {
+    JXL_CHECK(jpeg_start_decompress(cinfo));
+    VerifyScanHeader(jparams, cinfo);
+    ReadOutputPass(cinfo, dparams, output);
+  }
+  JXL_CHECK(jpeg_finish_decompress(cinfo));
+}
+
+} // namespace
+
+// Decodes `compressed` with libjpeg in buffered-image mode and appends one
+// TestImage per output scan to `output_progression`. The header and every
+// scan header are checked against `jparams`. With dparams.skip_scans, scans
+// with an even input scan number are consumed without producing output.
+// Any libjpeg error or failed check aborts via JXL_CHECK.
+void DecodeAllScansWithLibjpeg(const CompressParams& jparams,
+                               const DecompressParams& dparams,
+                               const std::vector<uint8_t>& compressed,
+                               std::vector<TestImage>* output_progression) {
+  jpeg_decompress_struct cinfo = {};
+  // The error manager is referenced by cinfo.err, so it has to stay alive
+  // until jpeg_destroy_decompress() below; it must not be local to the lambda.
+  jpeg_error_mgr jerr;
+  const auto try_catch_block = [&]() {
+    jmp_buf env;
+    cinfo.err = jpeg_std_error(&jerr);
+    // error_exit jumps back here; the lambda then reports failure.
+    if (setjmp(env)) {
+      return false;
+    }
+    cinfo.client_data = reinterpret_cast<void*>(&env);
+    cinfo.err->error_exit = [](j_common_ptr cinfo) {
+      (*cinfo->err->output_message)(cinfo);
+      jmp_buf* env = reinterpret_cast<jmp_buf*>(cinfo->client_data);
+      jpeg_destroy(cinfo);
+      longjmp(*env, 1);
+    };
+    jpeg_create_decompress(&cinfo);
+    jpeg_mem_src(&cinfo, compressed.data(), compressed.size());
+    if (jparams.add_marker) {
+      jpeg_save_markers(&cinfo, kSpecialMarker0, 0xffff);
+      jpeg_save_markers(&cinfo, kSpecialMarker1, 0xffff);
+    }
+    JXL_CHECK(JPEG_REACHED_SOS ==
+              jpeg_read_header(&cinfo, /*require_image=*/TRUE));
+    cinfo.buffered_image = TRUE;
+    SetDecompressParams(dparams, &cinfo);
+    VerifyHeader(jparams, &cinfo);
+    JXL_CHECK(jpeg_start_decompress(&cinfo));
+    // start decompress should not read the whole input in buffered image mode
+    JXL_CHECK(!jpeg_input_complete(&cinfo));
+    JXL_CHECK(cinfo.output_scan_number == 0);
+    int sos_marker_cnt = 1;  // read header reads the first SOS marker
+    while (!jpeg_input_complete(&cinfo)) {
+      JXL_CHECK(cinfo.input_scan_number == sos_marker_cnt);
+      if (dparams.skip_scans && (cinfo.input_scan_number % 2) != 1) {
+        // Consume this scan without output, up to the next SOS or EOI.
+        int result = JPEG_SUSPENDED;
+        while (result != JPEG_REACHED_SOS && result != JPEG_REACHED_EOI) {
+          result = jpeg_consume_input(&cinfo);
+        }
+        if (result == JPEG_REACHED_SOS) ++sos_marker_cnt;
+        continue;
+      }
+      SetScanDecompressParams(dparams, &cinfo, cinfo.input_scan_number);
+      JXL_CHECK(jpeg_start_output(&cinfo, cinfo.input_scan_number));
+      // start output sets output_scan_number, but does not change
+      // input_scan_number
+      JXL_CHECK(cinfo.output_scan_number == cinfo.input_scan_number);
+      JXL_CHECK(cinfo.input_scan_number == sos_marker_cnt);
+      VerifyScanHeader(jparams, &cinfo);
+      TestImage output;
+      ReadOutputPass(&cinfo, dparams, &output);
+      output_progression->emplace_back(std::move(output));
+      // read scanlines/read raw data does not change input/output scan number
+      if (!cinfo.progressive_mode) {
+        JXL_CHECK(cinfo.input_scan_number == sos_marker_cnt);
+        JXL_CHECK(cinfo.output_scan_number == cinfo.input_scan_number);
+      }
+      JXL_CHECK(jpeg_finish_output(&cinfo));
+      ++sos_marker_cnt;  // finish output reads the next SOS marker or EOI
+      if (dparams.output_mode == COEFFICIENTS) {
+        jvirt_barray_ptr* coef_arrays = jpeg_read_coefficients(&cinfo);
+        JXL_CHECK(coef_arrays != nullptr);
+        CopyCoefficients(&cinfo, coef_arrays, &output_progression->back());
+      }
+    }
+    JXL_CHECK(jpeg_finish_decompress(&cinfo));
+    return true;
+  };
+  JXL_CHECK(try_catch_block());
+  jpeg_destroy_decompress(&cinfo);
+}
+
+// Decodes `compressed` (of `len` bytes) with libjpeg into `output`. When
+// `table_stream` is non-null it is read first with require_image = FALSE, so
+// that the image stream can rely on what that stream defined.
+// Returns the number of bytes read from compressed. Any libjpeg error aborts
+// via JXL_CHECK.
+size_t DecodeWithLibjpeg(const CompressParams& jparams,
+                         const DecompressParams& dparams,
+                         const uint8_t* table_stream, size_t table_stream_size,
+                         const uint8_t* compressed, size_t len,
+                         TestImage* output) {
+  jpeg_decompress_struct cinfo = {};
+  // The error manager is referenced by cinfo.err, so it has to stay alive
+  // until jpeg_destroy_decompress() below; it must not be local to the lambda.
+  jpeg_error_mgr jerr;
+  size_t bytes_read = 0;
+  const auto try_catch_block = [&]() {
+    jmp_buf env;
+    cinfo.err = jpeg_std_error(&jerr);
+    // error_exit jumps back here; the lambda then reports failure.
+    if (setjmp(env)) {
+      return false;
+    }
+    cinfo.client_data = reinterpret_cast<void*>(&env);
+    cinfo.err->error_exit = [](j_common_ptr cinfo) {
+      (*cinfo->err->output_message)(cinfo);
+      jmp_buf* env = reinterpret_cast<jmp_buf*>(cinfo->client_data);
+      jpeg_destroy(cinfo);
+      longjmp(*env, 1);
+    };
+    jpeg_create_decompress(&cinfo);
+    if (table_stream != nullptr) {
+      jpeg_mem_src(&cinfo, table_stream, table_stream_size);
+      jpeg_read_header(&cinfo, FALSE);
+    }
+    jpeg_mem_src(&cinfo, compressed, len);
+    DecodeWithLibjpeg(jparams, dparams, &cinfo, output);
+    // Whatever the source manager still holds was not consumed.
+    bytes_read = len - cinfo.src->bytes_in_buffer;
+    return true;
+  };
+  JXL_CHECK(try_catch_block());
+  jpeg_destroy_decompress(&cinfo);
+  return bytes_read;
+}
+
+// Convenience overload: decodes a complete in-memory stream without a
+// separate table stream and discards the number of bytes read.
+void DecodeWithLibjpeg(const CompressParams& jparams,
+                       const DecompressParams& dparams,
+                       const std::vector<uint8_t>& compressed,
+                       TestImage* output) {
+  const uint8_t* const no_table_stream = nullptr;
+  DecodeWithLibjpeg(jparams, dparams, no_table_stream, 0, compressed.data(),
+                    compressed.size(), output);
+}
+
+} // namespace jpegli
diff --git a/lib/jpegli/libjpeg_test_util.h b/lib/jpegli/libjpeg_test_util.h
new file mode 100644
index 0000000..18cc1e5
--- /dev/null
+++ b/lib/jpegli/libjpeg_test_util.h
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_LIBJPEG_TEST_UTIL_H_
+#define LIB_JPEGLI_LIBJPEG_TEST_UTIL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jpegli/test_params.h"
+
+namespace jpegli {
+
+// Verifies that an image encoded with libjpegli can be decoded with libjpeg,
+// and checks that the jpeg coding metadata matches jparams.
+// Decoding uses libjpeg's buffered-image mode; one TestImage per output scan
+// is appended to output_progression.
+void DecodeAllScansWithLibjpeg(const CompressParams& jparams,
+ const DecompressParams& dparams,
+ const std::vector<uint8_t>& compressed,
+ std::vector<TestImage>* output_progression);
+// Decodes compressed (len bytes) with libjpeg into output. If table_stream is
+// not null, it is read (table_stream_size bytes) before the image stream.
+// Returns the number of bytes read from compressed.
+size_t DecodeWithLibjpeg(const CompressParams& jparams,
+ const DecompressParams& dparams,
+ const uint8_t* table_stream, size_t table_stream_size,
+ const uint8_t* compressed, size_t len,
+ TestImage* output);
+// Same as above for a complete stream held in a vector, without a table
+// stream.
+void DecodeWithLibjpeg(const CompressParams& jparams,
+ const DecompressParams& dparams,
+ const std::vector<uint8_t>& compressed,
+ TestImage* output);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_LIBJPEG_TEST_UTIL_H_
diff --git a/lib/jpegli/libjpeg_wrapper.cc b/lib/jpegli/libjpeg_wrapper.cc
new file mode 100644
index 0000000..b38d16f
--- /dev/null
+++ b/lib/jpegli/libjpeg_wrapper.cc
@@ -0,0 +1,255 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// This file contains wrapper-functions that are used to build the libjpeg.so
+// shared library that is API- and ABI-compatible with libjpeg-turbo's version
+// of libjpeg.so.
+
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/error.h"
+
+// Wrappers shared by compression and decompression objects. Each one forwards
+// its arguments unchanged to the jpegli function of the same name.
+
+// Forwards to jpegli_std_error().
+struct jpeg_error_mgr *jpeg_std_error(struct jpeg_error_mgr *err) {
+  return jpegli_std_error(err);
+}
+
+// Forwards to jpegli_abort().
+void jpeg_abort(j_common_ptr cinfo) {
+  jpegli_abort(cinfo);
+}
+
+// Forwards to jpegli_destroy().
+void jpeg_destroy(j_common_ptr cinfo) {
+  jpegli_destroy(cinfo);
+}
+
+// Forwards to jpegli_alloc_quant_table().
+JQUANT_TBL *jpeg_alloc_quant_table(j_common_ptr cinfo) {
+  return jpegli_alloc_quant_table(cinfo);
+}
+
+// Forwards to jpegli_alloc_huff_table().
+JHUFF_TBL *jpeg_alloc_huff_table(j_common_ptr cinfo) {
+  return jpegli_alloc_huff_table(cinfo);
+}
+
+// Decompressor creation, data sources and header parsing. Each wrapper
+// forwards its arguments unchanged to the jpegli function of the same name.
+
+// Forwards to jpegli_CreateDecompress().
+void jpeg_CreateDecompress(j_decompress_ptr cinfo, int version,
+                           size_t structsize) {
+  jpegli_CreateDecompress(cinfo, version, structsize);
+}
+
+// Forwards to jpegli_stdio_src().
+void jpeg_stdio_src(j_decompress_ptr cinfo, FILE *infile) {
+  jpegli_stdio_src(cinfo, infile);
+}
+
+// Forwards to jpegli_mem_src().
+void jpeg_mem_src(j_decompress_ptr cinfo, const unsigned char *inbuffer,
+                  unsigned long insize) {
+  jpegli_mem_src(cinfo, inbuffer, insize);
+}
+
+// Forwards to jpegli_read_header().
+int jpeg_read_header(j_decompress_ptr cinfo, boolean require_image) {
+  return jpegli_read_header(cinfo, require_image);
+}
+
+// Decompression output: scanlines, raw data and coefficients. Each wrapper
+// forwards its arguments unchanged to the jpegli function of the same name.
+
+// Forwards to jpegli_start_decompress().
+boolean jpeg_start_decompress(j_decompress_ptr cinfo) {
+  return jpegli_start_decompress(cinfo);
+}
+
+// Forwards to jpegli_read_scanlines().
+JDIMENSION jpeg_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines,
+                               JDIMENSION max_lines) {
+  return jpegli_read_scanlines(cinfo, scanlines, max_lines);
+}
+
+// Forwards to jpegli_skip_scanlines().
+JDIMENSION jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) {
+  return jpegli_skip_scanlines(cinfo, num_lines);
+}
+
+// Forwards to jpegli_crop_scanline().
+void jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
+                        JDIMENSION *width) {
+  jpegli_crop_scanline(cinfo, xoffset, width);
+}
+
+// Forwards to jpegli_finish_decompress().
+boolean jpeg_finish_decompress(j_decompress_ptr cinfo) {
+  return jpegli_finish_decompress(cinfo);
+}
+
+// Forwards to jpegli_read_raw_data().
+JDIMENSION jpeg_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data,
+                              JDIMENSION max_lines) {
+  return jpegli_read_raw_data(cinfo, data, max_lines);
+}
+
+// Forwards to jpegli_read_coefficients().
+jvirt_barray_ptr *jpeg_read_coefficients(j_decompress_ptr cinfo) {
+  return jpegli_read_coefficients(cinfo);
+}
+
+// Scan-level input/output control. Each wrapper forwards its arguments
+// unchanged to the jpegli function of the same name.
+
+// Forwards to jpegli_has_multiple_scans().
+boolean jpeg_has_multiple_scans(j_decompress_ptr cinfo) {
+  return jpegli_has_multiple_scans(cinfo);
+}
+
+// Forwards to jpegli_start_output().
+boolean jpeg_start_output(j_decompress_ptr cinfo, int scan_number) {
+  return jpegli_start_output(cinfo, scan_number);
+}
+
+// Forwards to jpegli_finish_output().
+boolean jpeg_finish_output(j_decompress_ptr cinfo) {
+  return jpegli_finish_output(cinfo);
+}
+
+// Forwards to jpegli_input_complete().
+boolean jpeg_input_complete(j_decompress_ptr cinfo) {
+  return jpegli_input_complete(cinfo);
+}
+
+// Forwards to jpegli_consume_input().
+int jpeg_consume_input(j_decompress_ptr cinfo) {
+  return jpegli_consume_input(cinfo);
+}
+
+// Output dimension helpers. jpeg_core_output_dimensions is only compiled when
+// JPEG_LIB_VERSION is at least 80.
+#if JPEG_LIB_VERSION >= 80
+// Forwards to jpegli_core_output_dimensions().
+void jpeg_core_output_dimensions(j_decompress_ptr cinfo) {
+  jpegli_core_output_dimensions(cinfo);
+}
+#endif
+// Forwards to jpegli_calc_output_dimensions().
+void jpeg_calc_output_dimensions(j_decompress_ptr cinfo) {
+  jpegli_calc_output_dimensions(cinfo);
+}
+
+// Marker handling on the decompression side. Each wrapper forwards its
+// arguments unchanged to the jpegli function of the same name.
+
+// Forwards to jpegli_save_markers().
+void jpeg_save_markers(j_decompress_ptr cinfo, int marker_code,
+                       unsigned int length_limit) {
+  jpegli_save_markers(cinfo, marker_code, length_limit);
+}
+
+// Forwards to jpegli_set_marker_processor().
+void jpeg_set_marker_processor(j_decompress_ptr cinfo, int marker_code,
+                               jpeg_marker_parser_method routine) {
+  jpegli_set_marker_processor(cinfo, marker_code, routine);
+}
+
+// Forwards to jpegli_read_icc_profile().
+boolean jpeg_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr,
+                              unsigned int *icc_data_len) {
+  return jpegli_read_icc_profile(cinfo, icc_data_ptr, icc_data_len);
+}
+
+// Forwards to jpegli_abort_decompress().
+void jpeg_abort_decompress(j_decompress_ptr cinfo) {
+  jpegli_abort_decompress(cinfo);
+}
+
+// Forwards to jpegli_destroy_decompress().
+void jpeg_destroy_decompress(j_decompress_ptr cinfo) {
+  jpegli_destroy_decompress(cinfo);
+}
+
+// Compressor creation and data destinations. Each wrapper forwards its
+// arguments unchanged to the jpegli function of the same name.
+
+// Forwards to jpegli_CreateCompress().
+void jpeg_CreateCompress(j_compress_ptr cinfo, int version,
+                         size_t structsize) {
+  jpegli_CreateCompress(cinfo, version, structsize);
+}
+
+// Forwards to jpegli_stdio_dest().
+void jpeg_stdio_dest(j_compress_ptr cinfo, FILE *outfile) {
+  jpegli_stdio_dest(cinfo, outfile);
+}
+
+// Forwards to jpegli_mem_dest().
+void jpeg_mem_dest(j_compress_ptr cinfo, unsigned char **outbuffer,
+                   unsigned long *outsize) {
+  jpegli_mem_dest(cinfo, outbuffer, outsize);
+}
+
+// Compression parameter setup. Each wrapper forwards its arguments unchanged
+// to the jpegli function of the same name. jpeg_default_qtables and
+// jpeg_calc_jpeg_dimensions are only compiled when JPEG_LIB_VERSION is at
+// least 70.
+
+// Forwards to jpegli_set_defaults().
+void jpeg_set_defaults(j_compress_ptr cinfo) {
+  jpegli_set_defaults(cinfo);
+}
+
+// Forwards to jpegli_default_colorspace().
+void jpeg_default_colorspace(j_compress_ptr cinfo) {
+  jpegli_default_colorspace(cinfo);
+}
+
+// Forwards to jpegli_set_colorspace().
+void jpeg_set_colorspace(j_compress_ptr cinfo, J_COLOR_SPACE colorspace) {
+  jpegli_set_colorspace(cinfo, colorspace);
+}
+
+// Forwards to jpegli_set_quality().
+void jpeg_set_quality(j_compress_ptr cinfo, int quality,
+                      boolean force_baseline) {
+  jpegli_set_quality(cinfo, quality, force_baseline);
+}
+
+// Forwards to jpegli_set_linear_quality().
+void jpeg_set_linear_quality(j_compress_ptr cinfo, int scale_factor,
+                             boolean force_baseline) {
+  jpegli_set_linear_quality(cinfo, scale_factor, force_baseline);
+}
+
+#if JPEG_LIB_VERSION >= 70
+// Forwards to jpegli_default_qtables().
+void jpeg_default_qtables(j_compress_ptr cinfo, boolean force_baseline) {
+  jpegli_default_qtables(cinfo, force_baseline);
+}
+#endif
+
+// Forwards to jpegli_quality_scaling().
+int jpeg_quality_scaling(int quality) {
+  return jpegli_quality_scaling(quality);
+}
+
+// Forwards to jpegli_add_quant_table().
+void jpeg_add_quant_table(j_compress_ptr cinfo, int which_tbl,
+                          const unsigned int *basic_table, int scale_factor,
+                          boolean force_baseline) {
+  jpegli_add_quant_table(cinfo, which_tbl, basic_table, scale_factor,
+                         force_baseline);
+}
+
+// Forwards to jpegli_simple_progression().
+void jpeg_simple_progression(j_compress_ptr cinfo) {
+  jpegli_simple_progression(cinfo);
+}
+
+// Forwards to jpegli_suppress_tables().
+void jpeg_suppress_tables(j_compress_ptr cinfo, boolean suppress) {
+  jpegli_suppress_tables(cinfo, suppress);
+}
+
+#if JPEG_LIB_VERSION >= 70
+// Forwards to jpegli_calc_jpeg_dimensions().
+void jpeg_calc_jpeg_dimensions(j_compress_ptr cinfo) {
+  jpegli_calc_jpeg_dimensions(cinfo);
+}
+#endif
+
+// Parameter copying and marker writing on the compression side. Each wrapper
+// forwards its arguments unchanged to the jpegli function of the same name.
+
+// Forwards to jpegli_copy_critical_parameters().
+void jpeg_copy_critical_parameters(j_decompress_ptr srcinfo,
+                                   j_compress_ptr dstinfo) {
+  jpegli_copy_critical_parameters(srcinfo, dstinfo);
+}
+
+// Forwards to jpegli_write_m_header().
+void jpeg_write_m_header(j_compress_ptr cinfo, int marker,
+                         unsigned int datalen) {
+  jpegli_write_m_header(cinfo, marker, datalen);
+}
+
+// Forwards to jpegli_write_m_byte().
+void jpeg_write_m_byte(j_compress_ptr cinfo, int val) {
+  jpegli_write_m_byte(cinfo, val);
+}
+
+// Forwards to jpegli_write_marker().
+void jpeg_write_marker(j_compress_ptr cinfo, int marker, const JOCTET *dataptr,
+                       unsigned int datalen) {
+  jpegli_write_marker(cinfo, marker, dataptr, datalen);
+}
+
+// Forwards to jpegli_write_icc_profile().
+void jpeg_write_icc_profile(j_compress_ptr cinfo, const JOCTET *icc_data_ptr,
+                            unsigned int icc_data_len) {
+  jpegli_write_icc_profile(cinfo, icc_data_ptr, icc_data_len);
+}
+
+// Compression data flow and teardown. Each wrapper forwards its arguments
+// unchanged to the jpegli function of the same name.
+
+// Forwards to jpegli_start_compress().
+void jpeg_start_compress(j_compress_ptr cinfo, boolean write_all_tables) {
+  jpegli_start_compress(cinfo, write_all_tables);
+}
+
+// Forwards to jpegli_write_tables().
+void jpeg_write_tables(j_compress_ptr cinfo) {
+  jpegli_write_tables(cinfo);
+}
+
+// Forwards to jpegli_write_scanlines().
+JDIMENSION jpeg_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines,
+                                JDIMENSION num_lines) {
+  return jpegli_write_scanlines(cinfo, scanlines, num_lines);
+}
+
+// Forwards to jpegli_write_raw_data().
+JDIMENSION jpeg_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data,
+                               JDIMENSION num_lines) {
+  return jpegli_write_raw_data(cinfo, data, num_lines);
+}
+
+// Forwards to jpegli_write_coefficients().
+void jpeg_write_coefficients(j_compress_ptr cinfo,
+                             jvirt_barray_ptr *coef_arrays) {
+  jpegli_write_coefficients(cinfo, coef_arrays);
+}
+
+// Forwards to jpegli_finish_compress().
+void jpeg_finish_compress(j_compress_ptr cinfo) {
+  jpegli_finish_compress(cinfo);
+}
+
+// Forwards to jpegli_abort_compress().
+void jpeg_abort_compress(j_compress_ptr cinfo) {
+  jpegli_abort_compress(cinfo);
+}
+
+// Forwards to jpegli_destroy_compress().
+void jpeg_destroy_compress(j_compress_ptr cinfo) {
+  jpegli_destroy_compress(cinfo);
+}
+
+// Forwards to jpegli_resync_to_restart().
+boolean jpeg_resync_to_restart(j_decompress_ptr cinfo, int desired) {
+  return jpegli_resync_to_restart(cinfo, desired);
+}
+
+// Forwards to jpegli_new_colormap().
+void jpeg_new_colormap(j_decompress_ptr cinfo) {
+  jpegli_new_colormap(cinfo);
+}
diff --git a/lib/jpegli/memory_manager.cc b/lib/jpegli/memory_manager.cc
new file mode 100644
index 0000000..3a8f230
--- /dev/null
+++ b/lib/jpegli/memory_manager.cc
@@ -0,0 +1,186 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/memory_manager.h"
+
+#include <string.h>
+
+#include <hwy/aligned_allocator.h>
+#include <vector>
+
+#include "lib/jpegli/common_internal.h"
+#include "lib/jpegli/error.h"
+
+// Control block for a "virtual" sample array. In this implementation the
+// whole array is allocated up front (see RequestVirtualArray), so the control
+// block only records the backing buffer and the access limits.
+struct jvirt_sarray_control {
+ // Row pointers of the fully allocated array.
+ JSAMPARRAY full_buffer;
+ // Total number of rows in full_buffer.
+ size_t numrows;
+ // Maximum number of rows a single access may request.
+ JDIMENSION maxaccess;
+};
+
+// Control block for a "virtual" coefficient-block array; same layout and
+// meaning as jvirt_sarray_control, but the rows hold JBLOCKs.
+struct jvirt_barray_control {
+ // Row pointers of the fully allocated array.
+ JBLOCKARRAY full_buffer;
+ // Total number of rows in full_buffer.
+ size_t numrows;
+ // Maximum number of rows a single access may request.
+ JDIMENSION maxaccess;
+};
+
+namespace jpegli {
+
+namespace {
+
+// Memory manager state. `pub` must stay the first member because cinfo->mem
+// (a jpeg_memory_mgr*) is cast back and forth to MemoryManager*.
+struct MemoryManager {
+ struct jpeg_memory_mgr pub;
+ // Pointers owned by each pool. Entries [0, JPOOL_NUMPOOLS) were obtained
+ // with malloc(); entries [JPOOL_NUMPOOLS, 2 * JPOOL_NUMPOOLS) with
+ // hwy::AllocateAlignedBytes() (see Alloc / FreePool).
+ std::vector<void*> owned_ptrs[2 * JPOOL_NUMPOOLS];
+ // Bytes currently allocated per pool, indexed like owned_ptrs.
+ uint64_t pool_memory_usage[2 * JPOOL_NUMPOOLS];
+ // Sum of pool_memory_usage over all pools.
+ uint64_t total_memory_usage;
+ // Largest value total_memory_usage has reached so far.
+ uint64_t peak_memory_usage;
+};
+
+// Allocates `sizeofobject` bytes and registers them with pool `pool_id`.
+// Pool ids below JPOOL_NUMPOOLS are served by malloc(), the remaining ids by
+// the Highway aligned allocator. Reports a JPEGLI_ERROR on an invalid pool id,
+// when the configured max_memory_to_use would be exceeded, or when the
+// underlying allocator returns nullptr.
+void* Alloc(j_common_ptr cinfo, int pool_id, size_t sizeofobject) {
+  auto* manager = reinterpret_cast<MemoryManager*>(cinfo->mem);
+  const bool pool_in_range = (pool_id >= 0 && pool_id < 2 * JPOOL_NUMPOOLS);
+  if (!pool_in_range) {
+    JPEGLI_ERROR("Invalid pool id %d", pool_id);
+  }
+  // A limit of zero (or less) means "unlimited".
+  if (manager->pub.max_memory_to_use > 0) {
+    const uint64_t requested_total =
+        manager->total_memory_usage + static_cast<uint64_t>(sizeofobject);
+    const uint64_t limit =
+        static_cast<uint64_t>(manager->pub.max_memory_to_use);
+    if (requested_total > limit) {
+      JPEGLI_ERROR("Total memory usage exceeding %ld",
+                   manager->pub.max_memory_to_use);
+    }
+  }
+  void* block = (pool_id < JPOOL_NUMPOOLS)
+                    ? malloc(sizeofobject)
+                    : hwy::AllocateAlignedBytes(sizeofobject, nullptr, nullptr);
+  if (block == nullptr) {
+    JPEGLI_ERROR("Out of memory");
+  }
+  // Bookkeeping: remember the pointer for FreePool and update the counters.
+  manager->owned_ptrs[pool_id].push_back(block);
+  manager->pool_memory_usage[pool_id] += sizeofobject;
+  manager->total_memory_usage += sizeofobject;
+  manager->peak_memory_usage =
+      std::max(manager->peak_memory_usage, manager->total_memory_usage);
+  return block;
+}
+
+// Greatest common divisor (Euclid's algorithm); gcd(a, 0) == a.
+constexpr size_t gcd(size_t a, size_t b) { return b == 0 ? a : gcd(b, a % b); }
+// Least common multiple. Divides by the gcd before multiplying so that the
+// intermediate value cannot overflow when the result itself is representable,
+// and returns 0 (instead of dividing by zero) when either argument is 0.
+constexpr size_t lcm(size_t a, size_t b) {
+  return (a == 0 || b == 0) ? 0 : (a / gcd(a, b)) * b;
+}
+
+// Allocates a 2d array of `numrows` rows with room for `samplesperrow`
+// elements of type T each and returns the table of row pointers.
+// The row-pointer table is taken from `pool_id`; the element storage is one
+// contiguous buffer that always comes from the aligned variant of the pool,
+// with a row stride that is a multiple of both sizeof(T) and HWY_ALIGNMENT.
+template <typename T>
+T** Alloc2dArray(j_common_ptr cinfo, int pool_id, JDIMENSION samplesperrow,
+                 JDIMENSION numrows) {
+  T** rows = Allocate<T*>(cinfo, numrows, pool_id);
+  // Map an unaligned pool id onto its aligned counterpart.
+  const int storage_pool =
+      (pool_id < JPOOL_NUMPOOLS) ? pool_id + JPOOL_NUMPOOLS : pool_id;
+  const size_t row_alignment = lcm(sizeof(T), HWY_ALIGNMENT);
+  const size_t row_bytes = RoundUpTo(samplesperrow * sizeof(T), row_alignment);
+  const size_t row_elems = row_bytes / sizeof(T);
+  T* storage = Allocate<T>(cinfo, numrows * row_elems, storage_pool);
+  for (size_t y = 0; y < numrows; ++y) {
+    rows[y] = storage + y * row_elems;
+  }
+  return rows;
+}
+
+// Creates the control block for a virtual array and immediately allocates the
+// full backing 2d array. Only JPOOL_IMAGE is accepted as `pool_id`. When
+// `pre_zero` is set, the first `samplesperrow` elements of every row are
+// cleared to zero.
+template <typename Control, typename T>
+Control* RequestVirtualArray(j_common_ptr cinfo, int pool_id, boolean pre_zero,
+                             JDIMENSION samplesperrow, JDIMENSION numrows,
+                             JDIMENSION maxaccess) {
+  if (pool_id != JPOOL_IMAGE) {
+    JPEGLI_ERROR("Only image lifetime virtual arrays are supported.");
+  }
+  Control* control = Allocate<Control>(cinfo, 1, pool_id);
+  control->full_buffer =
+      Alloc2dArray<T>(cinfo, pool_id, samplesperrow, numrows);
+  control->numrows = numrows;
+  control->maxaccess = maxaccess;
+  if (!pre_zero) {
+    return control;
+  }
+  const size_t used_row_bytes = samplesperrow * sizeof(T);
+  for (size_t y = 0; y < numrows; ++y) {
+    memset(control->full_buffer[y], 0, used_row_bytes);
+  }
+  return control;
+}
+
+// Intentionally a no-op: RequestVirtualArray already allocates the complete
+// array, so there is nothing left to realize here.
+void RealizeVirtualArrays(j_common_ptr cinfo) {
+ // Nothing to do, the full arrays were realized at request time already.
+}
+
+// Returns a pointer to `num_rows` consecutive row pointers of the virtual
+// array starting at `start_row`.
+// Reports a JPEGLI_ERROR if more rows than `maxaccess` are requested, if the
+// requested range does not lie inside the array, or if the array has no
+// backing buffer. `writable` is not used by this implementation.
+template <typename Control, typename T>
+T** AccessVirtualArray(j_common_ptr cinfo, Control* ptr, JDIMENSION start_row,
+                       JDIMENSION num_rows, boolean writable) {
+  if (num_rows > ptr->maxaccess) {
+    JPEGLI_ERROR("Invalid virtual array access, num rows %u vs max rows %u",
+                 num_rows, ptr->maxaccess);
+  }
+  // Compute the end row in 64 bits: both operands are JDIMENSION, so the
+  // plain sum could wrap around and slip past the range check below.
+  const uint64_t end_row = static_cast<uint64_t>(start_row) + num_rows;
+  if (end_row > ptr->numrows) {
+    // Explicit casts keep the arguments in sync with the format specifiers
+    // (numrows is a size_t, which "%u" does not describe).
+    JPEGLI_ERROR("Invalid virtual array access, %llu vs %llu total rows",
+                 static_cast<unsigned long long>(end_row),
+                 static_cast<unsigned long long>(ptr->numrows));
+  }
+  if (ptr->full_buffer == nullptr) {
+    JPEGLI_ERROR("Invalid virtual array access, array not realized.");
+  }
+  return ptr->full_buffer + start_row;
+}
+
+// Forgets all pointers recorded for `pool_id` and subtracts the pool's byte
+// count from the total. Does not free the pointers; callers do that first.
+void ClearPool(j_common_ptr cinfo, int pool_id) {
+  auto* manager = reinterpret_cast<MemoryManager*>(cinfo->mem);
+  uint64_t& pool_bytes = manager->pool_memory_usage[pool_id];
+  manager->total_memory_usage -= pool_bytes;
+  pool_bytes = 0;
+  manager->owned_ptrs[pool_id].clear();
+}
+
+// Releases everything allocated from pool `pool_id`, i.e. both the
+// malloc()-backed pool and its aligned counterpart at
+// JPOOL_NUMPOOLS + pool_id. Reports a JPEGLI_ERROR for an invalid pool id.
+void FreePool(j_common_ptr cinfo, int pool_id) {
+  auto* manager = reinterpret_cast<MemoryManager*>(cinfo->mem);
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS) {
+    JPEGLI_ERROR("Invalid pool id %d", pool_id);
+  }
+  // Plain allocations first.
+  std::vector<void*>& plain = manager->owned_ptrs[pool_id];
+  for (size_t i = 0; i < plain.size(); ++i) {
+    free(plain[i]);
+  }
+  ClearPool(cinfo, pool_id);
+  // Then the aligned allocations of the same lifetime.
+  const int aligned_pool_id = JPOOL_NUMPOOLS + pool_id;
+  std::vector<void*>& aligned = manager->owned_ptrs[aligned_pool_id];
+  for (size_t i = 0; i < aligned.size(); ++i) {
+    hwy::FreeAlignedBytes(aligned[i], nullptr, nullptr);
+  }
+  ClearPool(cinfo, aligned_pool_id);
+}
+
+// Frees every pool, destroys the MemoryManager itself and resets cinfo->mem.
+void SelfDestruct(j_common_ptr cinfo) {
+  auto* manager = reinterpret_cast<MemoryManager*>(cinfo->mem);
+  int pool_id = 0;
+  while (pool_id < JPOOL_NUMPOOLS) {
+    FreePool(cinfo, pool_id);
+    ++pool_id;
+  }
+  cinfo->mem = nullptr;
+  delete manager;
+}
+
+} // namespace
+
+// Creates a MemoryManager, wires up all jpeg_memory_mgr callbacks to the
+// implementations in this file and installs it as cinfo->mem. The manager is
+// released again through its self_destruct callback.
+void InitMemoryManager(j_common_ptr cinfo) {
+  // Value-initialize (note the parentheses) so that every member of `pub`
+  // that is not assigned explicitly below starts out zeroed rather than
+  // holding an indeterminate value.
+  MemoryManager* mem = new MemoryManager();
+  mem->pub.alloc_small = jpegli::Alloc;
+  mem->pub.alloc_large = jpegli::Alloc;
+  mem->pub.alloc_sarray = jpegli::Alloc2dArray<JSAMPLE>;
+  mem->pub.alloc_barray = jpegli::Alloc2dArray<JBLOCK>;
+  mem->pub.request_virt_sarray =
+      jpegli::RequestVirtualArray<jvirt_sarray_control, JSAMPLE>;
+  mem->pub.request_virt_barray =
+      jpegli::RequestVirtualArray<jvirt_barray_control, JBLOCK>;
+  mem->pub.realize_virt_arrays = jpegli::RealizeVirtualArrays;
+  mem->pub.access_virt_sarray =
+      jpegli::AccessVirtualArray<jvirt_sarray_control, JSAMPLE>;
+  mem->pub.access_virt_barray =
+      jpegli::AccessVirtualArray<jvirt_barray_control, JBLOCK>;
+  mem->pub.free_pool = jpegli::FreePool;
+  mem->pub.self_destruct = jpegli::SelfDestruct;
+  // Zero means "no memory limit" (see the check in Alloc).
+  mem->pub.max_memory_to_use = 0;
+  mem->total_memory_usage = 0;
+  mem->peak_memory_usage = 0;
+  memset(mem->pool_memory_usage, 0, sizeof(mem->pool_memory_usage));
+  cinfo->mem = reinterpret_cast<struct jpeg_memory_mgr*>(mem);
+}
+
+} // namespace jpegli
diff --git a/lib/jpegli/memory_manager.h b/lib/jpegli/memory_manager.h
new file mode 100644
index 0000000..3e2bdab
--- /dev/null
+++ b/lib/jpegli/memory_manager.h
@@ -0,0 +1,45 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_MEMORY_MANAGER_H_
+#define LIB_JPEGLI_MEMORY_MANAGER_H_
+
+#include <stdlib.h>
+
+#include "lib/jpegli/common.h"
+
+#define JPOOL_PERMANENT_ALIGNED (JPOOL_NUMPOOLS + JPOOL_PERMANENT)
+#define JPOOL_IMAGE_ALIGNED (JPOOL_NUMPOOLS + JPOOL_IMAGE)
+
+namespace jpegli {
+
+void InitMemoryManager(j_common_ptr cinfo);
+
+// Allocates an array of `len` objects of type T from pool `pool_id` through
+// the memory manager's alloc_small callback. The memory is not initialized
+// and no constructors are run.
+template <typename T>
+T* Allocate(j_common_ptr cinfo, size_t len, int pool_id = JPOOL_PERMANENT) {
+  // `len * sizeof(T)` could wrap around for a huge `len` and silently yield
+  // an undersized buffer. Saturate to the largest size_t instead, so the
+  // request is presumably rejected by the allocator rather than
+  // under-allocated.
+  const size_t max_bytes = static_cast<size_t>(-1);
+  const size_t num_bytes =
+      (len > max_bytes / sizeof(T)) ? max_bytes : len * sizeof(T);
+  void* p = (*cinfo->mem->alloc_small)(cinfo, pool_id, num_bytes);
+  return reinterpret_cast<T*>(p);
+}
+
+// Convenience overload for decompress objects; delegates to the j_common_ptr
+// version.
+template <typename T>
+T* Allocate(j_decompress_ptr cinfo, size_t len, int pool_id = JPOOL_PERMANENT) {
+  j_common_ptr common = reinterpret_cast<j_common_ptr>(cinfo);
+  return Allocate<T>(common, len, pool_id);
+}
+
+// Convenience overload for compress objects; delegates to the j_common_ptr
+// version.
+template <typename T>
+T* Allocate(j_compress_ptr cinfo, size_t len, int pool_id = JPOOL_PERMANENT) {
+  j_common_ptr common = reinterpret_cast<j_common_ptr>(cinfo);
+  return Allocate<T>(common, len, pool_id);
+}
+
+// Returns writable access to one row (row index `by`) of the coefficient
+// buffer of component `c`, obtained through the memory manager's
+// access_virt_barray callback. T is a compress or decompress object pointer.
+template <typename T>
+JBLOCKARRAY GetBlockRow(T cinfo, int c, JDIMENSION by) {
+  j_common_ptr common = reinterpret_cast<j_common_ptr>(cinfo);
+  auto coeff_buffer = cinfo->master->coeff_buffers[c];
+  return (*cinfo->mem->access_virt_barray)(common, coeff_buffer, by, 1, true);
+}
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_MEMORY_MANAGER_H_
diff --git a/lib/jpegli/output_suspension_test.cc b/lib/jpegli/output_suspension_test.cc
new file mode 100644
index 0000000..73db791
--- /dev/null
+++ b/lib/jpegli/output_suspension_test.cc
@@ -0,0 +1,219 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+
+namespace jpegli {
+namespace {
+
+// Size the output buffer is given in DestinationManager::init_destination.
+static constexpr size_t kInitialBufferSize = 1024;
+// Size the output buffer is resized to right before jpegli_finish_compress()
+// is called in the tests below.
+static constexpr size_t kFinalBufferSize = 18;
+
+// Destination manager whose empty_output_buffer callback always returns
+// FALSE, so a full buffer is never emptied from inside the library; the tests
+// drain and resize the buffer themselves via EmptyTo(). `pub` must remain the
+// first member because cinfo->dest is cast back to DestinationManager*.
+struct DestinationManager {
+  jpeg_destination_mgr pub;
+  std::vector<uint8_t> buffer;
+
+  DestinationManager() {
+    pub.term_destination = term_destination;
+    pub.empty_output_buffer = empty_output_buffer;
+    pub.init_destination = init_destination;
+  }
+
+  // Points the library back at the start of `buffer`, marking all of it free.
+  void Rewind() {
+    pub.free_in_buffer = buffer.size();
+    pub.next_output_byte = buffer.data();
+  }
+
+  // Appends the bytes written so far to `output`, optionally resizes the
+  // buffer to `new_size` (0 keeps the current size) and rewinds.
+  void EmptyTo(std::vector<uint8_t>* output, size_t new_size = 0) {
+    output->insert(output->end(), buffer.data(), pub.next_output_byte);
+    if (new_size != 0) {
+      buffer.resize(new_size);
+    }
+    Rewind();
+  }
+
+  // Gives the buffer its initial size and hands it to the library.
+  static void init_destination(j_compress_ptr cinfo) {
+    auto* self = reinterpret_cast<DestinationManager*>(cinfo->dest);
+    self->buffer.resize(kInitialBufferSize);
+    self->Rewind();
+  }
+
+  // Never empties the buffer; always reports FALSE.
+  static boolean empty_output_buffer(j_compress_ptr cinfo) { return FALSE; }
+
+  // Nothing to flush: the test collects the remaining bytes with EmptyTo().
+  static void term_destination(j_compress_ptr cinfo) {}
+};
+
+// Parameters of one output-suspension test case.
+struct TestConfig {
+ // Input image description (and pixels, once generated).
+ TestImage input;
+ // Compression parameters; the tests read the sampling factors from here.
+ CompressParams jparams;
+ // Size the output buffer is resized to each time it is drained mid-stream.
+ size_t buffer_size;
+ // Number of scanlines offered to the encoder per batch (PixelData test).
+ size_t lines_batch_size;
+};
+
+// Value-parameterized fixture; each test receives one TestConfig.
+class OutputSuspensionTestParam : public ::testing::TestWithParam<TestConfig> {
+};
+
+// Encodes generated pixels through jpegli_write_scanlines() using a
+// destination that never empties itself, draining the output buffer by hand
+// whenever the encoder consumes fewer lines than offered, then decodes the
+// result with libjpeg and compares it against the input.
+TEST_P(OutputSuspensionTestParam, PixelData) {
+ jpeg_compress_struct cinfo = {};
+ TestConfig config = GetParam();
+ TestImage& input = config.input;
+ GeneratePixels(&input);
+ DestinationManager dest;
+ std::vector<uint8_t> compressed;
+ // The encoding runs inside a lambda returning bool; ERROR_HANDLER_SETUP is
+ // presumably what makes it return false on a library error (its definition
+ // is not in this file).
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ cinfo.dest = reinterpret_cast<jpeg_destination_mgr*>(&dest);
+
+ cinfo.image_width = input.xsize;
+ cinfo.image_height = input.ysize;
+ cinfo.input_components = input.components;
+ cinfo.in_color_space = JCS_RGB;
+ jpegli_set_defaults(&cinfo);
+ cinfo.comp_info[0].v_samp_factor = config.jparams.v_sampling[0];
+ jpegli_set_progressive_level(&cinfo, 0);
+ cinfo.optimize_coding = FALSE;
+ jpegli_start_compress(&cinfo, TRUE);
+
+ size_t stride = cinfo.image_width * cinfo.input_components;
+ std::vector<uint8_t> row_bytes(config.lines_batch_size * stride);
+ while (cinfo.next_scanline < cinfo.image_height) {
+ // Copy the next batch of at most lines_batch_size rows into row_bytes.
+ size_t lines_left = cinfo.image_height - cinfo.next_scanline;
+ size_t num_lines = std::min(config.lines_batch_size, lines_left);
+ memcpy(&row_bytes[0], &input.pixels[cinfo.next_scanline * stride],
+ num_lines * stride);
+ std::vector<JSAMPROW> rows(num_lines);
+ for (size_t i = 0; i < num_lines; ++i) {
+ rows[i] = &row_bytes[i * stride];
+ }
+ // Keep offering the remaining rows of the batch; whenever not all of
+ // them were taken, drain the output buffer and resize it to
+ // config.buffer_size before retrying.
+ size_t lines_done = 0;
+ while (lines_done < num_lines) {
+ lines_done += jpegli_write_scanlines(&cinfo, &rows[lines_done],
+ num_lines - lines_done);
+ if (lines_done < num_lines) {
+ dest.EmptyTo(&compressed, config.buffer_size);
+ }
+ }
+ }
+ // Finish with a buffer of kFinalBufferSize bytes, then collect the tail.
+ dest.EmptyTo(&compressed, kFinalBufferSize);
+ jpegli_finish_compress(&cinfo);
+ dest.EmptyTo(&compressed);
+ return true;
+ };
+ ASSERT_TRUE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ TestImage output;
+ DecodeWithLibjpeg(CompressParams(), DecompressParams(), compressed, &output);
+ VerifyOutputImage(input, output, 2.5);
+}
+
+// Like PixelData, but feeds pre-generated YCbCr component planes through
+// jpegli_write_raw_data(). Whenever a call returns 0 the output buffer is
+// drained by hand and the call is repeated. The result is decoded with
+// libjpeg in raw-data mode and compared against the input.
+TEST_P(OutputSuspensionTestParam, RawData) {
+  jpeg_compress_struct cinfo = {};
+  TestConfig config = GetParam();
+  // lines_batch_size is not used by this test, so only run it for the
+  // configurations where it is 1.
+  if (config.lines_batch_size != 1) return;
+  TestImage& input = config.input;
+  input.color_space = JCS_YCbCr;
+  GeneratePixels(&input);
+  GenerateRawData(config.jparams, &input);
+  DestinationManager dest;
+  std::vector<uint8_t> compressed;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    cinfo.dest = reinterpret_cast<jpeg_destination_mgr*>(&dest);
+    cinfo.image_width = input.xsize;
+    cinfo.image_height = input.ysize;
+    cinfo.input_components = input.components;
+    cinfo.in_color_space = JCS_YCbCr;
+    jpegli_set_defaults(&cinfo);
+    cinfo.comp_info[0].v_samp_factor = config.jparams.v_sampling[0];
+    jpegli_set_progressive_level(&cinfo, 0);
+    cinfo.optimize_coding = FALSE;
+    cinfo.raw_data_in = TRUE;
+    jpegli_start_compress(&cinfo, TRUE);
+
+    std::vector<std::vector<uint8_t>> raw_data = input.raw_data;
+    // Number of image rows covered by one jpegli_write_raw_data() call.
+    size_t max_lines = config.jparams.max_v_sample() * DCTSIZE;
+    std::vector<std::vector<JSAMPROW>> rowdata(cinfo.num_components);
+    std::vector<JSAMPARRAY> data(cinfo.num_components);
+    for (int c = 0; c < cinfo.num_components; ++c) {
+      rowdata[c].resize(config.jparams.v_samp(c) * DCTSIZE);
+      data[c] = &rowdata[c][0];
+    }
+    while (cinfo.next_scanline < cinfo.image_height) {
+      // Set up the row pointers of every component for the current batch;
+      // rows beyond the component's padded height are passed as nullptr.
+      for (int c = 0; c < cinfo.num_components; ++c) {
+        size_t cwidth = cinfo.comp_info[c].width_in_blocks * DCTSIZE;
+        size_t cheight = cinfo.comp_info[c].height_in_blocks * DCTSIZE;
+        size_t num_lines = config.jparams.v_samp(c) * DCTSIZE;
+        size_t y0 = (cinfo.next_scanline / max_lines) * num_lines;
+        for (size_t i = 0; i < num_lines; ++i) {
+          rowdata[c][i] =
+              (y0 + i < cheight ? &raw_data[c][(y0 + i) * cwidth] : nullptr);
+        }
+      }
+      // A return value of 0 means nothing was consumed: make room in the
+      // output buffer and try again.
+      while (jpegli_write_raw_data(&cinfo, &data[0], max_lines) == 0) {
+        dest.EmptyTo(&compressed, config.buffer_size);
+      }
+    }
+    // Finish with a buffer of kFinalBufferSize bytes, then collect the tail.
+    dest.EmptyTo(&compressed, kFinalBufferSize);
+    jpegli_finish_compress(&cinfo);
+    dest.EmptyTo(&compressed);
+    return true;
+  };
+  // Fail the test right away if encoding did not complete (same handling as
+  // in the PixelData test) instead of going on to decode a partial stream.
+  ASSERT_TRUE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  DecompressParams dparams;
+  dparams.output_mode = RAW_DATA;
+  TestImage output;
+  DecodeWithLibjpeg(CompressParams(), dparams, compressed, &output);
+  VerifyOutputImage(input, output, 3.5);
+}
+
+// Builds the full cross product of test cases: image heights of 1080 plus
+// {0, 1, 8, 9} rows, luma vertical sampling {1, 2}, scanline batch sizes
+// {1, 8, 117} and output buffer sizes {1, 16, 16384}; the width is fixed at
+// 1920.
+std::vector<TestConfig> GenerateTests() {
+  const size_t kBaseWidth = 1920;
+  const size_t kBaseHeight = 1080;
+  std::vector<TestConfig> configs;
+  for (int extra_rows : {0, 1, 8, 9}) {
+    for (int luma_v_samp : {1, 2}) {
+      for (int batch_lines : {1, 8, 117}) {
+        for (int out_buf_size : {1, 16, 16 << 10}) {
+          TestConfig cfg;
+          cfg.input.xsize = kBaseWidth;
+          cfg.input.ysize = kBaseHeight + extra_rows;
+          cfg.jparams.h_sampling = {1, 1, 1};
+          cfg.jparams.v_sampling = {luma_v_samp, 1, 1};
+          cfg.buffer_size = out_buf_size;
+          cfg.lines_batch_size = batch_lines;
+          configs.push_back(cfg);
+        }
+      }
+    }
+  }
+  return configs;
+}
+
+// Streams a compact description of the test case: the input image, the
+// compression parameters, then the batch size and buffer size.
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+  os << c.input;
+  os << c.jparams;
+  return os << "Lines" << c.lines_batch_size << "BufSize" << c.buffer_size;
+}
+
+// Produces the test-case name by streaming the TestConfig parameter into a
+// string.
+std::string TestDescription(
+    const testing::TestParamInfo<OutputSuspensionTestParam::ParamType>& info) {
+  std::ostringstream description;
+  description << info.param;
+  return description.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(OutputSuspensionTest, OutputSuspensionTestParam,
+ testing::ValuesIn(GenerateTests()),
+ TestDescription);
+
+} // namespace
+} // namespace jpegli
diff --git a/lib/jpegli/quant.cc b/lib/jpegli/quant.cc
new file mode 100644
index 0000000..36f1df4
--- /dev/null
+++ b/lib/jpegli/quant.cc
@@ -0,0 +1,768 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/quant.h"
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+#include "lib/jpegli/adaptive_quantization.h"
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/status.h"
+
+namespace jpegli {
+
+namespace {
+
+// Global scale is chosen in a way that butteraugli 3-norm matches libjpeg
+// with the same quality setting. Fitted for quality 90 on jyrki31 corpus.
+constexpr float kGlobalScaleXYB = 1.43951668f;
+constexpr float kGlobalScaleYCbCr = 1.73966010f;
+
+// Base quantization values used in XYB mode: three consecutive tables of 64
+// entries each (marked "c = 0/1/2" below). SetQuantMatrices selects the table
+// for index c at offset c * DCTSIZE2.
+static constexpr float kBaseQuantMatrixXYB[] = {
+ // c = 0
+ 7.5629935265f,
+ 19.8247814178f,
+ 22.5724945068f,
+ 20.6706695557f,
+ 22.6864585876f,
+ 23.5696277618f,
+ 25.8129081726f,
+ 36.3307571411f,
+ 19.8247814178f,
+ 21.5503177643f,
+ 19.9372234344f,
+ 20.5424213409f,
+ 21.8645496368f,
+ 23.9041385651f,
+ 28.2844066620f,
+ 32.6609764099f,
+ 22.5724945068f,
+ 19.9372234344f,
+ 21.9017257690f,
+ 19.1223449707f,
+ 21.7515811920f,
+ 24.6724700928f,
+ 25.4249649048f,
+ 32.6653823853f,
+ 20.6706695557f,
+ 20.5424213409f,
+ 19.1223449707f,
+ 20.1610221863f,
+ 25.3719692230f,
+ 25.9668903351f,
+ 30.9804954529f,
+ 31.3406009674f,
+ 22.6864585876f,
+ 21.8645496368f,
+ 21.7515811920f,
+ 25.3719692230f,
+ 26.2431850433f,
+ 40.5992202759f,
+ 43.2624626160f,
+ 63.3010940552f,
+ 23.5696277618f,
+ 23.9041385651f,
+ 24.6724700928f,
+ 25.9668903351f,
+ 40.5992202759f,
+ 48.3026771545f,
+ 34.0964355469f,
+ 61.9852142334f,
+ 25.8129081726f,
+ 28.2844066620f,
+ 25.4249649048f,
+ 30.9804954529f,
+ 43.2624626160f,
+ 34.0964355469f,
+ 34.4937438965f,
+ 66.9702758789f,
+ 36.3307571411f,
+ 32.6609764099f,
+ 32.6653823853f,
+ 31.3406009674f,
+ 63.3010940552f,
+ 61.9852142334f,
+ 66.9702758789f,
+ 39.9652709961f,
+ // c = 1
+ 1.6262000799f,
+ 3.2199242115f,
+ 3.4903779030f,
+ 3.9148359299f,
+ 4.8337211609f,
+ 4.9108843803f,
+ 5.3137121201f,
+ 6.1676793098f,
+ 3.2199242115f,
+ 3.4547898769f,
+ 3.6036829948f,
+ 4.2652835846f,
+ 4.8368387222f,
+ 4.8226222992f,
+ 5.6120514870f,
+ 6.3431472778f,
+ 3.4903779030f,
+ 3.6036829948f,
+ 3.9044559002f,
+ 4.3374395370f,
+ 4.8435096741f,
+ 5.4057979584f,
+ 5.6066360474f,
+ 6.1075134277f,
+ 3.9148359299f,
+ 4.2652835846f,
+ 4.3374395370f,
+ 4.6064834595f,
+ 5.1751475334f,
+ 5.4013924599f,
+ 6.0399808884f,
+ 6.7825231552f,
+ 4.8337211609f,
+ 4.8368387222f,
+ 4.8435096741f,
+ 5.1751475334f,
+ 5.3748049736f,
+ 6.1410837173f,
+ 7.6529307365f,
+ 7.5235214233f,
+ 4.9108843803f,
+ 4.8226222992f,
+ 5.4057979584f,
+ 5.4013924599f,
+ 6.1410837173f,
+ 6.3431472778f,
+ 7.1083049774f,
+ 7.6008300781f,
+ 5.3137121201f,
+ 5.6120514870f,
+ 5.6066360474f,
+ 6.0399808884f,
+ 7.6529307365f,
+ 7.1083049774f,
+ 7.0943155289f,
+ 7.0478363037f,
+ 6.1676793098f,
+ 6.3431472778f,
+ 6.1075134277f,
+ 6.7825231552f,
+ 7.5235214233f,
+ 7.6008300781f,
+ 7.0478363037f,
+ 6.9186143875f,
+ // c = 2
+ 3.3038473129f,
+ 10.0689258575f,
+ 12.2785224915f,
+ 14.6041173935f,
+ 16.2107315063f,
+ 19.2314529419f,
+ 28.0129547119f,
+ 55.6682891846f,
+ 10.0689258575f,
+ 11.4085016251f,
+ 11.3871345520f,
+ 15.4934167862f,
+ 16.5364933014f,
+ 14.9153423309f,
+ 26.3748722076f,
+ 40.8614425659f,
+ 12.2785224915f,
+ 11.3871345520f,
+ 17.0886878967f,
+ 13.9500350952f,
+ 16.0003223419f,
+ 28.5660629272f,
+ 26.2124195099f,
+ 30.1260128021f,
+ 14.6041173935f,
+ 15.4934167862f,
+ 13.9500350952f,
+ 21.1235027313f,
+ 26.1579780579f,
+ 25.5579223633f,
+ 40.6859359741f,
+ 33.8056335449f,
+ 16.2107315063f,
+ 16.5364933014f,
+ 16.0003223419f,
+ 26.1579780579f,
+ 26.8042831421f,
+ 26.1587715149f,
+ 35.7343978882f,
+ 43.6857032776f,
+ 19.2314529419f,
+ 14.9153423309f,
+ 28.5660629272f,
+ 25.5579223633f,
+ 26.1587715149f,
+ 34.5418128967f,
+ 41.3197937012f,
+ 48.7867660522f,
+ 28.0129547119f,
+ 26.3748722076f,
+ 26.2124195099f,
+ 40.6859359741f,
+ 35.7343978882f,
+ 41.3197937012f,
+ 47.6329460144f,
+ 55.3498458862f,
+ 55.6682891846f,
+ 40.8614425659f,
+ 30.1260128021f,
+ 33.8056335449f,
+ 43.6857032776f,
+ 48.7867660522f,
+ 55.3498458862f,
+ 63.6065597534f,
+};
+
+// Base quantization values for YCbCr: three consecutive tables of 64 entries
+// each (marked "c = 0/1/2" below). QuantValsToDistance indexes this array at
+// quant_idx * DCTSIZE2.
+static const float kBaseQuantMatrixYCbCr[] = {
+ // c = 0
+ 1.2397409345866273f, //
+ 1.7227115097630963f, //
+ 2.9212167156636855f, //
+ 2.812737435286529f, //
+ 3.339819711906184f, //
+ 3.463603762596166f, //
+ 3.840915217993518f, //
+ 3.86956f, //
+ 1.7227115097630963f, //
+ 2.0928894413636874f, //
+ 2.8456760904429297f, //
+ 2.704506820909662f, //
+ 3.4407673520905337f, //
+ 3.166232352090534f, //
+ 4.025208741558432f, //
+ 4.035324490952577f, //
+ 2.9212167156636855f, //
+ 2.8456760904429297f, //
+ 2.9587403520905338f, //
+ 3.3862948970669273f, //
+ 3.619523781336757f, //
+ 3.9046279999999998f, //
+ 3.757835838431854f, //
+ 4.237447515714274f, //
+ 2.812737435286529f, //
+ 2.704506820909662f, //
+ 3.3862948970669273f, //
+ 3.380058821812233f, //
+ 4.1679867415584315f, //
+ 4.805510627261856f, //
+ 4.784259f, //
+ 4.605934f, //
+ 3.339819711906184f, //
+ 3.4407673520905337f, //
+ 3.619523781336757f, //
+ 4.1679867415584315f, //
+ 4.579851258441568f, //
+ 4.923237f, //
+ 5.574107f, //
+ 5.48533336146308f, //
+ 3.463603762596166f, //
+ 3.166232352090534f, //
+ 3.9046279999999998f, //
+ 4.805510627261856f, //
+ 4.923237f, //
+ 5.43936f, //
+ 5.093895741558431f, //
+ 6.0872254423617225f, //
+ 3.840915217993518f, //
+ 4.025208741558432f, //
+ 3.757835838431854f, //
+ 4.784259f, //
+ 5.574107f, //
+ 5.093895741558431f, //
+ 5.438461f, //
+ 5.4037359493250845f, //
+ 3.86956f, //
+ 4.035324490952577f, //
+ 4.237447515714274f, //
+ 4.605934f, //
+ 5.48533336146308f, //
+ 6.0872254423617225f, //
+ 5.4037359493250845f, //
+ 4.37787101190424f,
+ // c = 1
+ 2.8236197786377537f, //
+ 6.495639358561486f, //
+ 9.310489207538302f, //
+ 10.64747864717083f, //
+ 11.07419143098738f, //
+ 17.146390223910462f, //
+ 18.463982229408998f, //
+ 29.087001644203088f, //
+ 6.495639358561486f, //
+ 8.890103846667353f, //
+ 8.976895794294748f, //
+ 13.666270550318826f, //
+ 16.547071905624193f, //
+ 16.63871382827686f, //
+ 26.778396930893695f, //
+ 21.33034294694781f, //
+ 9.310489207538302f, //
+ 8.976895794294748f, //
+ 11.08737706005991f, //
+ 18.20548239870446f, //
+ 19.752481654011646f, //
+ 23.985660533114896f, //
+ 102.6457378402362f, //
+ 24.450989f, //
+ 10.64747864717083f, //
+ 13.666270550318826f, //
+ 18.20548239870446f, //
+ 18.628012327860365f, //
+ 16.042509519487183f, //
+ 25.04918273242625f, //
+ 25.017140189353015f, //
+ 35.79788782635831f, //
+ 11.07419143098738f, //
+ 16.547071905624193f, //
+ 19.752481654011646f, //
+ 16.042509519487183f, //
+ 19.373482748612577f, //
+ 14.677529999999999f, //
+ 19.94695960400931f, //
+ 51.094112f, //
+ 17.146390223910462f, //
+ 16.63871382827686f, //
+ 23.985660533114896f, //
+ 25.04918273242625f, //
+ 14.677529999999999f, //
+ 31.320412426835304f, //
+ 46.357234000000005f, //
+ 67.48111451705412f, //
+ 18.463982229408998f, //
+ 26.778396930893695f, //
+ 102.6457378402362f, //
+ 25.017140189353015f, //
+ 19.94695960400931f, //
+ 46.357234000000005f, //
+ 61.315764694388044f, //
+ 88.34665293823721f, //
+ 29.087001644203088f, //
+ 21.33034294694781f, //
+ 24.450989f, //
+ 35.79788782635831f, //
+ 51.094112f, //
+ 67.48111451705412f, //
+ 88.34665293823721f, //
+ 112.16099098350989f,
+ // c = 2
+ 2.9217254961255255f, //
+ 4.497681013199305f, //
+ 7.356344520940414f, //
+ 6.583891506504051f, //
+ 8.535608740100237f, //
+ 8.799434353234647f, //
+ 9.188341534163023f, //
+ 9.482700481227672f, //
+ 4.497681013199305f, //
+ 6.309548851989123f, //
+ 7.024608962670982f, //
+ 7.156445324163424f, //
+ 8.049059218663244f, //
+ 7.0124290657218555f, //
+ 6.711923184393611f, //
+ 8.380307846134853f, //
+ 7.356344520940414f, //
+ 7.024608962670982f, //
+ 6.892101177327445f, //
+ 6.882819916277163f, //
+ 8.782226090078568f, //
+ 6.8774750000000004f, //
+ 7.8858175969577955f, //
+ 8.67909f, //
+ 6.583891506504051f, //
+ 7.156445324163424f, //
+ 6.882819916277163f, //
+ 7.003072944847055f, //
+ 7.7223464701024875f, //
+ 7.955425720217421f, //
+ 7.4734110000000005f, //
+ 8.362933242943903f, //
+ 8.535608740100237f, //
+ 8.049059218663244f, //
+ 8.782226090078568f, //
+ 7.7223464701024875f, //
+ 6.778005927001542f, //
+ 9.484922741558432f, //
+ 9.043702663686046f, //
+ 8.053178199770173f, //
+ 8.799434353234647f, //
+ 7.0124290657218555f, //
+ 6.8774750000000004f, //
+ 7.955425720217421f, //
+ 9.484922741558432f, //
+ 8.607606527385098f, //
+ 9.922697394370815f, //
+ 64.25135180237939f, //
+ 9.188341534163023f, //
+ 6.711923184393611f, //
+ 7.8858175969577955f, //
+ 7.4734110000000005f, //
+ 9.043702663686046f, //
+ 9.922697394370815f, //
+ 63.184936549738225f, //
+ 83.35294340273799f, //
+ 9.482700481227672f, //
+ 8.380307846134853f, //
+ 8.67909f, //
+ 8.362933242943903f, //
+ 8.053178199770173f, //
+ 64.25135180237939f, //
+ 83.35294340273799f, //
+ 114.89202448569779f, //
+};
+
+// Scale factors for 4:2:0 content: one global factor and one factor per
+// coefficient of an 8x8 block (the table is symmetric).
+// NOTE(review): the code applying these is outside this excerpt; presumably
+// they are used when IsYUV420() holds -- confirm in SetQuantMatrices.
+static const float k420GlobalScale = 1.22;
+static const float k420Rescale[64] = {
+ 0.4093, 0.3209, 0.3477, 0.3333, 0.3144, 0.2823, 0.3214, 0.3354, //
+ 0.3209, 0.3111, 0.3489, 0.2801, 0.3059, 0.3119, 0.4135, 0.3445, //
+ 0.3477, 0.3489, 0.3586, 0.3257, 0.2727, 0.3754, 0.3369, 0.3484, //
+ 0.3333, 0.2801, 0.3257, 0.3020, 0.3515, 0.3410, 0.3971, 0.3839, //
+ 0.3144, 0.3059, 0.2727, 0.3515, 0.3105, 0.3397, 0.2716, 0.3836, //
+ 0.2823, 0.3119, 0.3754, 0.3410, 0.3397, 0.3212, 0.3203, 0.0726, //
+ 0.3214, 0.4135, 0.3369, 0.3971, 0.2716, 0.3203, 0.0798, 0.0553, //
+ 0.3354, 0.3445, 0.3484, 0.3839, 0.3836, 0.0726, 0.0553, 0.3368, //
+};
+
+// Two consecutive 8x8 base quantization tables (c = 0 and c = 1).
+// NOTE(review): going by the name these are the "standard" tables selected
+// via m->use_std_tables; the selecting code is outside this excerpt.
+static const float kBaseQuantMatrixStd[] = {
+ // c = 0
+ 16.0f, 11.0f, 10.0f, 16.0f, 24.0f, 40.0f, 51.0f, 61.0f, //
+ 12.0f, 12.0f, 14.0f, 19.0f, 26.0f, 58.0f, 60.0f, 55.0f, //
+ 14.0f, 13.0f, 16.0f, 24.0f, 40.0f, 57.0f, 69.0f, 56.0f, //
+ 14.0f, 17.0f, 22.0f, 29.0f, 51.0f, 87.0f, 80.0f, 62.0f, //
+ 18.0f, 22.0f, 37.0f, 56.0f, 68.0f, 109.0f, 103.0f, 77.0f, //
+ 24.0f, 35.0f, 55.0f, 64.0f, 81.0f, 104.0f, 113.0f, 92.0f, //
+ 49.0f, 64.0f, 78.0f, 87.0f, 103.0f, 121.0f, 120.0f, 101.0f, //
+ 72.0f, 92.0f, 95.0f, 98.0f, 112.0f, 100.0f, 103.0f, 99.0f, //
+ // c = 1
+ 17.0f, 18.0f, 24.0f, 47.0f, 99.0f, 99.0f, 99.0f, 99.0f, //
+ 18.0f, 21.0f, 26.0f, 66.0f, 99.0f, 99.0f, 99.0f, 99.0f, //
+ 24.0f, 26.0f, 56.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, //
+ 47.0f, 66.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, //
+ 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, //
+ 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, //
+ 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, //
+ 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, //
+};
+
+// Per-coefficient zero-bias multipliers for YCbCr, three consecutive 8x8
+// tables (c = 0, 1, 2); the first entry of every table is 0.
+// NOTE(review): "LQ" presumably stands for the low-quality end of the range;
+// the code consuming this table is outside this excerpt.
+static const float kZeroBiasMulYCbCrLQ[] = {
+ // c = 0
+ 0.0000f, 0.0568f, 0.3880f, 0.6190f, 0.6190f, 0.4490f, 0.4490f, 0.6187f, //
+ 0.0568f, 0.5829f, 0.6189f, 0.6190f, 0.6190f, 0.7190f, 0.6190f, 0.6189f, //
+ 0.3880f, 0.6189f, 0.6190f, 0.6190f, 0.6190f, 0.6190f, 0.6187f, 0.6100f, //
+ 0.6190f, 0.6190f, 0.6190f, 0.6190f, 0.5890f, 0.3839f, 0.7160f, 0.6190f, //
+ 0.6190f, 0.6190f, 0.6190f, 0.5890f, 0.6190f, 0.3880f, 0.5860f, 0.4790f, //
+ 0.4490f, 0.7190f, 0.6190f, 0.3839f, 0.3880f, 0.6190f, 0.6190f, 0.6190f, //
+ 0.4490f, 0.6190f, 0.6187f, 0.7160f, 0.5860f, 0.6190f, 0.6204f, 0.6190f, //
+ 0.6187f, 0.6189f, 0.6100f, 0.6190f, 0.4790f, 0.6190f, 0.6190f, 0.3480f, //
+ // c = 1
+ 0.0000f, 1.1640f, 0.9373f, 1.1319f, 0.8016f, 0.9136f, 1.1530f, 0.9430f, //
+ 1.1640f, 0.9188f, 0.9160f, 1.1980f, 1.1830f, 0.9758f, 0.9430f, 0.9430f, //
+ 0.9373f, 0.9160f, 0.8430f, 1.1720f, 0.7083f, 0.9430f, 0.9430f, 0.9430f, //
+ 1.1319f, 1.1980f, 1.1720f, 1.1490f, 0.8547f, 0.9430f, 0.9430f, 0.9430f, //
+ 0.8016f, 1.1830f, 0.7083f, 0.8547f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, //
+ 0.9136f, 0.9758f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, //
+ 1.1530f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9480f, //
+ 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9480f, 0.9430f, //
+ // c = 2
+ 0.0000f, 1.3190f, 0.4308f, 0.4460f, 0.0661f, 0.0660f, 0.2660f, 0.2960f, //
+ 1.3190f, 0.3280f, 0.3093f, 0.0750f, 0.0505f, 0.1594f, 0.3060f, 0.2113f, //
+ 0.4308f, 0.3093f, 0.3060f, 0.1182f, 0.0500f, 0.3060f, 0.3915f, 0.2426f, //
+ 0.4460f, 0.0750f, 0.1182f, 0.0512f, 0.0500f, 0.2130f, 0.3930f, 0.1590f, //
+ 0.0661f, 0.0505f, 0.0500f, 0.0500f, 0.3055f, 0.3360f, 0.5148f, 0.5403f, //
+ 0.0660f, 0.1594f, 0.3060f, 0.2130f, 0.3360f, 0.5060f, 0.5874f, 0.3060f, //
+ 0.2660f, 0.3060f, 0.3915f, 0.3930f, 0.5148f, 0.5874f, 0.3060f, 0.3060f, //
+ 0.2960f, 0.2113f, 0.2426f, 0.1590f, 0.5403f, 0.3060f, 0.3060f, 0.3060f, //
+};
+
+// Per-coefficient zero-bias multipliers for YCbCr, three consecutive 8x8
+// tables (c = 0, 1, 2); the first entry of every table is 0.
+// NOTE(review): "HQ" presumably stands for the high-quality end of the range;
+// the code consuming this table is outside this excerpt.
+static const float kZeroBiasMulYCbCrHQ[] = {
+ // c = 0
+ 0.0000f, 0.0044f, 0.2521f, 0.6547f, 0.8161f, 0.6130f, 0.8841f, 0.8155f, //
+ 0.0044f, 0.6831f, 0.6553f, 0.6295f, 0.7848f, 0.7843f, 0.8474f, 0.7836f, //
+ 0.2521f, 0.6553f, 0.7834f, 0.7829f, 0.8161f, 0.8072f, 0.7743f, 0.9242f, //
+ 0.6547f, 0.6295f, 0.7829f, 0.8654f, 0.7829f, 0.6986f, 0.7818f, 0.7726f, //
+ 0.8161f, 0.7848f, 0.8161f, 0.7829f, 0.7471f, 0.7827f, 0.7843f, 0.7653f, //
+ 0.6130f, 0.7843f, 0.8072f, 0.6986f, 0.7827f, 0.7848f, 0.9508f, 0.7653f, //
+ 0.8841f, 0.8474f, 0.7743f, 0.7818f, 0.7843f, 0.9508f, 0.7839f, 0.8437f, //
+ 0.8155f, 0.7836f, 0.9242f, 0.7726f, 0.7653f, 0.7653f, 0.8437f, 0.7819f, //
+ // c = 1
+ 0.0000f, 1.0816f, 1.0556f, 1.2876f, 1.1554f, 1.1567f, 1.8851f, 0.5488f, //
+ 1.0816f, 1.1537f, 1.1850f, 1.0712f, 1.1671f, 2.0719f, 1.0544f, 1.4764f, //
+ 1.0556f, 1.1850f, 1.2870f, 1.1981f, 1.8181f, 1.2618f, 1.0564f, 1.1191f, //
+ 1.2876f, 1.0712f, 1.1981f, 1.4753f, 2.0609f, 1.0564f, 1.2645f, 1.0564f, //
+ 1.1554f, 1.1671f, 1.8181f, 2.0609f, 0.7324f, 1.1163f, 0.8464f, 1.0564f, //
+ 1.1567f, 2.0719f, 1.2618f, 1.0564f, 1.1163f, 1.0040f, 1.0564f, 1.0564f, //
+ 1.8851f, 1.0544f, 1.0564f, 1.2645f, 0.8464f, 1.0564f, 1.0564f, 1.0564f, //
+ 0.5488f, 1.4764f, 1.1191f, 1.0564f, 1.0564f, 1.0564f, 1.0564f, 1.0564f, //
+ // c = 2
+ 0.0000f, 0.5392f, 0.6659f, 0.8968f, 0.6829f, 0.6328f, 0.5802f, 0.4836f, //
+ 0.5392f, 0.6746f, 0.6760f, 0.6102f, 0.6015f, 0.6958f, 0.7327f, 0.4897f, //
+ 0.6659f, 0.6760f, 0.6957f, 0.6543f, 0.4396f, 0.6330f, 0.7081f, 0.2583f, //
+ 0.8968f, 0.6102f, 0.6543f, 0.5913f, 0.6457f, 0.5828f, 0.5139f, 0.3565f, //
+ 0.6829f, 0.6015f, 0.4396f, 0.6457f, 0.5633f, 0.4263f, 0.6371f, 0.5949f, //
+ 0.6328f, 0.6958f, 0.6330f, 0.5828f, 0.4263f, 0.2847f, 0.2909f, 0.6629f, //
+ 0.5802f, 0.7327f, 0.7081f, 0.5139f, 0.6371f, 0.2909f, 0.6644f, 0.6644f, //
+ 0.4836f, 0.4897f, 0.2583f, 0.3565f, 0.5949f, 0.6629f, 0.6644f, 0.6644f, //
+};
+
+// Zero-bias offsets for YCbCr, one value per component: all zero for the DC
+// table, three fitted values for the AC table.
+// NOTE(review): the code consuming these is outside this excerpt.
+static const float kZeroBiasOffsetYCbCrDC[] = {0.0f, 0.0f, 0.0f};
+
+static const float kZeroBiasOffsetYCbCrAC[] = {
+ 0.59082f,
+ 0.58146f,
+ 0.57988f,
+};
+
+// Values of m->cicp_transfer_function that get a reduced global scale
+// (x0.4 for PQ, x0.5 for HLG) in QuantValsToDistance and SetQuantMatrices.
+constexpr uint8_t kTransferFunctionPQ = 16;
+constexpr uint8_t kTransferFunctionHLG = 18;
+
+// Piecewise mapping from a distance value to a linear quality value:
+//   distance <= 0.1        -> 1
+//   0.1 < distance <= 4.6  -> linear in distance
+//   4.6 < distance <= 6.4  -> rational in distance
+//   6.4 < distance < 25    -> rational in the square root of distance
+//   otherwise              -> 5000
+float DistanceToLinearQuality(float distance) {
+  if (distance <= 0.1f) {
+    return 1.0f;
+  }
+  if (distance <= 4.6f) {
+    return (200.0f / 9.0f) * (distance - 0.1f);
+  }
+  if (distance <= 6.4f) {
+    return 5000.0f / (100.0f - (distance - 0.1f) / 0.09f);
+  }
+  if (distance < 25.0f) {
+    const float root = std::sqrt((848.0f * distance - 5330.0f) / 120.0f);
+    return 530000.0f / (3450.0f - 300.0f * root);
+  }
+  return 5000.0f;
+}
+
+// Per-coefficient exponent used by DistanceToScale / ScaleToDistance for the
+// non-linear part of the distance <-> scale mapping (1.0 keeps it linear).
+constexpr float kExponent[DCTSIZE2] = {
+ 1.00f, 0.51f, 0.67f, 0.74f, 1.00f, 1.00f, 1.00f, 1.00f, //
+ 0.51f, 0.66f, 0.69f, 0.87f, 1.00f, 1.00f, 1.00f, 1.00f, //
+ 0.67f, 0.69f, 0.84f, 0.83f, 0.96f, 1.00f, 1.00f, 1.00f, //
+ 0.74f, 0.87f, 0.83f, 1.00f, 1.00f, 0.91f, 0.91f, 1.00f, //
+ 1.00f, 1.00f, 0.96f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, //
+ 1.00f, 1.00f, 1.00f, 0.91f, 1.00f, 1.00f, 1.00f, 1.00f, //
+ 1.00f, 1.00f, 1.00f, 0.91f, 1.00f, 1.00f, 1.00f, 1.00f, //
+ 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, //
+};
+// Below this value distance and scale are identical; at or above it the
+// kExponent-based mapping is applied.
+constexpr float kDist0 = 1.5f; // distance where non-linearity kicks in.
+
+// Converts a distance into the scale applied to coefficient `k`.
+// Below kDist0 the scale equals the distance; otherwise it follows
+// kDist0^(1 - e) * distance^e with e = kExponent[k], but never drops below
+// half the distance.
+float DistanceToScale(float distance, int k) {
+  if (distance < kDist0) {
+    return distance;
+  }
+  const float power = kExponent[k];
+  const float factor = std::pow(kDist0, 1.0 - power);
+  const float lower_bound = 0.5f * distance;
+  const float curved = factor * std::pow(distance, power);
+  return std::max<float>(lower_bound, curved);
+}
+
+// Converts the scale of coefficient `k` back into a distance.
+// Below kDist0 the distance equals the scale; otherwise it follows
+// kDist0^(1 - e) * scale^e with e = 1 / kExponent[k], but never exceeds
+// twice the scale.
+float ScaleToDistance(float scale, int k) {
+  if (scale < kDist0) {
+    return scale;
+  }
+  const float inv_power = 1.0 / kExponent[k];
+  const float factor = std::pow(kDist0, 1.0 - inv_power);
+  const float upper_bound = 2.0f * scale;
+  const float curved = factor * std::pow(scale, inv_power);
+  return std::min<float>(upper_bound, curved);
+}
+
+// Estimates the distance that corresponds to the quantization tables
+// currently installed in `cinfo`.
+// For every quantization value the range of distances that would round to
+// that value (relative to kBaseQuantMatrixYCbCr and the global scale) is
+// computed, and the running [dist_min, dist_max] interval is narrowed with
+// it. Returns dist_max if no lower bound was found, dist_min if no upper
+// bound was found, and the midpoint of the interval otherwise.
+float QuantValsToDistance(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ float global_scale = kGlobalScaleYCbCr;
+ if (m->cicp_transfer_function == kTransferFunctionPQ) {
+ global_scale *= .4f;
+ } else if (m->cicp_transfer_function == kTransferFunctionHLG) {
+ global_scale *= .5f;
+ }
+ // Largest quantization value; values at this limit give no upper bound.
+ int quant_max = m->force_baseline ? 255 : 32767U;
+ static const float kDistMax = 10000.0f;
+ float dist_min = 0.0f;
+ float dist_max = kDistMax;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ int quant_idx = cinfo->comp_info[c].quant_tbl_no;
+ // NOTE(review): kBaseQuantMatrixYCbCr holds three tables, so this assumes
+ // quant_idx <= 2 and a non-null quant_tbl_ptrs[quant_idx] -- confirm that
+ // callers guarantee both.
+ uint16_t* quantval = cinfo->quant_tbl_ptrs[quant_idx]->quantval;
+ const float* base_qm = &kBaseQuantMatrixYCbCr[quant_idx * DCTSIZE2];
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ float dmin = 0.0;
+ float dmax = kDistMax;
+ float invq = 1.0f / base_qm[k] / global_scale;
+ int qval = quantval[k];
+ // A value of 1 gives no lower bound; otherwise the bound is the distance
+ // at which the scaled value would be qval - 0.5.
+ if (qval > 1) {
+ float scale_min = (qval - 0.5f) * invq;
+ dmin = ScaleToDistance(scale_min, k);
+ }
+ if (qval < quant_max) {
+ float scale_max = (qval + 0.5f) * invq;
+ dmax = ScaleToDistance(scale_max, k);
+ }
+ // Only apply a bound if it keeps the interval non-empty. The order of
+ // these two updates matters: the second sees the updated dist_min.
+ if (dmin <= dist_max) {
+ dist_min = std::max(dmin, dist_min);
+ }
+ if (dmax >= dist_min) {
+ dist_max = std::min(dist_max, dmax);
+ }
+ }
+ }
+ float distance;
+ if (dist_min == 0) {
+ distance = dist_max;
+ } else if (dist_max == kDistMax) {
+ distance = dist_min;
+ } else {
+ distance = 0.5f * (dist_min + dist_max);
+ }
+ return distance;
+}
+
+// Returns true if the JPEG color space is YCbCr and the sampling factors are
+// 2x2 for component 0 and 1x1 for components 1 and 2.
+bool IsYUV420(j_compress_ptr cinfo) {
+  if (cinfo->jpeg_color_space != JCS_YCbCr) {
+    return false;
+  }
+  const auto has_factors = [cinfo](int c, int h, int v) {
+    return cinfo->comp_info[c].h_samp_factor == h &&
+           cinfo->comp_info[c].v_samp_factor == v;
+  };
+  return has_factors(0, 2, 2) && has_factors(1, 1, 1) && has_factors(2, 1, 1);
+}
+
+} // namespace
+
+// Fills cinfo->quant_tbl_ptrs[] from the per-table distances.
+// The base matrices and the global scale depend on the mode: XYB, YCbCr
+// without standard tables, or the standard tables otherwise. Missing tables
+// are allocated, every value is clamped to [1, quant_max] and sent_table is
+// reset. If add_two_chroma_tables is set in the YCbCr case, component 2 is
+// switched to its own quantization table (index 2).
+void SetQuantMatrices(j_compress_ptr cinfo, float distances[NUM_QUANT_TBLS],
+ bool add_two_chroma_tables) {
+ jpeg_comp_master* m = cinfo->master;
+ const bool xyb = m->xyb_mode && cinfo->jpeg_color_space == JCS_RGB;
+ const bool is_yuv420 = IsYUV420(cinfo);
+
+ float global_scale;
+ bool non_linear_scaling = true;
+ const float* base_quant_matrix[NUM_QUANT_TBLS];
+ int num_base_tables;
+
+ if (xyb) {
+ global_scale = kGlobalScaleXYB;
+ num_base_tables = 3;
+ base_quant_matrix[0] = kBaseQuantMatrixXYB;
+ base_quant_matrix[1] = kBaseQuantMatrixXYB + DCTSIZE2;
+ base_quant_matrix[2] = kBaseQuantMatrixXYB + 2 * DCTSIZE2;
+ } else if (cinfo->jpeg_color_space == JCS_YCbCr && !m->use_std_tables) {
+ global_scale = kGlobalScaleYCbCr;
+ if (m->cicp_transfer_function == kTransferFunctionPQ) {
+ global_scale *= .4f;
+ } else if (m->cicp_transfer_function == kTransferFunctionHLG) {
+ global_scale *= .5f;
+ }
+ if (is_yuv420) {
+ global_scale *= k420GlobalScale;
+ }
+ if (add_two_chroma_tables) {
+ cinfo->comp_info[2].quant_tbl_no = 2;
+ num_base_tables = 3;
+ base_quant_matrix[0] = kBaseQuantMatrixYCbCr;
+ base_quant_matrix[1] = kBaseQuantMatrixYCbCr + DCTSIZE2;
+ base_quant_matrix[2] = kBaseQuantMatrixYCbCr + 2 * DCTSIZE2;
+ } else {
+ num_base_tables = 2;
+ base_quant_matrix[0] = kBaseQuantMatrixYCbCr;
+ // Use the Cr table for both Cb and Cr.
+ base_quant_matrix[1] = kBaseQuantMatrixYCbCr + 2 * DCTSIZE2;
+ }
+ } else {
+ // Standard tables: the distance is converted to a linear quality factor
+ // instead of the per-coefficient non-linear scale.
+ global_scale = 0.01f;
+ non_linear_scaling = false;
+ num_base_tables = 2;
+ base_quant_matrix[0] = kBaseQuantMatrixStd;
+ base_quant_matrix[1] = kBaseQuantMatrixStd + DCTSIZE2;
+ }
+
+ int quant_max = m->force_baseline ? 255 : 32767U;
+ for (int quant_idx = 0; quant_idx < num_base_tables; ++quant_idx) {
+ const float* base_qm = base_quant_matrix[quant_idx];
+ JQUANT_TBL** qtable = &cinfo->quant_tbl_ptrs[quant_idx];
+ if (*qtable == nullptr) {
+ *qtable = jpegli_alloc_quant_table(reinterpret_cast<j_common_ptr>(cinfo));
+ }
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ float scale = global_scale;
+ if (non_linear_scaling) {
+ scale *= DistanceToScale(distances[quant_idx], k);
+ // Extra per-coefficient rescaling for the non-luma tables in 4:2:0.
+ if (is_yuv420 && quant_idx > 0) {
+ scale *= k420Rescale[k];
+ }
+ } else {
+ scale *= DistanceToLinearQuality(distances[quant_idx]);
+ }
+ int qval = std::round(scale * base_qm[k]);
+ (*qtable)->quantval[k] = std::max(1, std::min(qval, quant_max));
+ }
+ (*qtable)->sent_table = FALSE;
+ }
+}
+
+// Initializes the per-component quantization multipliers (m->quant_mul) for
+// the given pass and the zero-bias tables (m->zero_bias_mul and
+// m->zero_bias_offset).
+// Reports an error through JPEGLI_ERROR if a component refers to a missing
+// quantization table or if a table contains a zero value.
+void InitQuantizer(j_compress_ptr cinfo, QuantPass pass) {
+ jpeg_comp_master* m = cinfo->master;
+ // Compute quantization multipliers from the quant table values.
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ int quant_idx = cinfo->comp_info[c].quant_tbl_no;
+ JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[quant_idx];
+ if (!quant_table) {
+ JPEGLI_ERROR("Missing quantization table %d for component %d", quant_idx,
+ c);
+ }
+ for (size_t k = 0; k < DCTSIZE2; k++) {
+ int val = quant_table->quantval[k];
+ if (val == 0) {
+ JPEGLI_ERROR("Invalid quantval 0.");
+ }
+ switch (pass) {
+ case QuantPass::NO_SEARCH:
+ m->quant_mul[c][k] = 8.0f / val;
+ break;
+ case QuantPass::SEARCH_FIRST_PASS:
+ // The first search pass uses a constant multiplier, independent of val.
+ m->quant_mul[c][k] = 128.0f;
+ break;
+ case QuantPass::SEARCH_SECOND_PASS:
+ // Note: only this pass stores the multiplier at the zig-zag-mapped index.
+ m->quant_mul[c][kJPEGZigZagOrder[k]] = 1.0f / (16 * val);
+ break;
+ }
+ }
+ }
+ if (m->use_adaptive_quantization) {
+ // Defaults: no bias for the DC coefficient, 0.5 for all AC coefficients.
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ m->zero_bias_mul[c][k] = k == 0 ? 0.0f : 0.5f;
+ m->zero_bias_offset[c][k] = k == 0 ? 0.0f : 0.5f;
+ }
+ }
+ if (cinfo->jpeg_color_space == JCS_YCbCr) {
+ // For YCbCr, blend the LQ and HQ multiplier tables according to the
+ // distance estimated from the quant tables: all HQ at distance <= kDistHQ,
+ // all LQ at distance >= kDistLQ, linear in between.
+ float distance = QuantValsToDistance(cinfo);
+ static const float kDistHQ = 1.0f;
+ static const float kDistLQ = 3.0f;
+ float mix0 = (distance - kDistHQ) / (kDistLQ - kDistHQ);
+ mix0 = std::max(0.0f, std::min(1.0f, mix0));
+ float mix1 = 1.0f - mix0;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ float mul0 = kZeroBiasMulYCbCrLQ[c * DCTSIZE2 + k];
+ float mul1 = kZeroBiasMulYCbCrHQ[c * DCTSIZE2 + k];
+ m->zero_bias_mul[c][k] = mix0 * mul0 + mix1 * mul1;
+ m->zero_bias_offset[c][k] =
+ k == 0 ? kZeroBiasOffsetYCbCrDC[c] : kZeroBiasOffsetYCbCrAC[c];
+ }
+ }
+ }
+ } else if (cinfo->jpeg_color_space == JCS_YCbCr) {
+ // Without adaptive quantization only the offsets are set here;
+ // zero_bias_mul is left untouched.
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ m->zero_bias_offset[c][k] =
+ k == 0 ? kZeroBiasOffsetYCbCrDC[c] : kZeroBiasOffsetYCbCrAC[c];
+ }
+ }
+ }
+}
+
+} // namespace jpegli
diff --git a/lib/jpegli/quant.h b/lib/jpegli/quant.h
new file mode 100644
index 0000000..cb37757
--- /dev/null
+++ b/lib/jpegli/quant.h
@@ -0,0 +1,26 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_QUANT_H_
+#define LIB_JPEGLI_QUANT_H_
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+// Sets the quantization tables of cinfo from the given per-table distances.
+// If add_two_chroma_tables is true, a third table may be created so that the
+// third component gets its own table.
+void SetQuantMatrices(j_compress_ptr cinfo, float distances[NUM_QUANT_TBLS],
+ bool add_two_chroma_tables);
+
+// Selects how InitQuantizer() derives the quantization multipliers.
+enum QuantPass {
+ NO_SEARCH,
+ SEARCH_FIRST_PASS,
+ SEARCH_SECOND_PASS,
+};
+
+// Initializes the quantization multipliers and zero-bias tables in
+// cinfo->master for the given pass.
+void InitQuantizer(j_compress_ptr cinfo, QuantPass pass);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_QUANT_H_
diff --git a/lib/jpegli/render.cc b/lib/jpegli/render.cc
new file mode 100644
index 0000000..24e7e99
--- /dev/null
+++ b/lib/jpegli/render.cc
@@ -0,0 +1,763 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/render.h"
+
+#include <string.h>
+
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <hwy/aligned_allocator.h>
+#include <vector>
+
+#include "lib/jpegli/color_quantize.h"
+#include "lib/jpegli/color_transform.h"
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/idct.h"
+#include "lib/jpegli/upsample.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+#ifdef MEMORY_SANITIZER
+#define JXL_MEMORY_SANITIZER 1
+#elif defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define JXL_MEMORY_SANITIZER 1
+#else
+#define JXL_MEMORY_SANITIZER 0
+#endif
+#else
+#define JXL_MEMORY_SANITIZER 0
+#endif
+
+#if JXL_MEMORY_SANITIZER
+#include "sanitizer/msan_interface.h"
+#endif
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/render.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Clamp;
+using hwy::HWY_NAMESPACE::Gt;
+using hwy::HWY_NAMESPACE::IfThenElseZero;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::NearestInt;
+using hwy::HWY_NAMESPACE::Or;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::ShiftLeftSame;
+using hwy::HWY_NAMESPACE::ShiftRightSame;
+using hwy::HWY_NAMESPACE::Vec;
+using D = HWY_FULL(float);
+using DI = HWY_FULL(int32_t);
+constexpr D d;
+constexpr DI di;
+
+// Accumulates per-coefficient statistics over a run of coefficients:
+// for each position k within a block (k = i % DCTSIZE2), nonzeros[k] is
+// incremented for every nonzero coefficient and sumabs[k] is increased by the
+// coefficient's absolute value.
+// Coefficients are processed Lanes(d) at a time with aligned vector loads and
+// stores.
+// NOTE(review): this presumably requires coeffs, nonzeros and sumabs to be
+// vector-aligned and coeffs_size to be a multiple of Lanes(d) - confirm.
+void GatherBlockStats(const int16_t* JXL_RESTRICT coeffs,
+ const size_t coeffs_size, int32_t* JXL_RESTRICT nonzeros,
+ int32_t* JXL_RESTRICT sumabs) {
+ for (size_t i = 0; i < coeffs_size; i += Lanes(d)) {
+ size_t k = i % DCTSIZE2;
+ const Rebind<int16_t, DI> di16;
+ // Widen the 16-bit coefficients to 32-bit lanes before accumulating.
+ const Vec<DI> coeff = PromoteTo(di, Load(di16, coeffs + i));
+ const auto abs_coeff = Abs(coeff);
+ const auto not_0 = Gt(abs_coeff, Zero(di));
+ const auto nzero = IfThenElseZero(not_0, Set(di, 1));
+ Store(Add(nzero, Load(di, nonzeros + k)), di, nonzeros + k);
+ Store(Add(abs_coeff, Load(di, sumabs + k)), di, sumabs + k);
+ }
+}
+
+// Adds 128/255 to every sample of the row, in place.
+// Works on whole vectors (at most 8 float lanes) with aligned loads/stores,
+// so the last vector may extend past xsize.
+void DecenterRow(float* row, size_t xsize) {
+  const HWY_CAPPED(float, 8) df;
+  const size_t step = Lanes(df);
+  const auto offset = Set(df, 128.0f / 255);
+  size_t x = 0;
+  while (x < xsize) {
+    float* pos = row + x;
+    Store(Add(Load(df, pos), offset), df, pos);
+    x += step;
+  }
+}
+
+// Adds the dither pattern of component c to xsize samples of the row.
+// The pattern row is chosen by y and the pattern column by x, both wrapped
+// with m->dither_mask_. Does nothing if the component has no dither pattern.
+void DitherRow(j_decompress_ptr cinfo, float* row, int c, size_t y,
+               size_t xsize) {
+  jpeg_decomp_master* m = cinfo->master;
+  if (!m->dither_[c]) {
+    return;
+  }
+  const auto mask = m->dither_mask_;
+  const float* pattern_row = &m->dither_[c][(y & mask) * m->dither_size_];
+  float* out = row;
+  for (size_t x = 0; x < xsize; ++x, ++out) {
+    *out += pattern_row[x & mask];
+  }
+}
+
+// Converts len samples of up to four planar float channels, starting at
+// column x0, to unsigned integers of type T and writes them interleaved to
+// output. Each sample is multiplied by multiplier, clamped to
+// [0, multiplier] and rounded to the nearest integer.
+// The loops advance by whole vectors with unaligned loads/stores, so up to
+// Lanes(d) - 1 samples past len may be read from the inputs and written to
+// output. Nothing is written if num_channels is not in 1..4.
+template <typename T>
+void StoreUnsignedRow(float* JXL_RESTRICT input[], size_t x0, size_t len,
+ size_t num_channels, float multiplier, T* output) {
+ const HWY_CAPPED(float, 8) d;
+ auto zero = Zero(d);
+ auto mul = Set(d, multiplier);
+ const Rebind<T, decltype(d)> du;
+#if JXL_MEMORY_SANITIZER
+ // The vector loads below read the padding after len; mark it initialized
+ // so that MSAN does not report these reads.
+ const size_t padding = hwy::RoundUpTo(len, Lanes(d)) - len;
+ for (size_t c = 0; c < num_channels; ++c) {
+ __msan_unpoison(input[c] + x0 + len, sizeof(input[c][0]) * padding);
+ }
+#endif
+ if (num_channels == 1) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul);
+ StoreU(DemoteTo(du, NearestInt(v0)), du, &output[i]);
+ }
+ } else if (num_channels == 2) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul);
+ auto v1 = Clamp(zero, Mul(LoadU(d, &input[1][x0 + i]), mul), mul);
+ StoreInterleaved2(DemoteTo(du, NearestInt(v0)),
+ DemoteTo(du, NearestInt(v1)), du, &output[2 * i]);
+ }
+ } else if (num_channels == 3) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul);
+ auto v1 = Clamp(zero, Mul(LoadU(d, &input[1][x0 + i]), mul), mul);
+ auto v2 = Clamp(zero, Mul(LoadU(d, &input[2][x0 + i]), mul), mul);
+ StoreInterleaved3(DemoteTo(du, NearestInt(v0)),
+ DemoteTo(du, NearestInt(v1)),
+ DemoteTo(du, NearestInt(v2)), du, &output[3 * i]);
+ }
+ } else if (num_channels == 4) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul);
+ auto v1 = Clamp(zero, Mul(LoadU(d, &input[1][x0 + i]), mul), mul);
+ auto v2 = Clamp(zero, Mul(LoadU(d, &input[2][x0 + i]), mul), mul);
+ auto v3 = Clamp(zero, Mul(LoadU(d, &input[3][x0 + i]), mul), mul);
+ StoreInterleaved4(DemoteTo(du, NearestInt(v0)),
+ DemoteTo(du, NearestInt(v1)),
+ DemoteTo(du, NearestInt(v2)),
+ DemoteTo(du, NearestInt(v3)), du, &output[4 * i]);
+ }
+ }
+#if JXL_MEMORY_SANITIZER
+ // Output written past len came from padding; mark it uninitialized again.
+ __msan_poison(output + num_channels * len,
+ sizeof(output[0]) * num_channels * padding);
+#endif
+}
+
+// Writes len samples of up to four planar float channels, starting at column
+// x0, to output without any scaling: a plain copy for one channel,
+// interleaved for 2..4 channels. The interleaving loops advance by whole
+// vectors, so they may read and write past len. Nothing is written if
+// num_channels is not in 1..4.
+// Note: the parameter is declared as input[3] but is indexed up to input[3]
+// in the four-channel case; callers must pass at least num_channels rows.
+void StoreFloatRow(float* JXL_RESTRICT input[3], size_t x0, size_t len,
+ size_t num_channels, float* output) {
+ const HWY_CAPPED(float, 8) d;
+ if (num_channels == 1) {
+ memcpy(output, input[0] + x0, len * sizeof(output[0]));
+ } else if (num_channels == 2) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ StoreInterleaved2(LoadU(d, &input[0][x0 + i]),
+ LoadU(d, &input[1][x0 + i]), d, &output[2 * i]);
+ }
+ } else if (num_channels == 3) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ StoreInterleaved3(LoadU(d, &input[0][x0 + i]),
+ LoadU(d, &input[1][x0 + i]),
+ LoadU(d, &input[2][x0 + i]), d, &output[3 * i]);
+ }
+ } else if (num_channels == 4) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ StoreInterleaved4(LoadU(d, &input[0][x0 + i]),
+ LoadU(d, &input[1][x0 + i]),
+ LoadU(d, &input[2][x0 + i]),
+ LoadU(d, &input[3][x0 + i]), d, &output[4 * i]);
+ }
+ }
+}
+
+// Floyd-Steinberg error diffusion weights used by WriteToOutput(): the share
+// of a pixel's quantization error added to the next pixel of the same row
+// (MR) and to the previous / same / next pixel of the following row
+// (BL / BM / BR). The four weights sum to 1.
+static constexpr float kFSWeightMR = 7.0f / 16.0f;
+static constexpr float kFSWeightBL = 3.0f / 16.0f;
+static constexpr float kFSWeightBM = 5.0f / 16.0f;
+static constexpr float kFSWeightBR = 1.0f / 16.0f;
+
+float LimitError(float error) {
+ float abserror = std::abs(error);
+ if (abserror > 48.0f) {
+ abserror = 32.0f;
+ } else if (abserror > 16.0f) {
+ abserror = 0.5f * abserror + 8.0f;
+ }
+ return error > 0.0f ? abserror : -abserror;
+}
+
+// Converts len pixels of the planar float rows, starting at column xoffset,
+// to the requested output representation and writes them to output.
+//
+// - If cinfo->quantize_colors is set and m->quant_pass_ == 1, every pixel is
+//   converted to 8 bits, optionally dithered (ordered or Floyd-Steinberg),
+//   and replaced by its colormap index (one byte per pixel).
+// - Otherwise the samples are written interleaved as uint8, uint16 or float
+//   according to m->output_data_type_, with the byte order swapped if
+//   m->swap_endianness_ is set.
+//
+// The conversion goes through m->output_scratch_ and the result is then
+// copied to output.
+void WriteToOutput(j_decompress_ptr cinfo, float* JXL_RESTRICT rows[],
+                   size_t xoffset, size_t len, size_t num_channels,
+                   uint8_t* JXL_RESTRICT output) {
+  jpeg_decomp_master* m = cinfo->master;
+  uint8_t* JXL_RESTRICT scratch_space = m->output_scratch_;
+  if (cinfo->quantize_colors && m->quant_pass_ == 1) {
+    float* error_row[kMaxComponents];
+    float* next_error_row[kMaxComponents];
+    if (cinfo->dither_mode == JDITHER_ORDERED) {
+      for (size_t c = 0; c < num_channels; ++c) {
+        DitherRow(cinfo, &rows[c][xoffset], c, cinfo->output_scanline,
+                  cinfo->output_width);
+      }
+    } else if (cinfo->dither_mode == JDITHER_FS) {
+      // The two error rows per channel swap roles on every scanline: one
+      // holds the error diffused into the current row, the other collects
+      // the error for the next row and is cleared first.
+      for (size_t c = 0; c < num_channels; ++c) {
+        if (cinfo->output_scanline % 2 == 0) {
+          error_row[c] = m->error_row_[c];
+          next_error_row[c] = m->error_row_[c + kMaxComponents];
+        } else {
+          error_row[c] = m->error_row_[c + kMaxComponents];
+          next_error_row[c] = m->error_row_[c];
+        }
+        memset(next_error_row[c], 0, cinfo->output_width * sizeof(float));
+      }
+    }
+    const float mul = 255.0f;
+    if (cinfo->dither_mode != JDITHER_FS) {
+      StoreUnsignedRow(rows, xoffset, len, num_channels, mul, scratch_space);
+    }
+    for (size_t i = 0; i < len; ++i) {
+      uint8_t* pixel = &scratch_space[num_channels * i];
+      if (cinfo->dither_mode == JDITHER_FS) {
+        for (size_t c = 0; c < num_channels; ++c) {
+          // Output pixel i comes from source column xoffset + i, as in the
+          // other paths; the error rows are indexed by output column.
+          float val =
+              rows[c][xoffset + i] * mul + LimitError(error_row[c][i]);
+          pixel[c] = std::round(std::min(255.0f, std::max(0.0f, val)));
+        }
+      }
+      int index = LookupColorIndex(cinfo, pixel);
+      output[i] = index;
+      if (cinfo->dither_mode == JDITHER_FS) {
+        // Neighbor indices are clamped at the row ends, so error that would
+        // leave the row is folded back onto the first / last pixel.
+        size_t prev_i = i > 0 ? i - 1 : 0;
+        size_t next_i = i + 1 < len ? i + 1 : len - 1;
+        for (size_t c = 0; c < num_channels; ++c) {
+          float error = pixel[c] - cinfo->colormap[c][index];
+          error_row[c][next_i] += kFSWeightMR * error;
+          next_error_row[c][prev_i] += kFSWeightBL * error;
+          next_error_row[c][i] += kFSWeightBM * error;
+          next_error_row[c][next_i] += kFSWeightBR * error;
+        }
+      }
+    }
+  } else if (m->output_data_type_ == JPEGLI_TYPE_UINT8) {
+    const float mul = 255.0;
+    StoreUnsignedRow(rows, xoffset, len, num_channels, mul, scratch_space);
+    memcpy(output, scratch_space, len * num_channels);
+  } else if (m->output_data_type_ == JPEGLI_TYPE_UINT16) {
+    const float mul = 65535.0;
+    uint16_t* tmp = reinterpret_cast<uint16_t*>(scratch_space);
+    StoreUnsignedRow(rows, xoffset, len, num_channels, mul, tmp);
+    if (m->swap_endianness_) {
+      // Swap the two bytes of every 16-bit sample.
+      const HWY_CAPPED(uint16_t, 8) du;
+      size_t output_len = len * num_channels;
+      for (size_t j = 0; j < output_len; j += Lanes(du)) {
+        auto v = LoadU(du, tmp + j);
+        auto vswap = Or(ShiftRightSame(v, 8), ShiftLeftSame(v, 8));
+        StoreU(vswap, du, tmp + j);
+      }
+    }
+    memcpy(output, tmp, len * num_channels * 2);
+  } else if (m->output_data_type_ == JPEGLI_TYPE_FLOAT) {
+    float* tmp = reinterpret_cast<float*>(scratch_space);
+    StoreFloatRow(rows, xoffset, len, num_channels, tmp);
+    if (m->swap_endianness_) {
+      size_t output_len = len * num_channels;
+      for (size_t j = 0; j < output_len; ++j) {
+        tmp[j] = BSwapFloat(tmp[j]);
+      }
+    }
+    memcpy(output, tmp, len * num_channels * 4);
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace jpegli {
+
+// Export the per-target implementations defined above so that
+// HWY_DYNAMIC_DISPATCH can select one of them at run time.
+HWY_EXPORT(GatherBlockStats);
+HWY_EXPORT(WriteToOutput);
+HWY_EXPORT(DecenterRow);
+
+// Dispatches to the GatherBlockStats implementation chosen for the current
+// CPU.
+void GatherBlockStats(const int16_t* JXL_RESTRICT coeffs,
+ const size_t coeffs_size, int32_t* JXL_RESTRICT nonzeros,
+ int32_t* JXL_RESTRICT sumabs) {
+ return HWY_DYNAMIC_DISPATCH(GatherBlockStats)(coeffs, coeffs_size, nonzeros,
+ sumabs);
+}
+
+// Dispatches to the WriteToOutput implementation chosen for the current CPU.
+void WriteToOutput(j_decompress_ptr cinfo, float* JXL_RESTRICT rows[],
+ size_t xoffset, size_t len, size_t num_channels,
+ uint8_t* JXL_RESTRICT output) {
+ return HWY_DYNAMIC_DISPATCH(WriteToOutput)(cinfo, rows, xoffset, len,
+ num_channels, output);
+}
+
+// Dispatches to the DecenterRow implementation chosen for the current CPU.
+void DecenterRow(float* row, size_t xsize) {
+ return HWY_DYNAMIC_DISPATCH(DecenterRow)(row, xsize);
+}
+
+// Returns true if component ci is sampled at the maximum horizontal and
+// vertical sampling factors of the image, i.e. it is not subsampled.
+bool ShouldApplyDequantBiases(j_decompress_ptr cinfo, int ci) {
+  const jpeg_component_info& comp = cinfo->comp_info[ci];
+  if (comp.h_samp_factor != cinfo->max_h_samp_factor) {
+    return false;
+  }
+  return comp.v_samp_factor == cinfo->max_v_samp_factor;
+}
+
+// Computes a dequantization bias for every AC coefficient position
+// (k = 1 .. DCTSIZE2 - 1) from the statistics gathered so far: num_blocks
+// blocks, nonzeros[k] nonzero coefficients and sumabs[k] as the sum of their
+// absolute values. Positions without any nonzero coefficient get a bias of
+// 0.5. biases[0] (DC) is not written.
+//
+// See the following article for the details:
+// J. R. Price and M. Rabbani, "Dequantization bias for JPEG decompression"
+// Proceedings International Conference on Information Technology: Coding and
+// Computing (Cat. No.PR00540), 2000, pp. 30-35, doi: 10.1109/ITCC.2000.844179.
+void ComputeOptimalLaplacianBiases(const int num_blocks, const int* nonzeros,
+ const int* sumabs, float* biases) {
+ for (size_t k = 1; k < DCTSIZE2; ++k) {
+ if (nonzeros[k] == 0) {
+ biases[k] = 0.5f;
+ continue;
+ }
+ // Notation adapted from the article
+ float N = num_blocks;
+ float N1 = nonzeros[k];
+ float N0 = num_blocks - N1;
+ float S = sumabs[k];
+ // Compute gamma from N0, N1, N, S (eq. 11), with A and B being just
+ // temporary grouping of terms.
+ float A = 4.0 * S + 2.0 * N;
+ float B = 4.0 * S - 2.0 * N1;
+ float gamma = (-1.0 * N0 + std::sqrt(N0 * N0 * 1.0 + A * B)) / A;
+ float gamma2 = gamma * gamma;
+ // The bias is computed from gamma with (eq. 5), where the quantization
+ // multiplier Q can be factored out and thus the bias can be applied
+ // directly on the quantized coefficient.
+ biases[k] =
+ 0.5 * (((1.0 + gamma2) / (1.0 - gamma2)) + 1.0 / std::log(gamma));
+ }
+}
+
+// Positions, within a quantization table / coefficient block, of the
+// SAVED_COEFS coefficients that block smoothing works with.
+constexpr std::array<int, SAVED_COEFS> Q_POS = {0, 1, 8,  16, 9,
+                                                2, 3, 10, 17, 24};
+
+// Returns true if the quantization values at all Q_POS positions are nonzero.
+bool is_nonzero_quantizers(const JQUANT_TBL* qtable) {
+  for (const int pos : Q_POS) {
+    if (qtable->quantval[pos] == 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Determine whether smoothing should be applied during decompression
+//
+// Returns false unless the image is progressive and cinfo->coef_bits is
+// available. It also returns false as soon as a component has no
+// quantization table, has a zero quantizer at one of the Q_POS positions, or
+// has coef_bits[0] < 0. Otherwise it returns true if any of the saved AC
+// entries of coef_bits is nonzero for some component.
+// As a side effect, the current coef_bits (and, after the first scan, the
+// previous ones) are copied into m->coef_bits_latch and
+// m->prev_coef_bits_latch for use by PredictSmooth().
+bool do_smoothing(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ bool smoothing_useful = false;
+
+ if (!cinfo->progressive_mode || cinfo->coef_bits == nullptr) {
+ return false;
+ }
+ auto coef_bits_latch = m->coef_bits_latch;
+ auto prev_coef_bits_latch = m->prev_coef_bits_latch;
+
+ for (int ci = 0; ci < cinfo->num_components; ci++) {
+ jpeg_component_info* compptr = &cinfo->comp_info[ci];
+ JQUANT_TBL* qtable = compptr->quant_table;
+ int* coef_bits = cinfo->coef_bits[ci];
+ // The previous scan's values are stored num_components entries later.
+ int* prev_coef_bits = cinfo->coef_bits[ci + cinfo->num_components];
+
+ // Return early if conditions for smoothing are not met
+ if (qtable == nullptr || !is_nonzero_quantizers(qtable) ||
+ coef_bits[0] < 0) {
+ return false;
+ }
+
+ coef_bits_latch[ci][0] = coef_bits[0];
+
+ for (int coefi = 1; coefi < SAVED_COEFS; coefi++) {
+ // -1 marks "no previous value" while at most one scan has been read.
+ prev_coef_bits_latch[ci][coefi] =
+ cinfo->input_scan_number > 1 ? prev_coef_bits[coefi] : -1;
+ if (coef_bits[coefi] != 0) {
+ smoothing_useful = true;
+ }
+ coef_bits_latch[ci][coefi] = coef_bits[coefi];
+ }
+ }
+
+ return smoothing_useful;
+}
+
+// Block smoothing: copies block (bx, iy) of the given component into
+// cinfo->master->smoothing_scratch_ and replaces some of its low-frequency
+// coefficients (positions Q_POS) by values predicted from the DC
+// coefficients of the surrounding 5x5 neighborhood of blocks.
+//
+// - blocks: block rows of the current iMCU row; iy is the row index within
+//   it, bx the block column. Neighbors outside the image are replaced by the
+//   nearest block inside it.
+// - An AC coefficient is only predicted if its coef_bits entry is nonzero
+//   and its current value is zero.
+// - If all saved AC coef_bits entries are -1, all SAVED_COEFS coefficients
+//   are candidates and the DC coefficient itself is replaced as well;
+//   otherwise only the first six positions are considered.
+//
+// The input blocks are not modified; callers read the result from the
+// scratch buffer.
+void PredictSmooth(j_decompress_ptr cinfo, JBLOCKARRAY blocks, int component,
+                   size_t bx, int iy) {
+  const size_t imcu_row = cinfo->output_iMCU_row;
+  int16_t* scratch = cinfo->master->smoothing_scratch_;
+  // Fixed-size, zero-initialized table; this function runs once per block,
+  // so a heap-allocated container is avoided here.
+  std::array<int, SAVED_COEFS> Q_VAL = {};
+  int* coef_bits;
+
+  std::array<std::array<int, 5>, 5> dc_values;
+  auto& compinfo = cinfo->comp_info[component];
+  const size_t by0 = imcu_row * compinfo.v_samp_factor;
+  const size_t by = by0 + iy;
+
+  // Row indices of the two block rows above and below, clamped to the image.
+  int prev_iy = by > 0 ? iy - 1 : 0;
+  int prev_prev_iy = by > 1 ? iy - 2 : prev_iy;
+  int next_iy = by + 1 < compinfo.height_in_blocks ? iy + 1 : iy;
+  int next_next_iy = by + 2 < compinfo.height_in_blocks ? iy + 2 : next_iy;
+
+  const int16_t* cur_row = blocks[iy][bx];
+  const int16_t* prev_row = blocks[prev_iy][bx];
+  const int16_t* prev_prev_row = blocks[prev_prev_iy][bx];
+  const int16_t* next_row = blocks[next_iy][bx];
+  const int16_t* next_next_row = blocks[next_next_iy][bx];
+
+  // Offsets (in coefficients) from block bx to the DC coefficient of the two
+  // blocks to the left and right, clamped to the image.
+  int prev_block_ind = bx ? -DCTSIZE2 : 0;
+  int prev_prev_block_ind = bx > 1 ? -2 * DCTSIZE2 : prev_block_ind;
+  int next_block_ind = bx + 1 < compinfo.width_in_blocks ? DCTSIZE2 : 0;
+  int next_next_block_ind =
+      bx + 2 < compinfo.width_in_blocks ? DCTSIZE2 * 2 : next_block_ind;
+
+  std::array<const int16_t*, 5> row_ptrs = {prev_prev_row, prev_row, cur_row,
+                                            next_row, next_next_row};
+  std::array<int, 5> block_inds = {prev_prev_block_ind, prev_block_ind, 0,
+                                   next_block_ind, next_next_block_ind};
+
+  memcpy(scratch, cur_row, DCTSIZE2 * sizeof(cur_row[0]));
+
+  for (int r = 0; r < 5; ++r) {
+    for (int c = 0; c < 5; ++c) {
+      dc_values[r][c] = row_ptrs[r][block_inds[c]];
+    }
+  }
+  // Get the correct coef_bits: In case of an incomplete scan, we use the
+  // prev coefficients.
+  if (cinfo->output_iMCU_row + 1 > cinfo->input_iMCU_row) {
+    coef_bits = cinfo->master->prev_coef_bits_latch[component];
+  } else {
+    coef_bits = cinfo->master->coef_bits_latch[component];
+  }
+
+  bool change_dc = true;
+  for (int i = 1; i < SAVED_COEFS; i++) {
+    if (coef_bits[i] != -1) {
+      change_dc = false;
+      break;
+    }
+  }
+
+  JQUANT_TBL* quanttbl = cinfo->quant_tbl_ptrs[compinfo.quant_tbl_no];
+  for (size_t i = 0; i < 6; ++i) {
+    Q_VAL[i] = quanttbl->quantval[Q_POS[i]];
+  }
+  if (change_dc) {
+    for (size_t i = 6; i < SAVED_COEFS; ++i) {
+      Q_VAL[i] = quanttbl->quantval[Q_POS[i]];
+    }
+  }
+  auto calculate_dct_value = [&](int coef_index) {
+    int64_t num = 0;
+    int pred;
+    int Al;
+    // we use the symmetry of the smoothing matrices by transposing the 5x5 dc
+    // matrix in that case.
+    bool swap_indices = coef_index == 2 || coef_index == 5 || coef_index == 8 ||
+                        coef_index == 9;
+    auto dc = [&](int i, int j) {
+      return swap_indices ? dc_values[j][i] : dc_values[i][j];
+    };
+    Al = coef_bits[coef_index];
+    switch (coef_index) {
+      case 0:
+        // set the DC
+        num = (-2 * dc(0, 0) - 6 * dc(0, 1) - 8 * dc(0, 2) - 6 * dc(0, 3) -
+               2 * dc(0, 4) - 6 * dc(1, 0) + 6 * dc(1, 1) + 42 * dc(1, 2) +
+               6 * dc(1, 3) - 6 * dc(1, 4) - 8 * dc(2, 0) + 42 * dc(2, 1) +
+               152 * dc(2, 2) + 42 * dc(2, 3) - 8 * dc(2, 4) - 6 * dc(3, 0) +
+               6 * dc(3, 1) + 42 * dc(3, 2) + 6 * dc(3, 3) - 6 * dc(3, 4) -
+               2 * dc(4, 0) - 6 * dc(4, 1) - 8 * dc(4, 2) - 6 * dc(4, 3) -
+               2 * dc(4, 4));
+        // special case: for the DC the dequantization is different
+        Al = 0;
+        break;
+      case 1:
+      case 2:
+        // set Q01 or Q10
+        num = (change_dc ? (-dc(0, 0) - dc(0, 1) + dc(0, 3) + dc(0, 4) -
+                            3 * dc(1, 0) + 13 * dc(1, 1) - 13 * dc(1, 3) +
+                            3 * dc(1, 4) - 3 * dc(2, 0) + 38 * dc(2, 1) -
+                            38 * dc(2, 3) + 3 * dc(2, 4) - 3 * dc(3, 0) +
+                            13 * dc(3, 1) - 13 * dc(3, 3) + 3 * dc(3, 4) -
+                            dc(4, 0) - dc(4, 1) + dc(4, 3) + dc(4, 4))
+                         : (-7 * dc(2, 0) + 50 * dc(2, 1) - 50 * dc(2, 3) +
+                            7 * dc(2, 4)));
+        break;
+      case 3:
+      case 5:
+        // set Q02 or Q20
+        num = (change_dc
+                   ? dc(0, 2) + 2 * dc(1, 1) + 7 * dc(1, 2) + 2 * dc(1, 3) -
+                         5 * dc(2, 1) - 14 * dc(2, 2) - 5 * dc(2, 3) +
+                         2 * dc(3, 1) + 7 * dc(3, 2) + 2 * dc(3, 3) + dc(4, 2)
+                   : (-dc(0, 2) + 13 * dc(1, 2) - 24 * dc(2, 2) +
+                      13 * dc(3, 2) - dc(4, 2)));
+        break;
+      case 4:
+        // set Q11
+        num =
+            (change_dc ? -dc(0, 0) + dc(0, 4) + 9 * dc(1, 1) - 9 * dc(1, 3) -
+                             9 * dc(3, 1) + 9 * dc(3, 3) + dc(4, 0) - dc(4, 4)
+                       : (dc(1, 4) + dc(3, 0) - 10 * dc(3, 1) + 10 * dc(3, 3) -
+                          dc(0, 1) - dc(3, 4) + dc(4, 1) - dc(4, 3) + dc(0, 3) -
+                          dc(1, 0) + 10 * dc(1, 1) - 10 * dc(1, 3)));
+        break;
+      case 6:
+      case 9:
+        // set Q03 or Q30
+        num = (dc(1, 1) - dc(1, 3) + 2 * dc(2, 1) - 2 * dc(2, 3) + dc(3, 1) -
+               dc(3, 3));
+        break;
+      case 7:
+      case 8:
+        // set Q12 and Q21
+        num = (dc(1, 1) - 3 * dc(1, 2) + dc(1, 3) - dc(3, 1) + 3 * dc(3, 2) -
+               dc(3, 3));
+        break;
+    }
+    num = Q_VAL[0] * num;
+    // Divide by 256 * Q_VAL[coef_index], rounding to nearest on the
+    // magnitude; if Al > 0 the magnitude is limited to (1 << Al) - 1.
+    if (num >= 0) {
+      pred = ((Q_VAL[coef_index] << 7) + num) / (Q_VAL[coef_index] << 8);
+      if (Al > 0 && pred >= (1 << Al)) pred = (1 << Al) - 1;
+    } else {
+      pred = ((Q_VAL[coef_index] << 7) - num) / (Q_VAL[coef_index] << 8);
+      if (Al > 0 && pred >= (1 << Al)) pred = (1 << Al) - 1;
+      pred = -pred;
+    }
+    return static_cast<int16_t>(pred);
+  };
+
+  int loop_end = change_dc ? SAVED_COEFS : 6;
+  for (int i = 1; i < loop_end; ++i) {
+    if (coef_bits[i] != 0 && scratch[Q_POS[i]] == 0) {
+      scratch[Q_POS[i]] = calculate_dct_value(i);
+    }
+  }
+  if (change_dc) {
+    scratch[0] = calculate_dct_value(0);
+  }
+}
+
+// Resets the per-pass output state of the decoder: decides whether block
+// smoothing is applied, clears the coefficient statistics and biases, rewinds
+// the output position, fills the float dequantization table and selects the
+// inverse transform and color transform.
+void PrepareForOutput(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ bool smoothing = do_smoothing(cinfo);
+ // Smoothing is only applied if it is both useful and requested.
+ m->apply_smoothing = smoothing && cinfo->do_block_smoothing;
+ size_t coeffs_per_block = cinfo->num_components * DCTSIZE2;
+ memset(m->nonzeros_, 0, coeffs_per_block * sizeof(m->nonzeros_[0]));
+ memset(m->sumabs_, 0, coeffs_per_block * sizeof(m->sumabs_[0]));
+ memset(m->num_processed_blocks_, 0, sizeof(m->num_processed_blocks_));
+ memset(m->biases_, 0, coeffs_per_block * sizeof(m->biases_[0]));
+ cinfo->output_iMCU_row = 0;
+ cinfo->output_scanline = 0;
+ const float kDequantScale = 1.0f / (8 * 255);
+ for (int c = 0; c < cinfo->num_components; c++) {
+ const auto& comp = cinfo->comp_info[c];
+ JQUANT_TBL* table = comp.quant_table;
+ // Components without a table keep their previous dequant_ entries.
+ if (table == nullptr) continue;
+ for (size_t k = 0; k < DCTSIZE2; ++k) {
+ m->dequant_[c * DCTSIZE2 + k] = table->quantval[k] * kDequantScale;
+ }
+ }
+ ChooseInverseTransform(cinfo);
+ ChooseColorTransform(cinfo);
+}
+
+// Runs the inverse transform on all blocks of the current output iMCU row
+// (cinfo->output_iMCU_row) and stores the resulting samples in
+// m->raw_output_[c] for every component c.
+// For components that ShouldApplyDequantBiases() accepts, the coefficient
+// statistics are updated first and the dequantization biases are recomputed
+// on every fourth iMCU row. Does not advance cinfo->output_iMCU_row.
+void DecodeCurrentiMCURow(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ const size_t imcu_row = cinfo->output_iMCU_row;
+ JBLOCKARRAY ba[kMaxComponents];
+ // Get access to the coefficient block rows of this iMCU row.
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ const jpeg_component_info* comp = &cinfo->comp_info[c];
+ int by0 = imcu_row * comp->v_samp_factor;
+ int block_rows_left = comp->height_in_blocks - by0;
+ int max_block_rows = std::min(comp->v_samp_factor, block_rows_left);
+ // In streaming mode the rows are always requested from offset 0.
+ int offset = m->streaming_mode_ ? 0 : by0;
+ ba[c] = (*cinfo->mem->access_virt_barray)(
+ reinterpret_cast<j_common_ptr>(cinfo), m->coef_arrays[c], offset,
+ max_block_rows, false);
+ }
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ size_t k0 = c * DCTSIZE2;
+ auto& compinfo = cinfo->comp_info[c];
+ size_t block_row = imcu_row * compinfo.v_samp_factor;
+ if (ShouldApplyDequantBiases(cinfo, c)) {
+ // Update statistics for this iMCU row.
+ for (int iy = 0; iy < compinfo.v_samp_factor; ++iy) {
+ size_t by = block_row + iy;
+ if (by >= compinfo.height_in_blocks) {
+ continue;
+ }
+ int16_t* JXL_RESTRICT coeffs = &ba[c][iy][0][0];
+ size_t num = compinfo.width_in_blocks * DCTSIZE2;
+ GatherBlockStats(coeffs, num, &m->nonzeros_[k0], &m->sumabs_[k0]);
+ m->num_processed_blocks_[c] += compinfo.width_in_blocks;
+ }
+ if (imcu_row % 4 == 3) {
+ // Re-compute optimal biases every few iMCU-rows.
+ ComputeOptimalLaplacianBiases(m->num_processed_blocks_[c],
+ &m->nonzeros_[k0], &m->sumabs_[k0],
+ &m->biases_[k0]);
+ }
+ }
+ RowBuffer<float>* raw_out = &m->raw_output_[c];
+ for (int iy = 0; iy < compinfo.v_samp_factor; ++iy) {
+ size_t by = block_row + iy;
+ // Skip block rows below the bottom of the component.
+ if (by >= compinfo.height_in_blocks) {
+ continue;
+ }
+ size_t dctsize = m->scaled_dct_size[c];
+ int16_t* JXL_RESTRICT row_in = &ba[c][iy][0][0];
+ float* JXL_RESTRICT row_out = raw_out->Row(by * dctsize);
+ for (size_t bx = 0; bx < compinfo.width_in_blocks; ++bx) {
+ if (m->apply_smoothing) {
+ // PredictSmooth() leaves the smoothed block in m->smoothing_scratch_.
+ PredictSmooth(cinfo, ba[c], c, bx, iy);
+ (*m->inverse_transform[c])(m->smoothing_scratch_, &m->dequant_[k0],
+ &m->biases_[k0], m->idct_scratch_,
+ &row_out[bx * dctsize], raw_out->stride(),
+ dctsize);
+ } else {
+ (*m->inverse_transform[c])(&row_in[bx * DCTSIZE2], &m->dequant_[k0],
+ &m->biases_[k0], m->idct_scratch_,
+ &row_out[bx * dctsize], raw_out->stride(),
+ dctsize);
+ }
+ }
+ if (m->streaming_mode_) {
+ // Clear the consumed coefficients of this block row.
+ memset(row_in, 0, compinfo.width_in_blocks * sizeof(JBLOCK));
+ }
+ }
+ }
+}
+
+// Decodes the current iMCU row and writes the samples of every component,
+// without upsampling or color conversion, to data[c] (one output row per
+// component row of this iMCU row). Then advances cinfo->output_iMCU_row and
+// cinfo->output_scanline, and counts the output pass as done once the last
+// scanline has been reached.
+void ProcessRawOutput(j_decompress_ptr cinfo, JSAMPIMAGE data) {
+ jpegli::DecodeCurrentiMCURow(cinfo);
+ jpeg_decomp_master* m = cinfo->master;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ const auto& compinfo = cinfo->comp_info[c];
+ size_t comp_width = compinfo.width_in_blocks * DCTSIZE;
+ size_t comp_height = compinfo.height_in_blocks * DCTSIZE;
+ size_t comp_nrows = compinfo.v_samp_factor * DCTSIZE;
+ size_t y0 = cinfo->output_iMCU_row * compinfo.v_samp_factor * DCTSIZE;
+ // Do not go past the bottom of the component in the last iMCU row.
+ size_t y1 = std::min(y0 + comp_nrows, comp_height);
+ for (size_t y = y0; y < y1; ++y) {
+ float* rows[1] = {m->raw_output_[c].Row(y)};
+ uint8_t* output = data[c][y - y0];
+ DecenterRow(rows[0], comp_width);
+ WriteToOutput(cinfo, rows, 0, comp_width, 1, output);
+ }
+ }
+ ++cinfo->output_iMCU_row;
+ cinfo->output_scanline += cinfo->max_v_samp_factor * DCTSIZE;
+ if (cinfo->output_scanline >= cinfo->output_height) {
+ ++m->output_passes_done_;
+ }
+}
+
+// Performs one step of output processing. Depending on the state, a call
+// either
+// - decodes the next iMCU row into the raw output buffers and advances
+//   cinfo->output_iMCU_row, or
+// - upsamples, color-converts and emits already decoded scanlines, writing
+//   them to scanlines[*num_output_rows ...] (if scanlines is not null) and
+//   incrementing *num_output_rows and cinfo->output_scanline for each row.
+// At most max_output_rows - *num_output_rows rows are emitted per call.
+void ProcessOutput(j_decompress_ptr cinfo, size_t* num_output_rows,
+ JSAMPARRAY scanlines, size_t max_output_rows) {
+ jpeg_decomp_master* m = cinfo->master;
+ const int vfactor = cinfo->max_v_samp_factor;
+ const int hfactor = cinfo->max_h_samp_factor;
+ // With context rows, output lags one iMCU row behind decoding.
+ const size_t context = m->need_context_rows_ ? 1 : 0;
+ const size_t imcu_row = cinfo->output_iMCU_row;
+ const size_t imcu_height = vfactor * m->min_scaled_dct_size;
+ const size_t imcu_width = hfactor * m->min_scaled_dct_size;
+ const size_t output_width = m->iMCU_cols_ * imcu_width;
+ if (imcu_row == cinfo->total_iMCU_rows ||
+ (imcu_row > context &&
+ cinfo->output_scanline < (imcu_row - context) * imcu_height)) {
+ // We are ready to output some scanlines.
+ size_t ybegin = cinfo->output_scanline;
+ size_t yend = (imcu_row == cinfo->total_iMCU_rows
+ ? cinfo->output_height
+ : (imcu_row - context) * imcu_height);
+ yend = std::min<size_t>(yend, ybegin + max_output_rows - *num_output_rows);
+ // Rows are rendered in groups of vfactor, so widen [ybegin, yend) to
+ // group boundaries; rows outside the requested range are skipped below.
+ size_t yb = (ybegin / vfactor) * vfactor;
+ size_t ye = DivCeil(yend, vfactor) * vfactor;
+ for (size_t y = yb; y < ye; y += vfactor) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ RowBuffer<float>* raw_out = &m->raw_output_[c];
+ RowBuffer<float>* render_out = &m->render_output_[c];
+ int line_groups = vfactor / m->v_factor[c];
+ int downsampled_width = output_width / m->h_factor[c];
+ size_t yc = y / m->v_factor[c];
+ for (int dy = 0; dy < line_groups; ++dy) {
+ size_t ymid = yc + dy;
+ const float* JXL_RESTRICT row_mid = raw_out->Row(ymid);
+ if (cinfo->do_fancy_upsampling && m->v_factor[c] == 2) {
+ // At the top and bottom edges the missing neighbor row is replaced
+ // by the current row.
+ const float* JXL_RESTRICT row_top =
+ ymid == 0 ? row_mid : raw_out->Row(ymid - 1);
+ const float* JXL_RESTRICT row_bot = ymid + 1 == m->raw_height_[c]
+ ? row_mid
+ : raw_out->Row(ymid + 1);
+ Upsample2Vertical(row_top, row_mid, row_bot,
+ render_out->Row(2 * dy),
+ render_out->Row(2 * dy + 1), downsampled_width);
+ } else {
+ // Plain vertical replication of the row.
+ for (int yix = 0; yix < m->v_factor[c]; ++yix) {
+ memcpy(render_out->Row(m->v_factor[c] * dy + yix), row_mid,
+ downsampled_width * sizeof(float));
+ }
+ }
+ if (m->h_factor[c] > 1) {
+ for (int yix = 0; yix < m->v_factor[c]; ++yix) {
+ int row_ix = m->v_factor[c] * dy + yix;
+ float* JXL_RESTRICT row = render_out->Row(row_ix);
+ float* JXL_RESTRICT tmp = m->upsample_scratch_;
+ if (cinfo->do_fancy_upsampling && m->h_factor[c] == 2) {
+ Upsample2Horizontal(row, tmp, output_width);
+ } else {
+ // TODO(szabadka) SIMDify this.
+ for (size_t x = 0; x < output_width; ++x) {
+ tmp[x] = row[x / m->h_factor[c]];
+ }
+ memcpy(row, tmp, output_width * sizeof(tmp[0]));
+ }
+ }
+ }
+ }
+ }
+ for (int yix = 0; yix < vfactor; ++yix) {
+ if (y + yix < ybegin || y + yix >= yend) continue;
+ float* rows[kMaxComponents];
+ int num_all_components =
+ std::max(cinfo->out_color_components, cinfo->num_components);
+ for (int c = 0; c < num_all_components; ++c) {
+ rows[c] = m->render_output_[c].Row(yix);
+ }
+ (*m->color_transform)(rows, output_width);
+ for (int c = 0; c < cinfo->out_color_components; ++c) {
+ // Undo the centering of the sample values around zero.
+ DecenterRow(rows[c], output_width);
+ }
+ // The row is counted even when no output buffer was supplied.
+ if (scanlines) {
+ uint8_t* output = scanlines[*num_output_rows];
+ WriteToOutput(cinfo, rows, m->xoffset_, cinfo->output_width,
+ cinfo->out_color_components, output);
+ }
+ JXL_ASSERT(cinfo->output_scanline == y + yix);
+ ++cinfo->output_scanline;
+ ++(*num_output_rows);
+ if (cinfo->output_scanline == cinfo->output_height) {
+ ++m->output_passes_done_;
+ }
+ }
+ }
+ } else {
+ // Not enough decoded rows yet: decode the next iMCU row.
+ DecodeCurrentiMCURow(cinfo);
+ ++cinfo->output_iMCU_row;
+ }
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/lib/jpegli/render.h b/lib/jpegli/render.h
new file mode 100644
index 0000000..ad69335
--- /dev/null
+++ b/lib/jpegli/render.h
@@ -0,0 +1,24 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_RENDER_H_
+#define LIB_JPEGLI_RENDER_H_
+
+#include <stdint.h>
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+void PrepareForOutput(j_decompress_ptr cinfo);
+
+void ProcessOutput(j_decompress_ptr cinfo, size_t* num_output_rows,
+ JSAMPARRAY scanlines, size_t max_output_rows);
+
+void ProcessRawOutput(j_decompress_ptr cinfo, JSAMPIMAGE data);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_RENDER_H_
diff --git a/lib/jpegli/simd.cc b/lib/jpegli/simd.cc
new file mode 100644
index 0000000..5e84939
--- /dev/null
+++ b/lib/jpegli/simd.cc
@@ -0,0 +1,38 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/simd.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/simd.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+size_t GetVectorSize() { return HWY_LANES(uint8_t); }  // Lane count for one-byte lanes, i.e. the vector size in bytes for this target.
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+namespace {
+
+HWY_EXPORT(GetVectorSize); // Local function.
+
+} // namespace
+
+size_t VectorSize() {  // Returns the value of GetVectorSize() for the dynamically dispatched target.
+ static size_t bytes = HWY_DYNAMIC_DISPATCH(GetVectorSize)();  // Function-local static: dispatched once, then cached.
+ return bytes;
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/lib/jpegli/simd.h b/lib/jpegli/simd.h
new file mode 100644
index 0000000..aec772e
--- /dev/null
+++ b/lib/jpegli/simd.h
@@ -0,0 +1,18 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_SIMD_H_
+#define LIB_JPEGLI_SIMD_H_
+
+#include <stddef.h>
+
+namespace jpegli {
+
+// Returns SIMD vector size in bytes.
+size_t VectorSize();
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_SIMD_H_
diff --git a/lib/jpegli/source_manager.cc b/lib/jpegli/source_manager.cc
new file mode 100644
index 0000000..0b8e0a5
--- /dev/null
+++ b/lib/jpegli/source_manager.cc
@@ -0,0 +1,90 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+
+namespace jpegli {
+
+void init_mem_source(j_decompress_ptr cinfo) {}
+void init_stdio_source(j_decompress_ptr cinfo) {}
+
+void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {  // Advances the source by num_bytes, refilling the buffer as often as needed.
+ if (num_bytes <= 0) return;  // Nothing to skip.
+ while (num_bytes > static_cast<long>(cinfo->src->bytes_in_buffer)) {
+ num_bytes -= cinfo->src->bytes_in_buffer;  // Discard everything that is currently buffered...
+ (*cinfo->src->fill_input_buffer)(cinfo);  // ...and refill; NOTE(review): the return value is not checked here.
+ }
+ cinfo->src->next_input_byte += num_bytes;  // The remaining skip fits inside the current buffer.
+ cinfo->src->bytes_in_buffer -= num_bytes;
+}
+
+void term_source(j_decompress_ptr cinfo) {}
+
+boolean EmitFakeEoiMarker(j_decompress_ptr cinfo) {  // Points the source at a static two-byte end-of-image marker; always returns TRUE.
+ static constexpr uint8_t kFakeEoiMarker[2] = {0xff, 0xd9};  // 0xFF 0xD9 is the JPEG EOI marker.
+ cinfo->src->next_input_byte = kFakeEoiMarker;
+ cinfo->src->bytes_in_buffer = 2;
+ return TRUE;
+}
+
+constexpr size_t kStdioBufferSize = 64 << 10;
+
+struct StdioSourceManager {  // Source manager that pulls its input from a FILE* in chunks of up to kStdioBufferSize bytes.
+ jpeg_source_mgr pub;  // First member, so cinfo->src can be cast back to StdioSourceManager.
+ FILE* f;  // Stream that fill_input_buffer() reads from.
+ uint8_t* buffer;  // Destination of each fread() in fill_input_buffer().
+
+ static boolean fill_input_buffer(j_decompress_ptr cinfo) {  // Reads the next chunk from the file; always returns TRUE.
+ auto src = reinterpret_cast<StdioSourceManager*>(cinfo->src);
+ size_t num_bytes_read = fread(src->buffer, 1, kStdioBufferSize, src->f);
+ if (num_bytes_read == 0) {  // Nothing could be read (end of file or read error): hand out a fake EOI marker instead.
+ return EmitFakeEoiMarker(cinfo);
+ }
+ src->pub.next_input_byte = src->buffer;
+ src->pub.bytes_in_buffer = num_bytes_read;
+ return TRUE;
+ }
+};
+
+} // namespace jpegli
+
+void jpegli_mem_src(j_decompress_ptr cinfo, const unsigned char* inbuffer,
+ unsigned long insize) {  // Installs (or re-arms) a source manager that reads insize bytes starting at inbuffer.
+ if (cinfo->src && cinfo->src->init_source != jpegli::init_mem_source) {  // The init_source pointer serves as the tag for "this is our memory source".
+ JPEGLI_ERROR("jpegli_mem_src: a different source manager was already set");
+ }
+ if (!cinfo->src) {  // First call on this decompressor: allocate the manager.
+ cinfo->src = jpegli::Allocate<jpeg_source_mgr>(cinfo, 1);
+ }
+ cinfo->src->next_input_byte = inbuffer;
+ cinfo->src->bytes_in_buffer = insize;
+ cinfo->src->init_source = jpegli::init_mem_source;
+ cinfo->src->fill_input_buffer = jpegli::EmitFakeEoiMarker;  // Running past the end of the buffer yields a fake EOI marker.
+ cinfo->src->skip_input_data = jpegli::skip_input_data;
+ cinfo->src->resync_to_restart = jpegli_resync_to_restart;
+ cinfo->src->term_source = jpegli::term_source;
+}
+
+void jpegli_stdio_src(j_decompress_ptr cinfo, FILE* infile) {  // Installs (or re-arms) a source manager that reads the JPEG stream from infile.
+ if (cinfo->src && cinfo->src->init_source != jpegli::init_stdio_source) {  // The init_source pointer serves as the tag for "this is our stdio source".
+ JPEGLI_ERROR("jpegli_stdio_src: a different source manager was already set");
+ }
+ if (!cinfo->src) {  // First call on this decompressor: allocate the manager.
+ cinfo->src = reinterpret_cast<jpeg_source_mgr*>(
+ jpegli::Allocate<jpegli::StdioSourceManager>(cinfo, 1));
+ }
+ auto src = reinterpret_cast<jpegli::StdioSourceManager*>(cinfo->src);
+ src->f = infile;
+ src->buffer = jpegli::Allocate<uint8_t>(cinfo, jpegli::kStdioBufferSize);
+ src->pub.next_input_byte = src->buffer;
+ src->pub.bytes_in_buffer = 0;  // Starts empty; the first chunk is read by fill_input_buffer().
+ src->pub.init_source = jpegli::init_stdio_source;
+ src->pub.fill_input_buffer = jpegli::StdioSourceManager::fill_input_buffer;
+ src->pub.skip_input_data = jpegli::skip_input_data;
+ src->pub.resync_to_restart = jpegli_resync_to_restart;
+ src->pub.term_source = jpegli::term_source;
+}
diff --git a/lib/jpegli/source_manager_test.cc b/lib/jpegli/source_manager_test.cc
new file mode 100644
index 0000000..4e13787
--- /dev/null
+++ b/lib/jpegli/source_manager_test.cc
@@ -0,0 +1,142 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <cmath>
+#include <cstdint>
+#include <vector>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/base/status.h"
+
+namespace jpegli {
+namespace {
+
+void ReadOutputImage(j_decompress_ptr cinfo, TestImage* output) {  // Decodes the whole image from cinfo's source into output, one scanline per call.
+ jpegli_read_header(cinfo, /*require_image=*/TRUE);
+ jpegli_start_decompress(cinfo);
+ output->ysize = cinfo->output_height;
+ output->xsize = cinfo->output_width;
+ output->components = cinfo->num_components;
+ output->AllocatePixels();
+ size_t stride = cinfo->output_width * cinfo->num_components;  // Row pitch in output->pixels; NOTE(review): assumes one byte per sample - confirm.
+ while (cinfo->output_scanline < cinfo->output_height) {
+ JSAMPROW scanline = &output->pixels[cinfo->output_scanline * stride];  // Row that the next call writes to.
+ jpegli_read_scanlines(cinfo, &scanline, 1);
+ }
+ jpegli_finish_decompress(cinfo);
+}
+
+struct TestConfig {
+ std::string fn;
+ std::string fn_desc;
+ DecompressParams dparams;
+};
+
+class SourceManagerTestParam : public ::testing::TestWithParam<TestConfig> {};
+
+namespace {
+FILE* MemOpen(const std::vector<uint8_t>& data) {  // Returns a rewound temporary file containing data, or nullptr if tmpfile() fails.
+ FILE* src = tmpfile();
+ if (!src) return nullptr;
+ fwrite(data.data(), 1, data.size(), src);  // NOTE(review): the fwrite() result is not checked, so a short write goes unnoticed.
+ rewind(src);  // Position the stream at the start so the reader sees the data just written.
+ return src;  // The caller closes the stream with fclose().
+}
+} // namespace
+
+TEST_P(SourceManagerTestParam, TestStdioSourceManager) {
+ TestConfig config = GetParam();
+ std::vector<uint8_t> compressed = ReadTestData(config.fn.c_str());
+ if (config.dparams.size_factor < 1.0) {
+ compressed.resize(compressed.size() * config.dparams.size_factor);
+ }
+ FILE* src = MemOpen(compressed);
+ ASSERT_TRUE(src);
+ TestImage output0;
+ jpeg_decompress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_stdio_src(&cinfo, src);
+ ReadOutputImage(&cinfo, &output0);
+ return true;
+ };
+ bool ok = try_catch_block();
+ fclose(src);
+ ASSERT_TRUE(ok);
+ jpegli_destroy_decompress(&cinfo);
+
+ TestImage output1;
+ DecodeWithLibjpeg(CompressParams(), DecompressParams(), compressed, &output1);
+ VerifyOutputImage(output1, output0, 1.0f);
+}
+
+TEST_P(SourceManagerTestParam, TestMemSourceManager) {
+ TestConfig config = GetParam();
+ std::vector<uint8_t> compressed = ReadTestData(config.fn.c_str());
+ if (config.dparams.size_factor < 1.0f) {
+ compressed.resize(compressed.size() * config.dparams.size_factor);
+ }
+ TestImage output0;
+ jpeg_decompress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_mem_src(&cinfo, compressed.data(), compressed.size());
+ ReadOutputImage(&cinfo, &output0);
+ return true;
+ };
+ ASSERT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+
+ TestImage output1;
+ DecodeWithLibjpeg(CompressParams(), DecompressParams(), compressed, &output1);
+ VerifyOutputImage(output1, output0, 1.0f);
+}
+
+std::vector<TestConfig> GenerateTests() {  // Builds one TestConfig per (test file, truncation factor) pair.
+ std::vector<TestConfig> all_tests;
+ {
+ std::vector<std::pair<std::string, std::string>> testfiles({
+ {"jxl/flower/flower.png.im_q85_444.jpg", "Q85YUV444"},
+ {"jxl/flower/flower.png.im_q85_420.jpg", "Q85YUV420"},
+ {"jxl/flower/flower.png.im_q85_420_R13B.jpg", "Q85YUV420R13B"},
+ });
+ for (const auto& it : testfiles) {
+ for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f}) {  // Fraction of the compressed stream that the tests keep.
+ TestConfig config;
+ config.fn = it.first;
+ config.fn_desc = it.second;
+ config.dparams.size_factor = size_factor;
+ all_tests.push_back(config);
+ }
+ }
+ }  // End of the scope that holds the truncated-input test files.
+ return all_tests;  // Returned at function scope, so further test groups can be appended above.
+}
+
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+ os << c.fn_desc;
+ if (c.dparams.size_factor < 1.0f) {
+ os << "Partial" << static_cast<int>(c.dparams.size_factor * 100) << "p";
+ }
+ return os;
+}
+
+std::string TestDescription(
+ const testing::TestParamInfo<SourceManagerTestParam::ParamType>& info) {
+ std::stringstream name;
+ name << info.param;
+ return name.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(SourceManagerTest, SourceManagerTestParam,
+ testing::ValuesIn(GenerateTests()),
+ TestDescription);
+
+} // namespace
+} // namespace jpegli
diff --git a/lib/jpegli/streaming_test.cc b/lib/jpegli/streaming_test.cc
new file mode 100644
index 0000000..8d2e357
--- /dev/null
+++ b/lib/jpegli/streaming_test.cc
@@ -0,0 +1,233 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+
+namespace jpegli {
+namespace {
+
+// A simple suspending source manager with an input buffer.
+struct SourceManager {
+ jpeg_source_mgr pub;  // First member, so the struct can be installed through a cast to jpeg_source_mgr*.
+ std::vector<uint8_t> buffer;  // Storage for the pending input; DestinationManager::empty_output_buffer() appends to it.
+
+ SourceManager() {
+ pub.next_input_byte = nullptr;  // No input is available at construction time.
+ pub.bytes_in_buffer = 0;
+ pub.init_source = init_source;
+ pub.fill_input_buffer = fill_input_buffer;
+ pub.skip_input_data = skip_input_data;
+ pub.resync_to_restart = jpegli_resync_to_restart;
+ pub.term_source = term_source;
+ }
+
+ static void init_source(j_decompress_ptr cinfo) {}
+ static boolean fill_input_buffer(j_decompress_ptr cinfo) { return FALSE; }  // Never supplies data itself: FALSE is what makes this source suspend.
+ static void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {}
+ static void term_source(j_decompress_ptr cinfo) {}
+};
+
+// A destination manager that empties its output buffer into a SourceManager's
+// input buffer. The buffer size is kept short because empty_output_buffer() is
+// called only when the output buffer is full, and we want to update the decoder
+// input frequently to demonstrate that streaming works.
+static constexpr size_t kOutputBufferSize = 1024;
+struct DestinationManager {
+ jpeg_destination_mgr pub;  // First member, so the struct can be installed through a cast to jpeg_destination_mgr*.
+ std::vector<uint8_t> buffer;  // Compressor output buffer of kOutputBufferSize bytes.
+ SourceManager* dest;  // Source manager whose input buffer receives the compressed bytes.
+
+ DestinationManager(SourceManager* src)
+ : buffer(kOutputBufferSize), dest(src) {
+ pub.next_output_byte = buffer.data();
+ pub.free_in_buffer = buffer.size();
+ pub.init_destination = init_destination;
+ pub.empty_output_buffer = empty_output_buffer;
+ pub.term_destination = term_destination;
+ }
+
+ static void init_destination(j_compress_ptr cinfo) {}
+
+ static boolean empty_output_buffer(j_compress_ptr cinfo) {  // Appends the bytes written so far to the decoder's unread input, then resets the output buffer.
+ auto us = reinterpret_cast<DestinationManager*>(cinfo->dest);
+ jpeg_destination_mgr* src = &us->pub;
+ jpeg_source_mgr* dst = &us->dest->pub;
+ std::vector<uint8_t>& src_buf = us->buffer;
+ std::vector<uint8_t>& dst_buf = us->dest->buffer;
+ if (dst->bytes_in_buffer > 0 && dst->bytes_in_buffer < dst_buf.size()) {
+ memmove(dst_buf.data(), dst->next_input_byte, dst->bytes_in_buffer);  // Move the unread input to the front of dst_buf.
+ }
+ size_t src_len = src_buf.size() - src->free_in_buffer;  // Number of bytes the compressor has written into src_buf.
+ dst_buf.resize(dst->bytes_in_buffer + src_len);
+ if (src_len > 0) memcpy(dst_buf.data() + dst->bytes_in_buffer, src_buf.data(), src_len);  // Pointer arithmetic, not operator[]: the offset equals size() when nothing was written.
+ dst->next_input_byte = dst_buf.data();
+ dst->bytes_in_buffer = dst_buf.size();
+ src->next_output_byte = src_buf.data();
+ src->free_in_buffer = src_buf.size();
+ return TRUE;
+ }
+
+ static void term_destination(j_compress_ptr cinfo) {
+ empty_output_buffer(cinfo);  // Flush whatever is left in the output buffer (possibly nothing).
+ }
+};
+
+struct TestConfig {
+ TestImage input;
+ CompressParams jparams;
+};
+
+class StreamingTestParam : public ::testing::TestWithParam<TestConfig> {};
+
+TEST_P(StreamingTestParam, TestStreaming) {
+ jpeg_decompress_struct dinfo = {};
+ jpeg_compress_struct cinfo = {};
+ TestConfig config = GetParam();
+ TestImage& input = config.input;
+ TestImage output;
+ GeneratePixels(&input);
+ const auto try_catch_block = [&]() {
+ ERROR_HANDLER_SETUP(jpegli);
+ dinfo.err = cinfo.err;
+ dinfo.client_data = cinfo.client_data;
+ // Create a pair of compressor and decompressor objects, where the
+ // compressor's output is connected to the decompressor's input.
+ jpegli_create_decompress(&dinfo);
+ jpegli_create_compress(&cinfo);
+ SourceManager src;
+ dinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+ DestinationManager dest(&src);
+ cinfo.dest = reinterpret_cast<jpeg_destination_mgr*>(&dest);
+
+ cinfo.image_width = input.xsize;
+ cinfo.image_height = input.ysize;
+ cinfo.input_components = input.components;
+ cinfo.in_color_space = (J_COLOR_SPACE)input.color_space;
+ jpegli_set_defaults(&cinfo);
+ cinfo.comp_info[0].v_samp_factor = config.jparams.v_sampling[0];
+ jpegli_set_progressive_level(&cinfo, 0);
+ cinfo.optimize_coding = FALSE;
+ jpegli_start_compress(&cinfo, TRUE);
+
+ size_t stride = cinfo.image_width * cinfo.input_components;
+ size_t iMCU_height = 8 * cinfo.max_v_samp_factor;
+ std::vector<uint8_t> row_bytes(iMCU_height * stride);
+ size_t yin = 0;
+ size_t yout = 0;
+ while (yin < cinfo.image_height) {
+ // Feed one iMCU row at a time to the compressor.
+ size_t lines_in = std::min(iMCU_height, cinfo.image_height - yin);
+ memcpy(&row_bytes[0], &input.pixels[yin * stride], lines_in * stride);
+ std::vector<JSAMPROW> rows_in(lines_in);
+ for (size_t i = 0; i < lines_in; ++i) {
+ rows_in[i] = &row_bytes[i * stride];
+ }
+ EXPECT_EQ(lines_in,
+ jpegli_write_scanlines(&cinfo, &rows_in[0], lines_in));
+ yin += lines_in;
+ if (yin == cinfo.image_height) {
+ jpegli_finish_compress(&cinfo);
+ }
+
+ // After the first iMCU row, we don't yet expect any output because the
+ // compressor delays processing to have context rows after the iMCU row.
+ if (yin < std::min<size_t>(2 * iMCU_height, cinfo.image_height)) {
+ continue;
+ }
+
+ // After two iMCU rows, the compressor has started emitting compressed
+ // data. We check here that at least the scan header was output, because
+ // we expect that the compressor's output buffer was filled at least once
+ // while emitting the first compressed iMCU row.
+ if (yin == std::min<size_t>(2 * iMCU_height, cinfo.image_height)) {
+ EXPECT_EQ(JPEG_REACHED_SOS,
+ jpegli_read_header(&dinfo, /*require_image=*/TRUE));
+ output.xsize = dinfo.image_width;
+ output.ysize = dinfo.image_height;
+ output.components = dinfo.num_components;
+ EXPECT_EQ(output.xsize, input.xsize);
+ EXPECT_EQ(output.ysize, input.ysize);
+ EXPECT_EQ(output.components, input.components);
+ EXPECT_TRUE(jpegli_start_decompress(&dinfo));
+ output.pixels.resize(output.ysize * stride);
+ if (yin < cinfo.image_height) {
+ continue;
+ }
+ }
+
+ // After six iMCU rows, the compressor has emitted five iMCU rows of
+ // compressed data, of which we expect four full iMCU rows of compressed
+ // data to be in the decoder's input buffer, but since the decoder also
+ // needs context rows for upsampling and smoothing, we don't expect any
+ // output to be ready yet.
+ if (yin < 7 * iMCU_height && yin < cinfo.image_height) {
+ continue;
+ }
+
+ // After five iMCU rows, we expect the decoder to have rendered the output
+ // with four iMCU rows of delay.
+ // TODO(szabadka) Reduce the processing delay in the decoder if possible.
+ size_t lines_out =
+ (yin == cinfo.image_height ? cinfo.image_height - yout : iMCU_height);
+ std::vector<JSAMPROW> rows_out(lines_out);
+ for (size_t i = 0; i < lines_out; ++i) {
+ rows_out[i] =
+ reinterpret_cast<JSAMPLE*>(&output.pixels[(yout + i) * stride]);
+ }
+ EXPECT_EQ(lines_out,
+ jpegli_read_scanlines(&dinfo, &rows_out[0], lines_out));
+ VerifyOutputImage(input, output, yout, lines_out, 3.8f);
+ yout += lines_out;
+
+ if (yout == cinfo.image_height) {
+ EXPECT_TRUE(jpegli_finish_decompress(&dinfo));
+ }
+ }
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&dinfo);
+ jpegli_destroy_compress(&cinfo);
+}
+
+std::vector<TestConfig> GenerateTests() {  // Builds one TestConfig per (extra image height, luma vertical sampling) pair.
+ std::vector<TestConfig> all_tests;
+ const size_t xsize0 = 1920;
+ const size_t ysize0 = 1080;
+ for (int dysize : {0, 1, 8, 9}) {  // Extra rows added to the base height of 1080.
+ for (int v_sampling : {1, 2}) {  // Vertical sampling factor of the first component.
+ TestConfig config;
+ config.input.xsize = xsize0;
+ config.input.ysize = ysize0 + dysize;
+ config.jparams.h_sampling = {1, 1, 1};
+ config.jparams.v_sampling = {v_sampling, 1, 1};
+ all_tests.push_back(config);
+ }
+ }
+ return all_tests;
+}
+
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+ os << c.input;
+ os << c.jparams;
+ return os;
+}
+
+std::string TestDescription(
+ const testing::TestParamInfo<StreamingTestParam::ParamType>& info) {
+ std::stringstream name;
+ name << info.param;
+ return name.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(StreamingTest, StreamingTestParam,
+ testing::ValuesIn(GenerateTests()),
+ TestDescription);
+
+} // namespace
+} // namespace jpegli
diff --git a/lib/jpegli/test_params.h b/lib/jpegli/test_params.h
new file mode 100644
index 0000000..6ab9fa5
--- /dev/null
+++ b/lib/jpegli/test_params.h
@@ -0,0 +1,163 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_TEST_PARAMS_H_
+#define LIB_JPEGLI_TEST_PARAMS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jpegli/types.h"
+
+namespace jpegli {
+
+// We define this here as well to make sure that the *_api_test.cc tests only
+// use the public API and therefore we don't include any *_internal.h headers.
+template <typename T1, typename T2>
+constexpr inline T1 DivCeil(T1 a, T2 b) {  // Integer division of a by b, rounded up; the result is converted to T1.
+ return (a + b - 1) / b;  // NOTE(review): presumably intended for non-negative a and positive b - confirm against callers.
+}
+
+#define ARRAY_SIZE(X) (sizeof(X) / sizeof((X)[0]))
+
+static constexpr int kLastScan = 0xffff;
+
+static uint32_t kTestColorMap[] = {
+ 0x000000, 0xff0000, 0x00ff00, 0x0000ff, 0xffff00, 0x00ffff,
+ 0xff00ff, 0xffffff, 0x6251fc, 0x45d9c7, 0xa7f059, 0xd9a945,
+ 0xfa4e44, 0xceaffc, 0xbad7db, 0xc1f0b1, 0xdbca9a, 0xfacac5,
+ 0xf201ff, 0x0063db, 0x00f01c, 0xdbb204, 0xf12f0c, 0x7ba1dc};
+static constexpr int kTestColorMapNumColors = ARRAY_SIZE(kTestColorMap);
+
+static constexpr int kSpecialMarker0 = 0xe5;
+static constexpr int kSpecialMarker1 = 0xe9;
+static constexpr uint8_t kMarkerData[] = {0, 1, 255, 0, 17};
+static constexpr uint8_t kMarkerSequence[] = {0xe6, 0xe8, 0xe7,
+ 0xe6, 0xe7, 0xe8};
+static constexpr size_t kMarkerSequenceLen = ARRAY_SIZE(kMarkerSequence);
+
+enum JpegIOMode {
+ PIXELS,
+ RAW_DATA,
+ COEFFICIENTS,
+};
+
+struct CustomQuantTable {
+ int slot_idx = 0;
+ uint16_t table_type = 0;
+ int scale_factor = 100;
+ bool add_raw = false;
+ bool force_baseline = true;
+ std::vector<unsigned int> basic_table;
+ std::vector<unsigned int> quantval;
+ void Generate();
+};
+
+struct TestImage {
+ size_t xsize = 2268;
+ size_t ysize = 1512;
+ int color_space = 2; // JCS_RGB
+ size_t components = 3;
+ JpegliDataType data_type = JPEGLI_TYPE_UINT8;
+ JpegliEndianness endianness = JPEGLI_NATIVE_ENDIAN;
+ std::vector<uint8_t> pixels;
+ std::vector<std::vector<uint8_t>> raw_data;
+ std::vector<std::vector<int16_t>> coeffs;
+ void AllocatePixels() {
+ pixels.resize(ysize * xsize * components *
+ jpegli_bytes_per_sample(data_type));
+ }
+ void Clear() {
+ pixels.clear();
+ raw_data.clear();
+ coeffs.clear();
+ }
+};
+
+struct CompressParams {
+ int quality = 90;
+ bool set_jpeg_colorspace = false;
+ int jpeg_color_space = 0; // JCS_UNKNOWN
+ std::vector<int> quant_indexes;
+ std::vector<CustomQuantTable> quant_tables;
+ std::vector<int> h_sampling;
+ std::vector<int> v_sampling;
+ std::vector<int> comp_ids;
+ int override_JFIF = -1;
+ int override_Adobe = -1;
+ bool add_marker = false;
+ bool simple_progression = false;
+ // -1 is library default
+ // 0, 1, 2 is set through jpegli_set_progressive_level()
+ // 2 + N is kScriptN
+ int progressive_mode = -1;
+ unsigned int restart_interval = 0;
+ int restart_in_rows = 0;
+ int smoothing_factor = 0;
+ int optimize_coding = -1;
+ bool use_flat_dc_luma_code = false;
+ bool omit_standard_tables = false;
+ bool xyb_mode = false;
+ bool libjpeg_mode = false;
+ bool use_adaptive_quantization = true;
+ std::vector<uint8_t> icc;
+
+ int h_samp(int c) const { return h_sampling.empty() ? 1 : h_sampling[c]; }
+ int v_samp(int c) const { return v_sampling.empty() ? 1 : v_sampling[c]; }
+ int max_h_sample() const {
+ auto it = std::max_element(h_sampling.begin(), h_sampling.end());
+ return it == h_sampling.end() ? 1 : *it;
+ }
+ int max_v_sample() const {
+ auto it = std::max_element(v_sampling.begin(), v_sampling.end());
+ return it == v_sampling.end() ? 1 : *it;
+ }
+ int comp_width(const TestImage& input, int c) const {
+ return DivCeil(input.xsize * h_samp(c), max_h_sample() * 8) * 8;
+ }
+ int comp_height(const TestImage& input, int c) const {
+ return DivCeil(input.ysize * v_samp(c), max_v_sample() * 8) * 8;
+ }
+};
+
+enum ColorQuantMode {
+ CQUANT_1PASS,
+ CQUANT_2PASS,
+ CQUANT_EXTERNAL,
+ CQUANT_REUSE,
+};
+
+struct ScanDecompressParams {
+ int max_scan_number;
+ int dither_mode;
+ ColorQuantMode color_quant_mode;
+};
+
+struct DecompressParams {
+ float size_factor = 1.0f;
+ size_t chunk_size = 65536;
+ size_t max_output_lines = 16;
+ JpegIOMode output_mode = PIXELS;
+ JpegliDataType data_type = JPEGLI_TYPE_UINT8;
+ JpegliEndianness endianness = JPEGLI_NATIVE_ENDIAN;
+ bool set_out_color_space = false;
+ int out_color_space = 0; // JCS_UNKNOWN
+ bool crop_output = false;
+ bool do_block_smoothing = false;
+ bool do_fancy_upsampling = true;
+ bool skip_scans = false;
+ int scale_num = 1;
+ int scale_denom = 1;
+ bool quantize_colors = false;
+ int desired_number_of_colors = 256;
+ std::vector<ScanDecompressParams> scan_params;
+};
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_TEST_PARAMS_H_
diff --git a/lib/jpegli/test_utils-inl.h b/lib/jpegli/test_utils-inl.h
new file mode 100644
index 0000000..a454917
--- /dev/null
+++ b/lib/jpegli/test_utils-inl.h
@@ -0,0 +1,430 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This template file is included in both the libjpeg_test_util.cc and the
+// test_utils.cc files with different JPEG_API_FN macros and possibly different
+// include paths for the jpeg headers.
+
+// Sequential non-interleaved.
+static constexpr jpeg_scan_info kScript1[] = {
+ {1, {0}, 0, 63, 0, 0},
+ {1, {1}, 0, 63, 0, 0},
+ {1, {2}, 0, 63, 0, 0},
+};
+// Sequential partially interleaved, chroma first.
+static constexpr jpeg_scan_info kScript2[] = {
+ {2, {1, 2}, 0, 63, 0, 0},
+ {1, {0}, 0, 63, 0, 0},
+};
+
+// Rest of the scan scripts are progressive.
+
+static constexpr jpeg_scan_info kScript3[] = {
+ // Interleaved full DC.
+ {3, {0, 1, 2}, 0, 0, 0, 0},
+ // Full AC scans.
+ {1, {0}, 1, 63, 0, 0},
+ {1, {1}, 1, 63, 0, 0},
+ {1, {2}, 1, 63, 0, 0},
+};
+static constexpr jpeg_scan_info kScript4[] = {
+ // Non-interleaved full DC.
+ {1, {0}, 0, 0, 0, 0},
+ {1, {1}, 0, 0, 0, 0},
+ {1, {2}, 0, 0, 0, 0},
+ // Full AC scans.
+ {1, {0}, 1, 63, 0, 0},
+ {1, {1}, 1, 63, 0, 0},
+ {1, {2}, 1, 63, 0, 0},
+};
+static constexpr jpeg_scan_info kScript5[] = {
+ // Partially interleaved full DC, chroma first.
+ {2, {1, 2}, 0, 0, 0, 0},
+ {1, {0}, 0, 0, 0, 0},
+ // AC shifted by 1 bit.
+ {1, {0}, 1, 63, 0, 1},
+ {1, {1}, 1, 63, 0, 1},
+ {1, {2}, 1, 63, 0, 1},
+ // AC refinement scan.
+ {1, {0}, 1, 63, 1, 0},
+ {1, {1}, 1, 63, 1, 0},
+ {1, {2}, 1, 63, 1, 0},
+};
+static constexpr jpeg_scan_info kScript6[] = {
+ // Interleaved DC shifted by 2 bits.
+ {3, {0, 1, 2}, 0, 0, 0, 2},
+ // Interleaved DC refinement scans.
+ {3, {0, 1, 2}, 0, 0, 2, 1},
+ {3, {0, 1, 2}, 0, 0, 1, 0},
+ // Full AC scans.
+ {1, {0}, 1, 63, 0, 0},
+ {1, {1}, 1, 63, 0, 0},
+ {1, {2}, 1, 63, 0, 0},
+};
+
+static constexpr jpeg_scan_info kScript7[] = {
+ // Non-interleaved DC shifted by 2 bits.
+ {1, {0}, 0, 0, 0, 2},
+ {1, {1}, 0, 0, 0, 2},
+ {1, {2}, 0, 0, 0, 2},
+ // Non-interleaved DC first refinement scans.
+ {1, {0}, 0, 0, 2, 1},
+ {1, {1}, 0, 0, 2, 1},
+ {1, {2}, 0, 0, 2, 1},
+ // Non-interleaved DC second refinement scans.
+ {1, {0}, 0, 0, 1, 0},
+ {1, {1}, 0, 0, 1, 0},
+ {1, {2}, 0, 0, 1, 0},
+ // Full AC scans.
+ {1, {0}, 1, 63, 0, 0},
+ {1, {1}, 1, 63, 0, 0},
+ {1, {2}, 1, 63, 0, 0},
+};
+
+static constexpr jpeg_scan_info kScript8[] = {
+ // Partially interleaved DC shifted by 2 bits, chroma first
+ {2, {1, 2}, 0, 0, 0, 2},
+ {1, {0}, 0, 0, 0, 2},
+ // Partially interleaved DC first refinement scans.
+ {2, {0, 2}, 0, 0, 2, 1},
+ {1, {1}, 0, 0, 2, 1},
+ // Partially interleaved DC second refinement scans, chroma first.
+ {2, {1, 2}, 0, 0, 1, 0},
+ {1, {0}, 0, 0, 1, 0},
+ // Full AC scans.
+ {1, {0}, 1, 63, 0, 0},
+ {1, {1}, 1, 63, 0, 0},
+ {1, {2}, 1, 63, 0, 0},
+};
+
+static constexpr jpeg_scan_info kScript9[] = {
+ // Interleaved full DC.
+ {3, {0, 1, 2}, 0, 0, 0, 0},
+ // AC scans for component 0
+ // shifted by 1 bit, two spectral ranges
+ {1, {0}, 1, 6, 0, 1},
+ {1, {0}, 7, 63, 0, 1},
+ // refinement scan, full
+ {1, {0}, 1, 63, 1, 0},
+ // AC scans for component 1
+ // shifted by 1 bit, full
+ {1, {1}, 1, 63, 0, 1},
+ // refinement scan, two spectral ranges
+ {1, {1}, 1, 6, 1, 0},
+ {1, {1}, 7, 63, 1, 0},
+ // AC scans for component 2
+ // shifted by 1 bit, two spectral ranges
+ {1, {2}, 1, 6, 0, 1},
+ {1, {2}, 7, 63, 0, 1},
+ // refinement scan, two spectral ranges (but different from above)
+ {1, {2}, 1, 16, 1, 0},
+ {1, {2}, 17, 63, 1, 0},
+};
+
+static constexpr jpeg_scan_info kScript10[] = {
+ // Interleaved full DC.
+ {3, {0, 1, 2}, 0, 0, 0, 0},
+ // AC scans for spectral range 1..16
+ // shifted by 1
+ {1, {0}, 1, 16, 0, 1},
+ {1, {1}, 1, 16, 0, 1},
+ {1, {2}, 1, 16, 0, 1},
+ // refinement scans, two sub-ranges
+ {1, {0}, 1, 8, 1, 0},
+ {1, {0}, 9, 16, 1, 0},
+ {1, {1}, 1, 8, 1, 0},
+ {1, {1}, 9, 16, 1, 0},
+ {1, {2}, 1, 8, 1, 0},
+ {1, {2}, 9, 16, 1, 0},
+ // AC scans for spectral range 17..63
+ {1, {0}, 17, 63, 0, 1},
+ {1, {1}, 17, 63, 0, 1},
+ {1, {2}, 17, 63, 0, 1},
+ // refinement scans, two sub-ranges
+ {1, {0}, 17, 28, 1, 0},
+ {1, {0}, 29, 63, 1, 0},
+ {1, {1}, 17, 28, 1, 0},
+ {1, {1}, 29, 63, 1, 0},
+ {1, {2}, 17, 28, 1, 0},
+ {1, {2}, 29, 63, 1, 0},
+};
+
+struct ScanScript {
+ int num_scans;
+ const jpeg_scan_info* scans;
+};
+
+static constexpr ScanScript kTestScript[] = {
+ {ARRAY_SIZE(kScript1), kScript1}, {ARRAY_SIZE(kScript2), kScript2},
+ {ARRAY_SIZE(kScript3), kScript3}, {ARRAY_SIZE(kScript4), kScript4},
+ {ARRAY_SIZE(kScript5), kScript5}, {ARRAY_SIZE(kScript6), kScript6},
+ {ARRAY_SIZE(kScript7), kScript7}, {ARRAY_SIZE(kScript8), kScript8},
+ {ARRAY_SIZE(kScript9), kScript9}, {ARRAY_SIZE(kScript10), kScript10},
+};
+static constexpr int kNumTestScripts = ARRAY_SIZE(kTestScript);
+
+void SetScanDecompressParams(const DecompressParams& dparams,
+ j_decompress_ptr cinfo, int scan_number) {
+ const ScanDecompressParams* sparams = nullptr;
+ for (const auto& sp : dparams.scan_params) {
+ if (scan_number <= sp.max_scan_number) {
+ sparams = &sp;
+ break;
+ }
+ }
+ if (sparams == nullptr) {
+ return;
+ }
+ if (dparams.quantize_colors) {
+ cinfo->dither_mode = (J_DITHER_MODE)sparams->dither_mode;
+ if (sparams->color_quant_mode == CQUANT_1PASS) {
+ cinfo->two_pass_quantize = FALSE;
+ cinfo->colormap = nullptr;
+ } else if (sparams->color_quant_mode == CQUANT_2PASS) {
+ JXL_CHECK(cinfo->out_color_space == JCS_RGB);
+ cinfo->two_pass_quantize = TRUE;
+ cinfo->colormap = nullptr;
+ } else if (sparams->color_quant_mode == CQUANT_EXTERNAL) {
+ JXL_CHECK(cinfo->out_color_space == JCS_RGB);
+ cinfo->two_pass_quantize = FALSE;
+ bool have_colormap = cinfo->colormap != nullptr;
+ cinfo->actual_number_of_colors = kTestColorMapNumColors;
+ cinfo->colormap = (*cinfo->mem->alloc_sarray)(
+ reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE,
+ cinfo->actual_number_of_colors, 3);
+ jxl::msan::UnpoisonMemory(cinfo->colormap, 3 * sizeof(JSAMPROW));
+ for (int i = 0; i < kTestColorMapNumColors; ++i) {
+ cinfo->colormap[0][i] = (kTestColorMap[i] >> 16) & 0xff;
+ cinfo->colormap[1][i] = (kTestColorMap[i] >> 8) & 0xff;
+ cinfo->colormap[2][i] = (kTestColorMap[i] >> 0) & 0xff;
+ }
+ if (have_colormap) {
+ JPEG_API_FN(new_colormap)(cinfo);
+ }
+ } else if (sparams->color_quant_mode == CQUANT_REUSE) {
+ JXL_CHECK(cinfo->out_color_space == JCS_RGB);
+ JXL_CHECK(cinfo->colormap);
+ }
+ }
+}
+
+void SetDecompressParams(const DecompressParams& dparams,
+ j_decompress_ptr cinfo) {
+ cinfo->do_block_smoothing = dparams.do_block_smoothing;
+ cinfo->do_fancy_upsampling = dparams.do_fancy_upsampling;
+ if (dparams.output_mode == RAW_DATA) {
+ cinfo->raw_data_out = TRUE;
+ }
+ if (dparams.set_out_color_space) {
+ cinfo->out_color_space = (J_COLOR_SPACE)dparams.out_color_space;
+ if (dparams.out_color_space == JCS_UNKNOWN) {
+ cinfo->jpeg_color_space = JCS_UNKNOWN;
+ }
+ }
+ cinfo->scale_num = dparams.scale_num;
+ cinfo->scale_denom = dparams.scale_denom;
+ cinfo->quantize_colors = dparams.quantize_colors;
+ cinfo->desired_number_of_colors = dparams.desired_number_of_colors;
+ if (!dparams.scan_params.empty()) {
+ if (cinfo->buffered_image) {
+ for (const auto& sparams : dparams.scan_params) {
+ if (sparams.color_quant_mode == CQUANT_1PASS) {
+ cinfo->enable_1pass_quant = TRUE;
+ } else if (sparams.color_quant_mode == CQUANT_2PASS) {
+ cinfo->enable_2pass_quant = TRUE;
+ } else if (sparams.color_quant_mode == CQUANT_EXTERNAL) {
+ cinfo->enable_external_quant = TRUE;
+ }
+ }
+ SetScanDecompressParams(dparams, cinfo, 1);
+ } else {
+ SetScanDecompressParams(dparams, cinfo, kLastScan);
+ }
+ }
+}
+
+void CheckMarkerPresent(j_decompress_ptr cinfo, uint8_t marker_type) {  // Fails a JXL_CHECK unless a saved marker of marker_type carries exactly kMarkerData.
+ bool marker_found = false;
+ for (jpeg_saved_marker_ptr marker = cinfo->marker_list; marker != nullptr;
+ marker = marker->next) {
+ jxl::msan::UnpoisonMemory(marker, sizeof(*marker));  // Unpoison the list node and its payload before reading them.
+ jxl::msan::UnpoisonMemory(marker->data, marker->data_length);
+ if (marker->marker == marker_type &&
+ marker->data_length == sizeof(kMarkerData) &&
+ memcmp(marker->data, kMarkerData, sizeof(kMarkerData)) == 0) {
+ marker_found = true;  // No early exit: the whole list is still walked.
+ }
+ }
+ JXL_CHECK(marker_found);
+}
+
+ // Cross-checks the decoder state in `cinfo` against the compression
+ // parameters in `jparams`. Any mismatch aborts through JXL_CHECK.
+ //
+ // Checked: the JPEG colorspace (when it was set explicitly), the JFIF / Adobe
+ // marker flags (when overridden), presence of the two special test markers
+ // (when add_marker is set), per-component ids, sampling factors and quant
+ // table indexes (each only when the corresponding list is non-empty), the
+ // maximum sampling factors, the per-component block dimensions and the
+ // contents of the custom quantization tables.
+ void VerifyHeader(const CompressParams& jparams, j_decompress_ptr cinfo) {
+ if (jparams.set_jpeg_colorspace) {
+ JXL_CHECK(cinfo->jpeg_color_space == jparams.jpeg_color_space);
+ }
+ if (jparams.override_JFIF >= 0) {
+ JXL_CHECK(cinfo->saw_JFIF_marker == jparams.override_JFIF);
+ }
+ if (jparams.override_Adobe >= 0) {
+ JXL_CHECK(cinfo->saw_Adobe_marker == jparams.override_Adobe);
+ }
+ if (jparams.add_marker) {
+ CheckMarkerPresent(cinfo, kSpecialMarker0);
+ CheckMarkerPresent(cinfo, kSpecialMarker1);
+ }
+ // Make the component array readable under MSan before touching it.
+ jxl::msan::UnpoisonMemory(
+ cinfo->comp_info, cinfo->num_components * sizeof(cinfo->comp_info[0]));
+ // First pass: explicit per-component settings, while recomputing the
+ // maximum sampling factors independently of the decoder.
+ int max_h_samp_factor = 1;
+ int max_v_samp_factor = 1;
+ for (int i = 0; i < cinfo->num_components; ++i) {
+ jpeg_component_info* comp = &cinfo->comp_info[i];
+ if (!jparams.comp_ids.empty()) {
+ JXL_CHECK(comp->component_id == jparams.comp_ids[i]);
+ }
+ if (!jparams.h_sampling.empty()) {
+ JXL_CHECK(comp->h_samp_factor == jparams.h_sampling[i]);
+ }
+ if (!jparams.v_sampling.empty()) {
+ JXL_CHECK(comp->v_samp_factor == jparams.v_sampling[i]);
+ }
+ if (!jparams.quant_indexes.empty()) {
+ JXL_CHECK(comp->quant_tbl_no == jparams.quant_indexes[i]);
+ }
+ max_h_samp_factor = std::max(max_h_samp_factor, comp->h_samp_factor);
+ max_v_samp_factor = std::max(max_v_samp_factor, comp->v_samp_factor);
+ }
+ JXL_CHECK(max_h_samp_factor == cinfo->max_h_samp_factor);
+ JXL_CHECK(max_v_samp_factor == cinfo->max_v_samp_factor);
+ // Second pass: block dimensions derived from the image size and sampling
+ // factors; also records which quantization table slots are in use.
+ // NOTE(review): quant_tbl_no is used as an index without a range check here.
+ int referenced_tables[NUM_QUANT_TBLS] = {};
+ for (int i = 0; i < cinfo->num_components; ++i) {
+ jpeg_component_info* comp = &cinfo->comp_info[i];
+ JXL_CHECK(comp->width_in_blocks ==
+ DivCeil(cinfo->image_width * comp->h_samp_factor,
+ max_h_samp_factor * DCTSIZE));
+ JXL_CHECK(comp->height_in_blocks ==
+ DivCeil(cinfo->image_height * comp->v_samp_factor,
+ max_v_samp_factor * DCTSIZE));
+ referenced_tables[comp->quant_tbl_no] = 1;
+ }
+ // A custom table in a slot that no component references must not be present
+ // in the decoder; a referenced one must match value by value.
+ for (const auto& table : jparams.quant_tables) {
+ JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[table.slot_idx];
+ if (!referenced_tables[table.slot_idx]) {
+ JXL_CHECK(quant_table == nullptr);
+ continue;
+ }
+ JXL_CHECK(quant_table != nullptr);
+ jxl::msan::UnpoisonMemory(quant_table, sizeof(*quant_table));
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ JXL_CHECK(quant_table->quantval[k] == table.quantval[k]);
+ }
+ }
+ }
+
+ // Checks the decoder's per-scan state in `cinfo` against `jparams`. Any
+ // mismatch aborts through JXL_CHECK.
+ void VerifyScanHeader(const CompressParams& jparams, j_decompress_ptr cinfo) {
+ JXL_CHECK(cinfo->input_scan_number > 0);
+ // A progressive scan must not cover the full 0..63 spectral range; a
+ // non-progressive scan must cover exactly that range.
+ if (cinfo->progressive_mode) {
+ JXL_CHECK(cinfo->Ss != 0 || cinfo->Se != 63);
+ } else {
+ JXL_CHECK(cinfo->Ss == 0 && cinfo->Se == 63);
+ }
+ // progressive_mode values above 2 select kTestScript[progressive_mode - 3];
+ // the current scan must then match the corresponding script entry.
+ if (jparams.progressive_mode > 2) {
+ JXL_CHECK(jparams.progressive_mode < 3 + kNumTestScripts);
+ const ScanScript& script = kTestScript[jparams.progressive_mode - 3];
+ JXL_CHECK(cinfo->input_scan_number <= script.num_scans);
+ const jpeg_scan_info& scan = script.scans[cinfo->input_scan_number - 1];
+ JXL_CHECK(cinfo->comps_in_scan == scan.comps_in_scan);
+ for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+ JXL_CHECK(cinfo->cur_comp_info[i]->component_index ==
+ scan.component_index[i]);
+ }
+ JXL_CHECK(cinfo->Ss == scan.Ss);
+ JXL_CHECK(cinfo->Se == scan.Se);
+ JXL_CHECK(cinfo->Ah == scan.Ah);
+ JXL_CHECK(cinfo->Al == scan.Al);
+ }
+ // An explicit restart interval takes precedence over one given in MCU rows.
+ if (jparams.restart_interval > 0) {
+ JXL_CHECK(cinfo->restart_interval == jparams.restart_interval);
+ } else if (jparams.restart_in_rows > 0) {
+ JXL_CHECK(cinfo->restart_interval ==
+ jparams.restart_in_rows * cinfo->MCUs_per_row);
+ }
+ // With progressive_mode 0 and fixed Huffman codes, the DC/AC table indexes
+ // per component are expected to follow a fixed pattern per colorspace:
+ // all 0 for RGB and CMYK, {0, 1, 1} for YCbCr and {0, 1, 1, 0} for YCCK.
+ if (jparams.progressive_mode == 0 && jparams.optimize_coding == 0) {
+ if (cinfo->jpeg_color_space == JCS_RGB) {
+ JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 0);
+ } else if (cinfo->jpeg_color_space == JCS_YCbCr) {
+ JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 1);
+ JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 1);
+ JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 1);
+ JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 1);
+ } else if (cinfo->jpeg_color_space == JCS_CMYK) {
+ JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[3].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[3].ac_tbl_no == 0);
+ } else if (cinfo->jpeg_color_space == JCS_YCCK) {
+ JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 1);
+ JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 1);
+ JXL_CHECK(cinfo->comp_info[3].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 1);
+ JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 1);
+ JXL_CHECK(cinfo->comp_info[3].ac_tbl_no == 0);
+ }
+ // The flat DC luma code is recognised by its first 15 symbol values
+ // being 0..14 in order.
+ if (jparams.use_flat_dc_luma_code) {
+ JHUFF_TBL* tbl = cinfo->dc_huff_tbl_ptrs[0];
+ jxl::msan::UnpoisonMemory(tbl, sizeof(*tbl));
+ for (int i = 0; i < 15; ++i) {
+ JXL_CHECK(tbl->huffval[i] == i);
+ }
+ }
+ }
+ }
+
+ // Expands one row of colormap indices in place: each of the `xsize` indices
+ // in `row` is replaced by its `components` channel values, interleaved per
+ // pixel. xsize * components bytes are written back to `row`, so the caller's
+ // buffer has to be at least that large. Aborts when there is no colormap or
+ // when an index is not below `num_colors`.
+ void UnmapColors(uint8_t* row, size_t xsize, int components,
+                  JSAMPARRAY colormap, size_t num_colors) {
+   JXL_CHECK(colormap != nullptr);
+   // The expansion goes through a scratch buffer because the indices are still
+   // being read from `row` while the output is produced.
+   std::vector<uint8_t> expanded(xsize * components);
+   size_t out = 0;
+   for (size_t x = 0; x < xsize; ++x) {
+     JXL_CHECK(row[x] < num_colors);
+     const uint8_t index = row[x];
+     for (int c = 0; c < components; ++c) {
+       expanded[out++] = colormap[c][index];
+     }
+   }
+   memcpy(row, expanded.data(), expanded.size());
+ }
+
+ // Fills `output` from a decoder's coefficient arrays: copies the image
+ // dimensions, component count and output colorspace from `cinfo`, then
+ // appends one flat coefficient vector per component, with the block rows
+ // stored back to back.
+ void CopyCoefficients(j_decompress_ptr cinfo, jvirt_barray_ptr* coef_arrays,
+                       TestImage* output) {
+   output->xsize = cinfo->image_width;
+   output->ysize = cinfo->image_height;
+   output->components = cinfo->num_components;
+   output->color_space = cinfo->out_color_space;
+   j_common_ptr comptr = reinterpret_cast<j_common_ptr>(cinfo);
+   for (int c = 0; c < cinfo->num_components; ++c) {
+     const jpeg_component_info& info = cinfo->comp_info[c];
+     std::vector<JCOEF> plane(info.width_in_blocks * info.height_in_blocks *
+                              DCTSIZE2);
+     const size_t row_bytes = info.width_in_blocks * sizeof(JBLOCK);
+     for (size_t by = 0; by < info.height_in_blocks; ++by) {
+       // One block row is requested from the virtual array per iteration.
+       JBLOCKARRAY block_rows = (*cinfo->mem->access_virt_barray)(
+           comptr, coef_arrays[c], by, 1, true);
+       const size_t first_coeff = by * info.width_in_blocks * DCTSIZE2;
+       memcpy(&plane[first_coeff], block_rows[0], row_bytes);
+     }
+     output->coeffs.emplace_back(std::move(plane));
+   }
+ }
diff --git a/lib/jpegli/test_utils.cc b/lib/jpegli/test_utils.cc
new file mode 100644
index 0000000..232b937
--- /dev/null
+++ b/lib/jpegli/test_utils.cc
@@ -0,0 +1,787 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/test_utils.h"
+
+#include <cmath>
+#include <cstdint>
+#include <fstream>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/sanitizers.h"
+
+#if !defined(TEST_DATA_PATH)
+#include "tools/cpp/runfiles/runfiles.h"
+#endif
+
+namespace jpegli {
+
+#define JPEG_API_FN(name) jpegli_##name
+#include "lib/jpegli/test_utils-inl.h"
+#undef JPEG_API_FN
+
+ // GetTestDataPath maps a test data file name to a path that can be opened.
+ // Two build flavours are supported, selected by the TEST_DATA_PATH macro.
+ #if defined(TEST_DATA_PATH)
+ // TEST_DATA_PATH is defined: the file name is appended to that directory.
+ std::string GetTestDataPath(const std::string& filename) {
+ return std::string(TEST_DATA_PATH "/") + filename;
+ }
+ #else
+ // Otherwise the path is resolved through the Bazel runfiles library, relative
+ // to JPEGXL_ROOT_PACKAGE "/testdata/".
+ using bazel::tools::cpp::runfiles::Runfiles;
+ // NOTE(review): the result of Runfiles::Create is dereferenced below without
+ // a null check.
+ const std::unique_ptr<Runfiles> kRunfiles(Runfiles::Create(""));
+ std::string GetTestDataPath(const std::string& filename) {
+ std::string root(JPEGXL_ROOT_PACKAGE "/testdata/");
+ return kRunfiles->Rlocation(root + filename);
+ }
+ #endif
+
+ // Loads the named test data file (resolved through GetTestDataPath) fully
+ // into memory and returns its bytes. The resolved path is logged to stderr
+ // and the resulting size to stdout. Aborts if the stream is not in a good
+ // state after reading.
+ std::vector<uint8_t> ReadTestData(const std::string& filename) {
+   const std::string full_path = GetTestDataPath(filename);
+   fprintf(stderr, "ReadTestData %s\n", full_path.c_str());
+   std::ifstream file(full_path, std::ios::binary);
+   // Drain the stream buffer byte by byte straight into the result.
+   std::vector<uint8_t> data;
+   for (std::istreambuf_iterator<char> it(file), eof; it != eof; ++it) {
+     data.push_back(static_cast<uint8_t>(*it));
+   }
+   JXL_CHECK(file.good());
+   printf("Test data %s is %d bytes long.\n", filename.c_str(),
+          static_cast<int>(data.size()));
+   return data;
+ }
+
+ // Fills `basic_table` and the derived `quantval`, DCTSIZE2 entries each.
+ //
+ // table_type 0 produces the ramp 1, 2, ..., DCTSIZE2; any other table_type
+ // produces a flat table holding that value. Each quantval entry is the basic
+ // value scaled by scale_factor percent (rounded), clamped to [1, 65535] and,
+ // unless add_raw is set, additionally capped at 255 (force_baseline) or
+ // 32767.
+ void CustomQuantTable::Generate() {
+   basic_table.resize(DCTSIZE2);
+   quantval.resize(DCTSIZE2);
+   const bool is_ramp = (table_type == 0);
+   for (int k = 0; k < DCTSIZE2; ++k) {
+     if (is_ramp) {
+       basic_table[k] = k + 1;
+     } else {
+       basic_table[k] = table_type;
+     }
+   }
+   for (int k = 0; k < DCTSIZE2; ++k) {
+     auto& q = quantval[k];
+     q = (basic_table[k] * scale_factor + 50U) / 100U;
+     q = std::max(q, 1U);
+     q = std::min(q, 65535U);
+     if (add_raw) continue;
+     q = std::min(q, force_baseline ? 255U : 32767U);
+   }
+ }
+
+ // Parses a binary PGM ("P5") or PPM ("P6") header at the parser's current
+ // position.
+ //
+ // On success stores the image dimensions, the channel count (1 for P5, 3 for
+ // P6) and the bit depth derived from maxval (which must equal 2^bits - 1 for
+ // some bits in 1..16), sets *pos to the first byte of the raster and returns
+ // true. On malformed input a message is written to stderr and false is
+ // returned.
+ bool PNMParser::ParseHeader(const uint8_t** pos, size_t* xsize, size_t* ysize,
+                             size_t* num_channels, size_t* bitdepth) {
+   // The magic number needs two readable bytes.
+   if (end_ - pos_ < 2 || pos_[0] != 'P' ||
+       (pos_[1] != '5' && pos_[1] != '6')) {
+     fprintf(stderr, "Invalid PNM header.");
+     return false;
+   }
+   *num_channels = (pos_[1] == '5' ? 1 : 3);
+   pos_ += 2;
+
+   size_t maxval = 0;
+   if (!SkipWhitespace() || !ParseUnsigned(xsize) || !SkipWhitespace() ||
+       !ParseUnsigned(ysize) || !SkipWhitespace() || !ParseUnsigned(&maxval)) {
+     return false;
+   }
+   // Exactly one whitespace byte separates maxval from the raster. Skipping a
+   // whole run here would also swallow leading pixel bytes whose value happens
+   // to be a whitespace code (e.g. 0x0A or 0x20) and shift the whole image.
+   if (pos_ == end_ || !IsWhitespace(*pos_)) {
+     fprintf(stderr, "Expected whitespace.\n");
+     return false;
+   }
+   ++pos_;
+   if (maxval == 0 || maxval >= 65536) {
+     fprintf(stderr, "Invalid maxval value.\n");
+     return false;
+   }
+   // Only maxval values of the form 2^bits - 1 are accepted.
+   bool found_bitdepth = false;
+   for (int bits = 1; bits <= 16; ++bits) {
+     if (maxval == (1u << bits) - 1) {
+       *bitdepth = bits;
+       found_bitdepth = true;
+       break;
+     }
+   }
+   if (!found_bitdepth) {
+     fprintf(stderr, "Invalid maxval value.\n");
+     return false;
+   }
+
+   *pos = pos_;
+   return true;
+ }
+
+ // Reads the run of ASCII decimal digits at the current position into *number
+ // and advances past it. If the position is at the end of the input or not on
+ // a digit, logs to stderr and returns false without touching *number or the
+ // position. No overflow detection is performed.
+ bool PNMParser::ParseUnsigned(size_t* number) {
+   const bool starts_with_digit =
+       (pos_ != end_ && *pos_ >= '0' && *pos_ <= '9');
+   if (!starts_with_digit) {
+     fprintf(stderr, "Expected unsigned number.\n");
+     return false;
+   }
+   size_t value = 0;
+   for (; pos_ < end_ && *pos_ >= '0' && *pos_ <= '9'; ++pos_) {
+     value = value * 10 + (*pos_ - '0');
+   }
+   *number = value;
+
+   return true;
+ }
+
+ // Advances past a run of one or more whitespace bytes. Logs to stderr and
+ // returns false when the current position is at the end of the input or not
+ // on a whitespace byte.
+ bool PNMParser::SkipWhitespace() {
+   const bool at_whitespace = (pos_ != end_ && IsWhitespace(*pos_));
+   if (!at_whitespace) {
+     fprintf(stderr, "Expected whitespace.\n");
+     return false;
+   }
+   for (; pos_ < end_ && IsWhitespace(*pos_); ++pos_) {
+   }
+   return true;
+ }
+
+ // Parses an in-memory binary PNM file. On success the header fields are
+ // stored through xsize / ysize / num_channels / bitdepth, `pixels` receives
+ // every byte that follows the header, and true is returned. Returns false if
+ // the buffer is shorter than two bytes or the header cannot be parsed.
+ // The raster length is not validated against the header dimensions.
+ bool ReadPNM(const std::vector<uint8_t>& data, size_t* xsize, size_t* ysize,
+              size_t* num_channels, size_t* bitdepth,
+              std::vector<uint8_t>* pixels) {
+   if (data.size() < 2) {
+     fprintf(stderr, "PNM file too small.\n");
+     return false;
+   }
+   PNMParser parser(data.data(), data.size());
+   const uint8_t* pos = nullptr;
+   if (!parser.ParseHeader(&pos, xsize, ysize, num_channels, bitdepth)) {
+     return false;
+   }
+   // assign() is well-defined for an empty range, unlike taking the address of
+   // element 0 of an empty vector when the header consumes the whole buffer.
+   pixels->assign(pos, data.data() + data.size());
+   return true;
+ }
+
+ // Returns the short name used in test identifiers for `colorspace`, or an
+ // empty string for any value other than the six handled below.
+ std::string ColorSpaceName(J_COLOR_SPACE colorspace) {
+   if (colorspace == JCS_UNKNOWN) return "UNKNOWN";
+   if (colorspace == JCS_GRAYSCALE) return "GRAYSCALE";
+   if (colorspace == JCS_RGB) return "RGB";
+   if (colorspace == JCS_YCbCr) return "YCbCr";
+   if (colorspace == JCS_CMYK) return "CMYK";
+   if (colorspace == JCS_YCCK) return "YCCK";
+   return "";
+ }
+
+ // Builds the identifier fragment for a sample type / byte order pair.
+ // 8-bit input always yields an empty string; otherwise the result is
+ // "UINT16" or "FLOAT" (empty for any other type), followed by "LE" or "BE"
+ // when an explicit endianness was requested.
+ std::string IOMethodName(JpegliDataType data_type,
+                          JpegliEndianness endianness) {
+   if (data_type == JPEGLI_TYPE_UINT8) {
+     return "";
+   }
+   std::string name;
+   if (data_type == JPEGLI_TYPE_UINT16) {
+     name = "UINT16";
+   } else if (data_type == JPEGLI_TYPE_FLOAT) {
+     name = "FLOAT";
+   }
+   switch (endianness) {
+     case JPEGLI_LITTLE_ENDIAN:
+       name += "LE";
+       break;
+     case JPEGLI_BIG_ENDIAN:
+       name += "BE";
+       break;
+     default:
+       break;
+   }
+   return name;
+ }
+
+ // Builds the "SAMP..." identifier fragment describing the sampling factors,
+ // e.g. "SAMP2x2" or "SAMP2x1_1x1_2x1". Trailing components that are 1x1 are
+ // omitted, but the first component is always listed. Returns an empty string
+ // when no sampling factors are set. Aborts if the horizontal and vertical
+ // lists differ in length.
+ std::string SamplingId(const CompressParams& jparams) {
+   JXL_CHECK(jparams.h_sampling.size() == jparams.v_sampling.size());
+   std::stringstream os;
+   if (jparams.h_sampling.empty()) {
+     return os.str();
+   }
+   size_t count = jparams.h_sampling.size();
+   for (; count > 1; --count) {
+     const bool is_1x1 = jparams.h_sampling[count - 1] == 1 &&
+                         jparams.v_sampling[count - 1] == 1;
+     if (!is_1x1) break;
+   }
+   os << "SAMP";
+   for (size_t i = 0; i < count; ++i) {
+     if (i != 0) {
+       os << "_";
+     }
+     os << jparams.h_sampling[i] << "x" << jparams.v_sampling[i];
+   }
+   return os.str();
+ }
+
+ // Streams a compact identifier for a test image: "<xsize>x<ysize>", the I/O
+ // method name, "InputColor<name>" when the colorspace is not RGB, and the
+ // component count when the colorspace is JCS_UNKNOWN.
+ std::ostream& operator<<(std::ostream& os, const TestImage& input) {
+   os << input.xsize << "x" << input.ysize
+      << IOMethodName(input.data_type, input.endianness);
+   const bool is_rgb = (input.color_space == JCS_RGB);
+   if (!is_rgb) {
+     const J_COLOR_SPACE cs = (J_COLOR_SPACE)input.color_space;
+     os << "InputColor" << ColorSpaceName(cs);
+   }
+   if (input.color_space == JCS_UNKNOWN) {
+     os << input.components;
+   }
+   return os;
+ }
+
+ // Streams a compact identifier describing `jparams`. Fragments are appended
+ // in a fixed order and most are only emitted when the corresponding setting
+ // differs from its "unset" value, so the order of the statements below
+ // determines the resulting string.
+ std::ostream& operator<<(std::ostream& os, const CompressParams& jparams) {
+ os << "Q" << jparams.quality;
+ os << SamplingId(jparams);
+ if (jparams.set_jpeg_colorspace) {
+ os << "JpegColor"
+ << ColorSpaceName((J_COLOR_SPACE)jparams.jpeg_color_space);
+ }
+ if (!jparams.comp_ids.empty()) {
+ os << "CID";
+ for (size_t i = 0; i < jparams.comp_ids.size(); ++i) {
+ os << jparams.comp_ids[i];
+ }
+ }
+ // Custom quant tables are only described when quant indexes are set.
+ if (!jparams.quant_indexes.empty()) {
+ os << "QIDX";
+ for (size_t i = 0; i < jparams.quant_indexes.size(); ++i) {
+ os << jparams.quant_indexes[i];
+ }
+ for (const auto& table : jparams.quant_tables) {
+ // Suffix: "R" for raw tables, else "B" for forced baseline, else none.
+ os << "TABLE" << table.slot_idx << "T" << table.table_type << "F"
+ << table.scale_factor
+ << (table.add_raw ? "R"
+ : table.force_baseline ? "B"
+ : "");
+ }
+ }
+ if (jparams.progressive_mode >= 0) {
+ os << "P" << jparams.progressive_mode;
+ } else if (jparams.simple_progression) {
+ os << "Psimple";
+ }
+ // optimize_coding values other than 0 and 1 emit nothing here.
+ if (jparams.optimize_coding == 1) {
+ os << "OptimizedCode";
+ } else if (jparams.optimize_coding == 0) {
+ os << "FixedCode";
+ if (jparams.use_flat_dc_luma_code) {
+ os << "FlatDCLuma";
+ } else if (jparams.omit_standard_tables) {
+ os << "OmitDHT";
+ }
+ }
+ if (!jparams.use_adaptive_quantization) {
+ os << "NoAQ";
+ }
+ if (jparams.restart_interval > 0) {
+ os << "R" << jparams.restart_interval;
+ }
+ if (jparams.restart_in_rows > 0) {
+ os << "RR" << jparams.restart_in_rows;
+ }
+ if (jparams.xyb_mode) {
+ os << "XYB";
+ } else if (jparams.libjpeg_mode) {
+ os << "Libjpeg";
+ }
+ // Negative override values mean "not overridden" and emit nothing.
+ if (jparams.override_JFIF >= 0) {
+ os << (jparams.override_JFIF ? "AddJFIF" : "NoJFIF");
+ }
+ if (jparams.override_Adobe >= 0) {
+ os << (jparams.override_Adobe ? "AddAdobe" : "NoAdobe");
+ }
+ if (jparams.add_marker) {
+ os << "AddMarker";
+ }
+ if (!jparams.icc.empty()) {
+ os << "ICCSize" << jparams.icc.size();
+ }
+ if (jparams.smoothing_factor != 0) {
+ os << "SF" << jparams.smoothing_factor;
+ }
+ return os;
+ }
+
+ // Sets *channels to the channel count implied by `colorspace`: 1 for
+ // grayscale, 3 for RGB / YCbCr, 4 for CMYK / YCCK. For JCS_UNKNOWN the
+ // caller-provided value is kept and only checked to be at most 4. Any other
+ // colorspace aborts.
+ void SetNumChannels(J_COLOR_SPACE colorspace, size_t* channels) {
+   switch (colorspace) {
+     case JCS_GRAYSCALE:
+       *channels = 1;
+       break;
+     case JCS_RGB:
+     case JCS_YCbCr:
+       *channels = 3;
+       break;
+     case JCS_CMYK:
+     case JCS_YCCK:
+       *channels = 4;
+       break;
+     case JCS_UNKNOWN:
+       JXL_CHECK(*channels <= 4);
+       break;
+     default:
+       JXL_ABORT();
+   }
+ }
+
+ // Converts an RGB triple to Y, Cb, Cr with the luma weights 0.299 / 0.587 /
+ // 0.114. Both chroma outputs carry a +0.5 offset, so a neutral grey input
+ // yields Cb = Cr = 0.5.
+ void RGBToYCbCr(float r, float g, float b, float* y, float* cb, float* cr) {
+   const float luma = 0.299f * r + 0.587f * g + 0.114f * b;
+   const float chroma_b = -0.168736f * r - 0.331264f * g + 0.5f * b + 0.5f;
+   const float chroma_r = 0.5f * r - 0.418688f * g - 0.081312f * b + 0.5f;
+   *y = luma;
+   *cb = chroma_b;
+   *cr = chroma_r;
+ }
+
+ // Converts one 8-bit RGB pixel to `colorspace` and stores it at `out` in the
+ // requested sample format.
+ //
+ //   input_rgb        three 8-bit samples (R, G, B).
+ //   out              receives num_channels samples of `data_type`.
+ //   colorspace       target colorspace. For JCS_RGB / JCS_UNKNOWN channel c
+ //                    is copied from input channel min(c, 2).
+ //   num_channels     number of samples written.
+ //   data_type        8-bit samples are stored as is; 16-bit samples replicate
+ //                    the byte and set bit 0x40; float samples are value/255.
+ //   swap_endianness  byte-swap 16-bit / float samples before storing.
+ //
+ // Aborts for any other colorspace.
+ // NOTE(review): the default for the bool `swap_endianness` is spelled with
+ // the JPEGLI_NATIVE_ENDIAN enumerator - presumably 0, i.e. false; confirm.
+ void ConvertPixel(const uint8_t* input_rgb, uint8_t* out,
+                   J_COLOR_SPACE colorspace, size_t num_channels,
+                   JpegliDataType data_type = JPEGLI_TYPE_UINT8,
+                   bool swap_endianness = JPEGLI_NATIVE_ENDIAN) {
+   const float kMul = 255.0f;
+   float r = input_rgb[0] / kMul;
+   float g = input_rgb[1] / kMul;
+   float b = input_rgb[2] / kMul;
+   uint8_t out8[MAX_COMPONENTS];
+   if (colorspace == JCS_GRAYSCALE) {
+     const float Y = 0.299f * r + 0.587f * g + 0.114f * b;
+     out8[0] = static_cast<uint8_t>(std::round(Y * kMul));
+   } else if (colorspace == JCS_RGB || colorspace == JCS_UNKNOWN) {
+     for (size_t c = 0; c < num_channels; ++c) {
+       out8[c] = input_rgb[std::min<size_t>(2, c)];
+     }
+   } else if (colorspace == JCS_YCbCr) {
+     float Y, Cb, Cr;
+     RGBToYCbCr(r, g, b, &Y, &Cb, &Cr);
+     out8[0] = static_cast<uint8_t>(std::round(Y * kMul));
+     out8[1] = static_cast<uint8_t>(std::round(Cb * kMul));
+     out8[2] = static_cast<uint8_t>(std::round(Cr * kMul));
+   } else if (colorspace == JCS_CMYK || colorspace == JCS_YCCK) {
+     float K = 1.0f - std::max(r, std::max(g, b));
+     const float denom = 1.0f - K;
+     if (denom > 0.0f) {
+       const float scaleK = 1.0f / denom;
+       r *= scaleK;
+       g *= scaleK;
+       b *= scaleK;
+     } else {
+       // Pure black: K carries everything. Dividing by zero here would turn
+       // the channels into NaN, and converting NaN to uint8_t is undefined.
+       // Treat the K-scaled colour as white so that 1 - r/g/b is zero.
+       r = g = b = 1.0f;
+     }
+     if (colorspace == JCS_CMYK) {
+       out8[0] = static_cast<uint8_t>(std::round((1.0f - r) * kMul));
+       out8[1] = static_cast<uint8_t>(std::round((1.0f - g) * kMul));
+       out8[2] = static_cast<uint8_t>(std::round((1.0f - b) * kMul));
+     } else if (colorspace == JCS_YCCK) {
+       float Y, Cb, Cr;
+       RGBToYCbCr(r, g, b, &Y, &Cb, &Cr);
+       out8[0] = static_cast<uint8_t>(std::round(Y * kMul));
+       out8[1] = static_cast<uint8_t>(std::round(Cb * kMul));
+       out8[2] = static_cast<uint8_t>(std::round(Cr * kMul));
+     }
+     out8[3] = static_cast<uint8_t>(std::round(K * kMul));
+   } else {
+     JXL_ABORT("Colorspace %d not supported", colorspace);
+   }
+   if (data_type == JPEGLI_TYPE_UINT8) {
+     memcpy(out, out8, num_channels);
+   } else if (data_type == JPEGLI_TYPE_UINT16) {
+     for (size_t c = 0; c < num_channels; ++c) {
+       uint16_t val = (out8[c] << 8) + out8[c];
+       val |= 0x40;  // Make little-endian and big-endian asymmetric
+       if (swap_endianness) {
+         val = JXL_BSWAP16(val);
+       }
+       memcpy(&out[sizeof(val) * c], &val, sizeof(val));
+     }
+   } else if (data_type == JPEGLI_TYPE_FLOAT) {
+     for (size_t c = 0; c < num_channels; ++c) {
+       float val = out8[c] / 255.0f;
+       if (swap_endianness) {
+         val = BSwapFloat(val);
+       }
+       memcpy(&out[sizeof(val) * c], &val, sizeof(val));
+     }
+   }
+ }
+
+ // Converts an 8-bit image to grayscale in place. RGB pixels go through
+ // ConvertPixel; for YCbCr the first sample of each pixel is kept. The pixel
+ // buffer is walked in steps of three bytes and shrunk to a third of its size
+ // afterwards. A grayscale image is returned untouched; other sample types
+ // abort.
+ void ConvertToGrayscale(TestImage* img) {
+   if (img->color_space == JCS_GRAYSCALE) return;
+   JXL_CHECK(img->data_type == JPEGLI_TYPE_UINT8);
+   const size_t num_bytes = img->pixels.size();
+   size_t out = 0;
+   // `out` always equals in / 3, so writes never run ahead of the reads.
+   for (size_t in = 0; in < num_bytes; in += 3, ++out) {
+     if (img->color_space == JCS_RGB) {
+       ConvertPixel(&img->pixels[in], &img->pixels[out], JCS_GRAYSCALE, 1);
+     } else if (img->color_space == JCS_YCbCr) {
+       img->pixels[out] = img->pixels[in];
+     }
+   }
+   img->pixels.resize(num_bytes / 3);
+   img->color_space = JCS_GRAYSCALE;
+   img->components = 1;
+ }
+
+ // Fills img->pixels from a centred crop of the flower test image.
+ //
+ // A zero img->xsize / img->ysize is replaced by the source size; the
+ // requested size may not exceed the source, which must be 8-bit, 3-channel.
+ // Every source pixel is run through ConvertPixel to obtain the image's
+ // colorspace, sample type and byte order; img->components is derived from
+ // the colorspace through SetNumChannels.
+ void GeneratePixels(TestImage* img) {
+   const std::vector<uint8_t> imgdata = ReadTestData("jxl/flower/flower.pnm");
+   size_t xsize, ysize, channels, bitdepth;
+   std::vector<uint8_t> pixels;
+   JXL_CHECK(ReadPNM(imgdata, &xsize, &ysize, &channels, &bitdepth, &pixels));
+   if (img->xsize == 0) img->xsize = xsize;
+   if (img->ysize == 0) img->ysize = ysize;
+   JXL_CHECK(img->xsize <= xsize);
+   JXL_CHECK(img->ysize <= ysize);
+   JXL_CHECK(3 == channels);
+   JXL_CHECK(8 == bitdepth);
+   // Source geometry and the top-left corner of the centred crop.
+   const size_t in_bytes_per_pixel = channels;
+   const size_t in_stride = xsize * in_bytes_per_pixel;
+   const size_t x0 = (xsize - img->xsize) / 2;
+   const size_t y0 = (ysize - img->ysize) / 2;
+   // Destination geometry.
+   const J_COLOR_SPACE out_space = (J_COLOR_SPACE)img->color_space;
+   SetNumChannels(out_space, &img->components);
+   const size_t out_bytes_per_pixel =
+       jpegli_bytes_per_sample(img->data_type) * img->components;
+   const size_t out_stride = img->xsize * out_bytes_per_pixel;
+   // A swap is needed when the requested byte order differs from the host's.
+   const bool host_is_le = IsLittleEndian();
+   const bool swap_endianness =
+       (img->endianness == JPEGLI_LITTLE_ENDIAN && !host_is_le) ||
+       (img->endianness == JPEGLI_BIG_ENDIAN && host_is_le);
+   img->pixels.resize(img->ysize * out_stride);
+   for (size_t iy = 0; iy < img->ysize; ++iy) {
+     const size_t in_row = (y0 + iy) * in_stride;
+     const size_t out_row = iy * out_stride;
+     for (size_t ix = 0; ix < img->xsize; ++ix) {
+       const size_t idx_in = in_row + (x0 + ix) * in_bytes_per_pixel;
+       const size_t idx_out = out_row + ix * out_bytes_per_pixel;
+       ConvertPixel(&pixels[idx_in], &img->pixels[idx_out], out_space,
+                    img->components, img->data_type, swap_endianness);
+     }
+   }
+ }
+
+ // Derives one downsampled plane per component from the interleaved pixels of
+ // `img` and appends it to img->raw_data. Each output sample is the rounded
+ // average of a factor_x by factor_y box of input samples; coordinates past
+ // the image edge are clamped to the last row / column. Pixels are read as
+ // one byte per sample.
+ void GenerateRawData(const CompressParams& jparams, TestImage* img) {
+   for (size_t c = 0; c < img->components; ++c) {
+     const size_t plane_w = jparams.comp_width(*img, c);
+     const size_t plane_h = jparams.comp_height(*img, c);
+     const size_t factor_y = jparams.max_v_sample() / jparams.v_samp(c);
+     const size_t factor_x = jparams.max_h_sample() / jparams.h_samp(c);
+     const size_t box_area = factor_x * factor_y;
+     const size_t bytes_per_pixel = img->components;
+     std::vector<uint8_t> plane(plane_h * plane_w);
+     for (size_t y = 0; y < plane_h; ++y) {
+       for (size_t x = 0; x < plane_w; ++x) {
+         int sum = 0;
+         for (size_t iy = 0; iy < factor_y; ++iy) {
+           const size_t yy = std::min(y * factor_y + iy, img->ysize - 1);
+           const size_t row_start = yy * img->xsize;
+           for (size_t ix = 0; ix < factor_x; ++ix) {
+             const size_t xx = std::min(x * factor_x + ix, img->xsize - 1);
+             sum += img->pixels[(row_start + xx) * bytes_per_pixel + c];
+           }
+         }
+         // Adding half the box area rounds the average to nearest.
+         plane[y * plane_w + x] =
+             static_cast<uint8_t>((sum + box_area / 2) / box_area);
+       }
+     }
+     img->raw_data.emplace_back(std::move(plane));
+   }
+ }
+
+ // Appends one synthetic coefficient plane per component to img->coeffs.
+ // Coefficient k of the block at (bx, by) is (bx - by) / (k + 1), using
+ // integer division.
+ void GenerateCoeffs(const CompressParams& jparams, TestImage* img) {
+   for (size_t c = 0; c < img->components; ++c) {
+     const int xsize_blocks = jparams.comp_width(*img, c) / DCTSIZE;
+     const int ysize_blocks = jparams.comp_height(*img, c) / DCTSIZE;
+     std::vector<JCOEF> plane(ysize_blocks * xsize_blocks * DCTSIZE2);
+     // Blocks are laid out row by row, so a running pointer visits them in
+     // storage order.
+     JCOEF* block = plane.data();
+     for (int by = 0; by < ysize_blocks; ++by) {
+       for (int bx = 0; bx < xsize_blocks; ++bx, block += DCTSIZE2) {
+         const int numerator = bx - by;
+         for (int k = 0; k < DCTSIZE2; ++k) {
+           block[k] = numerator / (k + 1);
+         }
+       }
+     }
+     img->coeffs.emplace_back(std::move(plane));
+   }
+ }
+
+ // Compresses `input` with the jpegli encoder on an already created compress
+ // object `cinfo` (destination set up by the caller), configured according to
+ // `jparams`.
+ //
+ // The image is fed through one of three paths: raw downsampled planes when
+ // input.raw_data is non-empty, DCT coefficients when input.coeffs is
+ // non-empty, and interleaved scanlines otherwise. The function ends with
+ // jpegli_finish_compress; destroying `cinfo` is left to the caller.
+ void EncodeWithJpegli(const TestImage& input, const CompressParams& jparams,
+ j_compress_ptr cinfo) {
+ cinfo->image_width = input.xsize;
+ cinfo->image_height = input.ysize;
+ cinfo->input_components = input.components;
+ // These mode switches are issued before jpegli_set_defaults.
+ if (jparams.xyb_mode) {
+ jpegli_set_xyb_mode(cinfo);
+ }
+ if (jparams.libjpeg_mode) {
+ jpegli_enable_adaptive_quantization(cinfo, FALSE);
+ jpegli_use_standard_quant_tables(cinfo);
+ jpegli_set_progressive_level(cinfo, 0);
+ }
+ jpegli_set_defaults(cinfo);
+ cinfo->in_color_space = (J_COLOR_SPACE)input.color_space;
+ jpegli_default_colorspace(cinfo);
+ // Negative override values leave the encoder's own choice in place.
+ if (jparams.override_JFIF >= 0) {
+ cinfo->write_JFIF_header = jparams.override_JFIF;
+ }
+ if (jparams.override_Adobe >= 0) {
+ cinfo->write_Adobe_marker = jparams.override_Adobe;
+ }
+ if (jparams.set_jpeg_colorspace) {
+ jpegli_set_colorspace(cinfo, (J_COLOR_SPACE)jparams.jpeg_color_space);
+ }
+ if (!jparams.comp_ids.empty()) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ cinfo->comp_info[c].component_id = jparams.comp_ids[c];
+ }
+ }
+ // v_sampling is indexed whenever h_sampling is non-empty.
+ if (!jparams.h_sampling.empty()) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ cinfo->comp_info[c].h_samp_factor = jparams.h_sampling[c];
+ cinfo->comp_info[c].v_samp_factor = jparams.v_sampling[c];
+ }
+ }
+ jpegli_set_quality(cinfo, jparams.quality, TRUE);
+ if (!jparams.quant_indexes.empty()) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ cinfo->comp_info[c].quant_tbl_no = jparams.quant_indexes[c];
+ }
+ for (const auto& table : jparams.quant_tables) {
+ if (table.add_raw) {
+ // Raw tables bypass jpegli_add_quant_table: the final quantval entries
+ // are written directly into a freshly allocated table.
+ cinfo->quant_tbl_ptrs[table.slot_idx] =
+ jpegli_alloc_quant_table((j_common_ptr)cinfo);
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ cinfo->quant_tbl_ptrs[table.slot_idx]->quantval[k] =
+ table.quantval[k];
+ }
+ cinfo->quant_tbl_ptrs[table.slot_idx]->sent_table = FALSE;
+ } else {
+ jpegli_add_quant_table(cinfo, table.slot_idx, &table.basic_table[0],
+ table.scale_factor, table.force_baseline);
+ }
+ }
+ }
+ if (jparams.simple_progression) {
+ jpegli_simple_progression(cinfo);
+ JXL_CHECK(jparams.progressive_mode == -1);
+ }
+ // progressive_mode above 2 selects kTestScript[progressive_mode - 3];
+ // values 0..2 are passed to jpegli_set_progressive_level.
+ if (jparams.progressive_mode > 2) {
+ const ScanScript& script = kTestScript[jparams.progressive_mode - 3];
+ cinfo->scan_info = script.scans;
+ cinfo->num_scans = script.num_scans;
+ } else if (jparams.progressive_mode >= 0) {
+ jpegli_set_progressive_level(cinfo, jparams.progressive_mode);
+ }
+ jpegli_set_input_format(cinfo, input.data_type, input.endianness);
+ jpegli_enable_adaptive_quantization(cinfo, jparams.use_adaptive_quantization);
+ cinfo->restart_interval = jparams.restart_interval;
+ cinfo->restart_in_rows = jparams.restart_in_rows;
+ cinfo->smoothing_factor = jparams.smoothing_factor;
+ // Any optimize_coding value other than 0 or 1 keeps the encoder default.
+ if (jparams.optimize_coding == 1) {
+ cinfo->optimize_coding = TRUE;
+ } else if (jparams.optimize_coding == 0) {
+ cinfo->optimize_coding = FALSE;
+ }
+ cinfo->raw_data_in = !input.raw_data.empty();
+ if (jparams.optimize_coding == 0 && jparams.use_flat_dc_luma_code) {
+ // Replace DC table 0 with a flat code: bits[4] = 15 and symbols 0..14.
+ JHUFF_TBL* tbl = cinfo->dc_huff_tbl_ptrs[0];
+ memset(tbl, 0, sizeof(*tbl));
+ tbl->bits[4] = 15;
+ for (int i = 0; i < 15; ++i) tbl->huffval[i] = i;
+ }
+ // Pixel / raw data input: start the compressor here. The coefficient path
+ // is started further down through jpegli_write_coefficients instead.
+ if (input.coeffs.empty()) {
+ bool write_all_tables = TRUE;
+ if (jparams.optimize_coding == 0 && !jparams.use_flat_dc_luma_code &&
+ jparams.omit_standard_tables) {
+ // Flag the first two DC and AC Huffman tables as already sent and do not
+ // ask for all tables to be written.
+ write_all_tables = FALSE;
+ cinfo->dc_huff_tbl_ptrs[0]->sent_table = TRUE;
+ cinfo->dc_huff_tbl_ptrs[1]->sent_table = TRUE;
+ cinfo->ac_huff_tbl_ptrs[0]->sent_table = TRUE;
+ cinfo->ac_huff_tbl_ptrs[1]->sent_table = TRUE;
+ }
+ jpegli_start_compress(cinfo, write_all_tables);
+ if (jparams.add_marker) {
+ // The first special marker is written in one call, the second one
+ // through the header + byte-wise API, followed by a sequence of markers
+ // with varying payload lengths.
+ jpegli_write_marker(cinfo, kSpecialMarker0, kMarkerData,
+ sizeof(kMarkerData));
+ jpegli_write_m_header(cinfo, kSpecialMarker1, sizeof(kMarkerData));
+ for (size_t p = 0; p < sizeof(kMarkerData); ++p) {
+ jpegli_write_m_byte(cinfo, kMarkerData[p]);
+ }
+ for (size_t i = 0; i < kMarkerSequenceLen; ++i) {
+ jpegli_write_marker(cinfo, kMarkerSequence[i], kMarkerData,
+ ((i + 2) % sizeof(kMarkerData)));
+ }
+ }
+ if (!jparams.icc.empty()) {
+ jpegli_write_icc_profile(cinfo, jparams.icc.data(), jparams.icc.size());
+ }
+ }
+ if (cinfo->raw_data_in) {
+ // Raw data path: hand over max_v_sample * DCTSIZE image lines per call.
+ // Need to copy because jpeg API requires non-const pointers.
+ std::vector<std::vector<uint8_t>> raw_data = input.raw_data;
+ size_t max_lines = jparams.max_v_sample() * DCTSIZE;
+ std::vector<std::vector<JSAMPROW>> rowdata(cinfo->num_components);
+ std::vector<JSAMPARRAY> data(cinfo->num_components);
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ rowdata[c].resize(jparams.v_samp(c) * DCTSIZE);
+ data[c] = &rowdata[c][0];
+ }
+ while (cinfo->next_scanline < cinfo->image_height) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ size_t cwidth = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+ size_t cheight = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+ size_t num_lines = jparams.v_samp(c) * DCTSIZE;
+ size_t y0 = (cinfo->next_scanline / max_lines) * num_lines;
+ // Rows beyond the component's padded height are passed as nullptr.
+ for (size_t i = 0; i < num_lines; ++i) {
+ rowdata[c][i] =
+ (y0 + i < cheight ? &raw_data[c][(y0 + i) * cwidth] : nullptr);
+ }
+ }
+ size_t num_lines = jpegli_write_raw_data(cinfo, &data[0], max_lines);
+ JXL_CHECK(num_lines == max_lines);
+ }
+ } else if (!input.coeffs.empty()) {
+ // Coefficient path: request one virtual block array per component, start
+ // the compressor with jpegli_write_coefficients, then fill the arrays.
+ j_common_ptr comptr = reinterpret_cast<j_common_ptr>(cinfo);
+ jvirt_barray_ptr* coef_arrays = reinterpret_cast<jvirt_barray_ptr*>((
+ *cinfo->mem->alloc_small)(
+ comptr, JPOOL_IMAGE, cinfo->num_components * sizeof(jvirt_barray_ptr)));
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ size_t xsize_blocks = jparams.comp_width(input, c) / DCTSIZE;
+ size_t ysize_blocks = jparams.comp_height(input, c) / DCTSIZE;
+ coef_arrays[c] = (*cinfo->mem->request_virt_barray)(
+ comptr, JPOOL_IMAGE, FALSE, xsize_blocks, ysize_blocks,
+ cinfo->comp_info[c].v_samp_factor);
+ }
+ jpegli_write_coefficients(cinfo, coef_arrays);
+ if (jparams.add_marker) {
+ jpegli_write_marker(cinfo, kSpecialMarker0, kMarkerData,
+ sizeof(kMarkerData));
+ jpegli_write_m_header(cinfo, kSpecialMarker1, sizeof(kMarkerData));
+ for (size_t p = 0; p < sizeof(kMarkerData); ++p) {
+ jpegli_write_m_byte(cinfo, kMarkerData[p]);
+ }
+ }
+ // Copy the coefficients into the virtual arrays one block row at a time.
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ for (size_t by = 0; by < comp->height_in_blocks; ++by) {
+ JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)(
+ comptr, coef_arrays[c], by, 1, true);
+ size_t stride = comp->width_in_blocks * sizeof(JBLOCK);
+ size_t offset = by * comp->width_in_blocks * DCTSIZE2;
+ memcpy(ba[0], &input.coeffs[c][offset], stride);
+ }
+ }
+ } else {
+ // Scanline path: each row is copied into a mutable scratch buffer because
+ // the write call takes non-const row pointers.
+ size_t stride = cinfo->image_width * cinfo->input_components *
+ jpegli_bytes_per_sample(input.data_type);
+ std::vector<uint8_t> row_bytes(stride);
+ for (size_t y = 0; y < cinfo->image_height; ++y) {
+ memcpy(&row_bytes[0], &input.pixels[y * stride], stride);
+ JSAMPROW row[] = {row_bytes.data()};
+ jpegli_write_scanlines(cinfo, row, 1);
+ }
+ }
+ jpegli_finish_compress(cinfo);
+ }
+
+ // Compresses `input` to an in-memory JPEG stream. On success the bytes are
+ // copied into `compressed` and true is returned; if the encoder reports an
+ // error, false is returned and `compressed` is left untouched.
+ bool EncodeWithJpegli(const TestImage& input, const CompressParams& jparams,
+ std::vector<uint8_t>* compressed) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ // The encoding runs inside a lambda because ERROR_HANDLER_SETUP expands to
+ // code that executes `return false` when the error handler longjmps back.
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ EncodeWithJpegli(input, jparams, &cinfo);
+ return true;
+ };
+ bool success = try_catch_block();
+ // The compress object is destroyed on both the success and the error path.
+ jpegli_destroy_compress(&cinfo);
+ if (success) {
+ compressed->resize(buffer_size);
+ std::copy_n(buffer, buffer_size, compressed->data());
+ }
+ // The buffer handed to jpegli_mem_dest is released here with std::free.
+ if (buffer) std::free(buffer);
+ return success;
+ }
+
+ // Returns the number of predefined scan scripts (kNumTestScripts).
+ int NumTestScanScripts() {
+   return kNumTestScripts;
+ }
+
+ // Debug helper: writes `image` to the file `fn` as a binary PGM (one
+ // component) or PPM (three components). The header's maxval is derived from
+ // the byte width of the sample type, and the pixel buffer is written out
+ // verbatim. Aborts for any other component count.
+ void DumpImage(const TestImage& image, const std::string fn) {
+   JXL_CHECK(image.components == 1 || image.components == 3);
+   size_t bytes_per_sample = jpegli_bytes_per_sample(image.data_type);
+   // Shift in 64 bits: with 4-byte samples the shift count is 32, which is
+   // undefined for a 32-bit operand.
+   uint32_t maxval =
+       static_cast<uint32_t>((uint64_t{1} << (8 * bytes_per_sample)) - 1);
+   char type = image.components == 1 ? '5' : '6';
+   std::ofstream out(fn.c_str(), std::ofstream::binary);
+   out << "P" << type << std::endl
+       << image.xsize << " " << image.ysize << std::endl
+       << maxval << std::endl;
+   out.write(reinterpret_cast<const char*>(image.pixels.data()),
+             image.pixels.size());
+   out.close();
+ }
+
+ // Returns the root-mean-square difference between `input` and `output`,
+ // expressed on a 0..255 scale.
+ //
+ // When both images carry interleaved pixels, the `num_lines` lines starting
+ // at `start_line` are compared; otherwise the raw_data planes of both images
+ // are compared in full (aborting if either has none). Samples are normalised
+ // to [0, 1] according to each image's own sample type and byte order, so the
+ // two images may use different sample formats. If `max_diff` is non-null it
+ // receives the largest absolute sample difference, also on the 0..255 scale.
+ double DistanceRms(const TestImage& input, const TestImage& output,
+                    size_t start_line, size_t num_lines, double* max_diff) {
+   // Both values are counted in samples, not bytes.
+   size_t stride = input.xsize * input.components;
+   size_t start_offset = start_line * stride;
+   auto get_sample = [&](const TestImage& im, const std::vector<uint8_t>& data,
+                         size_t idx) -> double {
+     size_t bytes_per_sample = jpegli_bytes_per_sample(im.data_type);
+     bool is_little_endian =
+         (im.endianness == JPEGLI_LITTLE_ENDIAN ||
+          (im.endianness == JPEGLI_NATIVE_ENDIAN && IsLittleEndian()));
+     // Scale the whole sample index, including the starting line, by the
+     // sample width; otherwise multi-byte images would be read from the wrong
+     // lines whenever start_line is non-zero.
+     size_t offset = (start_offset + idx) * bytes_per_sample;
+     // The complete sample has to lie inside the buffer.
+     JXL_CHECK(offset + bytes_per_sample <= data.size());
+     const uint8_t* p = &data[offset];
+     if (im.data_type == JPEGLI_TYPE_UINT8) {
+       static const double mul8 = 1.0 / 255.0;
+       return p[0] * mul8;
+     } else if (im.data_type == JPEGLI_TYPE_UINT16) {
+       static const double mul16 = 1.0 / 65535.0;
+       return (is_little_endian ? LoadLE16(p) : LoadBE16(p)) * mul16;
+     } else if (im.data_type == JPEGLI_TYPE_FLOAT) {
+       return (is_little_endian ? LoadLEFloat(p) : LoadBEFloat(p));
+     }
+     return 0.0;
+   };
+   double diff2 = 0.0;
+   size_t num_samples = 0;
+   if (max_diff) *max_diff = 0.0;
+   if (!input.pixels.empty() && !output.pixels.empty()) {
+     num_samples = num_lines * stride;
+     for (size_t i = 0; i < num_samples; ++i) {
+       double sample_orig = get_sample(input, input.pixels, i);
+       double sample_output = get_sample(output, output.pixels, i);
+       double diff = sample_orig - sample_output;
+       if (max_diff) *max_diff = std::max(*max_diff, 255.0 * std::abs(diff));
+       diff2 += diff * diff;
+     }
+   } else {
+     JXL_CHECK(!input.raw_data.empty());
+     JXL_CHECK(!output.raw_data.empty());
+     for (size_t c = 0; c < input.raw_data.size(); ++c) {
+       JXL_CHECK(c < output.raw_data.size());
+       num_samples += input.raw_data[c].size();
+       for (size_t i = 0; i < input.raw_data[c].size(); ++i) {
+         double sample_orig = get_sample(input, input.raw_data[c], i);
+         double sample_output = get_sample(output, output.raw_data[c], i);
+         double diff = sample_orig - sample_output;
+         if (max_diff) *max_diff = std::max(*max_diff, 255.0 * std::abs(diff));
+         diff2 += diff * diff;
+       }
+     }
+   }
+   return std::sqrt(diff2 / num_samples) * 255.0;
+ }
+
+ // Whole-image convenience overload: compares all output.ysize lines starting
+ // at line 0.
+ double DistanceRms(const TestImage& input, const TestImage& output,
+                    double* max_diff) {
+   const size_t first_line = 0;
+   const size_t line_count = output.ysize;
+   return DistanceRms(input, output, first_line, line_count, max_diff);
+ }
+
+ // Compares `num_lines` lines of the two images starting at `start_line`,
+ // prints the measured and the allowed distances, and aborts if either the
+ // RMS distance exceeds `max_rms` or the largest sample difference exceeds
+ // `max_diff`.
+ void VerifyOutputImage(const TestImage& input, const TestImage& output,
+                        size_t start_line, size_t num_lines, double max_rms,
+                        double max_diff) {
+   double max_d;
+   const double rms =
+       DistanceRms(input, output, start_line, num_lines, &max_d);
+   printf("rms: %f, max_rms: %f, max_d: %f, max_diff: %f\n", rms, max_rms,
+          max_d, max_diff);
+   JXL_CHECK(rms <= max_rms);
+   JXL_CHECK(max_d <= max_diff);
+ }
+
+ // Verifies that `output` matches `input`. Dimensions, component count and
+ // colorspace must be identical. When `input` carries DCT coefficients, the
+ // coefficients of every component must match exactly; otherwise all lines
+ // are compared within the given RMS / maximum-difference tolerances. Any
+ // violation aborts through JXL_CHECK.
+ void VerifyOutputImage(const TestImage& input, const TestImage& output,
+                        double max_rms, double max_diff) {
+   JXL_CHECK(output.xsize == input.xsize);
+   JXL_CHECK(output.ysize == input.ysize);
+   JXL_CHECK(output.components == input.components);
+   JXL_CHECK(output.color_space == input.color_space);
+   if (!input.coeffs.empty()) {
+     JXL_CHECK(input.coeffs.size() == input.components);
+     JXL_CHECK(output.coeffs.size() == input.components);
+     for (size_t c = 0; c < input.components; ++c) {
+       JXL_CHECK(output.coeffs[c].size() == input.coeffs[c].size());
+       // memcmp takes a byte count; size() alone is the number of JCOEF
+       // elements and would leave part of the data unchecked.
+       JXL_CHECK(0 == memcmp(input.coeffs[c].data(), output.coeffs[c].data(),
+                             input.coeffs[c].size() * sizeof(JCOEF)));
+     }
+   } else {
+     VerifyOutputImage(input, output, 0, output.ysize, max_rms, max_diff);
+   }
+ }
+
+} // namespace jpegli
diff --git a/lib/jpegli/test_utils.h b/lib/jpegli/test_utils.h
new file mode 100644
index 0000000..132cfd0
--- /dev/null
+++ b/lib/jpegli/test_utils.h
@@ -0,0 +1,130 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_TEST_UTILS_H_
+#define LIB_JPEGLI_TEST_UTILS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+#include <setjmp.h>
+/* clang-format on */
+
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/libjpeg_test_util.h"
+#include "lib/jpegli/test_params.h"
+
+namespace jpegli {
+
+ #define ERROR_HANDLER_SETUP(flavor) /* setjmp-based error trap; needs a `cinfo` in scope and an enclosing function returning bool */ \
+ jpeg_error_mgr jerr; \
+ jmp_buf env; \
+ cinfo.err = flavor##_std_error(&jerr); /* `flavor` is pasted into the API name */ \
+ if (setjmp(env)) { /* nonzero only when re-entered via the longjmp below */ \
+ return false; \
+ } \
+ cinfo.client_data = reinterpret_cast<void*>(&env); /* lets the handler find env */ \
+ cinfo.err->error_exit = [](j_common_ptr cinfo) { \
+ (*cinfo->err->output_message)(cinfo); /* report the error first */ \
+ jmp_buf* env = reinterpret_cast<jmp_buf*>(cinfo->client_data); \
+ flavor##_destroy(cinfo); /* destroy the failing object before jumping out */ \
+ longjmp(*env, 1); \
+ };
+
+std::string IOMethodName(JpegliDataType data_type, JpegliEndianness endianness);
+
+std::string ColorSpaceName(J_COLOR_SPACE colorspace);
+
+std::ostream& operator<<(std::ostream& os, const TestImage& input);
+
+std::ostream& operator<<(std::ostream& os, const CompressParams& jparams);
+
+int NumTestScanScripts();
+
+void VerifyHeader(const CompressParams& jparams, j_decompress_ptr cinfo);
+void VerifyScanHeader(const CompressParams& jparams, j_decompress_ptr cinfo);
+
+void SetDecompressParams(const DecompressParams& dparams,
+ j_decompress_ptr cinfo);
+
+void SetScanDecompressParams(const DecompressParams& dparams,
+ j_decompress_ptr cinfo, int scan_number);
+
+void CopyCoefficients(j_decompress_ptr cinfo, jvirt_barray_ptr* coef_arrays,
+ TestImage* output);
+
+void UnmapColors(uint8_t* row, size_t xsize, int components,
+ JSAMPARRAY colormap, size_t num_colors);
+
+std::string GetTestDataPath(const std::string& filename);
+std::vector<uint8_t> ReadTestData(const std::string& filename);
+
+ // Header parser over an in-memory byte buffer. The buffer is referenced, not
+ // copied; the parser only keeps a cursor and an end pointer into it.
+ class PNMParser {
+  public:
+   // Positions the cursor at `data`; `len` is the number of readable bytes.
+   explicit PNMParser(const uint8_t* data, const size_t len)
+       : pos_(data), end_(data + len) {}
+
+   // Sets "pos" to the first non-header byte/pixel on success.
+   bool ParseHeader(const uint8_t** pos, size_t* xsize, size_t* ysize,
+                    size_t* num_channels, size_t* bitdepth);
+
+  private:
+   // True for carriage return and line feed.
+   static bool IsLineBreak(const uint8_t c) {
+     switch (c) {
+       case '\r':
+       case '\n':
+         return true;
+       default:
+         return false;
+     }
+   }
+
+   // True for line breaks, horizontal tab and space.
+   static bool IsWhitespace(const uint8_t c) {
+     if (IsLineBreak(c)) return true;
+     return c == '\t' || c == ' ';
+   }
+
+   bool ParseUnsigned(size_t* number);
+
+   bool SkipWhitespace();
+
+   const uint8_t* pos_;        // current read position
+   const uint8_t* const end_;  // one past the last readable byte
+ };
+
+bool ReadPNM(const std::vector<uint8_t>& data, size_t* xsize, size_t* ysize,
+ size_t* num_channels, size_t* bitdepth,
+ std::vector<uint8_t>* pixels);
+
+void SetNumChannels(J_COLOR_SPACE colorspace, size_t* channels);
+
+void ConvertToGrayscale(TestImage* img);
+
+void GeneratePixels(TestImage* img);
+
+void GenerateRawData(const CompressParams& jparams, TestImage* img);
+
+void GenerateCoeffs(const CompressParams& jparams, TestImage* img);
+
+void EncodeWithJpegli(const TestImage& input, const CompressParams& jparams,
+ j_compress_ptr cinfo);
+
+bool EncodeWithJpegli(const TestImage& input, const CompressParams& jparams,
+ std::vector<uint8_t>* compressed);
+
+ double DistanceRms(const TestImage& input, const TestImage& output,
+ size_t start_line, size_t num_lines,
+ double* max_diff = nullptr);  // returns sqrt(mean squared sample difference) * 255; a non-null max_diff is updated with the largest 255 * |difference|
+
+ double DistanceRms(const TestImage& input, const TestImage& output,
+ double* max_diff = nullptr);  // same measurement with start_line = 0 and num_lines = output.ysize
+
+ void VerifyOutputImage(const TestImage& input, const TestImage& output,
+ size_t start_line, size_t num_lines, double max_rms,
+ double max_diff = 255.0);  // prints the measured values, then JXL_CHECKs rms <= max_rms and max sample difference <= max_diff
+
+ void VerifyOutputImage(const TestImage& input, const TestImage& output,
+ double max_rms, double max_diff = 255.0);  // checks size/components/color space; compares coeffs exactly when input has them, otherwise by RMS over the whole image
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_TEST_UTILS_H_
diff --git a/lib/jpegli/testing.h b/lib/jpegli/testing.h
new file mode 100644
index 0000000..873a017
--- /dev/null
+++ b/lib/jpegli/testing.h
@@ -0,0 +1,35 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_TESTING_H_
+#define LIB_JPEGLI_TESTING_H_
+
+// GTest/GMock specific macros / wrappers.
+
+// gmock unconditionally redefines those macros (to wrong values).
+ // Let's include it only here and mitigate the problem.
+#pragma push_macro("PRIdS")
+#pragma push_macro("PRIuS")
+#include "gmock/gmock.h"
+#pragma pop_macro("PRIuS")
+#pragma pop_macro("PRIdS")
+
+#include "gtest/gtest.h"
+
+// googletest before 1.10 didn't define INSTANTIATE_TEST_SUITE_P() but instead
+// used INSTANTIATE_TEST_CASE_P which is now deprecated.
+#ifdef INSTANTIATE_TEST_SUITE_P
+#define JPEGLI_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_SUITE_P
+#else
+#define JPEGLI_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_CASE_P
+#endif
+
+// Ensures that we don't make our test bounds too lax, effectively disabling the
+// tests.
+ MATCHER_P(IsSlightlyBelow, max, "") {
+   // Matches when 0.75 * max <= arg <= max.
+   const auto lower_bound = max * 0.75;
+   const auto upper_bound = max * 1.0;
+   return lower_bound <= arg && arg <= upper_bound;
+ }
+
+#endif // LIB_JPEGLI_TESTING_H_
diff --git a/lib/jpegli/transcode_api_test.cc b/lib/jpegli/transcode_api_test.cc
new file mode 100644
index 0000000..1d99ce3
--- /dev/null
+++ b/lib/jpegli/transcode_api_test.cc
@@ -0,0 +1,133 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <vector>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/base/status.h"
+
+namespace jpegli {
+namespace {
+
+ void TranscodeWithJpegli(const std::vector<uint8_t>& jpeg_input,  // re-encodes jpeg_input by handing the decoder's coefficient arrays to jpegli_write_coefficients
+ const CompressParams& jparams,  // only progressive_mode and optimize_coding are read here
+ std::vector<uint8_t>* jpeg_output) {  // receives the transcoded bytes when an output buffer was produced
+ jpeg_decompress_struct dinfo = {};
+ jpeg_compress_struct cinfo = {};
+ uint8_t* transcoded_data = nullptr;  // handed to jpegli_mem_dest; released with free() below
+ unsigned long transcoded_size;  // only read after transcoded_data is found non-null
+ const auto try_catch_block = [&]() -> bool {  // returns false when the trap installed by ERROR_HANDLER_SETUP fires
+ ERROR_HANDLER_SETUP(jpegli);
+ dinfo.err = cinfo.err;  // the decoder shares the encoder's error manager
+ dinfo.client_data = cinfo.client_data;  // ... and its jump buffer
+ jpegli_create_decompress(&dinfo);
+ jpegli_mem_src(&dinfo, jpeg_input.data(), jpeg_input.size());
+ EXPECT_EQ(JPEG_REACHED_SOS,
+ jpegli_read_header(&dinfo, /*require_image=*/TRUE));
+ jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&dinfo);
+ JXL_CHECK(coef_arrays != nullptr);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &transcoded_data, &transcoded_size);
+ jpegli_copy_critical_parameters(&dinfo, &cinfo);
+ jpegli_set_progressive_level(&cinfo, jparams.progressive_mode);
+ cinfo.optimize_coding = jparams.optimize_coding;
+ jpegli_write_coefficients(&cinfo, coef_arrays);
+ jpegli_finish_compress(&cinfo);  // the compressor is finished before the decompressor, as coef_arrays came from dinfo
+ jpegli_finish_decompress(&dinfo);
+ return true;
+ };
+ ASSERT_TRUE(try_catch_block());  // NOTE(review): a failed ASSERT returns from this function, skipping the destroy calls and free() below
+ jpegli_destroy_decompress(&dinfo);
+ jpegli_destroy_compress(&cinfo);
+ if (transcoded_data) {
+ jpeg_output->assign(transcoded_data, transcoded_data + transcoded_size);
+ free(transcoded_data);
+ }
+ }
+
+ struct TestConfig {  // one parameter set for TranscodeAPITestParam
+ TestImage input;  // source image; the test fills it with GeneratePixels
+ CompressParams jparams;  // encoder settings; the test overrides progressive_mode and optimize_coding
+ };
+
+ class TranscodeAPITestParam : public ::testing::TestWithParam<TestConfig> {};  // value-parameterized fixture over TestConfig
+
+ // Encodes a generated image as a sequential, non-optimized JPEG, then
+ // transcodes it in two chained steps (optimized sequential, then progressive).
+ // Each step must shrink the file by more than 2% and decode to exactly the
+ // same pixels as the initial JPEG.
+ TEST_P(TranscodeAPITestParam, TestAPI) {
+   TestConfig config = GetParam();
+   GeneratePixels(&config.input);
+   CompressParams& jparams = config.jparams;
+
+   // Start with sequential non-optimized jpeg.
+   jparams.progressive_mode = 0;
+   jparams.optimize_coding = 0;
+   std::vector<uint8_t> compressed;
+   ASSERT_TRUE(EncodeWithJpegli(config.input, jparams, &compressed));
+   TestImage reference;
+   DecodeWithLibjpeg(jparams, DecompressParams(), compressed, &reference);
+
+   // Transcode first to a sequential optimized jpeg, and then further to
+   // a progressive jpeg.
+   const int kProgressiveLevels[] = {0, 2};
+   for (const int level : kProgressiveLevels) {
+     jparams.progressive_mode = level;
+     jparams.optimize_coding = 1;
+     std::vector<uint8_t> transcoded;
+     TranscodeWithJpegli(compressed, jparams, &transcoded);
+
+     // We expect a size reduction of at least 2%.
+     EXPECT_LT(transcoded.size(), compressed.size() * 0.98f);
+
+     // Verify that transcoding is lossless.
+     TestImage decoded;
+     DecodeWithLibjpeg(jparams, DecompressParams(), transcoded, &decoded);
+     ASSERT_EQ(reference.pixels.size(), decoded.pixels.size());
+     EXPECT_EQ(0, memcmp(reference.pixels.data(), decoded.pixels.data(),
+                         reference.pixels.size()));
+
+     // The next round transcodes this round's output.
+     compressed = transcoded;
+   }
+ }
+
+ // Builds the parameter list: every combination of a width offset, a height
+ // offset (both added to a 1024x768 base size) and horizontal / vertical
+ // sampling factors for the first component. The remaining two components
+ // always use factor 1.
+ std::vector<TestConfig> GenerateTests() {
+   const size_t kBaseXSize = 1024;
+   const size_t kBaseYSize = 768;
+   const int kSizeDeltas[] = {0, 1, 8, 9};
+   const int kSamplingFactors[] = {1, 2};
+
+   std::vector<TestConfig> tests;
+   for (const int dx : kSizeDeltas) {
+     for (const int dy : kSizeDeltas) {
+       for (const int h_samp : kSamplingFactors) {
+         for (const int v_samp : kSamplingFactors) {
+           TestConfig config;
+           config.input.xsize = kBaseXSize + dx;
+           config.input.ysize = kBaseYSize + dy;
+           config.jparams.h_sampling = {h_samp, 1, 1};
+           config.jparams.v_sampling = {v_samp, 1, 1};
+           tests.push_back(config);
+         }
+       }
+     }
+   }
+   return tests;
+ }
+
+ // Streams the input image description followed by the compression parameters.
+ std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+   return os << c.input << c.jparams;
+ }
+
+ // Produces the name of a test instance by streaming its TestConfig into a
+ // string.
+ std::string TestDescription(
+     const testing::TestParamInfo<TranscodeAPITestParam::ParamType>& info) {
+   std::stringstream description;
+   description << info.param;
+   return description.str();
+ }
+
+ JPEGLI_INSTANTIATE_TEST_SUITE_P(TranscodeAPITest, TranscodeAPITestParam,
+ testing::ValuesIn(GenerateTests()),  // one test instance per generated TestConfig
+ TestDescription);  // instance names come from the streamed TestConfig
+
+} // namespace
+} // namespace jpegli
diff --git a/lib/jpegli/transpose-inl.h b/lib/jpegli/transpose-inl.h
new file mode 100644
index 0000000..9fdd222
--- /dev/null
+++ b/lib/jpegli/transpose-inl.h
@@ -0,0 +1,111 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JPEGLI_TRANSPOSE_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JPEGLI_TRANSPOSE_INL_H_
+#undef LIB_JPEGLI_TRANSPOSE_INL_H_
+#else
+#define LIB_JPEGLI_TRANSPOSE_INL_H_
+#endif
+
+#include "lib/jxl/base/compiler_specific.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+namespace {
+
+#if HWY_CAP_GE256
+ static JXL_INLINE void Transpose8x8Block(const float* JXL_RESTRICT from,  // HWY_CAP_GE256 variant: handles the 64-float block as eight 8-float rows
+ float* JXL_RESTRICT to) {
+ const HWY_CAPPED(float, 8) d;  // float vectors capped at 8 lanes
+ auto i0 = Load(d, from);  // i0..i7: rows 0..7 of the input block
+ auto i1 = Load(d, from + 1 * 8);
+ auto i2 = Load(d, from + 2 * 8);
+ auto i3 = Load(d, from + 3 * 8);
+ auto i4 = Load(d, from + 4 * 8);
+ auto i5 = Load(d, from + 5 * 8);
+ auto i6 = Load(d, from + 6 * 8);
+ auto i7 = Load(d, from + 7 * 8);
+
+ const auto q0 = InterleaveLower(d, i0, i2);  // stage 1: interleave rows two apart (0/2, 1/3, 4/6, 5/7)
+ const auto q1 = InterleaveLower(d, i1, i3);
+ const auto q2 = InterleaveUpper(d, i0, i2);
+ const auto q3 = InterleaveUpper(d, i1, i3);
+ const auto q4 = InterleaveLower(d, i4, i6);
+ const auto q5 = InterleaveLower(d, i5, i7);
+ const auto q6 = InterleaveUpper(d, i4, i6);
+ const auto q7 = InterleaveUpper(d, i5, i7);
+
+ const auto r0 = InterleaveLower(d, q0, q1);  // stage 2: interleave neighbouring stage-1 results
+ const auto r1 = InterleaveUpper(d, q0, q1);
+ const auto r2 = InterleaveLower(d, q2, q3);
+ const auto r3 = InterleaveUpper(d, q2, q3);
+ const auto r4 = InterleaveLower(d, q4, q5);
+ const auto r5 = InterleaveUpper(d, q4, q5);
+ const auto r6 = InterleaveLower(d, q6, q7);
+ const auto r7 = InterleaveUpper(d, q6, q7);
+
+ i0 = ConcatLowerLower(d, r4, r0);  // stage 3: lower halves of r0..r3 and r4..r7 form output rows 0..3
+ i1 = ConcatLowerLower(d, r5, r1);
+ i2 = ConcatLowerLower(d, r6, r2);
+ i3 = ConcatLowerLower(d, r7, r3);
+ i4 = ConcatUpperUpper(d, r4, r0);  // upper halves form output rows 4..7
+ i5 = ConcatUpperUpper(d, r5, r1);
+ i6 = ConcatUpperUpper(d, r6, r2);
+ i7 = ConcatUpperUpper(d, r7, r3);
+
+ Store(i0, d, to);  // write the eight result rows
+ Store(i1, d, to + 1 * 8);
+ Store(i2, d, to + 2 * 8);
+ Store(i3, d, to + 3 * 8);
+ Store(i4, d, to + 4 * 8);
+ Store(i5, d, to + 5 * 8);
+ Store(i6, d, to + 6 * 8);
+ Store(i7, d, to + 7 * 8);
+ }
+#elif HWY_TARGET != HWY_SCALAR
+ static JXL_INLINE void Transpose8x8Block(const float* JXL_RESTRICT from,  // variant for non-scalar targets without 256-bit vectors: works in four 4x4 tiles
+ float* JXL_RESTRICT to) {
+ const HWY_CAPPED(float, 4) d;  // float vectors capped at 4 lanes
+ for (size_t n = 0; n < 8; n += 4) {  // n: first source row of the tile
+ for (size_t m = 0; m < 8; m += 4) {  // m: first source column of the tile
+ auto p0 = Load(d, from + n * 8 + m);  // p0..p3: the tile's four 4-float row segments
+ auto p1 = Load(d, from + (n + 1) * 8 + m);
+ auto p2 = Load(d, from + (n + 2) * 8 + m);
+ auto p3 = Load(d, from + (n + 3) * 8 + m);
+ const auto q0 = InterleaveLower(d, p0, p2);  // stage 1: interleave segments two apart
+ const auto q1 = InterleaveLower(d, p1, p3);
+ const auto q2 = InterleaveUpper(d, p0, p2);
+ const auto q3 = InterleaveUpper(d, p1, p3);
+
+ const auto r0 = InterleaveLower(d, q0, q1);  // stage 2: interleave neighbouring stage-1 results
+ const auto r1 = InterleaveUpper(d, q0, q1);
+ const auto r2 = InterleaveLower(d, q2, q3);
+ const auto r3 = InterleaveUpper(d, q2, q3);
+ Store(r0, d, to + m * 8 + n);  // the tile is written at the mirrored position: rows m.., column n
+ Store(r1, d, to + (1 + m) * 8 + n);
+ Store(r2, d, to + (2 + m) * 8 + n);
+ Store(r3, d, to + (3 + m) * 8 + n);
+ }
+ }
+ }
+#else
+ // Scalar fallback: copies element (r, c) of `from` to element (c, r) of `to`,
+ // treating index 8 * r + c as element (r, c) of an 8x8 block.
+ static JXL_INLINE void Transpose8x8Block(const float* JXL_RESTRICT from,
+                                          float* JXL_RESTRICT to) {
+   constexpr size_t kBlockDim = 8;
+   for (size_t src_row = 0; src_row < kBlockDim; ++src_row) {
+     const float* src_line = from + kBlockDim * src_row;
+     for (size_t src_col = 0; src_col < kBlockDim; ++src_col) {
+       to[kBlockDim * src_col + src_row] = src_line[src_col];
+     }
+   }
+ }
+#endif
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+#endif // LIB_JPEGLI_TRANSPOSE_INL_H_
diff --git a/lib/jpegli/types.h b/lib/jpegli/types.h
new file mode 100644
index 0000000..2f446b7
--- /dev/null
+++ b/lib/jpegli/types.h
@@ -0,0 +1,38 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_TYPES_H_
+#define LIB_JPEGLI_TYPES_H_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//
+// New API structs and functions that are not available in libjpeg
+//
+// NOTE: This part of the API is still experimental and will probably change in
+// the future.
+//
+
+ typedef enum {  // sample data type selector; accepted by jpegli_bytes_per_sample() below
+ JPEGLI_TYPE_FLOAT = 0,
+ JPEGLI_TYPE_UINT8 = 2,  // note: no enumerator is assigned the value 1
+ JPEGLI_TYPE_UINT16 = 3,
+ } JpegliDataType;
+
+ typedef enum {  // byte-order selector; NOTE(review): presumably applies to multi-byte samples - confirm against the users of this enum
+ JPEGLI_NATIVE_ENDIAN = 0,
+ JPEGLI_LITTLE_ENDIAN = 1,
+ JPEGLI_BIG_ENDIAN = 2,
+ } JpegliEndianness;
+
+int jpegli_bytes_per_sample(JpegliDataType data_type);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif // LIB_JPEGLI_TYPES_H_
diff --git a/lib/jpegli/upsample.cc b/lib/jpegli/upsample.cc
new file mode 100644
index 0000000..5559aa7
--- /dev/null
+++ b/lib/jpegli/upsample.cc
@@ -0,0 +1,137 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/upsample.h"
+
+#include <string.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/upsample.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Vec;
+
+#if HWY_CAP_GE512
+using hwy::HWY_NAMESPACE::Half;
+using hwy::HWY_NAMESPACE::Vec;
+ template <size_t i, class DF, class V>
+ HWY_INLINE Vec<Half<Half<DF>>> Quarter(const DF df, V v) {  // returns quarter number i of v (0 = lowest, 3 = highest); df only fixes the types
+ using HF = Half<DF>;
+ using HHF = Half<HF>;
+ auto half = i >= 2 ? UpperHalf(HF(), v) : LowerHalf(HF(), v);  // quarters 2 and 3 live in the upper half
+ return i & 1 ? UpperHalf(HHF(), half) : LowerHalf(HHF(), half);  // odd i selects the upper quarter of that half
+ }
+
+ template <class DF, class V>
+ HWY_INLINE Vec<DF> Concat4(const DF df, V v0, V v1, V v2, V v3) {  // assembles a full vector from four parts, v0 lowest through v3 highest
+ using HF = Half<DF>;
+ return Combine(DF(), Combine(HF(), v3, v2), Combine(HF(), v1, v0));  // Highway's Combine takes (d, upper, lower)
+ }
+
+#endif
+
+ // Stores v0[0], v1[0], v0[1], v1[1], ... to mem, in this order. Mem must be
+ // aligned.
+ template <class DF, class V, typename T>
+ void StoreInterleaved(const DF df, V v0, V v1, T* mem) {  // writes 2 * Lanes(df) elements in total
+ static_assert(sizeof(T) == 4, "only use StoreInterleaved for 4-byte types");
+ #if HWY_TARGET == HWY_SCALAR  // scalar target: store the two values back to back
+ Store(v0, df, mem);
+ Store(v1, df, mem + 1);
+ #elif !HWY_CAP_GE256  // no 256-bit vectors: the two interleave results are stored as-is, lower first
+ Store(InterleaveLower(df, v0, v1), df, mem);
+ Store(InterleaveUpper(df, v0, v1), df, mem + Lanes(df));
+ #else
+ if (!HWY_CAP_GE512 || Lanes(df) == 8) {  // 8-lane vectors: regroup the halves of the interleave results
+ auto t0 = InterleaveLower(df, v0, v1);
+ auto t1 = InterleaveUpper(df, v0, v1);
+ Store(ConcatLowerLower(df, t1, t0), df, mem);  // lower halves of t0 then t1
+ Store(ConcatUpperUpper(df, t1, t0), df, mem + Lanes(df));  // upper halves of t0 then t1
+ } else {  // wider vectors: the same regrouping, done per quarter
+ #if HWY_CAP_GE512
+ auto t0 = InterleaveLower(df, v0, v1);
+ auto t1 = InterleaveUpper(df, v0, v1);
+ Store(Concat4(df, Quarter<0>(df, t0), Quarter<0>(df, t1),
+ Quarter<1>(df, t0), Quarter<1>(df, t1)),
+ df, mem);  // quarters 0 and 1 of t0/t1, alternating
+ Store(Concat4(df, Quarter<2>(df, t0), Quarter<2>(df, t1),
+ Quarter<3>(df, t0), Quarter<3>(df, t1)),
+ df, mem + Lanes(df));  // quarters 2 and 3 of t0/t1, alternating
+ #endif
+ }
+ #endif
+ }
+
+ // Upsamples `row` horizontally by a factor of two, in place.
+ //
+ // The first len_in = (len_out + 1) / 2 floats of `row` are the input. Each
+ // input sample s[x] produces two outputs, written to row[2x] and row[2x + 1]:
+ //   0.75 * s[x] + 0.25 * s[x - 1]   and   0.75 * s[x] + 0.25 * s[x + 1],
+ // where the first and last samples are replicated past the edges.
+ //
+ // `scratch_space` receives a copy of the input and is additionally written at
+ // index -1 and index len_in, so it must be writable one float before and
+ // after that range. The loop processes whole vectors, so both buffers must
+ // have room for len_in rounded up to a multiple of Lanes(df) inputs.
+ void Upsample2Horizontal(float* JXL_RESTRICT row,
+                          float* JXL_RESTRICT scratch_space, size_t len_out) {
+   // Nothing to produce; this also keeps `len_in - 1` below from wrapping
+   // around to a huge index.
+   if (len_out == 0) return;
+   HWY_FULL(float) df;
+   auto threefour = Set(df, 0.75f);
+   auto onefour = Set(df, 0.25f);
+   const size_t len_in = (len_out + 1) >> 1;
+   memcpy(scratch_space, row, len_in * sizeof(row[0]));
+   // Replicate the border samples so the unaligned neighbour loads below see
+   // valid data on both sides.
+   scratch_space[-1] = scratch_space[0];
+   scratch_space[len_in] = scratch_space[len_in - 1];
+   for (size_t x = 0; x < len_in; x += Lanes(df)) {
+     auto current = Mul(Load(df, scratch_space + x), threefour);
+     auto prev = LoadU(df, scratch_space + x - 1);
+     auto next = LoadU(df, scratch_space + x + 1);
+     auto left = MulAdd(onefour, prev, current);
+     auto right = MulAdd(onefour, next, current);
+     StoreInterleaved(df, left, right, row + x * 2);
+   }
+ }
+
+ // Produces the two output rows that replace `row_mid` when upsampling
+ // vertically by two:
+ //   row_out0 = 0.75 * row_mid + 0.25 * row_top
+ //   row_out1 = 0.75 * row_mid + 0.25 * row_bot
+ // Rows are processed a whole vector at a time, i.e. `len` rounded up to a
+ // multiple of Lanes(df) floats.
+ void Upsample2Vertical(const float* JXL_RESTRICT row_top,
+                        const float* JXL_RESTRICT row_mid,
+                        const float* JXL_RESTRICT row_bot,
+                        float* JXL_RESTRICT row_out0,
+                        float* JXL_RESTRICT row_out1, size_t len) {
+   HWY_FULL(float) df;
+   const size_t lanes = Lanes(df);
+   auto weight_mid = Set(df, 0.75f);
+   auto weight_neighbor = Set(df, 0.25f);
+   for (size_t x = 0; x < len; x += lanes) {
+     auto top = Load(df, row_top + x);
+     auto mid = Load(df, row_mid + x);
+     auto bot = Load(df, row_bot + x);
+     // The weighted middle row is shared by both outputs.
+     auto mid_weighted = Mul(mid, weight_mid);
+     Store(MulAdd(top, weight_neighbor, mid_weighted), df, row_out0 + x);
+     Store(MulAdd(bot, weight_neighbor, mid_weighted), df, row_out1 + x);
+   }
+ }
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+HWY_EXPORT(Upsample2Horizontal);
+HWY_EXPORT(Upsample2Vertical);
+
+ // Public entry point: forwards to the Upsample2Horizontal implementation
+ // chosen by HWY_DYNAMIC_DISPATCH.
+ void Upsample2Horizontal(float* JXL_RESTRICT row,
+                          float* JXL_RESTRICT scratch_space, size_t len_out) {
+   HWY_DYNAMIC_DISPATCH(Upsample2Horizontal)(row, scratch_space, len_out);
+ }
+
+ // Public entry point: forwards to the Upsample2Vertical implementation chosen
+ // by HWY_DYNAMIC_DISPATCH.
+ void Upsample2Vertical(const float* JXL_RESTRICT row_top,
+                        const float* JXL_RESTRICT row_mid,
+                        const float* JXL_RESTRICT row_bot,
+                        float* JXL_RESTRICT row_out0,
+                        float* JXL_RESTRICT row_out1, size_t len) {
+   HWY_DYNAMIC_DISPATCH(Upsample2Vertical)
+   (row_top, row_mid, row_bot, row_out0, row_out1, len);
+ }
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/lib/jpegli/upsample.h b/lib/jpegli/upsample.h
new file mode 100644
index 0000000..1a05720
--- /dev/null
+++ b/lib/jpegli/upsample.h
@@ -0,0 +1,26 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_UPSAMPLE_H_
+#define LIB_JPEGLI_UPSAMPLE_H_
+
+#include <stddef.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jpegli {
+
+ void Upsample2Horizontal(float* JXL_RESTRICT row,  // in/out: holds (len_out + 1) / 2 input samples, overwritten with the 2x upsampled row
+ float* JXL_RESTRICT scratch_space, size_t len_out);  // scratch_space is also written at index -1 and at index (len_out + 1) / 2
+
+ void Upsample2Vertical(const float* JXL_RESTRICT row_top,  // row_out0 = 0.75 * row_mid + 0.25 * row_top
+ const float* JXL_RESTRICT row_mid,
+ const float* JXL_RESTRICT row_bot,  // row_out1 = 0.75 * row_mid + 0.25 * row_bot
+ float* JXL_RESTRICT row_out0,
+ float* JXL_RESTRICT row_out1, size_t len);  // processes len floats per row, rounded up to a whole vector
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_UPSAMPLE_H_