diff options
Diffstat (limited to 'lib/jxl/enc_frame.cc')
-rw-r--r-- | lib/jxl/enc_frame.cc | 501 |
1 files changed, 258 insertions, 243 deletions
diff --git a/lib/jxl/enc_frame.cc b/lib/jxl/enc_frame.cc index f57175b..cfd97e9 100644 --- a/lib/jxl/enc_frame.cc +++ b/lib/jxl/enc_frame.cc @@ -19,31 +19,30 @@ #include "lib/jxl/ac_context.h" #include "lib/jxl/ac_strategy.h" #include "lib/jxl/ans_params.h" -#include "lib/jxl/aux_out.h" -#include "lib/jxl/aux_out_fwd.h" #include "lib/jxl/base/bits.h" +#include "lib/jxl/base/common.h" #include "lib/jxl/base/compiler_specific.h" #include "lib/jxl/base/data_parallel.h" #include "lib/jxl/base/override.h" -#include "lib/jxl/base/padded_bytes.h" -#include "lib/jxl/base/profiler.h" #include "lib/jxl/base/status.h" #include "lib/jxl/chroma_from_luma.h" #include "lib/jxl/coeff_order.h" #include "lib/jxl/coeff_order_fwd.h" #include "lib/jxl/color_encoding_internal.h" -#include "lib/jxl/color_management.h" -#include "lib/jxl/common.h" +#include "lib/jxl/common.h" // kMaxNumPasses #include "lib/jxl/compressed_dc.h" #include "lib/jxl/dct_util.h" #include "lib/jxl/enc_adaptive_quantization.h" #include "lib/jxl/enc_ans.h" +#include "lib/jxl/enc_aux_out.h" #include "lib/jxl/enc_bit_writer.h" #include "lib/jxl/enc_cache.h" #include "lib/jxl/enc_chroma_from_luma.h" #include "lib/jxl/enc_coeff_order.h" #include "lib/jxl/enc_context_map.h" #include "lib/jxl/enc_entropy_coder.h" +#include "lib/jxl/enc_fields.h" +#include "lib/jxl/enc_gaborish.h" #include "lib/jxl/enc_group.h" #include "lib/jxl/enc_modular.h" #include "lib/jxl/enc_noise.h" @@ -55,12 +54,13 @@ #include "lib/jxl/enc_toc.h" #include "lib/jxl/enc_xyb.h" #include "lib/jxl/fields.h" +#include "lib/jxl/frame_dimensions.h" #include "lib/jxl/frame_header.h" -#include "lib/jxl/gaborish.h" #include "lib/jxl/image.h" #include "lib/jxl/image_bundle.h" #include "lib/jxl/image_ops.h" #include "lib/jxl/loop_filter.h" +#include "lib/jxl/modular/options.h" #include "lib/jxl/quant_weights.h" #include "lib/jxl/quantizer.h" #include "lib/jxl/splines.h" @@ -69,140 +69,21 @@ namespace jxl { namespace { -void ClusterGroups(PassesEncoderState* enc_state) { - if (enc_state->shared.frame_header.passes.num_passes > 1) { - // TODO(veluca): implement this for progressive modes. - return; - } - // This only considers pass 0 for now. - std::vector<uint8_t> context_map; - EntropyEncodingData codes; - auto& ac = enc_state->passes[0].ac_tokens; - size_t limit = std::ceil(std::sqrt(ac.size())); - if (limit == 1) return; - size_t num_contexts = enc_state->shared.block_ctx_map.NumACContexts(); - std::vector<float> costs(ac.size()); - HistogramParams params; - params.uint_method = HistogramParams::HybridUintMethod::kNone; - params.lz77_method = HistogramParams::LZ77Method::kNone; - params.ans_histogram_strategy = - HistogramParams::ANSHistogramStrategy::kApproximate; - size_t max = 0; - auto token_cost = [&](std::vector<std::vector<Token>>& tokens, size_t num_ctx, - bool estimate = true) { - // TODO(veluca): not estimating is very expensive. - BitWriter writer; - size_t c = BuildAndEncodeHistograms( - params, num_ctx, tokens, &codes, &context_map, - estimate ? nullptr : &writer, 0, /*aux_out=*/0); - if (estimate) return c; - for (size_t i = 0; i < tokens.size(); i++) { - WriteTokens(tokens[i], codes, context_map, &writer, 0, nullptr); - } - return writer.BitsWritten(); - }; - for (size_t i = 0; i < ac.size(); i++) { - std::vector<std::vector<Token>> tokens{ac[i]}; - costs[i] = - token_cost(tokens, enc_state->shared.block_ctx_map.NumACContexts()); - if (costs[i] > costs[max]) { - max = i; - } - } - auto dist = [&](int i, int j) { - std::vector<std::vector<Token>> tokens{ac[i], ac[j]}; - return token_cost(tokens, num_contexts) - costs[i] - costs[j]; - }; - std::vector<size_t> out{max}; - std::vector<float> dists(ac.size()); - size_t farthest = 0; - for (size_t i = 0; i < ac.size(); i++) { - if (i == max) continue; - dists[i] = dist(max, i); - if (dists[i] > dists[farthest]) { - farthest = i; - } - } - - while (dists[farthest] > 0 && out.size() < limit) { - out.push_back(farthest); - dists[farthest] = 0; - enc_state->histogram_idx[farthest] = out.size() - 1; - for (size_t i = 0; i < ac.size(); i++) { - float d = dist(out.back(), i); - if (d < dists[i]) { - dists[i] = d; - enc_state->histogram_idx[i] = out.size() - 1; - } - if (dists[i] > dists[farthest]) { - farthest = i; - } - } - } - - std::vector<size_t> remap(out.size()); - std::iota(remap.begin(), remap.end(), 0); - for (size_t i = 0; i < enc_state->histogram_idx.size(); i++) { - enc_state->histogram_idx[i] = remap[enc_state->histogram_idx[i]]; - } - auto remap_cost = [&](std::vector<size_t> remap) { - std::vector<size_t> re_remap(remap.size(), remap.size()); - size_t r = 0; - for (size_t i = 0; i < remap.size(); i++) { - if (re_remap[remap[i]] == remap.size()) { - re_remap[remap[i]] = r++; - } - remap[i] = re_remap[remap[i]]; - } - auto tokens = ac; - size_t max_hist = 0; - for (size_t i = 0; i < tokens.size(); i++) { - for (size_t j = 0; j < tokens[i].size(); j++) { - size_t hist = remap[enc_state->histogram_idx[i]]; - tokens[i][j].context += hist * num_contexts; - max_hist = std::max(hist + 1, max_hist); - } - } - return token_cost(tokens, max_hist * num_contexts, /*estimate=*/false); - }; - - for (size_t src = 0; src < out.size(); src++) { - float cost = remap_cost(remap); - size_t best = src; - for (size_t j = src + 1; j < out.size(); j++) { - if (remap[src] == remap[j]) continue; - auto remap_c = remap; - std::replace(remap_c.begin(), remap_c.end(), remap[src], remap[j]); - float c = remap_cost(remap_c); - if (c < cost) { - best = j; - cost = c; - } - } - if (src != best) { - std::replace(remap.begin(), remap.end(), remap[src], remap[best]); - } - } - std::vector<size_t> re_remap(remap.size(), remap.size()); - size_t r = 0; - for (size_t i = 0; i < remap.size(); i++) { - if (re_remap[remap[i]] == remap.size()) { - re_remap[remap[i]] = r++; - } - remap[i] = re_remap[remap[i]]; - } +PassDefinition progressive_passes_dc_vlf_lf_full_ac[] = { + {/*num_coefficients=*/2, /*shift=*/0, + /*suitable_for_downsampling_of_at_least=*/4}, + {/*num_coefficients=*/3, /*shift=*/0, + /*suitable_for_downsampling_of_at_least=*/2}, + {/*num_coefficients=*/8, /*shift=*/0, + /*suitable_for_downsampling_of_at_least=*/0}, +}; - enc_state->shared.num_histograms = - *std::max_element(remap.begin(), remap.end()) + 1; - for (size_t i = 0; i < enc_state->histogram_idx.size(); i++) { - enc_state->histogram_idx[i] = remap[enc_state->histogram_idx[i]]; - } - for (size_t i = 0; i < ac.size(); i++) { - for (size_t j = 0; j < ac[i].size(); j++) { - ac[i][j].context += enc_state->histogram_idx[i] * num_contexts; - } - } -} +PassDefinition progressive_passes_dc_quant_ac_full_ac[] = { + {/*num_coefficients=*/8, /*shift=*/1, + /*suitable_for_downsampling_of_at_least=*/2}, + {/*num_coefficients=*/8, /*shift=*/0, + /*suitable_for_downsampling_of_at_least=*/0}, +}; uint64_t FrameFlagsFromParams(const CompressParams& cparams) { uint64_t flags = 0; @@ -281,7 +162,17 @@ Status MakeFrameHeader(const CompressParams& cparams, if (cparams.modular_mode) { frame_header->encoding = FrameEncoding::kModular; - frame_header->group_size_shift = cparams.modular_group_size_shift; + if (cparams.modular_group_size_shift == -1) { + frame_header->group_size_shift = 1; + // no point using groups when only one group is full and the others are + // less than half full: multithreading will not really help much, while + // compression does suffer + if (ib.xsize() <= 400 && ib.ysize() <= 400) { + frame_header->group_size_shift = 2; + } + } else { + frame_header->group_size_shift = cparams.modular_group_size_shift; + } } frame_header->chroma_subsampling = ib.chroma_subsampling; @@ -299,6 +190,13 @@ Status MakeFrameHeader(const CompressParams& cparams, "recompressing JPEGs"); } } + if (frame_header->color_transform != ColorTransform::kYCbCr && + (frame_header->chroma_subsampling.MaxHShift() != 0 || + frame_header->chroma_subsampling.MaxVShift() != 0)) { + return JXL_FAILURE( + "Chroma subsampling is not supported when color transform is not " + "YCbCr"); + } frame_header->flags = FrameFlagsFromParams(cparams); // Non-photon noise is not supported in the Modular encoder for now. @@ -477,6 +375,78 @@ void SimplifyInvisible(Image3F* image, const ImageF& alpha, bool lossless) { } } +struct PixelStatsForChromacityAdjustment { + float dx = 0; + float db = 0; + float exposed_blue = 0; + float CalcPlane(const ImageF* JXL_RESTRICT plane) const { + float xmax = 0; + float ymax = 0; + for (size_t ty = 1; ty < plane->ysize(); ++ty) { + for (size_t tx = 1; tx < plane->xsize(); ++tx) { + float cur = plane->Row(ty)[tx]; + float prev_row = plane->Row(ty - 1)[tx]; + float prev = plane->Row(ty)[tx - 1]; + xmax = std::max(xmax, std::abs(cur - prev)); + ymax = std::max(ymax, std::abs(cur - prev_row)); + } + } + return std::max(xmax, ymax); + } + void CalcExposedBlue(const ImageF* JXL_RESTRICT plane_y, + const ImageF* JXL_RESTRICT plane_b) { + float eb = 0; + float xmax = 0; + float ymax = 0; + for (size_t ty = 1; ty < plane_y->ysize(); ++ty) { + for (size_t tx = 1; tx < plane_y->xsize(); ++tx) { + float cur_y = plane_y->Row(ty)[tx]; + float cur_b = plane_b->Row(ty)[tx]; + float exposed_b = cur_b - cur_y * 1.2; + float diff_b = cur_b - cur_y; + float prev_row = plane_b->Row(ty - 1)[tx]; + float prev = plane_b->Row(ty)[tx - 1]; + float diff_prev_row = prev_row - plane_y->Row(ty - 1)[tx]; + float diff_prev = prev - plane_y->Row(ty)[tx - 1]; + xmax = std::max(xmax, std::abs(diff_b - diff_prev)); + ymax = std::max(ymax, std::abs(diff_b - diff_prev_row)); + if (exposed_b >= 0) { + exposed_b *= fabs(cur_b - prev) + fabs(cur_b - prev_row); + eb = std::max(eb, exposed_b); + } + } + } + exposed_blue = eb; + db = std::max(xmax, ymax); + } + void Calc(const Image3F* JXL_RESTRICT opsin) { + dx = CalcPlane(&opsin->Plane(0)); + CalcExposedBlue(&opsin->Plane(1), &opsin->Plane(2)); + } + int HowMuchIsXChannelPixelized() { + if (dx >= 0.03) { + return 2; + } + if (dx >= 0.017) { + return 1; + } + return 0; + } + int HowMuchIsBChannelPixelized() { + int add = exposed_blue >= 0.13 ? 1 : 0; + if (db > 0.38) { + return 2 + add; + } + if (db > 0.33) { + return 1 + add; + } + if (db > 0.28) { + return add; + } + return 0; + } +}; + } // namespace class LossyFrameEncoder { @@ -498,16 +468,18 @@ class LossyFrameEncoder { const JxlCmsInterface& cms, ThreadPool* pool, ModularFrameEncoder* modular_frame_encoder, FrameHeader* frame_header) { - PROFILER_ZONE("ComputeEncodingData uninstrumented"); JXL_ASSERT((opsin->xsize() % kBlockDim) == 0 && (opsin->ysize() % kBlockDim) == 0); PassesSharedState& shared = enc_state_->shared; if (!enc_state_->cparams.max_error_mode) { - float x_qm_scale_steps[2] = {1.25f, 9.0f}; + // Compute chromacity adjustments using two approaches. + // 1) Distance based approach for chromacity adjustment: + float x_qm_scale_steps[4] = {1.25f, 7.0f, 15.0f, 24.0f}; shared.frame_header.x_qm_scale = 2; for (float x_qm_scale_step : x_qm_scale_steps) { - if (enc_state_->cparams.butteraugli_distance > x_qm_scale_step) { + if (enc_state_->cparams.original_butteraugli_distance > + x_qm_scale_step) { shared.frame_header.x_qm_scale++; } } @@ -516,6 +488,20 @@ class LossyFrameEncoder { // faithful to original even with extreme (5-10x) zooming. shared.frame_header.x_qm_scale++; } + // 2) Pixel-based approach for chromacity adjustment: + // look at the individual pixels and make a guess how difficult + // the image would be based on the worst case pixel. + PixelStatsForChromacityAdjustment pixel_stats; + if (enc_state_->cparams.speed_tier <= SpeedTier::kWombat) { + pixel_stats.Calc(opsin); + } + // For X take the most severe adjustment. + shared.frame_header.x_qm_scale = + std::max<int>(shared.frame_header.x_qm_scale, + 2 + pixel_stats.HowMuchIsXChannelPixelized()); + // B only adjusted by pixel-based approach. + shared.frame_header.b_qm_scale = + 2 + pixel_stats.HowMuchIsBChannelPixelized(); } JXL_RETURN_IF_ERROR(enc_state_->heuristics->LossyFrameHeuristics( @@ -571,7 +557,6 @@ class LossyFrameEncoder { Status ComputeJPEGTranscodingData(const jpeg::JPEGData& jpeg_data, ModularFrameEncoder* modular_frame_encoder, FrameHeader* frame_header) { - PROFILER_ZONE("ComputeJPEGTranscodingData uninstrumented"); PassesSharedState& shared = enc_state_->shared; frame_header->x_qm_scale = 2; @@ -589,9 +574,16 @@ class LossyFrameEncoder { shared.ac_strategy.FillDCT8(); FillImage(uint8_t(0), &shared.epf_sharpness); + enc_state_->passes.resize(enc_state_->progressive_splitter.GetNumPasses()); + for (PassesEncoderState::PassData& pass : enc_state_->passes) { + pass.ac_tokens.resize(shared.frame_dim.num_groups); + } + enc_state_->coeffs.clear(); - enc_state_->coeffs.emplace_back(make_unique<ACImageT<int32_t>>( - kGroupDim * kGroupDim, frame_dim.num_groups)); + while (enc_state_->coeffs.size() < enc_state_->passes.size()) { + enc_state_->coeffs.emplace_back(make_unique<ACImageT<int32_t>>( + kGroupDim * kGroupDim, frame_dim.num_groups)); + } // convert JPEG quantization table to a Quantizer object float dcquantization[3]; @@ -735,9 +727,12 @@ class LossyFrameEncoder { "FindCorrelation")); } } + if (!frame_header->chroma_subsampling.Is444()) { ZeroFillImage(&dc); - enc_state_->coeffs[0]->ZeroFill(); + for (auto& coeff : enc_state_->coeffs) { + coeff->ZeroFill(); + } } // JPEG DC is from -1024 to 1023. std::vector<size_t> dc_counts[3] = {}; @@ -747,7 +742,9 @@ class LossyFrameEncoder { size_t total_dc[3] = {}; for (size_t c : {1, 0, 2}) { if (jpeg_data.components.size() == 1 && c != 1) { - enc_state_->coeffs[0]->ZeroFillPlane(c); + for (auto& coeff : enc_state_->coeffs) { + coeff->ZeroFillPlane(c); + } ZeroFillImage(&dc.Plane(c)); // Ensure no division by 0. dc_counts[c][1024] = 1; @@ -761,9 +758,11 @@ class LossyFrameEncoder { group_index++) { const size_t gx = group_index % frame_dim.xsize_groups; const size_t gy = group_index / frame_dim.xsize_groups; - size_t offset = 0; - int32_t* JXL_RESTRICT ac = - enc_state_->coeffs[0]->PlaneRow(c, group_index, 0).ptr32; + int32_t* coeffs[kMaxNumPasses]; + for (size_t i = 0; i < enc_state_->coeffs.size(); i++) { + coeffs[i] = enc_state_->coeffs[i]->PlaneRow(c, group_index, 0).ptr32; + } + int32_t block[64]; for (size_t by = gy * kGroupDimInBlocks; by < ysize_blocks && by < (gy + 1) * kGroupDimInBlocks; ++by) { if ((by >> vshift) << vshift != by) continue; @@ -790,7 +789,7 @@ class LossyFrameEncoder { !frame_header->chroma_subsampling.Is444()) { for (size_t y = 0; y < 8; y++) { for (size_t x = 0; x < 8; x++) { - ac[offset + y * 8 + x] = inputjpeg[base + x * 8 + y]; + block[y * 8 + x] = inputjpeg[base + x * 8 + y]; } } } else { @@ -810,11 +809,16 @@ class LossyFrameEncoder { (1 << (kCFLFixedPointPrecision - 1))) >> kCFLFixedPointPrecision; int QCR = QChroma - cfl_factor; - ac[offset + y * 8 + x] = QCR; + block[y * 8 + x] = QCR; } } } - offset += 64; + enc_state_->progressive_splitter.SplitACCoefficients( + block, AcStrategy::FromRawStrategy(AcStrategy::Type::DCT), bx, + by, coeffs); + for (size_t i = 0; i < enc_state_->coeffs.size(); i++) { + coeffs[i] += kDCTBlockSize; + } } } } @@ -875,14 +879,6 @@ class LossyFrameEncoder { // Must happen before WriteFrameHeader! shared.frame_header.UpdateFlag(true, FrameHeader::kSkipAdaptiveDCSmoothing); - enc_state_->passes.resize(enc_state_->progressive_splitter.GetNumPasses()); - for (PassesEncoderState::PassData& pass : enc_state_->passes) { - pass.ac_tokens.resize(shared.frame_dim.num_groups); - } - - JXL_CHECK(enc_state_->passes.size() == - 1); // skipping coeff splitting so need to have only one pass - ComputeAllCoeffOrders(frame_dim); shared.num_histograms = 1; @@ -924,8 +920,9 @@ class LossyFrameEncoder { Status EncodeGlobalDCInfo(const FrameHeader& frame_header, BitWriter* writer) const { // Encode quantizer DC and global scale. + QuantizerParams params = enc_state_->shared.quantizer.GetParams(); JXL_RETURN_IF_ERROR( - enc_state_->shared.quantizer.Encode(writer, kLayerQuant, aux_out_)); + WriteQuantizerParams(params, writer, kLayerQuant, aux_out_)); EncodeBlockCtxMap(enc_state_->shared.block_ctx_map, writer, aux_out_); ColorCorrelationMapEncodeDC(&enc_state_->shared.cmap, writer, kLayerDC, aux_out_); @@ -937,15 +934,12 @@ class LossyFrameEncoder { JXL_RETURN_IF_ERROR(DequantMatricesEncode(&enc_state_->shared.matrices, writer, kLayerQuant, aux_out_, modular_frame_encoder)); - if (enc_state_->cparams.speed_tier <= SpeedTier::kTortoise) { - if (!doing_jpeg_recompression) ClusterGroups(enc_state_); - } size_t num_histo_bits = CeilLog2Nonzero(enc_state_->shared.frame_dim.num_groups); if (num_histo_bits != 0) { BitWriter::Allotment allotment(writer, num_histo_bits); writer->Write(num_histo_bits, enc_state_->shared.num_histograms - 1); - ReclaimAndCharge(writer, &allotment, kLayerAC, aux_out_); + allotment.ReclaimAndCharge(writer, kLayerAC, aux_out_); } for (size_t i = 0; i < enc_state_->progressive_splitter.GetNumPasses(); @@ -956,7 +950,7 @@ class LossyFrameEncoder { kOrderEnc, enc_state_->used_orders[i], &order_bits)); BitWriter::Allotment allotment(writer, order_bits); JXL_CHECK(U32Coder::Write(kOrderEnc, enc_state_->used_orders[i], writer)); - ReclaimAndCharge(writer, &allotment, kLayerOrder, aux_out_); + allotment.ReclaimAndCharge(writer, kLayerOrder, aux_out_); EncodeCoeffOrders( enc_state_->used_orders[i], &enc_state_->shared @@ -995,7 +989,6 @@ class LossyFrameEncoder { private: void ComputeAllCoeffOrders(const FrameDimensions& frame_dim) { - PROFILER_FUNC; // No coefficient reordering in Falcon or faster. auto used_orders_info = ComputeUsedOrders( enc_state_->cparams.speed_tier, enc_state_->shared.ac_strategy, @@ -1077,56 +1070,90 @@ Status EncodeFrame(const CompressParams& cparams_orig, const JxlCmsInterface& cms, ThreadPool* pool, BitWriter* writer, AuxOut* aux_out) { CompressParams cparams = cparams_orig; - if (cparams_orig.target_bitrate > 0.0f && - frame_info.frame_type == FrameType::kRegularFrame) { - cparams.target_bitrate = 0.0f; - const float target_bitrate = cparams_orig.target_bitrate; - float bitrate = 0.0f; - float prev_bitrate = 0.0f; - float rescale = 1.0f; - size_t prev_bits = 0; - float error = 0.0f; - float best_error = 100.0f; - float best_rescale = 1.0f; - for (size_t i = 0; i < 10; ++i) { - std::unique_ptr<PassesEncoderState> state = - jxl::make_unique<PassesEncoderState>(); - BitWriter bw; - JXL_CHECK(EncodeFrame(cparams, frame_info, metadata, ib, state.get(), cms, - pool, &bw, nullptr)); - bitrate = bw.BitsWritten() * 1.0 / (ib.xsize() * ib.ysize()); - error = target_bitrate / bitrate - 1.0f; - if (std::abs(error) < std::abs(best_error)) { - best_error = error; - best_rescale = cparams.quant_ac_rescale; - } - if (bw.BitsWritten() == prev_bits || std::abs(error) < 0.0005f) { - break; - } - float lambda = 1.0f; - if (i > 0) { - lambda = (((bitrate / prev_bitrate) - 1.0f) / (rescale - 1.0f)); - } - rescale = (1.0f + ((target_bitrate / bitrate) - 1.0f) / lambda); - if (rescale < 0.0f) { - break; + if (cparams.speed_tier == SpeedTier::kGlacier && !cparams.IsLossless()) { + cparams.speed_tier = SpeedTier::kTortoise; + } + if (cparams.speed_tier == SpeedTier::kGlacier) { + std::vector<CompressParams> all_params; + std::vector<size_t> size; + + CompressParams cparams_attempt = cparams_orig; + cparams_attempt.speed_tier = SpeedTier::kTortoise; + cparams_attempt.options.max_properties = 4; + + for (float x : {0.0f, 80.f}) { + cparams_attempt.channel_colors_percent = x; + for (float y : {0.0f, 95.0f}) { + cparams_attempt.channel_colors_pre_transform_percent = y; + // 70000 ensures that the number of palette colors is representable in + // modular headers. + for (int K : {0, 1 << 10, 70000}) { + cparams_attempt.palette_colors = K; + for (int tree_mode : {-1, (int)ModularOptions::TreeMode::kNoWP, + (int)ModularOptions::TreeMode::kDefault}) { + if (tree_mode == -1) { + // LZ77 only + cparams_attempt.options.nb_repeats = 0; + } else { + cparams_attempt.options.nb_repeats = 1; + cparams_attempt.options.wp_tree_mode = + static_cast<ModularOptions::TreeMode>(tree_mode); + } + for (Predictor pred : {Predictor::Zero, Predictor::Variable}) { + cparams_attempt.options.predictor = pred; + for (int g : {0, -1, 3}) { + cparams_attempt.modular_group_size_shift = g; + for (Override patches : {Override::kDefault, Override::kOff}) { + cparams_attempt.patches = patches; + all_params.push_back(cparams_attempt); + } + } + } + } + } } - cparams.quant_ac_rescale *= rescale; - prev_bitrate = bitrate; - prev_bits = bw.BitsWritten(); } - if (aux_out) { - aux_out->max_quant_rescale = best_rescale; - aux_out->min_quant_rescale = best_rescale; - aux_out->min_bitrate_error = best_error; - aux_out->max_bitrate_error = best_error; + + size.resize(all_params.size()); + + std::atomic<int> num_errors{0}; + + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, all_params.size(), ThreadPool::NoInit, + [&](size_t task, size_t) { + BitWriter w; + PassesEncoderState state; + if (!EncodeFrame(all_params[task], frame_info, metadata, ib, &state, + cms, nullptr, &w, aux_out)) { + num_errors.fetch_add(1, std::memory_order_relaxed); + return; + } + size[task] = w.BitsWritten(); + }, + "Compress kGlacier")); + JXL_RETURN_IF_ERROR(num_errors.load(std::memory_order_relaxed) == 0); + + size_t best_idx = 0; + for (size_t i = 1; i < all_params.size(); i++) { + if (size[best_idx] > size[i]) { + best_idx = i; + } } - cparams.quant_ac_rescale = best_rescale; + cparams = all_params[best_idx]; } + ib.VerifyMetadata(); passes_enc_state->special_frames.clear(); + if (cparams.qprogressive_mode) { + passes_enc_state->progressive_splitter.SetProgressiveMode( + ProgressiveMode{progressive_passes_dc_quant_ac_full_ac}); + } else if (cparams.progressive_mode) { + passes_enc_state->progressive_splitter.SetProgressiveMode( + ProgressiveMode{progressive_passes_dc_vlf_lf_full_ac}); + } + JXL_RETURN_IF_ERROR(ParamsPostInit(&cparams)); if (cparams.progressive_dc < 0) { @@ -1135,13 +1162,6 @@ Status EncodeFrame(const CompressParams& cparams_orig, cparams.progressive_dc); } cparams.progressive_dc = 0; - // Enable progressive_dc for lower qualities, except for fast speeds where - // the modular encoder uses fixed tree. - if (cparams.speed_tier <= SpeedTier::kCheetah && - cparams.butteraugli_distance >= - kMinButteraugliDistanceForProgressiveDc) { - cparams.progressive_dc = 1; - } } if (cparams.ec_resampling < cparams.resampling) { cparams.ec_resampling = cparams.resampling; @@ -1210,7 +1230,7 @@ Status EncodeFrame(const CompressParams& cparams_orig, metadata_linear->color_encoding = c_linear; ImageBundle linear_storage(metadata_linear.get()); - std::vector<AuxOut> aux_outs; + std::vector<std::unique_ptr<AuxOut>> aux_outs; // LossyFrameEncoder stores a reference to a std::function<Status(size_t)> // so we need to keep the std::function<Status(size_t)> being referenced // alive while lossy_frame_encoder is used. We could make resize_aux_outs a @@ -1218,18 +1238,15 @@ Status EncodeFrame(const CompressParams& cparams_orig, // simpler. const std::function<Status(size_t)> resize_aux_outs = [&aux_outs, aux_out](const size_t num_threads) -> Status { - if (aux_out != nullptr) { - size_t old_size = aux_outs.size(); - for (size_t i = num_threads; i < old_size; i++) { - aux_out->Assimilate(aux_outs[i]); - } + if (aux_out == nullptr) { aux_outs.resize(num_threads); - // Each thread needs these INPUTS. Don't copy the entire AuxOut - // because it may contain stats which would be Assimilated multiple - // times below. - for (size_t i = old_size; i < aux_outs.size(); i++) { - aux_outs[i].dump_image = aux_out->dump_image; - aux_outs[i].debug_prefix = aux_out->debug_prefix; + } else { + while (aux_outs.size() > num_threads) { + aux_out->Assimilate(*aux_outs.back()); + aux_outs.pop_back(); + } + while (num_threads > aux_outs.size()) { + aux_outs.emplace_back(jxl::make_unique<AuxOut>()); } } return true; @@ -1288,10 +1305,6 @@ Status EncodeFrame(const CompressParams& cparams_orig, ib.alpha(), lossless); } } - if (aux_out != nullptr) { - JXL_RETURN_IF_ERROR( - aux_out->InspectImage3F("enc_frame:OpsinDynamicsImage", opsin)); - } if (frame_header->encoding == FrameEncoding::kVarDCT) { PadImageToBlockMultipleInPlace(&opsin); JXL_RETURN_IF_ERROR(lossy_frame_encoder.ComputeEncodingData( @@ -1309,9 +1322,11 @@ Status EncodeFrame(const CompressParams& cparams_orig, } if (cparams.ec_resampling != 1 && !cparams.already_downsampled) { extra_channels = &extra_channels_storage; - for (size_t i = 0; i < ib.extra_channels().size(); i++) { - extra_channels_storage.emplace_back(CopyImage(ib.extra_channels()[i])); - DownsampleImage(&extra_channels_storage.back(), cparams.ec_resampling); + for (const ImageF& ec : ib.extra_channels()) { + ImageF d_ec(ec.xsize(), ec.ysize()); + CopyImageTo(ec, &d_ec); + DownsampleImage(&d_ec, cparams.ec_resampling); + extra_channels_storage.emplace_back(std::move(d_ec)); } } // needs to happen *AFTER* VarDCT-ComputeEncodingData. @@ -1388,13 +1403,13 @@ Status EncodeFrame(const CompressParams& cparams_orig, const auto process_dc_group = [&](const uint32_t group_index, const size_t thread) { - AuxOut* my_aux_out = aux_out ? &aux_outs[thread] : nullptr; + AuxOut* my_aux_out = aux_outs[thread].get(); BitWriter* output = get_output(group_index + 1); if (frame_header->encoding == FrameEncoding::kVarDCT && !(frame_header->flags & FrameHeader::kUseDcFrame)) { BitWriter::Allotment allotment(output, 2); output->Write(2, modular_frame_encoder->extra_dc_precision[group_index]); - ReclaimAndCharge(output, &allotment, kLayerDC, my_aux_out); + allotment.ReclaimAndCharge(output, kLayerDC, my_aux_out); JXL_CHECK(modular_frame_encoder->EncodeStream( output, my_aux_out, kLayerDC, ModularStreamId::VarDCTDC(group_index))); @@ -1410,7 +1425,7 @@ Status EncodeFrame(const CompressParams& cparams_orig, BitWriter::Allotment allotment(output, nb_bits); output->Write(nb_bits, modular_frame_encoder->ac_metadata_size[group_index] - 1); - ReclaimAndCharge(output, &allotment, kLayerControlFields, my_aux_out); + allotment.ReclaimAndCharge(output, kLayerControlFields, my_aux_out); } JXL_CHECK(modular_frame_encoder->EncodeStream( output, my_aux_out, kLayerControlFields, @@ -1429,7 +1444,7 @@ Status EncodeFrame(const CompressParams& cparams_orig, std::atomic<int> num_errors{0}; const auto process_group = [&](const uint32_t group_index, const size_t thread) { - AuxOut* my_aux_out = aux_out ? &aux_outs[thread] : nullptr; + AuxOut* my_aux_out = aux_outs[thread].get(); for (size_t i = 0; i < num_passes; i++) { if (frame_header->encoding == FrameEncoding::kVarDCT) { @@ -1458,7 +1473,7 @@ Status EncodeFrame(const CompressParams& cparams_orig, for (BitWriter& bw : group_codes) { BitWriter::Allotment allotment(&bw, 8); bw.ZeroPadToByte(); // end of group. - ReclaimAndCharge(&bw, &allotment, kLayerAC, aux_out); + allotment.ReclaimAndCharge(&bw, kLayerAC, aux_out); } std::vector<coeff_order_t>* permutation_ptr = nullptr; |