diff options
author | Adrian Grange <agrange@google.com> | 2011-10-06 15:49:11 -0700 |
---|---|---|
committer | Adrian Grange <agrange@google.com> | 2011-10-11 12:49:12 -0700 |
commit | 217591fde57a1f459003e651854b53b940b00865 (patch) | |
tree | 3cf8b6e5648eb38b8f3a1f7d2b8e0436316589d5 | |
parent | af12c23e8ee6931c364f417d510b48de6e1d8530 (diff) | |
download | libvpx-217591fde57a1f459003e651854b53b940b00865.tar.gz libvpx-217591fde57a1f459003e651854b53b940b00865.tar.bz2 libvpx-217591fde57a1f459003e651854b53b940b00865.zip |
Added rate-targeted temporal scalability
Added the ability to create rate-targeted, temporally
scalable, VP8-compatible bitstreams.
The application vp8_scalable_patterns.c demonstrates how
to use this capability. Users can create output bitstreams
containing up to 5 temporally separable streams encoded
as a single VP8 bitstream.
(previously abandoned as:
I92d1483e887adb274d07ce9e567e4d0314881b0a)
Change-Id: I156250a3fe930be57c069d508c41b6a7a4ea8d6a
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | examples.mk | 6 | ||||
-rw-r--r-- | examples/vp8_scalable_patterns.txt | 143 | ||||
-rw-r--r-- | vp8/common/onyx.h | 9 | ||||
-rw-r--r-- | vp8/encoder/encodeframe.c | 34 | ||||
-rw-r--r-- | vp8/encoder/onyx_if.c | 511 | ||||
-rw-r--r-- | vp8/encoder/onyx_int.h | 67 | ||||
-rw-r--r-- | vp8/encoder/pickinter.c | 3 | ||||
-rw-r--r-- | vp8/encoder/ratectrl.c | 44 | ||||
-rw-r--r-- | vp8/vp8_cx_iface.c | 110 | ||||
-rw-r--r-- | vp8_scalable_patterns.c | 467 | ||||
-rw-r--r-- | vpx/vpx_encoder.h | 43 |
12 files changed, 1150 insertions, 288 deletions
diff --git a/.gitignore b/.gitignore index ae616b28c..110146d3c 100644 --- a/.gitignore +++ b/.gitignore @@ -48,7 +48,6 @@ /twopass_encoder.dox /vp8_api1_migration.dox /vp8_scalable_patterns -/vp8_scalable_patterns.c /vp8_scalable_patterns.dox /vp8_set_maps /vp8_set_maps.c diff --git a/examples.mk b/examples.mk index b6bf882e5..8088d3217 100644 --- a/examples.mk +++ b/examples.mk @@ -37,6 +37,9 @@ vpxenc.SRCS += libmkv/EbmlWriter.c vpxenc.SRCS += libmkv/EbmlWriter.h vpxenc.GUID = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1 vpxenc.DESCRIPTION = Full featured encoder +UTILS-$(CONFIG_ENCODERS) += vp8_scalable_patterns.c +vp8_scalable_patterns.GUID = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C +vp8_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder # Clean up old ivfenc, ivfdec binaries. ifeq ($(CONFIG_MSVS),yes) @@ -86,9 +89,6 @@ GEN_EXAMPLES-$(CONFIG_ENCODERS) += error_resilient.c error_resilient.GUID = DF5837B9-4145-4F92-A031-44E4F832E00C error_resilient.DESCRIPTION = Error Resiliency Feature -GEN_EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8_scalable_patterns.c -vp8_scalable_patterns.GUID = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C -vp8_scalable_patterns.DESCRIPTION = VP8 Scalable Bitstream Patterns GEN_EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8_set_maps.c vp8_set_maps.GUID = ECB2D24D-98B8-4015-A465-A4AF3DCC145F vp8_set_maps.DESCRIPTION = VP8 set active and ROI maps diff --git a/examples/vp8_scalable_patterns.txt b/examples/vp8_scalable_patterns.txt deleted file mode 100644 index e1d5dbdaa..000000000 --- a/examples/vp8_scalable_patterns.txt +++ /dev/null @@ -1,143 +0,0 @@ -@TEMPLATE encoder_tmpl.c -VP8 Scalable Frame Patterns -=========================== -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INTRODUCTION -This is an example demonstrating how to control the VP8 encoder's -reference frame selection and update mechanism for video applications -that benefit from a scalable bitstream. 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INTRODUCTION - - -Configuration -------------- -Scalable frame patterns are most useful in an error resilient context, -so error resiliency mode is enabled, as in the `error_resilient.c` -example. In addition, we want to disable automatic keyframe selection, -so we force an interval of 1000 frames. -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENC_SET_CFG2 - -/* Enable error resilient mode */ -cfg.g_error_resilient = 1; -cfg.g_lag_in_frames = 0; -cfg.kf_mode = VPX_KF_FIXED; - -/* Disable automatic keyframe placement */ -cfg.kf_min_dist = cfg.kf_max_dist = 1000; -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENC_SET_CFG2 - -This example uses the following frame pattern (L->last_frame, -G->golden_frame, A->alt_ref_frame): - -* Frame 0 Intra, use none, update L&G&A -* Frame 1 Inter, use LGA, update none -* Frame 2 Inter, use LGA, update L -* Frame 3 Inter, use LGA, update none -* Frame 4 Inter, use GA, update L&G -* Frame 5 Inter, use LGA, update none -* Frame 6 Inter, use LGA, update L -* Frame 7 Inter, use LGA, update none -* Frame 8 Inter, use A, update L&G&A -* Frame 9 Inter, use LGA, update none -* Frame 10 Inter, use LGA, update L -* Frame 11 Inter, use LGA, update none -* Frame 12 Inter, use GA, update L&G -* Frame 13 Inter, use LGA, update none -* Frame 14 Inter, use LGA, update L -* Frame 15 Inter, use LGA, update none -* ...Repeats the pattern from frame 0 - -Change this variable to test the 3 decodable streams case. 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TWOPASS_VARS -int num_streams = 5; -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TWOPASS_VARS - - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PER_FRAME_CFG -flags = 0; -if(num_streams == 5) -{ - switch(frame_cnt % 16) { - case 0: - flags |= VPX_EFLAG_FORCE_KF; - flags |= VP8_EFLAG_FORCE_GF; - flags |= VP8_EFLAG_FORCE_ARF; - break; - case 1: - case 3: - case 5: - case 7: - case 9: - case 11: - case 13: - case 15: - flags |= VP8_EFLAG_NO_UPD_LAST; - flags |= VP8_EFLAG_NO_UPD_GF; - flags |= VP8_EFLAG_NO_UPD_ARF; - break; - case 2: - case 6: - case 10: - case 14: - break; - case 4: - flags |= VP8_EFLAG_NO_REF_LAST; - flags |= VP8_EFLAG_FORCE_GF; - break; - case 8: - flags |= VP8_EFLAG_NO_REF_LAST; - flags |= VP8_EFLAG_NO_REF_GF; - flags |= VP8_EFLAG_FORCE_GF; - flags |= VP8_EFLAG_FORCE_ARF; - break; - case 12: - flags |= VP8_EFLAG_NO_REF_LAST; - flags |= VP8_EFLAG_FORCE_GF; - break; - } -} -else -{ - switch(frame_cnt % 9) { - case 0: - if(frame_cnt==0) - { - flags |= VPX_EFLAG_FORCE_KF; - } - else - { - cfg.rc_max_quantizer = 26; - cfg.rc_min_quantizer = 0; - cfg.rc_target_bitrate = 300; - flags |= VP8_EFLAG_NO_REF_LAST; - flags |= VP8_EFLAG_NO_REF_ARF; - } - flags |= VP8_EFLAG_FORCE_GF; - flags |= VP8_EFLAG_FORCE_ARF; - break; - case 1: - case 2: - case 4: - case 5: - case 7: - case 8: - cfg.rc_max_quantizer = 45; - cfg.rc_min_quantizer = 0; - cfg.rc_target_bitrate = 230; - break; - case 3: - case 6: - cfg.rc_max_quantizer = 45; - cfg.rc_min_quantizer = 0; - cfg.rc_target_bitrate = 215; - flags |= VP8_EFLAG_NO_REF_LAST; - flags |= VP8_EFLAG_FORCE_ARF; - break; - } -} -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PER_FRAME_CFG - -Observing The Effects ---------------------- -Use the `decode_with_drops` example to decode with various dropped frame -patterns. Good patterns to start with are 1/2, 3/4, 7/8, and 15/16 -drops. 
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h index 015b4c4d4..f1f843af1 100644 --- a/vp8/common/onyx.h +++ b/vp8/common/onyx.h @@ -19,6 +19,7 @@ extern "C" #include "vpx/internal/vpx_codec_internal.h" #include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" #include "vpx_scale/yv12config.h" #include "type_aliases.h" #include "ppflags.h" @@ -198,6 +199,14 @@ extern "C" struct vpx_codec_pkt_list *output_pkt_list; vp8e_tuning tuning; + + // Temporal scaling parameters + unsigned int number_of_layers; + unsigned int target_bitrate[MAX_PERIODICITY]; + unsigned int rate_decimator[MAX_PERIODICITY]; + unsigned int periodicity; + unsigned int layer_id[MAX_PERIODICITY]; + } VP8_CONFIG; diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index 510e4cc98..7f2b46daa 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -456,7 +456,7 @@ void encode_mb_row(VP8_COMP *cpi, vp8_activity_masking(cpi, x); // Is segmentation enabled - // MB level adjutment to quantizer + // MB level adjustment to quantizer if (xd->segmentation_enabled) { // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) @@ -505,7 +505,8 @@ void encode_mb_row(VP8_COMP *cpi, // Special case code for cyclic refresh // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode // during vp8cx_encode_inter_macroblock()) back into the global sgmentation map - if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled) + if ((cpi->current_layer == 0) && + (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)) { cpi->segmentation_map[map_index+mb_col] = xd->mode_info_context->mbmi.segment_id; @@ -648,6 +649,30 @@ void init_encode_frame_mb_context(VP8_COMP *cpi) + vp8_cost_one(255) + vp8_cost_one(128); } + else if ((cpi->oxcf.number_of_layers > 1) && + (cpi->ref_frame_flags == VP8_GOLD_FLAG)) + { + xd->ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded) + + vp8_cost_zero(1); + 
xd->ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded) + + vp8_cost_one(1) + + vp8_cost_zero(255); + xd->ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded) + + vp8_cost_one(1) + + vp8_cost_one(255); + } + else if ((cpi->oxcf.number_of_layers > 1) && + (cpi->ref_frame_flags == VP8_ALT_FLAG)) + { + xd->ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded) + + vp8_cost_zero(1); + xd->ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(cpi->prob_intra_coded) + + vp8_cost_one(1) + + vp8_cost_zero(1); + xd->ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(cpi->prob_intra_coded) + + vp8_cost_one(1) + + vp8_cost_one(1); + } else { xd->ref_frame_cost[LAST_FRAME] = vp8_cost_one(cpi->prob_intra_coded) @@ -937,7 +962,8 @@ void vp8_encode_frame(VP8_COMP *cpi) // Adjust the projected reference frame useage probability numbers to reflect // what we have just seen. This may be usefull when we make multiple itterations // of the recode loop rather than continuing to use values from the previous frame. 
- if ((cm->frame_type != KEY_FRAME) && !cm->refresh_alt_ref_frame && !cm->refresh_golden_frame) + if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) || + (!cm->refresh_alt_ref_frame && !cm->refresh_golden_frame))) { const int *const rfct = cpi->count_mb_ref_frame_usage; const int rf_intra = rfct[INTRA_FRAME]; @@ -1220,7 +1246,7 @@ int vp8cx_encode_inter_macroblock if (xd->segmentation_enabled) { // If cyclic update enabled - if (cpi->cyclic_refresh_mode_enabled) + if (cpi->current_layer == 0 && cpi->cyclic_refresh_mode_enabled) { // Clear segment_id back to 0 if not coded (last frame 0,0) if ((xd->mode_info_context->mbmi.segment_id == 1) && diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index cac92057c..43c971480 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -237,6 +237,79 @@ void vp8_initialize() extern FILE *vpxlogc; #endif +static void save_layer_context(VP8_COMP *cpi) +{ + LAYER_CONTEXT *lc = &cpi->layer_context[cpi->current_layer]; + + // Save layer dependent coding state + lc->target_bandwidth = cpi->target_bandwidth; + //lc->target_bandwidth = cpi->oxcf.target_bandwidth; + lc->starting_buffer_level = cpi->oxcf.starting_buffer_level; + lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level; + lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size; + lc->buffer_level = cpi->buffer_level; + lc->bits_off_target = cpi->bits_off_target; + lc->total_actual_bits = cpi->total_actual_bits; + lc->worst_quality = cpi->worst_quality; + lc->active_worst_quality = cpi->active_worst_quality; + lc->best_quality = cpi->best_quality; + lc->active_best_quality = cpi->active_best_quality; + lc->ni_av_qi = cpi->ni_av_qi; + lc->ni_tot_qi = cpi->ni_tot_qi; + lc->ni_frames = cpi->ni_frames; + lc->avg_frame_qindex = cpi->avg_frame_qindex; + lc->rate_correction_factor = cpi->rate_correction_factor; + lc->key_frame_rate_correction_factor = cpi->key_frame_rate_correction_factor; + lc->gf_rate_correction_factor = 
cpi->gf_rate_correction_factor; + lc->zbin_over_quant = cpi->zbin_over_quant; + lc->inter_frame_target = cpi->inter_frame_target; + lc->total_byte_count = cpi->total_byte_count; + lc->filter_level = cpi->common.filter_level; + + lc->last_frame_percent_intra = cpi->last_frame_percent_intra; + + memcpy (lc->count_mb_ref_frame_usage, + cpi->count_mb_ref_frame_usage, + sizeof(cpi->count_mb_ref_frame_usage)); +} + +static void restore_layer_context(VP8_COMP *cpi, const int layer) +{ + LAYER_CONTEXT *lc = &cpi->layer_context[layer]; + + // Restore layer dependent coding state + cpi->current_layer = layer; + cpi->target_bandwidth = lc->target_bandwidth; + cpi->oxcf.target_bandwidth = lc->target_bandwidth; + cpi->oxcf.starting_buffer_level = lc->starting_buffer_level; + cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level; + cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size; + cpi->buffer_level = lc->buffer_level; + cpi->bits_off_target = lc->bits_off_target; + cpi->total_actual_bits = lc->total_actual_bits; + //cpi->worst_quality = lc->worst_quality; + cpi->active_worst_quality = lc->active_worst_quality; + //cpi->best_quality = lc->best_quality; + cpi->active_best_quality = lc->active_best_quality; + cpi->ni_av_qi = lc->ni_av_qi; + cpi->ni_tot_qi = lc->ni_tot_qi; + cpi->ni_frames = lc->ni_frames; + cpi->avg_frame_qindex = lc->avg_frame_qindex; + cpi->rate_correction_factor = lc->rate_correction_factor; + cpi->key_frame_rate_correction_factor = lc->key_frame_rate_correction_factor; + cpi->gf_rate_correction_factor = lc->gf_rate_correction_factor; + cpi->zbin_over_quant = lc->zbin_over_quant; + cpi->inter_frame_target = lc->inter_frame_target; + cpi->total_byte_count = lc->total_byte_count; + cpi->common.filter_level = lc->filter_level; + + cpi->last_frame_percent_intra = lc->last_frame_percent_intra; + + memcpy (cpi->count_mb_ref_frame_usage, + lc->count_mb_ref_frame_usage, + sizeof(cpi->count_mb_ref_frame_usage)); +} + static void setup_features(VP8_COMP *cpi) 
{ // Set up default state for MB feature flags @@ -510,7 +583,7 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment) set_segment_data((VP8_PTR)cpi, &feature_data[0][0], SEGMENT_DELTADATA); // Delete sementation map - vpx_free(seg_map); + vpx_free(seg_map); seg_map = 0; @@ -1397,11 +1470,13 @@ void vp8_new_frame_rate(VP8_COMP *cpi, double framerate) if(framerate < .1) framerate = 30; - cpi->oxcf.frame_rate = framerate; - cpi->output_frame_rate = cpi->oxcf.frame_rate; - cpi->per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate); - cpi->av_per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate); - cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100); + cpi->oxcf.frame_rate = framerate; + cpi->output_frame_rate = cpi->oxcf.frame_rate; + cpi->per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth / + cpi->output_frame_rate); + cpi->av_per_frame_bandwidth = cpi->per_frame_bandwidth; + cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth * + cpi->oxcf.two_pass_vbrmin_section / 100); // Set Maximum gf/arf interval cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2); @@ -1472,6 +1547,65 @@ static void init_config(VP8_PTR ptr, VP8_CONFIG *oxcf) cpi->total_actual_bits = 0; cpi->total_target_vs_actual = 0; + // Temporal scalabilty + if (cpi->oxcf.number_of_layers > 1) + { + int i; + int prev_layer_frame_rate=0; + + for (i=0; i<cpi->oxcf.number_of_layers; i++) + { + LAYER_CONTEXT *lc = &cpi->layer_context[i]; + + // Layer configuration + lc->frame_rate = + cpi->output_frame_rate / cpi->oxcf.rate_decimator[i]; + lc->target_bandwidth = cpi->oxcf.target_bitrate[i] * 1000; + + lc->starting_buffer_level = + rescale(oxcf->starting_buffer_level, + lc->target_bandwidth, 1000); + + if (oxcf->optimal_buffer_level == 0) + lc->optimal_buffer_level = lc->target_bandwidth / 8; + else + lc->optimal_buffer_level = + 
rescale(oxcf->optimal_buffer_level, + lc->target_bandwidth, 1000); + + if (oxcf->maximum_buffer_size == 0) + lc->maximum_buffer_size = lc->target_bandwidth / 8; + else + lc->maximum_buffer_size = + rescale(oxcf->maximum_buffer_size, + lc->target_bandwidth, 1000); + + // Work out the average size of a frame within this layer + if (i > 0) + lc->avg_frame_size_for_layer = (cpi->oxcf.target_bitrate[i] - + cpi->oxcf.target_bitrate[i-1]) * 1000 / + (lc->frame_rate - prev_layer_frame_rate); + + lc->active_worst_quality = cpi->oxcf.worst_allowed_q; + lc->active_best_quality = cpi->oxcf.best_allowed_q; + lc->avg_frame_qindex = cpi->oxcf.worst_allowed_q; + + lc->buffer_level = lc->starting_buffer_level; + lc->bits_off_target = lc->starting_buffer_level; + + lc->total_actual_bits = 0; + lc->ni_av_qi = 0; + lc->ni_tot_qi = 0; + lc->ni_frames = 0; + lc->rate_correction_factor = 1.0; + lc->key_frame_rate_correction_factor = 1.0; + lc->gf_rate_correction_factor = 1.0; + lc->inter_frame_target = 0.0; + + prev_layer_frame_rate = lc->frame_rate; + } + } + #if VP8_TEMPORAL_ALT_REF { int i; @@ -1693,11 +1827,11 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) cpi->target_bandwidth = cpi->oxcf.target_bandwidth; - cm->Width = cpi->oxcf.Width ; - cm->Height = cpi->oxcf.Height ; + cm->Width = cpi->oxcf.Width; + cm->Height = cpi->oxcf.Height; cm->horiz_scale = cpi->horiz_scale; - cm->vert_scale = cpi->vert_scale ; + cm->vert_scale = cpi->vert_scale; // VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs) if (cpi->oxcf.Sharpness > 7) @@ -1828,7 +1962,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) cpi->prob_gf_coded = 128; cpi->prob_intra_coded = 63; - // Prime the recent reference frame useage counters. + // Prime the recent reference frame usage counters. 
// Hereafter they will be maintained as a sort of moving average cpi->recent_ref_frame_usage[INTRA_FRAME] = 1; cpi->recent_ref_frame_usage[LAST_FRAME] = 1; @@ -2143,35 +2277,106 @@ void vp8_remove_compressor(VP8_PTR *ptr) FILE *f = fopen("opsnr.stt", "a"); double time_encoded = (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) / 10000000.000; - double total_encode_time = (cpi->time_receive_data + cpi->time_compress_data) / 1000.000; - double dr = (double)cpi->bytes * (double) 8 / (double)1000 / time_encoded; + double total_encode_time = (cpi->time_receive_data + + cpi->time_compress_data) / 1000.000; + double dr = (double)cpi->bytes * 8.0 / 1000.0 / time_encoded; if (cpi->b_calculate_psnr) { - YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx]; - double samples = 3.0 / 2 * cpi->count * lst_yv12->y_width * lst_yv12->y_height; - double total_psnr = vp8_mse2psnr(samples, 255.0, cpi->total_sq_error); - double total_psnr2 = vp8_mse2psnr(samples, 255.0, cpi->total_sq_error2); - double total_ssim = 100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0); - - fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\tVPXSSIM\t Time(us)\n"); - fprintf(f, "%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%8.0f\n", - dr, cpi->total / cpi->count, total_psnr, cpi->totalp / cpi->count, total_psnr2, total_ssim, - total_encode_time); + YV12_BUFFER_CONFIG *lst_yv12 = + &cpi->common.yv12_fb[cpi->common.lst_fb_idx]; + + if (cpi->oxcf.number_of_layers > 1) + { + int i; + + fprintf(f, "Layer\tBitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\t" + "GLPsnrP\tVPXSSIM\t\n"); + for (i=0; i<cpi->oxcf.number_of_layers; i++) + { + double dr = (double)cpi->bytes_in_layer[i] * + 8.0 / 1000.0 / time_encoded; + double samples = 3.0 / 2 * cpi->frames_in_layer[i] * + lst_yv12->y_width * lst_yv12->y_height; + double total_psnr = vp8_mse2psnr(samples, 255.0, + cpi->total_error2[i]); + double total_psnr2 = vp8_mse2psnr(samples, 255.0, + cpi->total_error2_p[i]); + double total_ssim = 100 * 
pow(cpi->sum_ssim[i] / + cpi->sum_weights[i], 8.0); + + fprintf(f, "%5d\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t" + "%7.3f\t%7.3f\n", + i, dr, + cpi->sum_psnr[i] / cpi->frames_in_layer[i], + total_psnr, + cpi->sum_psnr_p[i] / cpi->frames_in_layer[i], + total_psnr2, total_ssim); + } + } + else + { + double samples = 3.0 / 2 * cpi->count * + lst_yv12->y_width * lst_yv12->y_height; + double total_psnr = vp8_mse2psnr(samples, 255.0, + cpi->total_sq_error); + double total_psnr2 = vp8_mse2psnr(samples, 255.0, + cpi->total_sq_error2); + double total_ssim = 100 * pow(cpi->summed_quality / + cpi->summed_weights, 8.0); + + fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\t" + "GLPsnrP\tVPXSSIM\t Time(us)\n"); + fprintf(f, "%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t" + "%7.3f\t%8.0f\n", + dr, cpi->total / cpi->count, total_psnr, + cpi->totalp / cpi->count, total_psnr2, + total_ssim, total_encode_time); + } } if (cpi->b_calculate_ssimg) { - fprintf(f, "BitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t Time(us)\n"); - fprintf(f, "%7.3f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%8.0f\n", dr, - cpi->total_ssimg_y / cpi->count, cpi->total_ssimg_u / cpi->count, - cpi->total_ssimg_v / cpi->count, cpi->total_ssimg_all / cpi->count, total_encode_time); + if (cpi->oxcf.number_of_layers > 1) + { + int i; + + fprintf(f, "Layer\tBitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t" + "Time(us)\n"); + for (i=0; i<cpi->oxcf.number_of_layers; i++) + { + double dr = (double)cpi->bytes_in_layer[i] * + 8.0 / 1000.0 / time_encoded; + fprintf(f, "%5d\t%7.3f\t%6.4f\t" + "%6.4f\t%6.4f\t%6.4f\t%8.0f\n", + i, dr, + cpi->total_ssimg_y_in_layer[i] / + cpi->frames_in_layer[i], + cpi->total_ssimg_u_in_layer[i] / + cpi->frames_in_layer[i], + cpi->total_ssimg_v_in_layer[i] / + cpi->frames_in_layer[i], + cpi->total_ssimg_all_in_layer[i] / + cpi->frames_in_layer[i], + total_encode_time); + } + } + else + { + fprintf(f, "BitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t" + "Time(us)\n"); + fprintf(f, "%7.3f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%8.0f\n", dr, + 
cpi->total_ssimg_y / cpi->count, + cpi->total_ssimg_u / cpi->count, + cpi->total_ssimg_v / cpi->count, + cpi->total_ssimg_all / cpi->count, total_encode_time); + } } fclose(f); #if 0 f = fopen("qskip.stt", "a"); - fprintf(f, "minq:%d -maxq:%d skipture:skipfalse = %d:%d\n", cpi->oxcf.best_allowed_q, cpi->oxcf.worst_allowed_q, skiptruecount, skipfalsecount); + fprintf(f, "minq:%d -maxq:%d skiptrue:skipfalse = %d:%d\n", cpi->oxcf.best_allowed_q, cpi->oxcf.worst_allowed_q, skiptruecount, skipfalsecount); fclose(f); #endif @@ -2841,10 +3046,41 @@ static void update_rd_ref_frame_probs(VP8_COMP *cpi) } else if (!(rf_intra + rf_inter)) { - // This is a trap in case this function is called with cpi->recent_ref_frame_usage[] blank. - cpi->prob_intra_coded = 63; - cpi->prob_last_coded = 128; - cpi->prob_gf_coded = 128; + if (cpi->oxcf.number_of_layers > 1) + { + if (cpi->ref_frame_flags == VP8_LAST_FLAG) + { + cpi->prob_intra_coded = 63; + cpi->prob_last_coded = 255; + cpi->prob_gf_coded = 128; + } + else if (cpi->ref_frame_flags == VP8_GOLD_FLAG) + { + cpi->prob_intra_coded = 63; + cpi->prob_last_coded = 1; + cpi->prob_gf_coded = 255; + } + else if (cpi->ref_frame_flags == VP8_ALT_FLAG) + { + cpi->prob_intra_coded = 63; + cpi->prob_last_coded = 1; + cpi->prob_gf_coded = 1; + } + else + { + cpi->prob_intra_coded = 63; + cpi->prob_last_coded = 128; + cpi->prob_gf_coded = 128; + } + } + else + { + // This is a trap in case this function is called with + // cpi->recent_ref_frame_usage[] blank. + cpi->prob_intra_coded = 63; + cpi->prob_last_coded = 128; + cpi->prob_gf_coded = 128; + } } else { @@ -2866,32 +3102,33 @@ static void update_rd_ref_frame_probs(VP8_COMP *cpi) } // update reference frame costs since we can do better than what we got last frame. 
- - if (cpi->common.refresh_alt_ref_frame) - { - cpi->prob_intra_coded += 40; - cpi->prob_last_coded = 200; - cpi->prob_gf_coded = 1; - } - else if (cpi->common.frames_since_golden == 0) + if (cpi->oxcf.number_of_layers == 1) { - cpi->prob_last_coded = 214; - cpi->prob_gf_coded = 1; - } - else if (cpi->common.frames_since_golden == 1) - { - cpi->prob_last_coded = 192; - cpi->prob_gf_coded = 220; - } - else if (cpi->source_alt_ref_active) - { - //int dist = cpi->common.frames_till_alt_ref_frame + cpi->common.frames_since_golden; - cpi->prob_gf_coded -= 20; + if (cpi->common.refresh_alt_ref_frame) + { + cpi->prob_intra_coded += 40; + cpi->prob_last_coded = 200; + cpi->prob_gf_coded = 1; + } + else if (cpi->common.frames_since_golden == 0) + { + cpi->prob_last_coded = 214; + cpi->prob_gf_coded = 1; + } + else if (cpi->common.frames_since_golden == 1) + { + cpi->prob_last_coded = 192; + cpi->prob_gf_coded = 220; + } + else if (cpi->source_alt_ref_active) + { + //int dist = cpi->common.frames_till_alt_ref_frame + cpi->common.frames_since_golden; + cpi->prob_gf_coded -= 20; - if (cpi->prob_gf_coded < 10) - cpi->prob_gf_coded = 10; + if (cpi->prob_gf_coded < 10) + cpi->prob_gf_coded = 10; + } } - #endif } @@ -3283,7 +3520,6 @@ static void encode_frame_to_data_rate // Enable or disable mode based tweaking of the zbin // For 2 Pass Only used where GF/ARF prediction quality // is above a threshold - cpi->zbin_mode_boost = 0; cpi->zbin_mode_boost_enabled = TRUE; if (cpi->pass == 2) { @@ -3432,6 +3668,19 @@ static void encode_frame_to_data_rate cpi->buffer_level = cpi->bits_off_target; + if (cpi->oxcf.number_of_layers > 1) + { + int i; + + // Propagate bits saved by dropping the frame to higher layers + for (i=cpi->current_layer+1; i<cpi->oxcf.number_of_layers; i++) + { + cpi->layer_context[i].bits_off_target + += cpi->av_per_frame_bandwidth; + cpi->layer_context[i].buffer_level = cpi->bits_off_target; + } + } + return; } else @@ -3478,7 +3727,7 @@ static void 
encode_frame_to_data_rate } // Set an active best quality and if necessary active worst quality - // There is some odd behaviour for one pass here that needs attention. + // There is some odd behavior for one pass here that needs attention. if ( (cpi->pass == 2) || (cpi->ni_frames > 150)) { vp8_clear_system_state(); @@ -3510,13 +3759,14 @@ static void encode_frame_to_data_rate cpi->active_best_quality = kf_high_motion_minq[Q]; } - else if (cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame) + else if (cpi->oxcf.number_of_layers==1 && + (cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame)) { // Use the lower of cpi->active_worst_quality and recent // average Q as basis for GF/ARF Q limit unless last frame was // a key frame. if ( (cpi->frames_since_key > 1) && - (cpi->avg_frame_qindex < cpi->active_worst_quality) ) + (cpi->avg_frame_qindex < cpi->active_worst_quality) ) { Q = cpi->avg_frame_qindex; } @@ -3617,13 +3867,17 @@ static void encode_frame_to_data_rate // Set highest allowed value for Zbin over quant if (cm->frame_type == KEY_FRAME) zbin_oq_high = 0; //ZBIN_OQ_MAX/16 - else if (cm->refresh_alt_ref_frame || (cm->refresh_golden_frame && !cpi->source_alt_ref_active)) - zbin_oq_high = 16; + else if ((cpi->oxcf.number_of_layers == 1) && ((cm->refresh_alt_ref_frame || + (cm->refresh_golden_frame && !cpi->source_alt_ref_active)))) + { + zbin_oq_high = 16; + } else zbin_oq_high = ZBIN_OQ_MAX; - // Setup background Q adjustment for error resilliant mode - if (cpi->cyclic_refresh_mode_enabled) + // Setup background Q adjustment for error resilient mode. + // For multi-layer encodes only enable this for the base layer. 
+ if (cpi->cyclic_refresh_mode_enabled && (cpi->current_layer==0)) cyclic_background_refresh(cpi, Q, 0); vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit); @@ -3756,10 +4010,8 @@ static void encode_frame_to_data_rate if (cpi->prob_skip_false > 250) cpi->prob_skip_false = 250; - if (cpi->is_src_frame_alt_ref) + if (cpi->oxcf.number_of_layers == 1 && cpi->is_src_frame_alt_ref) cpi->prob_skip_false = 1; - - } #if 0 @@ -4111,9 +4363,10 @@ static void encode_frame_to_data_rate } // Update the GF useage maps. - // This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter - // This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter - vp8_update_gf_useage_maps(cpi, cm, &cpi->mb); + // This is done after completing the compression of a frame when all + // modes etc. are finalized but before loop filter + if (cpi->oxcf.number_of_layers == 1) + vp8_update_gf_useage_maps(cpi, cm, &cpi->mb); if (cm->frame_type == KEY_FRAME) cm->refresh_last_frame = 1; @@ -4179,6 +4432,13 @@ static void encode_frame_to_data_rate cpi->total_byte_count += (*size); cpi->projected_frame_size = (*size) << 3; + if (cpi->oxcf.number_of_layers > 1) + { + int i; + for (i=cpi->current_layer+1; i<cpi->oxcf.number_of_layers; i++) + cpi->layer_context[i].total_byte_count += (*size); + } + if (!active_worst_qchanged) vp8_update_rate_correction_factors(cpi, 2); @@ -4194,7 +4454,8 @@ static void encode_frame_to_data_rate cpi->avg_frame_qindex = (2 + 3 * cpi->avg_frame_qindex + cm->base_qindex) >> 2; // Keep a record from which we can calculate the average Q excluding GF updates and key frames - if ((cm->frame_type != KEY_FRAME) && !cm->refresh_golden_frame && !cm->refresh_alt_ref_frame) + if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) || + (!cm->refresh_golden_frame && !cm->refresh_alt_ref_frame))) { cpi->ni_frames++; @@ -4245,7 +4506,7 @@ 
static void encode_frame_to_data_rate #endif - // Set the count for maximum consequative dropped frames based upon the ratio of + // Set the count for maximum consecutive dropped frames based upon the ratio of // this frame size to the target average per frame bandwidth. // (cpi->av_per_frame_bandwidth > 0) is just a sanity check to prevent / 0. if (cpi->drop_frames_allowed && (cpi->av_per_frame_bandwidth > 0)) @@ -4270,13 +4531,32 @@ static void encode_frame_to_data_rate cpi->long_rolling_actual_bits = ((cpi->long_rolling_actual_bits * 31) + cpi->projected_frame_size + 16) / 32; // Actual bits spent - cpi->total_actual_bits += cpi->projected_frame_size; + cpi->total_actual_bits += cpi->projected_frame_size; // Debug stats cpi->total_target_vs_actual += (cpi->this_frame_target - cpi->projected_frame_size); cpi->buffer_level = cpi->bits_off_target; + // Propagate values to higher temporal layers + if (cpi->oxcf.number_of_layers > 1) + { + int i; + + for (i=cpi->current_layer+1; i<cpi->oxcf.number_of_layers; i++) + { + LAYER_CONTEXT *lc = &cpi->layer_context[i]; + int bits_off_for_this_layer = lc->target_bandwidth / lc->frame_rate + - cpi->projected_frame_size; + + lc->bits_off_target += bits_off_for_this_layer; + + lc->total_actual_bits += cpi->projected_frame_size; + lc->total_target_vs_actual += bits_off_for_this_layer; + lc->buffer_level = lc->bits_off_target; + } + } + // Update bits left to the kf and gf groups to account for overshoot or undershoot on these frames if (cm->frame_type == KEY_FRAME) { @@ -4322,7 +4602,7 @@ static void encode_frame_to_data_rate vp8_clear_system_state(); //__asm emms; - if (cpi->twopass.total_left_stats.coded_error != 0.0) + if (cpi->twopass.total_left_stats->coded_error != 0.0) fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6d %6d" "%6d %6d %6d %5d %5d %5d %8d %8.2f %10d %10.3f" "%10.3f %8d\n", @@ -4340,9 +4620,9 @@ static void encode_frame_to_data_rate cm->frame_type, cpi->gfu_boost, cpi->twopass.est_max_qcorrection_factor, 
(int)cpi->twopass.bits_left, - cpi->twopass.total_left_stats.coded_error, + cpi->twopass.total_left_stats->coded_error, (double)cpi->twopass.bits_left / - cpi->twopass.total_left_stats.coded_error, + cpi->twopass.total_left_stats->coded_error, cpi->tot_recode_hits); else fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6d %6d" @@ -4362,7 +4642,7 @@ static void encode_frame_to_data_rate cm->frame_type, cpi->gfu_boost, cpi->twopass.est_max_qcorrection_factor, (int)cpi->twopass.bits_left, - cpi->twopass.total_left_stats.coded_error, + cpi->twopass.total_left_stats->coded_error, cpi->tot_recode_hits); fclose(f); @@ -4675,7 +4955,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon cm->refresh_golden_frame = 0; cm->refresh_last_frame = 0; cm->show_frame = 0; - cpi->source_alt_ref_pending = FALSE; // Clear Pending altf Ref flag. + cpi->source_alt_ref_pending = FALSE; // Clear Pending alt Ref flag. cpi->is_src_frame_alt_ref = 0; } } @@ -4727,6 +5007,13 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon return -1; } + // Restore layer specific context if necessary + if (cpi->oxcf.number_of_layers > 1) + { + restore_layer_context (cpi, + cpi->oxcf.layer_id[cm->current_video_frame % cpi->oxcf.periodicity]); + } + if (cpi->source->ts_start < cpi->first_time_stamp_ever) { cpi->first_time_stamp_ever = cpi->source->ts_start; @@ -4734,7 +5021,16 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon } // adjust frame rates based on timestamps given - if (!cm->refresh_alt_ref_frame) + if (cpi->oxcf.number_of_layers > 1 ) + { + vp8_new_frame_rate ( + cpi, cpi->layer_context[cpi->current_layer].frame_rate); + + cpi->last_time_stamp_seen = cpi->source->ts_start; + cpi->last_end_time_stamp_seen = cpi->source->ts_end; + + } + else if (!cm->refresh_alt_ref_frame) { int64_t this_duration; int step = 0; @@ -4786,7 +5082,8 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, 
unsigned lon if (cpi->compressor_speed == 2) { - check_gf_quality(cpi); + if (cpi->oxcf.number_of_layers == 1) + check_gf_quality(cpi); vpx_usec_timer_start(&tsctimer); vpx_usec_timer_start(&ticktimer); } @@ -4893,6 +5190,10 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon } + // Save layer specific state + if (cpi->oxcf.number_of_layers > 1) + save_layer_context (cpi); + vpx_usec_timer_mark(&cmptimer); cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer); @@ -4922,7 +5223,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon int y_samples = orig->y_height * orig->y_width ; int uv_samples = orig->uv_height * orig->uv_width ; int t_samples = y_samples + 2 * uv_samples; - int64_t sq_error; + int64_t sq_error, sq_error2; ye = calc_plane_error(orig->y_buffer, orig->y_stride, recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height, @@ -4964,14 +5265,14 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon pp->v_buffer, pp->uv_stride, orig->uv_width, orig->uv_height, IF_RTCD(&cpi->rtcd.variance)); - sq_error = ye + ue + ve; + sq_error2 = ye + ue + ve; - frame_psnr2 = vp8_mse2psnr(t_samples, 255.0, sq_error); + frame_psnr2 = vp8_mse2psnr(t_samples, 255.0, sq_error2); cpi->totalp_y += vp8_mse2psnr(y_samples, 255.0, ye); cpi->totalp_u += vp8_mse2psnr(uv_samples, 255.0, ue); cpi->totalp_v += vp8_mse2psnr(uv_samples, 255.0, ve); - cpi->total_sq_error2 += sq_error; + cpi->total_sq_error2 += sq_error2; cpi->totalp += frame_psnr2; frame_ssim2 = vp8_calc_ssim(cpi->Source, @@ -4981,6 +5282,24 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon cpi->summed_quality += frame_ssim2 * weight; cpi->summed_weights += weight; + if (cpi->oxcf.number_of_layers > 1) + { + int i; + + for (i=cpi->current_layer; + i<cpi->oxcf.number_of_layers; i++) + { + cpi->frames_in_layer[i]++; + + cpi->bytes_in_layer[i] += *size; + cpi->sum_psnr[i] += frame_psnr; + 
cpi->sum_psnr_p[i] += frame_psnr2; + cpi->total_error2[i] += sq_error; + cpi->total_error2_p[i] += sq_error2; + cpi->sum_ssim[i] += frame_ssim2 * weight; + cpi->sum_weights[i] += weight; + } + } } } @@ -4989,10 +5308,30 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon double y, u, v, frame_all; frame_all = vp8_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u, &v, IF_RTCD(&cpi->rtcd.variance)); - cpi->total_ssimg_y += y; - cpi->total_ssimg_u += u; - cpi->total_ssimg_v += v; - cpi->total_ssimg_all += frame_all; + + if (cpi->oxcf.number_of_layers > 1) + { + int i; + + for (i=cpi->current_layer; + i<cpi->oxcf.number_of_layers; i++) + { + if (!cpi->b_calculate_psnr) + cpi->frames_in_layer[i]++; + + cpi->total_ssimg_y_in_layer[i] += y; + cpi->total_ssimg_u_in_layer[i] += u; + cpi->total_ssimg_v_in_layer[i] += v; + cpi->total_ssimg_all_in_layer[i] += frame_all; + } + } + else + { + cpi->total_ssimg_y += y; + cpi->total_ssimg_u += u; + cpi->total_ssimg_v += v; + cpi->total_ssimg_all += frame_all; + } } } diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index ee519fad0..039c2543e 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -56,6 +56,8 @@ #define VP8_TEMPORAL_ALT_REF 1 #endif +#define MAX_PERIODICITY 16 + typedef struct { int kf_indicated; @@ -238,6 +240,52 @@ enum BLOCK_MAX_SEGMENTS }; +typedef struct +{ + // Layer configuration + double frame_rate; + int target_bandwidth; + + // Layer specific coding parameters + int starting_buffer_level; + int optimal_buffer_level; + int maximum_buffer_size; + + int avg_frame_size_for_layer; + + int buffer_level; + int bits_off_target; + + long long total_actual_bits; + int total_target_vs_actual; + + int worst_quality; + int active_worst_quality; + int best_quality; + int active_best_quality; + + int ni_av_qi; + int ni_tot_qi; + int ni_frames; + int avg_frame_qindex; + + double rate_correction_factor; + double key_frame_rate_correction_factor; + double 
gf_rate_correction_factor; + + int zbin_over_quant; + + int inter_frame_target; + INT64 total_byte_count; + + int filter_level; + + int last_frame_percent_intra; + + int count_mb_ref_frame_usage[MAX_REF_FRAMES]; + +} LAYER_CONTEXT; + typedef struct VP8_COMP { @@ -610,6 +658,25 @@ typedef struct VP8_COMP int force_next_frame_intra; /* force next frame to intra when kf_auto says so */ int droppable; + + // Coding layer state variables + unsigned int current_layer; + LAYER_CONTEXT layer_context[MAX_LAYERS]; + + long long frames_in_layer[MAX_LAYERS]; + long long bytes_in_layer[MAX_LAYERS]; + double sum_psnr[MAX_LAYERS]; + double sum_psnr_p[MAX_LAYERS]; + double total_error2[MAX_LAYERS]; + double total_error2_p[MAX_LAYERS]; + double sum_ssim[MAX_LAYERS]; + double sum_weights[MAX_LAYERS]; + + double total_ssimg_y_in_layer[MAX_LAYERS]; + double total_ssimg_u_in_layer[MAX_LAYERS]; + double total_ssimg_v_in_layer[MAX_LAYERS]; + double total_ssimg_all_in_layer[MAX_LAYERS]; + } VP8_COMP; void control_data_rate(VP8_COMP *cpi); diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index 1e602138f..4a8258170 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -471,7 +471,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, else skip_mode[GOLDEN_FRAME] = 1; - if (cpi->ref_frame_flags & VP8_ALT_FLAG && cpi->source_alt_ref_active) + if ((cpi->ref_frame_flags & VP8_ALT_FLAG) && + (cpi->source_alt_ref_active || cpi->oxcf.number_of_layers > 1)) { YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx]; y_buffer[ALTREF_FRAME] = alt_yv12->y_buffer + recon_yoffset; diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c index 46e1d9dd9..1ac905021 100644 --- a/vp8/encoder/ratectrl.c +++ b/vp8/encoder/ratectrl.c @@ -436,7 +436,8 @@ static void calc_iframe_target_size(VP8_COMP *cpi) } -// Do the best we can to define the parameteres for the next GF based on what information we have available. 
+// Do the best we can to define the parameters for the next GF based on what +// information we have available. static void calc_gf_params(VP8_COMP *cpi) { int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; @@ -607,6 +608,11 @@ static void calc_pframe_target_size(VP8_COMP *cpi) { int min_frame_target; int Adjustment; + int old_per_frame_bandwidth = cpi->per_frame_bandwidth; + + if ( cpi->current_layer > 0) + cpi->per_frame_bandwidth = + cpi->layer_context[cpi->current_layer].avg_frame_size_for_layer; min_frame_target = 0; @@ -622,7 +628,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) // Special alt reference frame case - if (cpi->common.refresh_alt_ref_frame) + if((cpi->common.refresh_alt_ref_frame) && (cpi->oxcf.number_of_layers == 1)) { if (cpi->pass == 2) { @@ -789,7 +795,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) // Decide whether or not we need to adjust the frame data rate target. // // If we are are below the optimal buffer fullness level and adherence - // to buffering contraints is important to the end useage then adjust + // to buffering constraints is important to the end usage then adjust // the per frame target. if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && (cpi->buffer_level < cpi->oxcf.optimal_buffer_level)) @@ -812,12 +818,12 @@ static void calc_pframe_target_size(VP8_COMP *cpi) percent_low = 0; // lower the target bandwidth for this frame. - cpi->this_frame_target -= (cpi->this_frame_target * percent_low) - / 200; + cpi->this_frame_target -= + (cpi->this_frame_target * percent_low) / 200; // Are we using allowing control of active_worst_allowed_q // according to buffer level. - if (cpi->auto_worst_q) + if (cpi->auto_worst_q && cpi->ni_frames > 150) { int critical_buffer_level; @@ -834,7 +840,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) (cpi->buffer_level < cpi->bits_off_target) ? 
cpi->buffer_level : cpi->bits_off_target; } - // For local file playback short term buffering contraints + // For local file playback short term buffering constraints // are less of an issue else { @@ -905,11 +911,11 @@ static void calc_pframe_target_size(VP8_COMP *cpi) percent_high = 0; cpi->this_frame_target += (cpi->this_frame_target * - percent_high) / 200; - + percent_high) / 200; - // Are we allowing control of active_worst_allowed_q according to bufferl level. - if (cpi->auto_worst_q) + // Are we allowing control of active_worst_allowed_q according + // to buffer level. + if (cpi->auto_worst_q && cpi->ni_frames > 150) { // When using the relaxed buffer model stick to the user specified value cpi->active_worst_quality = cpi->ni_av_qi; @@ -1112,6 +1118,8 @@ static void calc_pframe_target_size(VP8_COMP *cpi) } } + + cpi->per_frame_bandwidth = old_per_frame_bandwidth; } @@ -1421,8 +1429,14 @@ void vp8_adjust_key_frame_context(VP8_COMP *cpi) * bits allocated than those following other gfs. */ overspend = (cpi->projected_frame_size - cpi->per_frame_bandwidth); - cpi->kf_overspend_bits += overspend * 7 / 8; - cpi->gf_overspend_bits += overspend * 1 / 8; + + if (cpi->oxcf.number_of_layers > 1) + cpi->kf_overspend_bits += overspend; + else + { + cpi->kf_overspend_bits += overspend * 7 / 8; + cpi->gf_overspend_bits += overspend * 1 / 8; + } /* Work out how much to try and recover per frame. 
*/ cpi->kf_bitrate_adjustment = cpi->kf_overspend_bits @@ -1452,7 +1466,9 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, } else { - if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) + if (cpi->oxcf.number_of_layers > 1 || + cpi->common.refresh_alt_ref_frame || + cpi->common.refresh_golden_frame) { *frame_over_shoot_limit = cpi->this_frame_target * 9 / 8; *frame_under_shoot_limit = cpi->this_frame_target * 7 / 8; diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index ca4e505dc..f8336240c 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c @@ -218,6 +218,25 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, } #endif + RANGE_CHECK(cfg, ts_number_layers, 1, 5); + + if (cfg->ts_number_layers > 1) + { + int i; + RANGE_CHECK_HI(cfg, ts_periodicity, 16); + + for (i=1; i<cfg->ts_number_layers; i++) + if (cfg->ts_target_bitrate[i] <= cfg->ts_target_bitrate[i-1]) + ERROR("ts_target_bitrate entries are not strictly increasing"); + + RANGE_CHECK(cfg, ts_rate_decimator[cfg->ts_number_layers-1], 1, 1); + for (i=cfg->ts_number_layers-2; i>0; i--) + if (cfg->ts_rate_decimator[i-1] != 2*cfg->ts_rate_decimator[i]) + ERROR("ts_rate_decimator factors are not powers of 2"); + + RANGE_CHECK_HI(cfg, ts_layer_id[i], cfg->ts_number_layers-1); + } + return VPX_CODEC_OK; } @@ -253,14 +272,15 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, oxcf->Width = cfg.g_w; oxcf->Height = cfg.g_h; /* guess a frame rate if out of whack, use 30 */ - oxcf->frame_rate = (double)(cfg.g_timebase.den) / (double)(cfg.g_timebase.num); + oxcf->frame_rate = (double)(cfg.g_timebase.den) / + (double)(cfg.g_timebase.num); if (oxcf->frame_rate > 180) { oxcf->frame_rate = 30; } - oxcf->error_resilient_mode = cfg.g_error_resilient; + oxcf->error_resilient_mode = cfg.g_error_resilient; switch (cfg.g_pass) { @@ -277,13 +297,13 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, if (cfg.g_pass == VPX_RC_FIRST_PASS) { - 
oxcf->allow_lag = 0; - oxcf->lag_in_frames = 0; + oxcf->allow_lag = 0; + oxcf->lag_in_frames = 0; } else { - oxcf->allow_lag = (cfg.g_lag_in_frames) > 0; - oxcf->lag_in_frames = cfg.g_lag_in_frames; + oxcf->allow_lag = (cfg.g_lag_in_frames) > 0; + oxcf->lag_in_frames = cfg.g_lag_in_frames; } oxcf->allow_df = (cfg.rc_dropframe_thresh > 0); @@ -295,59 +315,71 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, if (cfg.rc_end_usage == VPX_VBR) { - oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK; + oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK; } else if (cfg.rc_end_usage == VPX_CBR) { - oxcf->end_usage = USAGE_STREAM_FROM_SERVER; + oxcf->end_usage = USAGE_STREAM_FROM_SERVER; } else if (cfg.rc_end_usage == VPX_CQ) { - oxcf->end_usage = USAGE_CONSTRAINED_QUALITY; + oxcf->end_usage = USAGE_CONSTRAINED_QUALITY; } - oxcf->target_bandwidth = cfg.rc_target_bitrate; + oxcf->target_bandwidth = cfg.rc_target_bitrate; oxcf->rc_max_intra_bitrate_pct = vp8_cfg.rc_max_intra_bitrate_pct; - oxcf->best_allowed_q = cfg.rc_min_quantizer; - oxcf->worst_allowed_q = cfg.rc_max_quantizer; - oxcf->cq_level = vp8_cfg.cq_level; + oxcf->best_allowed_q = cfg.rc_min_quantizer; + oxcf->worst_allowed_q = cfg.rc_max_quantizer; + oxcf->cq_level = vp8_cfg.cq_level; oxcf->fixed_q = -1; - oxcf->under_shoot_pct = cfg.rc_undershoot_pct; - oxcf->over_shoot_pct = cfg.rc_overshoot_pct; + oxcf->under_shoot_pct = cfg.rc_undershoot_pct; + oxcf->over_shoot_pct = cfg.rc_overshoot_pct; - oxcf->maximum_buffer_size = cfg.rc_buf_sz; - oxcf->starting_buffer_level = cfg.rc_buf_initial_sz; - oxcf->optimal_buffer_level = cfg.rc_buf_optimal_sz; + oxcf->maximum_buffer_size = cfg.rc_buf_sz; + oxcf->starting_buffer_level = cfg.rc_buf_initial_sz; + oxcf->optimal_buffer_level = cfg.rc_buf_optimal_sz; - oxcf->two_pass_vbrbias = cfg.rc_2pass_vbr_bias_pct; + oxcf->two_pass_vbrbias = cfg.rc_2pass_vbr_bias_pct; oxcf->two_pass_vbrmin_section = cfg.rc_2pass_vbr_minsection_pct; oxcf->two_pass_vbrmax_section = 
cfg.rc_2pass_vbr_maxsection_pct; - oxcf->auto_key = cfg.kf_mode == VPX_KF_AUTO - && cfg.kf_min_dist != cfg.kf_max_dist; - //oxcf->kf_min_dist = cfg.kf_min_dis; - oxcf->key_freq = cfg.kf_max_dist; + oxcf->auto_key = cfg.kf_mode == VPX_KF_AUTO + && cfg.kf_min_dist != cfg.kf_max_dist; + //oxcf->kf_min_dist = cfg.kf_min_dis; + oxcf->key_freq = cfg.kf_max_dist; + + oxcf->number_of_layers = cfg.ts_number_layers; + oxcf->periodicity = cfg.ts_periodicity; + + if (oxcf->number_of_layers > 1) + { + memcpy (oxcf->target_bitrate, cfg.ts_target_bitrate, + sizeof(cfg.ts_target_bitrate)); + memcpy (oxcf->rate_decimator, cfg.ts_rate_decimator, + sizeof(cfg.ts_rate_decimator)); + memcpy (oxcf->layer_id, cfg.ts_layer_id, sizeof(cfg.ts_layer_id)); + } //oxcf->delete_first_pass_file = cfg.g_delete_firstpassfile; //strcpy(oxcf->first_pass_file, cfg.g_firstpass_file); - oxcf->cpu_used = vp8_cfg.cpu_used; - oxcf->encode_breakout = vp8_cfg.static_thresh; - oxcf->play_alternate = vp8_cfg.enable_auto_alt_ref; - oxcf->noise_sensitivity = vp8_cfg.noise_sensitivity; - oxcf->Sharpness = vp8_cfg.Sharpness; - oxcf->token_partitions = vp8_cfg.token_partitions; + oxcf->cpu_used = vp8_cfg.cpu_used; + oxcf->encode_breakout = vp8_cfg.static_thresh; + oxcf->play_alternate = vp8_cfg.enable_auto_alt_ref; + oxcf->noise_sensitivity = vp8_cfg.noise_sensitivity; + oxcf->Sharpness = vp8_cfg.Sharpness; + oxcf->token_partitions = vp8_cfg.token_partitions; - oxcf->two_pass_stats_in = cfg.rc_twopass_stats_in; - oxcf->output_pkt_list = vp8_cfg.pkt_list; + oxcf->two_pass_stats_in = cfg.rc_twopass_stats_in; + oxcf->output_pkt_list = vp8_cfg.pkt_list; - oxcf->arnr_max_frames = vp8_cfg.arnr_max_frames; - oxcf->arnr_strength = vp8_cfg.arnr_strength; - oxcf->arnr_type = vp8_cfg.arnr_type; + oxcf->arnr_max_frames = vp8_cfg.arnr_max_frames; + oxcf->arnr_strength = vp8_cfg.arnr_strength; + oxcf->arnr_type = vp8_cfg.arnr_type; - oxcf->tuning = vp8_cfg.tuning; + oxcf->tuning = vp8_cfg.tuning; /* printf("Current VP8 Settings: 
\n"); @@ -515,7 +547,7 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx) cfg = &ctx->priv->alg_priv->cfg; - /* Select the extra vp6 configuration table based on the current + /* Select the extra vp8 configuration table based on the current * usage value. If the current usage value isn't found, use the * values for usage case 0. */ @@ -1143,6 +1175,12 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] = 1, /* g_delete_first_pass_file */ "vp8.fpf" /* first pass filename */ #endif + + 1, /* ts_number_layers */ + {0}, /* ts_target_bitrate */ + {0}, /* ts_rate_decimator */ + 0, /* ts_periodicity */ + {0}, /* ts_layer_id */ }}, { -1, {NOT_IMPLEMENTED}} }; diff --git a/vp8_scalable_patterns.c b/vp8_scalable_patterns.c new file mode 100644 index 000000000..41ecaa78b --- /dev/null +++ b/vp8_scalable_patterns.c @@ -0,0 +1,467 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This is an example demonstrating how to implement a multi-layer VP8 + * encoding scheme based on temporal scalability for video applications + * that benefit from a scalable bitstream. 
+ */ +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#define VPX_CODEC_DISABLE_COMPAT 1 +#include "vpx/vpx_encoder.h" +#include "vpx/vp8cx.h" +#define interface (vpx_codec_vp8_cx()) +#define fourcc 0x30385056 + +#define IVF_FILE_HDR_SZ (32) +#define IVF_FRAME_HDR_SZ (12) + +static void mem_put_le16(char *mem, unsigned int val) { + mem[0] = val; + mem[1] = val>>8; +} + +static void mem_put_le32(char *mem, unsigned int val) { + mem[0] = val; + mem[1] = val>>8; + mem[2] = val>>16; + mem[3] = val>>24; +} + +static void die(const char *fmt, ...) { + va_list ap; + + va_start(ap, fmt); + vprintf(fmt, ap); + if(fmt[strlen(fmt)-1] != '\n') + printf("\n"); + exit(EXIT_FAILURE); +} + +static void die_codec(vpx_codec_ctx_t *ctx, const char *s) { + const char *detail = vpx_codec_error_detail(ctx); + + printf("%s: %s\n", s, vpx_codec_error(ctx)); + if(detail) + printf(" %s\n",detail); + exit(EXIT_FAILURE); +} + +static int read_frame(FILE *f, vpx_image_t *img) { + size_t nbytes, to_read; + int res = 1; + + to_read = img->w*img->h*3/2; + nbytes = fread(img->planes[0], 1, to_read, f); + if(nbytes != to_read) { + res = 0; + if(nbytes > 0) + printf("Warning: Read partial frame. 
Check your width & height!\n"); + } + return res; +} + +static void write_ivf_file_header(FILE *outfile, + const vpx_codec_enc_cfg_t *cfg, + int frame_cnt) { + char header[32]; + + if(cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS) + return; + header[0] = 'D'; + header[1] = 'K'; + header[2] = 'I'; + header[3] = 'F'; + mem_put_le16(header+4, 0); /* version */ + mem_put_le16(header+6, 32); /* headersize */ + mem_put_le32(header+8, fourcc); /* headersize */ + mem_put_le16(header+12, cfg->g_w); /* width */ + mem_put_le16(header+14, cfg->g_h); /* height */ + mem_put_le32(header+16, cfg->g_timebase.den); /* rate */ + mem_put_le32(header+20, cfg->g_timebase.num); /* scale */ + mem_put_le32(header+24, frame_cnt); /* length */ + mem_put_le32(header+28, 0); /* unused */ + + if(fwrite(header, 1, 32, outfile)); +} + + +static void write_ivf_frame_header(FILE *outfile, + const vpx_codec_cx_pkt_t *pkt) +{ + char header[12]; + vpx_codec_pts_t pts; + + if(pkt->kind != VPX_CODEC_CX_FRAME_PKT) + return; + + pts = pkt->data.frame.pts; + mem_put_le32(header, pkt->data.frame.sz); + mem_put_le32(header+4, pts&0xFFFFFFFF); + mem_put_le32(header+8, pts >> 32); + + if(fwrite(header, 1, 12, outfile)); +} + +static int mode_to_num_layers[7] = {2, 2, 3, 3, 3, 3, 5}; + +int main(int argc, char **argv) { + FILE *infile, *outfile[MAX_LAYERS]; + vpx_codec_ctx_t codec; + vpx_codec_enc_cfg_t cfg; + int frame_cnt = 0; + vpx_image_t raw; + vpx_codec_err_t res; + unsigned int width; + unsigned int height; + int frame_avail; + int got_data; + int flags = 0; + int i; + + int layering_mode = 0; + int frames_in_layer[MAX_LAYERS] = {0}; + int layer_flags[MAX_PERIODICITY] = {0}; + + // Check usage and arguments + if (argc < 7) + die("Usage: %s <infile> <outfile> <width> <height> <mode> " + "<Rate_0> ... 
<Rate_nlayers-1>\n", argv[0]); + + width = strtol (argv[3], NULL, 0); + height = strtol (argv[4], NULL, 0); + if (width < 16 || width%2 || height <16 || height%2) + die ("Invalid resolution: %d x %d", width, height); + + if (!sscanf(argv[5], "%d", &layering_mode)) + die ("Invalid mode %s", argv[5]); + if (layering_mode<0 || layering_mode>6) + die ("Invalid mode (0..6) %s", argv[5]); + + if (argc != 6+mode_to_num_layers[layering_mode]) + die ("Invalid number of arguments"); + + if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 1)) + die ("Failed to allocate image", width, height); + + printf("Using %s\n",vpx_codec_iface_name(interface)); + + // Populate encoder configuration + res = vpx_codec_enc_config_default(interface, &cfg, 0); + if(res) { + printf("Failed to get config: %s\n", vpx_codec_err_to_string(res)); + return EXIT_FAILURE; + } + + // Update the default configuration with our settings + cfg.g_w = width; + cfg.g_h = height; + + for (i=6; i<6+mode_to_num_layers[layering_mode]; i++) + if (!sscanf(argv[i], "%d", &cfg.ts_target_bitrate[i-6])) + die ("Invalid data rate %s", argv[i]); + + // Real time parameters + cfg.rc_dropframe_thresh = 0; + cfg.rc_end_usage = VPX_CBR; + cfg.rc_resize_allowed = 0; + cfg.rc_min_quantizer = 4; + cfg.rc_max_quantizer = 63; + cfg.rc_undershoot_pct = 98; + cfg.rc_overshoot_pct = 100; + cfg.rc_buf_initial_sz = 500; + cfg.rc_buf_optimal_sz = 600; + cfg.rc_buf_sz = 1000; + + // Enable error resilient mode + cfg.g_error_resilient = 1; + cfg.g_lag_in_frames = 0; + cfg.kf_mode = VPX_KF_DISABLED; + + // Disable automatic keyframe placement + cfg.kf_min_dist = cfg.kf_max_dist = 1000; + + // Temporal scaling parameters: + // NOTE: The 3 prediction frames cannot be used interchangebly due to + // differences in the way they are handled throughout the code. The + // frames should be allocated to layers in the order LAST, GF, ARF. + // Other combinations work, but may produce slightly inferior results. 
+ switch (layering_mode) + { + + case 0: + { + // 2-layers, 2-frame period + int ids[2] = {0,1}; + cfg.ts_number_layers = 2; + cfg.ts_periodicity = 2; + cfg.ts_rate_decimator[0] = 2; + cfg.ts_rate_decimator[1] = 1; + memcpy(cfg.ts_layer_id, ids, sizeof(ids)); + + // 0=L, 1=GF, Intra-layer prediction enabled + layer_flags[0] = VPX_EFLAG_FORCE_KF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; + layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_REF_ARF; +#if 0 + // 0=L, 1=GF, Intra-layer 1 prediction disabled + layer_flags[0] = VPX_EFLAG_FORCE_KF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; + layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_LAST; +#endif + break; + } + + case 1: + { + // 2-layers, 3-frame period + int ids[3] = {0,1,1}; + cfg.ts_number_layers = 2; + cfg.ts_periodicity = 3; + cfg.ts_rate_decimator[0] = 3; + cfg.ts_rate_decimator[1] = 1; + memcpy(cfg.ts_layer_id, ids, sizeof(ids)); + + // 0=L, 1=GF, Intra-layer prediction enabled + layer_flags[0] = VPX_EFLAG_FORCE_KF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + layer_flags[1] = + layer_flags[2] = VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST; + break; + } + + case 2: + { + // 3-layers, 6-frame period + int ids[6] = {0,2,2,1,2,2}; + cfg.ts_number_layers = 3; + cfg.ts_periodicity = 6; + cfg.ts_rate_decimator[0] = 6; + cfg.ts_rate_decimator[1] = 3; + cfg.ts_rate_decimator[2] = 1; + memcpy(cfg.ts_layer_id, ids, sizeof(ids)); + + // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled + layer_flags[0] = VPX_EFLAG_FORCE_KF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST; + layer_flags[1] = + 
layer_flags[2] = + layer_flags[4] = + layer_flags[5] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST; + break; + } + + case 3: + { + // 3-layers, 4-frame period + int ids[6] = {0,2,1,2}; + cfg.ts_number_layers = 3; + cfg.ts_periodicity = 4; + cfg.ts_rate_decimator[0] = 4; + cfg.ts_rate_decimator[1] = 2; + cfg.ts_rate_decimator[2] = 1; + memcpy(cfg.ts_layer_id, ids, sizeof(ids)); + + // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled + layer_flags[0] = VPX_EFLAG_FORCE_KF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST; + layer_flags[1] = + layer_flags[3] = VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + break; + cfg.ts_rate_decimator[2] = 1; + } + + case 4: + { + // 3-layers, 4-frame period + int ids[6] = {0,2,1,2}; + cfg.ts_number_layers = 3; + cfg.ts_periodicity = 4; + cfg.ts_rate_decimator[0] = 4; + cfg.ts_rate_decimator[1] = 2; + cfg.ts_rate_decimator[2] = 1; + memcpy(cfg.ts_layer_id, ids, sizeof(ids)); + + // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, + // disabled in layer 2 + layer_flags[0] = VPX_EFLAG_FORCE_KF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + layer_flags[2] = VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; + layer_flags[1] = + layer_flags[3] = VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + break; + } + + case 5: + { + // 3-layers, 4-frame period + int ids[6] = {0,2,1,2}; + cfg.ts_number_layers = 3; + cfg.ts_periodicity = 4; + cfg.ts_rate_decimator[0] = 4; + cfg.ts_rate_decimator[1] = 2; + cfg.ts_rate_decimator[2] = 1; + memcpy(cfg.ts_layer_id, ids, sizeof(ids)); + + // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled + layer_flags[0] = VPX_EFLAG_FORCE_KF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF 
| + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + layer_flags[2] = VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; + layer_flags[1] = + layer_flags[3] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; + break; + } + + case 6: + { + // NOTE: Probably of academic interest only + + // 5-layers, 16-frame period + int ids[16] = {0,4,3,4,2,4,3,4,1,4,3,4,2,4,3,4}; + cfg.ts_number_layers = 5; + cfg.ts_periodicity = 16; + cfg.ts_rate_decimator[0] = 16; + cfg.ts_rate_decimator[1] = 8; + cfg.ts_rate_decimator[2] = 4; + cfg.ts_rate_decimator[3] = 2; + cfg.ts_rate_decimator[4] = 1; + memcpy(cfg.ts_layer_id, ids, sizeof(ids)); + + layer_flags[0] = VPX_EFLAG_FORCE_KF; + layer_flags[1] = + layer_flags[3] = + layer_flags[5] = + layer_flags[7] = + layer_flags[9] = + layer_flags[11] = + layer_flags[13] = + layer_flags[15] = VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_ENTROPY; + layer_flags[2] = + layer_flags[6] = + layer_flags[10] = + layer_flags[14] = 0; + layer_flags[4] = + layer_flags[12] = VP8_EFLAG_NO_REF_LAST; + layer_flags[8] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_UPD_ENTROPY; + break; + } + + default: + break; + } + + // Open input file + if(!(infile = fopen(argv[1], "rb"))) + die("Failed to open %s for reading", argv[1]); + + // Open an output file for each stream + for (i=0; i<cfg.ts_number_layers; i++) + { + char file_name[512]; + sprintf (file_name, "%s_%d.ivf", argv[2], i); + if (!(outfile[i] = fopen(file_name, "wb"))) + die("Failed to open %s for writing", file_name); + write_ivf_file_header(outfile[i], &cfg, 0); + } + + // Initialize codec + if (vpx_codec_enc_init (&codec, interface, &cfg, 0)) + die_codec (&codec, "Failed to initialize encoder"); + + // Cap CPU & first I-frame size + vpx_codec_control (&codec, VP8E_SET_CPUUSED, -6); + vpx_codec_control (&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, 600); + + frame_avail = 1; + while (frame_avail || got_data) { + vpx_codec_iter_t iter 
= NULL; + const vpx_codec_cx_pkt_t *pkt; + + flags = layer_flags[frame_cnt % cfg.ts_periodicity]; + + frame_avail = read_frame(infile, &raw); + if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt, + 1, flags, VPX_DL_REALTIME)) + die_codec(&codec, "Failed to encode frame"); + + // Reset KF flag + layer_flags[0] &= ~VPX_EFLAG_FORCE_KF; + + got_data = 0; + while ( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) { + got_data = 1; + switch (pkt->kind) { + case VPX_CODEC_CX_FRAME_PKT: + for (i=cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity]; + i<cfg.ts_number_layers; i++) + { + write_ivf_frame_header(outfile[i], pkt); + if (fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, + outfile[i])); + frames_in_layer[i]++; + } + break; + default: + break; + } + printf (pkt->kind == VPX_CODEC_CX_FRAME_PKT + && (pkt->data.frame.flags & VPX_FRAME_IS_KEY)? "K":"."); + fflush (stdout); + } + frame_cnt++; + } + printf ("\n"); + fclose (infile); + + printf ("Processed %d frames.\n",frame_cnt-1); + if (vpx_codec_destroy(&codec)) + die_codec (&codec, "Failed to destroy codec"); + + // Try to rewrite the output file headers with the actual frame count + for (i=0; i<cfg.ts_number_layers; i++) + { + if (!fseek(outfile[i], 0, SEEK_SET)) + write_ivf_file_header (outfile[i], &cfg, frames_in_layer[i]); + fclose (outfile[i]); + } + + return EXIT_SUCCESS; +} + diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h index 4863dcd17..08219e9bd 100644 --- a/vpx/vpx_encoder.h +++ b/vpx/vpx_encoder.h @@ -32,6 +32,8 @@ extern "C" { #define VPX_ENCODER_H #include "vpx_codec.h" +#define MAX_PERIODICITY 16 +#define MAX_LAYERS 5 /*!\brief Current ABI version number * @@ -592,6 +594,47 @@ extern "C" { */ unsigned int kf_max_dist; + /* + * Temporal scalability settings (ts) + */ + + /*!\brief Number of coding layers + * + * This value specifies the number of coding layers to be used. 
+ */ + unsigned int ts_number_layers; + + /*!\brief Target bitrate for each layer + * + * These values specify the target coding bitrate for each coding layer. + */ + unsigned int ts_target_bitrate[MAX_LAYERS]; + + /*!\brief Frame rate decimation factor for each layer + * + * These values specify the frame rate decimation factors to apply + * to each layer. + */ + unsigned int ts_rate_decimator[MAX_LAYERS]; + + /*!\brief Length of the sequence defining frame layer membership + * + * This value specifies the length of the sequence that defines the + * membership of frames to layers. For example, if ts_periodicity=8 then + * frames are assigned to coding layers with a repeated sequence of + * length 8. + */ + unsigned int ts_periodicity; + + /*!\brief Template defining the membership of frames to coding layers + * + * This array defines the membership of frames to coding layers. For a + * 2-layer encoding that assigns even numbered frames to one layer (0) + * and odd numbered frames to a second layer (1) with ts_periodicity=8, + * then ts_layer_id = (0,1,0,1,0,1,0,1). + */ + unsigned int ts_layer_id[MAX_PERIODICITY]; + } vpx_codec_enc_cfg_t; /**< alias for struct vpx_codec_enc_cfg */ |