diff options
Diffstat (limited to 'vp8/encoder')
45 files changed, 672 insertions, 709 deletions
diff --git a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm index a644a004c..4abe818f1 100644 --- a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm +++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm @@ -15,7 +15,7 @@ EXPORT |vp8_encode_value| IMPORT |vp8_validate_buffer_arm| - INCLUDE asm_enc_offsets.asm + INCLUDE vp8_asm_enc_offsets.asm ARM REQUIRE8 diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm index a1cd46704..90a141c62 100644 --- a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm +++ b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm @@ -12,7 +12,7 @@ EXPORT |vp8cx_pack_tokens_armv5| IMPORT |vp8_validate_buffer_arm| - INCLUDE asm_enc_offsets.asm + INCLUDE vp8_asm_enc_offsets.asm ARM REQUIRE8 diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm index 1fa5e6c22..3a8d17a81 100644 --- a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm +++ b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm @@ -12,7 +12,7 @@ EXPORT |vp8cx_pack_mb_row_tokens_armv5| IMPORT |vp8_validate_buffer_arm| - INCLUDE asm_enc_offsets.asm + INCLUDE vp8_asm_enc_offsets.asm ARM REQUIRE8 diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm index 90a98fe8d..e9aa4958f 100644 --- a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm +++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm @@ -12,7 +12,7 @@ EXPORT |vp8cx_pack_tokens_into_partitions_armv5| IMPORT |vp8_validate_buffer_arm| - INCLUDE asm_enc_offsets.asm + INCLUDE vp8_asm_enc_offsets.asm ARM REQUIRE8 diff --git a/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm b/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm index d61f5d94d..de35a1e13 100644 --- a/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm +++ b/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm @@ -11,7 +11,7 @@ EXPORT |vp8_fast_quantize_b_armv6| - INCLUDE asm_enc_offsets.asm + INCLUDE vp8_asm_enc_offsets.asm ARM REQUIRE8 diff --git a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm b/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm index f329f8f73..05746cf7f 100644 --- a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm +++ b/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm @@ -13,7 +13,7 @@ EXPORT |vp8_subtract_mbuv_armv6| EXPORT |vp8_subtract_b_armv6| - INCLUDE asm_enc_offsets.asm + INCLUDE vp8_asm_enc_offsets.asm ARM REQUIRE8 diff --git a/vp8/encoder/arm/dct_arm.c b/vp8/encoder/arm/dct_arm.c index af0fb274e..f71300d2c 100644 --- a/vp8/encoder/arm/dct_arm.c +++ b/vp8/encoder/arm/dct_arm.c @@ -9,7 +9,7 @@ */ #include "vpx_config.h" -#include "vpx_rtcd.h" +#include "vp8_rtcd.h" #if HAVE_MEDIA diff --git a/vp8/encoder/arm/neon/fastquantizeb_neon.asm b/vp8/encoder/arm/neon/fastquantizeb_neon.asm index 143058842..9374310e5 100644 --- a/vp8/encoder/arm/neon/fastquantizeb_neon.asm +++ b/vp8/encoder/arm/neon/fastquantizeb_neon.asm @@ -12,7 +12,7 @@ EXPORT |vp8_fast_quantize_b_neon| EXPORT |vp8_fast_quantize_b_pair_neon| - INCLUDE asm_enc_offsets.asm + INCLUDE vp8_asm_enc_offsets.asm ARM REQUIRE8 diff --git a/vp8/encoder/arm/neon/shortfdct_neon.asm b/vp8/encoder/arm/neon/shortfdct_neon.asm index 09dd011ec..5ea8dd83d 100644 --- a/vp8/encoder/arm/neon/shortfdct_neon.asm +++ b/vp8/encoder/arm/neon/shortfdct_neon.asm @@ -97,7 +97,7 @@ coeff vmlal.s16 q11, d6, d17 ; c1*2217 + d1*5352 + 12000 vmlsl.s16 q12, d6, d16 ; d1*2217 - c1*5352 + 51000 - vmvn.s16 d4, d4 + vmvn d4, d4 vshrn.s32 d1, q11, #16 ; op[4] = (c1*2217 + d1*5352 + 12000)>>16 vsub.s16 d1, d1, d4 ; op[4] += (d1!=0) vshrn.s32 d3, q12, #16 ; op[12]= (d1*2217 - c1*5352 + 51000)>>16 @@ -200,7 +200,7 @@ coeff vmlal.s16 q11, d27, d17 ; B[4] = c1*2217 + d1*5352 + 12000 vmlsl.s16 q12, d27, d16 ; B[12] = d1*2217 - c1*5352 + 51000 - vmvn.s16 q14, q14 + vmvn q14, q14 vshrn.s32 d1, q9, #16 ; A[4] = (c1*2217 + d1*5352 + 12000)>>16 vshrn.s32 d3, q10, #16 ; A[12]= (d1*2217 - c1*5352 + 51000)>>16 diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm index 91a328c29..5bda78678 100644 --- a/vp8/encoder/arm/neon/subtract_neon.asm +++ b/vp8/encoder/arm/neon/subtract_neon.asm @@ -12,7 +12,7 @@ EXPORT |vp8_subtract_mby_neon| EXPORT |vp8_subtract_mbuv_neon| - INCLUDE asm_enc_offsets.asm + INCLUDE vp8_asm_enc_offsets.asm ARM REQUIRE8 diff --git a/vp8/encoder/arm/quantize_arm.c b/vp8/encoder/arm/quantize_arm.c index 8999e347f..80d9ad054 100644 --- a/vp8/encoder/arm/quantize_arm.c +++ b/vp8/encoder/arm/quantize_arm.c @@ -10,7 +10,7 @@ #include "vpx_config.h" -#include "vpx_rtcd.h" +#include "vp8_rtcd.h" #include "vp8/encoder/block.h" #include <math.h> #include "vpx_mem/vpx_mem.h" diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index e666b6c7e..78e54e248 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -50,7 +50,7 @@ const int vp8cx_base_skip_false_prob[128] = unsigned __int64 Sectionbits[500]; #endif -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS int intra_mode_stats[10][10][10]; static unsigned int tree_update_hist [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] [2]; extern unsigned int active_section; @@ -90,17 +90,17 @@ static void update_mode( if (new_b + (n << 8) < old_b) { - int i = 0; + int j = 0; vp8_write_bit(w, 1); do { - const vp8_prob p = Pnew[i]; + const vp8_prob p = Pnew[j]; - vp8_write_literal(w, Pcur[i] = p ? p : 1, 8); + vp8_write_literal(w, Pcur[j] = p ? p : 1, 8); } - while (++i < n); + while (++j < n); } else vp8_write_bit(w, 0); @@ -245,15 +245,15 @@ void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount) if (L) { - const unsigned char *pp = b->prob; - int v = e >> 1; - int n = L; /* number of bits in v, assumed nonzero */ - int i = 0; + const unsigned char *proba = b->prob; + const int v2 = e >> 1; + int n2 = L; /* number of bits in v2, assumed nonzero */ + i = 0; do { - const int bb = (v >> --n) & 1; - split = 1 + (((range - 1) * pp[i>>1]) >> 8); + const int bb = (v2 >> --n2) & 1; + split = 1 + (((range - 1) * proba[i>>1]) >> 8); i = b->tree[i+bb]; if (bb) @@ -301,7 +301,7 @@ void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount) lowvalue <<= shift; } - while (n); + while (n2); } @@ -432,7 +432,7 @@ static void write_mv_ref assert(NEARESTMV <= m && m <= SPLITMV); #endif vp8_write_token(w, vp8_mv_ref_tree, p, - vp8_mv_ref_encoding_array - NEARESTMV + m); + vp8_mv_ref_encoding_array + (m - NEARESTMV)); } static void write_sub_mv_ref @@ -444,7 +444,7 @@ static void write_sub_mv_ref assert(LEFT4X4 <= m && m <= NEW4X4); #endif vp8_write_token(w, vp8_sub_mv_ref_tree, p, - vp8_sub_mv_ref_encoding_array - LEFT4X4 + m); + vp8_sub_mv_ref_encoding_array + (m - LEFT4X4)); } static void write_mv @@ -531,7 +531,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) vp8_convert_rfct_to_prob(cpi); -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 1; #endif @@ -577,10 +577,10 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) */ xd->mb_to_left_edge = -((mb_col * 16) << 3); xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; - xd->mb_to_top_edge = -((mb_row * 16)) << 3; + xd->mb_to_top_edge = -((mb_row * 16) << 3); xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 9; #endif @@ -593,7 +593,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) if (rf == INTRA_FRAME) { vp8_write(w, 0, cpi->prob_intra_coded); -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 6; #endif write_ymode(w, mode, pc->fc.ymode_prob); @@ -633,13 +633,13 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) vp8_mv_ref_probs(mv_ref_p, ct); -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS accum_mv_refs(mode, ct); #endif } -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 3; #endif @@ -649,7 +649,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { case NEWMV: -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 5; #endif @@ -692,7 +692,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) if (blockmode == NEW4X4) { -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 11; #endif write_mv(w, &blockmv.as_mv, &best_mv, (const MV_CONTEXT *) mvc); @@ -769,7 +769,7 @@ static void write_kfmodes(VP8_COMP *cpi) const B_PREDICTION_MODE L = left_block_mode(m, i); const int bm = m->bmi[i].as_mode; -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS ++intra_mode_stats [A] [L] [bm]; #endif @@ -980,6 +980,12 @@ void vp8_calc_ref_frame_costs(int *ref_frame_cost, int prob_garf ) { + assert(prob_intra >= 0); + assert(prob_intra <= 255); + assert(prob_last >= 0); + assert(prob_last <= 255); + assert(prob_garf >= 0); + assert(prob_garf <= 255); ref_frame_cost[INTRA_FRAME] = vp8_cost_zero(prob_intra); ref_frame_cost[LAST_FRAME] = vp8_cost_one(prob_intra) + vp8_cost_zero(prob_last); @@ -1056,7 +1062,7 @@ int vp8_update_coef_context(VP8_COMP *cpi) if (cpi->common.frame_type == KEY_FRAME) { /* Reset to default counts/probabilities at key frames */ - vp8_copy(cpi->coef_counts, default_coef_counts); + vp8_copy(cpi->mb.coef_counts, default_coef_counts); } if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS) @@ -1154,7 +1160,7 @@ void vp8_update_coef_probs(VP8_COMP *cpi) #endif -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS ++ tree_update_hist [i][j][k][t] [u]; #endif @@ -1175,7 +1181,7 @@ void vp8_update_coef_probs(VP8_COMP *cpi) while (++t < ENTROPY_NODES); /* Accum token counts for generation of default statistics */ -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS t = 0; do @@ -1316,7 +1322,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest vp8_start_encode(bc, cx_data, cx_data_end); /* signal clr type */ - vp8_write_bit(bc, pc->clr_type); + vp8_write_bit(bc, 0); vp8_write_bit(bc, pc->clamp_type); } @@ -1521,7 +1527,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest if (pc->frame_type != KEY_FRAME) vp8_write_bit(bc, pc->refresh_last_frame); -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS if (pc->frame_type == INTER_FRAME) active_section = 0; @@ -1544,7 +1550,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest vp8_update_coef_probs(cpi); #endif -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 2; #endif @@ -1555,7 +1561,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest { write_kfmodes(cpi); -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 8; #endif } @@ -1563,7 +1569,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest { pack_inter_mode_mvs(cpi); -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 1; #endif } @@ -1681,7 +1687,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest #endif } -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS void print_tree_update_probs() { int i, j, k, l; diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h index a30f88816..cf74c7aaf 100644 --- a/vp8/encoder/block.h +++ b/vp8/encoder/block.h @@ -37,7 +37,7 @@ typedef struct block /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */ short *quant; short *quant_fast; - unsigned char *quant_shift; + short *quant_shift; short *zbin; short *zrun_zbin_boost; short *round; diff --git a/vp8/encoder/boolhuff.c b/vp8/encoder/boolhuff.c index 74770a276..3b0c03a14 100644 --- a/vp8/encoder/boolhuff.c +++ b/vp8/encoder/boolhuff.c @@ -16,7 +16,7 @@ unsigned __int64 Sectionbits[500]; #endif -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS unsigned int active_section = 0; #endif diff --git a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h index 830906306..39ab586b5 100644 --- a/vp8/encoder/boolhuff.h +++ b/vp8/encoder/boolhuff.h @@ -67,7 +67,7 @@ static void vp8_encode_bool(BOOL_CODER *br, int bit, int probability) unsigned int lowvalue = br->lowvalue; register unsigned int shift; -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS #if defined(SECTIONBITS_OUTPUT) if (bit) diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c index b5a11ae34..091554a5d 100644 --- a/vp8/encoder/dct.c +++ b/vp8/encoder/dct.c @@ -20,10 +20,10 @@ void vp8_short_fdct4x4_c(short *input, short *output, int pitch) for (i = 0; i < 4; i++) { - a1 = ((ip[0] + ip[3])<<3); - b1 = ((ip[1] + ip[2])<<3); - c1 = ((ip[1] - ip[2])<<3); - d1 = ((ip[0] - ip[3])<<3); + a1 = ((ip[0] + ip[3]) * 8); + b1 = ((ip[1] + ip[2]) * 8); + c1 = ((ip[1] - ip[2]) * 8); + d1 = ((ip[0] - ip[3]) * 8); op[0] = a1 + b1; op[2] = a1 - b1; @@ -72,10 +72,10 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch) for (i = 0; i < 4; i++) { - a1 = ((ip[0] + ip[2])<<2); - d1 = ((ip[1] + ip[3])<<2); - c1 = ((ip[1] - ip[3])<<2); - b1 = ((ip[0] - ip[2])<<2); + a1 = ((ip[0] + ip[2]) * 4); + d1 = ((ip[1] + ip[3]) * 4); + c1 = ((ip[1] - ip[3]) * 4); + b1 = ((ip[0] - ip[2]) * 4); op[0] = a1 + d1 + (a1!=0); op[1] = b1 + c1; diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c index f3faa227f..781926547 100644 --- a/vp8/encoder/denoising.c +++ b/vp8/encoder/denoising.c @@ -13,7 +13,7 @@ #include "vp8/common/reconinter.h" #include "vpx/vpx_integer.h" #include "vpx_mem/vpx_mem.h" -#include "vpx_rtcd.h" +#include "vp8_rtcd.h" static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25; /* SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming @@ -206,8 +206,6 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, MB_MODE_INFO saved_mbmi; MACROBLOCKD *filter_xd = &x->e_mbd; MB_MODE_INFO *mbmi = &filter_xd->mode_info_context->mbmi; - int mv_col; - int mv_row; int sse_diff = zero_mv_sse - best_sse; saved_mbmi = *mbmi; diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index d1b647be9..b550f6be1 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -10,6 +10,7 @@ #include "vpx_config.h" +#include "vp8_rtcd.h" #include "encodemb.h" #include "encodemv.h" #include "vp8/common/common.h" @@ -852,11 +853,10 @@ void vp8_encode_frame(VP8_COMP *cpi) if (xd->segmentation_enabled) { - int i, j; + int j; if (xd->segmentation_enabled) { - for (i = 0; i < cpi->encoding_thread_count; i++) { for (j = 0; j < 4; j++) @@ -1299,8 +1299,9 @@ int vp8cx_encode_inter_macroblock } { - /* Experimental code. Special case for gf and arf zeromv modes. - * Increase zbin size to supress noise + /* Experimental code. + * Special case for gf and arf zeromv modes, for 1 temporal layer. + * Increase zbin size to supress noise. */ x->zbin_mode_boost = 0; if (x->zbin_mode_boost_enabled) @@ -1309,7 +1310,8 @@ int vp8cx_encode_inter_macroblock { if (xd->mode_info_context->mbmi.mode == ZEROMV) { - if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) + if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME && + cpi->oxcf.number_of_layers == 1) x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; else x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 340dd638d..cfa4cb927 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -10,7 +10,7 @@ #include "vpx_config.h" -#include "vpx_rtcd.h" +#include "vp8_rtcd.h" #include "quantize.h" #include "vp8/common/reconintra4x4.h" #include "encodemb.h" diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c index 7d494f2c6..7ed2fe1a1 100644 --- a/vp8/encoder/encodemb.c +++ b/vp8/encoder/encodemb.c @@ -10,7 +10,7 @@ #include "vpx_config.h" -#include "vpx_rtcd.h" +#include "vp8_rtcd.h" #include "encodemb.h" #include "vp8/common/reconinter.h" #include "quantize.h" diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c index 0c43d0692..2a74ff4ae 100644 --- a/vp8/encoder/encodemv.c +++ b/vp8/encoder/encodemv.c @@ -16,7 +16,7 @@ #include <math.h> -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS extern unsigned int active_section; #endif @@ -359,7 +359,7 @@ void vp8_write_mvprobs(VP8_COMP *cpi) vp8_writer *const w = cpi->bc; MV_CONTEXT *mvc = cpi->common.fc.mvc; int flags[2] = {0, 0}; -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 4; #endif write_component_probs( @@ -374,7 +374,7 @@ void vp8_write_mvprobs(VP8_COMP *cpi) if (flags[0] || flags[1]) vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flags); -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS active_section = 5; #endif } diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c index 30bf8a6ef..968c7f365 100644 --- a/vp8/encoder/firstpass.c +++ b/vp8/encoder/firstpass.c @@ -12,6 +12,7 @@ #include <limits.h> #include <stdio.h> +#include "./vpx_scale_rtcd.h" #include "block.h" #include "onyx_int.h" #include "vp8/common/variance.h" @@ -20,7 +21,7 @@ #include "vp8/common/systemdependent.h" #include "mcomp.h" #include "firstpass.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "encodemb.h" #include "vp8/common/extend.h" #include "vpx_mem/vpx_mem.h" @@ -710,8 +711,8 @@ skip_motion_search: neutral_count++; } - d->bmi.mv.as_mv.row <<= 3; - d->bmi.mv.as_mv.col <<= 3; + d->bmi.mv.as_mv.row *= 8; + d->bmi.mv.as_mv.col *= 8; this_error = motion_error; vp8_set_mbmode_and_mvs(x, NEWMV, &d->bmi.mv); vp8_encode_inter16x16y(x); @@ -857,7 +858,9 @@ skip_motion_search: */ if ((cm->current_video_frame > 0) && (cpi->twopass.this_frame_stats.pcnt_inter > 0.20) && - ((cpi->twopass.this_frame_stats.intra_error / cpi->twopass.this_frame_stats.coded_error) > 2.0)) + ((cpi->twopass.this_frame_stats.intra_error / + DOUBLE_DIVIDE_CHECK(cpi->twopass.this_frame_stats.coded_error)) > + 2.0)) { vp8_yv12_copy_frame(lst_yv12, gld_yv12); } @@ -906,13 +909,16 @@ extern const int vp8_bits_per_mb[2][QINDEX_RANGE]; static double bitcost( double prob ) { - return -(log( prob ) / log( 2.0 )); + if (prob > 0.000122) + return -log(prob) / log(2.0); + else + return 13.0; } static int64_t estimate_modemvcost(VP8_COMP *cpi, FIRSTPASS_STATS * fpstats) { int mv_cost; - int mode_cost; + int64_t mode_cost; double av_pct_inter = fpstats->pcnt_inter / fpstats->count; double av_pct_motion = fpstats->pcnt_motion / fpstats->count; @@ -934,10 +940,9 @@ static int64_t estimate_modemvcost(VP8_COMP *cpi, /* Crude estimate of overhead cost from modes * << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb */ - mode_cost = - (int)( ( ((av_pct_inter - av_pct_motion) * zz_cost) + - (av_pct_motion * motion_cost) + - (av_intra * intra_cost) ) * cpi->common.MBs ) << 9; + mode_cost =((((av_pct_inter - av_pct_motion) * zz_cost) + + (av_pct_motion * motion_cost) + + (av_intra * intra_cost)) * cpi->common.MBs) * 512; return mv_cost + mode_cost; } @@ -1322,7 +1327,7 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta return Q; } -extern void vp8_new_frame_rate(VP8_COMP *cpi, double framerate); +extern void vp8_new_framerate(VP8_COMP *cpi, double framerate); void vp8_init_second_pass(VP8_COMP *cpi) { @@ -1346,9 +1351,9 @@ void vp8_init_second_pass(VP8_COMP *cpi) * sum duration is not. Its calculated based on the actual durations of * all frames from the first pass. */ - vp8_new_frame_rate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration); + vp8_new_framerate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration); - cpi->output_frame_rate = cpi->frame_rate; + cpi->output_framerate = cpi->framerate; cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0) ; cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats.duration * two_pass_min_rate / 10000000.0); @@ -2115,23 +2120,25 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) (cpi->twopass.kf_group_error_left > 0)) { cpi->twopass.gf_group_bits = - (int)((double)cpi->twopass.kf_group_bits * - (gf_group_err / (double)cpi->twopass.kf_group_error_left)); + (int64_t)(cpi->twopass.kf_group_bits * + (gf_group_err / cpi->twopass.kf_group_error_left)); } else cpi->twopass.gf_group_bits = 0; - cpi->twopass.gf_group_bits = (int)( + cpi->twopass.gf_group_bits = (cpi->twopass.gf_group_bits < 0) ? 0 : (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits) - ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits); + ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits; /* Clip cpi->twopass.gf_group_bits based on user supplied data rate * variability limit (cpi->oxcf.two_pass_vbrmax_section) */ - if (cpi->twopass.gf_group_bits > max_bits * cpi->baseline_gf_interval) - cpi->twopass.gf_group_bits = max_bits * cpi->baseline_gf_interval; + if (cpi->twopass.gf_group_bits > + (int64_t)max_bits * cpi->baseline_gf_interval) + cpi->twopass.gf_group_bits = + (int64_t)max_bits * cpi->baseline_gf_interval; /* Reset the file position */ reset_fpf_position(cpi, start_pos); @@ -2393,7 +2400,7 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) target_frame_size += cpi->min_frame_bandwidth; /* Every other frame gets a few extra bits */ - if ( (cpi->common.frames_since_golden & 0x01) && + if ( (cpi->frames_since_golden & 0x01) && (cpi->frames_till_gf_update_due > 0) ) { target_frame_size += cpi->twopass.alt_extra_bits; @@ -2445,7 +2452,7 @@ void vp8_second_pass(VP8_COMP *cpi) */ if (cpi->oxcf.error_resilient_mode) { - cpi->twopass.gf_group_bits = (int)cpi->twopass.kf_group_bits; + cpi->twopass.gf_group_bits = cpi->twopass.kf_group_bits; cpi->twopass.gf_group_error_left = (int)cpi->twopass.kf_group_error_left; cpi->baseline_gf_interval = cpi->twopass.frames_to_key; @@ -2524,7 +2531,7 @@ void vp8_second_pass(VP8_COMP *cpi) /* Set nominal per second bandwidth for this frame */ cpi->target_bandwidth = (int) - (cpi->per_frame_bandwidth * cpi->output_frame_rate); + (cpi->per_frame_bandwidth * cpi->output_framerate); if (cpi->target_bandwidth < 0) cpi->target_bandwidth = 0; @@ -3180,7 +3187,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) /* Convert to a per second bitrate */ cpi->target_bandwidth = (int)(cpi->twopass.kf_bits * - cpi->output_frame_rate); + cpi->output_framerate); } /* Note the total error score of the kf group minus the key frame itself */ @@ -3219,7 +3226,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->common.vert_scale = NORMAL; /* Calculate Average bits per frame. */ - av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate); + av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->framerate); /* CBR... Use the clip average as the target for deciding resample */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) @@ -3294,7 +3301,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) } else { - int64_t clip_bits = (int64_t)(cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate)); + int64_t clip_bits = (int64_t)(cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->framerate)); int64_t over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level; /* If triggered last time the threshold for triggering again is diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index b08c7a589..0b11ea64a 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -18,7 +18,7 @@ #include <math.h> #include "vp8/common/findnearmv.h" -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS static int mv_ref_ct [31] [4] [2]; static int mv_mode_cts [4] [2]; #endif @@ -210,7 +210,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, unsigned char *z = (*(b->base_src) + b->src); int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1; - int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2; + int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4; int tr = br, tc = bc; unsigned int besterr; unsigned int left, right, up, down, diag; @@ -220,10 +220,14 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, unsigned int quarteriters = 4; int thismse; - int minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1)); - int maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1)); - int minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1)); - int maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1)); + int minc = MAX(x->mv_col_min * 4, + (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1)); + int maxc = MIN(x->mv_col_max * 4, + (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1)); + int minr = MAX(x->mv_row_min * 4, + (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1)); + int maxr = MIN(x->mv_row_max * 4, + (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1)); int y_stride; int offset; @@ -233,19 +237,18 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, #if ARCH_X86 || ARCH_X86_64 MACROBLOCKD *xd = &x->e_mbd; - unsigned char *y0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; + unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; unsigned char *y; - int buf_r1, buf_r2, buf_c1, buf_c2; + int buf_r1, buf_r2, buf_c1; /* Clamping to avoid out-of-range data access */ buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3; buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3; buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3; - buf_c2 = ((bestmv->as_mv.col + 3) > x->mv_col_max)?(x->mv_col_max - bestmv->as_mv.col):3; y_stride = 32; /* Copy to intermediate buffer before searching. */ - vfp->copymem(y0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2); + vfp->copymem(y_0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2); y = xd->y_buf + y_stride*buf_r1 +buf_c1; #else unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; @@ -255,8 +258,8 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; /* central mv */ - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; + bestmv->as_mv.row *= 8; + bestmv->as_mv.col *= 8; /* calculate central point error */ besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1); @@ -338,8 +341,8 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, tc = bc; } - bestmv->as_mv.row = br << 1; - bestmv->as_mv.col = bc << 1; + bestmv->as_mv.row = br * 2; + bestmv->as_mv.col = bc * 2; if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) || (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3))) @@ -376,12 +379,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, #if ARCH_X86 || ARCH_X86_64 MACROBLOCKD *xd = &x->e_mbd; - unsigned char *y0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; + unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; unsigned char *y; y_stride = 32; /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */ - vfp->copymem(y0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18); + vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18); y = xd->y_buf + y_stride + 1; #else unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; @@ -687,12 +690,12 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, #if ARCH_X86 || ARCH_X86_64 MACROBLOCKD *xd = &x->e_mbd; - unsigned char *y0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; + unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; unsigned char *y; y_stride = 32; /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */ - vfp->copymem(y0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18); + vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18); y = xd->y_buf + y_stride + 1; #else unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; @@ -700,8 +703,8 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, #endif /* central mv */ - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; + bestmv->as_mv.row *= 8; + bestmv->as_mv.col *= 8; startmv = *bestmv; /* calculate central point error */ @@ -1316,8 +1319,8 @@ int vp8_diamond_search_sadx4 (*num00)++; } - this_mv.as_mv.row = best_mv->as_mv.row << 3; - this_mv.as_mv.col = best_mv->as_mv.col << 3; + this_mv.as_mv.row = best_mv->as_mv.row * 8; + this_mv.as_mv.col = best_mv->as_mv.col * 8; return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); @@ -1710,8 +1713,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, } } - this_mv.as_mv.row = best_mv->as_mv.row << 3; - this_mv.as_mv.col = best_mv->as_mv.col << 3; + this_mv.as_mv.row = best_mv->as_mv.row * 8; + this_mv.as_mv.col = best_mv->as_mv.col * 8; return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); @@ -1906,14 +1909,14 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, } } - this_mv.as_mv.row = ref_mv->as_mv.row << 3; - this_mv.as_mv.col = ref_mv->as_mv.col << 3; + this_mv.as_mv.row = ref_mv->as_mv.row * 8; + this_mv.as_mv.col = ref_mv->as_mv.col * 8; return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS void print_mode_context(void) { FILE *f = fopen("modecont.c", "w"); @@ -1966,8 +1969,8 @@ void print_mode_context(void) fclose(f); } -/* MV ref count ENTROPY_STATS stats code */ -#ifdef ENTROPY_STATS +/* MV ref count VP8_ENTROPY_STATS stats code */ +#ifdef VP8_ENTROPY_STATS void init_mv_ref_counts() { vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct)); @@ -2021,6 +2024,6 @@ void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) } } -#endif/* END MV ref count ENTROPY_STATS stats code */ +#endif/* END MV ref count VP8_ENTROPY_STATS stats code */ #endif diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h index 890113f9a..e36c51543 100644 --- a/vp8/encoder/mcomp.h +++ b/vp8/encoder/mcomp.h @@ -15,7 +15,7 @@ #include "block.h" #include "vp8/common/variance.h" -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS extern void init_mv_ref_counts(); extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]); #endif diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 4680f392a..4b60cfd32 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -10,6 +10,7 @@ #include "vpx_config.h" +#include "./vpx_scale_rtcd.h" #include "vp8/common/onyxc_int.h" #include "vp8/common/blockd.h" #include "onyx_int.h" @@ -19,7 +20,7 @@ #include "mcomp.h" #include "firstpass.h" #include "psnr.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "vp8/common/extend.h" #include "ratectrl.h" #include "vp8/common/quant_common.h" @@ -110,7 +111,7 @@ extern int skip_false_count; #endif -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS extern int intra_mode_stats[10][10][10]; #endif @@ -288,6 +289,125 @@ static void restore_layer_context(VP8_COMP *cpi, const int layer) sizeof(cpi->mb.count_mb_ref_frame_usage)); } +static int rescale(int val, int num, int denom) +{ + int64_t llnum = num; + int64_t llden = denom; + int64_t llval = val; + + return (int)(llval * llnum / llden); +} + +static void init_temporal_layer_context(VP8_COMP *cpi, + VP8_CONFIG *oxcf, + const int layer, + double prev_layer_framerate) +{ + LAYER_CONTEXT *lc = &cpi->layer_context[layer]; + + lc->framerate = cpi->output_framerate / cpi->oxcf.rate_decimator[layer]; + lc->target_bandwidth = cpi->oxcf.target_bitrate[layer] * 1000; + + lc->starting_buffer_level_in_ms = oxcf->starting_buffer_level; + lc->optimal_buffer_level_in_ms = oxcf->optimal_buffer_level; + lc->maximum_buffer_size_in_ms = oxcf->maximum_buffer_size; + + lc->starting_buffer_level = + rescale((int)(oxcf->starting_buffer_level), + lc->target_bandwidth, 1000); + + if (oxcf->optimal_buffer_level == 0) + lc->optimal_buffer_level = lc->target_bandwidth / 8; + else + lc->optimal_buffer_level = + rescale((int)(oxcf->optimal_buffer_level), + lc->target_bandwidth, 1000); + + if (oxcf->maximum_buffer_size == 0) + lc->maximum_buffer_size = lc->target_bandwidth / 8; + else + lc->maximum_buffer_size = + rescale((int)(oxcf->maximum_buffer_size), + lc->target_bandwidth, 1000); + + /* Work out the average size of a frame within this layer */ + if (layer > 0) + lc->avg_frame_size_for_layer = + (int)((cpi->oxcf.target_bitrate[layer] - + cpi->oxcf.target_bitrate[layer-1]) * 1000 / + (lc->framerate - prev_layer_framerate)); + + lc->active_worst_quality = cpi->oxcf.worst_allowed_q; + lc->active_best_quality = cpi->oxcf.best_allowed_q; + lc->avg_frame_qindex = cpi->oxcf.worst_allowed_q; + + lc->buffer_level = lc->starting_buffer_level; + lc->bits_off_target = lc->starting_buffer_level; + + lc->total_actual_bits = 0; + lc->ni_av_qi = 0; + lc->ni_tot_qi = 0; + lc->ni_frames = 0; + lc->rate_correction_factor = 1.0; + lc->key_frame_rate_correction_factor = 1.0; + lc->gf_rate_correction_factor = 1.0; + lc->inter_frame_target = 0; +} + +// Upon a run-time change in temporal layers, reset the layer context parameters +// for any "new" layers. For "existing" layers, let them inherit the parameters +// from the previous layer state (at the same layer #). In future we may want +// to better map the previous layer state(s) to the "new" ones. +static void reset_temporal_layer_change(VP8_COMP *cpi, + VP8_CONFIG *oxcf, + const int prev_num_layers) +{ + int i; + double prev_layer_framerate = 0; + const int curr_num_layers = cpi->oxcf.number_of_layers; + // If the previous state was 1 layer, get current layer context from cpi. + // We need this to set the layer context for the new layers below. + if (prev_num_layers == 1) + { + cpi->current_layer = 0; + save_layer_context(cpi); + } + for (i = 0; i < curr_num_layers; i++) + { + LAYER_CONTEXT *lc = &cpi->layer_context[i]; + if (i >= prev_num_layers) + { + init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate); + } + // The initial buffer levels are set based on their starting levels. + // We could set the buffer levels based on the previous state (normalized + // properly by the layer bandwidths) but we would need to keep track of + // the previous set of layer bandwidths (i.e., target_bitrate[i]) + // before the layer change. For now, reset to the starting levels. + lc->buffer_level = cpi->oxcf.starting_buffer_level_in_ms * + cpi->oxcf.target_bitrate[i]; + lc->bits_off_target = lc->buffer_level; + // TDOD(marpan): Should we set the rate_correction_factor and + // active_worst/best_quality to values derived from the previous layer + // state (to smooth-out quality dips/rate fluctuation at transition)? + + // We need to treat the 1 layer case separately: oxcf.target_bitrate[i] + // is not set for 1 layer, and the restore_layer_context/save_context() + // are not called in the encoding loop, so we need to call it here to + // pass the layer context state to |cpi|. + if (curr_num_layers == 1) + { + lc->target_bandwidth = cpi->oxcf.target_bandwidth; + lc->buffer_level = cpi->oxcf.starting_buffer_level_in_ms * + lc->target_bandwidth / 1000; + lc->bits_off_target = lc->buffer_level; + restore_layer_context(cpi, 0); + } + prev_layer_framerate = cpi->output_framerate / + cpi->oxcf.rate_decimator[i]; + } +} + static void setup_features(VP8_COMP *cpi) { // If segmentation enabled set the update flags @@ -640,7 +760,6 @@ void vp8_set_speed_features(VP8_COMP *cpi) for (i = 0; i < MAX_MODES; i ++) { cpi->mode_check_freq[i] = 0; - cpi->mode_chosen_counts[i] = 0; } cpi->mb.mbs_tested_so_far = 0; @@ -825,7 +944,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) { unsigned int sum = 0; unsigned int total_mbs = cm->MBs; - int i, thresh; + int thresh; unsigned int total_skip; int min = 2000; @@ -1163,21 +1282,21 @@ int vp8_reverse_trans(int x) return 63; } -void vp8_new_frame_rate(VP8_COMP *cpi, double framerate) +void vp8_new_framerate(VP8_COMP *cpi, double framerate) { if(framerate < .1) framerate = 30; - cpi->frame_rate = framerate; - cpi->output_frame_rate = framerate; + cpi->framerate = framerate; + cpi->output_framerate = framerate; cpi->per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth / - cpi->output_frame_rate); + cpi->output_framerate); cpi->av_per_frame_bandwidth = cpi->per_frame_bandwidth; cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100); /* Set Maximum gf/arf interval */ - cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2); + cpi->max_gf_interval = ((int)(cpi->output_framerate / 2.0) + 2); if(cpi->max_gf_interval < 12) cpi->max_gf_interval = 12; @@ -1200,17 +1319,6 @@ void vp8_new_frame_rate(VP8_COMP *cpi, double framerate) } -static int -rescale(int val, int num, int denom) -{ - int64_t llnum = num; - int64_t llden = denom; - int64_t llval = val; - - return (int)(llval * llnum / llden); -} - - static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) { VP8_COMMON *cm = &cpi->common; @@ -1229,13 +1337,13 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) * seems like a reasonable framerate, then use that as a guess, otherwise * use 30. */ - cpi->frame_rate = (double)(oxcf->timebase.den) / - (double)(oxcf->timebase.num); + cpi->framerate = (double)(oxcf->timebase.den) / + (double)(oxcf->timebase.num); - if (cpi->frame_rate > 180) - cpi->frame_rate = 30; + if (cpi->framerate > 180) + cpi->framerate = 30; - cpi->ref_frame_rate = cpi->frame_rate; + cpi->ref_framerate = cpi->framerate; /* change includes all joint functionality */ vp8_change_config(cpi, oxcf); @@ -1261,63 +1369,13 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) if (cpi->oxcf.number_of_layers > 1) { unsigned int i; - double prev_layer_frame_rate=0; + double prev_layer_framerate=0; for (i=0; i<cpi->oxcf.number_of_layers; i++) { - LAYER_CONTEXT *lc = &cpi->layer_context[i]; - - /* Layer configuration */ - lc->frame_rate = - cpi->output_frame_rate / cpi->oxcf.rate_decimator[i]; - lc->target_bandwidth = cpi->oxcf.target_bitrate[i] * 1000; - - lc->starting_buffer_level_in_ms = oxcf->starting_buffer_level; - lc->optimal_buffer_level_in_ms = oxcf->optimal_buffer_level; - lc->maximum_buffer_size_in_ms = oxcf->maximum_buffer_size; - - lc->starting_buffer_level = - rescale((int)(oxcf->starting_buffer_level), - lc->target_bandwidth, 1000); - - if (oxcf->optimal_buffer_level == 0) - lc->optimal_buffer_level = lc->target_bandwidth / 8; - else - lc->optimal_buffer_level = - rescale((int)(oxcf->optimal_buffer_level), - lc->target_bandwidth, 1000); - - if (oxcf->maximum_buffer_size == 0) - lc->maximum_buffer_size = lc->target_bandwidth / 8; - else - lc->maximum_buffer_size = - rescale((int)oxcf->maximum_buffer_size, - lc->target_bandwidth, 1000); - - /* Work out the average size of a frame within this layer */ - if (i > 0) - lc->avg_frame_size_for_layer = - (int)((cpi->oxcf.target_bitrate[i] - - cpi->oxcf.target_bitrate[i-1]) * 1000 / - (lc->frame_rate - prev_layer_frame_rate)); - - lc->active_worst_quality = cpi->oxcf.worst_allowed_q; - lc->active_best_quality = cpi->oxcf.best_allowed_q; - lc->avg_frame_qindex = cpi->oxcf.worst_allowed_q; - - lc->buffer_level = lc->starting_buffer_level; - lc->bits_off_target = lc->starting_buffer_level; - - lc->total_actual_bits = 0; - lc->ni_av_qi = 0; - lc->ni_tot_qi = 0; - lc->ni_frames = 0; - lc->rate_correction_factor = 1.0; - lc->key_frame_rate_correction_factor = 1.0; - lc->gf_rate_correction_factor = 1.0; - lc->inter_frame_target = 0; - - prev_layer_frame_rate = lc->frame_rate; + init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate); + prev_layer_framerate = cpi->output_framerate / + cpi->oxcf.rate_decimator[i]; } } @@ -1341,14 +1399,14 @@ static void update_layer_contexts (VP8_COMP *cpi) if (oxcf->number_of_layers > 1) { unsigned int i; - double prev_layer_frame_rate=0; + double prev_layer_framerate=0; for (i=0; i<oxcf->number_of_layers; i++) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; - lc->frame_rate = - cpi->ref_frame_rate / oxcf->rate_decimator[i]; + lc->framerate = + cpi->ref_framerate / oxcf->rate_decimator[i]; lc->target_bandwidth = oxcf->target_bitrate[i] * 1000; lc->starting_buffer_level = rescale( @@ -1374,9 +1432,9 @@ static void update_layer_contexts (VP8_COMP *cpi) lc->avg_frame_size_for_layer = (int)((oxcf->target_bitrate[i] - oxcf->target_bitrate[i-1]) * 1000 / - (lc->frame_rate - prev_layer_frame_rate)); + (lc->framerate - prev_layer_framerate)); - prev_layer_frame_rate = lc->frame_rate; + prev_layer_framerate = lc->framerate; } } } @@ -1384,7 +1442,7 @@ static void update_layer_contexts (VP8_COMP *cpi) void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) { VP8_COMMON *cm = &cpi->common; - int last_w, last_h; + int last_w, last_h, prev_number_of_layers; if (!cpi) return; @@ -1409,6 +1467,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) last_w = cpi->oxcf.Width; last_h = cpi->oxcf.Height; + prev_number_of_layers = cpi->oxcf.number_of_layers; cpi->oxcf = *oxcf; @@ -1566,7 +1625,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) cpi->oxcf.target_bandwidth, 1000); /* Set up frame rate and related parameters rate control values. */ - vp8_new_frame_rate(cpi, cpi->frame_rate); + vp8_new_framerate(cpi, cpi->framerate); /* Set absolute upper and lower quality limits */ cpi->worst_quality = cpi->oxcf.worst_allowed_q; @@ -1601,6 +1660,16 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) cpi->target_bandwidth = cpi->oxcf.target_bandwidth; + // Check if the number of temporal layers has changed, and if so reset the + // pattern counter and set/initialize the temporal layer context for the + // new layer configuration. + if (cpi->oxcf.number_of_layers != prev_number_of_layers) + { + // If the number of temporal layers are changed we must start at the + // base of the pattern cycle, so reset temporal_pattern_counter. + cpi->temporal_pattern_counter = 0; + reset_temporal_layer_change(cpi, oxcf, prev_number_of_layers); + } cm->Width = cpi->oxcf.Width; cm->Height = cpi->oxcf.Height; @@ -1738,6 +1807,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) memcpy(cpi->base_skip_false_prob, vp8cx_base_skip_false_prob, sizeof(vp8cx_base_skip_false_prob)); cpi->common.current_video_frame = 0; + cpi->temporal_pattern_counter = 0; cpi->kf_overspend_bits = 0; cpi->kf_bitrate_adjustment = 0; cpi->frames_till_gf_update_due = 0; @@ -1805,7 +1875,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) else cpi->cyclic_refresh_map = (signed char *) NULL; -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS init_context_counters(); #endif @@ -1875,7 +1945,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) for (i = 0; i < KEY_FRAME_CONTEXT; i++) { - cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate; + cpi->prior_key_frame_distance[i] = (int)cpi->output_framerate; } #ifdef OUTPUT_YUV_SRC @@ -1923,7 +1993,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) cpi->mb.rd_thresh_mult[i] = 128; } -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS init_mv_ref_counts(); #endif @@ -2060,7 +2130,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) #endif -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS print_context_counters(); print_tree_update_probs(); print_mode_context(); @@ -2203,7 +2273,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) { extern int count_mb_seg[4]; FILE *f = fopen("modes.stt", "a"); - double dr = (double)cpi->frame_rate * (double)bytes * (double)8 / (double)count / (double)1000 ; + double dr = (double)cpi->framerate * (double)bytes * (double)8 / (double)count / (double)1000 ; fprintf(f, "intra_mode in Intra Frames:\n"); fprintf(f, "Y: %8d, %8d, %8d, %8d, %8d\n", y_modes[0], y_modes[1], y_modes[2], y_modes[3], y_modes[4]); fprintf(f, "UV:%8d, %8d, %8d, %8d\n", uv_modes[0], uv_modes[1], uv_modes[2], uv_modes[3]); @@ -2242,7 +2312,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) } #endif -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS { int i, j, k; FILE *fmode = fopen("modecontext.c", "w"); @@ -2587,7 +2657,7 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) Scale2Ratio(cm->horiz_scale, &hr, &hs); Scale2Ratio(cm->vert_scale, &vr, &vs); - vp8_scale_frame(sd, &cpi->scaled_source, cm->temp_scale_frame.y_buffer, + vpx_scale_frame(sd, &cpi->scaled_source, cm->temp_scale_frame.y_buffer, tmp_height, hs, hr, vs, vr, 0); vp8_yv12_extend_frame_borders(&cpi->scaled_source); @@ -2680,12 +2750,12 @@ static void update_alt_ref_frame_stats(VP8_COMP *cpi) cpi->gf_active_count = cm->mb_rows * cm->mb_cols; /* this frame refreshes means next frames don't unless specified by user */ - cpi->common.frames_since_golden = 0; + cpi->frames_since_golden = 0; /* Clear the alternate reference update pending flag. */ cpi->source_alt_ref_pending = 0; - /* Set the alternate refernce frame active flag */ + /* Set the alternate reference frame active flag */ cpi->source_alt_ref_active = 1; @@ -2732,7 +2802,7 @@ static void update_golden_frame_stats(VP8_COMP *cpi) * user */ cm->refresh_golden_frame = 0; - cpi->common.frames_since_golden = 0; + cpi->frames_since_golden = 0; cpi->recent_ref_frame_usage[INTRA_FRAME] = 1; cpi->recent_ref_frame_usage[LAST_FRAME] = 1; @@ -2764,12 +2834,12 @@ static void update_golden_frame_stats(VP8_COMP *cpi) if (cpi->frames_till_gf_update_due > 0) cpi->frames_till_gf_update_due--; - if (cpi->common.frames_till_alt_ref_frame) - cpi->common.frames_till_alt_ref_frame --; + if (cpi->frames_till_alt_ref_frame) + cpi->frames_till_alt_ref_frame --; - cpi->common.frames_since_golden ++; + cpi->frames_since_golden ++; - if (cpi->common.frames_since_golden > 1) + if (cpi->frames_since_golden > 1) { cpi->recent_ref_frame_usage[INTRA_FRAME] += cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME]; @@ -2815,14 +2885,16 @@ static void update_rd_ref_frame_probs(VP8_COMP *cpi) if (cpi->common.refresh_alt_ref_frame) { cpi->prob_intra_coded += 40; + if (cpi->prob_intra_coded > 255) + cpi->prob_intra_coded = 255; cpi->prob_last_coded = 200; cpi->prob_gf_coded = 1; } - else if (cpi->common.frames_since_golden == 0) + else if (cpi->frames_since_golden == 0) { cpi->prob_last_coded = 214; } - else if (cpi->common.frames_since_golden == 1) + else if (cpi->frames_since_golden == 1) { cpi->prob_last_coded = 192; cpi->prob_gf_coded = 220; @@ -3296,12 +3368,12 @@ static void encode_frame_to_data_rate cpi->per_frame_bandwidth = cpi->twopass.gf_bits; /* per second target bitrate */ cpi->target_bandwidth = (int)(cpi->twopass.gf_bits * - cpi->output_frame_rate); + cpi->output_framerate); } } else #endif - cpi->per_frame_bandwidth = (int)(cpi->target_bandwidth / cpi->output_frame_rate); + cpi->per_frame_bandwidth = (int)(cpi->target_bandwidth / cpi->output_framerate); /* Default turn off buffer to buffer copying */ cm->copy_buffer_to_gf = 0; @@ -3330,7 +3402,7 @@ static void encode_frame_to_data_rate else cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 0; - /* Check to see if a key frame is signalled + /* Check to see if a key frame is signaled * For two pass with auto key frame enabled cm->frame_type may already * be set, but not for one pass. */ @@ -3465,7 +3537,7 @@ static void encode_frame_to_data_rate /* Note that we should not throw out a key frame (especially when * spatial resampling is enabled). */ - if ((cm->frame_type == KEY_FRAME)) + if (cm->frame_type == KEY_FRAME) { cpi->decimation_count = cpi->decimation_factor; } @@ -3483,6 +3555,8 @@ static void encode_frame_to_data_rate cm->current_video_frame++; cpi->frames_since_key++; + // We advance the temporal pattern for dropped frames. + cpi->temporal_pattern_counter++; #if CONFIG_INTERNAL_STATS cpi->count ++; @@ -3500,7 +3574,8 @@ static void encode_frame_to_data_rate for (i=cpi->current_layer+1; i<cpi->oxcf.number_of_layers; i++) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; - lc->bits_off_target += cpi->av_per_frame_bandwidth; + lc->bits_off_target += (int)(lc->target_bandwidth / + lc->framerate); if (lc->bits_off_target > lc->maximum_buffer_size) lc->bits_off_target = lc->maximum_buffer_size; lc->buffer_level = lc->bits_off_target; @@ -3524,6 +3599,8 @@ static void encode_frame_to_data_rate #endif cm->current_video_frame++; cpi->frames_since_key++; + // We advance the temporal pattern for dropped frames. + cpi->temporal_pattern_counter++; return; } @@ -4481,7 +4558,7 @@ static void encode_frame_to_data_rate { LAYER_CONTEXT *lc = &cpi->layer_context[i]; int bits_off_for_this_layer = - (int)(lc->target_bandwidth / lc->frame_rate - + (int)(lc->target_bandwidth / lc->framerate - cpi->projected_frame_size); lc->bits_off_target += bits_off_for_this_layer; @@ -4597,9 +4674,6 @@ static void encode_frame_to_data_rate cm->frame_type, cm->refresh_golden_frame, cm->refresh_alt_ref_frame); - for (i = 0; i < MAX_MODES; i++) - fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]); - fprintf(fmodes, "\n"); fclose(fmodes); @@ -4694,6 +4768,7 @@ static void encode_frame_to_data_rate { cm->current_video_frame++; cpi->frames_since_key++; + cpi->temporal_pattern_counter++; } /* reset to normal state now that we are done. */ @@ -4731,7 +4806,7 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, { double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth *cpi->oxcf.two_pass_vbrmin_section / 100); - cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->frame_rate); + cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->framerate); } } #endif @@ -4747,8 +4822,10 @@ int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_C { #if HAVE_NEON int64_t store_reg[8]; -#endif +#if CONFIG_RUNTIME_CPU_DETECT VP8_COMMON *cm = &cpi->common; +#endif +#endif struct vpx_usec_timer timer; int res = 0; @@ -4774,7 +4851,6 @@ int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_C if(vp8_lookahead_push(cpi->lookahead, sd, time_stamp, end_time, frame_flags, cpi->active_map_enabled ? cpi->active_map : NULL)) res = -1; - cm->clr_type = sd->clrtype; vpx_usec_timer_mark(&timer); cpi->time_receive_data += vpx_usec_timer_elapsed(&timer); @@ -4859,7 +4935,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l cpi->frames_till_gf_update_due); force_src_buffer = &cpi->alt_ref_buffer; } - cm->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due; + cpi->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due; cm->refresh_alt_ref_frame = 1; cm->refresh_golden_frame = 0; cm->refresh_last_frame = 0; @@ -4964,7 +5040,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l if (this_duration) { if (step) - cpi->ref_frame_rate = 10000000.0 / this_duration; + cpi->ref_framerate = 10000000.0 / this_duration; else { double avg_duration, interval; @@ -4978,11 +5054,11 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l if(interval > 10000000.0) interval = 10000000; - avg_duration = 10000000.0 / cpi->ref_frame_rate; + avg_duration = 10000000.0 / cpi->ref_framerate; avg_duration *= (interval - avg_duration + this_duration); avg_duration /= interval; - cpi->ref_frame_rate = 10000000.0 / avg_duration; + cpi->ref_framerate = 10000000.0 / avg_duration; } if (cpi->oxcf.number_of_layers > 1) @@ -4993,12 +5069,12 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l for (i=0; i<cpi->oxcf.number_of_layers; i++) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; - lc->frame_rate = cpi->ref_frame_rate / - cpi->oxcf.rate_decimator[i]; + lc->framerate = cpi->ref_framerate / + cpi->oxcf.rate_decimator[i]; } } else - vp8_new_frame_rate(cpi, cpi->ref_frame_rate); + vp8_new_framerate(cpi, cpi->ref_framerate); } cpi->last_time_stamp_seen = cpi->source->ts_start; @@ -5013,9 +5089,9 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l /* Restore layer specific context & set frame rate */ layer = cpi->oxcf.layer_id[ - cm->current_video_frame % cpi->oxcf.periodicity]; + cpi->temporal_pattern_counter % cpi->oxcf.periodicity]; restore_layer_context (cpi, layer); - vp8_new_frame_rate (cpi, cpi->layer_context[layer].frame_rate); + vp8_new_framerate(cpi, cpi->layer_context[layer].framerate); } if (cpi->compressor_speed == 2) @@ -5180,7 +5256,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l if (cm->show_frame) { - + cpi->common.show_frame_mi = cpi->common.mi; cpi->count ++; if (cpi->b_calculate_psnr) @@ -5361,6 +5437,7 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla #endif #if CONFIG_POSTPROC + cpi->common.show_frame_mi = cpi->common.mi; ret = vp8_post_proc_frame(&cpi->common, dest, flags); #else diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index fb8ad357c..3ab0fe8bf 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -232,7 +232,7 @@ enum typedef struct { /* Layer configuration */ - double frame_rate; + double framerate; int target_bandwidth; /* Layer specific coding parameters */ @@ -282,17 +282,17 @@ typedef struct VP8_COMP { DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, unsigned char, Y1quant_shift[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, unsigned char, Y2quant_shift[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, unsigned char, UVquant_shift[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][16]); @@ -320,6 +320,7 @@ typedef struct VP8_COMP YV12_BUFFER_CONFIG scaled_source; YV12_BUFFER_CONFIG *last_frame_unscaled_source; + unsigned int frames_till_alt_ref_frame; /* frame in src_buffers has been identified to be encoded as an alt ref */ int source_alt_ref_pending; /* an alt ref frame has been encoded and is usable */ @@ -349,7 +350,6 @@ typedef struct VP8_COMP int ambient_err; unsigned int mode_check_freq[MAX_MODES]; - unsigned int mode_chosen_counts[MAX_MODES]; int rd_baseline_thresh[MAX_MODES]; @@ -370,6 +370,7 @@ typedef struct VP8_COMP double key_frame_rate_correction_factor; double gf_rate_correction_factor; + unsigned int frames_since_golden; /* Count down till next GF */ int frames_till_gf_update_due; @@ -402,7 +403,7 @@ typedef struct VP8_COMP /* Minimum allocation that should be used for any frame */ int min_frame_bandwidth; int inter_frame_target; - double output_frame_rate; + double output_framerate; int64_t last_time_stamp_seen; int64_t last_end_time_stamp_seen; int64_t first_time_stamp_ever; @@ -416,8 +417,8 @@ typedef struct VP8_COMP int buffered_mode; - double frame_rate; - double ref_frame_rate; + double framerate; + double ref_framerate; int64_t buffer_level; int64_t bits_off_target; @@ -510,6 +511,10 @@ typedef struct VP8_COMP int cyclic_refresh_q; signed char *cyclic_refresh_map; + // Frame counter for the temporal pattern. Counter is rest when the temporal + // layers are changed dynamically (run-time change). + unsigned int temporal_pattern_counter; + #if CONFIG_MULTITHREAD /* multithread data */ int * mt_current_mb_col; @@ -587,7 +592,7 @@ typedef struct VP8_COMP /* Error score of frames still to be coded in kf group */ int64_t kf_group_error_left; /* Projected Bits available for a group including 1 GF or ARF */ - int gf_group_bits; + int64_t gf_group_bits; /* Bits for the golden frame or ARF */ int gf_bits; int alt_extra_bits; diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index 673de2b33..c5279fed2 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -389,7 +389,7 @@ static void pick_intra_mbuv_mode(MACROBLOCK *mb) } -static void update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv) +static void update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv) { MACROBLOCKD *xd = &x->e_mbd; /* Split MV modes currently not supported when RD is nopt enabled, @@ -594,6 +594,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX; #endif + int sf_improved_mv_pred = cpi->sf.improved_mv_pred; int_mv mvp; int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7}; @@ -882,7 +883,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, last frame motion info is not stored, then we can not use improved_mv_pred. */ if (cpi->oxcf.mr_encoder_id && !parent_ref_valid) - cpi->sf.improved_mv_pred = 0; + sf_improved_mv_pred = 0; if (parent_ref_valid && parent_ref_frame) { @@ -899,7 +900,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, }else #endif { - if(cpi->sf.improved_mv_pred) + if(sf_improved_mv_pred) { if(!saddone) { @@ -1241,7 +1242,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, != cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int; - update_mvcount(cpi, x, &best_ref_mv); + update_mvcount(x, &best_ref_mv); } diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c index 4121349a9..250d04c7f 100644 --- a/vp8/encoder/picklpf.c +++ b/vp8/encoder/picklpf.c @@ -9,11 +9,12 @@ */ +#include "./vpx_scale_rtcd.h" #include "vp8/common/onyxc_int.h" #include "onyx_int.h" #include "quantize.h" #include "vpx_mem/vpx_mem.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "vp8/common/alloccommon.h" #include "vp8/common/loopfilter.h" #if ARCH_ARM @@ -312,7 +313,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) /* Get baseline error score */ /* Copy the unfiltered / processed recon buffer to the new buffer */ - vp8_yv12_copy_y(saved_frame, cm->frame_to_show); + vpx_yv12_copy_y(saved_frame, cm->frame_to_show); vp8cx_set_alt_lf_level(cpi, filt_mid); vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_mid); @@ -338,7 +339,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) if(ss_err[filt_low] == 0) { /* Get Low filter error score */ - vp8_yv12_copy_y(saved_frame, cm->frame_to_show); + vpx_yv12_copy_y(saved_frame, cm->frame_to_show); vp8cx_set_alt_lf_level(cpi, filt_low); vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low); @@ -366,7 +367,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { if(ss_err[filt_high] == 0) { - vp8_yv12_copy_y(saved_frame, cm->frame_to_show); + vpx_yv12_copy_y(saved_frame, cm->frame_to_show); vp8cx_set_alt_lf_level(cpi, filt_high); vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_high); diff --git a/vp8/encoder/psnr.c b/vp8/encoder/psnr.c index 5bb49ad26..b3a3d9552 100644 --- a/vp8/encoder/psnr.c +++ b/vp8/encoder/psnr.c @@ -13,7 +13,7 @@ #include "math.h" #include "vp8/common/systemdependent.h" /* for vp8_clear_system_state() */ -#define MAX_PSNR 60 +#define MAX_PSNR 100 double vp8_mse2psnr(double Samples, double Peak, double Mse) { diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c index 33c8ef055..fda997ff6 100644 --- a/vp8/encoder/quantize.c +++ b/vp8/encoder/quantize.c @@ -50,8 +50,8 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) if (x >= zbin) { x += round_ptr[rc]; - y = (((x * quant_ptr[rc]) >> 16) + x) - >> quant_shift_ptr[rc]; /* quantize (x) */ + y = ((((x * quant_ptr[rc]) >> 16) + x) + * quant_shift_ptr[rc]) >> 16; /* quantize (x) */ x = (y ^ sz) - sz; /* get the sign back */ qcoeff_ptr[rc] = x; /* write to destination */ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ @@ -113,7 +113,7 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d) short *zbin_ptr = b->zbin; short *round_ptr = b->round; short *quant_ptr = b->quant; - unsigned char *quant_shift_ptr = b->quant_shift; + short *quant_shift_ptr = b->quant_shift; short *qcoeff_ptr = d->qcoeff; short *dqcoeff_ptr = d->dqcoeff; short *dequant_ptr = d->dequant; @@ -138,8 +138,8 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d) if (x >= zbin) { x += round_ptr[rc]; - y = (((x * quant_ptr[rc]) >> 16) + x) - >> quant_shift_ptr[rc]; /* quantize (x) */ + y = ((((x * quant_ptr[rc]) >> 16) + x) + * quant_shift_ptr[rc]) >> 16; /* quantize (x) */ x = (y ^ sz) - sz; /* get the sign back */ qcoeff_ptr[rc] = x; /* write to destination */ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ @@ -167,7 +167,7 @@ void vp8_strict_quantize_b_c(BLOCK *b, BLOCKD *d) int sz; short *coeff_ptr; short *quant_ptr; - unsigned char *quant_shift_ptr; + short *quant_shift_ptr; short *qcoeff_ptr; short *dqcoeff_ptr; short *dequant_ptr; @@ -184,21 +184,21 @@ void vp8_strict_quantize_b_c(BLOCK *b, BLOCKD *d) for (i = 0; i < 16; i++) { int dq; - int round; + int rounding; /*TODO: These arrays should be stored in zig-zag order.*/ rc = vp8_default_zig_zag1d[i]; z = coeff_ptr[rc]; dq = dequant_ptr[rc]; - round = dq >> 1; + rounding = dq >> 1; /* Sign of z. */ sz = -(z < 0); x = (z + sz) ^ sz; - x += round; + x += rounding; if (x >= dq) { /* Quantize x. */ - y = (((x * quant_ptr[rc]) >> 16) + x) >> quant_shift_ptr[rc]; + y = ((((x * quant_ptr[rc]) >> 16) + x) * quant_shift_ptr[rc]) >> 16; /* Put the sign back. */ x = (y + sz) ^ sz; /* Save the coefficient and its dequantized value. */ @@ -406,7 +406,7 @@ static const int qzbin_factors_y2[129] = #define EXACT_QUANT #ifdef EXACT_QUANT static void invert_quant(int improved_quant, short *quant, - unsigned char *shift, short d) + short *shift, short d) { if(improved_quant) { @@ -418,11 +418,15 @@ static void invert_quant(int improved_quant, short *quant, t = 1 + (1<<(16+l))/d; *quant = (short)(t - (1<<16)); *shift = l; + /* use multiplication and constant shift by 16 */ + *shift = 1 << (16 - *shift); } else { *quant = (1 << 16) / d; *shift = 0; + /* use multiplication and constant shift by 16 */ + *shift = 1 << (16 - *shift); } } diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c index a399a3877..fe4db13b3 100644 --- a/vp8/encoder/ratectrl.c +++ b/vp8/encoder/ratectrl.c @@ -234,7 +234,7 @@ void vp8_save_coding_context(VP8_COMP *cpi) cc->frames_since_key = cpi->frames_since_key; cc->filter_level = cpi->common.filter_level; cc->frames_till_gf_update_due = cpi->frames_till_gf_update_due; - cc->frames_since_golden = cpi->common.frames_since_golden; + cc->frames_since_golden = cpi->frames_since_golden; vp8_copy(cc->mvc, cpi->common.fc.mvc); vp8_copy(cc->mvcosts, cpi->rd_costs.mvcosts); @@ -271,7 +271,7 @@ void vp8_restore_coding_context(VP8_COMP *cpi) cpi->frames_since_key = cc->frames_since_key; cpi->common.filter_level = cc->filter_level; cpi->frames_till_gf_update_due = cc->frames_till_gf_update_due; - cpi->common.frames_since_golden = cc->frames_since_golden; + cpi->frames_since_golden = cc->frames_since_golden; vp8_copy(cpi->common.fc.mvc, cc->mvc); @@ -388,7 +388,7 @@ static void calc_iframe_target_size(VP8_COMP *cpi) int initial_boost = 32; /* |3.0 * per_frame_bandwidth| */ /* Boost depends somewhat on frame rate: only used for 1 layer case. */ if (cpi->oxcf.number_of_layers == 1) { - kf_boost = MAX(initial_boost, (int)(2 * cpi->output_frame_rate - 16)); + kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16)); } else { /* Initial factor: set target size to: |3.0 * per_frame_bandwidth|. */ @@ -399,9 +399,9 @@ static void calc_iframe_target_size(VP8_COMP *cpi) kf_boost = kf_boost * kf_boost_qadjustment[Q] / 100; /* frame separation adjustment ( down) */ - if (cpi->frames_since_key < cpi->output_frame_rate / 2) + if (cpi->frames_since_key < cpi->output_framerate / 2) kf_boost = (int)(kf_boost - * cpi->frames_since_key / (cpi->output_frame_rate / 2)); + * cpi->frames_since_key / (cpi->output_framerate / 2)); /* Minimal target size is |2* per_frame_bandwidth|. */ if (kf_boost < 16) @@ -614,7 +614,6 @@ static void calc_gf_params(VP8_COMP *cpi) static void calc_pframe_target_size(VP8_COMP *cpi) { int min_frame_target; - int Adjustment; int old_per_frame_bandwidth = cpi->per_frame_bandwidth; if ( cpi->current_layer > 0) @@ -658,6 +657,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) /* 1 pass */ else { + int Adjustment; /* Make rate adjustment to recover bits spent in key frame * Test to see if the key frame inter data rate correction * should still be in force @@ -688,7 +688,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) */ if ((cpi->gf_overspend_bits > 0) && (cpi->this_frame_target > min_frame_target)) { - int Adjustment = (cpi->non_gf_bitrate_adjustment <= cpi->gf_overspend_bits) ? cpi->non_gf_bitrate_adjustment : cpi->gf_overspend_bits; + Adjustment = (cpi->non_gf_bitrate_adjustment <= cpi->gf_overspend_bits) ? cpi->non_gf_bitrate_adjustment : cpi->gf_overspend_bits; if (Adjustment > (cpi->this_frame_target - min_frame_target)) Adjustment = (cpi->this_frame_target - min_frame_target); @@ -715,7 +715,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) if (Adjustment > (cpi->this_frame_target - min_frame_target)) Adjustment = (cpi->this_frame_target - min_frame_target); - if (cpi->common.frames_since_golden == (cpi->current_gf_interval >> 1)) + if (cpi->frames_since_golden == (cpi->current_gf_interval >> 1)) cpi->this_frame_target += ((cpi->current_gf_interval - 1) * Adjustment); else cpi->this_frame_target -= Adjustment; @@ -956,6 +956,21 @@ static void calc_pframe_target_size(VP8_COMP *cpi) if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size) cpi->bits_off_target = (int)cpi->oxcf.maximum_buffer_size; cpi->buffer_level = cpi->bits_off_target; + + if (cpi->oxcf.number_of_layers > 1) { + unsigned int i; + + // Propagate bits saved by dropping the frame to higher layers. + for (i = cpi->current_layer + 1; i < cpi->oxcf.number_of_layers; + i++) { + LAYER_CONTEXT *lc = &cpi->layer_context[i]; + lc->bits_off_target += (int)(lc->target_bandwidth / + lc->framerate); + if (lc->bits_off_target > lc->maximum_buffer_size) + lc->bits_off_target = lc->maximum_buffer_size; + lc->buffer_level = lc->bits_off_target; + } + } } } @@ -1360,10 +1375,10 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi) * whichever is smaller. */ int key_freq = cpi->oxcf.key_freq>0 ? cpi->oxcf.key_freq : 1; - av_key_frame_frequency = (int)cpi->output_frame_rate * 2; + av_key_frame_frequency = 1 + (int)cpi->output_framerate * 2; if (cpi->oxcf.auto_key && av_key_frame_frequency > key_freq) - av_key_frame_frequency = cpi->oxcf.key_freq; + av_key_frame_frequency = key_freq; cpi->prior_key_frame_distance[KEY_FRAME_CONTEXT - 1] = av_key_frame_frequency; @@ -1393,6 +1408,10 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi) av_key_frame_frequency /= total_weight; } + // TODO (marpan): Given the checks above, |av_key_frame_frequency| + // should always be above 0. But for now we keep the sanity check in. + if (av_key_frame_frequency == 0) + av_key_frame_frequency = 1; return av_key_frame_frequency; } diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index ceb817c02..5016cc422 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -14,7 +14,7 @@ #include <limits.h> #include <assert.h> #include "vpx_config.h" -#include "vpx_rtcd.h" +#include "vp8_rtcd.h" #include "vp8/common/pragmas.h" #include "tokenize.h" #include "treewriter.h" @@ -341,7 +341,7 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue) void vp8_auto_select_speed(VP8_COMP *cpi) { - int milliseconds_for_compress = (int)(1000000 / cpi->frame_rate); + int milliseconds_for_compress = (int)(1000000 / cpi->framerate); milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16; @@ -884,8 +884,8 @@ static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate, for (mode = DC_PRED; mode <= TM_PRED; mode++) { - int rate; - int distortion; + int this_rate; + int this_distortion; int this_rd; xd->mode_info_context->mbmi.uv_mode = mode; @@ -907,17 +907,17 @@ static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate, vp8_quantize_mbuv(x); rate_to = rd_cost_mbuv(x); - rate = rate_to + x->intra_uv_mode_cost[xd->frame_type][xd->mode_info_context->mbmi.uv_mode]; + this_rate = rate_to + x->intra_uv_mode_cost[xd->frame_type][xd->mode_info_context->mbmi.uv_mode]; - distortion = vp8_mbuverror(x) / 4; + this_distortion = vp8_mbuverror(x) / 4; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); + this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); if (this_rd < best_rd) { best_rd = this_rd; - d = distortion; - r = rate; + d = this_distortion; + r = this_rate; *rate_tokenonly = rate_to; mode_selected = mode; } @@ -935,7 +935,7 @@ int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]) assert(NEARESTMV <= m && m <= SPLITMV); vp8_mv_ref_probs(p, near_mv_ref_ct); return vp8_cost_token(vp8_mv_ref_tree, p, - vp8_mv_ref_encoding_array - NEARESTMV + m); + vp8_mv_ref_encoding_array + (m - NEARESTMV)); } void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) @@ -1294,12 +1294,11 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, if (bestsme < INT_MAX) { - int distortion; + int disto; unsigned int sse; cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost, - &distortion, &sse); - + &disto, &sse); } } /* NEW4X4 */ @@ -1733,7 +1732,7 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse } } -static void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv) +static void rd_update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv) { if (x->e_mbd.mode_info_context->mbmi.mode == SPLITMV) { @@ -2512,9 +2511,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, x->rd_thresh_mult[best_mode_index]; } - /* Note how often each mode chosen as best */ - cpi->mode_chosen_counts[best_mode_index] ++; - #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity) { @@ -2608,7 +2604,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, != cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int; - rd_update_mvcount(cpi, x, &best_ref_mv); + rd_update_mvcount(x, &best_ref_mv); } void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate_) diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c index b83ae89ab..7e3af71ec 100644 --- a/vp8/encoder/temporal_filter.c +++ b/vp8/encoder/temporal_filter.c @@ -17,7 +17,7 @@ #include "mcomp.h" #include "firstpass.h" #include "psnr.h" -#include "vpx_scale/vpxscale.h" +#include "vpx_scale/vpx_scale.h" #include "vp8/common/extend.h" #include "ratectrl.h" #include "vp8/common/quant_common.h" diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c index 3b5268b61..11559a720 100644 --- a/vp8/encoder/tokenize.c +++ b/vp8/encoder/tokenize.c @@ -20,7 +20,7 @@ /* Global event counters used for accumulating statistics across several compressions, then generating context.c = initial stats. */ -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; #endif void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) ; @@ -413,7 +413,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) } -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS void init_context_counters(void) { diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h index c2d1438f9..1e6cea114 100644 --- a/vp8/encoder/tokenize.h +++ b/vp8/encoder/tokenize.h @@ -33,7 +33,7 @@ typedef struct int rd_cost_mby(MACROBLOCKD *); -#ifdef ENTROPY_STATS +#ifdef VP8_ENTROPY_STATS void init_context_counters(); void print_context_counters(); diff --git a/vp8/encoder/asm_enc_offsets.c b/vp8/encoder/vp8_asm_enc_offsets.c index a4169b32f..a4169b32f 100644 --- a/vp8/encoder/asm_enc_offsets.c +++ b/vp8/encoder/vp8_asm_enc_offsets.c diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm index d880ce0c4..d06bca592 100644 --- a/vp8/encoder/x86/dct_sse2.asm +++ b/vp8/encoder/x86/dct_sse2.asm @@ -29,7 +29,7 @@ movsxd rax, dword ptr arg(2) lea rcx, [rsi + rax*2] %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 %define input rcx %define output rdx %define pitch r8 @@ -53,7 +53,7 @@ RESTORE_GOT pop rbp %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 RESTORE_XMM %endif %endif diff --git a/vp8/encoder/x86/denoising_sse2.c b/vp8/encoder/x86/denoising_sse2.c index c1ac6c137..cceb8263f 100644 --- a/vp8/encoder/x86/denoising_sse2.c +++ b/vp8/encoder/x86/denoising_sse2.c @@ -12,9 +12,10 @@ #include "vp8/common/reconinter.h" #include "vpx/vpx_integer.h" #include "vpx_mem/vpx_mem.h" -#include "vpx_rtcd.h" +#include "vp8_rtcd.h" #include <emmintrin.h> +#include "vpx_ports/emmintrin_compat.h" union sum_union { __m128i v; diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm deleted file mode 100644 index 724e54c45..000000000 --- a/vp8/encoder/x86/quantize_sse2.asm +++ /dev/null @@ -1,386 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" -%include "asm_enc_offsets.asm" - - -; void vp8_regular_quantize_b_sse2 | arg -; (BLOCK *b, | 0 -; BLOCKD *d) | 1 - -global sym(vp8_regular_quantize_b_sse2) PRIVATE -sym(vp8_regular_quantize_b_sse2): - push rbp - mov rbp, rsp - SAVE_XMM 7 - GET_GOT rbx - -%if ABI_IS_32BIT - push rdi - push rsi -%else - %ifidn __OUTPUT_FORMAT__,x64 - push rdi - push rsi - %endif -%endif - - ALIGN_STACK 16, rax - %define zrun_zbin_boost 0 ; 8 - %define abs_minus_zbin 8 ; 32 - %define temp_qcoeff 40 ; 32 - %define qcoeff 72 ; 32 - %define stack_size 104 - sub rsp, stack_size - ; end prolog - -%if ABI_IS_32BIT - mov rdi, arg(0) ; BLOCK *b - mov rsi, arg(1) ; BLOCKD *d -%else - %ifidn __OUTPUT_FORMAT__,x64 - mov rdi, rcx ; BLOCK *b - mov rsi, rdx ; BLOCKD *d - %else - ;mov rdi, rdi ; BLOCK *b - ;mov rsi, rsi ; BLOCKD *d - %endif -%endif - - mov rdx, [rdi + vp8_block_coeff] ; coeff_ptr - mov rcx, [rdi + vp8_block_zbin] ; zbin_ptr - movd xmm7, [rdi + vp8_block_zbin_extra] ; zbin_oq_value - - ; z - movdqa xmm0, [rdx] - movdqa xmm4, [rdx + 16] - mov rdx, [rdi + vp8_block_round] ; round_ptr - - pshuflw xmm7, xmm7, 0 - punpcklwd xmm7, xmm7 ; duplicated zbin_oq_value - - movdqa xmm1, xmm0 - movdqa xmm5, xmm4 - - ; sz - psraw xmm0, 15 - psraw xmm4, 15 - - ; (z ^ sz) - pxor xmm1, xmm0 - pxor xmm5, xmm4 - - ; x = abs(z) - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - movdqa xmm2, [rcx] - movdqa xmm3, [rcx + 16] - mov rcx, [rdi + vp8_block_quant] ; quant_ptr - - ; *zbin_ptr + zbin_oq_value - paddw xmm2, xmm7 - paddw xmm3, xmm7 - - ; x - (*zbin_ptr + zbin_oq_value) - psubw xmm1, xmm2 - psubw xmm5, xmm3 - movdqa [rsp + abs_minus_zbin], xmm1 - movdqa [rsp + abs_minus_zbin + 16], xmm5 - - ; add (zbin_ptr + zbin_oq_value) back - paddw xmm1, xmm2 - paddw xmm5, xmm3 - - movdqa xmm2, [rdx] - movdqa xmm6, [rdx + 16] - - movdqa xmm3, [rcx] - movdqa xmm7, [rcx + 16] - - ; x + round - paddw xmm1, xmm2 - paddw xmm5, xmm6 - - ; y = x * quant_ptr >> 16 - pmulhw xmm3, xmm1 - pmulhw xmm7, xmm5 - - ; y += x - paddw xmm1, xmm3 - paddw xmm5, xmm7 - - movdqa [rsp + temp_qcoeff], xmm1 - movdqa [rsp + temp_qcoeff + 16], xmm5 - - pxor xmm6, xmm6 - ; zero qcoeff - movdqa [rsp + qcoeff], xmm6 - movdqa [rsp + qcoeff + 16], xmm6 - - mov rdx, [rdi + vp8_block_zrun_zbin_boost] ; zbin_boost_ptr - mov rax, [rdi + vp8_block_quant_shift] ; quant_shift_ptr - mov [rsp + zrun_zbin_boost], rdx - -%macro ZIGZAG_LOOP 1 - ; x - movsx ecx, WORD PTR[rsp + abs_minus_zbin + %1 * 2] - - ; if (x >= zbin) - sub cx, WORD PTR[rdx] ; x - zbin - lea rdx, [rdx + 2] ; zbin_boost_ptr++ - jl .rq_zigzag_loop_%1 ; x < zbin - - movsx edi, WORD PTR[rsp + temp_qcoeff + %1 * 2] - - ; downshift by quant_shift[rc] - movsx cx, BYTE PTR[rax + %1] ; quant_shift_ptr[rc] - sar edi, cl ; also sets Z bit - je .rq_zigzag_loop_%1 ; !y - mov WORD PTR[rsp + qcoeff + %1 * 2], di ;qcoeff_ptr[rc] = temp_qcoeff[rc] - mov rdx, [rsp + zrun_zbin_boost] ; reset to b->zrun_zbin_boost -.rq_zigzag_loop_%1: -%endmacro -; in vp8_default_zig_zag1d order: see vp8/common/entropy.c -ZIGZAG_LOOP 0 -ZIGZAG_LOOP 1 -ZIGZAG_LOOP 4 -ZIGZAG_LOOP 8 -ZIGZAG_LOOP 5 -ZIGZAG_LOOP 2 -ZIGZAG_LOOP 3 -ZIGZAG_LOOP 6 -ZIGZAG_LOOP 9 -ZIGZAG_LOOP 12 -ZIGZAG_LOOP 13 -ZIGZAG_LOOP 10 -ZIGZAG_LOOP 7 -ZIGZAG_LOOP 11 -ZIGZAG_LOOP 14 -ZIGZAG_LOOP 15 - - movdqa xmm2, [rsp + qcoeff] - movdqa xmm3, [rsp + qcoeff + 16] - - mov rcx, [rsi + vp8_blockd_dequant] ; dequant_ptr - mov rdi, [rsi + vp8_blockd_dqcoeff] ; dqcoeff_ptr - - ; y ^ sz - pxor xmm2, xmm0 - pxor xmm3, xmm4 - ; x = (y ^ sz) - sz - psubw xmm2, xmm0 - psubw xmm3, xmm4 - - ; dequant - movdqa xmm0, [rcx] - movdqa xmm1, [rcx + 16] - - mov rcx, [rsi + vp8_blockd_qcoeff] ; qcoeff_ptr - - pmullw xmm0, xmm2 - pmullw xmm1, xmm3 - - movdqa [rcx], xmm2 ; store qcoeff - movdqa [rcx + 16], xmm3 - movdqa [rdi], xmm0 ; store dqcoeff - movdqa [rdi + 16], xmm1 - - mov rcx, [rsi + vp8_blockd_eob] - - ; select the last value (in zig_zag order) for EOB - pcmpeqw xmm2, xmm6 - pcmpeqw xmm3, xmm6 - ; ! - pcmpeqw xmm6, xmm6 - pxor xmm2, xmm6 - pxor xmm3, xmm6 - ; mask inv_zig_zag - pand xmm2, [GLOBAL(inv_zig_zag)] - pand xmm3, [GLOBAL(inv_zig_zag + 16)] - ; select the max value - pmaxsw xmm2, xmm3 - pshufd xmm3, xmm2, 00001110b - pmaxsw xmm2, xmm3 - pshuflw xmm3, xmm2, 00001110b - pmaxsw xmm2, xmm3 - pshuflw xmm3, xmm2, 00000001b - pmaxsw xmm2, xmm3 - movd eax, xmm2 - and eax, 0xff - - mov BYTE PTR [rcx], al ; store eob - - ; begin epilog - add rsp, stack_size - pop rsp -%if ABI_IS_32BIT - pop rsi - pop rdi -%else - %ifidn __OUTPUT_FORMAT__,x64 - pop rsi - pop rdi - %endif -%endif - RESTORE_GOT - RESTORE_XMM - pop rbp - ret - -; void vp8_fast_quantize_b_sse2 | arg -; (BLOCK *b, | 0 -; BLOCKD *d) | 1 - -global sym(vp8_fast_quantize_b_sse2) PRIVATE -sym(vp8_fast_quantize_b_sse2): - push rbp - mov rbp, rsp - GET_GOT rbx - -%if ABI_IS_32BIT - push rdi - push rsi -%else - %ifidn __OUTPUT_FORMAT__,x64 - push rdi - push rsi - %else - ; these registers are used for passing arguments - %endif -%endif - - ; end prolog - -%if ABI_IS_32BIT - mov rdi, arg(0) ; BLOCK *b - mov rsi, arg(1) ; BLOCKD *d -%else - %ifidn __OUTPUT_FORMAT__,x64 - mov rdi, rcx ; BLOCK *b - mov rsi, rdx ; BLOCKD *d - %else - ;mov rdi, rdi ; BLOCK *b - ;mov rsi, rsi ; BLOCKD *d - %endif -%endif - - mov rax, [rdi + vp8_block_coeff] - mov rcx, [rdi + vp8_block_round] - mov rdx, [rdi + vp8_block_quant_fast] - - ; z = coeff - movdqa xmm0, [rax] - movdqa xmm4, [rax + 16] - - ; dup z so we can save sz - movdqa xmm1, xmm0 - movdqa xmm5, xmm4 - - ; sz = z >> 15 - psraw xmm0, 15 - psraw xmm4, 15 - - ; x = abs(z) = (z ^ sz) - sz - pxor xmm1, xmm0 - pxor xmm5, xmm4 - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - ; x += round - paddw xmm1, [rcx] - paddw xmm5, [rcx + 16] - - mov rax, [rsi + vp8_blockd_qcoeff] - mov rcx, [rsi + vp8_blockd_dequant] - mov rdi, [rsi + vp8_blockd_dqcoeff] - - ; y = x * quant >> 16 - pmulhw xmm1, [rdx] - pmulhw xmm5, [rdx + 16] - - ; x = (y ^ sz) - sz - pxor xmm1, xmm0 - pxor xmm5, xmm4 - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - ; qcoeff = x - movdqa [rax], xmm1 - movdqa [rax + 16], xmm5 - - ; x * dequant - movdqa xmm2, xmm1 - movdqa xmm3, xmm5 - pmullw xmm2, [rcx] - pmullw xmm3, [rcx + 16] - - ; dqcoeff = x * dequant - movdqa [rdi], xmm2 - movdqa [rdi + 16], xmm3 - - pxor xmm4, xmm4 ;clear all bits - pcmpeqw xmm1, xmm4 - pcmpeqw xmm5, xmm4 - - pcmpeqw xmm4, xmm4 ;set all bits - pxor xmm1, xmm4 - pxor xmm5, xmm4 - - pand xmm1, [GLOBAL(inv_zig_zag)] - pand xmm5, [GLOBAL(inv_zig_zag + 16)] - - pmaxsw xmm1, xmm5 - - mov rcx, [rsi + vp8_blockd_eob] - - ; now down to 8 - pshufd xmm5, xmm1, 00001110b - - pmaxsw xmm1, xmm5 - - ; only 4 left - pshuflw xmm5, xmm1, 00001110b - - pmaxsw xmm1, xmm5 - - ; okay, just 2! - pshuflw xmm5, xmm1, 00000001b - - pmaxsw xmm1, xmm5 - - movd eax, xmm1 - and eax, 0xff - - mov BYTE PTR [rcx], al ; store eob - - ; begin epilog -%if ABI_IS_32BIT - pop rsi - pop rdi -%else - %ifidn __OUTPUT_FORMAT__,x64 - pop rsi - pop rdi - %endif -%endif - - RESTORE_GOT - pop rbp - ret - -SECTION_RODATA -align 16 -inv_zig_zag: - dw 0x0001, 0x0002, 0x0006, 0x0007 - dw 0x0003, 0x0005, 0x0008, 0x000d - dw 0x0004, 0x0009, 0x000c, 0x000e - dw 0x000a, 0x000b, 0x000f, 0x0010 diff --git a/vp8/encoder/x86/quantize_sse2.c b/vp8/encoder/x86/quantize_sse2.c new file mode 100644 index 000000000..f495bf287 --- /dev/null +++ b/vp8/encoder/x86/quantize_sse2.c @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vpx_config.h" +#include "vp8_rtcd.h" +#include "vpx_ports/x86.h" +#include "vpx_mem/vpx_mem.h" +#include "vp8/encoder/block.h" +#include "vp8/common/entropy.h" /* vp8_default_inv_zig_zag */ + +#include <mmintrin.h> /* MMX */ +#include <xmmintrin.h> /* SSE */ +#include <emmintrin.h> /* SSE2 */ + +#define SELECT_EOB(i, z) \ + do { \ + short boost = *zbin_boost_ptr; \ + int cmp = (x[z] < boost) | (y[z] == 0); \ + zbin_boost_ptr++; \ + if (cmp) \ + goto select_eob_end_##i; \ + qcoeff_ptr[z] = y[z]; \ + eob = i; \ + zbin_boost_ptr = b->zrun_zbin_boost; \ + select_eob_end_##i:; \ + } while (0) + +void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d) +{ + char eob = 0; + short *zbin_boost_ptr = b->zrun_zbin_boost; + short *qcoeff_ptr = d->qcoeff; + DECLARE_ALIGNED_ARRAY(16, short, x, 16); + DECLARE_ALIGNED_ARRAY(16, short, y, 16); + + __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1; + __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift)); + __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8)); + __m128i z0 = _mm_load_si128((__m128i *)(b->coeff)); + __m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8)); + __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra); + __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin)); + __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8)); + __m128i round0 = _mm_load_si128((__m128i *)(b->round)); + __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8)); + __m128i quant0 = _mm_load_si128((__m128i *)(b->quant)); + __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8)); + __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant)); + __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8)); + + vpx_memset(qcoeff_ptr, 0, 32); + + /* Duplicate to all lanes. */ + zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0); + zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra); + + /* Sign of z: z >> 15 */ + sz0 = _mm_srai_epi16(z0, 15); + sz1 = _mm_srai_epi16(z1, 15); + + /* x = abs(z): (z ^ sz) - sz */ + x0 = _mm_xor_si128(z0, sz0); + x1 = _mm_xor_si128(z1, sz1); + x0 = _mm_sub_epi16(x0, sz0); + x1 = _mm_sub_epi16(x1, sz1); + + /* zbin[] + zbin_extra */ + zbin0 = _mm_add_epi16(zbin0, zbin_extra); + zbin1 = _mm_add_epi16(zbin1, zbin_extra); + + /* In C x is compared to zbin where zbin = zbin[] + boost + extra. Rebalance + * the equation because boost is the only value which can change: + * x - (zbin[] + extra) >= boost */ + x_minus_zbin0 = _mm_sub_epi16(x0, zbin0); + x_minus_zbin1 = _mm_sub_epi16(x1, zbin1); + + _mm_store_si128((__m128i *)(x), x_minus_zbin0); + _mm_store_si128((__m128i *)(x + 8), x_minus_zbin1); + + /* All the remaining calculations are valid whether they are done now with + * simd or later inside the loop one at a time. */ + x0 = _mm_add_epi16(x0, round0); + x1 = _mm_add_epi16(x1, round1); + + y0 = _mm_mulhi_epi16(x0, quant0); + y1 = _mm_mulhi_epi16(x1, quant1); + + y0 = _mm_add_epi16(y0, x0); + y1 = _mm_add_epi16(y1, x1); + + /* Instead of shifting each value independently we convert the scaling + * factor with 1 << (16 - shift) so we can use multiply/return high half. */ + y0 = _mm_mulhi_epi16(y0, quant_shift0); + y1 = _mm_mulhi_epi16(y1, quant_shift1); + + /* Return the sign: (y ^ sz) - sz */ + y0 = _mm_xor_si128(y0, sz0); + y1 = _mm_xor_si128(y1, sz1); + y0 = _mm_sub_epi16(y0, sz0); + y1 = _mm_sub_epi16(y1, sz1); + + _mm_store_si128((__m128i *)(y), y0); + _mm_store_si128((__m128i *)(y + 8), y1); + + zbin_boost_ptr = b->zrun_zbin_boost; + + /* The loop gets unrolled anyway. Avoid the vp8_default_zig_zag1d lookup. */ + SELECT_EOB(1, 0); + SELECT_EOB(2, 1); + SELECT_EOB(3, 4); + SELECT_EOB(4, 8); + SELECT_EOB(5, 5); + SELECT_EOB(6, 2); + SELECT_EOB(7, 3); + SELECT_EOB(8, 6); + SELECT_EOB(9, 9); + SELECT_EOB(10, 12); + SELECT_EOB(11, 13); + SELECT_EOB(12, 10); + SELECT_EOB(13, 7); + SELECT_EOB(14, 11); + SELECT_EOB(15, 14); + SELECT_EOB(16, 15); + + y0 = _mm_load_si128((__m128i *)(d->qcoeff)); + y1 = _mm_load_si128((__m128i *)(d->qcoeff + 8)); + + /* dqcoeff = qcoeff * dequant */ + y0 = _mm_mullo_epi16(y0, dequant0); + y1 = _mm_mullo_epi16(y1, dequant1); + + _mm_store_si128((__m128i *)(d->dqcoeff), y0); + _mm_store_si128((__m128i *)(d->dqcoeff + 8), y1); + + *d->eob = eob; +} + +void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d) +{ + __m128i z0 = _mm_load_si128((__m128i *)(b->coeff)); + __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8)); + __m128i round0 = _mm_load_si128((__m128i *)(b->round)); + __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8)); + __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast)); + __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8)); + __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant)); + __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8)); + __m128i inv_zig_zag0 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag)); + __m128i inv_zig_zag1 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag + 8)); + + __m128i sz0, sz1, x0, x1, y0, y1, xdq0, xdq1, zeros, ones; + + /* sign of z: z >> 15 */ + sz0 = _mm_srai_epi16(z0, 15); + sz1 = _mm_srai_epi16(z1, 15); + + /* x = abs(z): (z ^ sz) - sz */ + x0 = _mm_xor_si128(z0, sz0); + x1 = _mm_xor_si128(z1, sz1); + x0 = _mm_sub_epi16(x0, sz0); + x1 = _mm_sub_epi16(x1, sz1); + + /* x += round */ + x0 = _mm_add_epi16(x0, round0); + x1 = _mm_add_epi16(x1, round1); + + /* y = (x * quant) >> 16 */ + y0 = _mm_mulhi_epi16(x0, quant_fast0); + y1 = _mm_mulhi_epi16(x1, quant_fast1); + + /* x = abs(y) = (y ^ sz) - sz */ + y0 = _mm_xor_si128(y0, sz0); + y1 = _mm_xor_si128(y1, sz1); + x0 = _mm_sub_epi16(y0, sz0); + x1 = _mm_sub_epi16(y1, sz1); + + /* qcoeff = x */ + _mm_store_si128((__m128i *)(d->qcoeff), x0); + _mm_store_si128((__m128i *)(d->qcoeff + 8), x1); + + /* x * dequant */ + xdq0 = _mm_mullo_epi16(x0, dequant0); + xdq1 = _mm_mullo_epi16(x1, dequant1); + + /* dqcoeff = x * dequant */ + _mm_store_si128((__m128i *)(d->dqcoeff), xdq0); + _mm_store_si128((__m128i *)(d->dqcoeff + 8), xdq1); + + /* build a mask for the zig zag */ + zeros = _mm_setzero_si128(); + + x0 = _mm_cmpeq_epi16(x0, zeros); + x1 = _mm_cmpeq_epi16(x1, zeros); + + ones = _mm_cmpeq_epi16(zeros, zeros); + + x0 = _mm_xor_si128(x0, ones); + x1 = _mm_xor_si128(x1, ones); + + x0 = _mm_and_si128(x0, inv_zig_zag0); + x1 = _mm_and_si128(x1, inv_zig_zag1); + + x0 = _mm_max_epi16(x0, x1); + + /* now down to 8 */ + x1 = _mm_shuffle_epi32(x0, 0xE); // 0b00001110 + + x0 = _mm_max_epi16(x0, x1); + + /* only 4 left */ + x1 = _mm_shufflelo_epi16(x0, 0xE); // 0b00001110 + + x0 = _mm_max_epi16(x0, x1); + + /* okay, just 2! */ + x1 = _mm_shufflelo_epi16(x0, 0x1); // 0b00000001 + + x0 = _mm_max_epi16(x0, x1); + + *d->eob = 0xFF & _mm_cvtsi128_si32(x0); +} diff --git a/vp8/encoder/x86/quantize_sse4.asm b/vp8/encoder/x86/quantize_sse4.asm index f0e5d407e..dbd171bfc 100644 --- a/vp8/encoder/x86/quantize_sse4.asm +++ b/vp8/encoder/x86/quantize_sse4.asm @@ -9,7 +9,7 @@ %include "vpx_ports/x86_abi_support.asm" -%include "asm_enc_offsets.asm" +%include "vp8_asm_enc_offsets.asm" ; void vp8_regular_quantize_b_sse4 | arg @@ -31,7 +31,7 @@ sym(vp8_regular_quantize_b_sse4): %define stack_size 32 sub rsp, stack_size %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 SAVE_XMM 8, u push rdi push rsi @@ -43,7 +43,7 @@ sym(vp8_regular_quantize_b_sse4): mov rdi, arg(0) ; BLOCK *b mov rsi, arg(1) ; BLOCKD *d %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 mov rdi, rcx ; BLOCK *b mov rsi, rdx ; BLOCKD *d %else @@ -240,7 +240,7 @@ ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8 pop rbp %else %undef xmm5 - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 pop rsi pop rdi RESTORE_XMM diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm index dd526f4f1..7b1dc119f 100644 --- a/vp8/encoder/x86/quantize_ssse3.asm +++ b/vp8/encoder/x86/quantize_ssse3.asm @@ -9,7 +9,7 @@ %include "vpx_ports/x86_abi_support.asm" -%include "asm_enc_offsets.asm" +%include "vp8_asm_enc_offsets.asm" ; void vp8_fast_quantize_b_ssse3 | arg @@ -27,7 +27,7 @@ sym(vp8_fast_quantize_b_ssse3): push rdi push rsi %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 push rdi push rsi %endif @@ -38,7 +38,7 @@ sym(vp8_fast_quantize_b_ssse3): mov rdi, arg(0) ; BLOCK *b mov rsi, arg(1) ; BLOCKD *d %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 mov rdi, rcx ; BLOCK *b mov rsi, rdx ; BLOCKD *d %else @@ -122,7 +122,7 @@ sym(vp8_fast_quantize_b_ssse3): pop rsi pop rdi %else - %ifidn __OUTPUT_FORMAT__,x64 + %if LIBVPX_YASM_WIN64 pop rsi pop rdi %endif diff --git a/vp8/encoder/x86/temporal_filter_apply_sse2.asm b/vp8/encoder/x86/temporal_filter_apply_sse2.asm index ce9d9836b..bd92b398a 100644 --- a/vp8/encoder/x86/temporal_filter_apply_sse2.asm +++ b/vp8/encoder/x86/temporal_filter_apply_sse2.asm @@ -50,7 +50,7 @@ sym(vp8_temporal_filter_apply_sse2): ; 0x8000 >> (16 - strength) mov rdx, 16 sub rdx, arg(4) ; 16 - strength - movd xmm4, rdx ; can't use rdx w/ shift + movq xmm4, rdx ; can't use rdx w/ shift movdqa xmm5, [GLOBAL(_const_top_bit)] psrlw xmm5, xmm4 movdqa [rsp + rounding_bit], xmm5 diff --git a/vp8/encoder/x86/vp8_enc_stubs_mmx.c b/vp8/encoder/x86/vp8_enc_stubs_mmx.c index da25f5227..cf3d8ca4a 100644 --- a/vp8/encoder/x86/vp8_enc_stubs_mmx.c +++ b/vp8/encoder/x86/vp8_enc_stubs_mmx.c @@ -10,7 +10,7 @@ #include "vpx_config.h" -#include "vpx_rtcd.h" +#include "vp8_rtcd.h" #include "vpx_ports/x86.h" #include "vp8/encoder/block.h" diff --git a/vp8/encoder/x86/vp8_enc_stubs_sse2.c b/vp8/encoder/x86/vp8_enc_stubs_sse2.c index 68db8155a..3dfbee368 100644 --- a/vp8/encoder/x86/vp8_enc_stubs_sse2.c +++ b/vp8/encoder/x86/vp8_enc_stubs_sse2.c @@ -10,7 +10,7 @@ #include "vpx_config.h" -#include "vpx_rtcd.h" +#include "vp8_rtcd.h" #include "vpx_ports/x86.h" #include "vp8/encoder/block.h" |