summaryrefslogtreecommitdiff
path: root/vp8/encoder
diff options
context:
space:
mode:
Diffstat (limited to 'vp8/encoder')
-rw-r--r--vp8/encoder/arm/armv5te/boolhuff_armv5te.asm2
-rw-r--r--vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm2
-rw-r--r--vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm2
-rw-r--r--vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm2
-rw-r--r--vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm2
-rw-r--r--vp8/encoder/arm/armv6/vp8_subtract_armv6.asm2
-rw-r--r--vp8/encoder/arm/dct_arm.c2
-rw-r--r--vp8/encoder/arm/neon/fastquantizeb_neon.asm2
-rw-r--r--vp8/encoder/arm/neon/shortfdct_neon.asm4
-rw-r--r--vp8/encoder/arm/neon/subtract_neon.asm2
-rw-r--r--vp8/encoder/arm/quantize_arm.c2
-rw-r--r--vp8/encoder/bitstream.c70
-rw-r--r--vp8/encoder/block.h2
-rw-r--r--vp8/encoder/boolhuff.c2
-rw-r--r--vp8/encoder/boolhuff.h2
-rw-r--r--vp8/encoder/dct.c16
-rw-r--r--vp8/encoder/denoising.c4
-rw-r--r--vp8/encoder/encodeframe.c12
-rw-r--r--vp8/encoder/encodeintra.c2
-rw-r--r--vp8/encoder/encodemb.c2
-rw-r--r--vp8/encoder/encodemv.c6
-rw-r--r--vp8/encoder/firstpass.c57
-rw-r--r--vp8/encoder/mcomp.c63
-rw-r--r--vp8/encoder/mcomp.h2
-rw-r--r--vp8/encoder/onyx_if.c325
-rw-r--r--vp8/encoder/onyx_int.h23
-rw-r--r--vp8/encoder/pickinter.c9
-rw-r--r--vp8/encoder/picklpf.c9
-rw-r--r--vp8/encoder/psnr.c2
-rw-r--r--vp8/encoder/quantize.c26
-rw-r--r--vp8/encoder/ratectrl.c39
-rw-r--r--vp8/encoder/rdopt.c32
-rw-r--r--vp8/encoder/temporal_filter.c2
-rw-r--r--vp8/encoder/tokenize.c4
-rw-r--r--vp8/encoder/tokenize.h2
-rw-r--r--vp8/encoder/vp8_asm_enc_offsets.c (renamed from vp8/encoder/asm_enc_offsets.c)0
-rw-r--r--vp8/encoder/x86/dct_sse2.asm4
-rw-r--r--vp8/encoder/x86/denoising_sse2.c3
-rw-r--r--vp8/encoder/x86/quantize_sse2.asm386
-rw-r--r--vp8/encoder/x86/quantize_sse2.c229
-rw-r--r--vp8/encoder/x86/quantize_sse4.asm8
-rw-r--r--vp8/encoder/x86/quantize_ssse3.asm8
-rw-r--r--vp8/encoder/x86/temporal_filter_apply_sse2.asm2
-rw-r--r--vp8/encoder/x86/vp8_enc_stubs_mmx.c2
-rw-r--r--vp8/encoder/x86/vp8_enc_stubs_sse2.c2
45 files changed, 672 insertions, 709 deletions
diff --git a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
index a644a004c..4abe818f1 100644
--- a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
+++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
@@ -15,7 +15,7 @@
EXPORT |vp8_encode_value|
IMPORT |vp8_validate_buffer_arm|
- INCLUDE asm_enc_offsets.asm
+ INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
index a1cd46704..90a141c62 100644
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
@@ -12,7 +12,7 @@
EXPORT |vp8cx_pack_tokens_armv5|
IMPORT |vp8_validate_buffer_arm|
- INCLUDE asm_enc_offsets.asm
+ INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
index 1fa5e6c22..3a8d17a81 100644
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
@@ -12,7 +12,7 @@
EXPORT |vp8cx_pack_mb_row_tokens_armv5|
IMPORT |vp8_validate_buffer_arm|
- INCLUDE asm_enc_offsets.asm
+ INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
index 90a98fe8d..e9aa4958f 100644
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
@@ -12,7 +12,7 @@
EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
IMPORT |vp8_validate_buffer_arm|
- INCLUDE asm_enc_offsets.asm
+ INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm b/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
index d61f5d94d..de35a1e13 100644
--- a/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm
@@ -11,7 +11,7 @@
EXPORT |vp8_fast_quantize_b_armv6|
- INCLUDE asm_enc_offsets.asm
+ INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm b/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
index f329f8f73..05746cf7f 100644
--- a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
+++ b/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
@@ -13,7 +13,7 @@
EXPORT |vp8_subtract_mbuv_armv6|
EXPORT |vp8_subtract_b_armv6|
- INCLUDE asm_enc_offsets.asm
+ INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/dct_arm.c b/vp8/encoder/arm/dct_arm.c
index af0fb274e..f71300d2c 100644
--- a/vp8/encoder/arm/dct_arm.c
+++ b/vp8/encoder/arm/dct_arm.c
@@ -9,7 +9,7 @@
*/
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#if HAVE_MEDIA
diff --git a/vp8/encoder/arm/neon/fastquantizeb_neon.asm b/vp8/encoder/arm/neon/fastquantizeb_neon.asm
index 143058842..9374310e5 100644
--- a/vp8/encoder/arm/neon/fastquantizeb_neon.asm
+++ b/vp8/encoder/arm/neon/fastquantizeb_neon.asm
@@ -12,7 +12,7 @@
EXPORT |vp8_fast_quantize_b_neon|
EXPORT |vp8_fast_quantize_b_pair_neon|
- INCLUDE asm_enc_offsets.asm
+ INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/neon/shortfdct_neon.asm b/vp8/encoder/arm/neon/shortfdct_neon.asm
index 09dd011ec..5ea8dd83d 100644
--- a/vp8/encoder/arm/neon/shortfdct_neon.asm
+++ b/vp8/encoder/arm/neon/shortfdct_neon.asm
@@ -97,7 +97,7 @@ coeff
vmlal.s16 q11, d6, d17 ; c1*2217 + d1*5352 + 12000
vmlsl.s16 q12, d6, d16 ; d1*2217 - c1*5352 + 51000
- vmvn.s16 d4, d4
+ vmvn d4, d4
vshrn.s32 d1, q11, #16 ; op[4] = (c1*2217 + d1*5352 + 12000)>>16
vsub.s16 d1, d1, d4 ; op[4] += (d1!=0)
vshrn.s32 d3, q12, #16 ; op[12]= (d1*2217 - c1*5352 + 51000)>>16
@@ -200,7 +200,7 @@ coeff
vmlal.s16 q11, d27, d17 ; B[4] = c1*2217 + d1*5352 + 12000
vmlsl.s16 q12, d27, d16 ; B[12] = d1*2217 - c1*5352 + 51000
- vmvn.s16 q14, q14
+ vmvn q14, q14
vshrn.s32 d1, q9, #16 ; A[4] = (c1*2217 + d1*5352 + 12000)>>16
vshrn.s32 d3, q10, #16 ; A[12]= (d1*2217 - c1*5352 + 51000)>>16
diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm
index 91a328c29..5bda78678 100644
--- a/vp8/encoder/arm/neon/subtract_neon.asm
+++ b/vp8/encoder/arm/neon/subtract_neon.asm
@@ -12,7 +12,7 @@
EXPORT |vp8_subtract_mby_neon|
EXPORT |vp8_subtract_mbuv_neon|
- INCLUDE asm_enc_offsets.asm
+ INCLUDE vp8_asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/quantize_arm.c b/vp8/encoder/arm/quantize_arm.c
index 8999e347f..80d9ad054 100644
--- a/vp8/encoder/arm/quantize_arm.c
+++ b/vp8/encoder/arm/quantize_arm.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vp8/encoder/block.h"
#include <math.h>
#include "vpx_mem/vpx_mem.h"
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index e666b6c7e..78e54e248 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -50,7 +50,7 @@ const int vp8cx_base_skip_false_prob[128] =
unsigned __int64 Sectionbits[500];
#endif
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
int intra_mode_stats[10][10][10];
static unsigned int tree_update_hist [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] [2];
extern unsigned int active_section;
@@ -90,17 +90,17 @@ static void update_mode(
if (new_b + (n << 8) < old_b)
{
- int i = 0;
+ int j = 0;
vp8_write_bit(w, 1);
do
{
- const vp8_prob p = Pnew[i];
+ const vp8_prob p = Pnew[j];
- vp8_write_literal(w, Pcur[i] = p ? p : 1, 8);
+ vp8_write_literal(w, Pcur[j] = p ? p : 1, 8);
}
- while (++i < n);
+ while (++j < n);
}
else
vp8_write_bit(w, 0);
@@ -245,15 +245,15 @@ void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
if (L)
{
- const unsigned char *pp = b->prob;
- int v = e >> 1;
- int n = L; /* number of bits in v, assumed nonzero */
- int i = 0;
+ const unsigned char *proba = b->prob;
+ const int v2 = e >> 1;
+ int n2 = L; /* number of bits in v2, assumed nonzero */
+ i = 0;
do
{
- const int bb = (v >> --n) & 1;
- split = 1 + (((range - 1) * pp[i>>1]) >> 8);
+ const int bb = (v2 >> --n2) & 1;
+ split = 1 + (((range - 1) * proba[i>>1]) >> 8);
i = b->tree[i+bb];
if (bb)
@@ -301,7 +301,7 @@ void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
lowvalue <<= shift;
}
- while (n);
+ while (n2);
}
@@ -432,7 +432,7 @@ static void write_mv_ref
assert(NEARESTMV <= m && m <= SPLITMV);
#endif
vp8_write_token(w, vp8_mv_ref_tree, p,
- vp8_mv_ref_encoding_array - NEARESTMV + m);
+ vp8_mv_ref_encoding_array + (m - NEARESTMV));
}
static void write_sub_mv_ref
@@ -444,7 +444,7 @@ static void write_sub_mv_ref
assert(LEFT4X4 <= m && m <= NEW4X4);
#endif
vp8_write_token(w, vp8_sub_mv_ref_tree, p,
- vp8_sub_mv_ref_encoding_array - LEFT4X4 + m);
+ vp8_sub_mv_ref_encoding_array + (m - LEFT4X4));
}
static void write_mv
@@ -531,7 +531,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
vp8_convert_rfct_to_prob(cpi);
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 1;
#endif
@@ -577,10 +577,10 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
*/
xd->mb_to_left_edge = -((mb_col * 16) << 3);
xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
- xd->mb_to_top_edge = -((mb_row * 16)) << 3;
+ xd->mb_to_top_edge = -((mb_row * 16) << 3);
xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 9;
#endif
@@ -593,7 +593,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
if (rf == INTRA_FRAME)
{
vp8_write(w, 0, cpi->prob_intra_coded);
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 6;
#endif
write_ymode(w, mode, pc->fc.ymode_prob);
@@ -633,13 +633,13 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
vp8_mv_ref_probs(mv_ref_p, ct);
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
accum_mv_refs(mode, ct);
#endif
}
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 3;
#endif
@@ -649,7 +649,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
{
case NEWMV:
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 5;
#endif
@@ -692,7 +692,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
if (blockmode == NEW4X4)
{
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 11;
#endif
write_mv(w, &blockmv.as_mv, &best_mv, (const MV_CONTEXT *) mvc);
@@ -769,7 +769,7 @@ static void write_kfmodes(VP8_COMP *cpi)
const B_PREDICTION_MODE L = left_block_mode(m, i);
const int bm = m->bmi[i].as_mode;
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
++intra_mode_stats [A] [L] [bm];
#endif
@@ -980,6 +980,12 @@ void vp8_calc_ref_frame_costs(int *ref_frame_cost,
int prob_garf
)
{
+ assert(prob_intra >= 0);
+ assert(prob_intra <= 255);
+ assert(prob_last >= 0);
+ assert(prob_last <= 255);
+ assert(prob_garf >= 0);
+ assert(prob_garf <= 255);
ref_frame_cost[INTRA_FRAME] = vp8_cost_zero(prob_intra);
ref_frame_cost[LAST_FRAME] = vp8_cost_one(prob_intra)
+ vp8_cost_zero(prob_last);
@@ -1056,7 +1062,7 @@ int vp8_update_coef_context(VP8_COMP *cpi)
if (cpi->common.frame_type == KEY_FRAME)
{
/* Reset to default counts/probabilities at key frames */
- vp8_copy(cpi->coef_counts, default_coef_counts);
+ vp8_copy(cpi->mb.coef_counts, default_coef_counts);
}
if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS)
@@ -1154,7 +1160,7 @@ void vp8_update_coef_probs(VP8_COMP *cpi)
#endif
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
++ tree_update_hist [i][j][k][t] [u];
#endif
@@ -1175,7 +1181,7 @@ void vp8_update_coef_probs(VP8_COMP *cpi)
while (++t < ENTROPY_NODES);
/* Accum token counts for generation of default statistics */
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
t = 0;
do
@@ -1316,7 +1322,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
vp8_start_encode(bc, cx_data, cx_data_end);
/* signal clr type */
- vp8_write_bit(bc, pc->clr_type);
+ vp8_write_bit(bc, 0);
vp8_write_bit(bc, pc->clamp_type);
}
@@ -1521,7 +1527,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
if (pc->frame_type != KEY_FRAME)
vp8_write_bit(bc, pc->refresh_last_frame);
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
if (pc->frame_type == INTER_FRAME)
active_section = 0;
@@ -1544,7 +1550,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
vp8_update_coef_probs(cpi);
#endif
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 2;
#endif
@@ -1555,7 +1561,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
{
write_kfmodes(cpi);
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 8;
#endif
}
@@ -1563,7 +1569,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
{
pack_inter_mode_mvs(cpi);
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 1;
#endif
}
@@ -1681,7 +1687,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
#endif
}
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
void print_tree_update_probs()
{
int i, j, k, l;
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index a30f88816..cf74c7aaf 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -37,7 +37,7 @@ typedef struct block
/* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
short *quant;
short *quant_fast;
- unsigned char *quant_shift;
+ short *quant_shift;
short *zbin;
short *zrun_zbin_boost;
short *round;
diff --git a/vp8/encoder/boolhuff.c b/vp8/encoder/boolhuff.c
index 74770a276..3b0c03a14 100644
--- a/vp8/encoder/boolhuff.c
+++ b/vp8/encoder/boolhuff.c
@@ -16,7 +16,7 @@ unsigned __int64 Sectionbits[500];
#endif
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
unsigned int active_section = 0;
#endif
diff --git a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h
index 830906306..39ab586b5 100644
--- a/vp8/encoder/boolhuff.h
+++ b/vp8/encoder/boolhuff.h
@@ -67,7 +67,7 @@ static void vp8_encode_bool(BOOL_CODER *br, int bit, int probability)
unsigned int lowvalue = br->lowvalue;
register unsigned int shift;
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
#if defined(SECTIONBITS_OUTPUT)
if (bit)
diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c
index b5a11ae34..091554a5d 100644
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -20,10 +20,10 @@ void vp8_short_fdct4x4_c(short *input, short *output, int pitch)
for (i = 0; i < 4; i++)
{
- a1 = ((ip[0] + ip[3])<<3);
- b1 = ((ip[1] + ip[2])<<3);
- c1 = ((ip[1] - ip[2])<<3);
- d1 = ((ip[0] - ip[3])<<3);
+ a1 = ((ip[0] + ip[3]) * 8);
+ b1 = ((ip[1] + ip[2]) * 8);
+ c1 = ((ip[1] - ip[2]) * 8);
+ d1 = ((ip[0] - ip[3]) * 8);
op[0] = a1 + b1;
op[2] = a1 - b1;
@@ -72,10 +72,10 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
for (i = 0; i < 4; i++)
{
- a1 = ((ip[0] + ip[2])<<2);
- d1 = ((ip[1] + ip[3])<<2);
- c1 = ((ip[1] - ip[3])<<2);
- b1 = ((ip[0] - ip[2])<<2);
+ a1 = ((ip[0] + ip[2]) * 4);
+ d1 = ((ip[1] + ip[3]) * 4);
+ c1 = ((ip[1] - ip[3]) * 4);
+ b1 = ((ip[0] - ip[2]) * 4);
op[0] = a1 + d1 + (a1!=0);
op[1] = b1 + c1;
diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
index f3faa227f..781926547 100644
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -13,7 +13,7 @@
#include "vp8/common/reconinter.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25;
/* SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming
@@ -206,8 +206,6 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
MB_MODE_INFO saved_mbmi;
MACROBLOCKD *filter_xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &filter_xd->mode_info_context->mbmi;
- int mv_col;
- int mv_row;
int sse_diff = zero_mv_sse - best_sse;
saved_mbmi = *mbmi;
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index d1b647be9..b550f6be1 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -10,6 +10,7 @@
#include "vpx_config.h"
+#include "vp8_rtcd.h"
#include "encodemb.h"
#include "encodemv.h"
#include "vp8/common/common.h"
@@ -852,11 +853,10 @@ void vp8_encode_frame(VP8_COMP *cpi)
if (xd->segmentation_enabled)
{
- int i, j;
+ int j;
if (xd->segmentation_enabled)
{
-
for (i = 0; i < cpi->encoding_thread_count; i++)
{
for (j = 0; j < 4; j++)
@@ -1299,8 +1299,9 @@ int vp8cx_encode_inter_macroblock
}
{
- /* Experimental code. Special case for gf and arf zeromv modes.
- * Increase zbin size to supress noise
+ /* Experimental code.
+ * Special case for gf and arf zeromv modes, for 1 temporal layer.
+ * Increase zbin size to supress noise.
*/
x->zbin_mode_boost = 0;
if (x->zbin_mode_boost_enabled)
@@ -1309,7 +1310,8 @@ int vp8cx_encode_inter_macroblock
{
if (xd->mode_info_context->mbmi.mode == ZEROMV)
{
- if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
+ if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME &&
+ cpi->oxcf.number_of_layers == 1)
x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
else
x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 340dd638d..cfa4cb927 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "quantize.h"
#include "vp8/common/reconintra4x4.h"
#include "encodemb.h"
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 7d494f2c6..7ed2fe1a1 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "encodemb.h"
#include "vp8/common/reconinter.h"
#include "quantize.h"
diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c
index 0c43d0692..2a74ff4ae 100644
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -16,7 +16,7 @@
#include <math.h>
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
extern unsigned int active_section;
#endif
@@ -359,7 +359,7 @@ void vp8_write_mvprobs(VP8_COMP *cpi)
vp8_writer *const w = cpi->bc;
MV_CONTEXT *mvc = cpi->common.fc.mvc;
int flags[2] = {0, 0};
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 4;
#endif
write_component_probs(
@@ -374,7 +374,7 @@ void vp8_write_mvprobs(VP8_COMP *cpi)
if (flags[0] || flags[1])
vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flags);
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 5;
#endif
}
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 30bf8a6ef..968c7f365 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -12,6 +12,7 @@
#include <limits.h>
#include <stdio.h>
+#include "./vpx_scale_rtcd.h"
#include "block.h"
#include "onyx_int.h"
#include "vp8/common/variance.h"
@@ -20,7 +21,7 @@
#include "vp8/common/systemdependent.h"
#include "mcomp.h"
#include "firstpass.h"
-#include "vpx_scale/vpxscale.h"
+#include "vpx_scale/vpx_scale.h"
#include "encodemb.h"
#include "vp8/common/extend.h"
#include "vpx_mem/vpx_mem.h"
@@ -710,8 +711,8 @@ skip_motion_search:
neutral_count++;
}
- d->bmi.mv.as_mv.row <<= 3;
- d->bmi.mv.as_mv.col <<= 3;
+ d->bmi.mv.as_mv.row *= 8;
+ d->bmi.mv.as_mv.col *= 8;
this_error = motion_error;
vp8_set_mbmode_and_mvs(x, NEWMV, &d->bmi.mv);
vp8_encode_inter16x16y(x);
@@ -857,7 +858,9 @@ skip_motion_search:
*/
if ((cm->current_video_frame > 0) &&
(cpi->twopass.this_frame_stats.pcnt_inter > 0.20) &&
- ((cpi->twopass.this_frame_stats.intra_error / cpi->twopass.this_frame_stats.coded_error) > 2.0))
+ ((cpi->twopass.this_frame_stats.intra_error /
+ DOUBLE_DIVIDE_CHECK(cpi->twopass.this_frame_stats.coded_error)) >
+ 2.0))
{
vp8_yv12_copy_frame(lst_yv12, gld_yv12);
}
@@ -906,13 +909,16 @@ extern const int vp8_bits_per_mb[2][QINDEX_RANGE];
static double bitcost( double prob )
{
- return -(log( prob ) / log( 2.0 ));
+ if (prob > 0.000122)
+ return -log(prob) / log(2.0);
+ else
+ return 13.0;
}
static int64_t estimate_modemvcost(VP8_COMP *cpi,
FIRSTPASS_STATS * fpstats)
{
int mv_cost;
- int mode_cost;
+ int64_t mode_cost;
double av_pct_inter = fpstats->pcnt_inter / fpstats->count;
double av_pct_motion = fpstats->pcnt_motion / fpstats->count;
@@ -934,10 +940,9 @@ static int64_t estimate_modemvcost(VP8_COMP *cpi,
/* Crude estimate of overhead cost from modes
* << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb
*/
- mode_cost =
- (int)( ( ((av_pct_inter - av_pct_motion) * zz_cost) +
- (av_pct_motion * motion_cost) +
- (av_intra * intra_cost) ) * cpi->common.MBs ) << 9;
+ mode_cost =((((av_pct_inter - av_pct_motion) * zz_cost) +
+ (av_pct_motion * motion_cost) +
+ (av_intra * intra_cost)) * cpi->common.MBs) * 512;
return mv_cost + mode_cost;
}
@@ -1322,7 +1327,7 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
return Q;
}
-extern void vp8_new_frame_rate(VP8_COMP *cpi, double framerate);
+extern void vp8_new_framerate(VP8_COMP *cpi, double framerate);
void vp8_init_second_pass(VP8_COMP *cpi)
{
@@ -1346,9 +1351,9 @@ void vp8_init_second_pass(VP8_COMP *cpi)
* sum duration is not. Its calculated based on the actual durations of
* all frames from the first pass.
*/
- vp8_new_frame_rate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration);
+ vp8_new_framerate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration);
- cpi->output_frame_rate = cpi->frame_rate;
+ cpi->output_framerate = cpi->framerate;
cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats.duration * two_pass_min_rate / 10000000.0);
@@ -2115,23 +2120,25 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
(cpi->twopass.kf_group_error_left > 0))
{
cpi->twopass.gf_group_bits =
- (int)((double)cpi->twopass.kf_group_bits *
- (gf_group_err / (double)cpi->twopass.kf_group_error_left));
+ (int64_t)(cpi->twopass.kf_group_bits *
+ (gf_group_err / cpi->twopass.kf_group_error_left));
}
else
cpi->twopass.gf_group_bits = 0;
- cpi->twopass.gf_group_bits = (int)(
+ cpi->twopass.gf_group_bits =
(cpi->twopass.gf_group_bits < 0)
? 0
: (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits)
- ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits);
+ ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits;
/* Clip cpi->twopass.gf_group_bits based on user supplied data rate
* variability limit (cpi->oxcf.two_pass_vbrmax_section)
*/
- if (cpi->twopass.gf_group_bits > max_bits * cpi->baseline_gf_interval)
- cpi->twopass.gf_group_bits = max_bits * cpi->baseline_gf_interval;
+ if (cpi->twopass.gf_group_bits >
+ (int64_t)max_bits * cpi->baseline_gf_interval)
+ cpi->twopass.gf_group_bits =
+ (int64_t)max_bits * cpi->baseline_gf_interval;
/* Reset the file position */
reset_fpf_position(cpi, start_pos);
@@ -2393,7 +2400,7 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
target_frame_size += cpi->min_frame_bandwidth;
/* Every other frame gets a few extra bits */
- if ( (cpi->common.frames_since_golden & 0x01) &&
+ if ( (cpi->frames_since_golden & 0x01) &&
(cpi->frames_till_gf_update_due > 0) )
{
target_frame_size += cpi->twopass.alt_extra_bits;
@@ -2445,7 +2452,7 @@ void vp8_second_pass(VP8_COMP *cpi)
*/
if (cpi->oxcf.error_resilient_mode)
{
- cpi->twopass.gf_group_bits = (int)cpi->twopass.kf_group_bits;
+ cpi->twopass.gf_group_bits = cpi->twopass.kf_group_bits;
cpi->twopass.gf_group_error_left =
(int)cpi->twopass.kf_group_error_left;
cpi->baseline_gf_interval = cpi->twopass.frames_to_key;
@@ -2524,7 +2531,7 @@ void vp8_second_pass(VP8_COMP *cpi)
/* Set nominal per second bandwidth for this frame */
cpi->target_bandwidth = (int)
- (cpi->per_frame_bandwidth * cpi->output_frame_rate);
+ (cpi->per_frame_bandwidth * cpi->output_framerate);
if (cpi->target_bandwidth < 0)
cpi->target_bandwidth = 0;
@@ -3180,7 +3187,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
/* Convert to a per second bitrate */
cpi->target_bandwidth = (int)(cpi->twopass.kf_bits *
- cpi->output_frame_rate);
+ cpi->output_framerate);
}
/* Note the total error score of the kf group minus the key frame itself */
@@ -3219,7 +3226,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
cpi->common.vert_scale = NORMAL;
/* Calculate Average bits per frame. */
- av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate);
+ av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->framerate);
/* CBR... Use the clip average as the target for deciding resample */
if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
@@ -3294,7 +3301,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
}
else
{
- int64_t clip_bits = (int64_t)(cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate));
+ int64_t clip_bits = (int64_t)(cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->framerate));
int64_t over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level;
/* If triggered last time the threshold for triggering again is
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index b08c7a589..0b11ea64a 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -18,7 +18,7 @@
#include <math.h>
#include "vp8/common/findnearmv.h"
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
static int mv_ref_ct [31] [4] [2];
static int mv_mode_cts [4] [2];
#endif
@@ -210,7 +210,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
unsigned char *z = (*(b->base_src) + b->src);
int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
- int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2;
+ int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
int tr = br, tc = bc;
unsigned int besterr;
unsigned int left, right, up, down, diag;
@@ -220,10 +220,14 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
unsigned int quarteriters = 4;
int thismse;
- int minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
- int maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
- int minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
- int maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
+ int minc = MAX(x->mv_col_min * 4,
+ (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
+ int maxc = MIN(x->mv_col_max * 4,
+ (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
+ int minr = MAX(x->mv_row_min * 4,
+ (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
+ int maxr = MIN(x->mv_row_max * 4,
+ (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
int y_stride;
int offset;
@@ -233,19 +237,18 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
#if ARCH_X86 || ARCH_X86_64
MACROBLOCKD *xd = &x->e_mbd;
- unsigned char *y0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
+ unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
unsigned char *y;
- int buf_r1, buf_r2, buf_c1, buf_c2;
+ int buf_r1, buf_r2, buf_c1;
/* Clamping to avoid out-of-range data access */
buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3;
buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3;
buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3;
- buf_c2 = ((bestmv->as_mv.col + 3) > x->mv_col_max)?(x->mv_col_max - bestmv->as_mv.col):3;
y_stride = 32;
/* Copy to intermediate buffer before searching. */
- vfp->copymem(y0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
+ vfp->copymem(y_0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
y = xd->y_buf + y_stride*buf_r1 +buf_c1;
#else
unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
@@ -255,8 +258,8 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
/* central mv */
- bestmv->as_mv.row <<= 3;
- bestmv->as_mv.col <<= 3;
+ bestmv->as_mv.row *= 8;
+ bestmv->as_mv.col *= 8;
/* calculate central point error */
besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
@@ -338,8 +341,8 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
tc = bc;
}
- bestmv->as_mv.row = br << 1;
- bestmv->as_mv.col = bc << 1;
+ bestmv->as_mv.row = br * 2;
+ bestmv->as_mv.col = bc * 2;
if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
(abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
@@ -376,12 +379,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
#if ARCH_X86 || ARCH_X86_64
MACROBLOCKD *xd = &x->e_mbd;
- unsigned char *y0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
+ unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
unsigned char *y;
y_stride = 32;
/* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
- vfp->copymem(y0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
+ vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
y = xd->y_buf + y_stride + 1;
#else
unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
@@ -687,12 +690,12 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
#if ARCH_X86 || ARCH_X86_64
MACROBLOCKD *xd = &x->e_mbd;
- unsigned char *y0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
+ unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
unsigned char *y;
y_stride = 32;
/* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
- vfp->copymem(y0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
+ vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
y = xd->y_buf + y_stride + 1;
#else
unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
@@ -700,8 +703,8 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
#endif
/* central mv */
- bestmv->as_mv.row <<= 3;
- bestmv->as_mv.col <<= 3;
+ bestmv->as_mv.row *= 8;
+ bestmv->as_mv.col *= 8;
startmv = *bestmv;
/* calculate central point error */
@@ -1316,8 +1319,8 @@ int vp8_diamond_search_sadx4
(*num00)++;
}
- this_mv.as_mv.row = best_mv->as_mv.row << 3;
- this_mv.as_mv.col = best_mv->as_mv.col << 3;
+ this_mv.as_mv.row = best_mv->as_mv.row * 8;
+ this_mv.as_mv.col = best_mv->as_mv.col * 8;
return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
+ mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
@@ -1710,8 +1713,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
}
}
- this_mv.as_mv.row = best_mv->as_mv.row << 3;
- this_mv.as_mv.col = best_mv->as_mv.col << 3;
+ this_mv.as_mv.row = best_mv->as_mv.row * 8;
+ this_mv.as_mv.col = best_mv->as_mv.col * 8;
return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
+ mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
@@ -1906,14 +1909,14 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
}
}
- this_mv.as_mv.row = ref_mv->as_mv.row << 3;
- this_mv.as_mv.col = ref_mv->as_mv.col << 3;
+ this_mv.as_mv.row = ref_mv->as_mv.row * 8;
+ this_mv.as_mv.col = ref_mv->as_mv.col * 8;
return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
+ mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
void print_mode_context(void)
{
FILE *f = fopen("modecont.c", "w");
@@ -1966,8 +1969,8 @@ void print_mode_context(void)
fclose(f);
}
-/* MV ref count ENTROPY_STATS stats code */
-#ifdef ENTROPY_STATS
+/* MV ref count VP8_ENTROPY_STATS stats code */
+#ifdef VP8_ENTROPY_STATS
void init_mv_ref_counts()
{
vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
@@ -2021,6 +2024,6 @@ void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
}
}
-#endif/* END MV ref count ENTROPY_STATS stats code */
+#endif/* END MV ref count VP8_ENTROPY_STATS stats code */
#endif
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index 890113f9a..e36c51543 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -15,7 +15,7 @@
#include "block.h"
#include "vp8/common/variance.h"
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
extern void init_mv_ref_counts();
extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]);
#endif
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 4680f392a..4b60cfd32 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -10,6 +10,7 @@
#include "vpx_config.h"
+#include "./vpx_scale_rtcd.h"
#include "vp8/common/onyxc_int.h"
#include "vp8/common/blockd.h"
#include "onyx_int.h"
@@ -19,7 +20,7 @@
#include "mcomp.h"
#include "firstpass.h"
#include "psnr.h"
-#include "vpx_scale/vpxscale.h"
+#include "vpx_scale/vpx_scale.h"
#include "vp8/common/extend.h"
#include "ratectrl.h"
#include "vp8/common/quant_common.h"
@@ -110,7 +111,7 @@ extern int skip_false_count;
#endif
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
extern int intra_mode_stats[10][10][10];
#endif
@@ -288,6 +289,125 @@ static void restore_layer_context(VP8_COMP *cpi, const int layer)
sizeof(cpi->mb.count_mb_ref_frame_usage));
}
+static int rescale(int val, int num, int denom)
+{
+ int64_t llnum = num;
+ int64_t llden = denom;
+ int64_t llval = val;
+
+ return (int)(llval * llnum / llden);
+}
+
+static void init_temporal_layer_context(VP8_COMP *cpi,
+ VP8_CONFIG *oxcf,
+ const int layer,
+ double prev_layer_framerate)
+{
+ LAYER_CONTEXT *lc = &cpi->layer_context[layer];
+
+ lc->framerate = cpi->output_framerate / cpi->oxcf.rate_decimator[layer];
+ lc->target_bandwidth = cpi->oxcf.target_bitrate[layer] * 1000;
+
+ lc->starting_buffer_level_in_ms = oxcf->starting_buffer_level;
+ lc->optimal_buffer_level_in_ms = oxcf->optimal_buffer_level;
+ lc->maximum_buffer_size_in_ms = oxcf->maximum_buffer_size;
+
+ lc->starting_buffer_level =
+ rescale((int)(oxcf->starting_buffer_level),
+ lc->target_bandwidth, 1000);
+
+ if (oxcf->optimal_buffer_level == 0)
+ lc->optimal_buffer_level = lc->target_bandwidth / 8;
+ else
+ lc->optimal_buffer_level =
+ rescale((int)(oxcf->optimal_buffer_level),
+ lc->target_bandwidth, 1000);
+
+ if (oxcf->maximum_buffer_size == 0)
+ lc->maximum_buffer_size = lc->target_bandwidth / 8;
+ else
+ lc->maximum_buffer_size =
+ rescale((int)(oxcf->maximum_buffer_size),
+ lc->target_bandwidth, 1000);
+
+ /* Work out the average size of a frame within this layer */
+ if (layer > 0)
+ lc->avg_frame_size_for_layer =
+ (int)((cpi->oxcf.target_bitrate[layer] -
+ cpi->oxcf.target_bitrate[layer-1]) * 1000 /
+ (lc->framerate - prev_layer_framerate));
+
+ lc->active_worst_quality = cpi->oxcf.worst_allowed_q;
+ lc->active_best_quality = cpi->oxcf.best_allowed_q;
+ lc->avg_frame_qindex = cpi->oxcf.worst_allowed_q;
+
+ lc->buffer_level = lc->starting_buffer_level;
+ lc->bits_off_target = lc->starting_buffer_level;
+
+ lc->total_actual_bits = 0;
+ lc->ni_av_qi = 0;
+ lc->ni_tot_qi = 0;
+ lc->ni_frames = 0;
+ lc->rate_correction_factor = 1.0;
+ lc->key_frame_rate_correction_factor = 1.0;
+ lc->gf_rate_correction_factor = 1.0;
+ lc->inter_frame_target = 0;
+}
+
+// Upon a run-time change in temporal layers, reset the layer context parameters
+// for any "new" layers. For "existing" layers, let them inherit the parameters
+// from the previous layer state (at the same layer #). In future we may want
+// to better map the previous layer state(s) to the "new" ones.
+static void reset_temporal_layer_change(VP8_COMP *cpi,
+ VP8_CONFIG *oxcf,
+ const int prev_num_layers)
+{
+ int i;
+ double prev_layer_framerate = 0;
+ const int curr_num_layers = cpi->oxcf.number_of_layers;
+ // If the previous state was 1 layer, get current layer context from cpi.
+ // We need this to set the layer context for the new layers below.
+ if (prev_num_layers == 1)
+ {
+ cpi->current_layer = 0;
+ save_layer_context(cpi);
+ }
+ for (i = 0; i < curr_num_layers; i++)
+ {
+ LAYER_CONTEXT *lc = &cpi->layer_context[i];
+ if (i >= prev_num_layers)
+ {
+ init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);
+ }
+ // The initial buffer levels are set based on their starting levels.
+ // We could set the buffer levels based on the previous state (normalized
+ // properly by the layer bandwidths) but we would need to keep track of
+ // the previous set of layer bandwidths (i.e., target_bitrate[i])
+ // before the layer change. For now, reset to the starting levels.
+ lc->buffer_level = cpi->oxcf.starting_buffer_level_in_ms *
+ cpi->oxcf.target_bitrate[i];
+ lc->bits_off_target = lc->buffer_level;
+ // TDOD(marpan): Should we set the rate_correction_factor and
+ // active_worst/best_quality to values derived from the previous layer
+ // state (to smooth-out quality dips/rate fluctuation at transition)?
+
+ // We need to treat the 1 layer case separately: oxcf.target_bitrate[i]
+ // is not set for 1 layer, and the restore_layer_context/save_context()
+ // are not called in the encoding loop, so we need to call it here to
+ // pass the layer context state to |cpi|.
+ if (curr_num_layers == 1)
+ {
+ lc->target_bandwidth = cpi->oxcf.target_bandwidth;
+ lc->buffer_level = cpi->oxcf.starting_buffer_level_in_ms *
+ lc->target_bandwidth / 1000;
+ lc->bits_off_target = lc->buffer_level;
+ restore_layer_context(cpi, 0);
+ }
+ prev_layer_framerate = cpi->output_framerate /
+ cpi->oxcf.rate_decimator[i];
+ }
+}
+
static void setup_features(VP8_COMP *cpi)
{
// If segmentation enabled set the update flags
@@ -640,7 +760,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
for (i = 0; i < MAX_MODES; i ++)
{
cpi->mode_check_freq[i] = 0;
- cpi->mode_chosen_counts[i] = 0;
}
cpi->mb.mbs_tested_so_far = 0;
@@ -825,7 +944,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
{
unsigned int sum = 0;
unsigned int total_mbs = cm->MBs;
- int i, thresh;
+ int thresh;
unsigned int total_skip;
int min = 2000;
@@ -1163,21 +1282,21 @@ int vp8_reverse_trans(int x)
return 63;
}
-void vp8_new_frame_rate(VP8_COMP *cpi, double framerate)
+void vp8_new_framerate(VP8_COMP *cpi, double framerate)
{
if(framerate < .1)
framerate = 30;
- cpi->frame_rate = framerate;
- cpi->output_frame_rate = framerate;
+ cpi->framerate = framerate;
+ cpi->output_framerate = framerate;
cpi->per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth /
- cpi->output_frame_rate);
+ cpi->output_framerate);
cpi->av_per_frame_bandwidth = cpi->per_frame_bandwidth;
cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth *
cpi->oxcf.two_pass_vbrmin_section / 100);
/* Set Maximum gf/arf interval */
- cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2);
+ cpi->max_gf_interval = ((int)(cpi->output_framerate / 2.0) + 2);
if(cpi->max_gf_interval < 12)
cpi->max_gf_interval = 12;
@@ -1200,17 +1319,6 @@ void vp8_new_frame_rate(VP8_COMP *cpi, double framerate)
}
-static int
-rescale(int val, int num, int denom)
-{
- int64_t llnum = num;
- int64_t llden = denom;
- int64_t llval = val;
-
- return (int)(llval * llnum / llden);
-}
-
-
static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
{
VP8_COMMON *cm = &cpi->common;
@@ -1229,13 +1337,13 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
* seems like a reasonable framerate, then use that as a guess, otherwise
* use 30.
*/
- cpi->frame_rate = (double)(oxcf->timebase.den) /
- (double)(oxcf->timebase.num);
+ cpi->framerate = (double)(oxcf->timebase.den) /
+ (double)(oxcf->timebase.num);
- if (cpi->frame_rate > 180)
- cpi->frame_rate = 30;
+ if (cpi->framerate > 180)
+ cpi->framerate = 30;
- cpi->ref_frame_rate = cpi->frame_rate;
+ cpi->ref_framerate = cpi->framerate;
/* change includes all joint functionality */
vp8_change_config(cpi, oxcf);
@@ -1261,63 +1369,13 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
if (cpi->oxcf.number_of_layers > 1)
{
unsigned int i;
- double prev_layer_frame_rate=0;
+ double prev_layer_framerate=0;
for (i=0; i<cpi->oxcf.number_of_layers; i++)
{
- LAYER_CONTEXT *lc = &cpi->layer_context[i];
-
- /* Layer configuration */
- lc->frame_rate =
- cpi->output_frame_rate / cpi->oxcf.rate_decimator[i];
- lc->target_bandwidth = cpi->oxcf.target_bitrate[i] * 1000;
-
- lc->starting_buffer_level_in_ms = oxcf->starting_buffer_level;
- lc->optimal_buffer_level_in_ms = oxcf->optimal_buffer_level;
- lc->maximum_buffer_size_in_ms = oxcf->maximum_buffer_size;
-
- lc->starting_buffer_level =
- rescale((int)(oxcf->starting_buffer_level),
- lc->target_bandwidth, 1000);
-
- if (oxcf->optimal_buffer_level == 0)
- lc->optimal_buffer_level = lc->target_bandwidth / 8;
- else
- lc->optimal_buffer_level =
- rescale((int)(oxcf->optimal_buffer_level),
- lc->target_bandwidth, 1000);
-
- if (oxcf->maximum_buffer_size == 0)
- lc->maximum_buffer_size = lc->target_bandwidth / 8;
- else
- lc->maximum_buffer_size =
- rescale((int)oxcf->maximum_buffer_size,
- lc->target_bandwidth, 1000);
-
- /* Work out the average size of a frame within this layer */
- if (i > 0)
- lc->avg_frame_size_for_layer =
- (int)((cpi->oxcf.target_bitrate[i] -
- cpi->oxcf.target_bitrate[i-1]) * 1000 /
- (lc->frame_rate - prev_layer_frame_rate));
-
- lc->active_worst_quality = cpi->oxcf.worst_allowed_q;
- lc->active_best_quality = cpi->oxcf.best_allowed_q;
- lc->avg_frame_qindex = cpi->oxcf.worst_allowed_q;
-
- lc->buffer_level = lc->starting_buffer_level;
- lc->bits_off_target = lc->starting_buffer_level;
-
- lc->total_actual_bits = 0;
- lc->ni_av_qi = 0;
- lc->ni_tot_qi = 0;
- lc->ni_frames = 0;
- lc->rate_correction_factor = 1.0;
- lc->key_frame_rate_correction_factor = 1.0;
- lc->gf_rate_correction_factor = 1.0;
- lc->inter_frame_target = 0;
-
- prev_layer_frame_rate = lc->frame_rate;
+ init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);
+ prev_layer_framerate = cpi->output_framerate /
+ cpi->oxcf.rate_decimator[i];
}
}
@@ -1341,14 +1399,14 @@ static void update_layer_contexts (VP8_COMP *cpi)
if (oxcf->number_of_layers > 1)
{
unsigned int i;
- double prev_layer_frame_rate=0;
+ double prev_layer_framerate=0;
for (i=0; i<oxcf->number_of_layers; i++)
{
LAYER_CONTEXT *lc = &cpi->layer_context[i];
- lc->frame_rate =
- cpi->ref_frame_rate / oxcf->rate_decimator[i];
+ lc->framerate =
+ cpi->ref_framerate / oxcf->rate_decimator[i];
lc->target_bandwidth = oxcf->target_bitrate[i] * 1000;
lc->starting_buffer_level = rescale(
@@ -1374,9 +1432,9 @@ static void update_layer_contexts (VP8_COMP *cpi)
lc->avg_frame_size_for_layer =
(int)((oxcf->target_bitrate[i] -
oxcf->target_bitrate[i-1]) * 1000 /
- (lc->frame_rate - prev_layer_frame_rate));
+ (lc->framerate - prev_layer_framerate));
- prev_layer_frame_rate = lc->frame_rate;
+ prev_layer_framerate = lc->framerate;
}
}
}
@@ -1384,7 +1442,7 @@ static void update_layer_contexts (VP8_COMP *cpi)
void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
{
VP8_COMMON *cm = &cpi->common;
- int last_w, last_h;
+ int last_w, last_h, prev_number_of_layers;
if (!cpi)
return;
@@ -1409,6 +1467,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
last_w = cpi->oxcf.Width;
last_h = cpi->oxcf.Height;
+ prev_number_of_layers = cpi->oxcf.number_of_layers;
cpi->oxcf = *oxcf;
@@ -1566,7 +1625,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
cpi->oxcf.target_bandwidth, 1000);
/* Set up frame rate and related parameters rate control values. */
- vp8_new_frame_rate(cpi, cpi->frame_rate);
+ vp8_new_framerate(cpi, cpi->framerate);
/* Set absolute upper and lower quality limits */
cpi->worst_quality = cpi->oxcf.worst_allowed_q;
@@ -1601,6 +1660,16 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
cpi->target_bandwidth = cpi->oxcf.target_bandwidth;
+ // Check if the number of temporal layers has changed, and if so reset the
+ // pattern counter and set/initialize the temporal layer context for the
+ // new layer configuration.
+ if (cpi->oxcf.number_of_layers != prev_number_of_layers)
+ {
+ // If the number of temporal layers are changed we must start at the
+ // base of the pattern cycle, so reset temporal_pattern_counter.
+ cpi->temporal_pattern_counter = 0;
+ reset_temporal_layer_change(cpi, oxcf, prev_number_of_layers);
+ }
cm->Width = cpi->oxcf.Width;
cm->Height = cpi->oxcf.Height;
@@ -1738,6 +1807,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
memcpy(cpi->base_skip_false_prob, vp8cx_base_skip_false_prob, sizeof(vp8cx_base_skip_false_prob));
cpi->common.current_video_frame = 0;
+ cpi->temporal_pattern_counter = 0;
cpi->kf_overspend_bits = 0;
cpi->kf_bitrate_adjustment = 0;
cpi->frames_till_gf_update_due = 0;
@@ -1805,7 +1875,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
else
cpi->cyclic_refresh_map = (signed char *) NULL;
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
init_context_counters();
#endif
@@ -1875,7 +1945,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
for (i = 0; i < KEY_FRAME_CONTEXT; i++)
{
- cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate;
+ cpi->prior_key_frame_distance[i] = (int)cpi->output_framerate;
}
#ifdef OUTPUT_YUV_SRC
@@ -1923,7 +1993,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->mb.rd_thresh_mult[i] = 128;
}
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
init_mv_ref_counts();
#endif
@@ -2060,7 +2130,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
#endif
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
print_context_counters();
print_tree_update_probs();
print_mode_context();
@@ -2203,7 +2273,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
{
extern int count_mb_seg[4];
FILE *f = fopen("modes.stt", "a");
- double dr = (double)cpi->frame_rate * (double)bytes * (double)8 / (double)count / (double)1000 ;
+ double dr = (double)cpi->framerate * (double)bytes * (double)8 / (double)count / (double)1000 ;
fprintf(f, "intra_mode in Intra Frames:\n");
fprintf(f, "Y: %8d, %8d, %8d, %8d, %8d\n", y_modes[0], y_modes[1], y_modes[2], y_modes[3], y_modes[4]);
fprintf(f, "UV:%8d, %8d, %8d, %8d\n", uv_modes[0], uv_modes[1], uv_modes[2], uv_modes[3]);
@@ -2242,7 +2312,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
}
#endif
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
{
int i, j, k;
FILE *fmode = fopen("modecontext.c", "w");
@@ -2587,7 +2657,7 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
Scale2Ratio(cm->horiz_scale, &hr, &hs);
Scale2Ratio(cm->vert_scale, &vr, &vs);
- vp8_scale_frame(sd, &cpi->scaled_source, cm->temp_scale_frame.y_buffer,
+ vpx_scale_frame(sd, &cpi->scaled_source, cm->temp_scale_frame.y_buffer,
tmp_height, hs, hr, vs, vr, 0);
vp8_yv12_extend_frame_borders(&cpi->scaled_source);
@@ -2680,12 +2750,12 @@ static void update_alt_ref_frame_stats(VP8_COMP *cpi)
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
/* this frame refreshes means next frames don't unless specified by user */
- cpi->common.frames_since_golden = 0;
+ cpi->frames_since_golden = 0;
/* Clear the alternate reference update pending flag. */
cpi->source_alt_ref_pending = 0;
- /* Set the alternate refernce frame active flag */
+ /* Set the alternate reference frame active flag */
cpi->source_alt_ref_active = 1;
@@ -2732,7 +2802,7 @@ static void update_golden_frame_stats(VP8_COMP *cpi)
* user
*/
cm->refresh_golden_frame = 0;
- cpi->common.frames_since_golden = 0;
+ cpi->frames_since_golden = 0;
cpi->recent_ref_frame_usage[INTRA_FRAME] = 1;
cpi->recent_ref_frame_usage[LAST_FRAME] = 1;
@@ -2764,12 +2834,12 @@ static void update_golden_frame_stats(VP8_COMP *cpi)
if (cpi->frames_till_gf_update_due > 0)
cpi->frames_till_gf_update_due--;
- if (cpi->common.frames_till_alt_ref_frame)
- cpi->common.frames_till_alt_ref_frame --;
+ if (cpi->frames_till_alt_ref_frame)
+ cpi->frames_till_alt_ref_frame --;
- cpi->common.frames_since_golden ++;
+ cpi->frames_since_golden ++;
- if (cpi->common.frames_since_golden > 1)
+ if (cpi->frames_since_golden > 1)
{
cpi->recent_ref_frame_usage[INTRA_FRAME] +=
cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME];
@@ -2815,14 +2885,16 @@ static void update_rd_ref_frame_probs(VP8_COMP *cpi)
if (cpi->common.refresh_alt_ref_frame)
{
cpi->prob_intra_coded += 40;
+ if (cpi->prob_intra_coded > 255)
+ cpi->prob_intra_coded = 255;
cpi->prob_last_coded = 200;
cpi->prob_gf_coded = 1;
}
- else if (cpi->common.frames_since_golden == 0)
+ else if (cpi->frames_since_golden == 0)
{
cpi->prob_last_coded = 214;
}
- else if (cpi->common.frames_since_golden == 1)
+ else if (cpi->frames_since_golden == 1)
{
cpi->prob_last_coded = 192;
cpi->prob_gf_coded = 220;
@@ -3296,12 +3368,12 @@ static void encode_frame_to_data_rate
cpi->per_frame_bandwidth = cpi->twopass.gf_bits;
/* per second target bitrate */
cpi->target_bandwidth = (int)(cpi->twopass.gf_bits *
- cpi->output_frame_rate);
+ cpi->output_framerate);
}
}
else
#endif
- cpi->per_frame_bandwidth = (int)(cpi->target_bandwidth / cpi->output_frame_rate);
+ cpi->per_frame_bandwidth = (int)(cpi->target_bandwidth / cpi->output_framerate);
/* Default turn off buffer to buffer copying */
cm->copy_buffer_to_gf = 0;
@@ -3330,7 +3402,7 @@ static void encode_frame_to_data_rate
else
cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 0;
- /* Check to see if a key frame is signalled
+ /* Check to see if a key frame is signaled
* For two pass with auto key frame enabled cm->frame_type may already
* be set, but not for one pass.
*/
@@ -3465,7 +3537,7 @@ static void encode_frame_to_data_rate
/* Note that we should not throw out a key frame (especially when
* spatial resampling is enabled).
*/
- if ((cm->frame_type == KEY_FRAME))
+ if (cm->frame_type == KEY_FRAME)
{
cpi->decimation_count = cpi->decimation_factor;
}
@@ -3483,6 +3555,8 @@ static void encode_frame_to_data_rate
cm->current_video_frame++;
cpi->frames_since_key++;
+ // We advance the temporal pattern for dropped frames.
+ cpi->temporal_pattern_counter++;
#if CONFIG_INTERNAL_STATS
cpi->count ++;
@@ -3500,7 +3574,8 @@ static void encode_frame_to_data_rate
for (i=cpi->current_layer+1; i<cpi->oxcf.number_of_layers; i++)
{
LAYER_CONTEXT *lc = &cpi->layer_context[i];
- lc->bits_off_target += cpi->av_per_frame_bandwidth;
+ lc->bits_off_target += (int)(lc->target_bandwidth /
+ lc->framerate);
if (lc->bits_off_target > lc->maximum_buffer_size)
lc->bits_off_target = lc->maximum_buffer_size;
lc->buffer_level = lc->bits_off_target;
@@ -3524,6 +3599,8 @@ static void encode_frame_to_data_rate
#endif
cm->current_video_frame++;
cpi->frames_since_key++;
+ // We advance the temporal pattern for dropped frames.
+ cpi->temporal_pattern_counter++;
return;
}
@@ -4481,7 +4558,7 @@ static void encode_frame_to_data_rate
{
LAYER_CONTEXT *lc = &cpi->layer_context[i];
int bits_off_for_this_layer =
- (int)(lc->target_bandwidth / lc->frame_rate -
+ (int)(lc->target_bandwidth / lc->framerate -
cpi->projected_frame_size);
lc->bits_off_target += bits_off_for_this_layer;
@@ -4597,9 +4674,6 @@ static void encode_frame_to_data_rate
cm->frame_type, cm->refresh_golden_frame,
cm->refresh_alt_ref_frame);
- for (i = 0; i < MAX_MODES; i++)
- fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]);
-
fprintf(fmodes, "\n");
fclose(fmodes);
@@ -4694,6 +4768,7 @@ static void encode_frame_to_data_rate
{
cm->current_video_frame++;
cpi->frames_since_key++;
+ cpi->temporal_pattern_counter++;
}
/* reset to normal state now that we are done. */
@@ -4731,7 +4806,7 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
{
double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth
*cpi->oxcf.two_pass_vbrmin_section / 100);
- cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->frame_rate);
+ cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->framerate);
}
}
#endif
@@ -4747,8 +4822,10 @@ int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_C
{
#if HAVE_NEON
int64_t store_reg[8];
-#endif
+#if CONFIG_RUNTIME_CPU_DETECT
VP8_COMMON *cm = &cpi->common;
+#endif
+#endif
struct vpx_usec_timer timer;
int res = 0;
@@ -4774,7 +4851,6 @@ int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_C
if(vp8_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
frame_flags, cpi->active_map_enabled ? cpi->active_map : NULL))
res = -1;
- cm->clr_type = sd->clrtype;
vpx_usec_timer_mark(&timer);
cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
@@ -4859,7 +4935,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
cpi->frames_till_gf_update_due);
force_src_buffer = &cpi->alt_ref_buffer;
}
- cm->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due;
+ cpi->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due;
cm->refresh_alt_ref_frame = 1;
cm->refresh_golden_frame = 0;
cm->refresh_last_frame = 0;
@@ -4964,7 +5040,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if (this_duration)
{
if (step)
- cpi->ref_frame_rate = 10000000.0 / this_duration;
+ cpi->ref_framerate = 10000000.0 / this_duration;
else
{
double avg_duration, interval;
@@ -4978,11 +5054,11 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if(interval > 10000000.0)
interval = 10000000;
- avg_duration = 10000000.0 / cpi->ref_frame_rate;
+ avg_duration = 10000000.0 / cpi->ref_framerate;
avg_duration *= (interval - avg_duration + this_duration);
avg_duration /= interval;
- cpi->ref_frame_rate = 10000000.0 / avg_duration;
+ cpi->ref_framerate = 10000000.0 / avg_duration;
}
if (cpi->oxcf.number_of_layers > 1)
@@ -4993,12 +5069,12 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
for (i=0; i<cpi->oxcf.number_of_layers; i++)
{
LAYER_CONTEXT *lc = &cpi->layer_context[i];
- lc->frame_rate = cpi->ref_frame_rate /
- cpi->oxcf.rate_decimator[i];
+ lc->framerate = cpi->ref_framerate /
+ cpi->oxcf.rate_decimator[i];
}
}
else
- vp8_new_frame_rate(cpi, cpi->ref_frame_rate);
+ vp8_new_framerate(cpi, cpi->ref_framerate);
}
cpi->last_time_stamp_seen = cpi->source->ts_start;
@@ -5013,9 +5089,9 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
/* Restore layer specific context & set frame rate */
layer = cpi->oxcf.layer_id[
- cm->current_video_frame % cpi->oxcf.periodicity];
+ cpi->temporal_pattern_counter % cpi->oxcf.periodicity];
restore_layer_context (cpi, layer);
- vp8_new_frame_rate (cpi, cpi->layer_context[layer].frame_rate);
+ vp8_new_framerate(cpi, cpi->layer_context[layer].framerate);
}
if (cpi->compressor_speed == 2)
@@ -5180,7 +5256,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
if (cm->show_frame)
{
-
+ cpi->common.show_frame_mi = cpi->common.mi;
cpi->count ++;
if (cpi->b_calculate_psnr)
@@ -5361,6 +5437,7 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla
#endif
#if CONFIG_POSTPROC
+ cpi->common.show_frame_mi = cpi->common.mi;
ret = vp8_post_proc_frame(&cpi->common, dest, flags);
#else
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index fb8ad357c..3ab0fe8bf 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -232,7 +232,7 @@ enum
typedef struct
{
/* Layer configuration */
- double frame_rate;
+ double framerate;
int target_bandwidth;
/* Layer specific coding parameters */
@@ -282,17 +282,17 @@ typedef struct VP8_COMP
{
DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, unsigned char, Y1quant_shift[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, unsigned char, Y2quant_shift[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, unsigned char, UVquant_shift[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][16]);
@@ -320,6 +320,7 @@ typedef struct VP8_COMP
YV12_BUFFER_CONFIG scaled_source;
YV12_BUFFER_CONFIG *last_frame_unscaled_source;
+ unsigned int frames_till_alt_ref_frame;
/* frame in src_buffers has been identified to be encoded as an alt ref */
int source_alt_ref_pending;
/* an alt ref frame has been encoded and is usable */
@@ -349,7 +350,6 @@ typedef struct VP8_COMP
int ambient_err;
unsigned int mode_check_freq[MAX_MODES];
- unsigned int mode_chosen_counts[MAX_MODES];
int rd_baseline_thresh[MAX_MODES];
@@ -370,6 +370,7 @@ typedef struct VP8_COMP
double key_frame_rate_correction_factor;
double gf_rate_correction_factor;
+ unsigned int frames_since_golden;
/* Count down till next GF */
int frames_till_gf_update_due;
@@ -402,7 +403,7 @@ typedef struct VP8_COMP
/* Minimum allocation that should be used for any frame */
int min_frame_bandwidth;
int inter_frame_target;
- double output_frame_rate;
+ double output_framerate;
int64_t last_time_stamp_seen;
int64_t last_end_time_stamp_seen;
int64_t first_time_stamp_ever;
@@ -416,8 +417,8 @@ typedef struct VP8_COMP
int buffered_mode;
- double frame_rate;
- double ref_frame_rate;
+ double framerate;
+ double ref_framerate;
int64_t buffer_level;
int64_t bits_off_target;
@@ -510,6 +511,10 @@ typedef struct VP8_COMP
int cyclic_refresh_q;
signed char *cyclic_refresh_map;
+ // Frame counter for the temporal pattern. Counter is rest when the temporal
+ // layers are changed dynamically (run-time change).
+ unsigned int temporal_pattern_counter;
+
#if CONFIG_MULTITHREAD
/* multithread data */
int * mt_current_mb_col;
@@ -587,7 +592,7 @@ typedef struct VP8_COMP
/* Error score of frames still to be coded in kf group */
int64_t kf_group_error_left;
/* Projected Bits available for a group including 1 GF or ARF */
- int gf_group_bits;
+ int64_t gf_group_bits;
/* Bits for the golden frame or ARF */
int gf_bits;
int alt_extra_bits;
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 673de2b33..c5279fed2 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -389,7 +389,7 @@ static void pick_intra_mbuv_mode(MACROBLOCK *mb)
}
-static void update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv)
+static void update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv)
{
MACROBLOCKD *xd = &x->e_mbd;
/* Split MV modes currently not supported when RD is nopt enabled,
@@ -594,6 +594,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX;
#endif
+ int sf_improved_mv_pred = cpi->sf.improved_mv_pred;
int_mv mvp;
int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
@@ -882,7 +883,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
last frame motion info is not stored, then we can not
use improved_mv_pred. */
if (cpi->oxcf.mr_encoder_id && !parent_ref_valid)
- cpi->sf.improved_mv_pred = 0;
+ sf_improved_mv_pred = 0;
if (parent_ref_valid && parent_ref_frame)
{
@@ -899,7 +900,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
}else
#endif
{
- if(cpi->sf.improved_mv_pred)
+ if(sf_improved_mv_pred)
{
if(!saddone)
{
@@ -1241,7 +1242,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
!= cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame])
best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int;
- update_mvcount(cpi, x, &best_ref_mv);
+ update_mvcount(x, &best_ref_mv);
}
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index 4121349a9..250d04c7f 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -9,11 +9,12 @@
*/
+#include "./vpx_scale_rtcd.h"
#include "vp8/common/onyxc_int.h"
#include "onyx_int.h"
#include "quantize.h"
#include "vpx_mem/vpx_mem.h"
-#include "vpx_scale/vpxscale.h"
+#include "vpx_scale/vpx_scale.h"
#include "vp8/common/alloccommon.h"
#include "vp8/common/loopfilter.h"
#if ARCH_ARM
@@ -312,7 +313,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
/* Get baseline error score */
/* Copy the unfiltered / processed recon buffer to the new buffer */
- vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
+ vpx_yv12_copy_y(saved_frame, cm->frame_to_show);
vp8cx_set_alt_lf_level(cpi, filt_mid);
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_mid);
@@ -338,7 +339,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
if(ss_err[filt_low] == 0)
{
/* Get Low filter error score */
- vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
+ vpx_yv12_copy_y(saved_frame, cm->frame_to_show);
vp8cx_set_alt_lf_level(cpi, filt_low);
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low);
@@ -366,7 +367,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
{
if(ss_err[filt_high] == 0)
{
- vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
+ vpx_yv12_copy_y(saved_frame, cm->frame_to_show);
vp8cx_set_alt_lf_level(cpi, filt_high);
vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_high);
diff --git a/vp8/encoder/psnr.c b/vp8/encoder/psnr.c
index 5bb49ad26..b3a3d9552 100644
--- a/vp8/encoder/psnr.c
+++ b/vp8/encoder/psnr.c
@@ -13,7 +13,7 @@
#include "math.h"
#include "vp8/common/systemdependent.h" /* for vp8_clear_system_state() */
-#define MAX_PSNR 60
+#define MAX_PSNR 100
double vp8_mse2psnr(double Samples, double Peak, double Mse)
{
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 33c8ef055..fda997ff6 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -50,8 +50,8 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
if (x >= zbin)
{
x += round_ptr[rc];
- y = (((x * quant_ptr[rc]) >> 16) + x)
- >> quant_shift_ptr[rc]; /* quantize (x) */
+ y = ((((x * quant_ptr[rc]) >> 16) + x)
+ * quant_shift_ptr[rc]) >> 16; /* quantize (x) */
x = (y ^ sz) - sz; /* get the sign back */
qcoeff_ptr[rc] = x; /* write to destination */
dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */
@@ -113,7 +113,7 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d)
short *zbin_ptr = b->zbin;
short *round_ptr = b->round;
short *quant_ptr = b->quant;
- unsigned char *quant_shift_ptr = b->quant_shift;
+ short *quant_shift_ptr = b->quant_shift;
short *qcoeff_ptr = d->qcoeff;
short *dqcoeff_ptr = d->dqcoeff;
short *dequant_ptr = d->dequant;
@@ -138,8 +138,8 @@ void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d)
if (x >= zbin)
{
x += round_ptr[rc];
- y = (((x * quant_ptr[rc]) >> 16) + x)
- >> quant_shift_ptr[rc]; /* quantize (x) */
+ y = ((((x * quant_ptr[rc]) >> 16) + x)
+ * quant_shift_ptr[rc]) >> 16; /* quantize (x) */
x = (y ^ sz) - sz; /* get the sign back */
qcoeff_ptr[rc] = x; /* write to destination */
dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */
@@ -167,7 +167,7 @@ void vp8_strict_quantize_b_c(BLOCK *b, BLOCKD *d)
int sz;
short *coeff_ptr;
short *quant_ptr;
- unsigned char *quant_shift_ptr;
+ short *quant_shift_ptr;
short *qcoeff_ptr;
short *dqcoeff_ptr;
short *dequant_ptr;
@@ -184,21 +184,21 @@ void vp8_strict_quantize_b_c(BLOCK *b, BLOCKD *d)
for (i = 0; i < 16; i++)
{
int dq;
- int round;
+ int rounding;
/*TODO: These arrays should be stored in zig-zag order.*/
rc = vp8_default_zig_zag1d[i];
z = coeff_ptr[rc];
dq = dequant_ptr[rc];
- round = dq >> 1;
+ rounding = dq >> 1;
/* Sign of z. */
sz = -(z < 0);
x = (z + sz) ^ sz;
- x += round;
+ x += rounding;
if (x >= dq)
{
/* Quantize x. */
- y = (((x * quant_ptr[rc]) >> 16) + x) >> quant_shift_ptr[rc];
+ y = ((((x * quant_ptr[rc]) >> 16) + x) * quant_shift_ptr[rc]) >> 16;
/* Put the sign back. */
x = (y + sz) ^ sz;
/* Save the coefficient and its dequantized value. */
@@ -406,7 +406,7 @@ static const int qzbin_factors_y2[129] =
#define EXACT_QUANT
#ifdef EXACT_QUANT
static void invert_quant(int improved_quant, short *quant,
- unsigned char *shift, short d)
+ short *shift, short d)
{
if(improved_quant)
{
@@ -418,11 +418,15 @@ static void invert_quant(int improved_quant, short *quant,
t = 1 + (1<<(16+l))/d;
*quant = (short)(t - (1<<16));
*shift = l;
+ /* use multiplication and constant shift by 16 */
+ *shift = 1 << (16 - *shift);
}
else
{
*quant = (1 << 16) / d;
*shift = 0;
+ /* use multiplication and constant shift by 16 */
+ *shift = 1 << (16 - *shift);
}
}
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index a399a3877..fe4db13b3 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -234,7 +234,7 @@ void vp8_save_coding_context(VP8_COMP *cpi)
cc->frames_since_key = cpi->frames_since_key;
cc->filter_level = cpi->common.filter_level;
cc->frames_till_gf_update_due = cpi->frames_till_gf_update_due;
- cc->frames_since_golden = cpi->common.frames_since_golden;
+ cc->frames_since_golden = cpi->frames_since_golden;
vp8_copy(cc->mvc, cpi->common.fc.mvc);
vp8_copy(cc->mvcosts, cpi->rd_costs.mvcosts);
@@ -271,7 +271,7 @@ void vp8_restore_coding_context(VP8_COMP *cpi)
cpi->frames_since_key = cc->frames_since_key;
cpi->common.filter_level = cc->filter_level;
cpi->frames_till_gf_update_due = cc->frames_till_gf_update_due;
- cpi->common.frames_since_golden = cc->frames_since_golden;
+ cpi->frames_since_golden = cc->frames_since_golden;
vp8_copy(cpi->common.fc.mvc, cc->mvc);
@@ -388,7 +388,7 @@ static void calc_iframe_target_size(VP8_COMP *cpi)
int initial_boost = 32; /* |3.0 * per_frame_bandwidth| */
/* Boost depends somewhat on frame rate: only used for 1 layer case. */
if (cpi->oxcf.number_of_layers == 1) {
- kf_boost = MAX(initial_boost, (int)(2 * cpi->output_frame_rate - 16));
+ kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16));
}
else {
/* Initial factor: set target size to: |3.0 * per_frame_bandwidth|. */
@@ -399,9 +399,9 @@ static void calc_iframe_target_size(VP8_COMP *cpi)
kf_boost = kf_boost * kf_boost_qadjustment[Q] / 100;
/* frame separation adjustment ( down) */
- if (cpi->frames_since_key < cpi->output_frame_rate / 2)
+ if (cpi->frames_since_key < cpi->output_framerate / 2)
kf_boost = (int)(kf_boost
- * cpi->frames_since_key / (cpi->output_frame_rate / 2));
+ * cpi->frames_since_key / (cpi->output_framerate / 2));
/* Minimal target size is |2* per_frame_bandwidth|. */
if (kf_boost < 16)
@@ -614,7 +614,6 @@ static void calc_gf_params(VP8_COMP *cpi)
static void calc_pframe_target_size(VP8_COMP *cpi)
{
int min_frame_target;
- int Adjustment;
int old_per_frame_bandwidth = cpi->per_frame_bandwidth;
if ( cpi->current_layer > 0)
@@ -658,6 +657,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
/* 1 pass */
else
{
+ int Adjustment;
/* Make rate adjustment to recover bits spent in key frame
* Test to see if the key frame inter data rate correction
* should still be in force
@@ -688,7 +688,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
*/
if ((cpi->gf_overspend_bits > 0) && (cpi->this_frame_target > min_frame_target))
{
- int Adjustment = (cpi->non_gf_bitrate_adjustment <= cpi->gf_overspend_bits) ? cpi->non_gf_bitrate_adjustment : cpi->gf_overspend_bits;
+ Adjustment = (cpi->non_gf_bitrate_adjustment <= cpi->gf_overspend_bits) ? cpi->non_gf_bitrate_adjustment : cpi->gf_overspend_bits;
if (Adjustment > (cpi->this_frame_target - min_frame_target))
Adjustment = (cpi->this_frame_target - min_frame_target);
@@ -715,7 +715,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
if (Adjustment > (cpi->this_frame_target - min_frame_target))
Adjustment = (cpi->this_frame_target - min_frame_target);
- if (cpi->common.frames_since_golden == (cpi->current_gf_interval >> 1))
+ if (cpi->frames_since_golden == (cpi->current_gf_interval >> 1))
cpi->this_frame_target += ((cpi->current_gf_interval - 1) * Adjustment);
else
cpi->this_frame_target -= Adjustment;
@@ -956,6 +956,21 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size)
cpi->bits_off_target = (int)cpi->oxcf.maximum_buffer_size;
cpi->buffer_level = cpi->bits_off_target;
+
+ if (cpi->oxcf.number_of_layers > 1) {
+ unsigned int i;
+
+ // Propagate bits saved by dropping the frame to higher layers.
+ for (i = cpi->current_layer + 1; i < cpi->oxcf.number_of_layers;
+ i++) {
+ LAYER_CONTEXT *lc = &cpi->layer_context[i];
+ lc->bits_off_target += (int)(lc->target_bandwidth /
+ lc->framerate);
+ if (lc->bits_off_target > lc->maximum_buffer_size)
+ lc->bits_off_target = lc->maximum_buffer_size;
+ lc->buffer_level = lc->bits_off_target;
+ }
+ }
}
}
@@ -1360,10 +1375,10 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi)
* whichever is smaller.
*/
int key_freq = cpi->oxcf.key_freq>0 ? cpi->oxcf.key_freq : 1;
- av_key_frame_frequency = (int)cpi->output_frame_rate * 2;
+ av_key_frame_frequency = 1 + (int)cpi->output_framerate * 2;
if (cpi->oxcf.auto_key && av_key_frame_frequency > key_freq)
- av_key_frame_frequency = cpi->oxcf.key_freq;
+ av_key_frame_frequency = key_freq;
cpi->prior_key_frame_distance[KEY_FRAME_CONTEXT - 1]
= av_key_frame_frequency;
@@ -1393,6 +1408,10 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi)
av_key_frame_frequency /= total_weight;
}
+ // TODO (marpan): Given the checks above, |av_key_frame_frequency|
+ // should always be above 0. But for now we keep the sanity check in.
+ if (av_key_frame_frequency == 0)
+ av_key_frame_frequency = 1;
return av_key_frame_frequency;
}
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index ceb817c02..5016cc422 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -14,7 +14,7 @@
#include <limits.h>
#include <assert.h>
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vp8/common/pragmas.h"
#include "tokenize.h"
#include "treewriter.h"
@@ -341,7 +341,7 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue)
void vp8_auto_select_speed(VP8_COMP *cpi)
{
- int milliseconds_for_compress = (int)(1000000 / cpi->frame_rate);
+ int milliseconds_for_compress = (int)(1000000 / cpi->framerate);
milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;
@@ -884,8 +884,8 @@ static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
for (mode = DC_PRED; mode <= TM_PRED; mode++)
{
- int rate;
- int distortion;
+ int this_rate;
+ int this_distortion;
int this_rd;
xd->mode_info_context->mbmi.uv_mode = mode;
@@ -907,17 +907,17 @@ static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
vp8_quantize_mbuv(x);
rate_to = rd_cost_mbuv(x);
- rate = rate_to + x->intra_uv_mode_cost[xd->frame_type][xd->mode_info_context->mbmi.uv_mode];
+ this_rate = rate_to + x->intra_uv_mode_cost[xd->frame_type][xd->mode_info_context->mbmi.uv_mode];
- distortion = vp8_mbuverror(x) / 4;
+ this_distortion = vp8_mbuverror(x) / 4;
- this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
if (this_rd < best_rd)
{
best_rd = this_rd;
- d = distortion;
- r = rate;
+ d = this_distortion;
+ r = this_rate;
*rate_tokenonly = rate_to;
mode_selected = mode;
}
@@ -935,7 +935,7 @@ int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4])
assert(NEARESTMV <= m && m <= SPLITMV);
vp8_mv_ref_probs(p, near_mv_ref_ct);
return vp8_cost_token(vp8_mv_ref_tree, p,
- vp8_mv_ref_encoding_array - NEARESTMV + m);
+ vp8_mv_ref_encoding_array + (m - NEARESTMV));
}
void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv)
@@ -1294,12 +1294,11 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
if (bestsme < INT_MAX)
{
- int distortion;
+ int disto;
unsigned int sse;
cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost,
- &distortion, &sse);
-
+ &disto, &sse);
}
} /* NEW4X4 */
@@ -1733,7 +1732,7 @@ void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffse
}
}
-static void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv)
+static void rd_update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv)
{
if (x->e_mbd.mode_info_context->mbmi.mode == SPLITMV)
{
@@ -2512,9 +2511,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
x->rd_thresh_mult[best_mode_index];
}
- /* Note how often each mode chosen as best */
- cpi->mode_chosen_counts[best_mode_index] ++;
-
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity)
{
@@ -2608,7 +2604,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
!= cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame])
best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int;
- rd_update_mvcount(cpi, x, &best_ref_mv);
+ rd_update_mvcount(x, &best_ref_mv);
}
void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate_)
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index b83ae89ab..7e3af71ec 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -17,7 +17,7 @@
#include "mcomp.h"
#include "firstpass.h"
#include "psnr.h"
-#include "vpx_scale/vpxscale.h"
+#include "vpx_scale/vpx_scale.h"
#include "vp8/common/extend.h"
#include "ratectrl.h"
#include "vp8/common/quant_common.h"
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index 3b5268b61..11559a720 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -20,7 +20,7 @@
/* Global event counters used for accumulating statistics across several
compressions, then generating context.c = initial stats. */
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
_int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
#endif
void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) ;
@@ -413,7 +413,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
}
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
void init_context_counters(void)
{
diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h
index c2d1438f9..1e6cea114 100644
--- a/vp8/encoder/tokenize.h
+++ b/vp8/encoder/tokenize.h
@@ -33,7 +33,7 @@ typedef struct
int rd_cost_mby(MACROBLOCKD *);
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
void init_context_counters();
void print_context_counters();
diff --git a/vp8/encoder/asm_enc_offsets.c b/vp8/encoder/vp8_asm_enc_offsets.c
index a4169b32f..a4169b32f 100644
--- a/vp8/encoder/asm_enc_offsets.c
+++ b/vp8/encoder/vp8_asm_enc_offsets.c
diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm
index d880ce0c4..d06bca592 100644
--- a/vp8/encoder/x86/dct_sse2.asm
+++ b/vp8/encoder/x86/dct_sse2.asm
@@ -29,7 +29,7 @@
movsxd rax, dword ptr arg(2)
lea rcx, [rsi + rax*2]
%else
- %ifidn __OUTPUT_FORMAT__,x64
+ %if LIBVPX_YASM_WIN64
%define input rcx
%define output rdx
%define pitch r8
@@ -53,7 +53,7 @@
RESTORE_GOT
pop rbp
%else
- %ifidn __OUTPUT_FORMAT__,x64
+ %if LIBVPX_YASM_WIN64
RESTORE_XMM
%endif
%endif
diff --git a/vp8/encoder/x86/denoising_sse2.c b/vp8/encoder/x86/denoising_sse2.c
index c1ac6c137..cceb8263f 100644
--- a/vp8/encoder/x86/denoising_sse2.c
+++ b/vp8/encoder/x86/denoising_sse2.c
@@ -12,9 +12,10 @@
#include "vp8/common/reconinter.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include <emmintrin.h>
+#include "vpx_ports/emmintrin_compat.h"
union sum_union {
__m128i v;
diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm
deleted file mode 100644
index 724e54c45..000000000
--- a/vp8/encoder/x86/quantize_sse2.asm
+++ /dev/null
@@ -1,386 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-%include "asm_enc_offsets.asm"
-
-
-; void vp8_regular_quantize_b_sse2 | arg
-; (BLOCK *b, | 0
-; BLOCKD *d) | 1
-
-global sym(vp8_regular_quantize_b_sse2) PRIVATE
-sym(vp8_regular_quantize_b_sse2):
- push rbp
- mov rbp, rsp
- SAVE_XMM 7
- GET_GOT rbx
-
-%if ABI_IS_32BIT
- push rdi
- push rsi
-%else
- %ifidn __OUTPUT_FORMAT__,x64
- push rdi
- push rsi
- %endif
-%endif
-
- ALIGN_STACK 16, rax
- %define zrun_zbin_boost 0 ; 8
- %define abs_minus_zbin 8 ; 32
- %define temp_qcoeff 40 ; 32
- %define qcoeff 72 ; 32
- %define stack_size 104
- sub rsp, stack_size
- ; end prolog
-
-%if ABI_IS_32BIT
- mov rdi, arg(0) ; BLOCK *b
- mov rsi, arg(1) ; BLOCKD *d
-%else
- %ifidn __OUTPUT_FORMAT__,x64
- mov rdi, rcx ; BLOCK *b
- mov rsi, rdx ; BLOCKD *d
- %else
- ;mov rdi, rdi ; BLOCK *b
- ;mov rsi, rsi ; BLOCKD *d
- %endif
-%endif
-
- mov rdx, [rdi + vp8_block_coeff] ; coeff_ptr
- mov rcx, [rdi + vp8_block_zbin] ; zbin_ptr
- movd xmm7, [rdi + vp8_block_zbin_extra] ; zbin_oq_value
-
- ; z
- movdqa xmm0, [rdx]
- movdqa xmm4, [rdx + 16]
- mov rdx, [rdi + vp8_block_round] ; round_ptr
-
- pshuflw xmm7, xmm7, 0
- punpcklwd xmm7, xmm7 ; duplicated zbin_oq_value
-
- movdqa xmm1, xmm0
- movdqa xmm5, xmm4
-
- ; sz
- psraw xmm0, 15
- psraw xmm4, 15
-
- ; (z ^ sz)
- pxor xmm1, xmm0
- pxor xmm5, xmm4
-
- ; x = abs(z)
- psubw xmm1, xmm0
- psubw xmm5, xmm4
-
- movdqa xmm2, [rcx]
- movdqa xmm3, [rcx + 16]
- mov rcx, [rdi + vp8_block_quant] ; quant_ptr
-
- ; *zbin_ptr + zbin_oq_value
- paddw xmm2, xmm7
- paddw xmm3, xmm7
-
- ; x - (*zbin_ptr + zbin_oq_value)
- psubw xmm1, xmm2
- psubw xmm5, xmm3
- movdqa [rsp + abs_minus_zbin], xmm1
- movdqa [rsp + abs_minus_zbin + 16], xmm5
-
- ; add (zbin_ptr + zbin_oq_value) back
- paddw xmm1, xmm2
- paddw xmm5, xmm3
-
- movdqa xmm2, [rdx]
- movdqa xmm6, [rdx + 16]
-
- movdqa xmm3, [rcx]
- movdqa xmm7, [rcx + 16]
-
- ; x + round
- paddw xmm1, xmm2
- paddw xmm5, xmm6
-
- ; y = x * quant_ptr >> 16
- pmulhw xmm3, xmm1
- pmulhw xmm7, xmm5
-
- ; y += x
- paddw xmm1, xmm3
- paddw xmm5, xmm7
-
- movdqa [rsp + temp_qcoeff], xmm1
- movdqa [rsp + temp_qcoeff + 16], xmm5
-
- pxor xmm6, xmm6
- ; zero qcoeff
- movdqa [rsp + qcoeff], xmm6
- movdqa [rsp + qcoeff + 16], xmm6
-
- mov rdx, [rdi + vp8_block_zrun_zbin_boost] ; zbin_boost_ptr
- mov rax, [rdi + vp8_block_quant_shift] ; quant_shift_ptr
- mov [rsp + zrun_zbin_boost], rdx
-
-%macro ZIGZAG_LOOP 1
- ; x
- movsx ecx, WORD PTR[rsp + abs_minus_zbin + %1 * 2]
-
- ; if (x >= zbin)
- sub cx, WORD PTR[rdx] ; x - zbin
- lea rdx, [rdx + 2] ; zbin_boost_ptr++
- jl .rq_zigzag_loop_%1 ; x < zbin
-
- movsx edi, WORD PTR[rsp + temp_qcoeff + %1 * 2]
-
- ; downshift by quant_shift[rc]
- movsx cx, BYTE PTR[rax + %1] ; quant_shift_ptr[rc]
- sar edi, cl ; also sets Z bit
- je .rq_zigzag_loop_%1 ; !y
- mov WORD PTR[rsp + qcoeff + %1 * 2], di ;qcoeff_ptr[rc] = temp_qcoeff[rc]
- mov rdx, [rsp + zrun_zbin_boost] ; reset to b->zrun_zbin_boost
-.rq_zigzag_loop_%1:
-%endmacro
-; in vp8_default_zig_zag1d order: see vp8/common/entropy.c
-ZIGZAG_LOOP 0
-ZIGZAG_LOOP 1
-ZIGZAG_LOOP 4
-ZIGZAG_LOOP 8
-ZIGZAG_LOOP 5
-ZIGZAG_LOOP 2
-ZIGZAG_LOOP 3
-ZIGZAG_LOOP 6
-ZIGZAG_LOOP 9
-ZIGZAG_LOOP 12
-ZIGZAG_LOOP 13
-ZIGZAG_LOOP 10
-ZIGZAG_LOOP 7
-ZIGZAG_LOOP 11
-ZIGZAG_LOOP 14
-ZIGZAG_LOOP 15
-
- movdqa xmm2, [rsp + qcoeff]
- movdqa xmm3, [rsp + qcoeff + 16]
-
- mov rcx, [rsi + vp8_blockd_dequant] ; dequant_ptr
- mov rdi, [rsi + vp8_blockd_dqcoeff] ; dqcoeff_ptr
-
- ; y ^ sz
- pxor xmm2, xmm0
- pxor xmm3, xmm4
- ; x = (y ^ sz) - sz
- psubw xmm2, xmm0
- psubw xmm3, xmm4
-
- ; dequant
- movdqa xmm0, [rcx]
- movdqa xmm1, [rcx + 16]
-
- mov rcx, [rsi + vp8_blockd_qcoeff] ; qcoeff_ptr
-
- pmullw xmm0, xmm2
- pmullw xmm1, xmm3
-
- movdqa [rcx], xmm2 ; store qcoeff
- movdqa [rcx + 16], xmm3
- movdqa [rdi], xmm0 ; store dqcoeff
- movdqa [rdi + 16], xmm1
-
- mov rcx, [rsi + vp8_blockd_eob]
-
- ; select the last value (in zig_zag order) for EOB
- pcmpeqw xmm2, xmm6
- pcmpeqw xmm3, xmm6
- ; !
- pcmpeqw xmm6, xmm6
- pxor xmm2, xmm6
- pxor xmm3, xmm6
- ; mask inv_zig_zag
- pand xmm2, [GLOBAL(inv_zig_zag)]
- pand xmm3, [GLOBAL(inv_zig_zag + 16)]
- ; select the max value
- pmaxsw xmm2, xmm3
- pshufd xmm3, xmm2, 00001110b
- pmaxsw xmm2, xmm3
- pshuflw xmm3, xmm2, 00001110b
- pmaxsw xmm2, xmm3
- pshuflw xmm3, xmm2, 00000001b
- pmaxsw xmm2, xmm3
- movd eax, xmm2
- and eax, 0xff
-
- mov BYTE PTR [rcx], al ; store eob
-
- ; begin epilog
- add rsp, stack_size
- pop rsp
-%if ABI_IS_32BIT
- pop rsi
- pop rdi
-%else
- %ifidn __OUTPUT_FORMAT__,x64
- pop rsi
- pop rdi
- %endif
-%endif
- RESTORE_GOT
- RESTORE_XMM
- pop rbp
- ret
-
-; void vp8_fast_quantize_b_sse2 | arg
-; (BLOCK *b, | 0
-; BLOCKD *d) | 1
-
-global sym(vp8_fast_quantize_b_sse2) PRIVATE
-sym(vp8_fast_quantize_b_sse2):
- push rbp
- mov rbp, rsp
- GET_GOT rbx
-
-%if ABI_IS_32BIT
- push rdi
- push rsi
-%else
- %ifidn __OUTPUT_FORMAT__,x64
- push rdi
- push rsi
- %else
- ; these registers are used for passing arguments
- %endif
-%endif
-
- ; end prolog
-
-%if ABI_IS_32BIT
- mov rdi, arg(0) ; BLOCK *b
- mov rsi, arg(1) ; BLOCKD *d
-%else
- %ifidn __OUTPUT_FORMAT__,x64
- mov rdi, rcx ; BLOCK *b
- mov rsi, rdx ; BLOCKD *d
- %else
- ;mov rdi, rdi ; BLOCK *b
- ;mov rsi, rsi ; BLOCKD *d
- %endif
-%endif
-
- mov rax, [rdi + vp8_block_coeff]
- mov rcx, [rdi + vp8_block_round]
- mov rdx, [rdi + vp8_block_quant_fast]
-
- ; z = coeff
- movdqa xmm0, [rax]
- movdqa xmm4, [rax + 16]
-
- ; dup z so we can save sz
- movdqa xmm1, xmm0
- movdqa xmm5, xmm4
-
- ; sz = z >> 15
- psraw xmm0, 15
- psraw xmm4, 15
-
- ; x = abs(z) = (z ^ sz) - sz
- pxor xmm1, xmm0
- pxor xmm5, xmm4
- psubw xmm1, xmm0
- psubw xmm5, xmm4
-
- ; x += round
- paddw xmm1, [rcx]
- paddw xmm5, [rcx + 16]
-
- mov rax, [rsi + vp8_blockd_qcoeff]
- mov rcx, [rsi + vp8_blockd_dequant]
- mov rdi, [rsi + vp8_blockd_dqcoeff]
-
- ; y = x * quant >> 16
- pmulhw xmm1, [rdx]
- pmulhw xmm5, [rdx + 16]
-
- ; x = (y ^ sz) - sz
- pxor xmm1, xmm0
- pxor xmm5, xmm4
- psubw xmm1, xmm0
- psubw xmm5, xmm4
-
- ; qcoeff = x
- movdqa [rax], xmm1
- movdqa [rax + 16], xmm5
-
- ; x * dequant
- movdqa xmm2, xmm1
- movdqa xmm3, xmm5
- pmullw xmm2, [rcx]
- pmullw xmm3, [rcx + 16]
-
- ; dqcoeff = x * dequant
- movdqa [rdi], xmm2
- movdqa [rdi + 16], xmm3
-
- pxor xmm4, xmm4 ;clear all bits
- pcmpeqw xmm1, xmm4
- pcmpeqw xmm5, xmm4
-
- pcmpeqw xmm4, xmm4 ;set all bits
- pxor xmm1, xmm4
- pxor xmm5, xmm4
-
- pand xmm1, [GLOBAL(inv_zig_zag)]
- pand xmm5, [GLOBAL(inv_zig_zag + 16)]
-
- pmaxsw xmm1, xmm5
-
- mov rcx, [rsi + vp8_blockd_eob]
-
- ; now down to 8
- pshufd xmm5, xmm1, 00001110b
-
- pmaxsw xmm1, xmm5
-
- ; only 4 left
- pshuflw xmm5, xmm1, 00001110b
-
- pmaxsw xmm1, xmm5
-
- ; okay, just 2!
- pshuflw xmm5, xmm1, 00000001b
-
- pmaxsw xmm1, xmm5
-
- movd eax, xmm1
- and eax, 0xff
-
- mov BYTE PTR [rcx], al ; store eob
-
- ; begin epilog
-%if ABI_IS_32BIT
- pop rsi
- pop rdi
-%else
- %ifidn __OUTPUT_FORMAT__,x64
- pop rsi
- pop rdi
- %endif
-%endif
-
- RESTORE_GOT
- pop rbp
- ret
-
-SECTION_RODATA
-align 16
-inv_zig_zag:
- dw 0x0001, 0x0002, 0x0006, 0x0007
- dw 0x0003, 0x0005, 0x0008, 0x000d
- dw 0x0004, 0x0009, 0x000c, 0x000e
- dw 0x000a, 0x000b, 0x000f, 0x0010
diff --git a/vp8/encoder/x86/quantize_sse2.c b/vp8/encoder/x86/quantize_sse2.c
new file mode 100644
index 000000000..f495bf287
--- /dev/null
+++ b/vp8/encoder/x86/quantize_sse2.c
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_config.h"
+#include "vp8_rtcd.h"
+#include "vpx_ports/x86.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp8/encoder/block.h"
+#include "vp8/common/entropy.h" /* vp8_default_inv_zig_zag */
+
+#include <mmintrin.h> /* MMX */
+#include <xmmintrin.h> /* SSE */
+#include <emmintrin.h> /* SSE2 */
+
+#define SELECT_EOB(i, z) \
+ do { \
+ short boost = *zbin_boost_ptr; \
+ int cmp = (x[z] < boost) | (y[z] == 0); \
+ zbin_boost_ptr++; \
+ if (cmp) \
+ goto select_eob_end_##i; \
+ qcoeff_ptr[z] = y[z]; \
+ eob = i; \
+ zbin_boost_ptr = b->zrun_zbin_boost; \
+ select_eob_end_##i:; \
+ } while (0)
+
+void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d)
+{
+ char eob = 0;
+ short *zbin_boost_ptr = b->zrun_zbin_boost;
+ short *qcoeff_ptr = d->qcoeff;
+ DECLARE_ALIGNED_ARRAY(16, short, x, 16);
+ DECLARE_ALIGNED_ARRAY(16, short, y, 16);
+
+ __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1;
+ __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift));
+ __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8));
+ __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
+ __m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8));
+ __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra);
+ __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin));
+ __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8));
+ __m128i round0 = _mm_load_si128((__m128i *)(b->round));
+ __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
+ __m128i quant0 = _mm_load_si128((__m128i *)(b->quant));
+ __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8));
+ __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
+ __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
+
+ vpx_memset(qcoeff_ptr, 0, 32);
+
+ /* Duplicate to all lanes. */
+ zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0);
+ zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra);
+
+ /* Sign of z: z >> 15 */
+ sz0 = _mm_srai_epi16(z0, 15);
+ sz1 = _mm_srai_epi16(z1, 15);
+
+ /* x = abs(z): (z ^ sz) - sz */
+ x0 = _mm_xor_si128(z0, sz0);
+ x1 = _mm_xor_si128(z1, sz1);
+ x0 = _mm_sub_epi16(x0, sz0);
+ x1 = _mm_sub_epi16(x1, sz1);
+
+ /* zbin[] + zbin_extra */
+ zbin0 = _mm_add_epi16(zbin0, zbin_extra);
+ zbin1 = _mm_add_epi16(zbin1, zbin_extra);
+
+ /* In C x is compared to zbin where zbin = zbin[] + boost + extra. Rebalance
+ * the equation because boost is the only value which can change:
+ * x - (zbin[] + extra) >= boost */
+ x_minus_zbin0 = _mm_sub_epi16(x0, zbin0);
+ x_minus_zbin1 = _mm_sub_epi16(x1, zbin1);
+
+ _mm_store_si128((__m128i *)(x), x_minus_zbin0);
+ _mm_store_si128((__m128i *)(x + 8), x_minus_zbin1);
+
+ /* All the remaining calculations are valid whether they are done now with
+ * simd or later inside the loop one at a time. */
+ x0 = _mm_add_epi16(x0, round0);
+ x1 = _mm_add_epi16(x1, round1);
+
+ y0 = _mm_mulhi_epi16(x0, quant0);
+ y1 = _mm_mulhi_epi16(x1, quant1);
+
+ y0 = _mm_add_epi16(y0, x0);
+ y1 = _mm_add_epi16(y1, x1);
+
+ /* Instead of shifting each value independently we convert the scaling
+ * factor with 1 << (16 - shift) so we can use multiply/return high half. */
+ y0 = _mm_mulhi_epi16(y0, quant_shift0);
+ y1 = _mm_mulhi_epi16(y1, quant_shift1);
+
+ /* Return the sign: (y ^ sz) - sz */
+ y0 = _mm_xor_si128(y0, sz0);
+ y1 = _mm_xor_si128(y1, sz1);
+ y0 = _mm_sub_epi16(y0, sz0);
+ y1 = _mm_sub_epi16(y1, sz1);
+
+ _mm_store_si128((__m128i *)(y), y0);
+ _mm_store_si128((__m128i *)(y + 8), y1);
+
+ zbin_boost_ptr = b->zrun_zbin_boost;
+
+ /* The loop gets unrolled anyway. Avoid the vp8_default_zig_zag1d lookup. */
+ SELECT_EOB(1, 0);
+ SELECT_EOB(2, 1);
+ SELECT_EOB(3, 4);
+ SELECT_EOB(4, 8);
+ SELECT_EOB(5, 5);
+ SELECT_EOB(6, 2);
+ SELECT_EOB(7, 3);
+ SELECT_EOB(8, 6);
+ SELECT_EOB(9, 9);
+ SELECT_EOB(10, 12);
+ SELECT_EOB(11, 13);
+ SELECT_EOB(12, 10);
+ SELECT_EOB(13, 7);
+ SELECT_EOB(14, 11);
+ SELECT_EOB(15, 14);
+ SELECT_EOB(16, 15);
+
+ y0 = _mm_load_si128((__m128i *)(d->qcoeff));
+ y1 = _mm_load_si128((__m128i *)(d->qcoeff + 8));
+
+ /* dqcoeff = qcoeff * dequant */
+ y0 = _mm_mullo_epi16(y0, dequant0);
+ y1 = _mm_mullo_epi16(y1, dequant1);
+
+ _mm_store_si128((__m128i *)(d->dqcoeff), y0);
+ _mm_store_si128((__m128i *)(d->dqcoeff + 8), y1);
+
+ *d->eob = eob;
+}
+
+void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
+{
+ __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
+ __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8));
+ __m128i round0 = _mm_load_si128((__m128i *)(b->round));
+ __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
+ __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast));
+ __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8));
+ __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
+ __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
+ __m128i inv_zig_zag0 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag));
+ __m128i inv_zig_zag1 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag + 8));
+
+ __m128i sz0, sz1, x0, x1, y0, y1, xdq0, xdq1, zeros, ones;
+
+ /* sign of z: z >> 15 */
+ sz0 = _mm_srai_epi16(z0, 15);
+ sz1 = _mm_srai_epi16(z1, 15);
+
+ /* x = abs(z): (z ^ sz) - sz */
+ x0 = _mm_xor_si128(z0, sz0);
+ x1 = _mm_xor_si128(z1, sz1);
+ x0 = _mm_sub_epi16(x0, sz0);
+ x1 = _mm_sub_epi16(x1, sz1);
+
+ /* x += round */
+ x0 = _mm_add_epi16(x0, round0);
+ x1 = _mm_add_epi16(x1, round1);
+
+ /* y = (x * quant) >> 16 */
+ y0 = _mm_mulhi_epi16(x0, quant_fast0);
+ y1 = _mm_mulhi_epi16(x1, quant_fast1);
+
+ /* x = abs(y) = (y ^ sz) - sz */
+ y0 = _mm_xor_si128(y0, sz0);
+ y1 = _mm_xor_si128(y1, sz1);
+ x0 = _mm_sub_epi16(y0, sz0);
+ x1 = _mm_sub_epi16(y1, sz1);
+
+ /* qcoeff = x */
+ _mm_store_si128((__m128i *)(d->qcoeff), x0);
+ _mm_store_si128((__m128i *)(d->qcoeff + 8), x1);
+
+ /* x * dequant */
+ xdq0 = _mm_mullo_epi16(x0, dequant0);
+ xdq1 = _mm_mullo_epi16(x1, dequant1);
+
+ /* dqcoeff = x * dequant */
+ _mm_store_si128((__m128i *)(d->dqcoeff), xdq0);
+ _mm_store_si128((__m128i *)(d->dqcoeff + 8), xdq1);
+
+ /* build a mask for the zig zag */
+ zeros = _mm_setzero_si128();
+
+ x0 = _mm_cmpeq_epi16(x0, zeros);
+ x1 = _mm_cmpeq_epi16(x1, zeros);
+
+ ones = _mm_cmpeq_epi16(zeros, zeros);
+
+ x0 = _mm_xor_si128(x0, ones);
+ x1 = _mm_xor_si128(x1, ones);
+
+ x0 = _mm_and_si128(x0, inv_zig_zag0);
+ x1 = _mm_and_si128(x1, inv_zig_zag1);
+
+ x0 = _mm_max_epi16(x0, x1);
+
+ /* now down to 8 */
+ x1 = _mm_shuffle_epi32(x0, 0xE); // 0b00001110
+
+ x0 = _mm_max_epi16(x0, x1);
+
+ /* only 4 left */
+ x1 = _mm_shufflelo_epi16(x0, 0xE); // 0b00001110
+
+ x0 = _mm_max_epi16(x0, x1);
+
+ /* okay, just 2! */
+ x1 = _mm_shufflelo_epi16(x0, 0x1); // 0b00000001
+
+ x0 = _mm_max_epi16(x0, x1);
+
+ *d->eob = 0xFF & _mm_cvtsi128_si32(x0);
+}
diff --git a/vp8/encoder/x86/quantize_sse4.asm b/vp8/encoder/x86/quantize_sse4.asm
index f0e5d407e..dbd171bfc 100644
--- a/vp8/encoder/x86/quantize_sse4.asm
+++ b/vp8/encoder/x86/quantize_sse4.asm
@@ -9,7 +9,7 @@
%include "vpx_ports/x86_abi_support.asm"
-%include "asm_enc_offsets.asm"
+%include "vp8_asm_enc_offsets.asm"
; void vp8_regular_quantize_b_sse4 | arg
@@ -31,7 +31,7 @@ sym(vp8_regular_quantize_b_sse4):
%define stack_size 32
sub rsp, stack_size
%else
- %ifidn __OUTPUT_FORMAT__,x64
+ %if LIBVPX_YASM_WIN64
SAVE_XMM 8, u
push rdi
push rsi
@@ -43,7 +43,7 @@ sym(vp8_regular_quantize_b_sse4):
mov rdi, arg(0) ; BLOCK *b
mov rsi, arg(1) ; BLOCKD *d
%else
- %ifidn __OUTPUT_FORMAT__,x64
+ %if LIBVPX_YASM_WIN64
mov rdi, rcx ; BLOCK *b
mov rsi, rdx ; BLOCKD *d
%else
@@ -240,7 +240,7 @@ ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8
pop rbp
%else
%undef xmm5
- %ifidn __OUTPUT_FORMAT__,x64
+ %if LIBVPX_YASM_WIN64
pop rsi
pop rdi
RESTORE_XMM
diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm
index dd526f4f1..7b1dc119f 100644
--- a/vp8/encoder/x86/quantize_ssse3.asm
+++ b/vp8/encoder/x86/quantize_ssse3.asm
@@ -9,7 +9,7 @@
%include "vpx_ports/x86_abi_support.asm"
-%include "asm_enc_offsets.asm"
+%include "vp8_asm_enc_offsets.asm"
; void vp8_fast_quantize_b_ssse3 | arg
@@ -27,7 +27,7 @@ sym(vp8_fast_quantize_b_ssse3):
push rdi
push rsi
%else
- %ifidn __OUTPUT_FORMAT__,x64
+ %if LIBVPX_YASM_WIN64
push rdi
push rsi
%endif
@@ -38,7 +38,7 @@ sym(vp8_fast_quantize_b_ssse3):
mov rdi, arg(0) ; BLOCK *b
mov rsi, arg(1) ; BLOCKD *d
%else
- %ifidn __OUTPUT_FORMAT__,x64
+ %if LIBVPX_YASM_WIN64
mov rdi, rcx ; BLOCK *b
mov rsi, rdx ; BLOCKD *d
%else
@@ -122,7 +122,7 @@ sym(vp8_fast_quantize_b_ssse3):
pop rsi
pop rdi
%else
- %ifidn __OUTPUT_FORMAT__,x64
+ %if LIBVPX_YASM_WIN64
pop rsi
pop rdi
%endif
diff --git a/vp8/encoder/x86/temporal_filter_apply_sse2.asm b/vp8/encoder/x86/temporal_filter_apply_sse2.asm
index ce9d9836b..bd92b398a 100644
--- a/vp8/encoder/x86/temporal_filter_apply_sse2.asm
+++ b/vp8/encoder/x86/temporal_filter_apply_sse2.asm
@@ -50,7 +50,7 @@ sym(vp8_temporal_filter_apply_sse2):
; 0x8000 >> (16 - strength)
mov rdx, 16
sub rdx, arg(4) ; 16 - strength
- movd xmm4, rdx ; can't use rdx w/ shift
+ movq xmm4, rdx ; can't use rdx w/ shift
movdqa xmm5, [GLOBAL(_const_top_bit)]
psrlw xmm5, xmm4
movdqa [rsp + rounding_bit], xmm5
diff --git a/vp8/encoder/x86/vp8_enc_stubs_mmx.c b/vp8/encoder/x86/vp8_enc_stubs_mmx.c
index da25f5227..cf3d8ca4a 100644
--- a/vp8/encoder/x86/vp8_enc_stubs_mmx.c
+++ b/vp8/encoder/x86/vp8_enc_stubs_mmx.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vpx_ports/x86.h"
#include "vp8/encoder/block.h"
diff --git a/vp8/encoder/x86/vp8_enc_stubs_sse2.c b/vp8/encoder/x86/vp8_enc_stubs_sse2.c
index 68db8155a..3dfbee368 100644
--- a/vp8/encoder/x86/vp8_enc_stubs_sse2.c
+++ b/vp8/encoder/x86/vp8_enc_stubs_sse2.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
#include "vpx_ports/x86.h"
#include "vp8/encoder/block.h"