summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRogozhkin, Dmitry V <dmitry.v.rogozhkin@intel.com>2012-06-11 09:45:41 +0800
committerXiang, Haihao <haihao.xiang@intel.com>2012-06-11 09:49:38 +0800
commit8ac4027a75e060e2ab69b0e9b4aefaedb6f3f9d7 (patch)
tree142163022940c173b9b23eb2548ac3154ebc1e6d
parent7f51f442aa00faa007fe089cc80811c7b6ecdecc (diff)
downloadvaapi-intel-driver-8ac4027a75e060e2ab69b0e9b4aefaedb6f3f9d7.tar.gz
vaapi-intel-driver-8ac4027a75e060e2ab69b0e9b4aefaedb6f3f9d7.tar.bz2
vaapi-intel-driver-8ac4027a75e060e2ab69b0e9b4aefaedb6f3f9d7.zip
New BRC algorithm to match HRD
It reduces the number of recoding as well. Signed-off-by: Rogozhkin, Dmitry V <dmitry.v.rogozhkin@intel.com>
-rwxr-xr-xsrc/Makefile.am2
-rw-r--r--src/gen6_mfc.c367
-rw-r--r--src/gen6_mfc.h21
-rw-r--r--src/gen6_vme.c11
-rw-r--r--src/gen7_mfc.c1
-rw-r--r--src/i965_encoder.c2
-rw-r--r--src/i965_encoder.h2
7 files changed, 322 insertions, 84 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index bb0b033..b8cc501 100755
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -40,7 +40,7 @@ driver_ldflags = \
$(NULL)
driver_libs = \
- -lpthread \
+ -lpthread -lm \
$(DRM_LIBS) -ldrm_intel \
$(LIBVA_DEPS_LIBS) \
$(LIBVA_X11_DEPS_LIBS) \
diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
index aad854b..95a85f3 100644
--- a/src/gen6_mfc.c
+++ b/src/gen6_mfc.c
@@ -30,6 +30,7 @@
#include <stdlib.h>
#include <string.h>
#include <assert.h>
+#include <math.h>
#include "intel_batchbuffer.h"
#include "i965_defines.h"
@@ -42,6 +43,34 @@
#define CMD_LEN_IN_OWORD 4
+#define BRC_CLIP(x, min, max) \
+{ \
+ x = ((x > (max)) ? (max) : ((x < (min)) ? (min) : x)); \
+}
+
+#define BRC_P_B_QP_DIFF 4
+#define BRC_I_P_QP_DIFF 2
+#define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF)
+
+#define BRC_PWEIGHT 0.6 /* weight if P slice with comparison to I slice */
+#define BRC_BWEIGHT 0.25 /* weight if B slice with comparison to I slice */
+
+#define BRC_QP_MAX_CHANGE 5 /* maximum qp modification */
+#define BRC_CY 0.1 /* weight for */
+#define BRC_CX_UNDERFLOW 5.
+#define BRC_CX_OVERFLOW -4.
+
+#define BRC_PI_0_5 1.5707963267948966192313216916398
+
+typedef enum _gen6_brc_status
+{
+ BRC_NO_HRD_VIOLATION = 0,
+ BRC_UNDERFLOW = 1,
+ BRC_OVERFLOW = 2,
+ BRC_UNDERFLOW_WITH_MAX_QP = 3,
+ BRC_OVERFLOW_WITH_MIN_QP = 4,
+} gen6_brc_status;
+
static const uint32_t gen6_mfc_batchbuffer_avc_intra[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_intra.g6b"
};
@@ -364,10 +393,11 @@ gen6_mfc_avc_slice_state(VADriverContextP ctx,
if (batch == NULL)
batch = encoder_context->base.batch;
- if (slice_type == SLICE_TYPE_I)
- bit_rate_control_target = 0;
- else
- bit_rate_control_target = 1;
+ bit_rate_control_target = slice_type;
+ if (slice_type == SLICE_TYPE_SP)
+ bit_rate_control_target = SLICE_TYPE_P;
+ else if (slice_type == SLICE_TYPE_SI)
+ bit_rate_control_target = SLICE_TYPE_I;
if (slice_type == SLICE_TYPE_P) {
weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
@@ -420,11 +450,11 @@ gen6_mfc_avc_slice_state(VADriverContextP ctx,
slice_param->macroblock_address );
OUT_BCS_BATCH(batch, (nexty << 16) | nextx); /*Next slice first MB X&Y*/
OUT_BCS_BATCH(batch,
- (rate_control_enable << 31) | /*in CBR mode RateControlCounterEnable = enable*/
+ (0/*rate_control_enable*/ << 31) | /*in CBR mode RateControlCounterEnable = enable*/
(1 << 30) | /*ResetRateControlCounter*/
(0 << 28) | /*RC Triggle Mode = Always Rate Control*/
(4 << 24) | /*RC Stable Tolerance, middle level*/
- (rate_control_enable << 23) | /*RC Panic Enable*/
+ (0/*rate_control_enable*/ << 23) | /*RC Panic Enable*/
(0 << 22) | /*QP mode, don't modfiy CBP*/
(0 << 21) | /*MB Type Direct Conversion Enabled*/
(0 << 20) | /*MB Type Skip Conversion Enabled*/
@@ -734,20 +764,21 @@ gen6_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
struct gen6_mfc_context *mfc_context)
{
VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
-
int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
float fps = pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
int intra_mb_size = inter_mb_size * 5.0;
int i;
-
- mfc_context->bit_rate_control_context[0].target_mb_size = intra_mb_size;
- mfc_context->bit_rate_control_context[0].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
- mfc_context->bit_rate_control_context[1].target_mb_size = inter_mb_size;
- mfc_context->bit_rate_control_context[1].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
- for(i = 0 ; i < 2; i++) {
+ mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size;
+ mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
+ mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size;
+ mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
+ mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_mb_size = inter_mb_size;
+ mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
+
+ for(i = 0 ; i < 3; i++) {
mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
@@ -764,52 +795,222 @@ gen6_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
mfc_context->bit_rate_control_context[i].Correct[5] = 8;
}
- mfc_context->bit_rate_control_context[0].TargetSizeInWord = (intra_mb_size + 16)/ 16;
- mfc_context->bit_rate_control_context[1].TargetSizeInWord = (inter_mb_size + 16)/ 16;
+ mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16;
+ mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16;
+ mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord = (inter_mb_size + 16)/ 16;
- mfc_context->bit_rate_control_context[0].MaxSizeInWord = mfc_context->bit_rate_control_context[0].TargetSizeInWord * 1.5;
- mfc_context->bit_rate_control_context[1].MaxSizeInWord = mfc_context->bit_rate_control_context[1].TargetSizeInWord * 1.5;
+ mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5;
+ mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5;
+ mfc_context->bit_rate_control_context[SLICE_TYPE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord * 1.5;
}
-static int gen6_mfc_bit_rate_control_context_update(struct encode_state *encode_state,
- struct gen6_mfc_context *mfc_context,
- int current_frame_size)
+static void
+gen6_mfc_brc_init(struct encode_state *encode_state,
+ struct intel_encoder_context* encoder_context)
{
- VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
- int control_index = 1 - (pSliceParameter->slice_type == SLICE_TYPE_I);
- int oldQp = mfc_context->bit_rate_control_context[control_index].QpPrimeY;
-
- /*
- printf("conrol_index = %d, start_qp = %d, result = %d, target = %d\n", control_index,
- mfc_context->bit_rate_control_context[control_index].QpPrimeY, current_frame_size,
- mfc_context->bit_rate_control_context[control_index].target_frame_size );
- */
-
- if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 4.0 ) {
- mfc_context->bit_rate_control_context[control_index].QpPrimeY += 4;
- } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 2.0 ) {
- mfc_context->bit_rate_control_context[control_index].QpPrimeY += 3;
- } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 1.50 ) {
- mfc_context->bit_rate_control_context[control_index].QpPrimeY += 2;
- } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 1.20 ) {
- mfc_context->bit_rate_control_context[control_index].QpPrimeY ++;
- } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.30 ) {
- mfc_context->bit_rate_control_context[control_index].QpPrimeY -= 3;
- } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.50 ) {
- mfc_context->bit_rate_control_context[control_index].QpPrimeY -= 2;
- } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.80 ) {
- mfc_context->bit_rate_control_context[control_index].QpPrimeY --;
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+ VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+ VAEncMiscParameterBuffer* pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
+ VAEncMiscParameterHRD* pParameterHRD = (VAEncMiscParameterBuffer*)pMiscParamHRD->data;
+ double bitrate = pSequenceParameter->bits_per_second;
+ double framerate = (double)pSequenceParameter->time_scale /(2 * (double)pSequenceParameter->num_units_in_tick);
+ int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
+ int intra_period = pSequenceParameter->intra_period;
+ int ip_period = pSequenceParameter->ip_period;
+ double qp1_size = 0.1 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
+ double qp51_size = 0.001 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
+ double bpf;
+
+ if (pSequenceParameter->ip_period) {
+ pnum = (intra_period + ip_period - 1)/ip_period - 1;
+ bnum = intra_period - inum - pnum;
+ }
+
+ mfc_context->brc.mode = encoder_context->rate_control_mode;
+
+ mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
+ (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
+ mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
+ mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
+
+ mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
+ mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
+ mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
+
+ bpf = mfc_context->brc.bits_per_frame = bitrate/framerate;
+
+ mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size;
+ mfc_context->hrd.current_buffer_fullness =
+ (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
+ pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
+ mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
+ mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
+ mfc_context->hrd.violation_noted = 0;
+
+ if ((bpf > qp51_size) && (bpf < qp1_size)) {
+ mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
+ }
+ else if (bpf >= qp1_size)
+ mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 1;
+ else if (bpf <= qp51_size)
+ mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51;
+
+ mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
+ mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
+
+ BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
+ BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
+ BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
+}
+
+static int gen6_mfc_update_hrd(struct encode_state *encode_state,
+ struct gen6_mfc_context *mfc_context,
+ int frame_bits)
+{
+ double prev_bf = mfc_context->hrd.current_buffer_fullness;
+
+ mfc_context->hrd.current_buffer_fullness -= frame_bits;
+
+ if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
+ mfc_context->hrd.current_buffer_fullness = prev_bf;
+ return BRC_UNDERFLOW;
}
- if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY > 51)
- mfc_context->bit_rate_control_context[control_index].QpPrimeY = 51;
- if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY < 1)
- mfc_context->bit_rate_control_context[control_index].QpPrimeY = 1;
-
- if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY != oldQp)
- return 0;
+ mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
+ if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
+ if (mfc_context->brc.mode == VA_RC_VBR)
+ mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
+ else {
+ mfc_context->hrd.current_buffer_fullness = prev_bf;
+ return BRC_OVERFLOW;
+ }
+ }
+ return BRC_NO_HRD_VIOLATION;
+}
- return 1;
+static int gen6_mfc_brc_postpack(struct encode_state *encode_state,
+ struct gen6_mfc_context *mfc_context,
+ int frame_bits)
+{
+ gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
+ VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+ int slicetype = pSliceParameter->slice_type;
+ int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
+ int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
+ int qpb = mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY;
+ int qp; // quantizer of previously encoded slice of current type
+ int qpn; // predicted quantizer for next frame of current type in integer format
+ double qpf; // predicted quantizer for next frame of current type in float format
+ double delta_qp; // QP correction
+ int target_frame_size, frame_size_next;
+ /* Notes:
+ * x - how far we are from HRD buffer borders
+ * y - how far we are from target HRD buffer fullness
+ */
+ double x, y;
+ double frame_size_alpha;
+
+ if (slicetype == SLICE_TYPE_SP)
+ slicetype = SLICE_TYPE_P;
+ else if (slicetype == SLICE_TYPE_SI)
+ slicetype = SLICE_TYPE_I;
+
+ qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;
+
+ target_frame_size = mfc_context->brc.target_frame_size[slicetype];
+ if (mfc_context->hrd.buffer_capacity < 5)
+ frame_size_alpha = 0;
+ else
+ frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
+ if (frame_size_alpha > 30) frame_size_alpha = 30;
+ frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
+ (double)(frame_size_alpha + 1.);
+
+ /* frame_size_next: avoiding negative number and too small value */
+ if ((double)frame_size_next < (double)(target_frame_size * 0.25))
+ frame_size_next = (int)((double)target_frame_size * 0.25);
+
+ qpf = (double)qp * target_frame_size / frame_size_next;
+ qpn = (int)(qpf + 0.5);
+
+ if (qpn == qp) {
+ /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
+ mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
+ if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
+ qpn++;
+ mfc_context->brc.qpf_rounding_accumulator = 0.;
+ } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
+ qpn--;
+ mfc_context->brc.qpf_rounding_accumulator = 0.;
+ }
+ }
+ /* making sure that QP is not changing too fast */
+ if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
+ else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
+ /* making sure that with QP predictions we did do not leave QPs range */
+ BRC_CLIP(qpn, 1, 51);
+
+ /* checking wthether HRD compliance is still met */
+ sts = gen6_mfc_update_hrd(encode_state, mfc_context, frame_bits);
+
+ /* calculating QP delta as some function*/
+ x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
+ if (x > 0) {
+ x /= mfc_context->hrd.target_buffer_fullness;
+ y = mfc_context->hrd.current_buffer_fullness;
+ }
+ else {
+ x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
+ y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
+ }
+ if (y < 0.01) y = 0.01;
+ if (x > 1) x = 1;
+ else if (x < -1) x = -1;
+
+ delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
+ qpn = (int)(qpn + delta_qp + 0.5);
+
+ /* making sure that with QP predictions we did do not leave QPs range */
+ BRC_CLIP(qpn, 1, 51);
+
+ if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
+ /* correcting QPs of slices of other types */
+ if (slicetype == SLICE_TYPE_P) {
+ if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
+ mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
+ if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
+ mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
+ } else if (slicetype == SLICE_TYPE_I) {
+ if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
+ mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
+ if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
+ mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
+ } else { // SLICE_TYPE_B
+ if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
+ mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
+ if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
+ mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
+ }
+ BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
+ BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
+ BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
+ } else if (sts == BRC_UNDERFLOW) { // underflow
+ if (qpn <= qp) qpn = qp + 1;
+ if (qpn > 51) {
+ qpn = 51;
+ sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
+ }
+ } else if (sts == BRC_OVERFLOW) {
+ if (qpn >= qp) qpn = qp - 1;
+ if (qpn < 1) { // < 0 (?) overflow with minQP
+ qpn = 1;
+ sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
+ }
+ }
+
+ mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
+
+ return sts;
}
static void
@@ -983,14 +1184,6 @@ static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx,
*flag = 0;
dri_bo_unmap(bo);
- /*Programing bit rate control */
- if ( mfc_context->bit_rate_control_context[0].MaxSizeInWord == 0 )
- gen6_mfc_bit_rate_control_context_init(encode_state, mfc_context);
-
- /*Programing HRD control */
- if ( (rate_control_mode == VA_RC_CBR) && (mfc_context->vui_hrd.i_cpb_size_value == 0) )
- gen6_mfc_hrd_context_init(encode_state, encoder_context);
-
return vaStatus;
}
@@ -1146,9 +1339,11 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
unsigned char *slice_header = NULL;
int slice_header_length_in_bits = 0;
unsigned int tail_data[] = { 0x0, 0x0 };
+ int slice_type = pSliceParameter->slice_type;
+
if (rate_control_mode == VA_RC_CBR) {
- qp = mfc_context->bit_rate_control_context[1 - is_intra].QpPrimeY;
+ qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
pSliceParameter->slice_qp_delta = qp - 26;
}
@@ -1175,10 +1370,6 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
5, /* first 5 bytes are start code + nal unit type */
1, 0, 1, slice_batch);
- if ( rate_control_mode == VA_RC_CBR) {
- qp = mfc_context->bit_rate_control_context[1-is_intra].QpPrimeY;
- }
-
dri_bo_map(vme_context->vme_output.bo , 1);
msg = (unsigned int *)vme_context->vme_output.bo->virtual;
@@ -1514,7 +1705,6 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
- int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
@@ -1526,9 +1716,10 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
long head_offset;
int old_used = intel_batchbuffer_used_size(slice_batch), used;
unsigned short head_size, tail_size;
+ int slice_type = pSliceParameter->slice_type;
if (rate_control_mode == VA_RC_CBR) {
- qp = mfc_context->bit_rate_control_context[1 - is_intra].QpPrimeY;
+ qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
pSliceParameter->slice_qp_delta = qp - 26;
}
@@ -1571,10 +1762,6 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
head_size = (used - old_used) / 16;
old_used = used;
- if (rate_control_mode == VA_RC_CBR) {
- qp = mfc_context->bit_rate_control_context[1 - is_intra].QpPrimeY;
- }
-
/* tail */
if (last_slice) {
mfc_context->insert_object(ctx,
@@ -1734,23 +1921,29 @@ gen6_mfc_avc_encode_picture(VADriverContextP ctx,
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
unsigned int rate_control_mode = encoder_context->rate_control_mode;
- int MAX_CBR_INTERATE = 4;
int current_frame_bits_size;
- int i;
+ int sts;
- for(i = 0; i < MAX_CBR_INTERATE; i++) {
+ for (;;) {
gen6_mfc_init(ctx, encoder_context);
gen6_mfc_avc_prepare(ctx, encode_state, encoder_context);
/*Programing bcs pipeline*/
gen6_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
gen6_mfc_run(ctx, encode_state, encoder_context);
- if ( rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
gen6_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
- //gen6_mfc_hrd_context_check(encode_state, mfc_context);
- if ( gen6_mfc_bit_rate_control_context_update(encode_state, mfc_context, current_frame_bits_size)) {
+ sts = gen6_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
+ if (sts == BRC_NO_HRD_VIOLATION) {
gen6_mfc_hrd_context_update(encode_state, mfc_context);
break;
}
+ else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
+ if (!mfc_context->hrd.violation_noted) {
+ fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
+ mfc_context->hrd.violation_noted = 1;
+ }
+ return VA_STATUS_SUCCESS;
+ }
} else {
break;
}
@@ -1840,6 +2033,25 @@ gen6_mfc_context_destroy(void *context)
free(mfc_context);
}
+void gen6_mfc_brc_prepare(struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+ unsigned int rate_control_mode = encoder_context->rate_control_mode;
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+
+ if (rate_control_mode == VA_RC_CBR) {
+ /*Programing bit rate control */
+ if ( mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0 ) {
+ gen6_mfc_bit_rate_control_context_init(encode_state, mfc_context);
+ gen6_mfc_brc_init(encode_state, encoder_context);
+ }
+
+ /*Programing HRD control */
+ if ( mfc_context->vui_hrd.i_cpb_size_value == 0 )
+ gen6_mfc_hrd_context_init(encode_state, encoder_context);
+ }
+}
+
Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
@@ -1874,6 +2086,7 @@ Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e
encoder_context->mfc_context = mfc_context;
encoder_context->mfc_context_destroy = gen6_mfc_context_destroy;
encoder_context->mfc_pipeline = gen6_mfc_pipeline;
+ encoder_context->mfc_brc_prepare = gen6_mfc_brc_prepare;
return True;
}
diff --git a/src/gen6_mfc.h b/src/gen6_mfc.h
index 4cdb20d..43f7082 100644
--- a/src/gen6_mfc.h
+++ b/src/gen6_mfc.h
@@ -144,7 +144,23 @@ struct gen6_mfc_context
unsigned int target_mb_size;
unsigned int target_frame_size;
- } bit_rate_control_context[2]; //INTERNAL: 0 for intra frames, 1 for inter frames.
+ } bit_rate_control_context[3]; //INTERNAL: for I, P, B frames
+
+ struct {
+ int mode;
+ int gop_nums[3];
+ int target_frame_size[3]; // I,P,B
+ double bits_per_frame;
+ double qpf_rounding_accumulator;
+ } brc;
+
+ struct {
+ double current_buffer_fullness;
+ double target_buffer_fullness;
+ double buffer_capacity;
+ unsigned int buffer_size;
+ unsigned int violation_noted;
+ } hrd;
//HRD control context
struct {
@@ -200,5 +216,8 @@ VAStatus gen6_mfc_pipeline(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context);
void gen6_mfc_context_destroy(void *context);
+void gen6_mfc_brc_prepare(struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context);
+
#endif /* _GEN6_MFC_BCS_H_ */
diff --git a/src/gen6_vme.c b/src/gen6_vme.c
index 832e59c..e74b235 100644
--- a/src/gen6_vme.c
+++ b/src/gen6_vme.c
@@ -38,6 +38,7 @@
#include "i965_drv_video.h"
#include "i965_encoder.h"
#include "gen6_vme.h"
+#include "gen6_mfc.h"
#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32)
@@ -387,17 +388,17 @@ static void gen6_vme_state_setup_fixup(VADriverContextP ctx,
struct intel_encoder_context *encoder_context,
unsigned int *vme_state_message)
{
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
- if (encoder_context->rate_control_mode != VA_RC_CQP)
- return;
-
if (slice_param->slice_type != SLICE_TYPE_I &&
slice_param->slice_type != SLICE_TYPE_SI)
return;
-
- vme_state_message[16] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta];
+ if (encoder_context->rate_control_mode == VA_RC_CQP)
+ vme_state_message[16] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta];
+ else
+ vme_state_message[16] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[slice_param->slice_type].QpPrimeY];
}
static VAStatus gen6_vme_vme_state_setup(VADriverContextP ctx,
diff --git a/src/gen7_mfc.c b/src/gen7_mfc.c
index f8378b9..98102c8 100644
--- a/src/gen7_mfc.c
+++ b/src/gen7_mfc.c
@@ -366,6 +366,7 @@ gen7_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encode
encoder_context->mfc_context = mfc_context;
encoder_context->mfc_context_destroy = gen6_mfc_context_destroy;
encoder_context->mfc_pipeline = gen6_mfc_pipeline;
+ encoder_context->mfc_brc_prepare = gen6_mfc_brc_prepare;
return True;
}
diff --git a/src/i965_encoder.c b/src/i965_encoder.c
index 7cfb966..4f0c245 100644
--- a/src/i965_encoder.c
+++ b/src/i965_encoder.c
@@ -131,6 +131,8 @@ intel_encoder_end_picture(VADriverContextP ctx,
intel_encoder_check_yuv_surface(ctx, profile, encode_state, encoder_context);
+ encoder_context->mfc_brc_prepare(encode_state, encoder_context);
+
vaStatus = encoder_context->vme_pipeline(ctx, profile, encode_state, encoder_context);
if (vaStatus == VA_STATUS_SUCCESS)
diff --git a/src/i965_encoder.h b/src/i965_encoder.h
index b477a2b..180aa65 100644
--- a/src/i965_encoder.h
+++ b/src/i965_encoder.h
@@ -54,6 +54,8 @@ struct intel_encoder_context
VAProfile profile,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context);
+ void (*mfc_brc_prepare)(struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context);
};
#endif /* _I965_ENCODER_H_ */