diff options
author | Rogozhkin, Dmitry V <dmitry.v.rogozhkin@intel.com> | 2012-06-11 09:45:41 +0800 |
---|---|---|
committer | Xiang, Haihao <haihao.xiang@intel.com> | 2012-06-11 09:49:38 +0800 |
commit | 8ac4027a75e060e2ab69b0e9b4aefaedb6f3f9d7 (patch) | |
tree | 142163022940c173b9b23eb2548ac3154ebc1e6d | |
parent | 7f51f442aa00faa007fe089cc80811c7b6ecdecc (diff) | |
download | vaapi-intel-driver-8ac4027a75e060e2ab69b0e9b4aefaedb6f3f9d7.tar.gz vaapi-intel-driver-8ac4027a75e060e2ab69b0e9b4aefaedb6f3f9d7.tar.bz2 vaapi-intel-driver-8ac4027a75e060e2ab69b0e9b4aefaedb6f3f9d7.zip |
New BRC algorithm to match HRD
It also reduces the number of re-encoding passes.
Signed-off-by: Rogozhkin, Dmitry V <dmitry.v.rogozhkin@intel.com>
-rwxr-xr-x | src/Makefile.am | 2 | ||||
-rw-r--r-- | src/gen6_mfc.c | 367 | ||||
-rw-r--r-- | src/gen6_mfc.h | 21 | ||||
-rw-r--r-- | src/gen6_vme.c | 11 | ||||
-rw-r--r-- | src/gen7_mfc.c | 1 | ||||
-rw-r--r-- | src/i965_encoder.c | 2 | ||||
-rw-r--r-- | src/i965_encoder.h | 2 |
7 files changed, 322 insertions, 84 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index bb0b033..b8cc501 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -40,7 +40,7 @@ driver_ldflags = \ $(NULL) driver_libs = \ - -lpthread \ + -lpthread -lm \ $(DRM_LIBS) -ldrm_intel \ $(LIBVA_DEPS_LIBS) \ $(LIBVA_X11_DEPS_LIBS) \ diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index aad854b..95a85f3 100644 --- a/src/gen6_mfc.c +++ b/src/gen6_mfc.c @@ -30,6 +30,7 @@ #include <stdlib.h> #include <string.h> #include <assert.h> +#include <math.h> #include "intel_batchbuffer.h" #include "i965_defines.h" @@ -42,6 +43,34 @@ #define CMD_LEN_IN_OWORD 4 +#define BRC_CLIP(x, min, max) \ +{ \ + x = ((x > (max)) ? (max) : ((x < (min)) ? (min) : x)); \ +} + +#define BRC_P_B_QP_DIFF 4 +#define BRC_I_P_QP_DIFF 2 +#define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF) + +#define BRC_PWEIGHT 0.6 /* weight if P slice with comparison to I slice */ +#define BRC_BWEIGHT 0.25 /* weight if B slice with comparison to I slice */ + +#define BRC_QP_MAX_CHANGE 5 /* maximum qp modification */ +#define BRC_CY 0.1 /* weight for */ +#define BRC_CX_UNDERFLOW 5. +#define BRC_CX_OVERFLOW -4. 
+ +#define BRC_PI_0_5 1.5707963267948966192313216916398 + +typedef enum _gen6_brc_status +{ + BRC_NO_HRD_VIOLATION = 0, + BRC_UNDERFLOW = 1, + BRC_OVERFLOW = 2, + BRC_UNDERFLOW_WITH_MAX_QP = 3, + BRC_OVERFLOW_WITH_MIN_QP = 4, +} gen6_brc_status; + static const uint32_t gen6_mfc_batchbuffer_avc_intra[][4] = { #include "shaders/utils/mfc_batchbuffer_avc_intra.g6b" }; @@ -364,10 +393,11 @@ gen6_mfc_avc_slice_state(VADriverContextP ctx, if (batch == NULL) batch = encoder_context->base.batch; - if (slice_type == SLICE_TYPE_I) - bit_rate_control_target = 0; - else - bit_rate_control_target = 1; + bit_rate_control_target = slice_type; + if (slice_type == SLICE_TYPE_SP) + bit_rate_control_target = SLICE_TYPE_P; + else if (slice_type == SLICE_TYPE_SI) + bit_rate_control_target = SLICE_TYPE_I; if (slice_type == SLICE_TYPE_P) { weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag; @@ -420,11 +450,11 @@ gen6_mfc_avc_slice_state(VADriverContextP ctx, slice_param->macroblock_address ); OUT_BCS_BATCH(batch, (nexty << 16) | nextx); /*Next slice first MB X&Y*/ OUT_BCS_BATCH(batch, - (rate_control_enable << 31) | /*in CBR mode RateControlCounterEnable = enable*/ + (0/*rate_control_enable*/ << 31) | /*in CBR mode RateControlCounterEnable = enable*/ (1 << 30) | /*ResetRateControlCounter*/ (0 << 28) | /*RC Triggle Mode = Always Rate Control*/ (4 << 24) | /*RC Stable Tolerance, middle level*/ - (rate_control_enable << 23) | /*RC Panic Enable*/ + (0/*rate_control_enable*/ << 23) | /*RC Panic Enable*/ (0 << 22) | /*QP mode, don't modfiy CBP*/ (0 << 21) | /*MB Type Direct Conversion Enabled*/ (0 << 20) | /*MB Type Skip Conversion Enabled*/ @@ -734,20 +764,21 @@ gen6_mfc_bit_rate_control_context_init(struct encode_state *encode_state, struct gen6_mfc_context *mfc_context) { VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; int 
height_in_mbs = (mfc_context->surface_state.height + 15) / 16; float fps = pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ; int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs; int intra_mb_size = inter_mb_size * 5.0; int i; - - mfc_context->bit_rate_control_context[0].target_mb_size = intra_mb_size; - mfc_context->bit_rate_control_context[0].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs; - mfc_context->bit_rate_control_context[1].target_mb_size = inter_mb_size; - mfc_context->bit_rate_control_context[1].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs; - for(i = 0 ; i < 2; i++) { + mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size; + mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs; + mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size; + mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs; + mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_mb_size = inter_mb_size; + mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs; + + for(i = 0 ; i < 3; i++) { mfc_context->bit_rate_control_context[i].QpPrimeY = 26; mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6; mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6; @@ -764,52 +795,222 @@ gen6_mfc_bit_rate_control_context_init(struct encode_state *encode_state, mfc_context->bit_rate_control_context[i].Correct[5] = 8; } - mfc_context->bit_rate_control_context[0].TargetSizeInWord = (intra_mb_size + 16)/ 16; - mfc_context->bit_rate_control_context[1].TargetSizeInWord = (inter_mb_size + 16)/ 16; + mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16; + 
mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16; + mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord = (inter_mb_size + 16)/ 16; - mfc_context->bit_rate_control_context[0].MaxSizeInWord = mfc_context->bit_rate_control_context[0].TargetSizeInWord * 1.5; - mfc_context->bit_rate_control_context[1].MaxSizeInWord = mfc_context->bit_rate_control_context[1].TargetSizeInWord * 1.5; + mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5; + mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5; + mfc_context->bit_rate_control_context[SLICE_TYPE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord * 1.5; } -static int gen6_mfc_bit_rate_control_context_update(struct encode_state *encode_state, - struct gen6_mfc_context *mfc_context, - int current_frame_size) +static void +gen6_mfc_brc_init(struct encode_state *encode_state, + struct intel_encoder_context* encoder_context) { - VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; - int control_index = 1 - (pSliceParameter->slice_type == SLICE_TYPE_I); - int oldQp = mfc_context->bit_rate_control_context[control_index].QpPrimeY; - - /* - printf("conrol_index = %d, start_qp = %d, result = %d, target = %d\n", control_index, - mfc_context->bit_rate_control_context[control_index].QpPrimeY, current_frame_size, - mfc_context->bit_rate_control_context[control_index].target_frame_size ); - */ - - if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 4.0 ) { - mfc_context->bit_rate_control_context[control_index].QpPrimeY += 4; - } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 2.0 ) { - 
mfc_context->bit_rate_control_context[control_index].QpPrimeY += 3; - } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 1.50 ) { - mfc_context->bit_rate_control_context[control_index].QpPrimeY += 2; - } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 1.20 ) { - mfc_context->bit_rate_control_context[control_index].QpPrimeY ++; - } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.30 ) { - mfc_context->bit_rate_control_context[control_index].QpPrimeY -= 3; - } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.50 ) { - mfc_context->bit_rate_control_context[control_index].QpPrimeY -= 2; - } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.80 ) { - mfc_context->bit_rate_control_context[control_index].QpPrimeY --; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + VAEncMiscParameterBuffer* pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer; + VAEncMiscParameterHRD* pParameterHRD = (VAEncMiscParameterBuffer*)pMiscParamHRD->data; + double bitrate = pSequenceParameter->bits_per_second; + double framerate = (double)pSequenceParameter->time_scale /(2 * (double)pSequenceParameter->num_units_in_tick); + int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. 
*/ + int intra_period = pSequenceParameter->intra_period; + int ip_period = pSequenceParameter->ip_period; + double qp1_size = 0.1 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2; + double qp51_size = 0.001 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2; + double bpf; + + if (pSequenceParameter->ip_period) { + pnum = (intra_period + ip_period - 1)/ip_period - 1; + bnum = intra_period - inum - pnum; + } + + mfc_context->brc.mode = encoder_context->rate_control_mode; + + mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) / + (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum)); + mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I]; + mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I]; + + mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum; + mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum; + mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum; + + bpf = mfc_context->brc.bits_per_frame = bitrate/framerate; + + mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size; + mfc_context->hrd.current_buffer_fullness = + (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)? 
+ pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.; + mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.; + mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size; + mfc_context->hrd.violation_noted = 0; + + if ((bpf > qp51_size) && (bpf < qp1_size)) { + mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size); + } + else if (bpf >= qp1_size) + mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 1; + else if (bpf <= qp51_size) + mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51; + + mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY; + mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY; + + BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51); + BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51); + BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51); +} + +static int gen6_mfc_update_hrd(struct encode_state *encode_state, + struct gen6_mfc_context *mfc_context, + int frame_bits) +{ + double prev_bf = mfc_context->hrd.current_buffer_fullness; + + mfc_context->hrd.current_buffer_fullness -= frame_bits; + + if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) 
{ + mfc_context->hrd.current_buffer_fullness = prev_bf; + return BRC_UNDERFLOW; } - if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY > 51) - mfc_context->bit_rate_control_context[control_index].QpPrimeY = 51; - if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY < 1) - mfc_context->bit_rate_control_context[control_index].QpPrimeY = 1; - - if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY != oldQp) - return 0; + mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame; + if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) { + if (mfc_context->brc.mode == VA_RC_VBR) + mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size; + else { + mfc_context->hrd.current_buffer_fullness = prev_bf; + return BRC_OVERFLOW; + } + } + return BRC_NO_HRD_VIOLATION; +} - return 1; +static int gen6_mfc_brc_postpack(struct encode_state *encode_state, + struct gen6_mfc_context *mfc_context, + int frame_bits) +{ + gen6_brc_status sts = BRC_NO_HRD_VIOLATION; + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int slicetype = pSliceParameter->slice_type; + int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY; + int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY; + int qpb = mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY; + int qp; // quantizer of previously encoded slice of current type + int qpn; // predicted quantizer for next frame of current type in integer format + double qpf; // predicted quantizer for next frame of current type in float format + double delta_qp; // QP correction + int target_frame_size, frame_size_next; + /* Notes: + * x - how far we are from HRD buffer borders + * y - how far we are from target HRD buffer fullness + */ + double x, y; + double frame_size_alpha; + + if (slicetype == SLICE_TYPE_SP) + 
slicetype = SLICE_TYPE_P; + else if (slicetype == SLICE_TYPE_SI) + slicetype = SLICE_TYPE_I; + + qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY; + + target_frame_size = mfc_context->brc.target_frame_size[slicetype]; + if (mfc_context->hrd.buffer_capacity < 5) + frame_size_alpha = 0; + else + frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype]; + if (frame_size_alpha > 30) frame_size_alpha = 30; + frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) / + (double)(frame_size_alpha + 1.); + + /* frame_size_next: avoiding negative number and too small value */ + if ((double)frame_size_next < (double)(target_frame_size * 0.25)) + frame_size_next = (int)((double)target_frame_size * 0.25); + + qpf = (double)qp * target_frame_size / frame_size_next; + qpn = (int)(qpf + 0.5); + + if (qpn == qp) { + /* setting qpn we round qpf making mistakes: now we are trying to compensate this */ + mfc_context->brc.qpf_rounding_accumulator += qpf - qpn; + if (mfc_context->brc.qpf_rounding_accumulator > 1.0) { + qpn++; + mfc_context->brc.qpf_rounding_accumulator = 0.; + } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) { + qpn--; + mfc_context->brc.qpf_rounding_accumulator = 0.; + } + } + /* making sure that QP is not changing too fast */ + if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE; + else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE; + /* making sure that with QP predictions we did do not leave QPs range */ + BRC_CLIP(qpn, 1, 51); + + /* checking wthether HRD compliance is still met */ + sts = gen6_mfc_update_hrd(encode_state, mfc_context, frame_bits); + + /* calculating QP delta as some function*/ + x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness; + if (x > 0) { + x /= mfc_context->hrd.target_buffer_fullness; + y = mfc_context->hrd.current_buffer_fullness; + } + else { + x /= (mfc_context->hrd.buffer_size - 
mfc_context->hrd.target_buffer_fullness); + y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness; + } + if (y < 0.01) y = 0.01; + if (x > 1) x = 1; + else if (x < -1) x = -1; + + delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x); + qpn = (int)(qpn + delta_qp + 0.5); + + /* making sure that with QP predictions we did do not leave QPs range */ + BRC_CLIP(qpn, 1, 51); + + if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation + /* correcting QPs of slices of other types */ + if (slicetype == SLICE_TYPE_P) { + if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2) + mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1; + if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2) + mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1; + } else if (slicetype == SLICE_TYPE_I) { + if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4) + mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2; + if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2) + mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2; + } else { // SLICE_TYPE_B + if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2) + mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1; + if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4) + mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2; + } + BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51); + BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51); + BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51); + } else if (sts == BRC_UNDERFLOW) { // underflow + if (qpn <= qp) qpn = qp + 1; + if (qpn > 51) { + qpn = 51; + sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP + } + } else if (sts == BRC_OVERFLOW) { + if (qpn >= qp) qpn = qp - 1; + if (qpn < 1) { // < 0 
(?) overflow with minQP + qpn = 1; + sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done + } + } + + mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn; + + return sts; } static void @@ -983,14 +1184,6 @@ static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx, *flag = 0; dri_bo_unmap(bo); - /*Programing bit rate control */ - if ( mfc_context->bit_rate_control_context[0].MaxSizeInWord == 0 ) - gen6_mfc_bit_rate_control_context_init(encode_state, mfc_context); - - /*Programing HRD control */ - if ( (rate_control_mode == VA_RC_CBR) && (mfc_context->vui_hrd.i_cpb_size_value == 0) ) - gen6_mfc_hrd_context_init(encode_state, encoder_context); - return vaStatus; } @@ -1146,9 +1339,11 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, unsigned char *slice_header = NULL; int slice_header_length_in_bits = 0; unsigned int tail_data[] = { 0x0, 0x0 }; + int slice_type = pSliceParameter->slice_type; + if (rate_control_mode == VA_RC_CBR) { - qp = mfc_context->bit_rate_control_context[1 - is_intra].QpPrimeY; + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; pSliceParameter->slice_qp_delta = qp - 26; } @@ -1175,10 +1370,6 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, 5, /* first 5 bytes are start code + nal unit type */ 1, 0, 1, slice_batch); - if ( rate_control_mode == VA_RC_CBR) { - qp = mfc_context->bit_rate_control_context[1-is_intra].QpPrimeY; - } - dri_bo_map(vme_context->vme_output.bo , 1); msg = (unsigned int *)vme_context->vme_output.bo->virtual; @@ -1514,7 +1705,6 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx, VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; - int 
is_intra = pSliceParameter->slice_type == SLICE_TYPE_I; int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs); @@ -1526,9 +1716,10 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx, long head_offset; int old_used = intel_batchbuffer_used_size(slice_batch), used; unsigned short head_size, tail_size; + int slice_type = pSliceParameter->slice_type; if (rate_control_mode == VA_RC_CBR) { - qp = mfc_context->bit_rate_control_context[1 - is_intra].QpPrimeY; + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; pSliceParameter->slice_qp_delta = qp - 26; } @@ -1571,10 +1762,6 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx, head_size = (used - old_used) / 16; old_used = used; - if (rate_control_mode == VA_RC_CBR) { - qp = mfc_context->bit_rate_control_context[1 - is_intra].QpPrimeY; - } - /* tail */ if (last_slice) { mfc_context->insert_object(ctx, @@ -1734,23 +1921,29 @@ gen6_mfc_avc_encode_picture(VADriverContextP ctx, { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; unsigned int rate_control_mode = encoder_context->rate_control_mode; - int MAX_CBR_INTERATE = 4; int current_frame_bits_size; - int i; + int sts; - for(i = 0; i < MAX_CBR_INTERATE; i++) { + for (;;) { gen6_mfc_init(ctx, encoder_context); gen6_mfc_avc_prepare(ctx, encode_state, encoder_context); /*Programing bcs pipeline*/ gen6_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline gen6_mfc_run(ctx, encode_state, encoder_context); - if ( rate_control_mode == VA_RC_CBR) { + if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) { gen6_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size); - //gen6_mfc_hrd_context_check(encode_state, mfc_context); - if ( 
gen6_mfc_bit_rate_control_context_update(encode_state, mfc_context, current_frame_bits_size)) { + sts = gen6_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size); + if (sts == BRC_NO_HRD_VIOLATION) { gen6_mfc_hrd_context_update(encode_state, mfc_context); break; } + else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) { + if (!mfc_context->hrd.violation_noted) { + fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow"); + mfc_context->hrd.violation_noted = 1; + } + return VA_STATUS_SUCCESS; + } } else { break; } @@ -1840,6 +2033,25 @@ gen6_mfc_context_destroy(void *context) free(mfc_context); } +void gen6_mfc_brc_prepare(struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + unsigned int rate_control_mode = encoder_context->rate_control_mode; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + if (rate_control_mode == VA_RC_CBR) { + /*Programing bit rate control */ + if ( mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0 ) { + gen6_mfc_bit_rate_control_context_init(encode_state, mfc_context); + gen6_mfc_brc_init(encode_state, encoder_context); + } + + /*Programing HRD control */ + if ( mfc_context->vui_hrd.i_cpb_size_value == 0 ) + gen6_mfc_hrd_context_init(encode_state, encoder_context); + } +} + Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context)); @@ -1874,6 +2086,7 @@ Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e encoder_context->mfc_context = mfc_context; encoder_context->mfc_context_destroy = gen6_mfc_context_destroy; encoder_context->mfc_pipeline = gen6_mfc_pipeline; + encoder_context->mfc_brc_prepare = gen6_mfc_brc_prepare; return True; } diff --git a/src/gen6_mfc.h b/src/gen6_mfc.h index 4cdb20d..43f7082 100644 --- a/src/gen6_mfc.h +++ 
b/src/gen6_mfc.h @@ -144,7 +144,23 @@ struct gen6_mfc_context unsigned int target_mb_size; unsigned int target_frame_size; - } bit_rate_control_context[2]; //INTERNAL: 0 for intra frames, 1 for inter frames. + } bit_rate_control_context[3]; //INTERNAL: for I, P, B frames + + struct { + int mode; + int gop_nums[3]; + int target_frame_size[3]; // I,P,B + double bits_per_frame; + double qpf_rounding_accumulator; + } brc; + + struct { + double current_buffer_fullness; + double target_buffer_fullness; + double buffer_capacity; + unsigned int buffer_size; + unsigned int violation_noted; + } hrd; //HRD control context struct { @@ -200,5 +216,8 @@ VAStatus gen6_mfc_pipeline(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context); void gen6_mfc_context_destroy(void *context); +void gen6_mfc_brc_prepare(struct encode_state *encode_state, + struct intel_encoder_context *encoder_context); + #endif /* _GEN6_MFC_BCS_H_ */ diff --git a/src/gen6_vme.c b/src/gen6_vme.c index 832e59c..e74b235 100644 --- a/src/gen6_vme.c +++ b/src/gen6_vme.c @@ -38,6 +38,7 @@ #include "i965_drv_video.h" #include "i965_encoder.h" #include "gen6_vme.h" +#include "gen6_mfc.h" #define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) #define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32) @@ -387,17 +388,17 @@ static void gen6_vme_state_setup_fixup(VADriverContextP ctx, struct intel_encoder_context *encoder_context, unsigned int *vme_state_message) { + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; - if (encoder_context->rate_control_mode != VA_RC_CQP) - return; - if (slice_param->slice_type != SLICE_TYPE_I && slice_param->slice_type != 
SLICE_TYPE_SI) return; - - vme_state_message[16] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta]; + if (encoder_context->rate_control_mode == VA_RC_CQP) + vme_state_message[16] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta]; + else + vme_state_message[16] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[slice_param->slice_type].QpPrimeY]; } static VAStatus gen6_vme_vme_state_setup(VADriverContextP ctx, diff --git a/src/gen7_mfc.c b/src/gen7_mfc.c index f8378b9..98102c8 100644 --- a/src/gen7_mfc.c +++ b/src/gen7_mfc.c @@ -366,6 +366,7 @@ gen7_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encode encoder_context->mfc_context = mfc_context; encoder_context->mfc_context_destroy = gen6_mfc_context_destroy; encoder_context->mfc_pipeline = gen6_mfc_pipeline; + encoder_context->mfc_brc_prepare = gen6_mfc_brc_prepare; return True; } diff --git a/src/i965_encoder.c b/src/i965_encoder.c index 7cfb966..4f0c245 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -131,6 +131,8 @@ intel_encoder_end_picture(VADriverContextP ctx, intel_encoder_check_yuv_surface(ctx, profile, encode_state, encoder_context); + encoder_context->mfc_brc_prepare(encode_state, encoder_context); + vaStatus = encoder_context->vme_pipeline(ctx, profile, encode_state, encoder_context); if (vaStatus == VA_STATUS_SUCCESS) diff --git a/src/i965_encoder.h b/src/i965_encoder.h index b477a2b..180aa65 100644 --- a/src/i965_encoder.h +++ b/src/i965_encoder.h @@ -54,6 +54,8 @@ struct intel_encoder_context VAProfile profile, struct encode_state *encode_state, struct intel_encoder_context *encoder_context); + void (*mfc_brc_prepare)(struct encode_state *encode_state, + struct intel_encoder_context *encoder_context); }; #endif /* _I965_ENCODER_H_ */ |