From d07f1e3c6505b169f59f075b5ea5a82331118f8f Mon Sep 17 00:00:00 2001 From: Gwenole Beauchesne Date: Mon, 2 Jun 2014 13:08:57 +0200 Subject: decoder: h264: enable Picture ID Remapping on Haswell and newer. Fill and submit MFX_AVC_PICID_STATE commands to Gen7.5+ hardware. This optimizes the management of the DPB as the binding array can now contain entries in any order. This also makes it possible to support H.264 MultiView High profiles, with any particular number of views. v2: added more comments for clarity, removed an assert [Yakui] Signed-off-by: Gwenole Beauchesne (cherry picked from commit 8dfdf10612c726b60ecd5b61eee2b7d6a520bb33) --- src/gen75_mfd.c | 23 +++-------- src/gen8_mfd.c | 27 ++++--------- src/i965_decoder_utils.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++ src/i965_decoder_utils.h | 23 +++++++++++ 4 files changed, 136 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index 1ed874f..bab5df4 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -632,25 +632,13 @@ gen75_mfd_avc_qm_state(VADriverContextP ctx, } } -static void +static inline void gen75_mfd_avc_picid_state(VADriverContextP ctx, struct decode_state *decode_state, struct gen7_mfd_context *gen7_mfd_context) { - struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; - - BEGIN_BCS_BATCH(batch, 10); - OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2)); - OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - ADVANCE_BCS_BATCH(batch); + gen75_send_avc_picid_state(gen7_mfd_context->base.batch, + gen7_mfd_context->reference_surface); } static void @@ -1054,7 +1042,8 @@ gen75_mfd_avc_decode_init(VADriverContextP ctx, assert(decode_state->pic_param && decode_state->pic_param->buffer); pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; - intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface); + gen75_update_avc_frame_store_index(ctx, decode_state, pic_param, + gen7_mfd_context->reference_surface); width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */ @@ -1139,8 +1128,8 @@ gen75_mfd_avc_decode_picture(VADriverContextP ctx, gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context); gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context); gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context); - gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context); gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context); + gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context); for (j = 0; j < decode_state->num_slice_params; j++) { assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index cecd05c..0b39dac 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -489,25 +489,13 @@ gen8_mfd_avc_qm_state(VADriverContextP ctx, } } -static void +static inline void gen8_mfd_avc_picid_state(VADriverContextP ctx, - struct decode_state *decode_state, - struct gen7_mfd_context *gen7_mfd_context) + struct decode_state *decode_state, + struct gen7_mfd_context *gen7_mfd_context) { - struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; - - BEGIN_BCS_BATCH(batch, 10); - OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2)); - OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - ADVANCE_BCS_BATCH(batch); + gen75_send_avc_picid_state(gen7_mfd_context->base.batch, + gen7_mfd_context->reference_surface); } static void @@ -817,7 +805,8 @@ gen8_mfd_avc_decode_init(VADriverContextP ctx, assert(decode_state->pic_param && decode_state->pic_param->buffer); pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer; - intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface); + gen75_update_avc_frame_store_index(ctx, decode_state, pic_param, + gen7_mfd_context->reference_surface); width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1; height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */ @@ -902,8 +891,8 @@ gen8_mfd_avc_decode_picture(VADriverContextP ctx, gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context); gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context); gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context); - gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context); gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context); + gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context); for (j = 0; j < decode_state->num_slice_params; j++) { assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 3239212..9af57c2 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -254,6 +254,24 @@ avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix) memset(&iq_matrix->ScalingList8x8, 16, sizeof(iq_matrix->ScalingList8x8)); } +/* Returns a unique picture ID that represents the supplied VA surface object */ +int +avc_get_picture_id(struct object_surface *obj_surface) +{ + int pic_id; + + /* This highly depends on how the internal VA objects are organized. + + Theory of operations: + The VA objects are maintained in heaps so that any released VA + surface will become free again for future allocation. This means + that holes in there are filled in for subsequent allocations. + So, this ultimately means that we could just use the Heap ID of + the VA surface as the resulting picture ID (16 bits) */ + pic_id = 1 + (obj_surface->base.id & OBJECT_HEAP_ID_MASK); + return (pic_id <= 0xffff) ? pic_id : -1; +} + /* Finds the VA/H264 picture associated with the specified VA surface id */ VAPictureH264 * avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count) @@ -502,6 +520,87 @@ intel_update_avc_frame_store_index( } } +void +gen75_update_avc_frame_store_index( + VADriverContextP ctx, + struct decode_state *decode_state, + VAPictureParameterBufferH264 *pic_param, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +) +{ + int i, n; + + /* Construct the Frame Store array, in compact form. i.e. empty or + invalid entries are discarded. */ + for (i = 0, n = 0; i < ARRAY_ELEMS(decode_state->reference_objects); i++) { + struct object_surface * const obj_surface = + decode_state->reference_objects[i]; + if (!obj_surface) + continue; + + GenFrameStore * const fs = &frame_store[n]; + fs->surface_id = obj_surface->base.id; + fs->obj_surface = obj_surface; + fs->frame_store_id = n++; + } + + /* Any remaining entry is marked as invalid */ + for (; n < MAX_GEN_REFERENCE_FRAMES; n++) { + GenFrameStore * const fs = &frame_store[n]; + fs->surface_id = VA_INVALID_ID; + fs->obj_surface = NULL; + fs->frame_store_id = -1; + } +} + +bool +gen75_fill_avc_picid_list( + uint16_t pic_ids[16], + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +) +{ + int i, pic_id; + + /* Fill in with known picture IDs. The Frame Store array is in + compact form, i.e. empty entries are only to be found at the + end of the array: there are no holes in the set of active + reference frames */ + for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) { + GenFrameStore * const fs = &frame_store[i]; + if (!fs->obj_surface) + break; + pic_id = avc_get_picture_id(fs->obj_surface); + if (pic_id < 0) + return false; + pic_ids[i] = pic_id; + } + + /* When an element of the list is not relevant the value of the + picture ID shall be set to 0 */ + for (; i < MAX_GEN_REFERENCE_FRAMES; i++) + pic_ids[i] = 0; + return true; +} + +bool +gen75_send_avc_picid_state( + struct intel_batchbuffer *batch, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +) +{ + uint16_t pic_ids[16]; + + if (!gen75_fill_avc_picid_list(pic_ids, frame_store)) + return false; + + BEGIN_BCS_BATCH(batch, 10); + OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2)); + OUT_BCS_BATCH(batch, 0); // enable Picture ID Remapping + intel_batchbuffer_data(batch, pic_ids, sizeof(pic_ids)); + ADVANCE_BCS_BATCH(batch); + return true; +} + void intel_update_vc1_frame_store_index(VADriverContextP ctx, struct decode_state *decode_state, diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h index a4c9415..0ffbd7f 100644 --- a/src/i965_decoder_utils.h +++ b/src/i965_decoder_utils.h @@ -54,6 +54,9 @@ avc_ensure_surface_bo( void avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix); +int +avc_get_picture_id(struct object_surface *obj_surface); + VAPictureH264 * avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count); @@ -97,6 +100,26 @@ intel_update_avc_frame_store_index(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param, GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]); +void +gen75_update_avc_frame_store_index( + VADriverContextP ctx, + struct decode_state *decode_state, + VAPictureParameterBufferH264 *pic_param, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +); + +bool +gen75_fill_avc_picid_list( + uint16_t pic_ids[16], + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +); + +bool +gen75_send_avc_picid_state( + struct intel_batchbuffer *batch, + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES] +); + void intel_update_vc1_frame_store_index(VADriverContextP ctx, struct decode_state *decode_state, -- cgit v1.2.3