/* * Copyright © 2011 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Zhou Chang * Xiang, Haihao * */ #include #include #include #include #include "intel_batchbuffer.h" #include "i965_defines.h" #include "i965_structs.h" #include "i965_drv_video.h" #include "i965_encoder.h" #include "i965_encoder_utils.h" #include "gen6_mfc.h" #include "gen6_vme.h" static const uint32_t gen7_mfc_batchbuffer_avc_intra[][4] = { #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b" }; static const uint32_t gen7_mfc_batchbuffer_avc_inter[][4] = { #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b" }; static struct i965_kernel gen7_mfc_kernels[] = { { "MFC AVC INTRA BATCHBUFFER ", MFC_BATCHBUFFER_AVC_INTRA, gen7_mfc_batchbuffer_avc_intra, sizeof(gen7_mfc_batchbuffer_avc_intra), NULL }, { "MFC AVC INTER BATCHBUFFER ", MFC_BATCHBUFFER_AVC_INTER, gen7_mfc_batchbuffer_avc_inter, sizeof(gen7_mfc_batchbuffer_avc_inter), NULL }, }; static void gen7_mfc_pipe_mode_select(VADriverContextP ctx, int standard_select, struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; assert(standard_select == MFX_FORMAT_MPEG2 || standard_select == MFX_FORMAT_AVC); BEGIN_BCS_BATCH(batch, 5); OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2)); OUT_BCS_BATCH(batch, (MFX_LONG_MODE << 17) | /* Must be long format for encoder */ (MFD_MODE_VLD << 15) | /* VLD mode */ (1 << 10) | /* Stream-Out Enable */ ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */ ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */ (0 << 8) | /* Pre Deblocking Output */ (0 << 5) | /* not in stitch mode */ (1 << 4) | /* encoding mode */ (standard_select << 0)); /* standard select: avc or mpeg2 */ OUT_BCS_BATCH(batch, (0 << 7) | /* expand NOA bus flag */ (0 << 6) | /* disable slice-level clock gating */ (0 << 5) | /* disable clock gating for NOA */ (0 << 4) | /* terminate if AVC motion and POC table error occurs */ (0 << 3) | /* terminate if AVC mbdata error occurs */ (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */ (0 << 1) | (0 << 0)); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); ADVANCE_BCS_BATCH(batch); } static void gen7_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, ((mfc_context->surface_state.height - 1) << 18) | ((mfc_context->surface_state.width - 1) << 4)); OUT_BCS_BATCH(batch, (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */ (0 << 22) | /* surface object control state, FIXME??? */ ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */ (0 << 2) | /* must be 0 for interleave U/V */ (1 << 1) | /* must be tiled */ (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */ OUT_BCS_BATCH(batch, (0 << 16) | /* must be 0 for interleave U/V */ (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */ OUT_BCS_BATCH(batch, 0); ADVANCE_BCS_BATCH(batch); } static void gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct gen6_vme_context *vme_context = encoder_context->vme_context; BEGIN_BCS_BATCH(batch, 11); OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2)); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); /* MFX Indirect MV Object Base Address */ OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */ OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); /*MFC Indirect PAK-BSE Object Base Address for Encoder*/ OUT_BCS_RELOC(batch, mfc_context->mfc_indirect_pak_bse_object.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); OUT_BCS_RELOC(batch, mfc_context->mfc_indirect_pak_bse_object.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, mfc_context->mfc_indirect_pak_bse_object.end_offset); ADVANCE_BCS_BATCH(batch); } static void gen7_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; BEGIN_BCS_BATCH(batch, 16); OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); OUT_BCS_BATCH(batch, ((width_in_mbs * height_in_mbs) & 0xFFFF)); OUT_BCS_BATCH(batch, ((height_in_mbs - 1) << 16) | ((width_in_mbs - 1) << 0)); OUT_BCS_BATCH(batch, (0 << 24) | /* Second Chroma QP Offset */ (0 << 16) | /* Chroma QP Offset */ (0 << 14) | /* Max-bit conformance Intra flag */ (0 << 13) | /* Max Macroblock size conformance Inter flag */ (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) | /*Weighted_Pred_Flag */ (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) | /* Weighted_BiPred_Idc */ (0 << 8) | /* FIXME: Image Structure */ (0 << 0) ); /* Current Decoed Image Frame Store ID, reserved in Encode mode */ OUT_BCS_BATCH(batch, (0 << 16) | /* Mininum Frame size */ (0 << 15) | /* Disable reading of Macroblock Status Buffer */ (0 << 14) | /* Load BitStream Pointer only once, 1 slic 1 frame */ (0 << 13) | /* CABAC 0 word insertion test enable */ (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */ (1 << 10) | /* Chroma Format IDC, 4:2:0 */ (0 << 9) | /* FIXME: MbMvFormatFlag */ (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/ (0 << 6) | /* Only valid for VLD decoding mode */ (0 << 5) | /* Constrained Intra Predition Flag, from PPS */ (0 << 4) | /* Direct 8x8 inference flag */ (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/ (1 << 2) | /* Frame MB only flag */ (0 << 1) | /* MBAFF mode is in active */ (0 << 0)); /* Field picture flag */ OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */ OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */ (0xBB8 << 16) | /* InterMbMaxSz */ (0xEE8) ); /* IntraMbMaxSz */ OUT_BCS_BATCH(batch, 0); /* Reserved */ OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ OUT_BCS_BATCH(batch, 0x8C000000); OUT_BCS_BATCH(batch, 0x00010000); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); ADVANCE_BCS_BATCH(batch); } static void gen7_mfc_qm_state(VADriverContextP ctx, int qm_type, unsigned int *qm, int qm_length, struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; unsigned int qm_buffer[16]; assert(qm_length <= 16); assert(sizeof(*qm) == 4); memcpy(qm_buffer, qm, qm_length * 4); BEGIN_BCS_BATCH(batch, 18); OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2)); OUT_BCS_BATCH(batch, qm_type << 0); intel_batchbuffer_data(batch, qm_buffer, 16 * 4); ADVANCE_BCS_BATCH(batch); } static void gen7_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { unsigned int qm[16] = { 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010 }; gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context); gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context); gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context); gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context); } static void gen7_mfc_fqm_state(VADriverContextP ctx, int fqm_type, unsigned int *fqm, int fqm_length, struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; unsigned int fqm_buffer[32]; assert(fqm_length <= 32); assert(sizeof(*fqm) == 4); memcpy(fqm_buffer, fqm, fqm_length * 4); BEGIN_BCS_BATCH(batch, 34); OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2)); OUT_BCS_BATCH(batch, fqm_type << 0); intel_batchbuffer_data(batch, fqm_buffer, 32 * 4); ADVANCE_BCS_BATCH(batch); } static void gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { unsigned int qm[32] = { 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000 }; gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context); gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context); gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context); gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context); } static void gen7_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context, unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw, int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag, struct intel_batchbuffer *batch) { if (batch == NULL) batch = encoder_context->base.batch; BEGIN_BCS_BATCH(batch, lenght_in_dws + 2); OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2)); OUT_BCS_BATCH(batch, (0 << 16) | /* always start at offset 0 */ (data_bits_in_last_dw << 8) | (skip_emul_byte_count << 4) | (!!emulation_flag << 3) | ((!!is_last_header) << 2) | ((!!is_end_of_slice) << 1) | (0 << 0)); /* FIXME: ??? */ intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4); ADVANCE_BCS_BATCH(batch); } Bool gen7_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context)); mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS; mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data); mfc_context->gpe_context.curbe.length = 32 * 4; mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1; mfc_context->gpe_context.vfe_state.num_urb_entries = 16; mfc_context->gpe_context.vfe_state.gpgpu_mode = 0; mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1; mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1; i965_gpe_load_kernels(ctx, &mfc_context->gpe_context, gen7_mfc_kernels, NUM_MFC_KERNEL); mfc_context->pipe_mode_select = gen7_mfc_pipe_mode_select; mfc_context->set_surface_state = gen7_mfc_surface_state; mfc_context->ind_obj_base_addr_state = gen7_mfc_ind_obj_base_addr_state; mfc_context->avc_img_state = gen7_mfc_avc_img_state; mfc_context->avc_qm_state = gen7_mfc_avc_qm_state; mfc_context->avc_fqm_state = gen7_mfc_avc_fqm_state; mfc_context->insert_object = gen7_mfc_avc_insert_object; mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup; encoder_context->mfc_context = mfc_context; encoder_context->mfc_context_destroy = gen6_mfc_context_destroy; encoder_context->mfc_pipeline = gen6_mfc_pipeline; encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare; return True; }