summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorZhao Yakui <yakui.zhao@intel.com>2012-12-24 15:08:25 +0800
committerXiang, Haihao <haihao.xiang@intel.com>2013-01-17 13:08:39 +0800
commitf2b5f3f038efff0c4161bf6df9ae27797b031b25 (patch)
tree8cec1ff56bc1bec38fd5965eb81ebd369716a541 /src
parent788e99361208127763fdf1e146e63fca03a09f67 (diff)
downloadvaapi-intel-driver-f2b5f3f038efff0c4161bf6df9ae27797b031b25.tar.gz
vaapi-intel-driver-f2b5f3f038efff0c4161bf6df9ae27797b031b25.tar.bz2
vaapi-intel-driver-f2b5f3f038efff0c4161bf6df9ae27797b031b25.zip
MEDIA_OBJECT uses hardware scoreboard during VME prediction on Haswell
To get precise VME prediction, the current macroblock depends on its neighbouring macroblocks (left, top, up-right), so the hardware scoreboard is used when submitting the MEDIA_OBJECT command. This prepares for adding MV prediction to the VME prediction. Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Diffstat (limited to 'src')
-rw-r--r--src/gen75_vme.c127
-rw-r--r--src/i965_gpe_utils.c7
-rw-r--r--src/i965_gpe_utils.h41
3 files changed, 165 insertions, 10 deletions
diff --git a/src/gen75_vme.c b/src/gen75_vme.c
index dc5730e..6bf8777 100644
--- a/src/gen75_vme.c
+++ b/src/gen75_vme.c
@@ -28,6 +28,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#include <string.h>
#include <assert.h>
@@ -62,6 +63,10 @@
#define VME_MSG_LENGTH 32
+#define MB_SCOREBOARD_A (1 << 0) /* set in score_dep when the macroblock has a left neighbour (mb_x != 0) */
+#define MB_SCOREBOARD_B (1 << 1) /* set in score_dep when the macroblock is below the first row of its slice */
+#define MB_SCOREBOARD_C (1 << 2) /* set together with B when the macroblock is not in the last column (up-right neighbour) */
+
static const uint32_t gen75_vme_intra_frame[][4] = {
#include "shaders/vme/intra_frame_haswell.g75b"
};
@@ -481,6 +486,12 @@ static VAStatus gen75_vme_vme_state_setup(VADriverContextP ctx,
return VA_STATUS_SUCCESS;
}
+#define INTRA_PRED_AVAIL_FLAG_AE 0x60 /* set by the walker fill routine when mb_x != 0 (left neighbour present) */
+#define INTRA_PRED_AVAIL_FLAG_B 0x10 /* set when the macroblock is below the first row of its slice (top neighbour) */
+#define INTRA_PRED_AVAIL_FLAG_C 0x8 /* set with B when the macroblock is not in the last column (up-right neighbour) */
+#define INTRA_PRED_AVAIL_FLAG_D 0x4 /* set with B when mb_x != 0 (up-left neighbour) */
+#define INTRA_PRED_AVAIL_FLAG_BCD_MASK 0x1C /* equals FLAG_B | FLAG_C | FLAG_D */
+
static void
gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx,
struct encode_state *encode_state,
@@ -494,12 +505,6 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx,
int i, s;
unsigned int *command_ptr;
-#define INTRA_PRED_AVAIL_FLAG_AE 0x60
-#define INTRA_PRED_AVAIL_FLAG_B 0x10
-#define INTRA_PRED_AVAIL_FLAG_C 0x8
-#define INTRA_PRED_AVAIL_FLAG_D 0x4
-#define INTRA_PRED_AVAIL_FLAG_BCD_MASK 0x1C
-
dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
command_ptr = vme_context->vme_batchbuffer.bo->virtual;
@@ -557,6 +562,77 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx,
dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
+/* Write one scoreboard-enabled MEDIA_OBJECT command per macroblock of every slice into the VME batch buffer, then terminate the buffer. */
+static void
+gen75_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, /* not referenced in this function */
+ struct encode_state *encode_state, /* supplies num_slice_params_ext and the per-slice parameter buffers */
+ int mb_width, int mb_height, /* the caller passes the picture size in macroblocks; mb_height is not read here */
+ int kernel, /* written verbatim as the second dword of every command */
+ int transform_8x8_mode_flag, /* OR-ed into the last inline-data dword */
+ struct intel_encoder_context *encoder_context) /* its vme_context owns the batch buffer being filled */
+{
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
+ int mb_x = 0, mb_y = 0;
+ int mb_row; /* row of the first macroblock of the current slice */
+ int i, s;
+ unsigned int *command_ptr; /* write cursor into the mapped batch buffer */
+ int temp; /* NOTE(review): never used in this function */
+
+
+#define USE_SCOREBOARD (1 << 21) /* the only bit set in the third dword of each command below */
+
+ dri_bo_map(vme_context->vme_batchbuffer.bo, 1); /* NOTE(review): the 1 presumably requests a writable mapping - confirm */
+ command_ptr = vme_context->vme_batchbuffer.bo->virtual;
+
+ for (s = 0; s < encode_state->num_slice_params_ext; s++) {
+ VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
+ int slice_mb_begin = pSliceParameter->macroblock_address;
+ int slice_mb_number = pSliceParameter->num_macroblocks;
+ unsigned int mb_intra_ub, score_dep; /* intra-prediction availability bits and scoreboard dependency bits */
+ int slice_mb_x = pSliceParameter->macroblock_address % mb_width; /* NOTE(review): computed but never read afterwards */
+ mb_row = slice_mb_begin / mb_width;
+ for (i = 0; i < slice_mb_number; ) { /* i is advanced at the bottom of the body */
+ int mb_count = i + slice_mb_begin; /* macroblock address within the picture */
+ mb_x = mb_count % mb_width;
+ mb_y = mb_count / mb_width;
+ mb_intra_ub = 0;
+ score_dep = 0;
+ if (mb_x != 0) { /* not in the first column: a left neighbour exists */
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+ score_dep |= MB_SCOREBOARD_A;
+ }
+ if (mb_y != mb_row) { /* past the slice's first row: the row above is treated as available */
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+ score_dep |= MB_SCOREBOARD_B;
+ if (mb_x != 0) /* up-left neighbour; no scoreboard bit is added for it */
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+ if (mb_x != (mb_width -1)) { /* not in the last column: an up-right neighbour exists */
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+ score_dep |= MB_SCOREBOARD_C;
+ }
+ }
+
+ *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); /* 8 dwords are written per command; NOTE(review): the -2 presumably is the usual length-field bias - confirm */
+ *command_ptr++ = kernel;
+ *command_ptr++ = USE_SCOREBOARD;
+ *command_ptr++ = 0;
+ /* the (X, Y) term of scoreboard */
+ *command_ptr++ = ((mb_y << 16) | mb_x);
+ *command_ptr++ = score_dep; /* which of the A/B/C dependencies apply to this macroblock */
+ /*inline data */
+ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); /* NOTE(review): mb_x or mb_y above 255 would spill into the neighbouring field */
+ *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
+
+ i += 1;
+ }
+ }
+
+ *command_ptr++ = 0; /* one zero dword precedes the end marker; NOTE(review): presumably padding - confirm */
+ *command_ptr++ = MI_BATCH_BUFFER_END;
+
+ dri_bo_unmap(vme_context->vme_batchbuffer.bo);
+}
+
static void gen75_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
@@ -589,8 +665,26 @@ static void gen75_vme_pipeline_programing(VADriverContextP ctx,
int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+ bool allow_hwscore = true;
+ int s;
+
+ for (s = 0; s < encode_state->num_slice_params_ext; s++) {
+ pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
+ if ((pSliceParameter->macroblock_address % width_in_mbs)) {
+ allow_hwscore = false;
+ break;
+ }
+ }
- gen75_vme_fill_vme_batchbuffer(ctx,
+ if (allow_hwscore)
+ gen75_vme_walker_fill_vme_batchbuffer(ctx,
+ encode_state,
+ width_in_mbs, height_in_mbs,
+ is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
+ pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
+ encoder_context);
+ else
+ gen75_vme_fill_vme_batchbuffer(ctx,
encode_state,
width_in_mbs, height_in_mbs,
is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
@@ -997,6 +1091,25 @@ Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *
vme_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
+ vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
+ vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
+ vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
+ MB_SCOREBOARD_B |
+ MB_SCOREBOARD_C);
+
+ /* In VME prediction the current mb depends on the neighbour
+ * A/B/C macroblock. So the left/up/up-right dependency should
+ * be considered.
+ */
+ vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
+ vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
+ vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
+ vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
+ vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
+ vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
+
+ vme_context->gpe_context.vfe_desc7.dword = 0;
+
i965_gpe_load_kernels(ctx,
&vme_context->gpe_context,
vme_kernel_list,
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 546e5ba..9e569b2 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -87,9 +87,10 @@ gen6_gpe_vfe_state(VADriverContextP ctx,
OUT_BATCH(batch,
gpe_context->vfe_state.urb_entry_size << 16 | /* URB Entry Allocation Size */
gpe_context->vfe_state.curbe_allocation_size); /* CURBE Allocation Size */
- OUT_BATCH(batch, 0); /* Disable Scoreboard */
- OUT_BATCH(batch, 0); /* Disable Scoreboard */
- OUT_BATCH(batch, 0); /* Disable Scoreboard */
+ /* the vfe_desc5/6/7 will decide whether the scoreboard is used. */
+ OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
+ OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
+ OUT_BATCH(batch, gpe_context->vfe_desc7.dword);
ADVANCE_BATCH(batch);
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 3ebb3cb..72d7de8 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -70,6 +70,47 @@ struct i965_gpe_context
unsigned int urb_entry_size : 16;
unsigned int curbe_allocation_size : 16;
} vfe_state;
+
+ /* vfe_desc5/6/7 determine whether the HW scoreboard is used.
+ * If the scoreboard is not used, leave them untouched.
+ */
+ union {
+ unsigned int dword; /* whole value, emitted by gen6_gpe_vfe_state() */
+ struct {
+ unsigned int mask:8; /* gen75 VME init stores MB_SCOREBOARD_A | B | C here */
+ unsigned int pad:22;
+ unsigned int type:1; /* gen75 VME init stores SCOREBOARD_STALLING here */
+ unsigned int enable:1; /* gen75 VME init sets this to 1 */
+ } scoreboard0;
+ }vfe_desc5;
+
+ union {
+ unsigned int dword; /* whole value, emitted by gen6_gpe_vfe_state() */
+ struct {
+ int delta_x0:4; /* signed 4-bit (x, y) offset pairs 0..3; gen75 VME init */
+ int delta_y0:4; /* sets pair 0 to (-1, 0), pair 1 to (0, -1) and */
+ int delta_x1:4; /* pair 2 to (1, -1), i.e. left, up and up-right */
+ int delta_y1:4;
+ int delta_x2:4;
+ int delta_y2:4;
+ int delta_x3:4; /* pair 3 is not assigned by the gen75 VME init code shown in this change */
+ int delta_y3:4;
+ } scoreboard1;
+ } vfe_desc6;
+
+ union {
+ unsigned int dword; /* gen75 VME init writes 0 to this whole dword */
+ struct {
+ int delta_x4:4; /* signed 4-bit (x, y) offset pairs 4..7 */
+ int delta_y4:4;
+ int delta_x5:4;
+ int delta_y5:4;
+ int delta_x6:4;
+ int delta_y6:4;
+ int delta_x7:4;
+ int delta_y7:4;
+ } scoreboard2;
+ } vfe_desc7;
unsigned int num_kernels;
struct i965_kernel kernels[MAX_GPE_KERNELS];