summary refs log tree commit diff
diff options
context:
space:
mode:
author Xiang, Haihao <haihao.xiang@intel.com> 2012-04-25 13:14:43 +0800
committer Xiang, Haihao <haihao.xiang@intel.com> 2012-04-25 13:14:43 +0800
commit 4f64c0dbc99c3dcda37d8b463dce2b8dd4895f7e (patch)
tree 297f940b5c1e111c4127e232cc17943d8af9252e
parent ea9e9016a4206ba5e72cbc762cbd503c96cb1215 (diff)
download libva-intel-driver-4f64c0dbc99c3dcda37d8b463dce2b8dd4895f7e.tar.gz
libva-intel-driver-4f64c0dbc99c3dcda37d8b463dce2b8dd4895f7e.tar.bz2
libva-intel-driver-4f64c0dbc99c3dcda37d8b463dce2b8dd4895f7e.zip
Support mixed mode for VME
Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>
-rw-r--r-- src/gen6_mfc.c 7
-rw-r--r-- src/gen6_mfc.h 2
-rw-r--r-- src/shaders/utils/mfc_batchbuffer.inc 2
-rw-r--r-- src/shaders/utils/mfc_batchbuffer_avc_inter.asm 80
-rw-r--r-- src/shaders/utils/mfc_batchbuffer_avc_inter.g6b 30
-rw-r--r-- src/shaders/utils/mfc_batchbuffer_avc_inter.g7b 30
-rw-r--r-- src/shaders/vme/inter_frame.asm 73
-rw-r--r-- src/shaders/vme/inter_frame.g6b 46
-rw-r--r-- src/shaders/vme/inter_frame.g7b 46
9 files changed, 273 insertions, 43 deletions
diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
index 01ce0d7..f5c04c6 100644
--- a/src/gen6_mfc.c
+++ b/src/gen6_mfc.c
@@ -1209,7 +1209,12 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
msg += INTRA_VME_OUTPUT_IN_DWS;
} else {
- gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
+ if (msg[0] & INTRA_MB_FLAG_MASK) {
+ gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
+ } else {
+ gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
+ }
+
msg += INTER_VME_OUTPUT_IN_DWS;
offset += INTER_VME_OUTPUT_IN_BYTES;
}
diff --git a/src/gen6_mfc.h b/src/gen6_mfc.h
index c7d1580..4cdb20d 100644
--- a/src/gen6_mfc.h
+++ b/src/gen6_mfc.h
@@ -40,6 +40,8 @@ struct encode_state;
#define MAX_MFC_REFERENCE_SURFACES 16
#define NUM_MFC_DMV_BUFFERS 34
+#define INTRA_MB_FLAG_MASK 0x00002000
+
#define __SOFTWARE__ 0
#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32)
diff --git a/src/shaders/utils/mfc_batchbuffer.inc b/src/shaders/utils/mfc_batchbuffer.inc
index efebb0e..c83d5d4 100644
--- a/src/shaders/utils/mfc_batchbuffer.inc
+++ b/src/shaders/utils/mfc_batchbuffer.inc
@@ -28,6 +28,8 @@ define(`BIND_IDX_VME_OUTPUT', `0')
define(`BIND_IDX_MFC_SLICE_HEADER', `1')
define(`BIND_IDX_MFC_BATCHBUFFER', `2')
+define(`INTRAMBFLAG_MASK', `0x00002000')
+
#ifdef DEV_SNB
define(`OB_CACHE_TYPE', `5')
diff --git a/src/shaders/utils/mfc_batchbuffer_avc_inter.asm b/src/shaders/utils/mfc_batchbuffer_avc_inter.asm
index 1cf4502..59152b8 100644
--- a/src/shaders/utils/mfc_batchbuffer_avc_inter.asm
+++ b/src/shaders/utils/mfc_batchbuffer_avc_inter.asm
@@ -35,20 +35,11 @@ __PAK_OBJECT:
*/
mul (1) tmp_vme_output.8<1>:ud tmp_offset.0<0,1,0>:ud INTER_VME_OUTPUT_IN_OWS:ud {align1} ; /* point to output buffer */
add (1) tmp_vme_output.8<1>:ud tmp_vme_output.8<0,1,0>:ud INTER_VME_OUTPUT_MV_IN_OWS:uw {align1}; /* point to other info */
-
- mov (16) pak_object_ud<1>:ud 0x0:ud {align1} ;
- /* DW0 */
- mov (1) pak_object0_ud<1>:ud MFC_AVC_PAK_OBJECT_INTER_DW0 ;
-
- /* DW2 */
- mul (1) pak_object2_ud<1>:ud tmp_offset.0<0,1,0>:ud INTER_VME_OUTPUT_IN_BYTES:ud {align1} ;
-
- /* DW5 */
- mov (1) pak_object5_ud<1>:ud MFC_AVC_PAK_OBJECT_INTRA_DW5 ;
-
- and.z.f0.1 (1) null<1>:uw flags<0,1,0>:uw FLAG_MASK_LAST_OBJECT {align1};
-
+
__PAK_OBJECT_LOOP:
+ /*
+ * Read other info
+ */
mov (8) msg_reg0.0<1>:ud tmp_vme_output<8,8,1>:ud {align1} ;
send (16)
@@ -67,6 +58,28 @@ send (16)
rlen ob_read_wb_len_vme_inter
{align1};
+
+ /*
+ * Fill the command
+ */
+ mov (16) pak_object_ud<1>:ud 0x0:ud {align1} ;
+
+ and.z.f0.1 (1) null<1>:uw flags<0,1,0>:uw FLAG_MASK_LAST_OBJECT {align1};
+
+ and.z.f0.0 (1) null<1>:ud ob_read_wb0.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ;
+
+ (-f0.0)jmpi (1) __FILL_INTRA_PAK_COMMAND ;
+
+__FILL_INTER_PAK_COMMAND:
+ /* DW0 */
+ mov (1) pak_object0_ud<1>:ud MFC_AVC_PAK_OBJECT_INTER_DW0 ;
+
+ /* DW2 */
+ mul (1) pak_object2_ud<1>:ud tmp_offset.0<0,1,0>:ud INTER_VME_OUTPUT_IN_BYTES:ud {align1} ;
+
+ /* DW5 */
+ mov (1) pak_object5_ud<1>:ud MFC_AVC_PAK_OBJECT_INTRA_DW5 ;
+
/* DW1 must be 32 for 8 MVs and 128 for 32 MVs !!! */
mov (1) pak_object1_ud<1>:ud ob_read_wb0.8<0,1,0>:ud {align1} ;
@@ -89,11 +102,48 @@ send (16)
/* DW7 */
mov (1) pak_object7_ud<1>:ud ob_read_wb0.4<0,1,0>:ud {align1} ;
+
+ jmpi (1) __OUTPUT_PAK_COMMAND ;
+
+__FILL_INTRA_PAK_COMMAND:
+ /* DW0 */
+ mov (1) pak_object0_ud<1>:ud MFC_AVC_PAK_OBJECT_INTRA_DW0 ;
+
+ /* DW5 */
+ mov (1) pak_object5_ud<1>:ud MFC_AVC_PAK_OBJECT_INTRA_DW5 ;
+
+ /* DW4 */
+ add (1) pak_object4_ud<1>:ud mb_xy<0,1,0>:uw MFC_AVC_PAK_OBJECT_INTRA_DW4 {align1} ;
+ add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1};
+ cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1};
+ (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ;
+ (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ;
+
+ /* DW6 */
+ mov (1) pak_object6_ud<1>:ud 0x0:ud {align1} ;
+ (-f0.1)mov (1) pak_object6_ud<1>:ud MFC_AVC_PAK_OBJECT_INTRA_DW6 {align1} ;
+ cmp.e.f0.0 (1) null<1>:uw total_mbs<0,1,0>:uw 1:uw {align1};
+ (-f0.0)mov (1) pak_object6_ud<1>:ud 0x0:ud {align1} ;
+ add (1) pak_object6_ud<1>:ud pak_object6_ud<0,1,0>:ud qp<0,1,0>:ub {align1} ;
+
+ /* DW3 */
+ and (1) pak_object3_ud<1>:ud ob_read_wb0.0<0,1,0>:ud 0xFFFF {align1} ;
+ add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud MFC_AVC_PAK_OBJECT_INTRA_DW3 {align1} ;
+
+ /* DW7 */
+ mov (1) pak_object7_ud<1>:ud ob_read_wb0.4<0,1,0>:ud {align1} ;
+
+ /* DW8 */
+ mov (1) pak_object8_ud<1>:ud ob_read_wb0.8<0,1,0>:ud {align1} ;
+
+ /* DW9 */
+ and (1) pak_object9_ud<1>:ud ob_read_wb0.12<0,1,0>:ud 0xFC:ud {align1} ;
+__OUTPUT_PAK_COMMAND:
mov (8) msg_reg0.0<1>:ud tmp_mfc_batchbuffer<8,8,1>:ud {align1} ;
mov (8) msg_reg1.0<1>:ud pak_object_ud<8,8,1>:ud {align1} ;
mov (8) msg_reg2.0<1>:ud pak_object8_ud<8,8,1>:ud {align1} ;
-
+
/* point to the next other info block */
add (1) tmp_vme_output.8<1>:ud tmp_vme_output.8<0,1,0>:ud INTER_VME_OUTPUT_IN_OWS:ud {align1} ;
@@ -115,7 +165,7 @@ send (16)
/* the new offset */
add (1) tmp_mfc_batchbuffer.8<1>:ud tmp_mfc_batchbuffer.8<0,1,0>:ud 4:ud {align1} ;
- add (1) pak_object2_ud<1>:ud pak_object2_ud<0,1,0>:ud MFC_AVC_PAK_OBJECT_INTER_DW2 {align1} ;
+ add (1) tmp_offset.0<1>:ud tmp_offset.0<0,1,0>:ud 1:ud {align1};
add.z.f0.0 (1) total_mbs<1>:w total_mbs<0,1,0>:w -1:w {align1};
(-f0.0)jmpi (1) __PAK_OBJECT_LOOP ;
diff --git a/src/shaders/utils/mfc_batchbuffer_avc_inter.g6b b/src/shaders/utils/mfc_batchbuffer_avc_inter.g6b
index ef83a16..2e1703e 100644
--- a/src/shaders/utils/mfc_batchbuffer_avc_inter.g6b
+++ b/src/shaders/utils/mfc_batchbuffer_avc_inter.g6b
@@ -19,13 +19,15 @@
{ 0x00000040, 0x21e04421, 0x000001e0, 0x000000b0 },
{ 0x00000041, 0x21080c21, 0x000001e0, 0x0000000a },
{ 0x00000040, 0x21082c21, 0x00000108, 0x00080008 },
+ { 0x00600001, 0x20000022, 0x008d0100, 0x00000000 },
+ { 0x05800031, 0x22001cc9, 0x00000000, 0x021a0200 },
{ 0x00800001, 0x23400061, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002d28, 0x020000ac, 0x00020002 },
+ { 0x01000005, 0x20000c20, 0x00000200, 0x00002000 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000022 },
{ 0x00000001, 0x23400061, 0x00000000, 0x71490009 },
{ 0x00000041, 0x23480c21, 0x000001e0, 0x000000a0 },
{ 0x00000001, 0x23540061, 0x00000000, 0x000f000f },
- { 0x01000005, 0x20002d28, 0x020000ac, 0x00020002 },
- { 0x00600001, 0x20000022, 0x008d0100, 0x00000000 },
- { 0x05800031, 0x22001cc9, 0x00000000, 0x021a0200 },
{ 0x00000001, 0x23440021, 0x00000208, 0x00000000 },
{ 0x00000001, 0x234c0021, 0x00000200, 0x00000000 },
{ 0x00000040, 0x23500d21, 0x000000b0, 0xffff0000 },
@@ -39,15 +41,33 @@
{ 0x00110001, 0x23580061, 0x00000000, 0x00000000 },
{ 0x00000040, 0x23584421, 0x00000358, 0x000000b6 },
{ 0x00000001, 0x235c0021, 0x00000204, 0x00000000 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000022 },
+ { 0x00000001, 0x23400061, 0x00000000, 0x71490009 },
+ { 0x00000001, 0x23540061, 0x00000000, 0x000f000f },
+ { 0x00000040, 0x23500d21, 0x000000b0, 0xffff0000 },
+ { 0x00000040, 0x20b02e31, 0x000000b0, 0x00010001 },
+ { 0x01000010, 0x20004528, 0x000000b4, 0x000000b0 },
+ { 0x00010001, 0x20b00171, 0x00000000, 0x00000000 },
+ { 0x00010040, 0x20b12e31, 0x000000b1, 0x00010001 },
+ { 0x00000001, 0x23580061, 0x00000000, 0x00000000 },
+ { 0x00110001, 0x23580061, 0x02000000, 0x04000000 },
+ { 0x01000010, 0x20002d28, 0x000000ae, 0x00010001 },
+ { 0x00110001, 0x23580061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23584421, 0x00000358, 0x000000b6 },
+ { 0x00000005, 0x234c1c21, 0x00000200, 0x0000ffff },
+ { 0x00000040, 0x234c0c21, 0x0000034c, 0x000e0000 },
+ { 0x00000001, 0x235c0021, 0x00000204, 0x00000000 },
+ { 0x00000001, 0x23600021, 0x00000208, 0x00000000 },
+ { 0x00000005, 0x23640c21, 0x0000020c, 0x000000fc },
{ 0x00600001, 0x20000022, 0x008d0140, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0340, 0x00000000 },
{ 0x00600001, 0x20400022, 0x008d0360, 0x00000000 },
{ 0x00000040, 0x21080c21, 0x00000108, 0x0000000a },
{ 0x05800031, 0x23001cdd, 0x00000000, 0x061b0302 },
{ 0x00000040, 0x21480c21, 0x00000148, 0x00000004 },
- { 0x00000040, 0x23480c21, 0x00000348, 0x000000a0 },
+ { 0x00000040, 0x21e00c21, 0x000001e0, 0x00000001 },
{ 0x01000040, 0x20ae3dad, 0x000000ae, 0xffffffff },
- { 0x00110020, 0x34001c00, 0x00001400, 0xffffffd0 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0xffffff9e },
{ 0x00010020, 0x34001c00, 0x02001400, 0x0000001e },
{ 0x00600001, 0x20000022, 0x008d0120, 0x00000000 },
{ 0x05800031, 0x22001cc9, 0x00000000, 0x021a0001 },
diff --git a/src/shaders/utils/mfc_batchbuffer_avc_inter.g7b b/src/shaders/utils/mfc_batchbuffer_avc_inter.g7b
index a780e16..1664010 100644
--- a/src/shaders/utils/mfc_batchbuffer_avc_inter.g7b
+++ b/src/shaders/utils/mfc_batchbuffer_avc_inter.g7b
@@ -19,13 +19,15 @@
{ 0x00000040, 0x21e04421, 0x000001e0, 0x000000b0 },
{ 0x00000041, 0x21080c21, 0x000001e0, 0x0000000a },
{ 0x00000040, 0x21082c21, 0x00000108, 0x00080008 },
+ { 0x00600001, 0x28000021, 0x008d0100, 0x00000000 },
+ { 0x0a800031, 0x22001ca9, 0x00000800, 0x02180200 },
{ 0x00800001, 0x23400061, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002d28, 0x020000ac, 0x00020002 },
+ { 0x01000005, 0x20000c20, 0x00000200, 0x00002000 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000022 },
{ 0x00000001, 0x23400061, 0x00000000, 0x71490009 },
{ 0x00000041, 0x23480c21, 0x000001e0, 0x000000a0 },
{ 0x00000001, 0x23540061, 0x00000000, 0x000f000f },
- { 0x01000005, 0x20002d28, 0x020000ac, 0x00020002 },
- { 0x00600001, 0x28000021, 0x008d0100, 0x00000000 },
- { 0x0a800031, 0x22001ca9, 0x00000800, 0x02180200 },
{ 0x00000001, 0x23440021, 0x00000208, 0x00000000 },
{ 0x00000001, 0x234c0021, 0x00000200, 0x00000000 },
{ 0x00000040, 0x23500d21, 0x000000b0, 0xffff0000 },
@@ -39,15 +41,33 @@
{ 0x00110001, 0x23580061, 0x00000000, 0x00000000 },
{ 0x00000040, 0x23584421, 0x00000358, 0x000000b6 },
{ 0x00000001, 0x235c0021, 0x00000204, 0x00000000 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000022 },
+ { 0x00000001, 0x23400061, 0x00000000, 0x71490009 },
+ { 0x00000001, 0x23540061, 0x00000000, 0x000f000f },
+ { 0x00000040, 0x23500d21, 0x000000b0, 0xffff0000 },
+ { 0x00000040, 0x20b02e31, 0x000000b0, 0x00010001 },
+ { 0x01000010, 0x20004528, 0x000000b4, 0x000000b0 },
+ { 0x00010001, 0x20b00171, 0x00000000, 0x00000000 },
+ { 0x00010040, 0x20b12e31, 0x000000b1, 0x00010001 },
+ { 0x00000001, 0x23580061, 0x00000000, 0x00000000 },
+ { 0x00110001, 0x23580061, 0x02000000, 0x04000000 },
+ { 0x01000010, 0x20002d28, 0x000000ae, 0x00010001 },
+ { 0x00110001, 0x23580061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23584421, 0x00000358, 0x000000b6 },
+ { 0x00000005, 0x234c1c21, 0x00000200, 0x0000ffff },
+ { 0x00000040, 0x234c0c21, 0x0000034c, 0x000e0000 },
+ { 0x00000001, 0x235c0021, 0x00000204, 0x00000000 },
+ { 0x00000001, 0x23600021, 0x00000208, 0x00000000 },
+ { 0x00000005, 0x23640c21, 0x0000020c, 0x000000fc },
{ 0x00600001, 0x28000021, 0x008d0140, 0x00000000 },
{ 0x00600001, 0x28200021, 0x008d0340, 0x00000000 },
{ 0x00600001, 0x28400021, 0x008d0360, 0x00000000 },
{ 0x00000040, 0x21080c21, 0x00000108, 0x0000000a },
{ 0x0a800031, 0x20001cac, 0x00000800, 0x060a0302 },
{ 0x00000040, 0x21480c21, 0x00000148, 0x00000004 },
- { 0x00000040, 0x23480c21, 0x00000348, 0x000000a0 },
+ { 0x00000040, 0x21e00c21, 0x000001e0, 0x00000001 },
{ 0x01000040, 0x20ae3dad, 0x000000ae, 0xffffffff },
- { 0x00110020, 0x34001c00, 0x00001400, 0xffffffd0 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0xffffff9e },
{ 0x00010020, 0x34001c00, 0x02001400, 0x0000001e },
{ 0x00600001, 0x28000021, 0x008d0120, 0x00000000 },
{ 0x0a800031, 0x22001ca9, 0x00000800, 0x02180001 },
diff --git a/src/shaders/vme/inter_frame.asm b/src/shaders/vme/inter_frame.asm
index a7f8f14..ef68e4c 100644
--- a/src/shaders/vme/inter_frame.asm
+++ b/src/shaders/vme/inter_frame.asm
@@ -23,6 +23,17 @@ mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1};
mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1};
mov (16) tmp_reg3.0<1>:UD 0x0:UD {align1};
+shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */
+add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */
+mov (1) read0_header.8<1>:UD BLOCK_32X1 {align1};
+mov (1) read0_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */
+mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1};
+mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* Source = (x, y) * 16 */
#ifdef DEV_SNB
@@ -46,25 +57,60 @@ mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1
add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD INTER_VME_OUTPUT_IN_OWS:UD {align1};
mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+__VME_LOOP:
+
+/*
+ * Media Read Message -- fetch neighbor edge pixels
+ */
+/* ROW */
+mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1};
+send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};
+
+/* COL */
+mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1};
+send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
/*
* VME message
*/
/* m0 */
-__VME_LOOP:
mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
/* m1 */
+mov (1) intra_flag<1>:UW 0x0:UW {align1} ;
+and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1};
+(f0.0) mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE {align1};
+
+cmp.nz.f0.0 (1) null<1>:UW orig_x_ub<0,1,0>:UB 0:UW {align1}; /* X != 0 */
+(f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_AE {align1}; /* A */
+
+cmp.nz.f0.0 (1) null<1>:UW orig_y_ub<0,1,0>:UB 0:UW {align1}; /* Y != 0 */
+(f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_B {align1}; /* B */
+
+mul.nz.f0.0 (1) null<1>:UW orig_x_ub<0,1,0>:UB orig_y_ub<0,1,0>:UB {align1}; /* X * Y != 0 */
+(f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_D {align1}; /* D */
+
+add (1) tmp_x_w<1>:W orig_x_ub<0,1,0>:UB 1:UW {align1}; /* X + 1 */
+add (1) tmp_x_w<1>:W w_in_mb_uw<0,1,0>:UW -tmp_x_w<0,1,0>:W {align1}; /* width - (X + 1) */
+mul.nz.f0.0 (1) null<1>:UD tmp_x_w<0,1,0>:W orig_y_ub<0,1,0>:UB {align1}; /* (width - (X + 1)) * Y != 0 */
+(f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_C {align1}; /* C */
+
+and.nz.f0.0 (1) null<1>:UW slice_edge_ub<0,1,0>:UB 2:UW {align1};
+(f0.0) and (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB 0xE0 {align1}; /* slice edge disable B,C,D*/
+
mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1};
/* m2 */
mov (8) vme_msg_2<1>:UD 0x0:UD {align1};
-/* m3 */
-mov (8) vme_msg_3<1>:UD 0x0:UD {align1};
+/* m3 */
+mov (8) vme_msg_3<1>:UD INEP_ROW.0<8,8,1>:UD {align1};
-/* m4 */
-mov (8) vme_msg_4<1>:UD 0x0:UD {align1};
+/* m4 */
+mov (8) vme_msg_4<1>:UD 0x0 {align1};
+mov (16) vme_msg_4.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1};
+mov (1) vme_msg_4.16<1>:UD INTRA_PREDICTORE_MODE {align1};
send (8)
vme_msg_ind
@@ -74,7 +120,7 @@ send (8)
BIND_IDX_VME,
0,
0,
- VME_MESSAGE_TYPE_INTER
+ VME_MESSAGE_TYPE_MIXED
)
mlen vme_msg_length
rlen vme_inter_wb_length
@@ -134,9 +180,13 @@ send (16)
/* other info */
add (1) msg_reg0.8<1>:UD obw_m0.8<0,1,0>:UD INTER_VME_OUTPUT_MV_IN_OWS:UD {align1} ;
+and.z.f0.0 (1) null<1>:ud vme_wb0.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ;
+
+(-f0.0)jmpi (1) __INTRA_INFO ;
+
+__INTER_INFO:
mov (1) tmp_uw1<1>:uw 0:uw {align1} ;
mov (1) tmp_ud1<1>:ud 0:ud {align1} ;
-and.z.f0.0 (1) null<1>:ud vme_wb0.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ;
(f0.0)and (1) tmp_uw1<1>:uw vme_wb0.2<0,1,0>:uw MV32_BIT_MASK:uw {align1} ;
(f0.0)shr (1) tmp_uw1<1>:uw tmp_uw1<1>:uw MV32_BIT_SHIFT:uw {align1} ;
(f0.0)mul (1) tmp_ud1<1>:ud tmp_uw1<0,1,0>:uw 96:uw {align1} ;
@@ -149,7 +199,16 @@ mov (1) msg_reg1.0<1>:uw vme_wb0.0<0,1,0>:uw {align1}
mov (1) msg_reg1.2<1>:uw tmp_uw1<0,1,0>:uw {align1} ;
mov (1) msg_reg1.4<1>:UD vme_wb0.28<0,1,0>:UD {align1};
mov (1) msg_reg1.8<1>:ud tmp_ud1<0,1,0>:ud {align1} ;
+
+jmpi (1) __OUTPUT_INFO ;
+__INTRA_INFO:
+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1};
+mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1};
+mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1};
+mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1};
+
+__OUTPUT_INFO:
/* bind index 3, write 1 oword, msg type: 8(OWord Block Write) */
send (16)
msg_ind
diff --git a/src/shaders/vme/inter_frame.g6b b/src/shaders/vme/inter_frame.g6b
index 14ed3c0..bef169b 100644
--- a/src/shaders/vme/inter_frame.g6b
+++ b/src/shaders/vme/inter_frame.g6b
@@ -1,6 +1,15 @@
{ 0x00800001, 0x24000061, 0x00000000, 0x00000000 },
{ 0x00800001, 0x24400061, 0x00000000, 0x00000000 },
{ 0x00800001, 0x24600061, 0x00000000, 0x00000000 },
+ { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 },
+ { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 },
+ { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff },
+ { 0x00000001, 0x240800e1, 0x00000000, 0x0000001f },
+ { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
+ { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 },
+ { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc },
+ { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 },
+ { 0x00000001, 0x24340231, 0x00000014, 0x00000000 },
{ 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
{ 0x00200009, 0x24402e29, 0x004500a0, 0x00040004 },
{ 0x00000040, 0x24403dad, 0x00000440, 0xfff0fff0 },
@@ -15,12 +24,33 @@
{ 0x00000040, 0x24884421, 0x00000488, 0x000000a0 },
{ 0x00000041, 0x24880c21, 0x00000488, 0x0000000a },
{ 0x00000001, 0x24940231, 0x00000014, 0x00000000 },
+ { 0x00600001, 0x20000022, 0x008d0400, 0x00000000 },
+ { 0x04600031, 0x22401cd1, 0x00000000, 0x02188004 },
+ { 0x00600001, 0x20000022, 0x008d0420, 0x00000000 },
+ { 0x04600031, 0x22801cd1, 0x00000000, 0x02288004 },
{ 0x00600001, 0x20000022, 0x008d0440, 0x00000000 },
+ { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 },
+ { 0x00010001, 0x247c00f1, 0x00000000, 0x00000002 },
+ { 0x02000010, 0x20002e28, 0x000000a0, 0x00000000 },
+ { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000060 },
+ { 0x02000010, 0x20002e28, 0x000000a1, 0x00000000 },
+ { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000010 },
+ { 0x02000041, 0x20004628, 0x000000a0, 0x000000a1 },
+ { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000004 },
+ { 0x00000040, 0x25202e2d, 0x000000a0, 0x00010001 },
+ { 0x00000040, 0x2520352d, 0x000000a2, 0x00004520 },
+ { 0x02000041, 0x200045a0, 0x00000520, 0x000000a1 },
+ { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000008 },
+ { 0x02000005, 0x20002e28, 0x000000a4, 0x00020002 },
+ { 0x00010005, 0x247d1e31, 0x0000047d, 0x000000e0 },
{ 0x00600001, 0x20200022, 0x008d0460, 0x00000000 },
{ 0x00600001, 0x20400062, 0x00000000, 0x00000000 },
- { 0x00600001, 0x20400062, 0x00000000, 0x00000000 },
- { 0x00600001, 0x20600062, 0x00000000, 0x00000000 },
- { 0x08600031, 0x21801cdd, 0x00000000, 0x08482000 },
+ { 0x00600001, 0x20400022, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x206000e2, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00cf0283, 0x00000000 },
+ { 0x00000001, 0x20700062, 0x00000000, 0x11111111 },
+ { 0x08600031, 0x21801cdd, 0x00000000, 0x08486000 },
{ 0x00600001, 0x20000022, 0x008d0480, 0x00000000 },
{ 0x00800001, 0x24a00229, 0x00b101a0, 0x00000000 },
{ 0x00600040, 0x44a03dad, 0x00ae04a0, 0xffc0ffc0 },
@@ -40,9 +70,10 @@
{ 0x00600001, 0x20800022, 0x008d0500, 0x00000000 },
{ 0x05800031, 0x22001cdd, 0x00000000, 0x0a1b0403 },
{ 0x00000040, 0x20080c22, 0x00000488, 0x00000008 },
+ { 0x01000005, 0x20000c20, 0x00000180, 0x00002000 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x0000001c },
{ 0x00000001, 0x25420169, 0x00000000, 0x00000000 },
{ 0x00000001, 0x25440061, 0x00000000, 0x00000000 },
- { 0x01000005, 0x20000c20, 0x00000180, 0x00002000 },
{ 0x00010005, 0x25422d29, 0x00000182, 0x00100010 },
{ 0x00010008, 0x25422d29, 0x00200542, 0x00040004 },
{ 0x00010041, 0x25442d21, 0x00000542, 0x00600060 },
@@ -54,6 +85,11 @@
{ 0x00000001, 0x2022012a, 0x00000542, 0x00000000 },
{ 0x00000001, 0x20240022, 0x0000019c, 0x00000000 },
{ 0x00000001, 0x20280022, 0x00000544, 0x00000000 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00000001, 0x20200022, 0x00000180, 0x00000000 },
+ { 0x00000001, 0x20240022, 0x00000190, 0x00000000 },
+ { 0x00000001, 0x20280022, 0x00000194, 0x00000000 },
+ { 0x00000001, 0x202c0022, 0x00000198, 0x00000000 },
{ 0x05800031, 0x22001cdd, 0x00000000, 0x041b0003 },
{ 0x00000040, 0x20a02e31, 0x000000a0, 0x00010001 },
{ 0x00000040, 0x24482d29, 0x00000448, 0x00100010 },
@@ -66,6 +102,6 @@
{ 0x00010040, 0x24423dad, 0x00000442, 0x00100010 },
{ 0x00000040, 0x24882c21, 0x00000488, 0x000a000a },
{ 0x01000040, 0x20a63dad, 0x020000a6, 0xffffffff },
- { 0x00110020, 0x34001c00, 0x02001400, 0xffffff98 },
+ { 0x00110020, 0x34001c00, 0x02001400, 0xffffff62 },
{ 0x00600001, 0x20000022, 0x008d0000, 0x00000000 },
{ 0x07800031, 0x24001cc8, 0x00000000, 0x82000010 },
diff --git a/src/shaders/vme/inter_frame.g7b b/src/shaders/vme/inter_frame.g7b
index 4d5d508..98bf471 100644
--- a/src/shaders/vme/inter_frame.g7b
+++ b/src/shaders/vme/inter_frame.g7b
@@ -1,6 +1,15 @@
{ 0x00800001, 0x24000061, 0x00000000, 0x00000000 },
{ 0x00800001, 0x24400061, 0x00000000, 0x00000000 },
{ 0x00800001, 0x24600061, 0x00000000, 0x00000000 },
+ { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 },
+ { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 },
+ { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff },
+ { 0x00000001, 0x240800e1, 0x00000000, 0x0000001f },
+ { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
+ { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 },
+ { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc },
+ { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 },
+ { 0x00000001, 0x24340231, 0x00000014, 0x00000000 },
{ 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
{ 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 },
{ 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 },
@@ -14,12 +23,33 @@
{ 0x00000040, 0x24884421, 0x00000488, 0x000000a0 },
{ 0x00000041, 0x24880c21, 0x00000488, 0x0000000a },
{ 0x00000001, 0x24940231, 0x00000014, 0x00000000 },
+ { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 },
+ { 0x04600031, 0x22401cb1, 0x00000800, 0x02190004 },
+ { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 },
+ { 0x04600031, 0x22801cb1, 0x00000800, 0x02290004 },
{ 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+ { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 },
+ { 0x00010001, 0x247c00f1, 0x00000000, 0x00000002 },
+ { 0x02000010, 0x20002e28, 0x000000a0, 0x00000000 },
+ { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000060 },
+ { 0x02000010, 0x20002e28, 0x000000a1, 0x00000000 },
+ { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000010 },
+ { 0x02000041, 0x20004628, 0x000000a0, 0x000000a1 },
+ { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000004 },
+ { 0x00000040, 0x25202e2d, 0x000000a0, 0x00010001 },
+ { 0x00000040, 0x2520352d, 0x000000a2, 0x00004520 },
+ { 0x02000041, 0x200045a0, 0x00000520, 0x000000a1 },
+ { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000008 },
+ { 0x02000005, 0x20002e28, 0x000000a4, 0x00020002 },
+ { 0x00010005, 0x247d1e31, 0x0000047d, 0x000000e0 },
{ 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
{ 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
- { 0x08600031, 0x21801cbd, 0x00000800, 0x0a682000 },
+ { 0x00600001, 0x28600021, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x288000e1, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x28800231, 0x00cf0283, 0x00000000 },
+ { 0x00000001, 0x28900061, 0x00000000, 0x11111111 },
+ { 0x08600031, 0x21801cbd, 0x00000800, 0x0a686000 },
{ 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
{ 0x00600001, 0x24a00021, 0x008d01a0, 0x00000000 },
{ 0x00600001, 0x24c00021, 0x008d01c0, 0x00000000 },
@@ -31,9 +61,10 @@
{ 0x00600001, 0x28800021, 0x008d0500, 0x00000000 },
{ 0x0a800031, 0x20001cac, 0x00000800, 0x0a0a0403 },
{ 0x00000040, 0x28080c21, 0x00000488, 0x00000008 },
+ { 0x01000005, 0x20000c20, 0x00000180, 0x00002000 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x0000001c },
{ 0x00000001, 0x25420169, 0x00000000, 0x00000000 },
{ 0x00000001, 0x25440061, 0x00000000, 0x00000000 },
- { 0x01000005, 0x20000c20, 0x00000180, 0x00002000 },
{ 0x00010005, 0x25422d29, 0x00000182, 0x00200020 },
{ 0x00010008, 0x25422d29, 0x00200542, 0x00050005 },
{ 0x00010041, 0x25442d21, 0x00000542, 0x00600060 },
@@ -45,6 +76,11 @@
{ 0x00000001, 0x28220129, 0x00000542, 0x00000000 },
{ 0x00000001, 0x28240021, 0x0000019c, 0x00000000 },
{ 0x00000001, 0x28280021, 0x00000544, 0x00000000 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00000001, 0x28200021, 0x00000180, 0x00000000 },
+ { 0x00000001, 0x28240021, 0x00000190, 0x00000000 },
+ { 0x00000001, 0x28280021, 0x00000194, 0x00000000 },
+ { 0x00000001, 0x282c0021, 0x00000198, 0x00000000 },
{ 0x0a800031, 0x20001cac, 0x00000800, 0x040a0003 },
{ 0x00000040, 0x20a02e31, 0x000000a0, 0x00010001 },
{ 0x00000040, 0x24482d29, 0x00000448, 0x00100010 },
@@ -54,6 +90,6 @@
{ 0x00010040, 0x244a2d29, 0x0000044a, 0x00100010 },
{ 0x00000040, 0x24882c21, 0x00000488, 0x000a000a },
{ 0x01000040, 0x20a63dad, 0x020000a6, 0xffffffff },
- { 0x00110020, 0x34001c00, 0x02001400, 0xffffffae },
+ { 0x00110020, 0x34001c00, 0x02001400, 0xffffff78 },
{ 0x00600001, 0x28000021, 0x008d0000, 0x00000000 },
{ 0x07800031, 0x24001ca8, 0x00000800, 0x82000010 },