summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2012-07-29 09:50:39 +0100
committerChris Wilson <chris@chris-wilson.co.uk>2012-07-30 13:12:04 +0100
commit7c9dbc980b760e0053d83ca2d7cb147613285680 (patch)
tree7c1f85208d7b10a009282387bc95a9bf0b197e77
parent8ebafa0493c0fa08ab9d80eeb1191b7560dc0863 (diff)
downloadxf86-video-intel-7c9dbc980b760e0053d83ca2d7cb147613285680.tar.gz
xf86-video-intel-7c9dbc980b760e0053d83ca2d7cb147613285680.tar.bz2
xf86-video-intel-7c9dbc980b760e0053d83ca2d7cb147613285680.zip
sna: Assemble SF and WM kernels using brw
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r--.gitignore1
-rw-r--r--src/sna/brw/Makefile.am17
-rw-r--r--src/sna/brw/brw.h14
-rw-r--r--src/sna/brw/brw_sf.c51
-rw-r--r--src/sna/brw/brw_test.c60
-rw-r--r--src/sna/brw/brw_test.h46
-rw-r--r--src/sna/brw/brw_test_gen4.c199
-rw-r--r--src/sna/brw/brw_test_gen5.c208
-rw-r--r--src/sna/brw/brw_test_gen6.c209
-rw-r--r--src/sna/brw/brw_test_gen7.c178
-rw-r--r--src/sna/brw/brw_wm.c542
-rw-r--r--src/sna/gen6_render.c6
-rw-r--r--src/sna/gen7_render.c6
-rw-r--r--src/sna/sna_render.h17
-rw-r--r--src/sna/sna_stream.c38
15 files changed, 1582 insertions, 10 deletions
diff --git a/.gitignore b/.gitignore
index 579fe4be9..f7799e52b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,6 +44,7 @@ symlink-tree
texinfo.tex
ylwrap
src/sna/git_version.h
+src/sna/brw/brw_test
# Do not edit the following section
# Edit Compile Debug Document Distribute
diff --git a/src/sna/brw/Makefile.am b/src/sna/brw/Makefile.am
index edb3db4fa..b3513cf95 100644
--- a/src/sna/brw/Makefile.am
+++ b/src/sna/brw/Makefile.am
@@ -20,6 +20,7 @@
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
noinst_LTLIBRARIES = libbrw.la
+noinst_PROGRAMS = brw_test
AM_CFLAGS = \
@CWARNFLAGS@ \
@@ -35,8 +36,24 @@ AM_CFLAGS += @VALGRIND_CFLAGS@
endif
libbrw_la_SOURCES = \
+ brw.h \
brw_disasm.c \
brw_eu.h \
brw_eu.c \
brw_eu_emit.c \
+ brw_sf.c \
+ brw_wm.c \
+ $(NULL)
+
+brw_test_SOURCES = \
+ brw_test.c \
+ brw_test.h \
+ brw_test_gen4.c \
+ brw_test_gen5.c \
+ brw_test_gen6.c \
+ brw_test_gen7.c \
+ $(NULL)
+
+brw_test_LDADD = \
+ libbrw.la \
$(NULL)
diff --git a/src/sna/brw/brw.h b/src/sna/brw/brw.h
new file mode 100644
index 000000000..a39b253ae
--- /dev/null
+++ b/src/sna/brw/brw.h
@@ -0,0 +1,14 @@
+#ifndef BRW_H
+#define BRW_H
+
+#include "brw_eu.h"
+
+/*
+ * Kernel generators.  Each function emits the instructions of one kernel
+ * variant through the brw_compile handle p.
+ */
+
+/* SF kernels, without and with a mask channel. */
+void brw_sf_kernel__nomask(struct brw_compile *p);
+void brw_sf_kernel__mask(struct brw_compile *p);
+
+/*
+ * WM kernels.  dispatch_width selects the SIMD width the kernel is
+ * assembled for; the callers visible in this change pass 8 or 16.
+ */
+void brw_wm_kernel__affine(struct brw_compile *p, int dispatch_width);
+void brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch_width);
+void brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch_width);
+void brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch_width);
+
+void brw_wm_kernel__projective(struct brw_compile *p, int dispatch_width);
+void brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch_width);
+void brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch_width);
+void brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch_width);
+
+#endif /* BRW_H */
diff --git a/src/sna/brw/brw_sf.c b/src/sna/brw/brw_sf.c
new file mode 100644
index 000000000..0c69433d6
--- /dev/null
+++ b/src/sna/brw/brw_sf.c
@@ -0,0 +1,51 @@
+#include "brw.h"
+
+/*
+ * Emit the SF kernel for the no-mask case.
+ *
+ * Instructions are emitted in this order: a math invert into GRF 6, a MOV
+ * of GRF 3 into message register 3, then two ADD/MUL pairs that form a
+ * difference in GRF 7 and scale it by a scalar taken from GRF 6 (results in
+ * message registers 1 and 2), and finally the URB write.
+ */
+void brw_sf_kernel__nomask(struct brw_compile *p)
+{
+	const struct brw_reg vert0 = brw_vec4_grf(3, 0);
+	const struct brw_reg vert1 = brw_vec4_grf(4, 0);
+	const struct brw_reg vert2 = brw_vec4_grf(5, 0);
+	const struct brw_reg scratch = brw_vec8_grf(7, 0);
+	const struct brw_reg recip = brw_vec4_grf(6, 0);
+
+	brw_math_invert(p, recip, brw_vec4_grf(1, 11));
+
+	brw_MOV(p, brw_message_reg(3), vert0);
+
+	/* (vert1 - vert2) scaled by element 0 of GRF 6 */
+	brw_ADD(p, scratch, vert1, brw_negate(vert2));
+	brw_MUL(p, brw_message_reg(1), scratch, brw_vec1_grf(6, 0));
+
+	/* (vert2 - vert0) scaled by element 2 of GRF 6 */
+	brw_ADD(p, scratch, vert2, brw_negate(vert0));
+	brw_MUL(p, brw_message_reg(2), scratch, brw_vec1_grf(6, 2));
+
+	brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0, 0),
+		      false, true, 4, 0, true, true, 0,
+		      BRW_URB_SWIZZLE_TRANSPOSE);
+}
+
+/*
+ * Emit the SF kernel for the masked case.
+ *
+ * Same instruction sequence as brw_sf_kernel__nomask(), except that the
+ * three source registers (GRF 3, 4 and 5) are addressed as 8-wide vectors
+ * rather than 4-wide ones.
+ */
+void brw_sf_kernel__mask(struct brw_compile *p)
+{
+	const struct brw_reg vert0 = brw_vec8_grf(3, 0);
+	const struct brw_reg vert1 = brw_vec8_grf(4, 0);
+	const struct brw_reg vert2 = brw_vec8_grf(5, 0);
+	const struct brw_reg scratch = brw_vec8_grf(7, 0);
+	const struct brw_reg recip = brw_vec4_grf(6, 0);
+
+	brw_math_invert(p, recip, brw_vec4_grf(1, 11));
+
+	brw_MOV(p, brw_message_reg(3), vert0);
+
+	/* (vert1 - vert2) scaled by element 0 of GRF 6 */
+	brw_ADD(p, scratch, vert1, brw_negate(vert2));
+	brw_MUL(p, brw_message_reg(1), scratch, brw_vec1_grf(6, 0));
+
+	/* (vert2 - vert0) scaled by element 2 of GRF 6 */
+	brw_ADD(p, scratch, vert2, brw_negate(vert0));
+	brw_MUL(p, brw_message_reg(2), scratch, brw_vec1_grf(6, 2));
+
+	brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0, 0),
+		      false, true, 4, 0, true, true, 0,
+		      BRW_URB_SWIZZLE_TRANSPOSE);
+}
diff --git a/src/sna/brw/brw_test.c b/src/sna/brw/brw_test.c
new file mode 100644
index 000000000..4f038584e
--- /dev/null
+++ b/src/sna/brw/brw_test.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "brw_test.h"
+#include <string.h>
+
+/* Number of kernels whose assembled output differed from its reference. */
+static int brw_test_failures;
+
+/*
+ * Compare a freshly assembled kernel against its reference binary.
+ *
+ * function: label printed in front of each dump (callers pass __FUNCTION__)
+ * gen:      generation value handed to brw_disasm()
+ * new/num_new: assembled instructions and their count
+ * old/num_old: reference instructions and their count
+ *
+ * Prints nothing when both streams have the same length and identical
+ * bytes.  Otherwise both streams are disassembled to stdout and the
+ * mismatch is recorded so that main() can report it in the exit status.
+ */
+void brw_test_compare(const char *function, int gen,
+		      const struct brw_instruction *new, int num_new,
+		      const struct brw_instruction *old, int num_old)
+{
+	int n;
+
+	/* Lengths are checked first so memcmp never reads past either array. */
+	if (num_new == num_old &&
+	    memcmp(new, old, num_new * sizeof(struct brw_instruction)) == 0)
+		return;
+
+	brw_test_failures++;
+
+	printf ("%s: new\n", function);
+	for (n = 0; n < num_new; n++)
+		brw_disasm(stdout, &new[n], gen);
+
+	printf ("%s: old\n", function);
+	for (n = 0; n < num_old; n++)
+		brw_disasm(stdout, &old[n], gen);
+	printf ("\n");
+}
+
+
+/* Check that we can recreate all the existing programs using the assembler */
+int main(int argc, char **argv)
+{
+	(void)argc;
+	(void)argv;
+
+	brw_test_gen4();
+	brw_test_gen5();
+	brw_test_gen6();
+	brw_test_gen7();
+
+	/* Non-zero exit status if any kernel failed to match its reference. */
+	return brw_test_failures != 0;
+}
diff --git a/src/sna/brw/brw_test.h b/src/sna/brw/brw_test.h
new file mode 100644
index 000000000..41f4ca6b5
--- /dev/null
+++ b/src/sna/brw/brw_test.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifndef BRW_TEST_H
+#define BRW_TEST_H
+
+#include "brw.h"
+
+/* Element count of a true array (not a pointer); argument is parenthesised
+ * so that expressions such as ARRAY_SIZE(*table) expand correctly. */
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
+#endif
+
+void brw_test_compare(const char *function, int gen,
+ const struct brw_instruction *new, int num_new,
+ const struct brw_instruction *old, int num_old);
+
+void brw_test_gen4(void);
+void brw_test_gen5(void);
+void brw_test_gen6(void);
+void brw_test_gen7(void);
+
+#endif /* BRW_TEST_H */
diff --git a/src/sna/brw/brw_test_gen4.c b/src/sna/brw/brw_test_gen4.c
new file mode 100644
index 000000000..742c7c24f
--- /dev/null
+++ b/src/sna/brw/brw_test_gen4.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "brw_test.h"
+
+#include <string.h>
+
+static const uint32_t sf_kernel[][4] = {
+#include "exa_sf.g4b"
+};
+
+static const uint32_t sf_kernel_mask[][4] = {
+#include "exa_sf_mask.g4b"
+};
+
+static const uint32_t ps_kernel_nomask_affine[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_nomask_projective[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_maskca_affine[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_mask_affine.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
+#include "exa_wm_ca.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_maskca_projective[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_mask_projective.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
+#include "exa_wm_ca.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_a.g4b"
+#include "exa_wm_mask_affine.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
+#include "exa_wm_ca_srcalpha.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_a.g4b"
+#include "exa_wm_mask_projective.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
+#include "exa_wm_ca_srcalpha.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_masknoca_affine[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_mask_affine.g4b"
+#include "exa_wm_mask_sample_a.g4b"
+#include "exa_wm_noca.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_masknoca_projective[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_mask_projective.g4b"
+#include "exa_wm_mask_sample_a.g4b"
+#include "exa_wm_noca.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_packed_static[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_yuv_rgb.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_planar_static[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_planar.g4b"
+#include "exa_wm_yuv_rgb.g4b"
+#include "exa_wm_write.g4b"
+};
+
+/*
+ * Compare the kernel just assembled into the local `p` against the
+ * reference table `old`.  A `struct brw_compile p` must be in scope at the
+ * call site.
+ * NOTE(review): the last 8 entries of the reference are left out of the
+ * comparison -- presumably trailing padding in the .g4b tables; confirm.
+ */
+#define compare(old) brw_test_compare(__FUNCTION__, p.gen, p.store, p.nr_insn, (const struct brw_instruction *)(old), ARRAY_SIZE(old)-8)
+
+/* Assemble the no-mask SF kernel with gen 40 and check it against sf_kernel. */
+static void gen4_sf__nomask(void)
+{
+	struct brw_compile p;	/* compare() refers to this local by name */
+	uint32_t insns[128];
+
+	brw_compile_init(&p, 40, insns);
+	brw_sf_kernel__nomask(&p);
+
+	compare(sf_kernel);
+}
+
+/* Assemble the masked SF kernel with gen 40 and check it against sf_kernel_mask. */
+static void gen4_sf__mask(void)
+{
+	struct brw_compile p;	/* compare() refers to this local by name */
+	uint32_t insns[128];
+
+	brw_compile_init(&p, 40, insns);
+	brw_sf_kernel__mask(&p);
+
+	compare(sf_kernel_mask);
+}
+
+/*
+ * Assemble the 16-wide affine WM kernel with gen 40 and check it against
+ * ps_kernel_nomask_affine.
+ */
+static void
+gen4_wm_kernel__affine_nomask(void)
+{
+	/*
+	 * brw_compile_init() is not told how large the buffer is, so it must
+	 * comfortably exceed the kernel.  1024 dwords (256 instructions of 4
+	 * dwords each) matches the gen6/gen7 WM tests; the previous 128
+	 * dwords left room for only 32 instructions.
+	 */
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 40, store);
+	brw_wm_kernel__affine(&p, 16);
+
+	compare(ps_kernel_nomask_affine);
+}
+
+/*
+ * Assemble the 16-wide affine+mask WM kernel with gen 40 and check it
+ * against ps_kernel_masknoca_affine.
+ */
+static void
+gen4_wm_kernel__affine_mask_noca(void)
+{
+	/*
+	 * brw_compile_init() is not told how large the buffer is, so it must
+	 * comfortably exceed the kernel.  1024 dwords (256 instructions of 4
+	 * dwords each) matches the gen6/gen7 WM tests; the previous 128
+	 * dwords left room for only 32 instructions.
+	 */
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 40, store);
+	brw_wm_kernel__affine_mask(&p, 16);
+
+	compare(ps_kernel_masknoca_affine);
+}
+
+/*
+ * Assemble the 16-wide projective WM kernel with gen 40 and check it
+ * against ps_kernel_nomask_projective.
+ */
+static void
+gen4_wm_kernel__projective_nomask(void)
+{
+	/*
+	 * brw_compile_init() is not told how large the buffer is, so it must
+	 * comfortably exceed the kernel.  1024 dwords (256 instructions of 4
+	 * dwords each) matches the gen6/gen7 WM tests; the previous 128
+	 * dwords left room for only 32 instructions.
+	 */
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 40, store);
+	brw_wm_kernel__projective(&p, 16);
+
+	compare(ps_kernel_nomask_projective);
+}
+
+/* Run every gen4 comparison in turn: the SF kernels, then the WM kernels. */
+void brw_test_gen4(void)
+{
+	static void (*const tests[])(void) = {
+		gen4_sf__nomask,
+		gen4_sf__mask,
+		gen4_wm_kernel__affine_nomask,
+		gen4_wm_kernel__affine_mask_noca,
+		gen4_wm_kernel__projective_nomask,
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++)
+		tests[i]();
+}
diff --git a/src/sna/brw/brw_test_gen5.c b/src/sna/brw/brw_test_gen5.c
new file mode 100644
index 000000000..62a999e1d
--- /dev/null
+++ b/src/sna/brw/brw_test_gen5.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "brw_test.h"
+
+#include <string.h>
+
+static const uint32_t sf_kernel[][4] = {
+#include "exa_sf.g5b"
+};
+
+static const uint32_t sf_kernel_mask[][4] = {
+#include "exa_sf_mask.g5b"
+};
+
+static const uint32_t ps_kernel_nomask_affine[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_affine.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_write.g5b"
+};
+
+static const uint32_t ps_kernel_nomask_projective[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_projective.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_write.g5b"
+};
+
+static const uint32_t ps_kernel_maskca_affine[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_affine.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_mask_affine.g5b"
+#include "exa_wm_mask_sample_argb.g5b"
+#include "exa_wm_ca.g5b"
+#include "exa_wm_write.g5b"
+};
+
+static const uint32_t ps_kernel_maskca_projective[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_projective.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_mask_projective.g5b"
+#include "exa_wm_mask_sample_argb.g5b"
+#include "exa_wm_ca.g5b"
+#include "exa_wm_write.g5b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_affine.g5b"
+#include "exa_wm_src_sample_a.g5b"
+#include "exa_wm_mask_affine.g5b"
+#include "exa_wm_mask_sample_argb.g5b"
+#include "exa_wm_ca_srcalpha.g5b"
+#include "exa_wm_write.g5b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_projective.g5b"
+#include "exa_wm_src_sample_a.g5b"
+#include "exa_wm_mask_projective.g5b"
+#include "exa_wm_mask_sample_argb.g5b"
+#include "exa_wm_ca_srcalpha.g5b"
+#include "exa_wm_write.g5b"
+};
+
+static const uint32_t ps_kernel_masknoca_affine[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_affine.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_mask_affine.g5b"
+#include "exa_wm_mask_sample_a.g5b"
+#include "exa_wm_noca.g5b"
+#include "exa_wm_write.g5b"
+};
+
+static const uint32_t ps_kernel_masknoca_projective[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_projective.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_mask_projective.g5b"
+#include "exa_wm_mask_sample_a.g5b"
+#include "exa_wm_noca.g5b"
+#include "exa_wm_write.g5b"
+};
+
+static const uint32_t ps_kernel_packed_static[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_affine.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_yuv_rgb.g5b"
+#include "exa_wm_write.g5b"
+};
+
+static const uint32_t ps_kernel_planar_static[][4] = {
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_affine.g5b"
+#include "exa_wm_src_sample_planar.g5b"
+#include "exa_wm_yuv_rgb.g5b"
+#include "exa_wm_write.g5b"
+};
+
+/*
+ * Compare the kernel just assembled into the local `p` against the whole of
+ * the reference table `old`.  A `struct brw_compile p` must be in scope at
+ * the call site.
+ */
+#define compare(old) brw_test_compare(__FUNCTION__, p.gen, p.store, p.nr_insn, (const struct brw_instruction *)(old), ARRAY_SIZE(old))
+
+/* Assemble the no-mask SF kernel with gen 50 and check it against sf_kernel. */
+static void gen5_sf(void)
+{
+	struct brw_compile p;	/* compare() refers to this local by name */
+	uint32_t insns[128];
+
+	brw_compile_init(&p, 50, insns);
+	brw_sf_kernel__nomask(&p);
+
+	compare(sf_kernel);
+}
+
+/* Assemble the masked SF kernel with gen 50 and check it against sf_kernel_mask. */
+static void gen5_sf_mask(void)
+{
+	struct brw_compile p;	/* compare() refers to this local by name */
+	uint32_t insns[128];
+
+	brw_compile_init(&p, 50, insns);
+	brw_sf_kernel__mask(&p);
+
+	compare(sf_kernel_mask);
+}
+
+/*
+ * Assemble the 16-wide affine WM kernel with gen 50 and check it against
+ * ps_kernel_nomask_affine.
+ */
+static void gen5_wm_affine_nomask(void)
+{
+	/*
+	 * brw_compile_init() is not told how large the buffer is, so it must
+	 * comfortably exceed the kernel.  1024 dwords (256 instructions of 4
+	 * dwords each) matches the gen6/gen7 WM tests; the previous 128
+	 * dwords left room for only 32 instructions.
+	 */
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 50, store);
+	brw_wm_kernel__affine(&p, 16);
+
+	compare(ps_kernel_nomask_affine);
+}
+
+/*
+ * Assemble the 16-wide affine+mask WM kernel with gen 50 and check it
+ * against ps_kernel_masknoca_affine.
+ */
+static void gen5_wm_affine_mask_noca(void)
+{
+	/*
+	 * brw_compile_init() is not told how large the buffer is, so it must
+	 * comfortably exceed the kernel.  1024 dwords (256 instructions of 4
+	 * dwords each) matches the gen6/gen7 WM tests; the previous 128
+	 * dwords left room for only 32 instructions.
+	 */
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 50, store);
+	brw_wm_kernel__affine_mask(&p, 16);
+
+	compare(ps_kernel_masknoca_affine);
+}
+
+/*
+ * Assemble the 16-wide affine+mask (ca) WM kernel with gen 50 and check it
+ * against ps_kernel_maskca_affine.
+ */
+static void gen5_wm_affine_mask_ca(void)
+{
+	/*
+	 * brw_compile_init() is not told how large the buffer is, so it must
+	 * comfortably exceed the kernel.  1024 dwords (256 instructions of 4
+	 * dwords each) matches the gen6/gen7 WM tests; the previous 128
+	 * dwords left room for only 32 instructions.
+	 */
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 50, store);
+	brw_wm_kernel__affine_mask_ca(&p, 16);
+
+	compare(ps_kernel_maskca_affine);
+}
+
+/*
+ * Assemble the 16-wide projective WM kernel with gen 50 and check it
+ * against ps_kernel_nomask_projective.
+ */
+static void gen5_wm_projective_nomask(void)
+{
+	/*
+	 * brw_compile_init() is not told how large the buffer is, so it must
+	 * comfortably exceed the kernel.  1024 dwords (256 instructions of 4
+	 * dwords each) matches the gen6/gen7 WM tests; the previous 128
+	 * dwords left room for only 32 instructions.
+	 */
+	uint32_t store[1024];
+	struct brw_compile p;
+
+	brw_compile_init(&p, 50, store);
+	brw_wm_kernel__projective(&p, 16);
+
+	compare(ps_kernel_nomask_projective);
+}
+
+/* Run every gen5 comparison in turn: the SF kernels, then the WM kernels. */
+void brw_test_gen5(void)
+{
+	static void (*const tests[])(void) = {
+		gen5_sf,
+		gen5_sf_mask,
+		gen5_wm_affine_nomask,
+		gen5_wm_affine_mask_noca,
+		gen5_wm_affine_mask_ca,
+		gen5_wm_projective_nomask,
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++)
+		tests[i]();
+}
diff --git a/src/sna/brw/brw_test_gen6.c b/src/sna/brw/brw_test_gen6.c
new file mode 100644
index 000000000..64bc2fb10
--- /dev/null
+++ b/src/sna/brw/brw_test_gen6.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "brw_test.h"
+
+#include <string.h>
+
+static const uint32_t ps_kernel_nomask_affine[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_nomask_projective[][4] = {
+#include "exa_wm_src_projective.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_maskca_affine[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_mask_affine.g6b"
+#include "exa_wm_mask_sample_argb.g6b"
+#include "exa_wm_ca.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_maskca_projective[][4] = {
+#include "exa_wm_src_projective.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_mask_projective.g6b"
+#include "exa_wm_mask_sample_argb.g6b"
+#include "exa_wm_ca.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_a.g6b"
+#include "exa_wm_mask_affine.g6b"
+#include "exa_wm_mask_sample_argb.g6b"
+#include "exa_wm_ca_srcalpha.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = {
+#include "exa_wm_src_projective.g6b"
+#include "exa_wm_src_sample_a.g6b"
+#include "exa_wm_mask_projective.g6b"
+#include "exa_wm_mask_sample_argb.g6b"
+#include "exa_wm_ca_srcalpha.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_masknoca_affine[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_mask_affine.g6b"
+#include "exa_wm_mask_sample_a.g6b"
+#include "exa_wm_noca.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_masknoca_projective[][4] = {
+#include "exa_wm_src_projective.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_mask_projective.g6b"
+#include "exa_wm_mask_sample_a.g6b"
+#include "exa_wm_noca.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_packed[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_yuv_rgb.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_planar[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_planar.g6b"
+#include "exa_wm_yuv_rgb.g6b"
+#include "exa_wm_write.g6b"
+};
+
+/*
+ * Compare the kernel just assembled into the local `p` against the whole of
+ * the reference table `old`.  A `struct brw_compile p` must be in scope at
+ * the call site.
+ */
+#define compare(old) brw_test_compare(__FUNCTION__, p.gen, p.store, p.nr_insn, (const struct brw_instruction *)(old), ARRAY_SIZE(old))
+
+#if 0 /* disabled scratch helpers: nothing up to the matching #endif is compiled */
+static void wm_src_affine(struct brw_compile *p) /* emits four PLN instructions into message registers 2..5 */
+{
+ brw_PLN(p, brw_message_reg(2), brw_vec1_grf(6,0), brw_vec8_grf(2,0));
+ brw_PLN(p, brw_message_reg(3), brw_vec1_grf(6,0), brw_vec8_grf(4,0));
+ brw_PLN(p, brw_message_reg(4), brw_vec1_grf(6,4), brw_vec8_grf(2,0));
+ brw_PLN(p, brw_message_reg(5), brw_vec1_grf(6,4), brw_vec8_grf(4,0));
+}
+
+static void wm_src_sample_argb(struct brw_compile *p) /* emits a MOV and a SAMPLE, then dumps both instruction streams */
+{
+ static const uint32_t fragment[][4] = { /* reference instructions to disassemble for comparison */
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_write.g6b"
+ };
+ int n;
+
+ brw_push_insn_state(p); /* state changes below are undone by the matching pop */
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p,
+ retype(brw_vec1_grf(0,2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0));
+ brw_pop_insn_state(p);
+
+ brw_SAMPLE(p,
+ retype(vec16(brw_vec8_grf(14, 0)), BRW_REGISTER_TYPE_UW),
+ 1,
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD),
+ 1, 0,
+ WRITEMASK_XYZW,
+ GEN5_SAMPLER_MESSAGE_SAMPLE,
+ 8,
+ 5,
+ true,
+ BRW_SAMPLER_SIMD_MODE_SIMD16);
+
+
+ for (n = 0; n < p->nr_insn; n++) { /* disassemble everything emitted so far */
+ brw_disasm(stdout, &p->store[n], 60);
+ }
+
+ printf("\n\n");
+ for (n = 0; n < ARRAY_SIZE(fragment); n++) { /* then the reference, one 4-dword instruction at a time */
+ brw_disasm(stdout,
+ (const struct brw_instruction *)&fragment[n][0],
+ 60);
+ }
+}
+
+static void wm_write(struct brw_compile *p) /* empty stub */
+{
+}
+#endif
+
+/*
+ * Assemble the 16-wide affine WM kernel with gen 60 and check it against
+ * ps_kernel_nomask_affine.
+ */
+static void gen6_ps_nomask_affine(void)
+{
+	struct brw_compile p;	/* compare() refers to this local by name */
+	uint32_t insns[1024];
+
+	brw_compile_init(&p, 60, insns);
+	brw_wm_kernel__affine(&p, 16);
+
+	compare(ps_kernel_nomask_affine);
+}
+
+/*
+ * Assemble the 16-wide affine+mask WM kernel with gen 60 and check it
+ * against ps_kernel_masknoca_affine.
+ */
+static void gen6_ps_mask_affine(void)
+{
+	struct brw_compile p;	/* compare() refers to this local by name */
+	uint32_t insns[1024];
+
+	brw_compile_init(&p, 60, insns);
+	brw_wm_kernel__affine_mask(&p, 16);
+
+	compare(ps_kernel_masknoca_affine);
+}
+
+/*
+ * Assemble the 16-wide projective WM kernel with gen 60 and check it
+ * against ps_kernel_nomask_projective.
+ */
+static void gen6_ps_nomask_projective(void)
+{
+	struct brw_compile p;	/* compare() refers to this local by name */
+	uint32_t insns[1024];
+
+	brw_compile_init(&p, 60, insns);
+	brw_wm_kernel__projective(&p, 16);
+
+	compare(ps_kernel_nomask_projective);
+}
+
+/* Run every gen6 comparison in turn. */
+void brw_test_gen6(void)
+{
+	static void (*const tests[])(void) = {
+		gen6_ps_nomask_affine,
+		gen6_ps_mask_affine,
+		gen6_ps_nomask_projective,
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++)
+		tests[i]();
+}
diff --git a/src/sna/brw/brw_test_gen7.c b/src/sna/brw/brw_test_gen7.c
new file mode 100644
index 000000000..c3f0e2310
--- /dev/null
+++ b/src/sna/brw/brw_test_gen7.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "brw_test.h"
+
+#include <string.h>
+
+static const uint32_t ps_kernel_nomask_affine[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_nomask_projective[][4] = {
+#include "exa_wm_src_projective.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_maskca_affine[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_mask_affine.g7b"
+#include "exa_wm_mask_sample_argb.g7b"
+#include "exa_wm_ca.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_maskca_projective[][4] = {
+#include "exa_wm_src_projective.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_mask_projective.g7b"
+#include "exa_wm_mask_sample_argb.g7b"
+#include "exa_wm_ca.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_a.g7b"
+#include "exa_wm_mask_affine.g7b"
+#include "exa_wm_mask_sample_argb.g7b"
+#include "exa_wm_ca_srcalpha.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = {
+#include "exa_wm_src_projective.g7b"
+#include "exa_wm_src_sample_a.g7b"
+#include "exa_wm_mask_projective.g7b"
+#include "exa_wm_mask_sample_argb.g7b"
+#include "exa_wm_ca_srcalpha.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_masknoca_affine[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_mask_affine.g7b"
+#include "exa_wm_mask_sample_a.g7b"
+#include "exa_wm_noca.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_masknoca_projective[][4] = {
+#include "exa_wm_src_projective.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_mask_projective.g7b"
+#include "exa_wm_mask_sample_a.g7b"
+#include "exa_wm_noca.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_packed[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_yuv_rgb.g7b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_planar[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_planar.g7b"
+#include "exa_wm_yuv_rgb.g7b"
+#include "exa_wm_write.g7b"
+};
+
+/*
+ * Compare the kernel just assembled into the local `p` against the whole of
+ * the reference table `old`.  A `struct brw_compile p` must be in scope at
+ * the call site.
+ */
+#define compare(old) brw_test_compare(__FUNCTION__, p.gen, p.store, p.nr_insn, (const struct brw_instruction *)(old), ARRAY_SIZE(old))
+/* gen value handed to brw_compile_init() by every test in this file */
+#define GEN 70
+
+/*
+ * Assemble the 8-wide affine WM kernel for GEN and check it against
+ * ps_kernel_nomask_affine.
+ */
+static void gen7_ps_nomask_affine(void)
+{
+	struct brw_compile p;	/* compare() refers to this local by name */
+	uint32_t insns[1024];
+
+	brw_compile_init(&p, GEN, insns);
+	brw_wm_kernel__affine(&p, 8);
+
+	compare(ps_kernel_nomask_affine);
+}
+
+/*
+ * Assemble the 8-wide affine+mask WM kernel for GEN and check it against
+ * ps_kernel_masknoca_affine.
+ */
+static void gen7_ps_mask_affine(void)
+{
+	struct brw_compile p;	/* compare() refers to this local by name */
+	uint32_t insns[1024];
+
+	brw_compile_init(&p, GEN, insns);
+	brw_wm_kernel__affine_mask(&p, 8);
+
+	compare(ps_kernel_masknoca_affine);
+}
+
+/*
+ * Assemble the 8-wide affine+mask (ca) WM kernel for GEN and check it
+ * against ps_kernel_maskca_affine.
+ */
+static void gen7_ps_maskca_affine(void)
+{
+	struct brw_compile p;	/* compare() refers to this local by name */
+	uint32_t insns[1024];
+
+	brw_compile_init(&p, GEN, insns);
+	brw_wm_kernel__affine_mask_ca(&p, 8);
+
+	compare(ps_kernel_maskca_affine);
+}
+
+/*
+ * Assemble the 8-wide affine+mask (sa) WM kernel for GEN and check it
+ * against ps_kernel_maskca_srcalpha_affine.
+ */
+static void gen7_ps_masksa_affine(void)
+{
+	struct brw_compile p;	/* compare() refers to this local by name */
+	uint32_t insns[1024];
+
+	brw_compile_init(&p, GEN, insns);
+	brw_wm_kernel__affine_mask_sa(&p, 8);
+
+	compare(ps_kernel_maskca_srcalpha_affine);
+}
+
+/*
+ * Assemble the 8-wide projective WM kernel for GEN and check it against
+ * ps_kernel_nomask_projective.
+ */
+static void gen7_ps_nomask_projective(void)
+{
+	struct brw_compile p;	/* compare() refers to this local by name */
+	uint32_t insns[1024];
+
+	brw_compile_init(&p, GEN, insns);
+	brw_wm_kernel__projective(&p, 8);
+
+	compare(ps_kernel_nomask_projective);
+}
+
+/* Run every gen7 comparison in turn. */
+void brw_test_gen7(void)
+{
+	static void (*const tests[])(void) = {
+		gen7_ps_nomask_affine,
+		gen7_ps_mask_affine,
+		gen7_ps_maskca_affine,
+		gen7_ps_masksa_affine,
+		gen7_ps_nomask_projective,
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++)
+		tests[i]();
+}
diff --git a/src/sna/brw/brw_wm.c b/src/sna/brw/brw_wm.c
new file mode 100644
index 000000000..9a8af5f45
--- /dev/null
+++ b/src/sna/brw/brw_wm.c
@@ -0,0 +1,542 @@
+#include "brw.h"
+
+#define X16 8 /* GRF number that brw_wm_xy() stores its X result in; source operand of the LINE instructions below */
+#define Y16 10 /* GRF number that brw_wm_xy() stores its Y result in; source operand of the MAC instructions below */
+
+/*
+ * Emit the four ADDs that fill GRF X16 and Y16, which the LINE/MAC sequences
+ * used on gen < 60 read as their per-pixel inputs.
+ *
+ * With compression off, uw sub-offsets 4 and 5 of g1 are each added to a
+ * packed immediate vector and stored as unsigned words in g30 (x) and g28 (y).
+ * With compression on, those temporaries are then added to the negated g1.0
+ * and g1.1 and written to X16 and Y16.
+ *
+ * NOTE(review): g1 presumably holds the subspan origin from the thread
+ * payload and the immediates the per-pixel offsets -- confirm against the PRM.
+ *
+ * @p:  compile context that receives the instructions
+ * @dw: dispatch width; 16 selects 16-wide temporaries, any other value 8-wide
+ */
+static void brw_wm_xy(struct brw_compile *p, int dw)
+{
+	struct brw_reg base = brw_vec1_grf(1, 0);
+	struct brw_reg base_uw = __retype_uw(base);
+	struct brw_reg tmp_x, tmp_y;
+
+	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+	tmp_x = dw == 16 ? brw_uw16_grf(30, 0) : brw_uw8_grf(30, 0);
+	tmp_y = dw == 16 ? brw_uw16_grf(28, 0) : brw_uw8_grf(28, 0);
+
+	brw_ADD(p, tmp_x,
+		__stride(__suboffset(base_uw, 4), 2, 4, 0),
+		brw_imm_v(0x10101010));
+	brw_ADD(p, tmp_y,
+		__stride(__suboffset(base_uw, 5), 2, 4, 0),
+		brw_imm_v(0x11001100));
+
+	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
+	brw_ADD(p, brw_vec8_grf(X16, 0), vec8(tmp_x),
+		brw_negate(base));
+	brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(tmp_y),
+		brw_negate(__suboffset(base, 1)));
+}
+
+/*
+ * Emit the instructions that compute the two texture coordinates of an
+ * affine lookup directly into message registers msg+1 and msg+1+dw/8.
+ *
+ * gen >= 60 uses one PLN per coordinate against g2; older parts use a
+ * LINE/MAC pair per coordinate against the X16/Y16 registers.  The
+ * coefficient register is picked from the generation, the dispatch width and
+ * the channel index.
+ *
+ * @p:       compile context that receives the instructions
+ * @dw:      dispatch width (16 enables compressed instructions)
+ * @channel: index of the texture channel whose coefficients are used
+ * @msg:     message register preceding the first coordinate register
+ */
+static void brw_wm_affine_st(struct brw_compile *p, int dw, int channel, int msg)
+{
+	const bool gen6 = p->gen >= 60;
+	const int s_mrf = msg + 1;
+	const int t_mrf = s_mrf + dw/8;
+	int coef;
+
+	if (dw == 16) {
+		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+		coef = (gen6 ? 6 : 3) + 2*channel;
+	} else {
+		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+		coef = (gen6 ? 4 : 3) + channel;
+	}
+
+	if (gen6) {
+		struct brw_reg g2 = brw_vec8_grf(2, 0);
+
+		brw_PLN(p, brw_message_reg(s_mrf), brw_vec1_grf(coef, 0), g2);
+		brw_PLN(p, brw_message_reg(t_mrf), brw_vec1_grf(coef, 4), g2);
+	} else {
+		struct brw_reg c = brw_vec1_grf(coef, 0);
+		struct brw_reg x = brw_vec8_grf(X16, 0);
+		struct brw_reg y = brw_vec8_grf(Y16, 0);
+
+		brw_LINE(p, brw_null_reg(), __suboffset(c, 0), x);
+		brw_MAC(p, brw_message_reg(s_mrf), __suboffset(c, 1), y);
+
+		brw_LINE(p, brw_null_reg(), __suboffset(c, 4), x);
+		brw_MAC(p, brw_message_reg(t_mrf), __suboffset(c, 5), y);
+	}
+}
+
+/* Map a dispatch width to the sampler SIMD mode: SIMD16 for 16, else SIMD8. */
+static inline unsigned simd(int dw)
+{
+	if (dw == 16)
+		return BRW_SAMPLER_SIMD_MODE_SIMD16;
+
+	return BRW_SAMPLER_SIMD_MODE_SIMD8;
+}
+
+/*
+ * Build the destination register description for a sampler message: GRF
+ * number @result, sub-register 0, typed UW, with vertical stride and width
+ * matching the dispatch width @dw (16 or 8) and a horizontal stride of 1.
+ */
+static inline struct brw_reg sample_result(int dw, int result)
+{
+	const bool wide = dw == 16;
+
+	return brw_reg(BRW_GENERAL_REGISTER_FILE, result, 0,
+		       BRW_REGISTER_TYPE_UW,
+		       wide ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
+		       wide ? BRW_WIDTH_16 : BRW_WIDTH_8,
+		       BRW_HORIZONTAL_STRIDE_1,
+		       BRW_SWIZZLE_XYZW,
+		       WRITEMASK_XYZW);
+}
+
+/*
+ * Emit a SAMPLE that fetches all four colour channels into GRF @result.
+ *
+ * On gen >= 60 the message carries no header and starts at register msg+1;
+ * on older parts a header is included and g0 is passed as src0.  The
+ * response length is twice the coordinate payload length (4 registers for a
+ * 16-wide dispatch, 2 otherwise) and the message length is that payload plus
+ * the header, if any.
+ *
+ * @channel: selects the binding table entry (channel+1) and sampler (channel)
+ */
+static void brw_wm_sample(struct brw_compile *p, int dw,
+			  int channel, int msg, int result)
+{
+	const int len = dw == 16 ? 4 : 2;
+	const bool header = p->gen < 60;
+	struct brw_reg payload;
+
+	if (header) {
+		payload = brw_vec8_grf(0, 0);
+	} else {
+		msg += 1;
+		payload = brw_message_reg(msg);
+	}
+
+	brw_SAMPLE(p, sample_result(dw, result), msg, payload,
+		   channel+1, channel, WRITEMASK_XYZW, 0,
+		   2*len, len+header, header, simd(dw));
+}
+
+/*
+ * Emit a SAMPLE that fetches only the W (alpha) channel into GRF @result.
+ *
+ * The response length is half the coordinate payload length and the message
+ * is always sent with a header (length payload+1).
+ *
+ * NOTE(review): unlike brw_wm_sample(), the gen >= 60 path here keeps the
+ * header flag set and does not advance @msg before using it as src0 --
+ * confirm this difference is intended.
+ *
+ * @channel: selects the binding table entry (channel+1) and sampler (channel)
+ */
+static void brw_wm_sample__alpha(struct brw_compile *p, int dw,
+				 int channel, int msg, int result)
+{
+	const int len = dw == 16 ? 4 : 2;
+	struct brw_reg payload;
+
+	if (p->gen < 60)
+		payload = brw_vec8_grf(0, 0);
+	else
+		payload = brw_message_reg(msg);
+
+	brw_SAMPLE(p, sample_result(dw, result), msg, payload,
+		   channel+1, channel, WRITEMASK_W, 0,
+		   len/2, len+1, true, simd(dw));
+}
+
+/*
+ * Affine texture fetch of all channels: emit the coordinate setup for
+ * @channel into the message registers following @msg, then the SAMPLE that
+ * returns the texel into GRF @result.
+ */
+static void brw_wm_affine(struct brw_compile *p, int dw,
+			  int channel, int msg, int result)
+{
+	/* coordinates first, they form the sampler payload */
+	brw_wm_affine_st(p, dw, channel, msg);
+
+	brw_wm_sample(p, dw, channel, msg, result);
+}
+
+/*
+ * Affine texture fetch of the alpha channel only: emit the coordinate setup
+ * for @channel into the message registers following @msg, then the
+ * alpha-only SAMPLE that returns into GRF @result.
+ */
+static void brw_wm_affine__alpha(struct brw_compile *p, int dw,
+				 int channel, int msg, int result)
+{
+	/* coordinates first, they form the sampler payload */
+	brw_wm_affine_st(p, dw, channel, msg);
+
+	brw_wm_sample__alpha(p, dw, channel, msg, result);
+}
+
+/*
+ * Build a null architecture-register destination, typed UW, whose vertical
+ * stride and width match the dispatch width @dw (16 or 8).
+ */
+static inline struct brw_reg null_result(int dw)
+{
+	const bool wide = dw == 16;
+
+	return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+		       BRW_ARF_NULL, 0,
+		       BRW_REGISTER_TYPE_UW,
+		       wide ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
+		       wide ? BRW_WIDTH_16 : BRW_WIDTH_8,
+		       BRW_HORIZONTAL_STRIDE_1,
+		       BRW_SWIZZLE_XYZW,
+		       WRITEMASK_XYZW);
+}
+
+/*
+ * Emit the SEND that issues the render target write for colour data the
+ * caller has already placed in the message registers starting at m2.
+ *
+ * The message control and base length (8 or 4 registers) follow the dispatch
+ * width.  On gen < 60 g1 is first copied to m1 with masking disabled, the
+ * message grows by two registers, a header is sent and g0 (as UW) is src0;
+ * on gen >= 60 there is no header and m2 is src0.
+ *
+ * @p:  compile context that receives the instructions
+ * @dw: dispatch width (16 or 8)
+ */
+static void brw_fb_write(struct brw_compile *p, int dw)
+{
+	const bool legacy = p->gen < 60;
+	struct brw_instruction *send;
+	struct brw_reg payload;
+	unsigned control, type, length;
+
+	if (dw == 16) {
+		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+		control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
+		length = 8;
+	} else {
+		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+		control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
+		length = 4;
+	}
+
+	if (legacy) {
+		/* uncompressed, unmasked copy of g1 into m1 */
+		brw_push_insn_state(p);
+		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+		brw_set_mask_control(p, BRW_MASK_DISABLE);
+		brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0));
+		brw_pop_insn_state(p);
+
+		length += 2;
+	}
+
+	/* The execution mask is ignored for render target writes. */
+	send = brw_next_insn(p, BRW_OPCODE_SEND);
+	send->header.predicate_control = 0;
+	send->header.compression_control = BRW_COMPRESSION_NONE;
+
+	if (legacy) {
+		send->header.destreg__conditionalmod = 0;
+		type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
+		payload = __retype_uw(brw_vec8_grf(0, 0));
+	} else {
+		type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
+		payload = brw_message_reg(2);
+	}
+
+	brw_set_dest(p, send, null_result(dw));
+	brw_set_src0(p, send, payload);
+	brw_set_dp_write_message(p, send, 0,
+				 control, type, length,
+				 legacy, true, 0, true, false);
+}
+
+/*
+ * Copy the colour held in the GRFs starting at @src into the message
+ * registers starting at m2, then emit the render target write.
+ *
+ * Three layouts are used for the copy:
+ *  - gen >= 60: compressed MOVs of register pairs (two for an 8-wide
+ *    dispatch, four otherwise);
+ *  - gen >= 45 with a 16-wide dispatch: four compressed MOVs to m2+n with
+ *    BRW_MRF_COMPR4 added to the register number;
+ *  - otherwise: one uncompressed MOV per channel to m2+n, plus for a 16-wide
+ *    dispatch a second-half MOV to m2+n+4.
+ */
+static void brw_wm_write(struct brw_compile *p, int dw, int src)
+{
+	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
+	if (dw == 8 && p->gen >= 60) {
+		brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src, 0));
+		brw_MOV(p, brw_message_reg(4), brw_vec8_grf(src+2, 0));
+	} else {
+		int ch;
+
+		for (ch = 0; ch < 4; ch++) {
+			struct brw_reg lo = brw_vec8_grf(src + 2*ch, 0);
+
+			if (p->gen >= 60) {
+				brw_MOV(p, brw_message_reg(2 + 2*ch), lo);
+				continue;
+			}
+
+			if (p->gen >= 45 && dw == 16) {
+				brw_MOV(p,
+					brw_message_reg(2 + ch + BRW_MRF_COMPR4),
+					lo);
+				continue;
+			}
+
+			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+			brw_MOV(p, brw_message_reg(2 + ch), lo);
+
+			if (dw == 16) {
+				brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+				brw_MOV(p,
+					brw_message_reg(2 + ch + 4),
+					brw_vec8_grf(src + 2*ch+1, 0));
+			}
+		}
+	}
+
+	brw_fb_write(p, dw);
+}
+
+/*
+ * Describe GRF @nr as a float region with vertical stride 0, width 8 and
+ * horizontal stride 1.  brw_wm_write__mask() uses it as the mask operand of
+ * its compressed MULs for an 8-wide dispatch on gen >= 60.
+ */
+static inline struct brw_reg mask_a8(int nr)
+{
+	return brw_reg(BRW_GENERAL_REGISTER_FILE, nr, 0,
+		       BRW_REGISTER_TYPE_F,
+		       BRW_VERTICAL_STRIDE_0, BRW_WIDTH_8, BRW_HORIZONTAL_STRIDE_1,
+		       BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+}
+
+/*
+ * Multiply every colour channel held in the GRFs starting at @src by the
+ * single-channel value held at @mask, store the products in the message
+ * registers starting at m2 and emit the render target write.
+ *
+ * The register layouts mirror brw_wm_write(); the mask operand is
+ * mask_a8(mask) for an 8-wide dispatch on gen >= 60, GRF @mask otherwise,
+ * and GRF @mask+1 for the second half of a split 16-wide MUL.
+ */
+static void brw_wm_write__mask(struct brw_compile *p,
+			       int dw,
+			       int src, int mask)
+{
+	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
+	if (dw == 8 && p->gen >= 60) {
+		brw_MUL(p, brw_message_reg(2),
+			brw_vec8_grf(src, 0), mask_a8(mask));
+		brw_MUL(p, brw_message_reg(4),
+			brw_vec8_grf(src+2, 0), mask_a8(mask));
+	} else {
+		int ch;
+
+		for (ch = 0; ch < 4; ch++) {
+			struct brw_reg colour = brw_vec8_grf(src + 2*ch, 0);
+			struct brw_reg alpha = brw_vec8_grf(mask, 0);
+
+			if (p->gen >= 60) {
+				brw_MUL(p, brw_message_reg(2 + 2*ch),
+					colour, alpha);
+				continue;
+			}
+
+			if (p->gen >= 45 && dw == 16) {
+				brw_MUL(p,
+					brw_message_reg(2 + ch + BRW_MRF_COMPR4),
+					colour, alpha);
+				continue;
+			}
+
+			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+			brw_MUL(p, brw_message_reg(2 + ch), colour, alpha);
+
+			if (dw == 16) {
+				brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+				brw_MUL(p,
+					brw_message_reg(2 + ch + 4),
+					brw_vec8_grf(src + 2*ch+1, 0),
+					brw_vec8_grf(mask+1, 0));
+			}
+		}
+	}
+
+	brw_fb_write(p, dw);
+}
+
+/*
+ * Multiply each colour channel held in the GRFs starting at @src by the
+ * matching channel held in the GRFs starting at @mask, store the products in
+ * the message registers starting at m2 and emit the render target write.
+ *
+ * The register layouts mirror brw_wm_write(); the mask register index always
+ * advances in step with the source register index.
+ */
+static void brw_wm_write__mask_ca(struct brw_compile *p,
+				  int dw, int src, int mask)
+{
+	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
+	if (dw == 8 && p->gen >= 60) {
+		brw_MUL(p, brw_message_reg(2),
+			brw_vec8_grf(src, 0), brw_vec8_grf(mask, 0));
+		brw_MUL(p, brw_message_reg(4),
+			brw_vec8_grf(src + 2, 0), brw_vec8_grf(mask + 2, 0));
+	} else {
+		int ch;
+
+		for (ch = 0; ch < 4; ch++) {
+			struct brw_reg colour = brw_vec8_grf(src + 2*ch, 0);
+			struct brw_reg factor = brw_vec8_grf(mask + 2*ch, 0);
+
+			if (p->gen >= 60) {
+				brw_MUL(p, brw_message_reg(2 + 2*ch),
+					colour, factor);
+				continue;
+			}
+
+			if (p->gen >= 45 && dw == 16) {
+				brw_MUL(p,
+					brw_message_reg(2 + ch + BRW_MRF_COMPR4),
+					colour, factor);
+				continue;
+			}
+
+			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+			brw_MUL(p, brw_message_reg(2 + ch), colour, factor);
+
+			if (dw == 16) {
+				brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+				brw_MUL(p,
+					brw_message_reg(2 + ch + 4),
+					brw_vec8_grf(src + 2*ch + 1, 0),
+					brw_vec8_grf(mask + 2*ch + 1, 0));
+			}
+		}
+	}
+
+	brw_fb_write(p, dw);
+}
+
+/*
+ * Assemble the WM kernel for an affine source without a mask: sample the
+ * source (channel 0, payload after m1) into g12 and write it out.  On
+ * gen < 60 the X/Y setup is emitted first.
+ *
+ * @p:        compile context that receives the kernel
+ * @dispatch: dispatch width (8 or 16)
+ */
+void
+brw_wm_kernel__affine(struct brw_compile *p, int dispatch)
+{
+	const int src = 12;
+
+	if (p->gen < 60) {
+		brw_wm_xy(p, dispatch);
+	}
+
+	brw_wm_affine(p, dispatch, 0, 1, src);
+	brw_wm_write(p, dispatch, src);
+}
+
+/*
+ * Assemble the WM kernel for an affine source with an affine alpha mask:
+ * sample the source (channel 0) into g12, the mask's alpha (channel 1,
+ * payload after m7) into g20, then write source * mask alpha.  On gen < 60
+ * the X/Y setup is emitted first.
+ *
+ * @p:        compile context that receives the kernel
+ * @dispatch: dispatch width (8 or 16)
+ */
+void
+brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch)
+{
+	const int src = 12;
+	const int mask = 20;
+
+	if (p->gen < 60) {
+		brw_wm_xy(p, dispatch);
+	}
+
+	brw_wm_affine(p, dispatch, 0, 1, src);
+	brw_wm_affine__alpha(p, dispatch, 1, 7, mask);
+	brw_wm_write__mask(p, dispatch, src, mask);
+}
+
+/*
+ * Assemble the WM kernel for an affine source with an affine per-channel
+ * mask: sample the source (channel 0) into g12 and all channels of the mask
+ * (channel 1, payload after m7) into g20, then write the channel-wise
+ * product.  On gen < 60 the X/Y setup is emitted first.
+ *
+ * @p:        compile context that receives the kernel
+ * @dispatch: dispatch width (8 or 16)
+ */
+void
+brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch)
+{
+	const int src = 12;
+	const int mask = 20;
+
+	if (p->gen < 60) {
+		brw_wm_xy(p, dispatch);
+	}
+
+	brw_wm_affine(p, dispatch, 0, 1, src);
+	brw_wm_affine(p, dispatch, 1, 7, mask);
+	brw_wm_write__mask_ca(p, dispatch, src, mask);
+}
+
+/*
+ * Assemble the "mask_sa" affine WM kernel: sample only the source's alpha
+ * (channel 0) into g12 and all channels of the mask (channel 1, payload
+ * after m7) into g14, then write mask * source alpha -- note the swapped
+ * argument order in the final call.  On gen < 60 the X/Y setup is emitted
+ * first.
+ *
+ * @p:        compile context that receives the kernel
+ * @dispatch: dispatch width (8 or 16)
+ */
+void
+brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch)
+{
+	const int src = 12;
+	const int mask = 14;
+
+	if (p->gen < 60) {
+		brw_wm_xy(p, dispatch);
+	}
+
+	brw_wm_affine__alpha(p, dispatch, 0, 1, src);
+	brw_wm_affine(p, dispatch, 1, 7, mask);
+	brw_wm_write__mask(p, dispatch, mask, src);
+}
+
+/* Projective variants */
+
+/*
+ * Emit the instructions that compute the two texture coordinates of a
+ * projective lookup into message registers msg+1 and msg+1+dw/8.
+ *
+ * The sequence is the same on every generation:
+ *   1. evaluate the third (z) coefficient set, held in register uv+1, into
+ *      g30 (and g31 for a 16-wide dispatch) and invert it in place;
+ *   2. evaluate each coordinate into the g28 temporary and multiply it by
+ *      the reciprocal in g30, storing the product in the message register.
+ * gen >= 60 evaluates with PLN against g2, older parts with LINE/MAC against
+ * the X16/Y16 registers.  The inverts are always issued uncompressed, one per
+ * 8-wide half.
+ *
+ * Fixes relative to the first version of the gen >= 60 branch:
+ *  - the 1/z PLN wrote to the message register although the following
+ *    inverts and MULs read g30, so g30 was never initialised;
+ *  - the second coordinate re-used sub-offset 0 of the coefficient register
+ *    with g4 as the other operand; it now uses sub-offset 4 with g2, matching
+ *    brw_wm_affine_st() and the gen < 60 branch.
+ *
+ * @p:       compile context that receives the instructions
+ * @dw:      dispatch width (16 enables compressed instructions)
+ * @channel: index of the texture channel whose coefficients are used
+ * @msg:     message register preceding the first coordinate register
+ */
+static void brw_wm_projective_st(struct brw_compile *p, int dw, int channel, int msg)
+{
+	int uv;
+
+	if (dw == 16) {
+		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+		uv = p->gen >= 60 ? 6 : 3;
+		uv += 2*channel;
+	} else {
+		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+		uv = p->gen >= 60 ? 4 : 3;
+		uv += channel;
+	}
+
+	msg++;
+	if (p->gen >= 60) {
+		/* First compute 1/z into the g30 temporary */
+		brw_PLN(p,
+			brw_vec8_grf(30, 0),
+			brw_vec1_grf(uv+1, 0),
+			brw_vec8_grf(2, 0));
+
+		if (dw == 16) {
+			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+			brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
+			brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
+			brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+		} else
+			brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
+
+		/* Now compute the output s,t values */
+		brw_PLN(p,
+			brw_vec8_grf(28, 0),
+			brw_vec1_grf(uv, 0),
+			brw_vec8_grf(2, 0));
+		brw_MUL(p,
+			brw_message_reg(msg),
+			brw_vec8_grf(28, 0),
+			brw_vec8_grf(30, 0));
+		msg += dw/8;
+
+		brw_PLN(p,
+			brw_vec8_grf(28, 0),
+			brw_vec1_grf(uv, 4),
+			brw_vec8_grf(2, 0));
+		brw_MUL(p,
+			brw_message_reg(msg),
+			brw_vec8_grf(28, 0),
+			brw_vec8_grf(30, 0));
+	} else {
+		struct brw_reg r = brw_vec1_grf(uv, 0);
+
+		/* First compute 1/z */
+		brw_LINE(p, brw_null_reg(), brw_vec1_grf(uv+1, 0), brw_vec8_grf(X16, 0));
+		brw_MAC(p, brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 1), brw_vec8_grf(Y16, 0));
+
+		if (dw == 16) {
+			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+			brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
+			brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
+			brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+		} else
+			brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
+
+		/* Now compute the output s,t values */
+		brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0));
+		brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 1), brw_vec8_grf(Y16, 0));
+		brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
+		msg += dw/8;
+
+		brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0));
+		brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 5), brw_vec8_grf(Y16, 0));
+		brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
+	}
+}
+
+/*
+ * Projective texture fetch of all channels: emit the coordinate setup for
+ * @channel into the message registers following @msg, then the SAMPLE that
+ * returns the texel into GRF @result.
+ */
+static void brw_wm_projective(struct brw_compile *p, int dw,
+			      int channel, int msg, int result)
+{
+	/* coordinates first, they form the sampler payload */
+	brw_wm_projective_st(p, dw, channel, msg);
+
+	brw_wm_sample(p, dw, channel, msg, result);
+}
+
+/*
+ * Projective texture fetch of the alpha channel only: emit the coordinate
+ * setup for @channel into the message registers following @msg, then the
+ * alpha-only SAMPLE that returns into GRF @result.
+ */
+static void brw_wm_projective__alpha(struct brw_compile *p, int dw,
+				     int channel, int msg, int result)
+{
+	/* coordinates first, they form the sampler payload */
+	brw_wm_projective_st(p, dw, channel, msg);
+
+	brw_wm_sample__alpha(p, dw, channel, msg, result);
+}
+
+/*
+ * Assemble the WM kernel for a projective source without a mask: sample the
+ * source (channel 0, payload after m1) into g12 and write it out.  On
+ * gen < 60 the X/Y setup is emitted first.
+ *
+ * @p:        compile context that receives the kernel
+ * @dispatch: dispatch width (8 or 16)
+ */
+void
+brw_wm_kernel__projective(struct brw_compile *p, int dispatch)
+{
+	const int src = 12;
+
+	if (p->gen < 60) {
+		brw_wm_xy(p, dispatch);
+	}
+
+	brw_wm_projective(p, dispatch, 0, 1, src);
+	brw_wm_write(p, dispatch, src);
+}
+
+/*
+ * Assemble the WM kernel for a projective source with a projective alpha
+ * mask: sample the source (channel 0) into g12, the mask's alpha (channel 1,
+ * payload after m7) into g20, then write source * mask alpha.  On gen < 60
+ * the X/Y setup is emitted first.
+ *
+ * @p:        compile context that receives the kernel
+ * @dispatch: dispatch width (8 or 16)
+ */
+void
+brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch)
+{
+	const int src = 12;
+	const int mask = 20;
+
+	if (p->gen < 60) {
+		brw_wm_xy(p, dispatch);
+	}
+
+	brw_wm_projective(p, dispatch, 0, 1, src);
+	brw_wm_projective__alpha(p, dispatch, 1, 7, mask);
+	brw_wm_write__mask(p, dispatch, src, mask);
+}
+
+/*
+ * Assemble the WM kernel for a projective source with a projective
+ * per-channel mask: sample the source (channel 0) into g12 and all channels
+ * of the mask (channel 1, payload after m7) into g20, then write the
+ * channel-wise product.  On gen < 60 the X/Y setup is emitted first.
+ *
+ * @p:        compile context that receives the kernel
+ * @dispatch: dispatch width (8 or 16)
+ */
+void
+brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch)
+{
+	const int src = 12;
+	const int mask = 20;
+
+	if (p->gen < 60) {
+		brw_wm_xy(p, dispatch);
+	}
+
+	brw_wm_projective(p, dispatch, 0, 1, src);
+	brw_wm_projective(p, dispatch, 1, 7, mask);
+	brw_wm_write__mask_ca(p, dispatch, src, mask);
+}
+
+/*
+ * Assemble the "mask_sa" projective WM kernel: sample only the source's
+ * alpha (channel 0) into g12 and all channels of the mask (channel 1,
+ * payload after m7) into g14, then write mask * source alpha -- note the
+ * swapped argument order in the final call.  On gen < 60 the X/Y setup is
+ * emitted first.
+ *
+ * @p:        compile context that receives the kernel
+ * @dispatch: dispatch width (8 or 16)
+ */
+void
+brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch)
+{
+	const int src = 12;
+	const int mask = 14;
+
+	if (p->gen < 60) {
+		brw_wm_xy(p, dispatch);
+	}
+
+	brw_wm_projective__alpha(p, dispatch, 0, 1, src);
+	brw_wm_projective(p, dispatch, 1, 7, mask);
+	brw_wm_write__mask(p, dispatch, mask, src);
+}
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index c292da1ef..5b64efa1d 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -179,8 +179,8 @@ static const struct wm_kernel_info {
KERNEL(MASKCA, ps_kernel_maskca_affine, 3, 2),
KERNEL(MASKCA_PROJECTIVE, ps_kernel_maskca_projective, 3, 2),
- KERNEL(MASKCA_SRCALPHA, ps_kernel_maskca_srcalpha_affine, 3, 2),
- KERNEL(MASKCA_SRCALPHA_PROJECTIVE, ps_kernel_maskca_srcalpha_projective, 3, 2),
+ KERNEL(MASKSA, ps_kernel_maskca_srcalpha_affine, 3, 2),
+ KERNEL(MASKSA_PROJECTIVE, ps_kernel_maskca_srcalpha_projective, 3, 2),
KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7, 1),
KERNEL(VIDEO_PACKED, ps_kernel_packed, 2, 1),
@@ -431,7 +431,7 @@ gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
if (has_mask) {
if (is_ca) {
if (gen6_blend_op[op].src_alpha)
- base = GEN6_WM_KERNEL_MASKCA_SRCALPHA;
+ base = GEN6_WM_KERNEL_MASKSA;
else
base = GEN6_WM_KERNEL_MASKCA;
} else
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index d06b791df..ded22d5f2 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -185,8 +185,8 @@ static const struct wm_kernel_info {
KERNEL(MASKCA, ps_kernel_maskca_affine, 3),
KERNEL(MASKCA_PROJECTIVE, ps_kernel_maskca_projective, 3),
- KERNEL(MASKCA_SRCALPHA, ps_kernel_maskca_srcalpha_affine, 3),
- KERNEL(MASKCA_SRCALPHA_PROJECTIVE, ps_kernel_maskca_srcalpha_projective, 3),
+ KERNEL(MASKSA, ps_kernel_maskca_srcalpha_affine, 3),
+ KERNEL(MASKSA_PROJECTIVE, ps_kernel_maskca_srcalpha_projective, 3),
KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
@@ -437,7 +437,7 @@ gen7_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
if (has_mask) {
if (is_ca) {
if (gen7_blend_op[op].src_alpha)
- base = GEN7_WM_KERNEL_MASKCA_SRCALPHA;
+ base = GEN7_WM_KERNEL_MASKSA;
else
base = GEN7_WM_KERNEL_MASKCA;
} else
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index e676b6a77..64e24122f 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -13,6 +13,7 @@ struct sna;
struct sna_glyph;
struct sna_video;
struct sna_video_frame;
+struct brw_compile;
struct sna_composite_rectangles {
struct sna_coordinate {
@@ -379,8 +380,8 @@ enum {
GEN6_WM_KERNEL_MASKCA,
GEN6_WM_KERNEL_MASKCA_PROJECTIVE,
- GEN6_WM_KERNEL_MASKCA_SRCALPHA,
- GEN6_WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
+ GEN6_WM_KERNEL_MASKSA,
+ GEN6_WM_KERNEL_MASKSA_PROJECTIVE,
GEN6_WM_KERNEL_VIDEO_PLANAR,
GEN6_WM_KERNEL_VIDEO_PACKED,
@@ -428,8 +429,8 @@ enum {
GEN7_WM_KERNEL_MASKCA,
GEN7_WM_KERNEL_MASKCA_PROJECTIVE,
- GEN7_WM_KERNEL_MASKCA_SRCALPHA,
- GEN7_WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
+ GEN7_WM_KERNEL_MASKSA,
+ GEN7_WM_KERNEL_MASKSA_PROJECTIVE,
GEN7_WM_KERNEL_VIDEO_PLANAR,
GEN7_WM_KERNEL_VIDEO_PACKED,
@@ -479,6 +480,14 @@ void *sna_static_stream_map(struct sna_static_stream *stream,
uint32_t len, uint32_t align);
uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream,
void *ptr);
+unsigned sna_static_stream_compile_sf(struct sna *sna,
+ struct sna_static_stream *stream,
+ void (*compile)(struct brw_compile *));
+
+unsigned sna_static_stream_compile_wm(struct sna *sna,
+ struct sna_static_stream *stream,
+ void (*compile)(struct brw_compile *, int),
+ int width);
struct kgem_bo *sna_static_stream_fini(struct sna *sna,
struct sna_static_stream *stream);
diff --git a/src/sna/sna_stream.c b/src/sna/sna_stream.c
index aab15498e..66a8c461f 100644
--- a/src/sna/sna_stream.c
+++ b/src/sna/sna_stream.c
@@ -27,6 +27,7 @@
#include "sna.h"
#include "sna_render.h"
+#include "brw/brw.h"
int sna_static_stream_init(struct sna_static_stream *stream)
{
@@ -92,3 +93,40 @@ struct kgem_bo *sna_static_stream_fini(struct sna *sna,
return bo;
}
+
+/*
+ * Assemble an SF kernel straight into the static stream.
+ *
+ * Space for 64 dwords is mapped from @stream with 64-byte alignment, @compile
+ * fills it through a brw_compile context initialised for sna->kgem.gen, and
+ * the part of the reservation the kernel did not use is handed back by
+ * lowering stream->used.  An assert checks that the kernel fitted.
+ *
+ * Returns the offset of the kernel within the stream.
+ */
+unsigned
+sna_static_stream_compile_sf(struct sna *sna,
+			     struct sna_static_stream *stream,
+			     void (*compile)(struct brw_compile *))
+{
+	const size_t reserved = 64*sizeof(uint32_t);
+	struct brw_compile p;
+	size_t emitted;
+	void *space;
+
+	space = sna_static_stream_map(stream, reserved, 64);
+	brw_compile_init(&p, sna->kgem.gen, space);
+
+	compile(&p);
+
+	emitted = p.nr_insn*sizeof(struct brw_instruction);
+	assert(emitted <= reserved);
+
+	/* give back the unused tail of the reservation */
+	stream->used -= reserved - emitted;
+	return sna_static_stream_offsetof(stream, p.store);
+}
+
+/*
+ * Assemble a WM kernel straight into the static stream.
+ *
+ * Space for 256 dwords is mapped from @stream with 64-byte alignment,
+ * @compile fills it for the requested @dispatch_width through a brw_compile
+ * context initialised for sna->kgem.gen, and the part of the reservation the
+ * kernel did not use is handed back by lowering stream->used.  An assert
+ * checks that the kernel fitted.
+ *
+ * Returns the offset of the kernel within the stream.
+ */
+unsigned
+sna_static_stream_compile_wm(struct sna *sna,
+			     struct sna_static_stream *stream,
+			     void (*compile)(struct brw_compile *, int),
+			     int dispatch_width)
+{
+	const size_t reserved = 256*sizeof(uint32_t);
+	struct brw_compile p;
+	size_t emitted;
+	void *space;
+
+	space = sna_static_stream_map(stream, reserved, 64);
+	brw_compile_init(&p, sna->kgem.gen, space);
+
+	compile(&p, dispatch_width);
+
+	emitted = p.nr_insn*sizeof(struct brw_instruction);
+	assert(emitted <= reserved);
+
+	/* give back the unused tail of the reservation */
+	stream->used -= reserved - emitted;
+	return sna_static_stream_offsetof(stream, p.store);
+}