summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/sna/g4x_render.c 137
1 files changed, 55 insertions, 82 deletions
diff --git a/src/sna/g4x_render.c b/src/sna/g4x_render.c
index 41a02d337..9de8340a9 100644
--- a/src/sna/g4x_render.c
+++ b/src/sna/g4x_render.c
@@ -48,7 +48,6 @@
* the BLT engine.
*/
#define PREFER_BLT 1
-#define FLUSH_EVERY_VERTEX 1
#define FORCE_SPANS 0
#define NO_COMPOSITE 0
@@ -60,15 +59,6 @@
#define NO_FILL_BOXES 0
#define NO_VIDEO 0
-#if FLUSH_EVERY_VERTEX
-#define _FLUSH() do { \
- g4x_vertex_flush(sna); \
- OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); \
-} while (0)
-#else
-#define _FLUSH()
-#endif
-
#define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
/* Set up a default static partitioning of the URB, which is supposed to
@@ -695,23 +685,23 @@ g4x_emit_composite_primitive_solid(struct sna *sna,
} dst;
v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 15;
+ sna->render.vertex_used += 9;
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
- v[3] = v[1] = 1.;
- v[4] = v[2] = 1.;
+ v[1] = 1.;
+ v[2] = 1.;
dst.p.x = r->dst.x;
- v[5] = dst.f;
- v[8] = v[6] = 0.;
- v[9] = v[7] = 1.;
+ v[3] = dst.f;
+ v[4] = 0.;
+ v[5] = 1.;
dst.p.y = r->dst.y;
- v[10] = dst.f;
- v[13] = v[11] = 0.;
- v[14] = v[12] = 0.;
+ v[6] = dst.f;
+ v[7] = 0.;
+ v[8] = 0.;
}
fastcall static void
@@ -727,7 +717,7 @@ g4x_emit_composite_primitive_identity_source(struct sna *sna,
} dst;
v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 15;
+ sna->render.vertex_used += 9;
sx = r->src.x + op->src.offset[0];
sy = r->src.y + op->src.offset[1];
@@ -737,22 +727,16 @@ g4x_emit_composite_primitive_identity_source(struct sna *sna,
v[0] = dst.f;
v[1] = (sx + r->width) * sf[0];
v[2] = (sy + r->height) * sf[1];
- v[3] = 1.;
- v[4] = 1.;
dst.p.x = r->dst.x;
- v[5] = dst.f;
- v[6] = sx * sf[0];
- v[7] = v[2];
- v[8] = 0.;
- v[9] = 1.;
+ v[3] = dst.f;
+ v[4] = sx * sf[0];
+ v[5] = v[2];
dst.p.y = r->dst.y;
- v[10] = dst.f;
- v[11] = v[6];
- v[12] = sy * sf[1];
- v[13] = 0.;
- v[14] = 0.;
+ v[6] = dst.f;
+ v[7] = v[4];
+ v[8] = sy * sf[1];
}
fastcall static void
@@ -767,7 +751,7 @@ g4x_emit_composite_primitive_affine_source(struct sna *sna,
float *v;
v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 15;
+ sna->render.vertex_used += 9;
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
@@ -778,30 +762,24 @@ g4x_emit_composite_primitive_affine_source(struct sna *sna,
&v[1], &v[2]);
v[1] *= op->src.scale[0];
v[2] *= op->src.scale[1];
- v[3] = 1.;
- v[4] = 1.;
dst.p.x = r->dst.x;
- v[5] = dst.f;
+ v[3] = dst.f;
_sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y + r->height,
op->src.transform,
- &v[6], &v[7]);
- v[6] *= op->src.scale[0];
- v[7] *= op->src.scale[1];
- v[8] = 0.;
- v[9] = 1.;
+ &v[4], &v[5]);
+ v[4] *= op->src.scale[0];
+ v[5] *= op->src.scale[1];
dst.p.y = r->dst.y;
- v[10] = dst.f;
+ v[6] = dst.f;
_sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y,
op->src.transform,
- &v[11], &v[12]);
- v[11] *= op->src.scale[0];
- v[12] *= op->src.scale[1];
- v[13] = 0.;
- v[14] = 0.;
+ &v[7], &v[8]);
+ v[7] *= op->src.scale[0];
+ v[8] *= op->src.scale[1];
}
fastcall static void
@@ -1026,8 +1004,6 @@ static bool g4x_rectangle_begin(struct sna *sna,
/* 7xpipelined pointers + 6xprimitive + 1xflush */
ndwords = op->need_magic_ca_pass? 20 : 6;
- if (FLUSH_EVERY_VERTEX)
- ndwords += 1;
if ((sna->render_state.gen4.vb_id & (1 << id)) == 0)
ndwords += 5;
@@ -1045,7 +1021,7 @@ static bool g4x_rectangle_begin(struct sna *sna,
static int g4x_get_rectangles__flush(struct sna *sna,
const struct sna_composite_op *op)
{
- if (!kgem_check_batch(&sna->kgem, (FLUSH_EVERY_VERTEX || op->need_magic_ca_pass) ? 25 : 6))
+ if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 25 : 6))
return 0;
if (!kgem_check_reloc_and_exec(&sna->kgem, 1))
return 0;
@@ -1273,7 +1249,7 @@ g4x_emit_pipelined_pointers(struct sna *sna,
OUT_BATCH(sna->render_state.gen4.vs);
OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */
OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */
- OUT_BATCH(sna->render_state.gen4.sf[!!(op->u.gen4.ve_id & 2)]);
+ OUT_BATCH(sna->render_state.gen4.sf[1]);
OUT_BATCH(sna->render_state.gen4.wm + sp);
OUT_BATCH(sna->render_state.gen4.cc + bp);
@@ -1315,9 +1291,9 @@ g4x_emit_vertex_elements(struct sna *sna,
*/
struct gen4_render_state *render = &sna->render_state.gen4;
int id = op->u.gen4.ve_id;
- int selem, nelem;
uint32_t w_component;
uint32_t src_format;
+ int selem;
if (render->ve_id == id)
return;
@@ -1333,14 +1309,13 @@ g4x_emit_vertex_elements(struct sna *sna,
w_component = GEN4_VFCOMPONENT_STORE_SRC;
selem = 3;
}
- nelem = id & 2 ? 2 : 1;
/* The VUE layout
* dword 0-3: position (x, y, 1.0, 1.0),
* dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
* [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0)
*/
- OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + nelem) - 1));
+ OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1));
/* x,y */
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
@@ -1363,15 +1338,21 @@ g4x_emit_vertex_elements(struct sna *sna,
(2*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */
/* u1, v1, w1 */
+ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+ src_format << VE0_FORMAT_SHIFT |
+ ((1 + selem) * 4) << VE0_OFFSET_SHIFT); /* vb offset in bytes */
if (id & 2) {
- OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
- src_format << VE0_FORMAT_SHIFT |
- ((1 + selem) * 4) << VE0_OFFSET_SHIFT); /* vb offset in bytes */
OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
w_component << VE1_VFCOMPONENT_2_SHIFT |
GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
(3*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */
+ } else {
+ OUT_BATCH(GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
+ GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
+ GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
+ GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
+ (3*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */
}
}
@@ -1715,8 +1696,6 @@ g4x_render_video(struct sna *sna,
OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
OUT_VERTEX_F((box->y1 - dyo) * src_scale_y);
- _FLUSH();
-
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
sna_damage_subtract_box(&priv->cpu_damage, &r);
@@ -2392,6 +2371,7 @@ g4x_render_composite(struct sna *sna,
if (tmp->src.transform == NULL && tmp->mask.transform == NULL)
tmp->prim_emit = g4x_emit_composite_primitive_identity_source_mask;
+ tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine;
} else {
if (tmp->src.is_solid)
tmp->prim_emit = g4x_emit_composite_primitive_solid;
@@ -2399,16 +2379,17 @@ g4x_render_composite(struct sna *sna,
tmp->prim_emit = g4x_emit_composite_primitive_identity_source;
else if (tmp->src.is_affine)
tmp->prim_emit = g4x_emit_composite_primitive_affine_source;
+
+ tmp->floats_per_vertex = 3 + !tmp->is_affine;
}
- tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine;
tmp->floats_per_rect = 3*tmp->floats_per_vertex;
tmp->u.gen4.wm_kernel =
g4x_choose_composite_kernel(tmp->op,
- mask != NULL,
- tmp->has_component_alpha,
- tmp->is_affine);
- tmp->u.gen4.ve_id = 1 << 1 | tmp->is_affine;
+ tmp->mask.bo != NULL,
+ tmp->has_component_alpha,
+ tmp->is_affine);
+ tmp->u.gen4.ve_id = (tmp->mask.bo != NULL) << 1 | tmp->is_affine;
tmp->blt = g4x_render_composite_blt;
tmp->box = g4x_render_composite_box;
@@ -2796,8 +2777,6 @@ g4x_render_copy_one(struct sna *sna,
OUT_VERTEX(dx, dy);
OUT_VERTEX_F(sx*op->src.scale[0]);
OUT_VERTEX_F(sy*op->src.scale[1]);
-
- _FLUSH();
}
static inline bool prefer_blt_copy(struct sna *sna, unsigned flags)
@@ -3094,20 +3073,14 @@ g4x_render_fill_rectangle(struct sna *sna,
OUT_VERTEX(x+w, y+h);
OUT_VERTEX_F(1);
OUT_VERTEX_F(1);
- OUT_VERTEX_F(1);
- OUT_VERTEX_F(1);
OUT_VERTEX(x, y+h);
OUT_VERTEX_F(0);
OUT_VERTEX_F(1);
- OUT_VERTEX_F(0);
- OUT_VERTEX_F(1);
OUT_VERTEX(x, y);
OUT_VERTEX_F(0);
OUT_VERTEX_F(0);
- OUT_VERTEX_F(0);
- OUT_VERTEX_F(0);
}
static bool
@@ -3184,10 +3157,10 @@ g4x_render_fill_boxes(struct sna *sna,
g4x_composite_solid_init(sna, &tmp.src, pixel);
tmp.is_affine = true;
- tmp.floats_per_vertex = 5;
- tmp.floats_per_rect = 15;
+ tmp.floats_per_vertex = 3;
+ tmp.floats_per_rect = 9;
tmp.u.gen4.wm_kernel = WM_KERNEL;
- tmp.u.gen4.ve_id = 1 | 1 << 1;
+ tmp.u.gen4.ve_id = 1;
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_submit(&sna->kgem);
@@ -3289,10 +3262,10 @@ g4x_render_fill(struct sna *sna, uint8_t alu,
op->base.mask.bo = NULL;
op->base.is_affine = true;
- op->base.floats_per_vertex = 5;
- op->base.floats_per_rect = 15;
+ op->base.floats_per_vertex = 3;
+ op->base.floats_per_rect = 9;
op->base.u.gen4.wm_kernel = WM_KERNEL;
- op->base.u.gen4.ve_id = 1 | 1 << 1;
+ op->base.u.gen4.ve_id = 1;
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_submit(&sna->kgem);
@@ -3365,13 +3338,13 @@ g4x_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
tmp.mask.bo = NULL;
tmp.is_affine = true;
- tmp.floats_per_vertex = 5;
- tmp.floats_per_rect = 15;
- tmp.has_component_alpha = 0;
+ tmp.floats_per_vertex = 3;
+ tmp.floats_per_rect = 9;
+ tmp.has_component_alpha = false;
tmp.need_magic_ca_pass = false;
tmp.u.gen4.wm_kernel = WM_KERNEL;
- tmp.u.gen4.ve_id = 1 | 1 << 1;
+ tmp.u.gen4.ve_id = 1;
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
_kgem_submit(&sna->kgem);