diff options
-rw-r--r-- | src/sna/g4x_render.c | 137 |
1 files changed, 55 insertions, 82 deletions
diff --git a/src/sna/g4x_render.c b/src/sna/g4x_render.c index 41a02d337..9de8340a9 100644 --- a/src/sna/g4x_render.c +++ b/src/sna/g4x_render.c @@ -48,7 +48,6 @@ * the BLT engine. */ #define PREFER_BLT 1 -#define FLUSH_EVERY_VERTEX 1 #define FORCE_SPANS 0 #define NO_COMPOSITE 0 @@ -60,15 +59,6 @@ #define NO_FILL_BOXES 0 #define NO_VIDEO 0 -#if FLUSH_EVERY_VERTEX -#define _FLUSH() do { \ - g4x_vertex_flush(sna); \ - OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); \ -} while (0) -#else -#define _FLUSH() -#endif - #define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) /* Set up a default static partitioning of the URB, which is supposed to @@ -695,23 +685,23 @@ g4x_emit_composite_primitive_solid(struct sna *sna, } dst; v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 15; + sna->render.vertex_used += 9; dst.p.x = r->dst.x + r->width; dst.p.y = r->dst.y + r->height; v[0] = dst.f; - v[3] = v[1] = 1.; - v[4] = v[2] = 1.; + v[1] = 1.; + v[2] = 1.; dst.p.x = r->dst.x; - v[5] = dst.f; - v[8] = v[6] = 0.; - v[9] = v[7] = 1.; + v[3] = dst.f; + v[4] = 0.; + v[5] = 1.; dst.p.y = r->dst.y; - v[10] = dst.f; - v[13] = v[11] = 0.; - v[14] = v[12] = 0.; + v[6] = dst.f; + v[7] = 0.; + v[8] = 0.; } fastcall static void @@ -727,7 +717,7 @@ g4x_emit_composite_primitive_identity_source(struct sna *sna, } dst; v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 15; + sna->render.vertex_used += 9; sx = r->src.x + op->src.offset[0]; sy = r->src.y + op->src.offset[1]; @@ -737,22 +727,16 @@ g4x_emit_composite_primitive_identity_source(struct sna *sna, v[0] = dst.f; v[1] = (sx + r->width) * sf[0]; v[2] = (sy + r->height) * sf[1]; - v[3] = 1.; - v[4] = 1.; dst.p.x = r->dst.x; - v[5] = dst.f; - v[6] = sx * sf[0]; - v[7] = v[2]; - v[8] = 0.; - v[9] = 1.; + v[3] = dst.f; + v[4] = sx * sf[0]; + v[5] = v[2]; dst.p.y = r->dst.y; - v[10] = dst.f; - v[11] = v[6]; - v[12] = sy * sf[1]; - v[13] = 0.; - v[14] = 0.; + v[6] = dst.f; + v[7] = v[4]; + v[8] = sy * sf[1]; } fastcall static void @@ -767,7 +751,7 @@ g4x_emit_composite_primitive_affine_source(struct sna *sna, float *v; v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 15; + sna->render.vertex_used += 9; dst.p.x = r->dst.x + r->width; dst.p.y = r->dst.y + r->height; @@ -778,30 +762,24 @@ g4x_emit_composite_primitive_affine_source(struct sna *sna, &v[1], &v[2]); v[1] *= op->src.scale[0]; v[2] *= op->src.scale[1]; - v[3] = 1.; - v[4] = 1.; dst.p.x = r->dst.x; - v[5] = dst.f; + v[3] = dst.f; _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x, op->src.offset[1] + r->src.y + r->height, op->src.transform, - &v[6], &v[7]); - v[6] *= op->src.scale[0]; - v[7] *= op->src.scale[1]; - v[8] = 0.; - v[9] = 1.; + &v[4], &v[5]); + v[4] *= op->src.scale[0]; + v[5] *= op->src.scale[1]; dst.p.y = r->dst.y; - v[10] = dst.f; + v[6] = dst.f; _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x, op->src.offset[1] + r->src.y, op->src.transform, - &v[11], &v[12]); - v[11] *= op->src.scale[0]; - v[12] *= op->src.scale[1]; - v[13] = 0.; - v[14] = 0.; + &v[7], &v[8]); + v[7] *= op->src.scale[0]; + v[8] *= op->src.scale[1]; } fastcall static void @@ -1026,8 +1004,6 @@ static bool g4x_rectangle_begin(struct sna *sna, /* 7xpipelined pointers + 6xprimitive + 1xflush */ ndwords = op->need_magic_ca_pass? 20 : 6; - if (FLUSH_EVERY_VERTEX) - ndwords += 1; if ((sna->render_state.gen4.vb_id & (1 << id)) == 0) ndwords += 5; @@ -1045,7 +1021,7 @@ static bool g4x_rectangle_begin(struct sna *sna, static int g4x_get_rectangles__flush(struct sna *sna, const struct sna_composite_op *op) { - if (!kgem_check_batch(&sna->kgem, (FLUSH_EVERY_VERTEX || op->need_magic_ca_pass) ? 25 : 6)) + if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 25 : 6)) return 0; if (!kgem_check_reloc_and_exec(&sna->kgem, 1)) return 0; @@ -1273,7 +1249,7 @@ g4x_emit_pipelined_pointers(struct sna *sna, OUT_BATCH(sna->render_state.gen4.vs); OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */ OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */ - OUT_BATCH(sna->render_state.gen4.sf[!!(op->u.gen4.ve_id & 2)]); + OUT_BATCH(sna->render_state.gen4.sf[1]); OUT_BATCH(sna->render_state.gen4.wm + sp); OUT_BATCH(sna->render_state.gen4.cc + bp); @@ -1315,9 +1291,9 @@ g4x_emit_vertex_elements(struct sna *sna, */ struct gen4_render_state *render = &sna->render_state.gen4; int id = op->u.gen4.ve_id; - int selem, nelem; uint32_t w_component; uint32_t src_format; + int selem; if (render->ve_id == id) return; @@ -1333,14 +1309,13 @@ g4x_emit_vertex_elements(struct sna *sna, w_component = GEN4_VFCOMPONENT_STORE_SRC; selem = 3; } - nelem = id & 2 ? 2 : 1; /* The VUE layout * dword 0-3: position (x, y, 1.0, 1.0), * dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0) * [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0) */ - OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + nelem) - 1)); + OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1)); /* x,y */ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | @@ -1363,15 +1338,21 @@ g4x_emit_vertex_elements(struct sna *sna, (2*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */ /* u1, v1, w1 */ + OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + src_format << VE0_FORMAT_SHIFT | + ((1 + selem) * 4) << VE0_OFFSET_SHIFT); /* vb offset in bytes */ if (id & 2) { - OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | - src_format << VE0_FORMAT_SHIFT | - ((1 + selem) * 4) << VE0_OFFSET_SHIFT); /* vb offset in bytes */ OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | w_component << VE1_VFCOMPONENT_2_SHIFT | GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT | (3*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */ + } else { + OUT_BATCH(GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT | + GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT | + GEN4_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | + GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT | + (3*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */ } } @@ -1715,8 +1696,6 @@ g4x_render_video(struct sna *sna, OUT_VERTEX_F((box->x1 - dxo) * src_scale_x); OUT_VERTEX_F((box->y1 - dyo) * src_scale_y); - _FLUSH(); - if (!DAMAGE_IS_ALL(priv->gpu_damage)) { sna_damage_add_box(&priv->gpu_damage, &r); sna_damage_subtract_box(&priv->cpu_damage, &r); @@ -2392,6 +2371,7 @@ g4x_render_composite(struct sna *sna, if (tmp->src.transform == NULL && tmp->mask.transform == NULL) tmp->prim_emit = g4x_emit_composite_primitive_identity_source_mask; + tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine; } else { if (tmp->src.is_solid) tmp->prim_emit = g4x_emit_composite_primitive_solid; @@ -2399,16 +2379,17 @@ g4x_render_composite(struct sna *sna, tmp->prim_emit = g4x_emit_composite_primitive_identity_source; else if (tmp->src.is_affine) tmp->prim_emit = g4x_emit_composite_primitive_affine_source; + + tmp->floats_per_vertex = 3 + !tmp->is_affine; } - tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine; tmp->floats_per_rect = 3*tmp->floats_per_vertex; tmp->u.gen4.wm_kernel = g4x_choose_composite_kernel(tmp->op, - mask != NULL, - tmp->has_component_alpha, - tmp->is_affine); - tmp->u.gen4.ve_id = 1 << 1 | tmp->is_affine; + tmp->mask.bo != NULL, + tmp->has_component_alpha, + tmp->is_affine); + tmp->u.gen4.ve_id = (tmp->mask.bo != NULL) << 1 | tmp->is_affine; tmp->blt = g4x_render_composite_blt; tmp->box = g4x_render_composite_box; @@ -2796,8 +2777,6 @@ g4x_render_copy_one(struct sna *sna, OUT_VERTEX(dx, dy); OUT_VERTEX_F(sx*op->src.scale[0]); OUT_VERTEX_F(sy*op->src.scale[1]); - - _FLUSH(); } static inline bool prefer_blt_copy(struct sna *sna, unsigned flags) @@ -3094,20 +3073,14 @@ g4x_render_fill_rectangle(struct sna *sna, OUT_VERTEX(x+w, y+h); OUT_VERTEX_F(1); OUT_VERTEX_F(1); - OUT_VERTEX_F(1); - OUT_VERTEX_F(1); OUT_VERTEX(x, y+h); OUT_VERTEX_F(0); OUT_VERTEX_F(1); - OUT_VERTEX_F(0); - OUT_VERTEX_F(1); OUT_VERTEX(x, y); OUT_VERTEX_F(0); OUT_VERTEX_F(0); - OUT_VERTEX_F(0); - OUT_VERTEX_F(0); } static bool @@ -3184,10 +3157,10 @@ g4x_render_fill_boxes(struct sna *sna, g4x_composite_solid_init(sna, &tmp.src, pixel); tmp.is_affine = true; - tmp.floats_per_vertex = 5; - tmp.floats_per_rect = 15; + tmp.floats_per_vertex = 3; + tmp.floats_per_rect = 9; tmp.u.gen4.wm_kernel = WM_KERNEL; - tmp.u.gen4.ve_id = 1 | 1 << 1; + tmp.u.gen4.ve_id = 1; if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { kgem_submit(&sna->kgem); @@ -3289,10 +3262,10 @@ g4x_render_fill(struct sna *sna, uint8_t alu, op->base.mask.bo = NULL; op->base.is_affine = true; - op->base.floats_per_vertex = 5; - op->base.floats_per_rect = 15; + op->base.floats_per_vertex = 3; + op->base.floats_per_rect = 9; op->base.u.gen4.wm_kernel = WM_KERNEL; - op->base.u.gen4.ve_id = 1 | 1 << 1; + op->base.u.gen4.ve_id = 1; if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { kgem_submit(&sna->kgem); @@ -3365,13 +3338,13 @@ g4x_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, tmp.mask.bo = NULL; tmp.is_affine = true; - tmp.floats_per_vertex = 5; - tmp.floats_per_rect = 15; - tmp.has_component_alpha = 0; + tmp.floats_per_vertex = 3; + tmp.floats_per_rect = 9; + tmp.has_component_alpha = false; tmp.need_magic_ca_pass = false; tmp.u.gen4.wm_kernel = WM_KERNEL; - tmp.u.gen4.ve_id = 1 | 1 << 1; + tmp.u.gen4.ve_id = 1; if (!kgem_check_bo(&sna->kgem, bo, NULL)) { _kgem_submit(&sna->kgem); |