summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2009-12-13 10:44:12 +0000
committerChris Wilson <chris@chris-wilson.co.uk>2010-01-08 19:21:31 +0000
commit4902f546be19e3d5bb47f6c75e2199dc4856c0f4 (patch)
treef968fbac699a8f594c0681816cc3e355490e849b
parent83626aba357ffb4dd7931daaf163c1dd1d08f9d3 (diff)
downloadxf86-video-intel-4902f546be19e3d5bb47f6c75e2199dc4856c0f4.tar.gz
xf86-video-intel-4902f546be19e3d5bb47f6c75e2199dc4856c0f4.tar.bz2
xf86-video-intel-4902f546be19e3d5bb47f6c75e2199dc4856c0f4.zip
i965: Ensure that URB_FENCE is aligned to 64-bytes
The PRM (Vol 1, p32) specifies that the URB_FENCE command must not cross a cache-line boundary (64 bytes) in order to work around a silicon issue. Ensure that it does not by inserting an alignment point before the atomic section. This is a slightly too large hammer, but the easiest method to work with the current BEGIN_BATCH/ADVANCE_BATCH protections. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r--src/i830_batchbuffer.h15
-rw-r--r--src/i965_render.c31
2 files changed, 36 insertions, 10 deletions
diff --git a/src/i830_batchbuffer.h b/src/i830_batchbuffer.h
index 1beba4f82..1fc273bb9 100644
--- a/src/i830_batchbuffer.h
+++ b/src/i830_batchbuffer.h
@@ -80,6 +80,20 @@ static inline void intel_batch_emit_dword(intel_screen_private *intel, uint32_t
intel->batch_used += 4;
}
+static inline void intel_batch_align(intel_screen_private *intel, uint32_t align)
+{ /* Pad the batch with zero bytes until intel->batch_used is a multiple of align. */
+ uint32_t delta; /* first the misalignment of batch_used, then the padding length, in bytes */
+
+ assert(intel->batch_ptr != NULL); /* the batch buffer pointer must be set before padding is written */
+ assert(align); /* only non-zero is checked; the (align - 1) mask below assumes a power of two */
+
+ if ((delta = intel->batch_used & (align - 1))) { /* non-zero remainder: not yet aligned */
+ delta = align - delta; /* bytes still missing up to the next multiple of align */
+ memset (intel->batch_ptr + intel->batch_used, 0, delta); /* zero-fill the gap; presumably decoded as no-ops by the GPU -- TODO confirm. NOTE(review): no visible check that the padding fits in the remaining batch space */
+ intel->batch_used += delta; /* batch_used is a byte count (a dword emit advances it by 4) */
+ }
+}
+
static inline void
intel_batch_emit_reloc(intel_screen_private *intel,
dri_bo * bo,
@@ -132,6 +146,7 @@ intel_batch_emit_reloc_pixmap(intel_screen_private *intel, PixmapPtr pixmap,
delta);
}
+#define ALIGN_BATCH(align) intel_batch_align(intel, (align)) /* pads the batch to an align-byte boundary; no trailing ';' so it expands to one expression, like OUT_BATCH */
#define OUT_BATCH(dword) intel_batch_emit_dword(intel, dword)
#define OUT_RELOC(bo, read_domains, write_domains, delta) \
diff --git a/src/i965_render.c b/src/i965_render.c
index 066901c05..7866dd750 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -1160,14 +1160,13 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn)
/* Begin the long sequence of commands needed to set up the 3D
* rendering pipe
*/
- {
- ATOMIC_BATCH(2);
- OUT_BATCH(MI_FLUSH |
- MI_STATE_INSTRUCTION_CACHE_FLUSH |
- BRW_MI_GLOBAL_SNAPSHOT_RESET);
- OUT_BATCH(MI_NOOP);
- ADVANCE_BATCH();
- }
+
+ /* URB fence. Erratum (Vol 1a, p32): URB_FENCE must not cross a
+ * cache-line (64 bytes). Start by aligning this sequence of ops to
+ * a cache-line...
+ */
+ ALIGN_BATCH(64);
+
{
if (IS_IGDNG(intel))
ATOMIC_BATCH(14);
@@ -1175,6 +1174,9 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn)
ATOMIC_BATCH(12);
/* Match Mesa driver setup */
+ OUT_BATCH(MI_FLUSH |
+ MI_STATE_INSTRUCTION_CACHE_FLUSH |
+ BRW_MI_GLOBAL_SNAPSHOT_RESET);
if (IS_G4X(intel) || IS_IGDNG(intel))
OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
else
@@ -1213,9 +1215,9 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn)
OUT_BATCH(BRW_STATE_SIP | 0);
OUT_RELOC(render_state->sip_kernel_bo,
I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
- OUT_BATCH(MI_NOOP);
ADVANCE_BATCH();
}
+
{
int pipe_ctrl;
ATOMIC_BATCH(26);
@@ -1279,7 +1281,16 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn)
offsetof(struct gen4_cc_unit_state,
cc_state[src_blend][dst_blend]));
- /* URB fence */
+ /* URB fence. Erratum (Vol 1a, p32): URB_FENCE must not cross a
+ * cache-line (64 bytes).
+ *
+ * 21 preceding dwords since start of section: 84 bytes.
+ * 12 bytes for URB_FENCE, implies that the end-of-instruction
+ * does not cross the cache-line boundary...
+ *
+ * A total of 33 or 35 dwords since alignment: 132, 140 bytes.
+ * Again, the URB_FENCE will not cross a cache-line.
+ */
OUT_BATCH(BRW_URB_FENCE |
UF0_CS_REALLOC |
UF0_SF_REALLOC |