diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2009-12-13 10:44:12 +0000 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2010-01-08 19:21:31 +0000 |
commit | 4902f546be19e3d5bb47f6c75e2199dc4856c0f4 (patch) | |
tree | f968fbac699a8f594c0681816cc3e355490e849b | |
parent | 83626aba357ffb4dd7931daaf163c1dd1d08f9d3 (diff) | |
download | xf86-video-intel-4902f546be19e3d5bb47f6c75e2199dc4856c0f4.tar.gz xf86-video-intel-4902f546be19e3d5bb47f6c75e2199dc4856c0f4.tar.bz2 xf86-video-intel-4902f546be19e3d5bb47f6c75e2199dc4856c0f4.zip |
i965: Ensure that URB_FENCE is aligned to 64-bytes
The PRM (Vol 1, p32) specifies that the URB_FENCE command must not cross
a cache-line boundary (64 bytes) in order to work around a silicon issue.
Ensure that it does not by inserting an alignment point before the atomic
section.
This is a slightly too large hammer, but it is the easiest method that works
with the current BEGIN_BATCH/ADVANCE_BATCH protections.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- | src/i830_batchbuffer.h | 15 | ||||
-rw-r--r-- | src/i965_render.c | 31 |
2 files changed, 36 insertions, 10 deletions
diff --git a/src/i830_batchbuffer.h b/src/i830_batchbuffer.h index 1beba4f82..1fc273bb9 100644 --- a/src/i830_batchbuffer.h +++ b/src/i830_batchbuffer.h @@ -80,6 +80,20 @@ static inline void intel_batch_emit_dword(intel_screen_private *intel, uint32_t intel->batch_used += 4; } +static inline void intel_batch_align(intel_screen_private *intel, uint32_t align) +{ + uint32_t delta; + + assert(intel->batch_ptr != NULL); + assert(align); + + if ((delta = intel->batch_used & (align - 1))) { + delta = align - delta; + memset (intel->batch_ptr + intel->batch_used, 0, delta); + intel->batch_used += delta; + } +} + static inline void intel_batch_emit_reloc(intel_screen_private *intel, dri_bo * bo, @@ -132,6 +146,7 @@ intel_batch_emit_reloc_pixmap(intel_screen_private *intel, PixmapPtr pixmap, delta); } +#define ALIGN_BATCH(align) intel_batch_align(intel, align); #define OUT_BATCH(dword) intel_batch_emit_dword(intel, dword) #define OUT_RELOC(bo, read_domains, write_domains, delta) \ diff --git a/src/i965_render.c b/src/i965_render.c index 066901c05..7866dd750 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -1160,14 +1160,13 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn) /* Begin the long sequence of commands needed to set up the 3D * rendering pipe */ - { - ATOMIC_BATCH(2); - OUT_BATCH(MI_FLUSH | - MI_STATE_INSTRUCTION_CACHE_FLUSH | - BRW_MI_GLOBAL_SNAPSHOT_RESET); - OUT_BATCH(MI_NOOP); - ADVANCE_BATCH(); - } + + /* URB fence. Erratum (Vol 1a, p32): URB_FENCE must not cross a + * cache-line (64 bytes). Start by aligning this sequence of ops to + * a cache-line... 
+ */ + ALIGN_BATCH(64); + { if (IS_IGDNG(intel)) ATOMIC_BATCH(14); @@ -1175,6 +1174,9 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn) ATOMIC_BATCH(12); /* Match Mesa driver setup */ + OUT_BATCH(MI_FLUSH | + MI_STATE_INSTRUCTION_CACHE_FLUSH | + BRW_MI_GLOBAL_SNAPSHOT_RESET); if (IS_G4X(intel) || IS_IGDNG(intel)) OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); else @@ -1213,9 +1215,9 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn) OUT_BATCH(BRW_STATE_SIP | 0); OUT_RELOC(render_state->sip_kernel_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_BATCH(MI_NOOP); ADVANCE_BATCH(); } + { int pipe_ctrl; ATOMIC_BATCH(26); @@ -1279,7 +1281,16 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn) offsetof(struct gen4_cc_unit_state, cc_state[src_blend][dst_blend])); - /* URB fence */ + /* URB fence. Erratum (Vol 1a, p32): URB_FENCE must not cross a + * cache-line (64 bytes). + * + * 21 preceding dwords since start of section: 84 bytes. + * 12 bytes for URB_FENCE, implies that the end-of-instruction + * does not cross the cache-line boundary... + * + * A total of 33 or 35 dwords since alignment: 132, 140 bytes. + * Again, the URB_FENCE will not cross a cache-line. + */ OUT_BATCH(BRW_URB_FENCE | UF0_CS_REALLOC | UF0_SF_REALLOC | |