authorAnas Nashif <anas.nashif@intel.com>2013-02-11 07:30:29 -0800
committerAnas Nashif <anas.nashif@intel.com>2013-02-11 07:30:29 -0800
commitc4f30fa8253338176ec71f157200b8e2824c0f15 (patch)
tree6b7485eb6f028539ce3dcc40770ee35889eda025
parent1501461b978a770b6fc8883901d6c3d177661667 (diff)
downloadxf86-video-intel-c4f30fa8253338176ec71f157200b8e2824c0f15.tar.gz
xf86-video-intel-c4f30fa8253338176ec71f157200b8e2824c0f15.tar.bz2
xf86-video-intel-c4f30fa8253338176ec71f157200b8e2824c0f15.zip
Imported Upstream version 2.21.2
-rw-r--r--  ChangeLog | 4807
-rw-r--r--  Makefile.am | 1
-rw-r--r--  Makefile.in | 12
-rw-r--r--  NEWS | 290
-rw-r--r--  aclocal.m4 | 52
-rw-r--r--  config.h.in | 12
-rwxr-xr-x  configure | 296
-rw-r--r--  configure.ac | 74
-rw-r--r--  man/Makefile.in | 9
-rw-r--r--  man/intel.man | 18
-rw-r--r--  src/Makefile.in | 9
-rw-r--r--  src/compat-api.h | 54
-rw-r--r--  src/i965_3d.c | 4
-rw-r--r--  src/i965_render.c | 32
-rw-r--r--  src/i965_video.c | 6
-rw-r--r--  src/intel.h | 6
-rw-r--r--  src/intel_batchbuffer.c | 72
-rw-r--r--  src/intel_display.c | 30
-rw-r--r--  src/intel_dri.c | 83
-rw-r--r--  src/intel_driver.c | 51
-rw-r--r--  src/intel_driver.h | 11
-rw-r--r--  src/intel_hwmc.c | 6
-rw-r--r--  src/intel_memory.c | 6
-rw-r--r--  src/intel_module.c | 169
-rw-r--r--  src/intel_options.c | 6
-rw-r--r--  src/intel_options.h | 4
-rw-r--r--  src/intel_uxa.c | 24
-rw-r--r--  src/intel_video.c | 48
-rw-r--r--  src/legacy/Makefile.in | 9
-rw-r--r--  src/legacy/i810/Makefile.in | 9
-rw-r--r--  src/legacy/i810/xvmc/Makefile.in | 9
-rw-r--r--  src/render_program/Makefile.am | 16
-rw-r--r--  src/render_program/Makefile.in | 25
-rw-r--r--  src/sna/Makefile.am | 8
-rw-r--r--  src/sna/Makefile.in | 65
-rw-r--r--  src/sna/atomic.h | 89
-rw-r--r--  src/sna/brw/Makefile.in | 9
-rw-r--r--  src/sna/brw/brw_disasm.c | 43
-rw-r--r--  src/sna/brw/brw_eu.c | 2
-rw-r--r--  src/sna/brw/brw_eu.h | 4
-rw-r--r--  src/sna/brw/brw_eu_emit.c | 132
-rw-r--r--  src/sna/brw/brw_wm.c | 68
-rw-r--r--  src/sna/compiler.h | 2
-rw-r--r--  src/sna/fb/Makefile.in | 9
-rw-r--r--  src/sna/fb/fb.h | 15
-rw-r--r--  src/sna/fb/fbbitmap.c | 41
-rw-r--r--  src/sna/fb/fbblt.c | 6
-rw-r--r--  src/sna/fb/fbpict.c | 12
-rw-r--r--  src/sna/fb/fbpict.h | 20
-rw-r--r--  src/sna/fb/fbpoint.c | 4
-rw-r--r--  src/sna/fb/fbseg.c | 3
-rw-r--r--  src/sna/gen2_render.c | 346
-rw-r--r--  src/sna/gen3_render.c | 842
-rw-r--r--  src/sna/gen4_render.c | 1605
-rw-r--r--  src/sna/gen4_render.h | 73
-rw-r--r--  src/sna/gen4_source.c | 179
-rw-r--r--  src/sna/gen4_source.h | 22
-rw-r--r--  src/sna/gen4_vertex.c | 1543
-rw-r--r--  src/sna/gen4_vertex.h | 16
-rw-r--r--  src/sna/gen5_render.c | 1405
-rw-r--r--  src/sna/gen5_render.h | 80
-rw-r--r--  src/sna/gen6_render.c | 1316
-rw-r--r--  src/sna/gen7_render.c | 1289
-rw-r--r--  src/sna/kgem.c | 1584
-rw-r--r--  src/sna/kgem.h | 187
-rw-r--r--  src/sna/kgem_debug.c | 43
-rw-r--r--  src/sna/kgem_debug.h | 2
-rw-r--r--  src/sna/kgem_debug_gen5.c | 21
-rw-r--r--  src/sna/kgem_debug_gen6.c | 12
-rw-r--r--  src/sna/sna.h | 111
-rw-r--r--  src/sna/sna_accel.c | 1910
-rw-r--r--  src/sna/sna_blt.c | 439
-rw-r--r--  src/sna/sna_composite.c | 139
-rw-r--r--  src/sna/sna_damage.c | 7
-rw-r--r--  src/sna/sna_damage.h | 29
-rw-r--r--  src/sna/sna_display.c | 381
-rw-r--r--  src/sna/sna_dri.c | 1104
-rw-r--r--  src/sna/sna_driver.c | 217
-rw-r--r--  src/sna/sna_glyphs.c | 82
-rw-r--r--  src/sna/sna_gradient.c | 100
-rw-r--r--  src/sna/sna_io.c | 105
-rw-r--r--  src/sna/sna_render.c | 309
-rw-r--r--  src/sna/sna_render.h | 100
-rw-r--r--  src/sna/sna_render_inline.h | 93
-rw-r--r--  src/sna/sna_threads.c | 306
-rw-r--r--  src/sna/sna_tiling.c | 4
-rw-r--r--  src/sna/sna_trapezoids.c | 1555
-rw-r--r--  src/sna/sna_vertex.c | 37
-rw-r--r--  src/sna/sna_video.c | 217
-rw-r--r--  src/sna/sna_video.h | 6
-rw-r--r--  src/sna/sna_video_hwmc.c | 164
-rw-r--r--  src/sna/sna_video_hwmc.h | 26
-rw-r--r--  src/sna/sna_video_overlay.c | 21
-rw-r--r--  src/sna/sna_video_sprite.c | 82
-rw-r--r--  src/sna/sna_video_textured.c | 19
-rw-r--r--  src/xvmc/Makefile.am | 5
-rw-r--r--  src/xvmc/Makefile.in | 14
-rw-r--r--  src/xvmc/shader/Makefile.in | 9
-rw-r--r--  src/xvmc/shader/mc/Makefile.am | 4
-rw-r--r--  src/xvmc/shader/mc/Makefile.in | 13
-rw-r--r--  src/xvmc/shader/vld/Makefile.am | 4
-rw-r--r--  src/xvmc/shader/vld/Makefile.in | 13
-rw-r--r--  test/Makefile.am | 11
-rw-r--r--  test/Makefile.in | 74
-rw-r--r--  test/lowlevel-blt-bench.c | 135
-rwxr-xr-x  test/mkvsync.sh | 27
-rw-r--r--  test/test.h | 5
-rw-r--r--  test/test_display.c | 17
-rw-r--r--  uxa/Makefile.in | 9
-rw-r--r--  uxa/uxa-accel.c | 2
-rw-r--r--  uxa/uxa-render.c | 2
111 files changed, 16961 insertions, 8303 deletions
diff --git a/ChangeLog b/ChangeLog
index d33c6084c..ca65287f0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,4810 @@
+commit a241949c05f44792f51a5bd1e246a44693cb5b06
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Feb 10 14:20:59 2013 +0000
+
+ 2.21.2 release
+
+commit 0d75b19979b1ac14353765e2bb84c6a466129109
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Feb 10 15:47:53 2013 +0000
+
+ sna: Restore glyphs with xorg-1.12
+
+ That simple and innocuous build fix for xorg-1.13 bizarrely causes
+ missing glyphs with earlier Xorgs.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9fd0d8873a5a5c4f77904cab0b9909ca941b5dae
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Feb 10 14:29:29 2013 +0000
+
+ NEWS: fix bug url
+
+ The dangers of cutting and pasting from git log.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 617fadf3acf7bf75fb203c1e85fd0ddb98b3dbb9
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Feb 10 14:20:59 2013 +0000
+
+ 2.21.1 release
+
+commit 3169a4e53cf39cc3d5c18ac6add909aa3a58de7e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Feb 10 11:57:14 2013 +0000
+
+ sna: Reorder some includes so that compat-api.h comes after the headers it wraps
+
+ Fixes the build in cases where the compat-api.h was defining macros to
+ subvert the real functions found in the xorg includes
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 71fbad64c5cfe6832a03815bece4c89d15253e1a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Feb 10 10:54:17 2013 +0000
+
+ configure: Fix typo in checking for libdrm_intel
+
+ The package name is libdrm_intel not libdrm_intel-1, an obvious
+ cut'n'paste error from testing for pixman-1.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3cbdfb54d1fcfed7745111e861e19b7bbac243cc
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Feb 9 19:15:20 2013 +0000
+
+ sna: Backport to squeeze - Xorg-1.6, pixman-0.16, libdrm-2.4.21
+
+ The principle change is to switch to the old Privates API and undo the
+ Region renames.
+
+ The downside is that this ignores the critical bugfixes made to the
+ xserver since xorg-1.6 - but I assume that whoever wants to run the
+ latest hardware on the old xservers is also backporting those stability
+ fixes...
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 42a6b25817985e22e7d462be87fbd97973d96a29
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Feb 9 15:30:58 2013 +0000
+
+ sna: Fix alignment of the base of partial buffers for pre-G33 chipsets
+
+ The older chipsets have much more restrictive alignment rules for the
+ base address of tiled but unfenced objects.
+
+ Bugzilla: https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1120108
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 80044e54634d0836694d5aa6f98ce22fe38d367f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Feb 9 09:57:26 2013 +0000
+
+ sna: Promote to GPU if only partially damaged on the CPU but busy on the GPU
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d18cb72a94fad0ee99ab361c21d643c927d29c35
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Feb 8 22:31:19 2013 +0000
+
+ sna: Randomly perturb 'wedged' to hunt for faults
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ce9f0448367ea6a90490a28150bfdc0a76500129
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Feb 8 16:01:54 2013 +0000
+
+ sna/gen6: Use GT2 settings for both GT2 and GT2+
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ae5399aaf9ef57d33e8fd957e8a96964897c09b3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Feb 8 11:31:21 2013 +0000
+
+ sna: Force the fallback path for unaccelerated randr damage
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c69b4389abc324533a9a311c17a667bf8a1e1673
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Feb 7 22:54:37 2013 +0000
+
+ sna/gen4: Split the have_render flag into separate prefer_gpu hints
+
+ The idea is to implement more fine-grained checks as we may want
+ different heuristics for desktops with GT1s than for mobile GT2s, etc.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
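+ To illustrate the direction, a minimal sketch of such fine-grained hints
+ as a bitmask; the names and values below are illustrative assumptions,
+ not the driver's actual definitions:
+
+     /* illustrative only: per-operation GPU preference hints */
+     #define PREFER_GPU_NONE   0x0
+     #define PREFER_GPU_BLT    0x1  /* blitter acceptable */
+     #define PREFER_GPU_RENDER 0x2  /* 3D pipeline acceptable */
+     #define PREFER_GPU_SPANS  0x4  /* GPU span compositing acceptable */
+
+     /* e.g. a desktop GT1 might favour the blitter only, while a
+      * mobile GT2 could enable the full render pipeline as well */
+     unsigned prefer_gpu = PREFER_GPU_BLT | PREFER_GPU_RENDER;
+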
+commit bec99de812ce6a1bbc2c8e4cfd05f4f74c560ea6
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Feb 8 00:53:10 2013 +0000
+
+ sna: Remove the bogus assertions on buffer domains
+
+ Just a few lines earlier we already have the correct assertion that the
+ buffer was not in the GPU domain, so had these two been correct, they
+ would have still been redundant.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8d1d3c6e6102ff20fbff74ec6b3b2e94ee757015
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Feb 7 14:47:07 2013 +0000
+
+ sna: Fixup an invalid assertion
+
+ We may choose to operate inplace on a buffer last used by the CPU if we
+ are discarding all the existing damage.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c405dba367bdca51221bd2464213199783dc18fe
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Feb 7 13:41:42 2013 +0000
+
+ sna: Also assert that the GPU is not wedged before continuing a batch
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit feeff6fcefccdca5335fea55c2fdbf8a4004c175
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Feb 7 13:33:58 2013 +0000
+
+ sna: Force GTT readback if the GPU is wedged
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8a272971d5971a56f57dde00dceb082d0b142c8c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 6 17:59:10 2013 +0000
+
+ sna: Allow inplace uploads to utilise GTT on LLC machines
+
+ Rather than arbitrarily disable the fallback paths for LLC, allow it to
+ utilise any available GTT buffers for inplace uploads. The best
+ explanation so far is that with the streaming we are trashing the LLC.
+ On other machines, the difference is in the noise.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit bc8a2c30c4f6bb9ce751b6717a3a2feaea0d6d4b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Feb 7 10:42:58 2013 +0000
+
+ sna: Only try the SRC fixup into the buffer if it is CPU mapped
+
+ On one particular machine, this operation is behaving as if it is
+ reading back UC memory during the explicit write-only composite.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 889ed28f52bccdbc54692ea075f95f9635a8d58a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Feb 7 10:42:21 2013 +0000
+
+ sna: Correctly align used buffers to the following page boundary
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
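+ To illustrate the arithmetic, a minimal sketch of rounding a buffer's
+ used size up to the following page boundary; PAGE_SIZE and the helper
+ name here are assumptions for illustration, not the driver's code:
+
+     #include <stdint.h>
+
+     #define PAGE_SIZE 4096
+
+     /* align_to_page(1) == 4096, align_to_page(4096) == 4096 */
+     static uint32_t align_to_page(uint32_t used)
+     {
+             return (used + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+     }
+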
+commit 974b6a97d78dadf09be8a2c4f61020f15d80d558
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 6 17:02:27 2013 +0000
+
+ sna: Fallback to non-LLC paths after an allocation failure for an LLC buffer
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5c8084ef04cb0a7da064fb1e13c8ef7dae528b1b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 6 16:39:31 2013 +0000
+
+ intel: Be careful not to match UMS against future generations
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit be241fb25ed0a8d41a642ea811253207f88d0962
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 6 16:38:12 2013 +0000
+
+ sna: Free the handle after pwrite buffer allocation failure
+
+ Having just allocated the handle, we need to free it if we then fail to
+ allocate memory for the buffer.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
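+ The shape of the fix, sketched with the real DRM_IOCTL_GEM_CLOSE ioctl
+ from <drm.h>/<xf86drm.h>; the surrounding bo/handle/fd names are
+ hypothetical stand-ins for the driver's internals:
+
+     bo = malloc(sizeof(*bo));
+     if (bo == NULL) {
+             /* don't leak the freshly allocated handle */
+             struct drm_gem_close close;
+             memset(&close, 0, sizeof(close));
+             close.handle = handle;
+             (void)drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
+             return NULL;
+     }
+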
+commit 4b3b25f0be33d3af3ccecfb3193fc2d365445fdf
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 6 16:37:21 2013 +0000
+
+ sna: Flush our caches if we fail to mmap an object
+
+ The likely cause for a mmap failure is that we hold too many objects
+ open or have exhausted our address space. In both cases, we need to trim
+ our caches before continuing.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
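+ A hedged sketch of the retry pattern described above; both helper names
+ are hypothetical stand-ins for the driver's internals:
+
+     ptr = bo_map(kgem, bo);          /* mmap the object */
+     if (ptr == NULL) {
+             /* likely out of open handles or address space:
+              * trim the bo caches and retry once */
+             cleanup_caches(kgem);
+             ptr = bo_map(kgem, bo);
+     }
+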
+commit daba1ae3e7f0532cc53d9a5178778dbaec203052
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 6 16:17:36 2013 +0000
+
+ sna: Correctly handle failure to CPU map a new allocation
+
+ If we fail to CPU map, we want to fall back to just using pwrite with
+ normal memory.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0adb0b5e1ebcf3ddfeddae99d96912ec4c090832
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 6 16:02:30 2013 +0000
+
+ sna: Handle mapped buffer allocation failure for LLC
+
+ The presumption was that if we had LLC we would have allocated the
+ buffer by that point - however, it was remotely possible to have fallen
+ through and so we need to handle those cases.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f4cff22afae598f41adf36cd149223d1f7dd6b6e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 6 15:15:36 2013 +0000
+
+ sna: Relax the buffer size assertion to only be larger than required
+
+ Not all paths request alloc pages; a few just request sufficient pages
+ for the original size. So we can only assert that the bo is at least as
+ large as required.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8bc593c732a2f1ccd1bdabc071c709a44222db61
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 6 15:11:00 2013 +0000
+
+ sna: Make sure we always replace io buffers before inserting into the cache
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5f72158919098dd5684d1c56d1ba643cc3be2c7d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 6 15:10:23 2013 +0000
+
+ configure: XvMC support is optional, so make failure to find xcb non-fatal
+
+commit cd6d8f9b9df02934ebfff76cb40410c8ce3887dd
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Feb 6 10:37:50 2013 +0000
+
+ xvmc: Add the complementary XCB_CFLAGS
+
+ After splitting the xvmc dependencies into xcb and non-xcb, we then also
+ have to add the xcb CFLAGS to build libIntelXvMC.la
+
+ Reported-by: Julien Cristau <jcristau@debian.org>
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b96ee47ad97943c3dccd40d9570e29002dc3d85f
+Author: Paul Menzel <paulepanter@users.sourceforge.net>
+Date: Sun Feb 3 13:33:08 2013 +0100
+
+ configure.ac: Split out XCB libraries from `XVMCLIB` into `XCB`
+
+ Building the package under Debian Sid/unstable, `dh_shlibdeps` informs
+ that `libI810XvMC.so.1.0.0` does not need to be linked against
+ `libX11-xcb.so.1`, `libxcb-dri2.so.0`, `libxcb-util.so.0` or
+ `libxcb.so.1` [1].
+
+ $ debuild -b -us -uc
+ […]
+ make[1]: Entering directory `/src/xserver-xorg-video-intel'
+ dh_shlibdeps -- --warnings=6
+ dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libX11-xcb.so.1 (it uses none of the library's symbols)
+ dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libxcb-dri2.so.0 (it uses none of the library's symbols)
+ dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libxcb-util.so.0 (it uses none of the library's symbols)
+ dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libxcb.so.1 (it uses none of the library's symbols)
+ make[1]: Leaving directory `/src/xserver-xorg-video-intel'
+ […]
+
+ Moving `x11-xcb`, `xcb-dri2` and `xcb-aux` from `XVMCLIBS` into `XCB`
+ and adding `XCB_LIBS` only to the `LIBADD` variables of `libIntelXvMC`
+ makes the warnings go away and the libraries are still built without any
+ issues.
+
+ make[1]: Entering directory `/src/xserver-xorg-video-intel'
+ dh_shlibdeps -- --warnings=6
+ make[1]: Leaving directory `/src/xserver-xorg-video-intel'
+ dh_installdeb -O--builddirectory=build/
+ dh_xsf_substvars -O--builddirectory=build/
+ dh_gencontrol -O--builddirectory=build/
+ dpkg-gencontrol: warning: Depends field of package xserver-xorg-video-intel-dbg: unknown substitution variable ${shlibs:Depends}
+ dh_md5sums -O--builddirectory=build/
+ dh_builddeb -O--builddirectory=build/
+ dpkg-deb: building package 'xserver-xorg-video-intel' in '../xserver-xorg-video-intel_2.19.0-6.1_i386.deb'.
+ dpkg-deb: building package 'xserver-xorg-video-intel-dbg' in '../xserver-xorg-video-intel-dbg_2.19.0-6.1_i386.deb'.
+ dpkg-genchanges -b >../xserver-xorg-video-intel_2.19.0-6.1_i386.changes
+ dpkg-genchanges: binary-only upload - no source code included
+ dpkg-source --after-build xserver-xorg-video-intel
+ dpkg-buildpackage: binary package(s) to upload (no source included)
+ Now running lintian...
+ W: xserver-xorg-video-intel: hardening-no-relro usr/lib/libI810XvMC.so.1.0.0
+ W: xserver-xorg-video-intel: hardening-no-fortify-functions usr/lib/libI810XvMC.so.1.0.0
+ W: xserver-xorg-video-intel: hardening-no-relro usr/lib/libIntelXvMC.so.1.0.0
+ W: xserver-xorg-video-intel: hardening-no-fortify-functions usr/lib/libIntelXvMC.so.1.0.0
+ W: xserver-xorg-video-intel: hardening-no-relro usr/lib/xorg/modules/drivers/intel_drv.so
+ W: xserver-xorg-video-intel: hardening-no-fortify-functions usr/lib/xorg/modules/drivers/intel_drv.so
+ N: 1 tag overridden (1 warning)
+ Finished running lintian.
+
+ The modules were originally added with the following commit present
+ since tag 2.10.0.
+
+ commit 3e8f2eae3a586aa29be4858698e666e0ec778cea
+ Author: Eric Anholt <eric@anholt.net>
+ Date: Thu Oct 15 13:48:56 2009 -0700
+
+ XVMC: Use XCB DRI2 instead of cargo-culting our own copy of Xlib stuff. (v2)
+
+ [1] https://buildd.debian.org/status/fetch.php?pkg=xserver-xorg-video-intel&arch=i386&ver=2%3A2.19.0-6&stamp=1347825458
+
+ Signed-off-by: Paul Menzel <paulepanter@users.sourceforge.net>
+
+commit 93770c709aa7d3719b7c717040b16c8f82d5c207
+Author: Paul Menzel <paulepanter@users.sourceforge.net>
+Date: Tue Jan 22 10:47:22 2013 +0100
+
+ NEWS: Fix a typo: a*n* inadvertent
+
+commit a8cfddd280b5220f23565b21c91f3f7dd10bbe91
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Feb 5 22:06:03 2013 +0000
+
+ sna: Tidy buffer allocation size assertions
+
+ Rather than perilously update a local variable with the allocated size,
+ just use the size of the bo in the assertion that it is large enough to
+ satisfy the allocation request.
+
+ Reported-by: Jiri Slaby <jirislaby@gmail.com>
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 82dc91e8c24a1fbbf03dcf89a3955319b3399ea0
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Feb 5 21:50:43 2013 +0000
+
+ test: Add a very basic blt benchmark
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9c80a0337ec12b6baab5aab380503e672e925677
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Feb 5 14:56:10 2013 +0000
+
+ sna: ValleyView uses the same scanline registers as SandyBridge
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4c45e3fe456d211afc6ba69878b413a72ef5d0bf
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Feb 5 14:45:39 2013 +0000
+
+ intel: add more ValleyView PCI IDs
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c6101d9d71a86a579ff9771d456b234a38bd80b7
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Feb 5 11:02:30 2013 +0000
+
+ man: Fix a typo s/debuging/debugging/
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f33c90f7ada238683433d05492434120d06ea1fc
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Feb 1 19:34:56 2013 +0000
+
+ NEWS: Trivial typo s/utilile/utilise/
+
+commit 6346c844525c2b3a82c16fe10485b901a2b5ddbc
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Feb 5 10:17:45 2013 +0000
+
+ sna/gen4: Remove old single-thread SF w/a
+
+ The alternative of disabling GPU spans seems to be far more effective.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1565917f10d9fb3c7e2e7e273173c38c364b9861
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Feb 5 10:11:14 2013 +0000
+
+ sna/gen4: Disable non-rectilinear GPU span compositing
+
+ This seems to be the primary victim of the render corruption, so disable
+ until the root cause is fixed.
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=55500
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 37bc822190f36be7b021167ba4d306bbcd97957b
+Author: Damien Lespiau <damien.lespiau@intel.com>
+Date: Fri Jan 18 14:13:08 2013 +0000
+
+ build: Make generation of gen code depend on intel-gen4asm
+
+ This way, when a new intel-gen4asm is available (because one just hacked
+ on it and has installed a new version for instance) the shaders will be
+ recompiled. This helps catch regressions by testing that the latest
+ changes in the assembler haven't broken too many things.
+
+ Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 18f8d2291fbb53ac993b926c247ca981e1e5207b
+Author: Damien Lespiau <damien.lespiau@intel.com>
+Date: Fri Jan 18 14:13:07 2013 +0000
+
+ build: Use $(AM_V_GEN) to silence the assembly of gen programs
+
+ Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit eea535b7e2a35ec4dfa50550b674d4212676d2ee
+Author: Damien Lespiau <damien.lespiau@intel.com>
+Date: Fri Jan 18 14:13:06 2013 +0000
+
+ build: Make autoreconf honour ACLOCAL_FLAGS
+
+ When running autoreconf, it's possible to give flags to the underlying
+ aclocal by declaring a ACLOCAL_AMFLAGS variable in the top level
+ Makefile.am.
+
+ Putting ${ACLOCAL_FLAGS} there allows the user to set up an environment
+ variable before running autogen.sh to pull in the right directories
+ to look for m4 macros, say an up-to-date version of the xorg-util macros.
+
+ Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9640640ab02d5de630e903116c1b104752f8b604
+Author: Paul Menzel <paulepanter@users.sourceforge.net>
+Date: Sat Feb 2 11:44:54 2013 +0100
+
+ configure.ac: Do not include `xext` and `xfixes` in `XVMCLIB`
+
+ Building the package under Debian Sid/unstable, `dh_shlibdeps` informs
+ that `libIntelXvMC.so.1.0.0` does not need to be linked against
+ `libXext.so.6` or `libXfixes.so.3` [1].
+
+ $ debuild -b -us -uc
+ […]
+ make[1]: Entering directory `/build/buildd-xserver-xorg-video-intel_2.19.0-6-i386-9thLfo/xserver-xorg-video-intel-2.19.0'
+ dh_shlibdeps -- --warnings=6
+ dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libIntelXvMC.so.1.0.0 should not be linked against libXext.so.6 (it uses none of the library's symbols)
+ dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libIntelXvMC.so.1.0.0 should not be linked against libXfixes.so.3 (it uses none of the library's symbols)
+ dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libXext.so.6 (it uses none of the library's symbols)
+ dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libXfixes.so.3 (it uses none of the library's symbols)
+ dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libX11-xcb.so.1 (it uses none of the library's symbols)
+ dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libxcb-dri2.so.0 (it uses none of the library's symbols)
+ dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libxcb-util.so.0 (it uses none of the library's symbols)
+ dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libxcb.so.1 (it uses none of the library's symbols)
+ dpkg-shlibdeps: warning: package could avoid a useless dependency if debian/xserver-xorg-video-intel/usr/lib/libIntelXvMC.so.1.0.0 debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 were not linked against libXext.so.6 (they use none of the library's symbols)
+ dpkg-shlibdeps: warning: package could avoid a useless dependency if debian/xserver-xorg-video-intel/usr/lib/libIntelXvMC.so.1.0.0 debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 were not linked against libXfixes.so.3 (they use none of the library's symbols)
+ make[1]: Leaving directory `/build/buildd-xserver-xorg-video-intel_2.19.0-6-i386-9thLfo/xserver-xorg-video-intel-2.19.0'
+ dh_installdeb -a -O--builddirectory=build/
+ […]
+
+ Not populating `XVMCLIB` with `xext` and `xfixes` makes the warning go
+ away and the libraries are still built without any issues.
+
+ [1] https://buildd.debian.org/status/fetch.php?pkg=xserver-xorg-video-intel&arch=i386&ver=2%3A2.19.0-6&stamp=1347825458
+
+ Signed-off-by: Paul Menzel <paulepanter@users.sourceforge.net>
+
+commit 9807bba950078d86a25b91064ecfebaa0ee459e3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Feb 1 18:25:48 2013 +0000
+
+ sna: Drop bogus refcnt assertion during kgem_bo_retire()
+
+ As we may call it kgem_bo_sync(), during preparation of the upload
+ buffer which in turn may operate on an object straight out of the snoop
+ cache and hence not yet referenced (or in some cases, ever).
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a5561f13498066922b54af04cc71549322ce0e3b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Feb 1 18:05:35 2013 +0000
+
+ sna: Do not add the INPLACE hint if we have the ASYNC hint set
+
+ If the caller is preparing to use the GPU to render into the CPU bo,
+ it will request an ASYNC migration. In those cases, we do not want to
+ substitute it with an INPLACE operation.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d3ff1cb9d7f788002337b1e6c4c81c58112b85b1
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Feb 1 13:46:33 2013 +0000
+
+ 2.21.0 release
+
+commit 008f8230a7c47f1249eb51e53b3abf158f2a42bf
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Feb 1 01:54:52 2013 +0000
+
+ sna: Assert that if we have GPU damage we have a GPU bo
+
+ Scatter the asserts around the migration points to catch where this
+ invariant may be untrue.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit cf0576f87102b1535268691e7e29661b0f9ee73b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Feb 1 00:19:21 2013 +0000
+
+ sna/video: Correct computation of planar frame size
+
+ The total frame size is less than 3 times the subsampled chroma planes
+ due to the additional alignment bytes.
+
+ Bugzilla: https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1104180
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
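+ For a planar format such as YV12, the total is the sum of three
+ individually aligned planes rather than a simple multiple of one plane.
+ A sketch with illustrative alignment values, not the driver's exact
+ rules:
+
+     #define ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))
+
+     uint32_t luma_pitch   = ALIGN(width, 4);      /* full-res Y plane */
+     uint32_t chroma_pitch = ALIGN(width / 2, 4);  /* half-res U and V */
+     uint32_t frame_size   = luma_pitch * height +
+                             2 * chroma_pitch * (height / 2);
+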
+commit 268285d9a64fc47fe81fe5bfbfbd1890dad53e1e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 31 21:57:41 2013 +0000
+
+ sna/gen3+: Flush vertex threads before touching global state
+
+ We need to be careful not just when finishing the current vbo to
+ synchronize with the sharing threads, but also before we emit the batch
+ state that no other thread will try and do the same.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1239e012ae6d4f00ce73f32d7244905a601170ea
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 31 19:18:17 2013 +0000
+
+ sna: Make sure the needs_flush is always accompanied by a tracking request
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9712f49fddc8be939f77c25fcb907873af44619f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 31 18:08:05 2013 +0000
+
+ sna: Remove stale assertion
+
+ Now the reset is meant to re-establish 'rq' if the bo was busy.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit cd7df0004cf6e423d2ae6c0cf83a84e0031161b4
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 31 17:32:57 2013 +0000
+
+ sna: Pass width/height to composite for rotated displays
+
+ This is essential to handle displays that are too large to be rendered
+ normally via the 3D pipeline and so that the bounds of the fixup region
+ are known.
+
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=60124
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 38376b56cfe0dfc603bce48e37432622ef9a0135
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 31 17:29:10 2013 +0000
+
+ sna: Remember to move scanouts to the scanout cache after retiring
+
+ Reported-by: Jiri Slaby <jirislaby@gmail.com>
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0a08de1f02577aef0da289108270c1b35e5d9703
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 31 16:39:47 2013 +0000
+
+ sna: After removing the bo from a batch, check whether it is still busy
+
+ If we transfer a bo to the current batch, then subsequently discard it,
+ we lose the information about its current active state. Try to recover
+ this information, by querying the kernel and adding it to the flushing
+ list if necessary.
+
+ Reported-by: Jiri Slaby <jirislaby@gmail.com>
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
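+ The kernel query itself uses the real DRM_IOCTL_I915_GEM_BUSY ioctl from
+ <i915_drm.h>; the flushing-list bookkeeping below is a hypothetical
+ sketch of the driver's side:
+
+     struct drm_i915_gem_busy busy;
+
+     memset(&busy, 0, sizeof(busy));
+     busy.handle = bo->handle;
+     if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy) == 0 && busy.busy) {
+             /* still active on the GPU: track it for later retirement */
+             list_add(&bo->request, &kgem->flushing);   /* hypothetical */
+     }
+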
+commit fff0686342f8ec3b3f3510340e073defdf2fb73f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 31 12:40:21 2013 +0000
+
+ sna/traps: Thread the fallback rectilinear compositor
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 839542d219bd919c99398d514c1d194d18b78eff
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 31 12:08:52 2013 +0000
+
+ sna/traps: Allow inplace compositing for non-GPU buffers and rectilinear traps
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit e329e04b10c88afb40f2fd8fdad5b24b9f7dfc15
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 31 11:33:37 2013 +0000
+
+ sna/traps: Translate the extents for the rasterization threads
+
+ The single-threaded code used the pre-computed width/height and only
+ required the origin from the bounds. However, the threads need to
+ allocate memory for themselves based on the computed bounds, and so it
+ helps if those bounds are then correct (rather than only the top-left
+ being in local space with the bottom-right in global coordinates).
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 033f75e5bd94e226e719f87ed4e0091845384679
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 31 01:38:01 2013 +0000
+
+ sna: Stage retirement through the flushing list
+
+ If the kernel replies that a bo is still busy, stage its retirement
+ through the flushing list to be certain that we never stall on a
+ subsequent write.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5f5711e62cc4c8ca15782376c4047174299e2db0
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 31 01:21:08 2013 +0000
+
+ sna: Disable dangerous assertions that depend upon external state
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 42529336fd92d39a5a5eceb07f2838d4be50fa8e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 31 00:58:51 2013 +0000
+
+ sna: Prevent falling back to swrast if source is on the GPU
+
+ Currently if the dst is wholly contained within the CPU, then we try to
+ continue to operate on the GPU. However, if we have FORCE_GPU set, it
+ means that one of the sources for the operation resides on the GPU, and
+ that would require a readback in order to perform the operation on the
+ CPU. Hence, if we try to use a CPU bo and fail, convert back to using
+ the GPU bo if forced.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c2d06c407e1c2cbbf3f7f6c4989710a799cd43d0
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 30 21:17:42 2013 +0000
+
+ sna: Improve DBG output for damaged slave outputs
+
+ After computing the intersection of the damage with the slave, give the
+ region extents.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8867aa6a46c33fd2abf3b3f0b1d6115bad6c8017
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 30 21:15:55 2013 +0000
+
+ sna/dri: Handle change of BackBuffer across a pending flip
+
+ If we encounter a delayed flip with a different back buffer than the
+ current, simply update the info rather than bug out.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a31fd03bd4c87c48dc3ca15e3082e29348224b8c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 30 17:26:28 2013 +0000
+
+ sna: Add a bunch of assertions to make sure we do not misplace scanouts
+
+ As scanouts are uncached, they need to be treated carefully and
+ decontaminated before being placed in the general cache. So double check
+ that no bo in those caches is still marked as a scanout.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 6f1b862282ddb4545987fb9f0a45b528b7b7b5ee
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 30 15:44:53 2013 +0000
+
+ sna: Pass the correct WRITE hint when migrating for rendering into the CPU bo
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5011ed2e729d46fe3cff5454e15a0fd16441f7e1
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 30 15:44:22 2013 +0000
+
+ sna: Only discard the clear hint when writing inplace to the GPU pixmap
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 6312f58014c0bb4afa56855be1e9becc3e3cc3d7
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 30 15:43:05 2013 +0000
+
+ sna: Don't force a migration from CPU rendering for a DRI2 flushed pixmap
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 60a3b370aea0cf9ffb4947a73984c877b4695d4e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 30 15:41:51 2013 +0000
+
+ sna: Retire the bo after a set-domain(CPU,0)
+
+ Having relaxed the earlier assertion because the kernel is wrong, we can
+ now retire for READ-READ optimisations.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 78ad5a742f40c2311bfe90997aebedeb998464e5
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 30 15:40:06 2013 +0000
+
+ sna: Relax assertion that the kernel considers the bo idle when we call retire
+
+ All the callers have explicitly changed the domain upon the bo before
+ calling kgem_bo_retire(), so we still get the occasional sporadic
+ failure as kgem_busy() reports true. Kill the assertion for now.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 83bcd310d279758542e366348f808d7ca0f6d0bb
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 30 13:18:21 2013 +0000
+
+ sna: Prefer to use snooped buffers for readbacks
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 496f3ff04453524639a52a3b9dfcb8e198e5e597
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 30 12:21:33 2013 +0000
+
+ uxa: Harden against failures to submit batchbuffers
+
+ If we fail to submit a batchbuffer, the driver is broken and likely to
+ continue to fail to render. Give up, and fallback to swrast so that the
+ session remains usable.
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=59771
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 04d48fee713e7bbc9cdf4f09855f6663a4bdc59f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 30 11:46:20 2013 +0000
+
+ sna: Fix errors found from asserts in a66c5f9ed51e
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit bc8b191ef6f5030d17a3b6497d1fd7556756c1ff
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 30 09:04:10 2013 +0000
+
+ sna: Return early if the Drawable box exactly matches one CRTC
+
+ If we are trying to find the best coverage, then by definition if the
+ drawable is an exact match for one CRTC, we can stop looking.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit de28027ffc649920268ae6fdd64146f08310e8a4
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 30 08:42:48 2013 +0000
+
+ sna/dri: Make sure we discard the existing mappings when swapping GPU bo
+
+ If the GPU bo is currently mapped to the Pixmap, we need to be sure to
+ invalidate that mapping if we swap the GPU bo (for SwapBuffers). If we
+ forget, we leave a dangling pointer to chase.
+
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=60042
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit cf9b9ac3186299ab2418c55e73e19c81e5f615a4
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 30 08:40:53 2013 +0000
+
+ sna: Only discard the mapping prior to the actual read when uploading
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a66c5f9ed51e1dcfc2ab03339795b73617629196
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 30 08:22:00 2013 +0000
+
+ sna: Before replacing the devPrivate.ptr assert it is not already mapped
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3fdd28419adee7145d3925cff2704143a324e9d3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 29 22:26:15 2013 +0000
+
+ sna: Only migrate the sample box if using the BLT engine for a composite
+
+ Modify the presumption that if we are using a core operation on a shadow
+ pixmap, then we are likely to continue migrating that pixmap back and
+ forth.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0c3b0f11d718d915e502582e9fadd5c0577640db
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 29 22:24:30 2013 +0000
+
+ sna: Verify that we always add the SHM CPU bo to the flush list when using
+
+ As we need to synchronize that bo before the next reply, we need to keep
+ track of it whenever it is active on the GPU.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f743cd5734ca502aa8bdb0e1327fe84d6ce82755
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 29 18:04:40 2013 +0000
+
+ sna: Avoid promoting SHM CPU bo to GPU to maintain coherence with SHM clients
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9383c5efe9ace34970abddc5e3c84c32505b537f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 29 17:24:24 2013 +0000
+
+ sna/gen3+: Fix a DBG for composite_boxes()
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b02a1ea5573b6f0b58a037dd4788c04c296f7ff3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 29 09:28:33 2013 +0000
+
+ sna: Add GT1/GT2 thread counts for Haswell
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1dc2d9ede5c7f330ebadf85d987559c8a6cb1c6b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Jan 28 23:14:57 2013 +0000
+
+ sna: Add some more paranoia that we correctly map before fallbacks
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 63c71bcd96202e6da44d6776d119a82f0c06d386
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 27 23:17:13 2013 +0000
+
+ sna: Fix typo in vertex count for threaded source span emitter
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b0d26ca9312695d05c29503a3f892e7f2c5816dd
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 27 21:07:03 2013 +0000
+
+ sna: Replace the forced vertex finish with just a wait
+
+ When completing a batch mid-operation, we need to wait upon the other
+ threads to complete their writes so that memory is coherent before
+ submitting the work to the GPU. This was achieved by forcing the finish,
+ but all we need from that is the wait; making the wait explicit makes
+ the handling of threads clearer and removes the unnecessary vbo refresh.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b0c3170c1092d01b4937f352a3962854785ee549
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 27 19:09:38 2013 +0000
+
+ sna: Add the pixmap to the flushing list when creating for inplace CPU writes
+
+ Reported-by: Jiri Slaby <jirislaby@gmail.com>
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 73f574945f2cac14f9bafa6395e2c4dbb16fcf5d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 27 16:02:52 2013 +0000
+
+ sna: Disable all signals in the render threads
+
+ X uses them (SIGIO especially) for input handling, and gets rightfully
+ confused if it finds itself in a different thread.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9a7bf70365980809d0f02190f2f620a957ff1ba8
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Jan 26 23:03:33 2013 +0000
+
+ sna: Enable threaded rasterisation for non-antialiased geometry
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8178cff5718e69e14d3953a7f754d7585a06838f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Jan 26 14:41:04 2013 +0000
+
+ sna: Begin sketching out a threaded rasteriser for spans
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8ffb3f50b3b4601401da76e2848e059ab63231f4
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Jan 25 10:45:39 2013 +0000
+
+ sna: Spawn threads to rasterize trapezoids through pixman
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0ec2f3a8bac96acc55c8fdb432b97d026abaafb4
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 24 23:10:39 2013 +0000
+
+ sna: Spawn threads to composite trapezoids inplace
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 427b7311fe1b66d54518bae45e9fa149bda8a6e8
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 24 22:25:46 2013 +0000
+
+ sna: Perform the last threaded composite operation directly
+
+ The point of the refactor was to execute the last stage of the composite
+ in the master thread, so do so.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 326dcd75f2202b1af29e986f5efb6b1e133217cb
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 24 20:58:53 2013 +0000
+
+ sna: Parse cpuinfo to determine the actual number of physical cores/caches
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
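+ A rough, Linux-specific sketch of the idea: read the "cpu cores" field
+ of /proc/cpuinfo (real code would also de-duplicate physical ids and
+ inspect the cache topology); the helper name is illustrative:
+
+     #include <stdio.h>
+
+     static int physical_cores(void)
+     {
+             FILE *file = fopen("/proc/cpuinfo", "r");
+             char line[256];
+             int cores = 0;
+
+             if (file) {
+                     while (fgets(line, sizeof(line), file))
+                             if (sscanf(line, "cpu cores : %d", &cores) == 1)
+                                     break;
+                     fclose(file);
+             }
+             return cores > 0 ? cores : 1;   /* fall back to 1 core */
+     }
+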
+commit f597b647180c1e7bf83693060f244926191b7462
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 24 18:45:35 2013 +0000
+
+ sna: Tidy construction of data for threaded composite
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1643c97f8f7b49738b649b5f7d1e574d689d167e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 24 18:24:02 2013 +0000
+
+ sna: Use threads for simple mask generation
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d60128c55e8f5f69476d42c20f2fd62ccc0f411e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 24 15:41:29 2013 +0000
+
+ sna/dri: Compensate clipExtents for drawable offset
+
+ The clipExtents is in screen coordinates whereas we just want to confirm
+ that the maximum pixel to be copied lies within the DRI2 buffer, which is
+ relative to the drawable.
+
+ Reported-by: Matthieu Baerts <matttbe@gmail.com>
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=59806
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 264b3b72500c5af74d124a214347d45c9cb90a1d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 24 15:06:12 2013 +0000
+
+ sna: Refactor to use a common fbComposite fallback
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8ecfbea9d1f83b2de62bee0f58299e7a90c741d1
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 24 14:46:03 2013 +0000
+
+ sna: Experiment with a threaded renderer for fallback compositing
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 778dba90cfc4e801a975bd661c56a565ce60524b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 23 21:32:29 2013 +0000
+
+ sna/dri: Don't contribute missed frames to the target_msc
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 50b41cb485ffb38e6bf705a3a62840bb78af669b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 23 21:16:49 2013 +0000
+
+ sna/dri: Only reject DRI2 buffers that are too small for the request blit
+
+ The goal is to reject stale DRI2 buffers that are smaller than the
+ target due to not-yet-handled ConfigureNotify, but not to reject
+ blitting from windows that are larger than the frontbuffer.
+
+ Fixes a regression from the overzealous
+ commit b27ecf3059bc066ef59f2a71c1d8d8f0ffec7191
+ Author: Chris Wilson <chris@chris-wilson.co.uk>
+ Date: Mon Nov 12 14:06:06 2012 +0000
+
+ sna/dri: Prevent scheduling a swap on stale buffers
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 98b312e579385e6e4adf6bf0abe20f8ca84592af
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 23 20:51:35 2013 +0000
+
+ sna/dri: Stop feeding I915_TILING_Y to mesa i915c
+
+ Only i915g handles Y-tiling, and we can't differentiate between the two
+ types of clients.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 31796400915a06fc789088b7dcfcecd6ea91e195
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 23 19:37:23 2013 +0000
+
+ sna: Clean up WAIT_FOR_EVENT on gen2/3
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ea8148b24d48db4f46205817db8a55dd6ea1a4b3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 23 17:47:12 2013 +0000
+
+ sna/dri: Prefer to use the BLT ring for vsync'ed copies on IVB+
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3c3a87a2d4261cbd66602812637328a04787f510
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 23 17:35:50 2013 +0000
+
+ sna/gen6: Correct the event definition for secondary pipes for MI_WAIT_FOR_EVENT
+
+ It helps to wait upon the event we program and enable.
+
+ References: https://bugzilla.kernel.org/show_bug.cgi
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 88753c5a8c6c9acf086d81828260adf330eebb1a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 23 17:35:50 2013 +0000
+
+ sna/gen7: Correct the event definition for secondary pipes for MI_WAIT_FOR_EVENT
+
+ It helps to wait upon the event we program and enable.
+
+ References: https://bugzilla.kernel.org/show_bug.cgi
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2d92d8ec562cb1e6b9dca28074adca670734233c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 22 09:24:04 2013 +0000
+
+ sna: Extend rectangular PolyLines to cover corner pixels on ccw paths
+
+ Reported-by: Joe Peterson <joe@skyrush.com>
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=55484
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c8817e24a9d97110a961c3803290e38ff5cbfc9a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 22 09:06:50 2013 +0000
+
+ sna/gen7: Fix inversion of bool return code from CA pass
+
+ As we inverted the predicate, we no longer restored the original
+ operation after performing a CA pass - glyphs would randomly become
+ white.
+
+ Reported-by: Jiri Slaby<jirislaby@gmail.com>
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 10f549332e315cfe2cc86aadab94a95ae6757c34
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Jan 21 16:48:34 2013 +0000
+
+ sna: Free a non-reusable bo if it expires on the flushing list
+
+ Still not sure just how the bo ends up there, but as there seems to be
+ the occasional malingerer, just free it.
+
+ Reported-by: Jiri Slaby <jirislaby@gmail.com>
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d7f0df27edb20b052ad39beb26a0b1924f432618
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Jan 21 16:34:09 2013 +0000
+
+ sna: Use the maximum backlight value if we fail to read the current value
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 46a3a68e60a1d0a598ec8ece81088a4e6491de55
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Jan 21 16:29:30 2013 +0000
+
+ sna: Assert that if marked as a scanout it is indeed bound.
+
+ On further review, the invariant must have been violated earlier, so
+ make the assert earlier.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0507d55dd1bc8fedae524a410a9e7b53f1dad920
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Jan 21 16:24:49 2013 +0000
+
+ sna: Only add bound scanouts to the scanout list
+
+ If we never used the bo as an actual scanout it will never have been
+ moved to the uncached domain and so we can return it back to the system
+ cache.
+
+ Reported-by: Jiri Slaby <jirislaby@gmail.com>
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5a0bc67ba57cf698e100df617474669ed5d036d6
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Jan 21 11:41:38 2013 +0000
+
+ sna: New execbuffer flags for lut-handle and fast-relocs are upstream
+
+ Now the flags are upstream, we can rely on runtime tests as the
+ interface is now frozen.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 208ca91a31182e8ddad36e6a735c725362cbd071
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 20 18:02:41 2013 +0000
+
+ sna/gen7: Place the vsync commands in the same cacheline
+
+ Do as told; both the LRI and WAIT_FOR_EVENT need to be in the same
+ cacheline for an unspecified reason.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9a3e3abfe9b624af2354c5a69778aee3024fe46c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 20 17:48:31 2013 +0000
+
+ sna/gen7: Offset start/end scanlines by one
+
+ The hardware needs to be programmed with the line before the desired
+ scanline, wrapping around as required.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit e6a64f872bfd026aa1ba1bd44b1298918c819849
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 20 16:59:58 2013 +0000
+
+ sna/gen3+: Remove bogus assertion that the vbo in included before finish
+
+ If we are carrying over a nearly full vbo from one batch to the next, we
+ may indeed finish it prior to writing any new primitives and so the
+ assert is truly bogus.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5de919336fc1ba1c4116e18ba0560cdb7b0589f0
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 20 16:36:17 2013 +0000
+
+ sna/gen6: Tweak programming scanline values
+
+ The documentation says that both start/end scanline need to be the line
+ before the desired value, and so to program the first scanline we need
+ to set it to the last scanline. The docs also say that the lower 3 bits
+ are ignored, so tweak the values programmed accordingly, with an extra
+ check that the window is not reduced to 0.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
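+ A sketch of the adjustment described above for both scanline fixes;
+ register encoding details are omitted and the variable names are
+ illustrative, not the driver's code:
+
+     /* the hw wants the line *before* the desired scanline, wrapping */
+     start = start ? start - 1 : vtotal - 1;
+     end   = end   ? end   - 1 : vtotal - 1;
+
+     /* the lower 3 bits are ignored, so widen the window and make
+      * sure it has not collapsed to nothing */
+     start &= ~7;
+     end   |= 7;
+     if (start >= end)
+             return false;   /* degenerate window: skip the vsync wait */
+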
+commit 2f9ac4e8a17e9d60bbb55c46929c37e92181d804
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 20 15:53:32 2013 +0000
+
+ sna/gen3+: And restore non-CA compositing state after the CA pass
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 650c9d5ce80afc1d4c8d9f77f6679f085fa4dc9d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 20 14:58:42 2013 +0000
+
+ sna/gen3+: Reset vertex relocation state after discarding the batch
+
+ Fixes a regression from commit a6ecb6d31d8c543f38fca0be6b0ec82e59dcd8d2
+ Author: Chris Wilson <chris@chris-wilson.co.uk>
+ Date: Wed Jan 16 09:14:40 2013 +0000
+
+ sna: Discard the batch if we are discarding the only buffer in it
+
+ as we may keep a stale relocation for the vertex buffer alive if we
+ attempt to clear the bo using the render engine before discarding it.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 492952e0d6362a046a666956afdf8f9bc0f2b7e7
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 20 14:55:06 2013 +0000
+
+ sna/gen3+: Handle flushing vbo for CA glyphs
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b52c921204df6b2486717fcef05b4a1993aa1071
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 20 14:02:07 2013 +0000
+
+ sna: Adapt error detection and handling for invalid batchbuffers
+
+ Allow the DDX to continue even if the kernel rejects our batchbuffers by
+ disabling hw acceleration - just extends the existing hang detection to
+ also handle the driver producing garbage.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8215a278f20d34819536edbda05a108a860fefb9
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 20 12:36:07 2013 +0000
+
+ sna/gen3: Always close the vertices for a batch, even if the vbo is empty
+
+ In the case where we emit a no-op, we may not attempt to finish binding
+ the vbo as it is considered empty. This leaves a stray relocation for
+ the next batch, and also causes it to believe that it has a vbo bound
+ already.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a88a9b9a59fa2d5fd427fa6e1f74fb9844379264
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 20 12:06:09 2013 +0000
+
+ 2.20.19 release
+
+commit 7822bbacbece6fcb2e12863cd6c7a53ab614c37c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 20 11:43:49 2013 +0000
+
+ test: Add script to generate source file for testing vsync
+
+ Courtesy of an original script by Mark Schreiber,
+ https://bugs.freedesktop.org/show_bug.cgi?id=59606
+
+commit 9329d8755981989ccbe66df6085fbab7c809a2c6
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 20 10:14:21 2013 +0000
+
+ sna: Make DEBUG_SYNC a configure option
+
+ As it is advisable to combine the synchronous rendering debug option
+ with other debugging options, it is more convenient to make it into a
+ configure option: --enable-debug=sync
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c9263f192e2f85dd961bc1c4e9ca8180db874517
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 20 01:39:12 2013 +0000
+
+ sna: Apply DEBUG_SYNC prior to emitting error report
+
+ This is handy for the case where the batch triggers a GPU hang rather
+ than being rejected by the kernel.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 42ab789cce8423d99864776c6d5ba759c4129b54
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Jan 18 13:56:53 2013 +0000
+
+ sna: Clear the non-intersecting damage after skipping the slave update
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 828a3a80aa3f0692e7be2831d58bccf02e2c481d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Jan 18 13:16:23 2013 +0000
+
+ uxa: Clip dirty region to slave pixmap before appending damage
+
+ Fixes regression from
+
+ commit c789d06cf8a0debc67058d7be1483f5b542e2baa
+ Author: Dave Airlie <airlied@redhat.com>
+ Date: Mon Jan 7 13:57:21 2013 +1000
+
+ intel: fixup damage posting to be done correctly around slave pixmap
+
+ which causes the entire slave scanout to be read back from uncached
+ memory every time a pixel is modified.
+
+ Reported-by: Stephen Liang <inteldriver@angrywalls.com>
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=59539
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
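+ The shape of the fix, using the server's region/damage API
+ (RegionIntersect, RegionNotEmpty and DamageRegionAppend are real Xorg
+ calls; slave_extents and the dirty-tracking names are illustrative):
+
+     RegionRec damage;
+
+     RegionNull(&damage);
+     RegionIntersect(&damage, DamageRegion(dirty->damage), &slave_extents);
+     if (RegionNotEmpty(&damage))
+             DamageRegionAppend(&dirty->slave_dst->drawable, &damage);
+     RegionUninit(&damage);
+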
+commit e17eaf540b614cdcb8f7349dd01852c3afc5ab05
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Jan 18 13:09:36 2013 +0000
+
+ sna: Replace double negative '!RegionNotEmpty' with the equivalent RegionNil
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2de43a0164ba5364ffd7cb48f0bccc9873e87332
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Jan 18 12:01:54 2013 +0000
+
+ sna: Skip an empty slave update
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 38de17f80d780bf219fc3c4018ad9cc8808ba50f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Jan 18 10:16:42 2013 +0000
+
+ sna: Remove bogus assertion invalidated by 'read-read' sync
+
+ If we perform a read-read synchronisation, the kernel may still believe
+ that the bo is busy as it remains on the active lists being read by the
+ GPU.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9f68ac60ae37cc72503ec40691d1ae43a476f8e7
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 17 20:00:34 2013 +0000
+
+ sna/dri: Explicitly flag sync copies for the backends
+
+ As gen6/7 need to prevent ring switching and perform a rendercopy if we
+ need to perform a vsync'ed copy.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1ee00c408d8142cfaf4202393c2364c9ae73cb6e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 17 13:09:47 2013 +0000
+
+ sna/trapezoids: Fix horizontal offset for inplace operation
+
+ Remember that for an inplace operation we are not dealing with an a8
+ mask, but rather an x8r8g8b8 surface and so we need to step accordingly.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0d749f93ea52161e59da1adca1a22e96ba293551
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 17 12:28:18 2013 +0000
+
+ sna: Drop the MOVE_WHOLE_HINT for PutImage
+
+ It is not as clearly beneficial as for GetImage, as for example toolkits
+ may only push the shadows around a window.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit dc643ef753bcfb69685f1eb10828d0c8f830c30e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 17 12:27:55 2013 +0000
+
+ sna: Apply read-only synchronization hints for move-to-cpu
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 18035a21e147788bea03ab2175ca03ae951701ce
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 17 11:52:10 2013 +0000
+
+ sna: Remove the confusion of the pixmap->undamaged
+
+ This was to track a pixmap that had been used for migration (i.e. had in
+ the past been used for mixed rendering). It is no longer used so remove
+ it.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 46141d277f326ae78f7b0e927a500e0eb1987f1b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 17 10:16:24 2013 +0000
+
+ sna: Consider fill style for XPolyRectangle
+
+ The rectangle outline is not always solid...
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d5c8d38afaba04281157bafe212e93f010ae00f5
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 17 10:10:54 2013 +0000
+
+ sna: Refactor to remove a goto from sna_put_zpixmap_blt()
+
+ The complexity of the function has been moved to move-to-cpu so we can
+ take further advantage of the simplified logic in put_zpixmap to clean
+ up the code by removing an unwanted goto.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9552438caa4d295c99a9b8821cf2644739861c6a
+Author: Colin Walters <walters@verbum.org>
+Date: Wed Jan 4 17:37:06 2012 -0500
+
+ autogen.sh: Implement GNOME Build API
+
+ http://people.gnome.org/~walters/docs/build-api.txt
+
+ Signed-off-by: Adam Jackson <ajax@redhat.com>
+
+commit 87d773249af18ae8722aacb7306b0eee51a90dbc
+Author: Adam Jackson <ajax@redhat.com>
+Date: Wed Jan 16 13:18:23 2013 -0500
+
+ configure: Drop AM_MAINTAINER_MODE
+
+ Signed-off-by: Adam Jackson <ajax@redhat.com>
+
+commit dbf1cfec9cd4e9efe7650f2940c92b4e51214288
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 16 12:20:48 2013 +0000
+
+ 2.20.18 release
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 47caffc50b5cdd288ad868fa9a697f0d4e2d28dc
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 16 10:49:24 2013 +0000
+
+ sna: Restrict upload buffers to reduce sampler TLB misses
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ab36300a22222086b94857f356612106ffbeb480
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 16 09:17:59 2013 +0000
+
+ sna: Correct DBG to refer to the actual tiling mode forced
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a6ecb6d31d8c543f38fca0be6b0ec82e59dcd8d2
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 16 09:14:40 2013 +0000
+
+ sna: Discard the batch if we are discarding the only buffer in it
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 26db2438e34feb8f28444bf7418869b4ecd870da
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 16 09:00:21 2013 +0000
+
+ sna: Fix computation of large object sizes to prevent overflow
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 54c1d97d5ab325874e1c7b2639e58111d7a6b93f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 16 09:00:04 2013 +0000
+
+ sna: Add DBG for when we add the inplace hint
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 588c5aa6bca441d7c9305fe2fcf268e89b6b617d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 15 22:21:56 2013 +0000
+
+ sna: Revert use of a separate CAN_CREATE_SMALL flag
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit af85ffdec7047efa452d6bab3a0ee3889dd4f046
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 15 20:37:11 2013 +0000
+
+ sna: Avoid serialising on a move-to-cpu for an async operation
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d70be85dc723168a481c1955444afd951c4817bf
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 15 20:16:45 2013 +0000
+
+ sna: Assert that we never try to mix INPLACE / ASYNC hints for move-to-cpu
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1287c3a24c277cb42930d8af2943b9f7b016f31d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 15 18:59:15 2013 +0000
+
+ sna: Specialise sna_get_image_blt for clears to avoid sync readback
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit da4972eec57e662b98a7abced6338ceb8a533a48
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 15 18:34:07 2013 +0000
+
+ sna/trapezoids: Avoid the multiply for an opaque source
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 7f968c8c991cff751459939bdb42e14255f529b7
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 15 18:41:00 2013 +0000
+
+ sna: Add DBG to use_shm_bo()
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit af63fab5047a43716c5df875ddc50f7c877f8a83
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 15 18:21:11 2013 +0000
+
+ sna: Hint that a copy from a SHM bo will likely be the last in a batch
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1be436409222c00ff66c6d747487b77f1037b27a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 15 18:20:29 2013 +0000
+
+ sna: Pass the async hint for the upload into the GPU
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2113f7f440dd2f10e80f0bb3bd5cd155f7e19098
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 15 09:33:03 2013 +0000
+
+ sna: Free the SHM pixmaps after b266ae6f6f
+
+ Since b266ae6f6f protected the static allocations from being reaped in
+ the normal course of events, we need to penetrate those defenses in
+ order to finally free the SHM mappings.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 441c481630a5cf09a7eb26d5db80b1e60cb2b10f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 15 01:26:19 2013 +0000
+
+ sna: Mark uploads with async hints when appropriate
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 6abd442279fd32d1ce9b33a72eabbeb922316151
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 15 00:15:23 2013 +0000
+
+ sna: Avoid allocating an active CPU bo unnecessarily
+
+ If we will not write back the GPU damage to the bo as we intend to
+ overwrite it for the next operation, we can forgo allocating the active
+ CPU bo and skip the synchronisation overhead.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f235c74cd661970c76e152777e9a2c314a368a56
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Jan 14 15:49:42 2013 +0000
+
+ sna: Tweak consideration of last-cpu placement for inplace regions
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 70c5e41b519e44e620948d683d3b1111494d2f48
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Jan 14 15:03:59 2013 +0000
+
+ sna: Limit temporary userptr uploads to large busy targets or LLC machines
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit cf860da1c78244036c59edf934b312cc1367e8aa
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Jan 14 12:50:54 2013 +0000
+
+ sna: Apply PutImage optimisations to move-to-cpu
+
+ We can replace the custom heuristics for PutImage by applying them to
+ the common path, where hopefully they are equally valid.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit e4ad4477815abe31b1a2323673da86a6def2f246
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Jan 14 13:12:46 2013 +0000
+
+ sna: Use userptr to accelerate GetImage
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3cc04a8e24f02248b6382c9bc354ea15c42b17b6
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 13 17:34:03 2013 +0000
+
+ sna: Initialize src_bo to detect allocation failure
+
+ sna_accel.c: In function 'sna_put_image':
+ sna_accel.c:3730:18: warning: 'src_bo' may be used uninitialized in this
+ function [-Wmaybe-uninitialized]
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3f04b0b98d7f861ff58b82c99d33b7eacfcda5f7
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 13 17:31:15 2013 +0000
+
+ sna: Check size against aperture before attempting to perform the GTT mapping
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 7a7db06c62228acc6d1c03e800c7afa84e886f5a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 13 13:45:18 2013 +0000
+
+ sna: Add a compile flag for measuring impact of userptr uploads
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit bcc212dc7a939505a678f97f6700eee99204249f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 13 13:36:09 2013 +0000
+
+ sna: Use the pixmap size (not drawable) to determine replacement
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 32f43f618d9b11ea44b3e01a95ac3f239a731ad2
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 13 13:23:24 2013 +0000
+
+ sna: Allow large image uploads to utilize temporary mappings
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit bf2b2e2f91208412c8b74a95859def501514be43
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 13 12:24:44 2013 +0000
+
+ sna: Allow creation of a CPU map for pixmaps if needed
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b266ae6f6f8fb4c494ece532ae4621055e66beb2
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 13 11:30:07 2013 +0000
+
+ sna: Relax limitation on not mapping GPU bo with shadow pointers
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a2d82161436e489f23637d793c737bc6950a62b8
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 13 10:17:33 2013 +0000
+
+ sna: Correct a few assertions after enabling read-only mappings
+
+ As these do not flush the active state if we have read-read mappings, we
+ need to be careful with our asserts concerning the busy flag.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ab01fd696e1137ddfb9a85ae68c15c05900f0e8e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Jan 12 09:17:03 2013 +0000
+
+ sna: Experiment with a CPU mapping for certain fallbacks
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 03d392cd1d87e17129c42e4d822d3d1749edb02e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Jan 12 08:51:52 2013 +0000
+
+ sna: Tweak max object sizes to take account of aperture restrictions
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d111c464bfbae57bb7141872810c88b88f30c087
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Jan 12 08:15:13 2013 +0000
+
+ sna: After a size check, double check the batch before flushing
+
+ As we may fail the size check with an empty batch and a pair of large
+ bo, we need to check before submitting that batch in order to not run
+ afoul of our internal sanity checks.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ec77a07b41f1062b941774f3782b51d21e7824dd
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Jan 11 11:40:57 2013 +0000
+
+ sna/dri: Prefer to preserve the ring of the destination bo
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 42f1026e11527cb62b4522b44e71a4e72582a876
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Jan 11 11:40:16 2013 +0000
+
+ sna: Reorder struct kgem_bo to move related data into the same cacheline
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit aead71051ed757e7565d395c858bf8ab8f0b0ff6
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Jan 11 01:30:43 2013 +0000
+
+ sna: Disable memcpy_to_tiled_x() uploads on 32-bit systems
+
+ It's far too slow due to the register-starved instruction set producing
+ atrocious code and the extra overhead in the kernel for managing memory
+ mappings.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 220970b1a484e283e2bbb44f79df613ce1ee1146
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 10 19:43:05 2013 +0000
+
+ sna: Also prefer to use the GPU for uploads into a tiled bo
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 672e59851c427c63f43cde7dfd1688a72100e3b3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 10 19:35:29 2013 +0000
+
+ sna: Prefer userptr if copying to a tiled bo
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 441ef916ae6569c88b3d6abaf7fea4d69be49d76
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 10 19:14:21 2013 +0000
+
+ intel: Throttle harder
+
+ Filling the rings is a very unpleasant user experience, so cap the
+ number of batches we allow to be inflight at any one time.
+
+ Interestingly, as also found with SNA, throttling can improve
+ performance by reducing RSS. However, typically throughput is improved
+ (at the expense of latency) by oversubscribing work to the GPU, and a
+ 10-20% slowdown is commonplace for cairo-traces. Notably, x11perf is
+ less affected and in particular application level benchmarks show no
+ change.
+
+ Note that this exposes another bug in libdrm-intel 2.4.40 on gen2/3.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a37d56f338c5fae832d5eeea1283b6dbde827678
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 10 16:28:24 2013 +0000
+
+ sna: Use some surplus bits to back our temporary pixman_image_t
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 09ea1f4402b3bd0e411b90eb5575b3ff066d7356
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 10 16:26:24 2013 +0000
+
+ sna: Prefer to use the GPU for copies from SHM onto tiled destinations
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c63147a3c33fd26f5c04a8648881659b4a90df06
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 10 15:15:15 2013 +0000
+
+ sna: Allow CPU bo to copy to GPU bo if the device is idle.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2933e7595838c28081810d4959ca1e005a0419e1
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 10 13:07:19 2013 +0000
+
+ sna: Ignore the last pixmap cpu setting if overwriting all damage
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 934ea64f7ff080b00d00c50ba94f63247d7bb130
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 10 13:06:06 2013 +0000
+
+ sna: With a GPU bo and a shm source, do not fall all the way back
+
+ The normal source upload into GPU bo knows a few more tricks that we may
+ want to apply first before copying into the shadow of the GPU bo.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8a8edfe4076ee08558c76eddbb68426e4563888c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 10 03:31:37 2013 +0000
+
+ sna: Make sure all outputs are disabled if no CompatOutput is defined
+
+ If we have to fall back and the configuration is wonky, make sure that
+ all known outputs are disabled as we take over the console.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5449e16c0c2b6ca5af4acf42703164b9d2b2d822
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 10 02:54:41 2013 +0000
+
+ sna: Open-code xf86CompatOutput() to avoid invalid pointers
+
+ config->compat_output needs to be sanitized during device initialization
+ or we may dereference an invalid xf86OutputPtr.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8881a14200580db731ca6902b289b08989aaa61e
+Author: Mickaël THOMAS <mickael9@gmail.com>
+Date: Mon Jan 7 20:47:51 2013 +0100
+
+ Set initial value for backlight_active_level
+
+ If the "Backlight" option is set, backlight_active_level is not set which
+ results in a default value of 0, causing a black screen upon starting Xorg.
+
+commit b8c9598294eaa16e0d1578ad98896f6ec5ba37cf
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Jan 7 13:57:21 2013 +1000
+
+ sna: fixup damage posting to be done correctly around slave pixmap
+
+ Copied from commit c789d06cf8a0debc67058d7be1483f5b542e2baa
+ Author: Dave Airlie <airlied@redhat.com>
+ Date: Mon Jan 7 13:57:21 2013 +1000
+
+ This fixes the damage posting to happen in the correct ordering,
+ not sure if this fixes anything, but it should make things more consistent.
+
+commit c789d06cf8a0debc67058d7be1483f5b542e2baa
+Author: Dave Airlie <airlied@redhat.com>
+Date: Mon Jan 7 13:57:21 2013 +1000
+
+ intel: fixup damage posting to be done correctly around slave pixmap
+
+ This fixes the damage posting to happen in the correct ordering,
+ not sure if this fixes anything, but it should make things more consistent.
+
+ Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 5891c89ff2be277d1a833d4bc092b65184c1f3d6
+Author: Dave Airlie <airlied@redhat.com>
+Date: Mon Jan 7 13:54:47 2013 +1000
+
+ intel: drop pointless error printf in the slave pixmap sync code.
+
+ This is left over and spams the logs, so get rid of it.
+
+ Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 27550e81482229007fa9e0e9769fdd20f3616b23
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 6 17:29:19 2013 +0000
+
+ sna/dri: Transfer the DRI2 reference to the new TearFree pixmap
+
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=58814
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1a5e4fb725da2eb25cf7f476290c02e9880a4efc
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 6 17:08:56 2013 +0000
+
+ sna: Only disable upon a failed pageflip after at least one pipe flips
+
+ If we have yet to update a pipe for a pageflip, then the state remains
+ consistent and we can fallback to a blit without disabling any pipes. If
+ we fail after flipping a pipe, then unless we disable an output the
+ state becomes inconsistent (the pipes disagree on what the attached fb
+ is).
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit dd66ba8e5666a1ce7da0ddc226d074f591e1fa22
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 6 16:13:56 2013 +0000
+
+ sna: Try to create userptr with the unsync'ed flag set first
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9051f43fa3c8d011921ac6ff75b763280f26d98f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 6 15:20:14 2013 +0000
+
+ sna/gen4+: Handle solids passed to the general texcoord emitter
+
+ The general texcoord emitter does handle solids (for the case of a
+ transformed mask) and so we need to be careful to set up the
+ VERTEX_ELEMENTS accordingly.
+
+ Fixes regression from
+ commit 2559cfcc4cbc1d0d84b048565cad3bfee61df8da
+ Author: Chris Wilson <chris@chris-wilson.co.uk>
+ Date: Wed Jan 2 10:22:14 2013 +0000
+
+ sna/gen4+: Specialise linear vertex emission
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4af910e8be92e0ca241ce1e93e322c712dcbe340
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 6 13:43:55 2013 +0000
+
+ sna/gen4+: Trim the redundant float from the fill vertices
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3244e4b23397f54ca76876dd76ebea9a0abd357e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Jan 6 13:24:23 2013 +0000
+
+ Revert "sna/gen4+: Backport tight vertex packing for simple renderblits"
+
+ This reverts commit 8ff76fad1fadc5e309f9a12c30f883460a432049 and
+ commit 48e4dc4bd4b2980f0f804f572d0e3fc1bb4bc21e.
+
+ I forgot gen4 and gen5 do not have the 'non-normalized' bit in their
+ sampler states.
+
+commit d3be77f87916e38af717bafaf2000becd5180d76
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Jan 5 18:07:50 2013 +0000
+
+ sna/trapezoids: filter out cancelling edges upon insertion to edge-list
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2b4a2f52c47a24c297312d51f9a8299c9a54a697
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Jan 5 17:21:34 2013 +0000
+
+ sna/trapezoids: filter out zero-length runs
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 59a7b8b32c694735942fd7e42c1382d91004b0b1
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Jan 4 18:22:14 2013 +0000
+
+ sna: Clear up the caches after handling a request allocation failure
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3c31a9fc210221ba8e7922bec80c15ec39cab7bc
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Jan 4 18:11:12 2013 +0000
+
+ sna: Embed the pre-allocation of the static request into the device
+
+ So that in the case where we are driving multiple independent screens
+ each having their own device, we do not share the global reserved
+ request in the event of an allocation failure.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b5b3cfb0ad1cc5e66c99035f526946bf41011e13
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 3 23:33:44 2013 +0000
+
+ sna: Flush the batch prior to referencing work from another ring
+
+ In the case where the kernel is inserting semaphores to serialise work
+ between rings, we want to only delay the surface that is coming from the
+ other ring and not interfere with work already queued.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ea2da97773d858001f98adc880f24b9671c51b2f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 3 16:47:14 2013 +0000
+
+ sna: Convert allocation request from bytes to num_pages when shrinking
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2bd6e4dcd43bb0d836f12232050e73ce1510bb0f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 3 16:38:33 2013 +0000
+
+ sna: Add a pair of asserts to validate fls()/cache_bucket()
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f9d2730974a869f15eac599ca865b50a9a9658d9
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 3 15:20:45 2013 +0000
+
+ sna: Also recognise __i386__ for fls asm
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 69dde74a003ba0168ceca1558a4cb69097421b92
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Jan 3 15:20:23 2013 +0000
+
+ sna: Fix off-by-one in C version of fls
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit fc702cdf534a4694a64408428e8933497a7fc06e
+Author: Matt Turner <mattst88@gmail.com>
+Date: Wed Jan 2 16:07:54 2013 +0000
+
+ sna: Rewrite __fls without dependence upon x86 assembly
+
+ The asm() prevents SNA from compiling on ia64.
+
+ Fixes https://bugs.gentoo.org/show_bug.cgi?id=448570
+
+commit bc67bdcec832f4302951f2789456666dee2f496c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 2 13:47:51 2013 +0000
+
+ sna/gen6+: Fine tune placement of DRI copies
+
+ Avoid offsetting the overhead of the render copy only to be penalised by
+ the overhead of the semaphore. So compromise.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2559cfcc4cbc1d0d84b048565cad3bfee61df8da
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Jan 2 10:22:14 2013 +0000
+
+ sna/gen4+: Specialise linear vertex emission
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0996ed85fd8bd79f41f28908733b85566f9e2b69
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 1 22:53:26 2013 +0000
+
+ sna/gen2+: Precompute the affine transformation scale factors
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d36cae801f1dcb06d4f93f2f27cc9b9de73e89c9
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 1 21:03:06 2013 +0000
+
+ sna/gen4+: Tidy special handling of 2s2s vertex elements
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8582c6f0bbe1bf01324b46933ff2f50c65f2a82d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 1 20:53:12 2013 +0000
+
+ sna/gen6+: Remove vestigial CC viewport state
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 24264af2912f9abae5aff2a6fb5a50383d9e33be
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 1 20:39:23 2013 +0000
+
+ sna: Fast path inplace addition of solid trapezoids
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit e9a9f9b02978cb2d73c38163827eb7141ebed16c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 1 16:40:28 2013 +0000
+
+ sna: Micro-optimise glyph_valid()
+
+ Note that this requires fixing up the glyph->info if the xserver didn't
+ create a GlyphPicture.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 372c14aae8f4fd2c5865b9d23cd825dcbc33765f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 1 15:49:12 2013 +0000
+
+ sna: Remove some obsolete Options
+
+ Throttling and delayed-flush are now redundant.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 65924da91da4bb617df1bb0a7c3e9d4aa475b6b1
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 1 11:40:15 2013 +0000
+
+ sna: Tidy compat interfaces
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0a35d9287397031c95ebd9dc53b68e33e7dcf092
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 1 11:12:02 2013 +0000
+
+ sna/gen2: Always try to use the BLT pipeline first
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c1457fbd8a169ee19c8e625ea4e779180eb4b070
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Jan 1 10:49:27 2013 +0000
+
+ sna/gen2: Tidy a pair of vertex emitters
+
+ Switch to the new inline scaled transforms.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 48a5797c0f227204d0723de0ef34b046964c571e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Dec 31 17:30:40 2012 +0000
+
+ sna/gen4: Tweak single-thread SF w/a for solids
+
+ Allow multiple threads for the rare case of compositing with a solid
+ color.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit e4f6ba6b47c41645a40e314f14047ba0b5f93a01
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Dec 31 14:06:36 2012 +0000
+
+ sna/gen6+: Hint that we prefer to use the BLT with uncached scanouts
+
+ Once again balancing the trade-off of faster smaller copies with the BLT
+ versus the faster larger copies on the RENDER ring.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 6e87e7ddfe0c21e0fb6b3c2cb940a40aa7d4e061
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Dec 31 14:03:16 2012 +0000
+
+ sna/dri: Use the default choice of backend for copying the region
+
+ Notably, if everything is idle, using the BLT is a win as we can emit
+ them so much faster than a rendercopy, and as the target is uncached we
+ do not benefit as much from the rendercache.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a7988bf77f5a106a48b6e39b6eaf60ef2f8bec11
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Dec 30 14:50:49 2012 +0000
+
+ sna/dri: Fix triple buffering to not penalise missed frames
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 736b89504a32239a0c7dfb5961c1b8292dd744bd
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Dec 30 10:32:18 2012 +0000
+
+ uxa: Align surface allocations to even tile rows
+
+ Align surface sizes to an even number of tile rows to cater for sampler
+ prefetch. If we read beyond the last page we may catch the PTE in a
+ state of flux and trigger a GPU hang. Also detected by enabling invalid
+ PTE access checking.
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=56916
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=55984
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 43336c632beb5d599ec0fc614434b88ef7a26422
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Dec 29 16:47:53 2012 +0000
+
+ sna: Seed the solid color cache with an invalid value to prevent false hits
+
+ After flushing, we *do* need to make sure we cannot hit a false lookup
+ via the last cache.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f6050382095c3bc4f78bc4ff9e9c6086e58d6b28
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Dec 29 16:41:03 2012 +0000
+
+ sna/dri: Gracefully handle failures from pageflip
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1c2ece369177ea6c3fd2f254b2554ceadf5590de
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Dec 29 15:53:23 2012 +0000
+
+ sna/gen4+: Try using the BLT before doing a tiled copy
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 09ca8feb3455c979e799ddf26daae8f2de2813e1
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Dec 29 15:42:02 2012 +0000
+
+ sna: Move the primary color cache into the alpha cache
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8c56c9b1da9e078bd5b7ff4ebc5d8b23f593d500
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Dec 29 14:14:41 2012 +0000
+
+ sna: Allow a flush to occur before batching a flush-bo
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2f53fb389c001f68134f514e30e25e91de41fb9d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 28 22:58:02 2012 +0000
+
+ sna: DBG compile fixes
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit dba83dacd2ccbb2ac23b205ce2a872a889fa30bd
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 28 19:23:36 2012 +0000
+
+ sna/gen3: Use inline transform+scale function
+
+ So as to avoid reading back from the vbo (which may be wc mapped).
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f0fca544b0602bc4ed2f68e8d260e0a3745b4bad
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 28 18:52:44 2012 +0000
+
+ sna/gen4+: Check for a spare exec slot for an outstanding vbo
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c6e850b626f4bb44876c683d596ea38f8f6c30ae
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 28 17:14:52 2012 +0000
+
+ sna/gen4+: Trim an extraneous coordinate from solid composite emission
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3fdc9923447538ed65bf9ffa189d7290ce804730
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 28 17:14:52 2012 +0000
+
+ sna/gen4+: Trim an extraneous coordinate from solid span emission
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit fdd6d222bc92b3e385f5d62f5e03dfd86f290e45
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 28 17:08:00 2012 +0000
+
+ sna/gen4+: Tidy emit_spans_affine()
+
+ gcc produced abysmal code for the inlined emission, so hand unroll it
+ for sanity.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5d222d4d21e6e3af5316728e0da49a014e9fea21
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 28 17:08:00 2012 +0000
+
+ sna/gen4+: Tidy emit_spans_solid()
+
+ gcc produced abysmal code for the inlined emission, so hand unroll it
+ for sanity.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4528f68eff33a5c2f9c1d884e9b3f7228053e0f4
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 28 16:45:50 2012 +0000
+
+ sna: Only allocate a busy CPU bo for a GPU readback
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 99fdd1a1c6aa52688c2c821a90f86700b7ee34b2
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 28 16:33:59 2012 +0000
+
+ sna: Mark kgem_bo_retire() as static
+
+ The exported function is not used, so mark it static and strengthen the
+ assertions.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 548d284b8cf8cc2b311efe3287e0ae956738189a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 28 14:49:38 2012 +0000
+
+ sna: Skip copying fbcon if we are already on the scanout
+
+ If we are already the scanout, then there is little point copying to
+ ourselves... This check should be mere paranoia.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 583efd4ba067a0a4319e43ebc18dd81ed9c8db0a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 27 17:59:59 2012 +0000
+
+ sna: Sanity check config->compat_output
+
+ In a headless setup this may be left initialised to -1.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 7725df8aa1b3eab97618311e3f24769a318bd804
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 27 14:01:59 2012 +0000
+
+ sna/gen2,3: Remove gen-specific vertex_offset
+
+ Remove the duplication of vertex_offset in favour of the common
+ vertex_offset.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 46af1ff126f3fb1f9470b0cbb19c7c2b09d5b92a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 27 00:40:08 2012 +0000
+
+ sna/gen6+: Tidy up ring preferences
+
+ Remove a few duplicated tests.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit dd5b653aa2c5fe2e062533db35c83a40c1952ea6
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 27 09:54:35 2012 +0000
+
+ sna: Do not try and set a 0x0 mode
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 861c2362dd38d7d43fe7ffb181cb197199a1c570
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Dec 26 14:12:42 2012 +0000
+
+ sna/gen6+: Tweak to only consider active ring on destination
+
+ Otherwise we decide to use BLT when hitting the render/sampler cache
+ is preferable for a source bo.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f9b6aa3aaf784f9149e091a646673ddf341cd7ca
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Dec 26 13:05:52 2012 +0000
+
+ sna: Explicitly track self-relocation entries
+
+ Avoid having to walk the full relocation array for the few entries that
+ need to be updated for the batch buffer offset.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 90b1b220ee7a3c543301956b01c54a4a04632db4
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Dec 26 12:51:58 2012 +0000
+
+ 2.20.17 release
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 52fd223fc970118cbdcb31f9574414debc905e9c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 21 21:36:30 2012 +0000
+
+ sna/video: Initialise alignment for video ports > 0
+
+ We repeatedly set the alignment value on the first port, rather than
+ once for each.
+
+ Reported-by: Jiri Slaby <jirislaby@gmail.com>
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3793ccf7804cfc870b46c623dfeefbe0c381c1d4
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 21 14:48:07 2012 +0000
+
+ sna: Remove assertions that the pixmap is wholly defined when uploading
+
+ As the user may only write to a portion of a pixmap (thus only creating
+ a small amount of damage) and then attempt to use the whole as a source,
+ we run the risk of triggering an assertion that the whole was defined.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 07dde33a4f51941b4f612823ea6ea7ca01a6efbc
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 21 14:35:32 2012 +0000
+
+ sna: Remove a pair of stale assertions
+
+ For gen2-5, it does not matter what mode the batch is in when we
+ insert the scanline wait. With the more aggressive batch flushing, and
+ relaxed assignment of mode for those generations, we are likely to see
+ that the batch is idle when we go to insert the waits.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit bdd0cca4e1192df0038621925c4e6243ba419a81
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 21 14:20:23 2012 +0000
+
+ sna: Refactor test for a rotation matrix
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 347c5a7b33729f1bedd408d2ef24756d51b66f1d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 21 10:40:47 2012 +0000
+
+ sna/dri: Refactor get_current_msc between blit/flip paths
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8a67d3f808fcc7c8c51553b1703e8312f28b87a1
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 21 10:21:06 2012 +0000
+
+ sna/dri: Set the correct current_msc for the no readback path
+
+ If we are asked to render immediately, then in order to pass the tests
+ when comparing it to target, we need to set the current_msc to the
+ ultimate future value, -1.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 48e4dc4bd4b2980f0f804f572d0e3fc1bb4bc21e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 20 21:54:25 2012 +0000
+
+ sna/gen4: Backport tight vertex packing of renderblits
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 08d2b073692836aa22f65f8ba30db5d14550c03e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 20 21:30:32 2012 +0000
+
+ sna/gen4: Backport more recent state tracking tweaks
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8ff76fad1fadc5e309f9a12c30f883460a432049
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 20 20:57:40 2012 +0000
+
+ sna/gen5: Backport tight vertex packing for simple renderblits
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9144c951915a1e0c1899a72161f9f0f1ab9b9ac4
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 21 09:44:52 2012 +0000
+
+ sna/dri: Avoid querying the current-msc with swapbuffers wait disabled
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 84c327e17f68c4a56fcb76be1f45ab6d35291b5d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 20 19:44:46 2012 +0000
+
+ sna/video: Assert that the frame is initialised
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4d750219925cb3199ebc6751cdbd2862dfb4cdfe
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 20 19:34:41 2012 +0000
+
+ uxa/dri: Correct the destination of the blit after a chained flip is broken
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ca5c028c2b4d9bf02002acd484054fe427ea8d09
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 20 19:31:44 2012 +0000
+
+ glamor: Release the drawable after passing to glamor_push_pixels
+
+ An unlikely path, but a double prepare instead of a prepare/finish.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit de2de36049e2958a60f63fadffe8f54de8da1e56
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 20 19:29:31 2012 +0000
+
+ sna: Check the correct variable for a failed allocation
+
+ Having already checked 'dst' and just allocated 'src', that is who we
+ should be checking.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit deb908fda74541fba649349db279715b05d0554e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 20 19:22:32 2012 +0000
+
+ intel: ODEV_ATTRIB_PATH is no longer printed, so kill the temporary variable
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0f84ecfc3cd7dfe7f43ff99a6498d2ceccd90225
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 20 12:00:00 2012 +0000
+
+ sna/gen4+: Amalgamate all the gen4-7 vertex buffer emission
+
+ Having reduced all the vb code for these generations to the same set of
+ routines, we can refactor them into a single set of functions.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1f4ede0ef8f8a8d07e11781ad05617ecdfcd3faf
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Dec 19 20:39:10 2012 +0000
+
+ sna: Do not throttle before move-to-cpu
+
+ The idea being that when creating a surface to perform inplace
+ rasterisation, we won't be using the GPU for a while and so give it time
+ to naturally throttle.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5deba2832dc42072d9abaeaa7934bc0e1b28b3ed
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Dec 19 20:03:33 2012 +0000
+
+ sna: Ignore throttling during vertex close
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f91a24fdba517c8e9df5a074db2c789fbf066bb3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 20 09:46:32 2012 +0000
+
+ sna/video: Remove XvMCScreenInitProc
+
+ The symbol disappears without warning in xorg-1.14.
+
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=58552
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ee99511846a0f10abeeba8d25d8fb5bf59621b02
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Dec 19 18:02:50 2012 +0000
+
+ sna/gen4+: Tweak preference of GPU placement for spans
+
+ If the CPU bo is busy, make sure we do not stall for an inplace
+ operation.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit bfd96b092db5e4e0fc2446752deafd1156cf37b3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Dec 18 20:54:33 2012 +0000
+
+ sna/video: Fix presentation of cropped sprites
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2df1b1abf0728f28d2803a096f945779cbe7c70b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Dec 18 16:07:26 2012 +0000
+
+ sna/video: Fix up copying cropped textured video packed data
+
+ Simply ignore the cropping and copy the whole plane rather than
+ complicate the computation of the packed destination pixels.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8d523fa824dcb1987557164d048711c1745de378
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Dec 18 16:07:26 2012 +0000
+
+ sna/video: Fix up destination offset for copying cropped textured video planes
+
+ Oh fun. Textured video expects the source content to be relative to the
+ origin, whereas overlay video expects the source at the origin.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 7bb4573fcc2cf1b8b6bff5d885a2fa81200d2fd7
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Dec 18 15:48:21 2012 +0000
+
+ sna/video: Fix up the image size for copying
+
+ Yikes, setting image.x2 == image.x1 meant no data was copied whilst the
+ video was clipped.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 551b400377ddc5eb1e89b8b5827a42e810c8d23d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Dec 18 15:14:00 2012 +0000
+
+ sna/video: Amalgamate the computation of source vs dest offsets
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d96a226cc59c641c10153ae3a086a5138c852423
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Dec 18 14:26:18 2012 +0000
+
+ sna/video: Fix adjustment of drawable vs source origin wrt the clip
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 79cb6304e983514dd754065e65e2381a903f9bd6
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Dec 18 13:49:59 2012 +0000
+
+ sna/xvmc: Clean up to avoid crash'n'burn
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0d26082303f3f4006ce4974d402c560613081b23
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Dec 18 10:54:28 2012 +0000
+
+ sna: Prefer the GPU once again for PolyPoint
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0e0a2d300633122d6d0f6f82ff110f513b4e64d7
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Dec 18 10:27:04 2012 +0000
+
+ sna/gen7: Mark the ring switch before checking bo
+
+ As we may do a batch submission due to the change of mode.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f522fbe7c98ffad86126c3666b2d9f7e616480b8
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Dec 17 23:04:25 2012 +0000
+
+ sna: Refine check for an unset context switch
+
+ So it appears that we end up performing a context switch on an empty
+ batch, but one that already has a mode. This is caught later, too late, by
+ assertions. However, we can change the guards slightly to prevent those
+ assertions without altering the code too greatly. And I can then think
+ how to detect where we are setting a mode on the batch but doing no
+ work - which is likely masking a bigger bug.
+
+ Reported-by: Jiri Slaby <jirislaby@gmail.com>
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 6c50cf4809816dbbd93d54f589a79b0dab996180
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Dec 17 22:27:14 2012 +0000
+
+ sna: Untangle the confusion of caching large LLC bo
+
+ We only use a single cache for very large buffers, so we need to be
+ careful that we set the tiling on them. Moreover, we need to take extra
+ care when allocating large CPU bo from that cache to be sure that they
+ are untiled and the flags are true.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit e474abea7cf761e78e777db07b41ec99c0b6f59f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Dec 17 15:38:04 2012 +0000
+
+ sna: Promote pinned-batches to run-time detection
+
+ Now that the feature has been committed upstream, we can rely on the
+ runtime detection.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4d7e3a9123cf41d2dd97c0a8a0d461c189064822
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Dec 17 12:34:05 2012 +0000
+
+ uxa: Fix copy'n'paste of false not FALSE
+
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=58406
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 7a4d1136bd09bfd4d2657c0b1b64d553eeb6ed4f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Dec 17 09:41:47 2012 +0000
+
+ sna/video: Pass along the video source offset
+
+ Fortunately nobody had yet noticed that all videos were assumed to play
+ with a matching src/dst origin.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit dfe9d18f9f97a77ceeb410307010424c789c8bd1
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Dec 17 01:06:57 2012 +0000
+
+ sna: Limit the default upload buffer size to half the cpu cache
+
+ This seems to help with small slow caches.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5b0572503eab235bc7eff20d369241330c41e630
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Dec 16 23:04:55 2012 +0000
+
+ sna: Enable support for opting out of the kernel CS workaround
+
+ Keeping a set of pinned batches in userspace is considerably faster as
+ we can avoid the blit overhead. However, combining the two approaches
+ yields even greater performance, as fast as without either w/a, and yet
+ stable.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 805f78addf3ffb36c736df680806cf722b18fea9
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Dec 16 22:04:54 2012 +0000
+
+ sna: Try to reuse pinned batches by inspecting the kernel busy status
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f1aec676810c4a4c180b342d9a83254e08dd55da
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Dec 16 17:37:32 2012 +0000
+
+ sna: Precompute the base set of batch-flags
+
+ This is to make it easier to extend in future.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c7ac12003bd0c7d85fa47d43ee2734b222d84a61
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Dec 16 15:28:24 2012 +0000
+
+ sna: Only flush at the low fence wm if idle
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4580bbeac0051417cb03f272112b0cfe697e31b3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Dec 16 15:00:21 2012 +0000
+
+ intel: Support debugging through AccelMethod
+
+ Ease debugging by allowing all acceleration or render acceleration to be
+ disabled through AccelMethod:
+
+ Option "AccelMethod" "off" -> disable all acceleration
+ Option "AccelMethod" "blt" -> disable render acceleration (only use BLT)
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 58770b7d6401d2d81f7fee1c8c0e788d44149712
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Dec 16 14:59:03 2012 +0000
+
+ man: Describe Option "AccelMethod"
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 83609af3681fad58af88387077bf7ce0c001a1da
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Dec 16 10:53:26 2012 +0000
+
+ sna: Tweak the idle SHM CopyArea path to also replace a busy GPU bo
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 6490585f65bde487da7bc41fa5cb1c5a028d0bf4
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Dec 15 23:26:30 2012 +0000
+
+ sna: Do not force use of the GPU for a copy from a SHM pixmap
+
+ As we will undoubtedly flush and sync upon the SHM request very shortly
+ afterwards, we only want to use the GPU for the SHM upload iff it is
+ currently busy.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3a08f091875f2f0f49697ba9852077094b3a704b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Dec 15 22:53:44 2012 +0000
+
+ sna/gen6+: Tweak prefer-blt-bo
+
+ Split the decision between where it is imperative to use the BLT to
+ avoid TLB misses and the second case where it is merely preferential to
+ switch.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ac9ef1fc606e87b48baa47be22bf828dcfe6659f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Dec 15 20:49:56 2012 +0000
+
+ sna/gen6+: Keep the bo on its current ring
+
+ Track the most recent ring each bo is executed on, and prefer to keep it
+ on that ring for the next operation.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 15ccb7148d15d776a661c1b8c5b9b2360fcae4ad
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Dec 15 20:07:56 2012 +0000
+
+ sna/gen6+: Apply the is_scanout to the key not value in the binding cache
+
+ Oops, we never managed to reuse the cached location of the target
+ surface as we entered it into the cache with the wrong key.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit fde25b08922d97ca0d4a69c654bf690edbd53b3d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Dec 15 18:59:53 2012 +0000
+
+ sna/trapezoids: Add another inline hint
+
+ cell_list_alloc() is only called from one place, and the compiler should
+ already be inlining it - but does not appear to be. Hint harder.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2a21c8b351052be9c32c5669264fb05a8510c957
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Dec 15 17:56:27 2012 +0000
+
+ sna: Include shm hint in render placement
+
+ The goal is to reduce the preference of rendering to a SHM pixmap - only
+ if it is already active will we consider continuing to use it on the
+ GPU.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a467102a9539c7f4fa8d0700ecdcaba49d77b3f7
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Dec 15 10:00:48 2012 +0000
+
+ 2.20.16 release
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b0f8c823b6cafdfdd064c09d58174f946e290541
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Dec 15 09:28:04 2012 +0000
+
+ sna/dri: Fallback to a blit after a failed flip
+
+ ...rather than force the exchange.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2c71a8e08abce74b269687d3a6c1edd7f9d643d3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Dec 15 09:27:07 2012 +0000
+
+ sna/dri: Honour TripleBuffer Option
+
+ In case anyone ever wants to disable the default.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 6593ad3fecb3d044ee5ca161176d8ecaa0b4126a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 14 23:48:00 2012 +0000
+
+ sna/dri: Store and check size of front/back bo attached to a DRI2 drawable
+
+ So that we can prevent feeding back a stale bo when the DRI2 client
+ tries to swap an old buffer.
+
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=57212
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9df9585cb00958b42461b3139bb7aec32090a869
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 14 15:37:56 2012 +0000
+
+ sna: Reduce fence watermarks
+
+ Further restrict the amount of fenced bo we try to fit into the batch to
+ make it easier for the kernel to accommodate the request.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0d3ba44e448c152a570cc469d289ab057fa7be5c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 14 12:47:46 2012 +0000
+
+ sna/gen2+: Experiment with not forcing migration to GPU after CPU rasterisation
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d867fd01cb0060342102a79600daf43e3dc44a07
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 14 13:10:54 2012 +0000
+
+ sna/gen3: Don't combine primitives if beginning a ca 2-pass
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3ca503dac2ea6c036e7ebe878b41923541daf225
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 14 12:49:14 2012 +0000
+
+ sna/gen3: Remove stray setting of vertex_start
+
+ It is always done at the beginning of vertex emission.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 7f76f100e8033497620ee46548df45afff41064a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 13 23:12:42 2012 +0000
+
+ sna/gen2: Reorder reuse_source() to avoid NULL dereference for solids
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 044a54c23384756a5dc1895473abf34f7abb3d83
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 13 23:05:30 2012 +0000
+
+ sna/gen2: Initialise channel->is_affine for solid
+
+ In case we hit a path where we avoid reusing the source for the mask and
+ leave is_affine unset for a solid mask.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 29afd0dc8e893cc4110ee0d70546775dae86ddb3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 13 22:53:18 2012 +0000
+
+ sna/gen2: Assertions
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4d2abe1e3daac74747535d88dff34b024b87bbe9
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 13 22:09:37 2012 +0000
+
+ sna/gen3: Remove incorrect optimisation of an opaque source for CA
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d428dbf7ad7c246acb1c301b73b9df4a1309de03
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 13 14:53:50 2012 +0000
+
+ sna/gen2: Program solid mask using the DIFFUSE component
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9e7311516da81ab45484b291ec668503c5ded0bb
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 13 14:40:25 2012 +0000
+
+ sna/gen2: Align surface sizes to an even tile
+
+ Makes this 855gm much happier.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit e646047a563598948206167765eaaf4192cfd77f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 13 14:23:54 2012 +0000
+
+ sna: Fix up BLT overwrite detection to use target_handle
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4f96439e39a4bf4b127af9ccfdc09d061caff9bd
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 13 13:15:52 2012 +0000
+
+ sna: Fix typo for 830/845 gen
+
+ Must remember, it's octal not decimal.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f631a56bcb3ff1ce1942b828325a157cef1e0880
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 13 00:55:45 2012 +0000
+
+ sna: Only flush the batch after an actual relocation
+
+ As we may write preparatory instructions into the batch before checking
+ for a flush.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 74bbf20e6e652cba55d6d0bc17066f4112f8548c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Dec 12 21:56:22 2012 +0000
+
+ sna: Improve the initialisation failure path for pinned batches
+
+ Simplify the later checks by always populating the lists with a single,
+ albeit unpinned, bo in the case we fail to create pinned batches.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 52c8c9218c8f28fb049b02214d833912a803f911
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Dec 12 21:19:02 2012 +0000
+
+ sna: Fix the error path in kgem_init_pinned_batches() to use the right iter
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c7f7dd61fd07dbf938fc6ba711de07986d35ce1f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Dec 12 19:43:19 2012 +0000
+
+ sna: Pin some batches to avoid CS incoherence on 830/845
+
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=26345
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b154d0dc404a152e1283a013a78be06b8d734867
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Dec 12 18:34:54 2012 +0000
+
+ sna/gen2: STIPPLE requires an argument
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9001263b32efde1361555432914d9ac3ee780511
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Dec 12 12:03:40 2012 +0000
+
+ sna/gen3+: Use nearest for unscaled videos
+
+ If the output is unscaled, then we do not require pixel interpolation
+ (and planar formats are exactly subsampled).
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=58185
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2dbe7d91a7f15a3a9ddad696c5088ca98898fca2
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Dec 12 09:50:34 2012 +0000
+
+ sna/gen4: Use the single-threaded SF w/a for spans as well
+
+ Fixes the flickering seen in the fishtank demo, for example.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2502218a9340044bb660be68f059971119077e29
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Dec 11 17:47:09 2012 +0000
+
+ sna/dri: Fix handling of current_msc > target_msc
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 39d6c65f437d7877c1647b7ecf45e76daabc76a6
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Dec 11 15:30:54 2012 +0000
+
+ sna/dri: Query current msc before use
+
+ Might be worth caching the last-known-value so we can skip the query for
+ an old swap request.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 162e9a2bfc693db186aa481551cf76b3dc5ee55c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Dec 10 23:11:48 2012 +0000
+
+ sna/dri: Disable name exchanges for SwapBuffers
+
+ The DRI2 protocol is inherently racy. Fortuitously, this can be swept
+ under the carpet by forcing the serialisation between the DRI2 clients
+ by using a blit for the SwapBuffers.
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=58005
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0c2287c735f990a98b39d00f28168d7a5df25aba
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Dec 10 20:09:22 2012 +0000
+
+ sna/dri: Only special case 'divisor && msc-passed' for immediate flipping
+
+ As Jesse pointed out, it is legal for the client to request that the
+ flip be some frame in the future even with no divisor.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2ab29a1688cd313768d928e87e145570f35b4a70
+Author: Jesse Barnes <jbarnes@virtuousgeek.org>
+Date: Mon Dec 10 14:55:32 2012 -0800
+
+ dri2: don't schedule a flip prematurely at ScheduleSwap time
+
+ If divisor is 0 but the current MSC is behind the target, we shouldn't
+ schedule a flip (which will occur at the next vblank) or we'll end up
+ displaying it early and returning the wrong timestamp.
+
+ Preserve the optimization though by allowing us to schedule a flip if
+ both the divisor is 0 and the current MSC is equal to or ahead of the
+ target; this avoids a round trip through the kernel.
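+
+ A minimal sketch of that decision (illustrative names, not the
+ actual driver code):
+
+    #include <stdint.h>
+    #include <stdbool.h>
+
+    /* Flip immediately only when no divisor is requested and the
+     * target MSC has already been reached; otherwise queue a vblank
+     * event so the flip completes on the requested frame. */
+    static bool can_flip_now(uint64_t divisor,
+                             uint64_t current_msc,
+                             uint64_t target_msc)
+    {
+            return divisor == 0 && current_msc >= target_msc;
+    }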
+
+ Reported-by: Mario Kleiner <mario.kleiner@tuebingen.mpg.de>
+ Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
+
+commit 986e13a56a8544d5b32dbcaacbc0ee9cf5d47e27
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Dec 10 17:29:08 2012 +0000
+
+ sna: Try installing a fallback config on VT enter in case full desiredMode fails
+
+ This can happen naturally for a 3-pipe config on Ivybridge or if the
+ outputs are rearranged whilst we slept. Instead of failing to change the
+ display on the VT, install at least a fb on the CompatOutput so that
+ hopefully the DE can take over, or give some control to the user.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8c3b82f207bc8cf697646d3324cb4103da3b7856
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Dec 10 16:20:52 2012 +0000
+
+ sna: Avoid reusing the same 'busy' bit for two different meanings.
+
+ Oops, I thought the 'busy' bit was now unused and apparently forgot it is
+ used to control the periodic flushing...
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d1b479a3404e6b52a23e0443c36d0682cbaf3c2f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Dec 10 11:16:04 2012 +0000
+
+ sna: Compromise and only flush a split batch if writing to scanout
+
+ A compromise between not flushing quickly enough and flushing too often,
+ hopefully.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3e9120d73c6f0c0e06b617da91cc2edce4434bc3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Dec 10 11:05:16 2012 +0000
+
+ sna: Immediately flush a split batch
+
+ If we submit a batch early (for example if the GPU is idle), then submit
+ whatever else the client drew immediately upon completion of its
+ blockhandler. This is required to prevent flashing due to visible delay
+ between the clear at the start of the cycle and then the overdraw later.
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=51718
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit aa8c5d8201006397bb32ed6bc28618f9aa77a68a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 6 22:08:08 2012 +0000
+
+ sna/sprite: Add a DBG to report whether the kernel supports sprites
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 25c0d440dee45c03f5e45b8e0e45071c0c32f507
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Dec 9 12:11:53 2012 +0000
+
+ sna: Move source to CPU prior to referencing for inplace trapezoids
+
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56825
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3e82fcc8d243b7ffe1a3d3249a5cdb5fd068093f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Dec 9 11:19:13 2012 +0000
+
+ sna/gen4+: Refine test for preferring GPU spans
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c8f622726a4463b419d032b379576cfb3bc492df
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Dec 9 09:26:03 2012 +0000
+
+ sna: Replace remaining kgem_is_idle() with kgem_ring_is_idle()
+
+ Further experimentation...
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4e4e10935d2815fb62aeaedbfffe10aad115ec0b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Dec 8 22:39:32 2012 +0000
+
+ sna: Flush upon change of target if GPU is idle
+
+ The aim is to improve GPU concurrency by keeping it busy. The possible
+ complication is that we incur more overhead due to small batches.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit cef11795f627a393d4254845b0a19eefbf6c782c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 7 17:40:37 2012 +0000
+
+ sna: Convert the ring from BLT/3D to the internal index for kgem_ring_is_idle()
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 52405b2aed492dc7f76fbf082122842f621e7c06
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 7 17:24:42 2012 +0000
+
+ sna: Only inspect the target ring for busyness
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4b7bbb2a23b03bac63f864c33f47fab88dedbf67
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 7 16:43:32 2012 +0000
+
+ sna: Only flush before adding fresh surfaces to the batch
+
+ Previously, before every operation we would look to see if the GPU was
+ idle and we were running under a DRI compositor. If the GPU was idle, we
+ would flush the batch in the hope that we reduce the cost of the context
+ switch and copy from the compositor (by completing the work earlier).
+ However, we would complete the work far too early and as a result
+ would need to flush the batch before every single operation resulting in
+ extra overhead and reduced performance. For example, the gtkperf
+ circles benchmark under gnome-shell/compiz would be 2x slower on
+ Ivybridge.
+
+ Reported-by: Michael Larabel <michael@phoronix.com>
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 65a8c23ca1bc8e2ebd087027a30358704d4bf11c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Dec 7 14:56:18 2012 +0000
+
+ sna: Only flush at the low aperture watermark if idle
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4bfc5e90f54be1b0997dec9e81796d67b376a01f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Nov 8 23:42:10 2012 +0000
+
+ sna: Mark proxies as dirty on first relocation
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1d2fa5731b7ecfe34a8af809e45bcd3b0b70c890
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 6 22:15:09 2012 +0000
+
+ Remove the default log message
+
+ Breaks compilation with xorg-1.13
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 7f4d4afa629bd18be89d7270e6178a865cf9586e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 6 17:08:15 2012 +0000
+
+ Fix compilation of UMS probe following 13f47008ec
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d733f7d1f1dc343ac34c4a27ac99d71fc0572bc2
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 6 16:55:00 2012 +0000
+
+ sna/gen4+: Add common glyph-to-dst emitters
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 13f47008ec411609968c40b8ec34dd495f14c50b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Dec 6 14:05:33 2012 +0000
+
+ Refactor the common probe methods for scrn construction
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0040eb84c9187476a75202ebb251dd74354e4fc7
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Dec 5 09:25:17 2012 +0000
+
+ sna: Don't disable CPU bo if supported on unknown hw
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 14069f48645867a735ebdccb1e27a62364643c38
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Dec 5 09:24:02 2012 +0000
+
+ sna: Assume that future hardware only gets more flexible
+
+ E.g. that BLT can always write to cacheable memory, inflexible fences
+ are a thing of the past, etc.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b82bfcb54a6af0d1ee17806ef94d9da504cad606
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Dec 4 17:26:47 2012 +0000
+
+ sna/gen6+: Cache the scanout targets separately to avoid overriding PTE caching
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 626dd1324dd2c5b14ca4aff598b5eb1e45550e69
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Dec 4 12:52:50 2012 +0000
+
+ sna/gen4: Workaround render corruption with multiple SF threads
+
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=57410
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit dbdb8fabfbade3f19fd8af3524468b5e6668bb66
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Dec 4 12:37:23 2012 +0000
+
+ sna/gen4: Special case solids through the general vertex emitter
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a17354d5ce6aeeab3f6e42aba63fce06ad18c526
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Dec 4 12:01:26 2012 +0000
+
+ sna/gen4: Remove unused CC viewport
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b9afb9cb614d2ad44330eed03b3f577a35184a88
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Dec 4 11:14:58 2012 +0000
+
+ sna/gen4: Avoid emitting URB_FENCE across a cache-line
+
+ Old erratum.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 08c30c48bdd0db500498bd9617c15f37bacd8de9
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Dec 3 13:23:33 2012 +0000
+
+ sna: Tidy addition of fake GTF modes for panels
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 6f675eeaeade4728af566891b2afbe5b44fbdc2e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Dec 3 10:47:35 2012 +0000
+
+ 2.20.15 release
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 37eb7343be1aeeb90a860096756603a577df1a77
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Dec 1 09:40:11 2012 +0000
+
+ sna/gen5: Inspired by gen4, reorder the flushing
+
+ This may not be totally safe, but it is a nicer explanation for random
+ single character corruption.
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=51422
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a8a99428c14c8aed2082853cc60d0f98a1fa2d86
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Dec 1 09:44:49 2012 +0000
+
+ sna/dri: Unknown generations are given the max value and presumed to use i965_dri.so
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 382bb7bf77fca412fdefd7c304f395d1fe9483b5
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 30 16:24:30 2012 +0000
+
+ sna/gen6+: Only apply the BLT w/a for self-copies
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1aee8acacfe5869a072d9f20f3b8290b16683260
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 30 12:17:25 2012 +0000
+
+ sna: Unify gen4 acceleration again
+
+ After disabling render-to-Y, 965g seems just as happy with the new code
+ paths as g4x.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5d6dd9c5a7eeb1f879525430ad89ab74d427e469
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 30 12:12:49 2012 +0000
+
+ Convert generation counter to octal
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 131600020638ef15166361214cd5e1a0c08c2ea6
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 30 11:43:56 2012 +0000
+
+ sna: Prevent gen4 from rendering to I915_TILING_Y
+
+ It always seems to end up in a hang...
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ee4d1873ff504c2150b51d13864164b02b85dd53
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 30 09:52:49 2012 +0000
+
+ sna/g4x: And remove one more flush point
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1b6c1a30723b1d13e9bd3df0b59a8d75639c89be
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 30 09:27:57 2012 +0000
+
+ sna: Increase tiling alignment to an even tile
+
+ Seems to help g4x.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 6aeb6fdf75fa322d8f5ffe393337d8195d7a6a62
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 30 09:03:53 2012 +0000
+
+ sna/g4x: Remove the flush before the pipelined pointer changes
+
+ This one appears unneeded. Hopefully.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8be2c6695509809c0ab0c5c014e11dc733f73006
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Nov 29 19:56:15 2012 +0000
+
+ sna/g4x: Emit the flush prior to changing pipelined state
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2d5d55702bb8eced32d5b8cb3c0cd125fd99d6dc
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Nov 28 15:46:29 2012 +0000
+
+ sna/gen6+: Override PTE cacheability bits for reads
+
+ This is primarily useful for enabling the render cache for reads from
+ scanouts.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3ed87945c9e83fefcbda053b616856658bf2ac8e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Nov 28 11:05:40 2012 +0000
+
+ sna/gen5+: Disable max-vbo address
+
+ As we do not use this feature, disable it and save the relocation.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b2c9e9da639a134577485f83e0f66f54e2371b98
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Nov 27 11:32:11 2012 +0000
+
+ sna/blt: Avoid clobbering common state before converting to a BLT
+
+ In case we need to continue on with the render operation, we need to
+ preserve the existing state.
+
+ Reported-by: Jiri Slaby <jirislaby@gmail.com>
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=57601
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1e06d19a00f5a5a05369deeb3c5ae15b282c0f92
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Nov 26 15:30:09 2012 +0000
+
+ sna: Disable shadow tracking upon regen
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=56608
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=52255
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d21ed3a6aba5ae227cc5ecd164f3c18bc48c69af
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Nov 26 10:34:28 2012 +0000
+
+ sna: Use a single execobject flag to mark read/write domains
+
+ Slight modification to the proposed API to only pass the simplified
+ domain tracking now performed by the kernel.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 7a904ce839933d57176e013cdad147533c33ca2f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Nov 26 08:48:31 2012 +0000
+
+ 2.20.14 release
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1367e3f9ef5f606c8927cbde441a2ea41fa6d025
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Nov 25 00:24:45 2012 +0000
+
+ sna: Exploit the alpha-fixup of the BLT for texture format conversions
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 6d6399f97cf7cb91dcf89e9a5cd1243f761e4314
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Nov 25 00:05:44 2012 +0000
+
+ sna: Transform a simple repeat pattern into range of the BLT
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 39f1e228b74e98d3d87157cf093fc56ca31e6b13
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Nov 24 20:16:29 2012 +0000
+
+ sna: Make GPU idle more consistent during wakeup
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 40b6121746c55153de444ccb753df80706ff3a69
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 23 18:21:41 2012 +0000
+
+ sna/g4x: Refine the w/a for the broken sf shader
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit e0b906b09697b5fe7a5be2fdc52abd9b1c73f96d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 23 18:01:25 2012 +0000
+
+ sna/g4x: Use the render pipeline more often for composite operations
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 54d8968da5ae39bfbcad511322926931bce2bda3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 23 17:47:49 2012 +0000
+
+ sna/gen4: Revert changes to 965g[m]
+
+ The changes tested on g45/gm45 prove to be highly unstable on 965gm,
+ suggesting a radical difference in the nature of the bugs between the
+ two generations. In theory, g4x has additional features that could be
+ exploited over and above gen4 which may prove interesting in the future.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d560296561f68c3ac841345c0f4ce2c8e7381156
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 23 16:35:06 2012 +0000
+
+ sna/gen4: Don't force a flush for the dirty target if we do not read back
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4023b2044757a9a67d564be0c8adf4885973a6e3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 23 15:42:18 2012 +0000
+
+ sna/gen4: Force composite(WHITE, glyph) for building the glyphstring mask
+
+ For reasons that are not apparent, if we don't composite with
+ source/mask for the glyph strings, there appears to be some cache
+ corruption. About as bizarre as the rest of gen4's idiosyncrasies.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f74b62755c6e41097c23cc506984859e556a3415
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 23 14:59:42 2012 +0000
+
+ sna/gen4: Set composite op before testing for a BLT compatible op
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4c922eb52cadb867a0a15929e5a214c84a5992f3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 23 14:19:59 2012 +0000
+
+ sna/gen4: Pass the mask channel explicitly rather than through a dummy mask
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2e68efa8ec66b4c89e9816bfa15067b398da5e3e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 23 14:04:51 2012 +0000
+
+ sna/gen4: Reduce the flush before performing the CA pass
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 43aff6708fe97aa2fae0e30f98fc7cd9d7311b75
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 23 13:37:44 2012 +0000
+
+ sna/gen4: Update render fill routines to use the dummy mask
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 352910712266202ef017066891ec383fd037fc4a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 23 13:29:01 2012 +0000
+
+ sna/gen4: Move the flush from inside the spans to emit_state()
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 217f3e835b99002669999f818afa0d5c3a1cc852
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 23 12:32:14 2012 +0000
+
+ sna/gen4: Backport the more efficient composite box emitters
+
+ Now that we aren't flushing after every single rectangle, we can strive
+ for a little efficiency.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d3145e3f8146e7d864d32aec49c44c04d619e56a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 23 12:28:21 2012 +0000
+
+ sna/gen4: Use a dummy white mask to avoid the flush w/a when compositing
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 65d530b59b957cc5f303ae819baad8075a555ac0
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Nov 22 08:18:38 2012 +0000
+
+ Revert "uxa: Refactor early-exit paths of uxa_try_driver_composite()."
+
+ This reverts commit fa5c573455cf63090dbb6d167d4e5f1cb23daf72 as it
+ causes a SIGSEGV.
+
+ Reported-by: lu hua <huax.lu@intel.com>
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=57389
+
+commit d3a49f36395d737698616fe8ba9da7b74cd2d89a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Nov 21 18:41:00 2012 +0000
+
+ sna/gen3+: Clear the render.vbo when replacing it for vertex upload
+
+ As we may trigger a flush and a retire when searching for a vertex
+ buffer for the new vertices, we need to be careful to decouple the
+ destroyed vbo in order to avoid a use-after-free when inspecting the
+ state.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1c57a52a1f46e8401429955d8c96fd5095c9012a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Nov 21 18:17:31 2012 +0000
+
+ sna: Assert that we do not replace a GPU bo with a proxy
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8da12a00ee653510e1f1a6fecb28dbb36faa8400
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Nov 21 17:28:46 2012 +0000
+
+ sna: Skip uploading uninitialised pixmap data
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0696ea4bd601ed823dbded03eaef6f316df2a5e8
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Nov 21 17:25:33 2012 +0000
+
+ sna: Add the missing assertion for !proxy
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ee72375ecd4f6d6e756bc361fa512b6675309540
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Nov 21 17:08:53 2012 +0000
+
+ sna: Do not migrate uninitialised pixmaps
+
+ Reported-by: Jiri Slaby <jirislaby@gmail.com>
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 44dad490140d85a4c0dcb916030c36a838670c01
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Nov 21 16:22:35 2012 +0000
+
+ sna: Do not dispose of a shadow pixmap
+
+ Fixes regression from 2249e9edc37811c07e2807d6b4def05585b44c22
+
+ Reported-by: Jiri Slaby <jirislaby@gmail.com>
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9c627a05247690891062a2c0c1c8f7bbc0273104
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Nov 21 16:03:02 2012 +0000
+
+ sna: Remove the kgem_bo_is_mappable refcnt assertion from freed paths
+
+ A few callers of kgem_bo_is_mappable operate on freed bo, and so need to
+ avoid the assert(bo->refcnt).
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c0c48c7a5aca4d24936efbeaefc7674ada2ef87f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Nov 21 14:46:45 2012 +0000
+
+ sna: Add a few refcnt assertions
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0884777c33d20dbc329b98ad0db5ffb0df93ac8c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Nov 21 14:31:44 2012 +0000
+
+ sna: Fix bogus assertion from 03fb9ded43
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2249e9edc37811c07e2807d6b4def05585b44c22
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Nov 21 13:05:36 2012 +0000
+
+ sna: Dispose of local copy for render sources
+
+ If we transfer the pixmap to the GPU to use as a render source, presume
+ that we will not need to then touch the local copy (at least for a
+ while) and so return that memory to the system.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 736bb0f7058bf05ef48cdfe6a30d880de817aff9
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Nov 21 12:16:46 2012 +0000
+
+ sna: Tighten a couple of assertions for damage with use_bo
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 03fb9ded43f9bf8b73f99d5b3a8dc592fe22b523
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Nov 20 21:13:07 2012 +0000
+
+ sna: Assert that we do not create a proxy with existent GPU damage
+
+ References: http://bugs.freedesktop.org/show_bug.cgi?id=47597
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9ab1d1f94e502e5fde87e7c171f3502f8a55f22b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Nov 20 18:42:58 2012 +0000
+
+ sna/dri: Queue a vblank-continuation after flip-completion
+
+ If a vblank request was delayed due to a pending flip, we need to make
+ sure that we then queue it after that flip or else progress ceases.
+
+ Reported-by: Jiri Slaby <jirislaby@gmail.com>
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56423
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=57156
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 7a7a76b359f73a4c4bcda0d88004f4dd5e94a186
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Nov 20 16:05:32 2012 +0000
+
+ sna/dri: Avoid a NULL dereference inside a DBG
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit fa5c573455cf63090dbb6d167d4e5f1cb23daf72
+Author: Eric Anholt <eric@anholt.net>
+Date: Sat Nov 17 13:11:13 2012 -0800
+
+ uxa: Refactor early-exit paths of uxa_try_driver_composite().
+
+ Saves 200b of code at -O2.
+
+commit edefb64d2b1c95b0b678cb222273ab64b2e6db2a
+Author: Eric Anholt <eric@anholt.net>
+Date: Sat Nov 17 13:11:12 2012 -0800
+
+ uxa: Work around uninitialized-value warning.
+
+ The compiler isn't noticing that localDst only diverges from pDst when
+ the _copy variables have also been set.
+
+commit 18b2e2a82724407196001ca853bd83150c66c5bd
+Author: Eric Anholt <eric@anholt.net>
+Date: Sat Nov 17 13:11:10 2012 -0800
+
+ uxa: Add printf attribute to intel_debug_fallback().
+
+ Shuts up a bunch of warnings with xorg's shared warning flags, and
+ should give us more informative warnings in our code.
+
+commit 2d1e267e662505ca0dd318765464a24739dc5bd8
+Author: Eric Anholt <eric@anholt.net>
+Date: Sat Nov 17 13:11:09 2012 -0800
+
+ uxa/dri: Factor out the repeated swap fallback code.
+
+commit cd2f373da7a14e004c999f9f0efaf88c785d3d3f
+Author: Eric Anholt <eric@anholt.net>
+Date: Sat Nov 17 13:11:08 2012 -0800
+
+ configure.ac: Fix bad syntax for test calls
+
+commit b8c01d9bd7ce5656706ebebd16e5a8c5ca0ba487
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Nov 19 15:20:10 2012 +0000
+
+ sna/dri: Add an assert that the cached DRI2 buffer is pinned for DRI
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 84b1a02fa9fde02366e0384044526982e70d0e8d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Nov 19 13:41:28 2012 +0000
+
+ sna/dri: Avoid setting off-delay after a failed flip (use-after-free)
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b4dd1103a55406bcd65f137c668701074a5c41b6
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Nov 18 12:21:49 2012 +0000
+
+ sna/gen6+: Tweak prefer-blt to offset latency when in rc6
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0cb8544dc16d4c1e9ae7f1ee74ee26c7501a9ed7
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Nov 18 12:13:46 2012 +0000
+
+ Remove useless indirection of intel_chipsets
+
+ Once upon a time this was used to hide a compiler warning about a
+ pointer mismatch; now the compiler still warns about the cast, making
+ the indirection moot.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8509e474f57533fc6afcf213165f4c8633631fb5
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 16 23:02:44 2012 +0000
+
+ sna/dri: Clear the last-used-cpu flag when performing CopyRegion
+
+ Keeps the internal bookkeeping intact after the small bypass.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 866ed4a26cbbb29ef3845b0aa56383c4d951c65a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Nov 15 15:13:14 2012 +0000
+
+ sna/dri: Add a couple of more asserts to track injection of a rogue bo
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d2897cb0136ffec83365c7530ed544b562cac478
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Nov 13 12:43:44 2012 +0000
+
+ sna/gen4,5: Fix the opacity shader to use the right vertex attribute
+
+ Reported-by: Edward Sheldrake <ejsheldrake@gmail.com>
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=57054
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 440eaa049756e5266f3bd80e2751f1fd0d5f9890
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Nov 13 12:42:58 2012 +0000
+
+ sna/gen4: Tidy emission of opacity vertex attribute
+
+ Just make it more consistent between the various emitters
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b6d2bb961517623d46aa6944307cb998ee125459
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Nov 13 10:05:56 2012 +0000
+
+ sna/gen4: Do not prefer inplace non-rectilinear spans
+
+ As gen4 requires the per-rectangle vertex flush, emitting spans on the
+ GPU is inefficient and so we prefer to composite the mask instead.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ae293609c7400cd3c753ed3762772264c4741df5
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Nov 13 10:21:29 2012 +0000
+
+ sna/gen4: Always initialise redirect
+
+ Do not assume the caller cleared the composite-op structure for us.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2954f15e2bcb590a90c2cb6077c0843ee25a4413
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Nov 13 09:46:19 2012 +0000
+
+ sna: Specialise the decision for inplace xor uploads
+
+ Fixes a regression from
+
+ commit 0be1d964713ca407f029278a8256d02d925dc9da
+ Author: Chris Wilson <chris@chris-wilson.co.uk>
+ Date: Tue Sep 11 21:48:24 2012 +0100
+
+ sna: Use inplace X tiling for LLC uploads
+
+ which introduced the ability to swizzle into CPU maps, but also
+ convinced the xorg path to do the same - which for large images blows up.
+
+ Reported-by: Michael Laß <bevan@bi-co.net>
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=57031
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 66eb0adffa63ef8ece7621ba90dc96af91549612
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Nov 12 14:45:56 2012 +0000
+
+ sna/dri: Apply the can_blit() check for CopyRegion
+
+ CopyRegion() also needs to check for stale bo in case the pixmap
+ dimensions have changed and so may cause out-of-bounds read/writes.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b27ecf3059bc066ef59f2a71c1d8d8f0ffec7191
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Nov 12 14:06:06 2012 +0000
+
+ sna/dri: Prevent scheduling a swap on stale buffers
+
+ If the screen has been reconfigured and the DRI client tries to swap the
+ old buffers (having not processed the invalidate event and retrieved the
+ current names), quietly drop the request. If we try to obey the request,
+ we will end up attaching a back buffer that is the wrong size to the
+ scanout...
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 34aa1e3b2702a55799a5655a3ba10bce4cc2065a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Nov 12 11:22:53 2012 +0000
+
+ sna: Compile against ancient libdrm
+
+ We need to trim the sprite video overlays if the prototypes are not
+ known.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8f1afde57dca27e6542b0b8e7c87750f3d6367bf
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Nov 11 16:16:20 2012 +0000
+
+ 2.20.13 release
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b16219a19f48b52dda91f26fcbbbbeda056589ab
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Nov 11 11:05:35 2012 +0000
+
+ sna: Filter out the full-damage marker when undoing redirection
+
+ ==25902== Invalid read of size 4
+ ==25902== at 0x4980E13: _list_del (intel_list.h:218)
+ ==25902== by 0x4980EAB: list_del (intel_list.h:240)
+ ==25902== by 0x4981F4B: free_list (sna_damage.c:403)
+ ==25902== by 0x4985131: __sna_damage_destroy (sna_damage.c:1467)
+ ==25902== by 0x49A5276: sna_render_composite_redirect_done (sna_render.c:1921)
+ ==25902== by 0x49C68FC: gen2_render_composite_done (gen2_render.c:1136)
+ ==25902== by 0x497F90F: sna_composite (sna_composite.c:567)
+ ==25902== by 0x4994725: glyphs_via_mask (sna_glyphs.c:1139)
+ ==25902== by 0x4995FB7: sna_glyphs (sna_glyphs.c:1688)
+ ==25902== by 0x8150EB4: ??? (in /usr/bin/Xorg)
+ ==25902== by 0x813CA38: CompositeGlyphs (in /usr/bin/Xorg)
+ ==25902== by 0x8146DE1: ??? (in /usr/bin/Xorg)
+ ==25902== Address 0x7c079ac2 is not stack'd, malloc'd or (recently) free'd
+
+ Reported-by: bonbons67@internet.lu
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56785
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 69acbb77e8aad3370d5e8d9a9e067c54872d7082
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Nov 11 10:49:59 2012 +0000
+
+ sna: Fix printing of uninitialised value in DBG
+
+ ==25902== Use of uninitialised value of size 4
+ ==25902== at 0x423098E: _itoa_word (_itoa.c:196)
+ ==25902== by 0x4233F7F: vfprintf (vfprintf.c:1602)
+ ==25902== by 0x42FAFAD: __vsnprintf_chk (vsnprintf_chk.c:65)
+ ==25902== by 0x81DBE8E: Xvscnprintf (in /usr/bin/Xorg)
+ ==25902== by 0x81DC8FB: LogVMessageVerb (in /usr/bin/Xorg)
+ ==25902== by 0x81DCA62: LogVWrite (in /usr/bin/Xorg)
+ ==25902== by 0x81DCA9B: VErrorF (in /usr/bin/Xorg)
+ ==25902== by 0x81DC333: ErrorF (in /usr/bin/Xorg)
+ ==25902== by 0x49B2FA8: trapezoid_span_inplace__x8r8g8b8 (sna_trapezoids.c:5069)
+ ==25902== by 0x49B3407: trapezoid_span_inplace (sna_trapezoids.c:5166)
+ ==25902== by 0x49B4C96: sna_composite_trapezoids (sna_trapezoids.c:5619)
+
+ Reported-by: bonbons67@internet.lu
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56785
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 66e4c8ff40ab8cf722efa4293bb17b0d8f2dfa88
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Nov 11 09:40:09 2012 +0000
+
+ sna: Flush pending rendering before enabling an output
+
+ This is to prevent falling in the trap of the rendering being delayed
+ until the next client renders some new content.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 94dd0b9ee9f55e7c09b8c0ee18939fa69ce66da2
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Nov 10 16:52:09 2012 +0000
+
+ sna/gen2: Fix use of uninitialised redirection
+
+ ==29553== Invalid read of size 4
+ ==29553== at 0x4980E1B: _list_del (intel_list.h:218)
+ ==29553== by 0x4980EB3: list_del (intel_list.h:240)
+ ==29553== by 0x4981F53: free_list (sna_damage.c:403)
+ ==29553== by 0x4985139: __sna_damage_destroy (sna_damage.c:1467)
+ ==29553== by 0x49A527E: sna_render_composite_redirect_done (sna_render.c:1921)
+ ==29553== by 0x49C6904: gen2_render_composite_done (gen2_render.c:1136)
+ ==29553== by 0x497F917: sna_composite (sna_composite.c:567)
+ ==29553== by 0x8150C41: ??? (in /usr/bin/Xorg)
+ ==29553== by 0x8142F13: CompositePicture (in /usr/bin/Xorg)
+ ==29553== by 0x8145F58: ??? (in /usr/bin/Xorg)
+ ==29553== by 0x81436F2: ??? (in /usr/bin/Xorg)
+ ==29553== by 0x807965C: ??? (in /usr/bin/Xorg)
+ ==29553== Address 0x9407e188 is not stack'd, malloc'd or (recently) free'd
+
+ Reported-by: bonbons67@internet.lu
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56785
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0f1c30818c9d782b066147448bbcc9ac95ac834f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Nov 10 16:52:09 2012 +0000
+
+ sna: Fix use of uninitialised value in DBG
+
+ ==29553== Use of uninitialised value of size 4
+ ==29553== at 0x4230964: _itoa_word (_itoa.c:195)
+ ==29553== by 0x4233F7F: vfprintf (vfprintf.c:1602)
+ ==29553== by 0x42FAFAD: __vsnprintf_chk (vsnprintf_chk.c:65)
+ ==29553== by 0x81DBE8E: Xvscnprintf (in /usr/bin/Xorg)
+ ==29553== by 0x81DC8FB: LogVMessageVerb (in /usr/bin/Xorg)
+ ==29553== by 0x81DCA62: LogVWrite (in /usr/bin/Xorg)
+ ==29553== by 0x81DCA9B: VErrorF (in /usr/bin/Xorg)
+ ==29553== by 0x81DC333: ErrorF (in /usr/bin/Xorg)
+ ==29553== by 0x49434F0: kgem_create_buffer (kgem.c:4887)
+ ==29553== by 0x4943B09: kgem_create_buffer_2d (kgem.c:4969)
+ ==29553== by 0x4943E19: kgem_upload_source_image (kgem.c:5021)
+ ==29553== by 0x49A0567: upload (sna_render.c:505)
+ ==29553==
+
+ Reported-by: bonbons67@internet.lu
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=56785
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit cc2b13c9c05e57dc5004d93b56f332ea95f0a4ef
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Nov 10 11:50:15 2012 +0000
+
+ sna: Specify read/write domains for no-relocation fastpath
+
+ On review (read triggering BUGs), we do need to supply the domain tracking
+ of the buffers that are being replaced from the relocation path.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0c4a2bcc3d63ecc02e3a940e38e9a416b51ad0c8
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Nov 10 12:34:52 2012 +0000
+
+ sna: Allow snooped buffers to be retained (and reused) between batches
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f5d79b202dd448e61ab6ffce26fe9cbf9051d770
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Nov 10 10:30:04 2012 +0000
+
+ sna/gen2: Add a modicum of fallback DBG
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=56785
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 27327633138dce159ca2e91fe5eac1565bd45e1c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 9 17:08:01 2012 +0000
+
+ sna/gen4: Only 965gm suffers the !snoop restriction
+
+ So fix up the bogus assertion for g4x
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8d3b5ea135fd8f16da2cbfb98041e32c7001a38f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 9 15:31:03 2012 +0000
+
+ xvmc: Use DRMINTEL_LIBS instead of hardcoding -ldrm_intel
+
+ Reported-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f040b97b01495aa43f7771ebb8ca5c0d44038bc1
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Nov 8 23:42:10 2012 +0000
+
+ sna: Mark no-reloc write buffers
+
+ If we bypass the relocation processing, we also then bypass the
+ pending-write analysis, so we need to supply those to the kernel
+ ourselves (to maintain gpu-cpu coherency).
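+
+ A rough sketch of the idea, assuming the proposed execbuffer2
+ interface (EXEC_OBJECT_WRITE from i915_drm.h; the helper itself is
+ illustrative):
+
+    #include <stdint.h>
+    #include <i915_drm.h>
+
+    /* With relocations bypassed, the kernel no longer sees which
+     * buffers are written, so flag them explicitly to keep gpu-cpu
+     * domain tracking coherent. */
+    static void mark_no_reloc(struct drm_i915_gem_exec_object2 *obj,
+                              uint64_t presumed_offset, int is_write)
+    {
+            obj->offset = presumed_offset;
+            if (is_write)
+                    obj->flags |= EXEC_OBJECT_WRITE;
+    }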
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 85ba7e96268dbb8da4bb34078333695a451c6570
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Nov 8 15:56:13 2012 +0000
+
+ sna: Experiment with using reloc.handle as an index into the execbuffer
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 93d8dddbb92431d6e2c48a17b71cac9f7047902e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Nov 8 09:41:21 2012 +0000
+
+ sna: Set the known offset for the batch as well
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 120fa0ef8d04f5e82e5f7a0636033d3d96efa1e8
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Nov 7 17:41:20 2012 +0000
+
+ sna: Support a fast no relocation changed path
+
+ x11perf -copywinwin10 on gm45 with c2d L9400:
+ before: 553,000 op/s
+ after: 565,000 op/s
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b7d2fcf47a9569d0944097a8be60ca3be72b42f6
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Nov 8 08:55:25 2012 +0000
+
+ Remove reliance on hard-coded DRI name
+
+ This provides for using the existing DDX with future DRI drivers which
+ may break from the traditional names - but only with the help of the
+ user/packager. This scheme needs to be replaced with a robust mechanism
+ for driver loading if AIGLX and co are to be kept.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit cefce9c81585b73db7620e08fcf60c89a8204873
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Nov 6 17:26:34 2012 +0000
+
+ sna: Abandon kernels that do not support execbuffer2
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b2245838c15b54d72557de8facb7cc15d59624ae
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Nov 6 16:32:32 2012 +0000
+
+ sna/gen4: opacity spans require the per-rectangle flush w/a
+
+ Note that this is worsened, but not caused, by:
+
+ commit e1a63de8991a6586b83c06bcb3369208871cf43d
+ Author: Chris Wilson <chris@chris-wilson.co.uk>
+ Date: Fri Nov 2 09:10:32 2012 +0000
+
+ sna/gen4+: Prefer GPU spans if the destination is active
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=55500
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a0540ebff083974688c863e08203e3d71a297340
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Nov 6 16:00:42 2012 +0000
+
+ sna/gen4: Remove a couple of old, now redundant, w/a flushes
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit aaaa6c356456a4bab595c6e6485893c538064e37
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Nov 6 14:34:51 2012 +0000
+
+ sna/gen4: Flush after pipelined pointer updates (inverted logic!)
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 28bda6707d979bca29dbea04e932819de204d920
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Nov 5 22:30:29 2012 +0000
+
+ sna: Prevent use-after-free during partial move-to-gpu
+
+ As we reuse the input parameter 'box' to hold the array of boxes that
+ need to be migrated, we need to be careful that we do not later confuse
+ it with the original input parameter. Otherwise,
+
+ ==1315== Invalid read of size 2
+ ==1315== at 0x928B091: box_inplace (sna.h:506)
+ ==1315== by 0x9292278: sna_pixmap_move_area_to_gpu (sna_accel.c:2554)
+ ==1315== by 0x9292C14: sna_drawable_use_bo (sna_accel.c:2774)
+ ==1315== by 0x9356C01: gen7_composite_set_target (gen7_render.c:2448)
+ ==1315== by 0x9357AA2: gen7_render_composite (gen7_render.c:2800)
+ ==1315== by 0x92DB12E: glyphs_to_dst (sna_glyphs.c:552)
+ ==1315== by 0x92DEA8D: sna_glyphs (sna_glyphs.c:1664)
+ ==1315== by 0x4F920E: damageGlyphs (in /tmp/Xorg)
+ ==1315== by 0x4F2FF6: ProcRenderCompositeGlyphs (in /tmp/Xorg)
+ ==1315== by 0x437260: Dispatch (in /tmp/Xorg)
+ ==1315== by 0x426466: main (in /tmp/Xorg)
+ ==1315== Address 0xd637054 is 20 bytes inside a block of size 208,464 free'd
+ ==1315== at 0x4C2A2FC: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
+ ==1315== by 0x92CCFCD: __sna_damage_destroy (sna_damage.c:1469)
+ ==1315== by 0x928AD74: sna_damage_destroy (sna_damage.h:284)
+ ==1315== by 0x9291CB2: sna_pixmap_move_area_to_gpu (sna_accel.c:2470)
+ ==1315== by 0x9292C14: sna_drawable_use_bo (sna_accel.c:2774)
+ ==1315== by 0x9356C01: gen7_composite_set_target (gen7_render.c:2448)
+ ==1315== by 0x9357AA2: gen7_render_composite (gen7_render.c:2800)
+ ==1315== by 0x92DB12E: glyphs_to_dst (sna_glyphs.c:552)
+ ==1315== by 0x92DEA8D: sna_glyphs (sna_glyphs.c:1664)
+ ==1315== by 0x4F920E: damageGlyphs (in /tmp/Xorg)
+ ==1315== by 0x4F2FF6: ProcRenderCompositeGlyphs (in /tmp/Xorg)
+ ==1315== by 0x437260: Dispatch (in /tmp/Xorg)
+
+ Reported-by: Matti Ruohonen <kiesus@gmail.com>
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56591
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit e62b0cbf958d1ad95e4522973253a1ae5c1a4da9
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 2 15:54:20 2012 +0000
+
+ sna: Add a small delay during startup if another master is still active
+
+ There exists a race with plymouthd that can cause the drm device to
+ reject us as the rightful master, and so cause X to fail to load. Try
+ waiting for a couple of seconds for whatever it was to close before
+ giving in.
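+
+ A sketch of such a wait (drmSetMaster is the libdrm call; the retry
+ helper and its timings are illustrative):
+
+    #include <unistd.h>
+    #include <xf86drm.h>
+
+    /* Retry becoming DRM master for ~2 seconds to ride out a racing
+     * master (e.g. plymouthd) that is still shutting down. */
+    static int wait_for_master(int fd)
+    {
+            int retries = 20;
+            while (drmSetMaster(fd) && --retries)
+                    usleep(100 * 1000);
+            return retries != 0;
+    }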
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit e1a63de8991a6586b83c06bcb3369208871cf43d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Nov 2 09:10:32 2012 +0000
+
+ sna/gen4+: Prefer GPU spans if the destination is active
+
+ Trying to avoid using the inplace scanline rasteriser for large shapes.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 72bcd8f85c98502b13a67d9c606371afe513584c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Nov 1 09:30:18 2012 +0000
+
+ sna: Try to reduce ping-pong migration for intermixed render/legacy code paths
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=56591
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4e363906a5ef15e1eb0a387cfb6b3445ac185b9d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Oct 31 22:58:59 2012 +0000
+
+ sna: Set a valid box when checking for GPU bo for BLT composite ops
+
+ Reported-by: Jiri Slaby <jirislaby@gmail.com>
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b924831e445615b82a53b10e1849720e933eddfe
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Oct 31 22:55:56 2012 +0000
+
+ sna: Preserve mode if flushing before a scanline wait
+
+ Reported-by: Jiri Slaby <jirislaby@gmail.com>
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 678f9586807071bef813bb69d451f14d2fcbcc04
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Oct 31 11:26:18 2012 +0000
+
+ sna: assert that the source is not the GTT mapping when uploading
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 783b8048a6d1a9fd0a73ebf7768ae17dc0b21900
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Oct 31 11:16:09 2012 +0000
+
+ sna: Prefer to use the GPU for uploads if continuing on the GPU
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 74c912880c302889f38fe5898c8038a0ba20e5db
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Oct 31 10:57:54 2012 +0000
+
+ sna: Fix a typo in a DBG
+
+ Reported-by: Matti Ruohonen <kiesus@gmail.com>
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f48a821aa73cb40a51baafc6cd2b063f1f91d864
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Oct 31 10:53:47 2012 +0000
+
+ sna: Add a few DBG tracepoints to screen init/fini
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit bf81d552c4be039fbcf3272387828b1a8b3fbdb8
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Oct 31 08:50:44 2012 +0000
+
+ sna: Clamp the drawable box to prevent int16 overflow
+
+ And assert that the box is valid when migrating.
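+
+ The clamp itself is simple; a sketch (BoxRec coordinates are int16_t,
+ the helper name is illustrative):
+
+    #include <stdint.h>
+
+    /* Clamp a computed extent into the int16 range of BoxRec
+     * coordinates before using it for migration. */
+    static int16_t clamp_int16(int v)
+    {
+            if (v < INT16_MIN) return INT16_MIN;
+            if (v > INT16_MAX) return INT16_MAX;
+            return (int16_t)v;
+    }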
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=56591
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 31eb704b2ad7c861ec4e61fb9de0e9592fc6d269
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Oct 26 13:57:30 2012 +0100
+
+ sna: Ensure that the trap is clipped if it ends within the boundary pixel
+
+ Reported-and-tested-by: Jiri Slaby <jirislaby@gmail.com>
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56395
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ef431b2d35c1bf4d77bbcc73688951d22f6aa135
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu Oct 25 10:15:39 2012 +0100
+
+ uxa: Drain the DRM event queue before server regeneration
+
+ Adam Jackson notes that what appeared to be my paranoid ramblings in SNA
+ actually served a purpose - it prevents a server crash following
+ server regen if an indirect client happened to be running at the time
+ (e.g. LIBGL_INDIRECT_ALWAYS=1 glxgears).
+
+ Reported-by: Adam Jackson <ajax@redhat.com>
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit efb8ff16491ecfb4d9c0c6a718684310d949d8d3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Oct 24 22:56:20 2012 +0100
+
+ sna: Add missing ValidatePicture() for flattening alphamaps
+
+ Reported-by: Armands Liepins <armandsl@gmail.com>
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56367
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1a489142c8e6a4828348cc9afbd0f430d3b1e2d8
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Oct 23 23:43:50 2012 +0100
+
+ sna: Disable RandR hotplug events if Xinerama is enabled
+
+ Since RandR itself is disabled if Xinerama is enabled, for example with
+ ZaphodHeads, calling RRGetInfo() upon a hotplug event generates an
+ assertion.
+
+ Reported-by: Stephen Liang <inteldriver@angrywalls.com>
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=55260
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d87c2756db1af6e4af15864ab0f44d1454079236
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Oct 23 15:50:56 2012 +0100
+
+ sna: Beware 16-bit overflow when computing sample areas
+
+ Reported-by: Ognian Tenchev <drJeckyll@Jeckyll.net>
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=56324
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c7f48684cdebc24128a5fa5678614af3deb14b3b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Oct 23 15:17:56 2012 +0100
+
+ sna: Only disallow hw sprite scaling on Haswell
+
+ Earlier chips (Ironlake, Sandybridge and Ivybridge) have integrated
+ sprite scalers.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5c3ea9cf6900855502fcd56214a1b9e180265ff5
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Oct 22 22:35:17 2012 +0100
+
+ sna: Update DRI buffer if attached to the framebuffer for TearFree flips
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4dfc83359d006a4e410e3280003b49683309afc3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Oct 22 14:56:01 2012 +0100
+
+ sna: Tidy udev install/remove and add a couple of lines of DBG
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=55260
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4d9687d49c9869b2e88d408e5f451c9a1f8f3389
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Mon Oct 22 13:41:54 2012 +0100
+
+ sna: Refactor the common code to enable a timer
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit fb729788872ccb429ddde8a9a4281b1933243096
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Oct 21 14:36:48 2012 +0100
+
+ sna: Only query the system time if we are processing timers
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c0d6a75f02eb97e5c80a4345ae5c68e9a81d49b6
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Oct 21 14:32:14 2012 +0100
+
+ sna: Use the FLUSH_TIMER as the only wakeup timer source
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 7bc829c39a203c17053eb728412f698a429ad9fe
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Oct 21 14:24:01 2012 +0100
+
+ sna: Remove the unused inactive eviction
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9fa6e4aa2daee99ff5f6efc11232de22100bac80
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Oct 21 12:48:06 2012 +0100
+
+ intel: Sanity check that the platform probe points to an i915.ko GEM device
+
+ References: https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1069031
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f6eed98fcfea54d50a282ac71ee654645551ae11
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sun Oct 21 10:46:14 2012 +0100
+
+ sna: Mark the to-be-damaged region first, then process afterwards
+
+ Damage is processed in two phases, with the actual Damage being appended
+ before the operation is performed so that a copy can be made before
+ modification (e.g. software cursors).
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 60e4e009f90543bfd57f6a4e51ebc5b32b4af33b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Oct 20 17:59:45 2012 +0100
+
+ sna: Move the source region to the CPU for a self-copy fallback CopyArea
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 7ff8b1ba543508f3b209f2ade7655c3aa34d546d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Oct 20 16:23:26 2012 +0100
+
+ 2.20.12 release
+
+ How embarrassing! My fault for rushing :(
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 695b2ce2d32bde191080129b55f9bf8a9d48ee77
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Oct 20 16:19:21 2012 +0100
+
+ uxa: Fixup drm_intel_bo_disable_reuse() typo
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2083e253b3d1ecc218ab1e523e4026ddd4561112
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Oct 20 16:07:11 2012 +0100
+
+ 2.20.11 release
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit fc0ba65f5efe217f2ab5e920255d2133d7c9e5e8
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat Oct 20 09:29:10 2012 +0100
+
+ uxa: Disable bo reuse after binding to a scanout
+
+ On gen6+, bo are expected to be LLC by default. However, as using the bo
+ for the scanout causes it to be moved into the uncached domain, this
+ assumption is then false and we should release the bo back to the system
+ rather than spread the uncached buffers around. The most common
+ allocator of scanouts is for pageflipping which are already non-reusable
+ due to the DRI2 export, so there should actually be little impact.
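+
+ In libdrm_intel terms the fix amounts to something like (sketch; the
+ surrounding pageflip code is elided):
+
+    #include <intel_bufmgr.h>
+
+    /* Once bound to the scanout the bo has left the LLC domain, so
+     * do not return it to the reusable bo cache. */
+    static void scanout_bound(drm_intel_bo *bo)
+    {
+            drm_intel_bo_disable_reuse(bo);
+    }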
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f4c32af48b0c92a48131090886a6a6b6c45dbe34
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Oct 19 16:29:19 2012 +0100
+
+ sna: Clear the damage along with the BO when forcing the stall for inplace BLT
+
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56180
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 299232bdb69c8c2b6231905e0f45e9cfe74fe09a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri Oct 19 15:02:00 2012 +0100
+
+ sna: Reorder final checks for using the BO and setting the damage pointer
+
+ When we return NULL from sna_drawable_use_bo(), the expectation is that
+ the damage pointer is also NULL. However, one SHM path leaked.
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=56180
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 257abfdabe39629fb458ed65fab11283f7518dc4
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Oct 17 23:34:22 2012 +0100
+
+ sna/gen4: Presume we need a flush upon state change similar to gen5+
+
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=55627
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=55500
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8238c672984e31ae655353d6412e3395a9cdfbe6
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Oct 17 22:16:29 2012 +0100
+
+ sna: secure batches accepted upstream, so simply use runtime detection
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 540666a0a81c7daedbd47830d0932df5e57ec903
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Oct 17 16:59:05 2012 +0100
+
+ sna/overlay: Move bo out of GTT domain after binding to overlay plane
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 891bae4aa91e85542dcbe38f6ee92141e3efc801
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Oct 17 11:29:10 2012 +0100
+
+ sna: Use the secure batches to program scanline waits on gen6+
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 41be80a8cae1eb0e294392e5033511bfdf2895c5
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Oct 17 11:25:52 2012 +0100
+
+ sna: Enable support for SECURE batch buffers
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ba6c82cd9d8089354b90632ca8edbb35cc09b9c4
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed Oct 17 13:54:51 2012 +0100
+
+ sna/dri: Defensively check for GTT mmap failure during fallback
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 7927f9a351ead1a5593bc91e465706bdd889bb8d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Oct 16 17:56:30 2012 +0100
+
+ sna/gen7: Filter BLEND flags for CA glyphs
+
+ Fixes regression from commit c51aaa731e5cffc892e59730194ad7c98789b02b
+ Author: Chris Wilson <chris@chris-wilson.co.uk>
+ Date: Thu Oct 11 11:36:00 2012 +0100
+
+ sna/gen7: Replace bogus state tracking assertion
+
+ The assumption that we only used the encoded flags for determining
+ the composite state is false for the magic CA pass.
+
+ Reported-by: Oleksij Rempel <bug-track@fisher-privat.net>
+ Reported-by: Eyal Lotem <eyal.lotem@gmail.com>
+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56037
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2ad4aa195571fe214ccffa55e123507f1be66243
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue Oct 16 11:59:28 2012 +0100
+
+ sna: Drop fake tiled CPU mapping
+
+ The only path where this is correct already handles it as the special
+ case that it is; everywhere else it is just nonsense.
+
+ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
commit b42d81b63f5b6a571faffaadd42c74adce40128a
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sun Oct 14 09:15:38 2012 +0100
diff --git a/Makefile.am b/Makefile.am
index b3d37b273..5001674ee 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -18,6 +18,7 @@
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS}
SUBDIRS = man
diff --git a/Makefile.in b/Makefile.in
index 6f2e8f426..edf7e301a 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -225,7 +225,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MAN_SUBSTS = @MAN_SUBSTS@
@@ -264,6 +263,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@
VERSION = @VERSION@
X11_CFLAGS = @X11_CFLAGS@
X11_LIBS = @X11_LIBS@
+XCB_CFLAGS = @XCB_CFLAGS@
+XCB_LIBS = @XCB_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_LIBS = @XORG_LIBS@
XORG_MAN_PAGE = @XORG_MAN_PAGE@
@@ -322,6 +323,7 @@ target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
+ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS}
SUBDIRS = man $(am__append_1) src $(am__append_2)
MAINTAINERCLEANFILES = ChangeLog INSTALL
all: config.h
@@ -330,7 +332,7 @@ all: config.h
.SUFFIXES:
am--refresh: Makefile
@:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -357,9 +359,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
$(SHELL) ./config.status --recheck
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+$(top_srcdir)/configure: $(am__configure_deps)
$(am__cd) $(srcdir) && $(AUTOCONF)
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
$(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
$(am__aclocal_m4_deps):
@@ -370,7 +372,7 @@ config.h: stamp-h1
stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
@rm -f stamp-h1
cd $(top_builddir) && $(SHELL) ./config.status config.h
-$(srcdir)/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+$(srcdir)/config.h.in: $(am__configure_deps)
($(am__cd) $(top_srcdir) && $(AUTOHEADER))
rm -f stamp-h1
touch $@
diff --git a/NEWS b/NEWS
index be1070043..32977fa19 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,293 @@
+Release 2.21.2 (2013-02-10)
+===========================
+Pass the brown paper bags, I need half a dozen or so. That seemingly
+innocuous build fix to xorg-1.13 happened to have the little side-effect
+of breaking glyph rendering on xorg-1.12 and older on 64-bit machines.
+
+
+Release 2.21.1 (2013-02-10)
+===========================
+A fix for a potential GPU hang on 945gm (GMA3100) and earlier chipsets,
+along with backporting SNA to the packages found in stable distributions
+like Debian 6.0 (Squeeze).
+
+ * Clean up compilation warnings from deblint, thanks to Paul Menzel
+
+ * Minor build improvements by Damien Lespiau.
+
+ * Disable generating span geometry for non-rectilinear spans on gen4
+ in order to work around one class of render corruption.
+
+ * Prevent cache thrashing and severe performance degradation on LLC
+ machines for streaming texture updates. However, note the effect was
+ only observed on one particular laptop.
+
+ * Fix alignment of subsurface proxies for old chipsets.
+ https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1120108
+
+ * Repair build against Xorg-1.6 and contemporary packages.
+
+
+Release 2.21.0 (2013-02-01)
+===========================
+A few new features:
+
+ * Enable render acceleration for Haswell GT1/GT2.
+
+ * Enable multi-threaded rasterisation of trapezoids and fallback composition
+
+ * Utilise a new kernel interface (v3.9) for processing relocations
+
+along with a few older features from the 2.20.x series:
+
+ * PRIME support for hotplug GPUs and hybrid systems
+
+ * Support for IvyBridge GT1 machines, aka HD2500 graphics.
+
+ * Stable 830gm/845g support, at last!
+
+As usual we have a large number of bug fixes since the last release:
+
+ * Prevent a stray relocation being left after a buffer is removed from
+ a batch, leading to GPU hangs.
+
+ * Make the driver more robust against its own failures to submit batches
+ by falling back to software rendering.
+
+ * Fix emission of scanline waits for secondary pipes on gen6/7. Otherwise
+ you may encounter GPU hangs in MI_WAIT_FOR_EVENT.
+
+ * Fix a missing corner pixel when drawing rectangles with PolyLines
+ https://bugs.freedesktop.org/show_bug.cgi?id=55484
+
+ * Don't try to use Y-tiling colour buffers with mesa/i915c as mesa
+ doesn't support them and will fall back to software rendering
+
+ * Ensure that any cached mmaps are invalidated for a SwapBuffers
+ https://bugs.freedesktop.org/show_bug.cgi?id=60042
+
+ * Correctly handle the composition of rotated displays too large for the
+ 3D pipeline
+ https://bugs.freedesktop.org/show_bug.cgi?id=60124
+
+ * Fix the computation of the planar video frame size
+ https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1104180
+
+
+Release 2.20.19 (2013-01-20)
+============================
+A quick release as the last broke USB DisplayLink slave outputs badly. The
+performance of those displays was unusable due to an inadvertent change that
+caused us to flush the entire scanout over the USB for every drawing
+operation.
+
+ * Implement the GNOME Build API. A couple of minor changes to make
+ integrators' and distributors' lives a little easier, or at least more
+ consistent.
+
+ * Correctly offset inplace trapezoids for subwindows, such as the GTK+
+ close button after its background image has been uploaded.
+
+ * Explicitly prevent ring-switching for synchronized rendering to
+ scanouts (for vsync).
+
+ * Clip dirty region to slave pixmaps (otherwise UDL is nigh unusable)
+ https://bugs.freedesktop.org/show_bug.cgi?id=59539
+
+
+Release 2.20.18 (2013-01-16)
+============================
+A bunch of miscellaneous fixes for assertion failures and various
+performance regressions when mixing new methods for offloads, along with
+a couple of improvements for rendering with gen4.
+
+ * Remove use of packed unnormalized texture coordinates on gen4/5 as
+ these GPUs do not support unnormalized coordinates in the sampler.
+
+ * Remove dependency upon x86 asm for cross-building to unsupported
+ architectures.
+ https://bugs.gentoo.org/show_bug.cgi?id=448570
+
+ * Apply damage around PRIME updates in the correct order.
+
+ * Correctly read the initial backlight level when the user
+ overrides UXA's choice of backlight controller.
+
+ * Throttle UXA and prevent it queuing work much faster than the GPU can
+ complete it. This manifested as impossible performance figures and
+ the entire display freezing for several seconds whilst the GPU caught
+ up. One side effect is that it also caused the DDX to consume more
+ memory than was required as it could not recycle buffers quickly
+ enough, and in some cases the fix produces a marked improvement in
+ performance. Also note that on gen2/3 this requires a new libdrm [2.4.41]
+ in order to prevent a bug causing the DDX to fall back to swrast.
+
+Release 2.20.17 (2012-12-26)
+============================
+A minor update to prepare for co-operating with the kernel over managing
+stability on 830gm/845g. On this pair of chipsets, the kernel will perform
+an extra copy of the batchbuffer into reserved memory, which prevents them
+from randomly dying. However, that extra copy does have a noticeable
+impact upon throughput, so we also have a mechanism for userspace to
+opt-out of the kernel workaround and take responsibility for ensuring its
+batches are coherent.
+
+ * Build fixes against xorg-1.14
+ https://bugs.freedesktop.org/show_bug.cgi?id=58552
+ https://bugs.freedesktop.org/show_bug.cgi?id=58406
+
+ * Fix the origin of cropped (textured) video windows (Xv and XvMC)
+ https://bugs.freedesktop.org/show_bug.cgi?id=23033
+
+ * Fix potential corruption when using images larger than ~1GiB
+
+
+Release 2.20.16 (2012-12-15)
+============================
+Rejoice! We have found a trick to make 830gm/845g stable at long last.
+Ever since the switch to GEM and dynamic video memory, those early
+second generation chipsets have been plagued by instability. The lack of
+flushing cachelines from the CPU to GMCH was eventually solved by using
+an undocumented bit, but 830/845 were still hanging under memory pressure.
+These deaths were all due to garbage finding its way into the command
+streamer, and they go away if we take a leaf out of the original driver's
+book and never reuse those pages for anything else. So for the first time
+ever, I have been able to complete running the test suite on an 845g,
+even whilst thrashing the page and buffer caches!
+
+ * Run the SF stage as single-threaded on gen4 to work around a few issues
+ https://bugs.freedesktop.org/show_bug.cgi?id=57410
+
+ * Keep the scanout SURFACE_STATE separate to avoid overriding its
+ memory access control on gen6/7 (i.e. writes to the scanout need to
+ be kept out of the render cache)
+
+ * Tune batch flushing after an operation to an exported surface under a
+ compositor.
+
+ * Make sure the source is on the CPU for inplace composition of trapezoids
+ using the CPU
+ https://bugs.freedesktop.org/show_bug.cgi?id=56825
+
+ * Immediately flush in the block handler after a split batch to reduce
+ latency between the two halves of an operation.
+ https://bugs.freedesktop.org/show_bug.cgi?id=51718
+
+ * Install a fallback config if we fail to install the desired config
+ at VT switch (e.g. at boot, or after resume with 3 incompatible pipes on
+ Ivybridge)
+
+ * Pin batches to avoid CS incoherence on 830/845
+ https://bugs.freedesktop.org/show_bug.cgi?id=26345
+
+
+Release 2.20.15 (2012-12-03)
+============================
+And lo, enabling more of the common acceleration paths for gen4 revealed
+another lurking bug - something is wrong with how we prepare Y-tiling
+surfaces for rendering. For the time being, we can surreptitiously disable
+them for gen4 and avoid hitting GPU hangs.
+
+ * Avoid clobbering the render state after failing to convert the
+ operation to use the blitter.
+ https://bugs.freedesktop.org/show_bug.cgi?id=57601
+
+ * Disable shadow tracking upon server regeneration, and so fix a crash
+ if you restart the server whilst a RandR transform (e.g. rotation) is
+ in effect.
+ https://bugs.freedesktop.org/show_bug.cgi?id=52255
+ https://bugs.freedesktop.org/show_bug.cgi?id=56608
+
+
+Release 2.20.14 (2012-11-26)
+============================
+The highlight of this release is gen4, from 965g to gm45. Quite an old
+bug surfaced in the shader assembly, sparking a chance to review a few
+design choices within that backend and experiment with fresh ways to
+work around the remaining issues.
+
+ * Avoid using inplace XOR'ed uploads for very large buffers
+ https://bugs.freedesktop.org/show_bug.cgi?id=57031
+
+ * Fix the gen4/5 opacity shader
+ https://bugs.freedesktop.org/show_bug.cgi?id=57054
+
+ * Queue a pending vblank request after flip completion
+ https://bugs.freedesktop.org/show_bug.cgi?id=56423
+
+ * Avoid migrating an uninitialised pixmap for use as a render source
+ https://bugs.freedesktop.org/show_bug.cgi?id=47597
+
+ * Improve handling of texture fallbacks for 830/845.
+ https://bugs.freedesktop.org/show_bug.cgi?id=57392
+
+
+Release 2.20.13 (2012-11-11)
+============================
+Nothing but bug fixes. Many thanks to everyone who took the time to
+report their issues, and for their help in improving the driver.
+
+ * Sanity check the platform probe points to our expected i915 device
+ https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1069031
+
+ * Prevent 16-bit overflow when computing the sample area to upload for
+ sources of render operations
+ https://bugs.freedesktop.org/show_bug.cgi?id=56324
+
+ * Clamp the drawable box for migration to prevent 16-bit overflow
+ https://bugs.freedesktop.org/show_bug.cgi?id=56591
+
+ * Disable RandR hotplug events if Xinerama is enabled and thereby prevent
+ a crash upon hotplug
+ https://bugs.freedesktop.org/show_bug.cgi?id=55260
+
+ * Call ValidatePicture before attempting to flatten the alphamaps
+ https://bugs.freedesktop.org/show_bug.cgi?id=56367
+
+ * Clip the trapezoid correctly if it ends on the boundary pixel
+ https://bugs.freedesktop.org/show_bug.cgi?id=56395
+
+ * Make sure the pipeline choice is propagated to the scanline wait
+ across a batch flush
+ https://bugs.freedesktop.org/show_bug.cgi?id=47597
+
+ * Set the valid drawable box when choosing placement of BLT composite ops
+ https://bugs.freedesktop.org/show_bug.cgi?id=47597
+
+ * Prevent use-after-free when promoting a partial-GPU bo to a full-GPU bo
+ https://bugs.freedesktop.org/show_bug.cgi?id=56591
+
+ * gen4 opacity spans require the per-rectangle workaround
+ https://bugs.freedesktop.org/show_bug.cgi?id=55500
+
+ * Prevent use of invalid damage pointers when redirecting rendering
+ https://bugs.freedesktop.org/show_bug.cgi?id=56785
+
+
+Release 2.20.12 (2012-10-20)
+============================
+More bug reports, more bug fixes! Perhaps the headline feature is
+that with secure batches, coming to a 3.8 kernel near you, we may
+finally have the ability to perform updates to the scanout synchronized
+to the refresh rate on later SandyBridge and IvyBridge chipsets. It comes
+at quite a power cost as we need to keep the GPU out of its power saving
+modes, but it should allow legacy vsync to function at last, and with it
+address a longstanding issue with tearing on SandyBridge+.
+
+ * Fix component-alpha rendering on IvyBridge, for example subpixel
+ antialiased glyphs.
+ https://bugs.freedesktop.org/show_bug.cgi?id=56037
+
+ * Flush before some "pipelined" state changes on gen4. The evidence is
+ that the same flushes as required on gen5+ are also required for gen4.
+ https://bugs.freedesktop.org/show_bug.cgi?id=55627
+
+ * Prevent a potential crash when forcing a stall on a busy CPU bo
+ https://bugs.freedesktop.org/show_bug.cgi?id=56180
+
+[Release 2.20.11 contained a typo causing UXA to fail immediately.]
+
Release 2.20.10 (2012-10-14)
============================
The last couple of weeks have been fairly retrospective, a dive into
diff --git a/aclocal.m4 b/aclocal.m4
index fa97284c0..77dfdcecf 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -1338,7 +1338,14 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
LD="${LD-ld} -m elf_i386_fbsd"
;;
x86_64-*linux*)
- LD="${LD-ld} -m elf_i386"
+ case `/usr/bin/file conftest.o` in
+ *x86-64*)
+ LD="${LD-ld} -m elf32_x86_64"
+ ;;
+ *)
+ LD="${LD-ld} -m elf_i386"
+ ;;
+ esac
;;
ppc64-*linux*|powerpc64-*linux*)
LD="${LD-ld} -m elf32ppclinux"
@@ -1702,7 +1709,8 @@ AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl
;;
*)
lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null`
- if test -n "$lt_cv_sys_max_cmd_len"; then
+ if test -n "$lt_cv_sys_max_cmd_len" && \
+ test undefined != "$lt_cv_sys_max_cmd_len"; then
lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
else
@@ -11251,46 +11259,6 @@ fi
rmdir .tst 2>/dev/null
AC_SUBST([am__leading_dot])])
-# Add --enable-maintainer-mode option to configure. -*- Autoconf -*-
-# From Jim Meyering
-
-# Copyright (C) 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008,
-# 2011 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 5
-
-# AM_MAINTAINER_MODE([DEFAULT-MODE])
-# ----------------------------------
-# Control maintainer-specific portions of Makefiles.
-# Default is to disable them, unless `enable' is passed literally.
-# For symmetry, `disable' may be passed as well. Anyway, the user
-# can override the default with the --enable/--disable switch.
-AC_DEFUN([AM_MAINTAINER_MODE],
-[m4_case(m4_default([$1], [disable]),
- [enable], [m4_define([am_maintainer_other], [disable])],
- [disable], [m4_define([am_maintainer_other], [enable])],
- [m4_define([am_maintainer_other], [enable])
- m4_warn([syntax], [unexpected argument to AM@&t@_MAINTAINER_MODE: $1])])
-AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
- dnl maintainer-mode's default is 'disable' unless 'enable' is passed
- AC_ARG_ENABLE([maintainer-mode],
-[ --][am_maintainer_other][-maintainer-mode am_maintainer_other make rules and dependencies not useful
- (and sometimes confusing) to the casual installer],
- [USE_MAINTAINER_MODE=$enableval],
- [USE_MAINTAINER_MODE=]m4_if(am_maintainer_other, [enable], [no], [yes]))
- AC_MSG_RESULT([$USE_MAINTAINER_MODE])
- AM_CONDITIONAL([MAINTAINER_MODE], [test $USE_MAINTAINER_MODE = yes])
- MAINT=$MAINTAINER_MODE_TRUE
- AC_SUBST([MAINT])dnl
-]
-)
-
-AU_DEFUN([jm_MAINTAINER_MODE], [AM_MAINTAINER_MODE])
-
# Check to see how 'make' treats includes. -*- Autoconf -*-
# Copyright (C) 2001, 2002, 2003, 2005, 2009 Free Software Foundation, Inc.
diff --git a/config.h.in b/config.h.in
index b02108a49..546254232 100644
--- a/config.h.in
+++ b/config.h.in
@@ -11,6 +11,9 @@
/* Enable pixmap debugging */
#undef DEBUG_PIXMAP
+/* Enable synchronous rendering for debugging */
+#undef DEBUG_SYNC
+
/* Default acceleration method */
#undef DEFAULT_ACCEL_METHOD
@@ -23,6 +26,12 @@
/* Enable pixman glyph cache */
#undef HAS_PIXMAN_GLYPHS
+/* Enable pixman triangle rasterisation */
+#undef HAS_PIXMAN_TRIANGLES
+
+/* Enable if your compiler supports the Intel __sync_* atomic primitives */
+#undef HAVE_ATOMIC_PRIMITIVES
+
/* Define to 1 if you have the <dgaproc.h> header file. */
#undef HAVE_DGAPROC_H
@@ -47,6 +56,9 @@
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
+/* Enable if you have libatomic-ops-dev installed */
+#undef HAVE_LIB_ATOMIC_OPS
+
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
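[Note: the three atomic-related defines above select between compiler
__sync builtins, libatomic-ops and the Solaris <atomic.h> functions. A
minimal sketch of the kind of wrapper the new src/sna/atomic.h presumably
layers on top of them -- the atomic_t/atomic_inc names here are
illustrative, not necessarily the driver's own:]

    #if defined(HAVE_ATOMIC_PRIMITIVES)
    /* GCC/Intel __sync builtins, found by the configure link test */
    typedef struct { int v; } atomic_t;
    #define atomic_inc(x) ((void)__sync_fetch_and_add(&(x)->v, 1))
    #define atomic_dec_and_test(x) (__sync_fetch_and_add(&(x)->v, -1) == 1)
    #elif defined(HAVE_LIB_ATOMIC_OPS)
    /* portable fallback via libatomic-ops-dev */
    #include <atomic_ops.h>
    typedef struct { AO_t v; } atomic_t;
    #define atomic_inc(x) ((void)AO_fetch_and_add1_full(&(x)->v))
    #define atomic_dec_and_test(x) (AO_fetch_and_sub1_full(&(x)->v) == 1)
    #else
    /* Solaris: atomic functions live in <atomic.h> and libc */
    #include <atomic.h>
    typedef struct { unsigned int v; } atomic_t;
    #define atomic_inc(x) atomic_inc_uint(&(x)->v)
    #define atomic_dec_and_test(x) (atomic_dec_uint_nv(&(x)->v) == 0)
    #endif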
diff --git a/configure b/configure
index cd430d053..96a698d92 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for xf86-video-intel 2.20.10.
+# Generated by GNU Autoconf 2.69 for xf86-video-intel 2.21.2.
#
# Report bugs to <https://bugs.freedesktop.org/enter_bug.cgi?product=xorg>.
#
@@ -591,8 +591,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='xf86-video-intel'
PACKAGE_TARNAME='xf86-video-intel'
-PACKAGE_VERSION='2.20.10'
-PACKAGE_STRING='xf86-video-intel 2.20.10'
+PACKAGE_VERSION='2.21.2'
+PACKAGE_STRING='xf86-video-intel 2.21.2'
PACKAGE_BUGREPORT='https://bugs.freedesktop.org/enter_bug.cgi?product=xorg'
PACKAGE_URL=''
@@ -651,6 +651,8 @@ KMS_ONLY_FALSE
KMS_ONLY_TRUE
XVMC_FALSE
XVMC_TRUE
+XCB_LIBS
+XCB_CFLAGS
XVMCLIB_LIBS
XVMCLIB_CFLAGS
DRI2_FALSE
@@ -681,10 +683,10 @@ LIBGLAMOR_LIBS
LIBGLAMOR_CFLAGS
GLAMOR_FALSE
GLAMOR_TRUE
-DRMINTEL_LIBS
-DRMINTEL_CFLAGS
UXA_FALSE
UXA_TRUE
+DRMINTEL_LIBS
+DRMINTEL_CFLAGS
SNA_FALSE
SNA_TRUE
HAVE_X11_FALSE
@@ -773,9 +775,6 @@ CPPFLAGS
LDFLAGS
CFLAGS
CC
-MAINT
-MAINTAINER_MODE_FALSE
-MAINTAINER_MODE_TRUE
am__untar
am__tar
AMTAR
@@ -840,7 +839,6 @@ SHELL'
ac_subst_files=''
ac_user_opts='
enable_option_checking
-enable_maintainer_mode
enable_dependency_tracking
enable_selective_werror
enable_strict_compilation
@@ -907,6 +905,8 @@ PCIACCESS_CFLAGS
PCIACCESS_LIBS
XVMCLIB_CFLAGS
XVMCLIB_LIBS
+XCB_CFLAGS
+XCB_LIBS
VALGRIND_CFLAGS
VALGRIND_LIBS'
@@ -1449,7 +1449,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures xf86-video-intel 2.20.10 to adapt to many kinds of systems.
+\`configure' configures xf86-video-intel 2.21.2 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1520,7 +1520,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of xf86-video-intel 2.20.10:";;
+ short | recursive ) echo "Configuration of xf86-video-intel 2.21.2:";;
esac
cat <<\_ACEOF
@@ -1528,8 +1528,6 @@ Optional Features:
--disable-option-checking ignore unrecognized --enable/--with options
--disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
--enable-FEATURE[=ARG] include FEATURE [ARG=yes]
- --enable-maintainer-mode enable make rules and dependencies not useful
- (and sometimes confusing) to the casual installer
--disable-dependency-tracking speeds up one-time build
--enable-dependency-tracking do not reject slow dependency extractors
--disable-selective-werror
@@ -1636,6 +1634,8 @@ Some influential environment variables:
C compiler flags for XVMCLIB, overriding pkg-config
XVMCLIB_LIBS
linker flags for XVMCLIB, overriding pkg-config
+ XCB_CFLAGS C compiler flags for XCB, overriding pkg-config
+ XCB_LIBS linker flags for XCB, overriding pkg-config
VALGRIND_CFLAGS
C compiler flags for VALGRIND, overriding pkg-config
VALGRIND_LIBS
@@ -1707,7 +1707,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-xf86-video-intel configure 2.20.10
+xf86-video-intel configure 2.21.2
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2122,7 +2122,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by xf86-video-intel $as_me 2.20.10, which was
+It was created by xf86-video-intel $as_me 2.21.2, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2942,7 +2942,7 @@ fi
# Define the identity of the package.
PACKAGE='xf86-video-intel'
- VERSION='2.20.10'
+ VERSION='2.21.2'
cat >>confdefs.h <<_ACEOF
@@ -2983,29 +2983,6 @@ am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5
-$as_echo_n "checking whether to enable maintainer-specific portions of Makefiles... " >&6; }
- # Check whether --enable-maintainer-mode was given.
-if test "${enable_maintainer_mode+set}" = set; then :
- enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval
-else
- USE_MAINTAINER_MODE=no
-fi
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" >&5
-$as_echo "$USE_MAINTAINER_MODE" >&6; }
- if test $USE_MAINTAINER_MODE = yes; then
- MAINTAINER_MODE_TRUE=
- MAINTAINER_MODE_FALSE='#'
-else
- MAINTAINER_MODE_TRUE='#'
- MAINTAINER_MODE_FALSE=
-fi
-
- MAINT=$MAINTAINER_MODE_TRUE
-
-
-
# Require X.Org macros 1.8 or later for MAN_SUBSTS set by XORG_MANPAGE_SECTIONS
@@ -11514,7 +11491,8 @@ else
;;
*)
lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null`
- if test -n "$lt_cv_sys_max_cmd_len"; then
+ if test -n "$lt_cv_sys_max_cmd_len" && \
+ test undefined != "$lt_cv_sys_max_cmd_len"; then
lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
else
@@ -13050,7 +13028,14 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
LD="${LD-ld} -m elf_i386_fbsd"
;;
x86_64-*linux*)
- LD="${LD-ld} -m elf_i386"
+ case `/usr/bin/file conftest.o` in
+ *x86-64*)
+ LD="${LD-ld} -m elf32_x86_64"
+ ;;
+ *)
+ LD="${LD-ld} -m elf_i386"
+ ;;
+ esac
;;
ppc64-*linux*|powerpc64-*linux*)
LD="${LD-ld} -m elf32ppclinux"
@@ -18261,6 +18246,72 @@ else
fi
+# Check for atomic intrinsics
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for native atomic primitives" >&5
+$as_echo_n "checking for native atomic primitives... " >&6; }
+if ${intel_cv_atomic_primitives+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+
+ intel_cv_atomic_primitives="none"
+
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int atomic_add(int i) { return __sync_fetch_and_add (&i, 1); }
+int atomic_cmpxchg(int i, int j, int k) { return __sync_val_compare_and_swap (&i, j, k); }
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ intel_cv_atomic_primitives="Intel"
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+
+ if test "x$intel_cv_atomic_primitives" = "xnone"; then
+ ac_fn_c_check_header_mongrel "$LINENO" "atomic_ops.h" "ac_cv_header_atomic_ops_h" "$ac_includes_default"
+if test "x$ac_cv_header_atomic_ops_h" = xyes; then :
+ intel_cv_atomic_primitives="libatomic-ops"
+fi
+
+
+ fi
+
+ # atomic functions defined in <atomic.h> & libc on Solaris
+ if test "x$intel_cv_atomic_primitives" = "xnone"; then
+ ac_fn_c_check_func "$LINENO" "atomic_cas_uint" "ac_cv_func_atomic_cas_uint"
+if test "x$ac_cv_func_atomic_cas_uint" = xyes; then :
+ intel_cv_atomic_primitives="Solaris"
+fi
+
+ fi
+
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $intel_cv_atomic_primitives" >&5
+$as_echo "$intel_cv_atomic_primitives" >&6; }
+if test "x$intel_cv_atomic_primitives" = xIntel; then
+
+$as_echo "#define HAVE_ATOMIC_PRIMITIVES 1" >>confdefs.h
+
+fi
+if test "x$intel_cv_atomic_primitives" = "xlibatomic-ops"; then
+
+$as_echo "#define HAVE_LIB_ATOMIC_OPS 1" >>confdefs.h
+
+fi
+
+if test "x$intel_cv_atomic_primitives" = "xnone"; then
+ as_fn_error $? "xf86-video-intel depends upon atomic operations, which were not found for your compiler/cpu. Try compiling with -march=native, or install the libatomic-ops-dev package." "$LINENO" 5
+fi
+
# Check whether --enable-udev was given.
if test "${enable_udev+set}" = set; then :
enableval=$enable_udev; UDEV="$enableval"
@@ -18341,7 +18392,7 @@ else
$as_echo "yes" >&6; }
udev=yes
fi
- if test x$UDEV == xyes -a x$udev != xyes; then
+ if test x$UDEV = xyes -a x$udev != xyes; then
as_fn_error $? "udev support requested but not found (libudev)" "$LINENO" 5
fi
if test x$udev = xyes; then
@@ -18473,7 +18524,7 @@ fi
required_xorg_xserver_version=1.6
-required_pixman_version=0.24
+required_pixman_version=0.16
if pkg-config --exists 'pixman-1 >= 0.27.1'; then
@@ -18481,6 +18532,12 @@ $as_echo "#define HAS_PIXMAN_GLYPHS 1" >>confdefs.h
fi
+if pkg-config --exists 'pixman-1 >= 0.24.0'; then
+
+$as_echo "#define HAS_PIXMAN_TRIANGLES 1" >>confdefs.h
+
+fi
+
# Check whether --enable-sna was given.
if test "${enable_sna+set}" = set; then :
enableval=$enable_sna; SNA="$enableval"
@@ -18507,7 +18564,6 @@ if test "x$SNA" = "xauto" && pkg-config --exists "xorg-server >= 1.10"; then
SNA=yes
fi
if test "x$SNA" != "xno"; then
- required_xorg_xserver_version=1.10
$as_echo "#define USE_SNA 1" >>confdefs.h
@@ -18529,21 +18585,19 @@ $as_echo "$SNA" >&6; }
if test "${enable_uxa+set}" = set; then :
enableval=$enable_uxa; UXA="$enableval"
else
- UXA=yes
+ UXA=auto
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to include UXA support" >&5
$as_echo_n "checking whether to include UXA support... " >&6; }
- if test x$UXA != xno; then
- UXA_TRUE=
- UXA_FALSE='#'
-else
- UXA_TRUE='#'
- UXA_FALSE=
+if test "x$UXA" = "xauto"; then
+ if ! pkg-config --exists 'libdrm_intel >= 2.4.29'; then
+ UXA=no
+ fi
+ if ! pkg-config --exists 'pixman-1 >= 0.24.0'; then
+ UXA=no
+ fi
fi
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $UXA" >&5
-$as_echo "$UXA" >&6; }
if test "x$UXA" != "xno"; then
$as_echo "#define USE_UXA 1" >>confdefs.h
@@ -18639,8 +18693,20 @@ else
$as_echo "yes" >&6; }
fi
+ required_pixman_version=0.24
+ UXA=yes
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $UXA" >&5
+$as_echo "$UXA" >&6; }
+ if test x$UXA != xno; then
+ UXA_TRUE=
+ UXA_FALSE='#'
+else
+ UXA_TRUE='#'
+ UXA_FALSE=
fi
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to include GLAMOR support" >&5
$as_echo_n "checking whether to include GLAMOR support... " >&6; }
# Check whether --enable-glamor was given.
@@ -19278,12 +19344,12 @@ if test -n "$DRM_CFLAGS"; then
pkg_cv_DRM_CFLAGS="$DRM_CFLAGS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libdrm >= 2.4.24\""; } >&5
- ($PKG_CONFIG --exists --print-errors "libdrm >= 2.4.24") 2>&5
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libdrm >= 2.4.20\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "libdrm >= 2.4.20") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- pkg_cv_DRM_CFLAGS=`$PKG_CONFIG --cflags "libdrm >= 2.4.24" 2>/dev/null`
+ pkg_cv_DRM_CFLAGS=`$PKG_CONFIG --cflags "libdrm >= 2.4.20" 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@@ -19295,12 +19361,12 @@ if test -n "$DRM_LIBS"; then
pkg_cv_DRM_LIBS="$DRM_LIBS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libdrm >= 2.4.24\""; } >&5
- ($PKG_CONFIG --exists --print-errors "libdrm >= 2.4.24") 2>&5
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libdrm >= 2.4.20\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "libdrm >= 2.4.20") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- pkg_cv_DRM_LIBS=`$PKG_CONFIG --libs "libdrm >= 2.4.24" 2>/dev/null`
+ pkg_cv_DRM_LIBS=`$PKG_CONFIG --libs "libdrm >= 2.4.20" 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@@ -19321,14 +19387,14 @@ else
_pkg_short_errors_supported=no
fi
if test $_pkg_short_errors_supported = yes; then
- DRM_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libdrm >= 2.4.24" 2>&1`
+ DRM_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libdrm >= 2.4.20" 2>&1`
else
- DRM_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libdrm >= 2.4.24" 2>&1`
+ DRM_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libdrm >= 2.4.20" 2>&1`
fi
# Put the nasty error message in config.log where it belongs
echo "$DRM_PKG_ERRORS" >&5
- as_fn_error $? "Package requirements (libdrm >= 2.4.24) were not met:
+ as_fn_error $? "Package requirements (libdrm >= 2.4.20) were not met:
$DRM_PKG_ERRORS
@@ -19702,12 +19768,12 @@ if test -n "$XVMCLIB_CFLAGS"; then
pkg_cv_XVMCLIB_CFLAGS="$XVMCLIB_CFLAGS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"xvmc xext xfixes dri2proto x11-xcb xcb-dri2 xcb-aux\""; } >&5
- ($PKG_CONFIG --exists --print-errors "xvmc xext xfixes dri2proto x11-xcb xcb-dri2 xcb-aux") 2>&5
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"xvmc dri2proto\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "xvmc dri2proto") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- pkg_cv_XVMCLIB_CFLAGS=`$PKG_CONFIG --cflags "xvmc xext xfixes dri2proto x11-xcb xcb-dri2 xcb-aux" 2>/dev/null`
+ pkg_cv_XVMCLIB_CFLAGS=`$PKG_CONFIG --cflags "xvmc dri2proto" 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@@ -19719,12 +19785,12 @@ if test -n "$XVMCLIB_LIBS"; then
pkg_cv_XVMCLIB_LIBS="$XVMCLIB_LIBS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"xvmc xext xfixes dri2proto x11-xcb xcb-dri2 xcb-aux\""; } >&5
- ($PKG_CONFIG --exists --print-errors "xvmc xext xfixes dri2proto x11-xcb xcb-dri2 xcb-aux") 2>&5
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"xvmc dri2proto\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "xvmc dri2proto") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- pkg_cv_XVMCLIB_LIBS=`$PKG_CONFIG --libs "xvmc xext xfixes dri2proto x11-xcb xcb-dri2 xcb-aux" 2>/dev/null`
+ pkg_cv_XVMCLIB_LIBS=`$PKG_CONFIG --libs "xvmc dri2proto" 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@@ -19745,9 +19811,9 @@ else
_pkg_short_errors_supported=no
fi
if test $_pkg_short_errors_supported = yes; then
- XVMCLIB_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "xvmc xext xfixes dri2proto x11-xcb xcb-dri2 xcb-aux" 2>&1`
+ XVMCLIB_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "xvmc dri2proto" 2>&1`
else
- XVMCLIB_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "xvmc xext xfixes dri2proto x11-xcb xcb-dri2 xcb-aux" 2>&1`
+ XVMCLIB_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "xvmc dri2proto" 2>&1`
fi
# Put the nasty error message in config.log where it belongs
echo "$XVMCLIB_PKG_ERRORS" >&5
@@ -19762,7 +19828,78 @@ else
XVMCLIB_LIBS=$pkg_cv_XVMCLIB_LIBS
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
- XVMC=yes
+
+fi
+
+pkg_failed=no
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for XCB" >&5
+$as_echo_n "checking for XCB... " >&6; }
+
+if test -n "$XCB_CFLAGS"; then
+ pkg_cv_XCB_CFLAGS="$XCB_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"x11-xcb xcb-dri2 xcb-aux\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "x11-xcb xcb-dri2 xcb-aux") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_XCB_CFLAGS=`$PKG_CONFIG --cflags "x11-xcb xcb-dri2 xcb-aux" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+if test -n "$XCB_LIBS"; then
+ pkg_cv_XCB_LIBS="$XCB_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"x11-xcb xcb-dri2 xcb-aux\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "x11-xcb xcb-dri2 xcb-aux") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_XCB_LIBS=`$PKG_CONFIG --libs "x11-xcb xcb-dri2 xcb-aux" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+
+
+
+if test $pkg_failed = yes; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+ _pkg_short_errors_supported=yes
+else
+ _pkg_short_errors_supported=no
+fi
+ if test $_pkg_short_errors_supported = yes; then
+ XCB_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "x11-xcb xcb-dri2 xcb-aux" 2>&1`
+ else
+ XCB_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "x11-xcb xcb-dri2 xcb-aux" 2>&1`
+ fi
+ # Put the nasty error message in config.log where it belongs
+ echo "$XCB_PKG_ERRORS" >&5
+
+ XVMC=no
+elif test $pkg_failed = untried; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ XVMC=no
+else
+ XCB_CFLAGS=$pkg_cv_XCB_CFLAGS
+ XCB_LIBS=$pkg_cv_XCB_LIBS
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to include XvMC support" >&5
@@ -19818,7 +19955,7 @@ else
DEBUG_FALSE=
fi
- if test x$FULL_DEBUG == xfull; then
+ if test x$DEBUG = xfull; then
FULL_DEBUG_TRUE=
FULL_DEBUG_FALSE='#'
else
@@ -19909,6 +20046,11 @@ $as_echo "#define HAVE_VALGRIND 1" >>confdefs.h
fi
fi
+if test "x$DEBUG" = xsync; then
+
+$as_echo "#define DEBUG_SYNC 1" >>confdefs.h
+
+fi
if test "x$DEBUG" = xmemory; then
$as_echo "#define DEBUG_MEMORY 1" >>confdefs.h
@@ -20055,10 +20197,6 @@ else
am__EXEEXT_FALSE=
fi
-if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then
- as_fn_error $? "conditional \"MAINTAINER_MODE\" was never defined.
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
-fi
if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then
as_fn_error $? "conditional \"AMDEP\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
@@ -20532,7 +20670,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by xf86-video-intel $as_me 2.20.10, which was
+This file was extended by xf86-video-intel $as_me 2.21.2, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -20598,7 +20736,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-xf86-video-intel config.status 2.20.10
+xf86-video-intel config.status 2.21.2
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
diff --git a/configure.ac b/configure.ac
index 972d9188e..3a4b6dbcb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
# Initialize Autoconf
AC_PREREQ([2.60])
AC_INIT([xf86-video-intel],
- [2.20.10],
+ [2.21.2],
[https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
[xf86-video-intel])
AC_CONFIG_SRCDIR([Makefile.am])
@@ -32,7 +32,6 @@ AC_CONFIG_AUX_DIR(.)
# Initialize Automake
AM_INIT_AUTOMAKE([foreign dist-bzip2])
-AM_MAINTAINER_MODE
# Require X.Org macros 1.8 or later for MAN_SUBSTS set by XORG_MANPAGE_SECTIONS
m4_ifndef([XORG_MACROS_VERSION],
@@ -105,6 +104,40 @@ if test x$ASM != "xno"; then
fi
AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes)
+# Check for atomic intrinsics
+AC_CACHE_CHECK([for native atomic primitives], intel_cv_atomic_primitives,
+[
+ intel_cv_atomic_primitives="none"
+
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+int atomic_add(int i) { return __sync_fetch_and_add (&i, 1); }
+int atomic_cmpxchg(int i, int j, int k) { return __sync_val_compare_and_swap (&i, j, k); }
+ ]],[[]])],
+ [intel_cv_atomic_primitives="Intel"],[])
+
+ if test "x$intel_cv_atomic_primitives" = "xnone"; then
+ AC_CHECK_HEADER([atomic_ops.h], intel_cv_atomic_primitives="libatomic-ops")
+ fi
+
+ # atomic functions defined in <atomic.h> & libc on Solaris
+ if test "x$intel_cv_atomic_primitives" = "xnone"; then
+ AC_CHECK_FUNC([atomic_cas_uint],
+ intel_cv_atomic_primitives="Solaris")
+ fi
+
+])
+if test "x$intel_cv_atomic_primitives" = xIntel; then
+ AC_DEFINE(HAVE_ATOMIC_PRIMITIVES, 1,
+ [Enable if your compiler supports the Intel __sync_* atomic primitives])
+fi
+if test "x$intel_cv_atomic_primitives" = "xlibatomic-ops"; then
+ AC_DEFINE(HAVE_LIB_ATOMIC_OPS, 1, [Enable if you have libatomic-ops-dev installed])
+fi
+
+if test "x$intel_cv_atomic_primitives" = "xnone"; then
+ AC_MSG_ERROR([xf86-video-intel depends upon atomic operations, which were not found for your compiler/cpu. Try compiling with -march=native, or install the libatomic-ops-dev package.])
+fi
+
AC_ARG_ENABLE(udev,
AS_HELP_STRING([--disable-udev],
[Disable udev-based monitor hotplug detection [default=auto]]),
@@ -113,7 +146,7 @@ AC_ARG_ENABLE(udev,
if test x$UDEV != "xno"; then
PKG_CHECK_MODULES(UDEV, [libudev], [udev=yes], [udev=no])
- if test x$UDEV == xyes -a x$udev != xyes; then
+ if test x$UDEV = xyes -a x$udev != xyes; then
AC_MSG_ERROR([udev support requested but not found (libudev)])
fi
if test x$udev = xyes; then
@@ -151,12 +184,16 @@ AC_ARG_ENABLE(ums-only, AS_HELP_STRING([--enable-ums-only],
[UMS_ONLY=no])
required_xorg_xserver_version=1.6
-required_pixman_version=0.24
+required_pixman_version=0.16
if pkg-config --exists 'pixman-1 >= 0.27.1'; then
AC_DEFINE([HAS_PIXMAN_GLYPHS], 1, [Enable pixman glyph cache])
fi
+if pkg-config --exists 'pixman-1 >= 0.24.0'; then
+ AC_DEFINE([HAS_PIXMAN_TRIANGLES], 1, [Enable pixman triangle rasterisation])
+fi
+
AC_ARG_ENABLE(sna,
AS_HELP_STRING([--enable-sna],
[Enable SandyBridge's New Acceleration (SNA) [default=auto]]),
@@ -168,7 +205,6 @@ if test "x$SNA" = "xauto" && pkg-config --exists "xorg-server >= 1.10"; then
SNA=yes
fi
if test "x$SNA" != "xno"; then
- required_xorg_xserver_version=1.10
AC_DEFINE(USE_SNA, 1, [Enable SNA support])
fi
AC_MSG_CHECKING([whether to include SNA support])
@@ -179,14 +215,24 @@ AC_ARG_ENABLE(uxa,
AS_HELP_STRING([--enable-uxa],
[Enable Unified Acceleration Architecture (UXA) [default=yes]]),
[UXA="$enableval"],
- [UXA=yes])
+ [UXA=auto])
AC_MSG_CHECKING([whether to include UXA support])
-AM_CONDITIONAL(UXA, test x$UXA != xno)
-AC_MSG_RESULT([$UXA])
+if test "x$UXA" = "xauto"; then
+ if ! pkg-config --exists 'libdrm_intel >= 2.4.29'; then
+ UXA=no
+ fi
+ if ! pkg-config --exists 'pixman-1 >= 0.24.0'; then
+ UXA=no
+ fi
+fi
if test "x$UXA" != "xno"; then
AC_DEFINE(USE_UXA, 1, [Enable UXA support])
PKG_CHECK_MODULES(DRMINTEL, [libdrm_intel >= 2.4.29])
+ required_pixman_version=0.24
+ UXA=yes
fi
+AC_MSG_RESULT([$UXA])
+AM_CONDITIONAL(UXA, test x$UXA != xno)
AC_MSG_CHECKING([whether to include GLAMOR support])
AC_ARG_ENABLE(glamor,
@@ -314,7 +360,7 @@ XORG_DRIVER_CHECK_EXT(XF86DRI, xextproto x11)
XORG_DRIVER_CHECK_EXT(DPMSExtension, xextproto)
# Obtain compiler/linker options for the driver dependencies
-PKG_CHECK_MODULES(DRM, [libdrm >= 2.4.24]) # libdrm_intel is checked separately
+PKG_CHECK_MODULES(DRM, [libdrm >= 2.4.20]) # libdrm_intel is checked separately
PKG_CHECK_MODULES(DRI, [xf86driproto], , DRI=no)
PKG_CHECK_MODULES(DRI2, [dri2proto >= 2.6],, DRI2=no)
PKG_CHECK_MODULES(PCIACCESS, [pciaccess >= 0.10])
@@ -370,9 +416,8 @@ AM_CONDITIONAL(DRI2, test "x$DRI2" = xyes)
AC_MSG_RESULT([$DRI2])
if test "$XVMC" = yes; then
- PKG_CHECK_MODULES(XVMCLIB,
- [xvmc xext xfixes dri2proto x11-xcb xcb-dri2 xcb-aux],
- [XVMC=yes], [XVMC=no])
+ PKG_CHECK_MODULES(XVMCLIB, [xvmc dri2proto], [], [XVMC=no])
+ PKG_CHECK_MODULES(XCB, [x11-xcb xcb-dri2 xcb-aux], [], [XVMC=no])
fi
AC_MSG_CHECKING([whether to include XvMC support])
AC_MSG_RESULT([$XVMC])
@@ -391,7 +436,7 @@ if test "x$UMS_ONLY" = xyes; then
fi
AM_CONDITIONAL(DEBUG, test x$DEBUG != xno)
-AM_CONDITIONAL(FULL_DEBUG, test x$FULL_DEBUG == xfull)
+AM_CONDITIONAL(FULL_DEBUG, test x$DEBUG = xfull)
if test "x$DEBUG" = xno; then
AC_DEFINE(NDEBUG,1,[Disable internal debugging])
fi
@@ -401,6 +446,9 @@ if test "x$DEBUG" != xno; then
AC_DEFINE([HAVE_VALGRIND], 1, [Use valgrind intrinsics to suppress false warnings])
fi
fi
+if test "x$DEBUG" = xsync; then
+ AC_DEFINE(DEBUG_SYNC,1,[Enable synchronous rendering for debugging])
+fi
if test "x$DEBUG" = xmemory; then
AC_DEFINE(DEBUG_MEMORY,1,[Enable memory debugging])
fi
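[Note: HAS_PIXMAN_TRIANGLES above merely records whether pixman is new
enough (>= 0.24.0) to rasterise triangles itself. A sketch of how such a
guard is typically consumed -- the function and its fallback here are
illustrative, not the driver's actual code:]

    #include <pixman.h>

    /* Composite a8 triangle coverage via pixman when available;
     * otherwise the caller must take its own software path. */
    static void composite_triangles(pixman_image_t *src, pixman_image_t *dst,
                                    int ntri, const pixman_triangle_t *tri)
    {
    #ifdef HAS_PIXMAN_TRIANGLES
            pixman_composite_triangles(PIXMAN_OP_OVER, src, dst, PIXMAN_a8,
                                       0, 0, 0, 0, ntri, tri);
    #else
            (void)src; (void)dst; (void)ntri; (void)tri;
    #endif
    }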
diff --git a/man/Makefile.in b/man/Makefile.in
index 29efd9588..278ae4405 100644
--- a/man/Makefile.in
+++ b/man/Makefile.in
@@ -196,7 +196,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MAN_SUBSTS = @MAN_SUBSTS@
@@ -235,6 +234,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@
VERSION = @VERSION@
X11_CFLAGS = @X11_CFLAGS@
X11_LIBS = @X11_LIBS@
+XCB_CFLAGS = @XCB_CFLAGS@
+XCB_LIBS = @XCB_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_LIBS = @XORG_LIBS@
XORG_MAN_PAGE = @XORG_MAN_PAGE@
@@ -304,7 +305,7 @@ all: all-am
.SUFFIXES:
.SUFFIXES: .$(DRIVER_MAN_SUFFIX) .man
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -329,9 +330,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
diff --git a/man/intel.man b/man/intel.man
index 0942dc1c0..fbd0230aa 100644
--- a/man/intel.man
+++ b/man/intel.man
@@ -116,6 +116,24 @@ The following driver
.B Options
are supported for the 830M and later chipsets:
.TP
+.BI "Option \*qNoAccel\*q \*q" boolean \*q
+Disable or enable acceleration.
+.IP
+Default: acceleration is enabled.
+.TP
+.BI "Option \*qAccelMethod\*q \*q" string \*q
+Select acceleration method.
+There are a couple of backends available for accelerating the DDX. \*qUXA\*q (Unified
+Acceleration Architecture) is the mature backend that was introduced to support
+the GEM driver model. It is in the process of being superseded by \*qSNA\*q
+(Sandybridge's New Acceleration). Until that process is complete, the ability to
+choose which backend to use remains for backwards compatibility.
+In addition, there are two sub-options to limit the acceleration for
+debugging use. Specify \*qoff\*q to disable all acceleration, or \*qblt\*q to
+disable render acceleration and only use the BLT engine.
+.IP
+Default: use UXA (render acceleration)
+.TP
.BI "Option \*qVideoKey\*q \*q" integer \*q
This is the same as the
.B \*qColorKey\*q
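[Note: a minimal xorg.conf Device section exercising the AccelMethod
option documented above; the Identifier string is arbitrary:]

    Section "Device"
        Identifier "Intel Graphics"
        Driver     "intel"
        # "uxa" is the documented default; "sna" selects the newer
        # backend, "off" and "blt" are the debugging sub-options.
        Option     "AccelMethod" "sna"
    EndSection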
diff --git a/src/Makefile.in b/src/Makefile.in
index 3c5a911f7..e28de984e 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -343,7 +343,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MAN_SUBSTS = @MAN_SUBSTS@
@@ -382,6 +381,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@
VERSION = @VERSION@
X11_CFLAGS = @X11_CFLAGS@
X11_LIBS = @X11_LIBS@
+XCB_CFLAGS = @XCB_CFLAGS@
+XCB_LIBS = @XCB_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_LIBS = @XORG_LIBS@
XORG_MAN_PAGE = @XORG_MAN_PAGE@
@@ -463,7 +464,7 @@ all: all-recursive
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -488,9 +489,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
install-intel_drv_laLTLIBRARIES: $(intel_drv_la_LTLIBRARIES)
diff --git a/src/compat-api.h b/src/compat-api.h
index 6b7657241..6d147c74d 100644
--- a/src/compat-api.h
+++ b/src/compat-api.h
@@ -28,6 +28,10 @@
#ifndef COMPAT_API_H
#define COMPAT_API_H
+#include <xorg-server.h>
+#include <xorgVersion.h>
+
+#include <picturestr.h>
#ifndef GLYPH_HAS_GLYPH_PICTURE_ACCESSOR
#define GetGlyphPicture(g, s) GlyphPicture((g))[(s)->myNum]
#define SetGlyphPicture(g, s, p) GlyphPicture((g))[(s)->myNum] = p
@@ -103,4 +107,54 @@
#endif
+#ifndef INCLUDE_LEGACY_REGION_DEFINES
+#define RegionCreate(r, s) REGION_CREATE(NULL, r, s)
+#define RegionBreak(r) REGION_BREAK(NULL, r)
+#define RegionSizeof REGION_SZOF
+#define RegionBoxptr REGION_BOXPTR
+#define RegionEnd REGION_END
+#define RegionExtents(r) REGION_EXTENTS(NULL, r)
+#define RegionRects REGION_RECTS
+#define RegionNumRects REGION_NUM_RECTS
+#define RegionContainsRect(r, b) RECT_IN_REGION(NULL, r, b)
+#define RegionContainsPoint(r, x, y, b) POINT_IN_REGION(NULL, r, x, y, b)
+#define RegionCopy(res, r) REGION_COPY(NULL, res, r)
+#define RegionIntersect(res, r1, r2) REGION_INTERSECT(NULL, res, r1, r2)
+#define RegionUnion(res, r1, r2) REGION_UNION(NULL, res, r1, r2)
+#define RegionTranslate(r, x, y) REGION_TRANSLATE(NULL, r, x, y)
+#define RegionUninit(r) REGION_UNINIT(NULL, r)
+#define region_from_bitmap BITMAP_TO_REGION
+#define RegionNil REGION_NIL
+#define RegionNull(r) REGION_NULL(NULL, r)
+#define RegionNotEmpty(r) REGION_NOTEMPTY(NULL, r)
+#define RegionEmpty(r) REGION_EMPTY(NULL, r)
+#define RegionDestroy(r) REGION_DESTROY(NULL, r)
+#else
+#define region_from_bitmap BitmapToRegion
+#endif
+
+#ifndef _X_UNUSED
+#define _X_UNUSED
+#endif
+
+#if HAS_DEVPRIVATEKEYREC
+#define __get_private(p, key) dixGetPrivateAddr(&(p)->devPrivates, &(key))
+#else
+#define __get_private(p, key) dixLookupPrivate(&(p)->devPrivates, &(key))
+typedef int DevPrivateKeyRec;
+static inline void FreePixmap(PixmapPtr pixmap)
+{
+ dixFreePrivates(pixmap->devPrivates);
+ free(pixmap);
+}
+#endif
+
+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,9,99,902,0)
+#define SourceValidate(d, x, y, w, h, mode) \
+ if ((d)->pScreen->SourceValidate) (d)->pScreen->SourceValidate(d, x, y, w, h, mode)
+#else
+#define SourceValidate(d, x, y, w, h, mode) \
+ if ((d)->pScreen->SourceValidate) (d)->pScreen->SourceValidate(d, x, y, w, h)
+#endif
+
#endif
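[Note: the SourceValidate wrapper above hides the extra subwindow-mode
argument grown by the screen hook in xorg-1.10; with it, call sites
compile unchanged against both old and new servers. A hypothetical
caller, assuming compat-api.h and the usual dix headers are included:]

    /* The macro expands to the 5- or 6-argument
     * pScreen->SourceValidate call as appropriate. */
    static void validate_copy_source(DrawablePtr src,
                                     int x, int y, int w, int h)
    {
            SourceValidate(src, x, y, w, h, IncludeInferiors);
    }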
diff --git a/src/i965_3d.c b/src/i965_3d.c
index a18db1251..fe2d9aa6b 100644
--- a/src/i965_3d.c
+++ b/src/i965_3d.c
@@ -35,7 +35,7 @@
void
gen6_upload_invariant_states(intel_screen_private *intel)
{
- Bool ivb = INTEL_INFO(intel)->gen >= 70;
+ Bool ivb = INTEL_INFO(intel)->gen >= 070;
OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2));
OUT_BATCH(BRW_PIPE_CONTROL_IS_FLUSH |
@@ -280,7 +280,7 @@ gen7_upload_bypass_states(intel_screen_private *intel)
void
gen6_upload_vs_state(intel_screen_private *intel)
{
- Bool ivb = INTEL_INFO(intel)->gen >= 70;
+ Bool ivb = INTEL_INFO(intel)->gen >= 070;
/* disable VS constant buffer */
OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | ((ivb ? 7 : 5) - 2));
OUT_BATCH(0);
diff --git a/src/i965_render.c b/src/i965_render.c
index 42b195992..39698b0dc 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -1054,7 +1054,7 @@ i965_create_sampler_state(intel_screen_private *intel,
sampler_state_extend_t mask_extend,
drm_intel_bo * border_color_bo)
{
- if (INTEL_INFO(intel)->gen < 70)
+ if (INTEL_INFO(intel)->gen < 070)
return gen4_create_sampler_state(intel, src_filter, src_extend,
mask_filter, mask_extend,
border_color_bo);
@@ -1417,7 +1417,7 @@ i965_set_picture_surface_state(intel_screen_private *intel,
PicturePtr picture, PixmapPtr pixmap,
Bool is_dst)
{
- if (INTEL_INFO(intel)->gen < 70)
+ if (INTEL_INFO(intel)->gen < 070)
return gen4_set_picture_surface_state(intel, picture, pixmap, is_dst);
return gen7_set_picture_surface_state(intel, picture, pixmap, is_dst);
}
@@ -1571,7 +1571,7 @@ static void i965_emit_composite_state(struct intel_screen_private *intel)
}
/* Match Mesa driver setup */
- if (INTEL_INFO(intel)->gen >= 45)
+ if (INTEL_INFO(intel)->gen >= 045)
OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
else
OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
@@ -1751,7 +1751,7 @@ static Bool i965_composite_check_aperture(intel_screen_private *intel)
render_state->gen6_depth_stencil_bo,
};
- if (INTEL_INFO(intel)->gen >= 60)
+ if (INTEL_INFO(intel)->gen >= 060)
return drm_intel_bufmgr_check_aperture_space(gen6_bo_table,
ARRAY_SIZE(gen6_bo_table)) == 0;
else
@@ -2181,7 +2181,7 @@ static void i965_select_vertex_buffer(struct intel_screen_private *intel)
if (intel->vertex_id & (1 << id))
return;
- if (INTEL_INFO(intel)->gen >= 70)
+ if (INTEL_INFO(intel)->gen >= 070)
modifyenable = GEN7_VB0_ADDRESS_MODIFYENABLE;
/* Set up the pointer to our (single) vertex buffer */
@@ -2190,7 +2190,7 @@ static void i965_select_vertex_buffer(struct intel_screen_private *intel)
/* XXX could use multiple vbo to reduce relocations if
* frequently switching between vertex sizes, like rgb10text.
*/
- if (INTEL_INFO(intel)->gen >= 60) {
+ if (INTEL_INFO(intel)->gen >= 060) {
OUT_BATCH((id << GEN6_VB0_BUFFER_INDEX_SHIFT) |
GEN6_VB0_VERTEXDATA |
modifyenable |
@@ -2201,7 +2201,7 @@ static void i965_select_vertex_buffer(struct intel_screen_private *intel)
(4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
}
OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
- if (INTEL_INFO(intel)->gen >= 50)
+ if (INTEL_INFO(intel)->gen >= 050)
OUT_RELOC(intel->vertex_bo,
I915_GEM_DOMAIN_VERTEX, 0,
sizeof(intel->vertex_ptr) - 1);
@@ -2252,7 +2252,7 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
if (intel->needs_render_state_emit) {
i965_bind_surfaces(intel);
- if (INTEL_INFO(intel)->gen >= 60)
+ if (INTEL_INFO(intel)->gen >= 060)
gen6_emit_composite_state(intel);
else
i965_emit_composite_state(intel);
@@ -2271,7 +2271,7 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
i965_select_vertex_buffer(intel);
if (intel->vertex_offset == 0) {
- if (INTEL_INFO(intel)->gen >= 70) {
+ if (INTEL_INFO(intel)->gen >= 070) {
OUT_BATCH(BRW_3DPRIMITIVE | (7 - 2));
OUT_BATCH(BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
_3DPRIM_RECTLIST);
@@ -2298,7 +2298,7 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
w, h);
intel->vertex_index += 3;
- if (INTEL_INFO(intel)->gen < 50) {
+ if (INTEL_INFO(intel)->gen < 050) {
/* XXX OMG! */
i965_vertex_flush(intel);
OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
@@ -2355,7 +2355,7 @@ void gen4_render_state_init(ScrnInfoPtr scrn)
assert(intel->gen4_render_state != NULL);
}
- if (INTEL_INFO(intel)->gen >= 60)
+ if (INTEL_INFO(intel)->gen >= 060)
return gen6_render_state_init(scrn);
render = intel->gen4_render_state;
@@ -2601,7 +2601,7 @@ gen6_composite_cc_state_pointers(intel_screen_private *intel,
cc_bo = render_state->cc_state_bo;
depth_stencil_bo = render_state->gen6_depth_stencil_bo;
}
- if (INTEL_INFO(intel)->gen >= 70) {
+ if (INTEL_INFO(intel)->gen >= 070) {
gen7_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset);
} else {
gen6_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset);
@@ -2619,7 +2619,7 @@ gen6_composite_sampler_state_pointers(intel_screen_private *intel,
intel->gen6_render_state.samplers = bo;
- if (INTEL_INFO(intel)->gen >= 70)
+ if (INTEL_INFO(intel)->gen >= 070)
gen7_upload_sampler_state_pointers(intel, bo);
else
gen6_upload_sampler_state_pointers(intel, bo);
@@ -2628,7 +2628,7 @@ gen6_composite_sampler_state_pointers(intel_screen_private *intel,
static void
gen6_composite_wm_constants(intel_screen_private *intel)
{
- Bool ivb = INTEL_INFO(intel)->gen >= 70;
+ Bool ivb = INTEL_INFO(intel)->gen >= 070;
/* disable WM constant buffer */
OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | ((ivb ? 7 : 5) - 2));
OUT_BATCH(0);
@@ -2652,7 +2652,7 @@ gen6_composite_sf_state(intel_screen_private *intel,
intel->gen6_render_state.num_sf_outputs = num_sf_outputs;
- if (INTEL_INFO(intel)->gen >= 70)
+ if (INTEL_INFO(intel)->gen >= 070)
gen7_upload_sf_state(intel, num_sf_outputs, 1);
else
gen6_upload_sf_state(intel, num_sf_outputs, 1);
@@ -2839,7 +2839,7 @@ gen6_emit_composite_state(struct intel_screen_private *intel)
sampler_state_extend_t mask_extend = composite_op->mask_extend;
Bool is_affine = composite_op->is_affine;
Bool has_mask = intel->render_mask != NULL;
- Bool ivb = INTEL_INFO(intel)->gen >= 70;
+ Bool ivb = INTEL_INFO(intel)->gen >= 070;
uint32_t src, dst;
drm_intel_bo *ps_sampler_state_bo = render->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend];
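The change from 70 to 070 above is not cosmetic: a leading zero makes a C integer literal octal, so 070 is decimal 56. This release renumbers the device generations so that each major gen spans eight minor steps (see the IS_GENx() rework in intel_driver.h below), and every comparison in the tree has to switch to the octal spelling to stay consistent. A minimal standalone illustration, not part of the driver:

#include <stdio.h>

int main(void)
{
	/* A leading zero makes a C integer literal octal. */
	printf("70  (decimal) = %d\n", 70);	/* 70 */
	printf("070 (octal)   = %d\n", 070);	/* 56 = 8*7, gen7 */
	printf("045 (octal)   = %d\n", 045);	/* 37 = 8*4 + 5, G4x */
	return 0;
}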
diff --git a/src/i965_video.c b/src/i965_video.c
index 3276788fb..65f60612a 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -897,7 +897,7 @@ i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * surface_state_binding_tab
/* brw_debug (scrn, "before base address modify"); */
/* Match Mesa driver setup */
- if (INTEL_INFO(intel)->gen >= 45)
+ if (INTEL_INFO(intel)->gen >= 045)
OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
else
OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
@@ -1428,7 +1428,7 @@ gen6_create_vidoe_objects(ScrnInfoPtr scrn)
const uint32_t *packed_ps_kernel, *planar_ps_kernel;
unsigned int packed_ps_size, planar_ps_size;
- if (INTEL_INFO(intel)->gen >= 70) {
+ if (INTEL_INFO(intel)->gen >= 070) {
create_sampler_state = gen7_create_sampler_state;
packed_ps_kernel = &ps_kernel_packed_static_gen7[0][0];
packed_ps_size = sizeof(ps_kernel_packed_static_gen7);
@@ -1787,7 +1787,7 @@ void Gen6DisplayVideoTextured(ScrnInfoPtr scrn,
PixmapPtr,
drm_intel_bo *, uint32_t);
- if (INTEL_INFO(intel)->gen >= 70) {
+ if (INTEL_INFO(intel)->gen >= 070) {
create_dst_surface_state = gen7_create_dst_surface_state;
create_src_surface_state = gen7_create_src_surface_state;
emit_video_setup = gen7_emit_video_setup;
diff --git a/src/intel.h b/src/intel.h
index a5603fee6..d4c9aff21 100644
--- a/src/intel.h
+++ b/src/intel.h
@@ -182,7 +182,7 @@ typedef struct intel_screen_private {
unsigned int batch_emit_start;
/** Number of bytes to be emitted in the current BEGIN_BATCH. */
uint32_t batch_emitting;
- dri_bo *batch_bo;
+ dri_bo *batch_bo, *last_batch_bo[2];
/** Whether we're in a section of code that can't tolerate flushing */
Bool in_batch_atomic;
/** Ending batch_used that was verified by intel_start_batch_atomic() */
@@ -366,6 +366,7 @@ extern Bool intel_mode_pre_init(ScrnInfoPtr pScrn, int fd, int cpp);
extern void intel_mode_init(struct intel_screen_private *intel);
extern void intel_mode_disable_unused_functions(ScrnInfoPtr scrn);
extern void intel_mode_remove_fb(intel_screen_private *intel);
+extern void intel_mode_close(intel_screen_private *intel);
extern void intel_mode_fini(intel_screen_private *intel);
extern int intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, xf86CrtcPtr crtc);
@@ -552,6 +553,9 @@ intel_get_transformed_coordinates_3d(int x, int y, PictTransformPtr transform,
float *x_out, float *y_out, float *z_out);
static inline void
+intel_debug_fallback(ScrnInfoPtr scrn, const char *format, ...) _X_ATTRIBUTE_PRINTF(2, 3);
+
+static inline void
intel_debug_fallback(ScrnInfoPtr scrn, const char *format, ...)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
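The prototype added above attaches _X_ATTRIBUTE_PRINTF(2, 3), a wrapper around the compiler's format(printf) attribute, to intel_debug_fallback() so that the format string and variadic arguments are type-checked at every call site; putting the attribute on a forward declaration immediately before the definition is the usual way to annotate a static inline function. A reduced sketch of the same pattern, assuming GCC/Clang attribute semantics:

#include <stdarg.h>
#include <stdio.h>

/* Declare first so the format attribute covers the definition below. */
static inline void debug_msg(int level, const char *fmt, ...)
	__attribute__((format(printf, 2, 3)));

static inline void debug_msg(int level, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	if (level > 0)
		vfprintf(stderr, fmt, ap);
	va_end(ap);
}

int main(void)
{
	debug_msg(1, "gen=%d\n", 070);
	/* debug_msg(1, "gen=%d\n", "oops"); would now warn at compile time */
	return 0;
}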
diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index 46f22bc36..a44a15632 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -67,17 +67,26 @@ void intel_next_vertex(intel_screen_private *intel)
dri_bo_alloc(intel->bufmgr, "vertex", sizeof (intel->vertex_ptr), 4096);
}
-static void intel_next_batch(ScrnInfoPtr scrn)
+static dri_bo *bo_alloc(ScrnInfoPtr scrn)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
-
+ int size = 4 * 4096;
/* The 865 has issues with larger-than-page-sized batch buffers. */
if (IS_I865G(intel))
- intel->batch_bo =
- dri_bo_alloc(intel->bufmgr, "batch", 4096, 4096);
- else
- intel->batch_bo =
- dri_bo_alloc(intel->bufmgr, "batch", 4096 * 4, 4096);
+ size = 4096;
+ return dri_bo_alloc(intel->bufmgr, "batch", size, 4096);
+}
+
+static void intel_next_batch(ScrnInfoPtr scrn, int mode)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+ dri_bo *tmp;
+
+ drm_intel_gem_bo_clear_relocs(intel->batch_bo, 0);
+
+ tmp = intel->last_batch_bo[mode];
+ intel->last_batch_bo[mode] = intel->batch_bo;
+ intel->batch_bo = tmp;
intel->batch_used = 0;
@@ -95,12 +104,25 @@ void intel_batch_init(ScrnInfoPtr scrn)
intel->batch_emitting = 0;
intel->vertex_id = 0;
- intel_next_batch(scrn);
+ intel->last_batch_bo[0] = bo_alloc(scrn);
+ intel->last_batch_bo[1] = bo_alloc(scrn);
+
+ intel->batch_bo = bo_alloc(scrn);
+ intel->batch_used = 0;
+ intel->last_3d = LAST_3D_OTHER;
}
void intel_batch_teardown(ScrnInfoPtr scrn)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(intel->last_batch_bo); i++) {
+ if (intel->last_batch_bo[i] != NULL) {
+ dri_bo_unreference(intel->last_batch_bo[i]);
+ intel->last_batch_bo[i] = NULL;
+ }
+ }
if (intel->batch_bo != NULL) {
dri_bo_unreference(intel->batch_bo);
@@ -162,7 +184,7 @@ void intel_batch_emit_flush(ScrnInfoPtr scrn)
assert (!intel->in_batch_atomic);
/* Big hammer, look to the pipelined flushes in future. */
- if ((INTEL_INFO(intel)->gen >= 60)) {
+ if ((INTEL_INFO(intel)->gen >= 060)) {
if (intel->current_batch == BLT_BATCH) {
BEGIN_BATCH_BLT(4);
OUT_BATCH(MI_FLUSH_DW | 2);
@@ -171,7 +193,7 @@ void intel_batch_emit_flush(ScrnInfoPtr scrn)
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
- if ((INTEL_INFO(intel)->gen == 60)) {
+ if ((INTEL_INFO(intel)->gen == 060)) {
/* HW-Workaround for Sandybridge */
intel_emit_post_sync_nonzero_flush(scrn);
} else {
@@ -187,7 +209,7 @@ void intel_batch_emit_flush(ScrnInfoPtr scrn)
}
} else {
flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE;
- if (INTEL_INFO(intel)->gen >= 40)
+ if (INTEL_INFO(intel)->gen >= 040)
flags = 0;
BEGIN_BATCH(1);
@@ -239,22 +261,21 @@ void intel_batch_submit(ScrnInfoPtr scrn)
}
if (ret != 0) {
- if (ret == -EIO) {
- static int once;
-
- /* The GPU has hung and unlikely to recover by this point. */
- if (!once) {
+ static int once;
+ if (!once) {
+ if (ret == -EIO) {
+ /* The GPU has hung and is unlikely to recover by this point. */
xf86DrvMsg(scrn->scrnIndex, X_ERROR, "Detected a hung GPU, disabling acceleration.\n");
xf86DrvMsg(scrn->scrnIndex, X_ERROR, "When reporting this, please include i915_error_state from debugfs and the full dmesg.\n");
- uxa_set_force_fallback(xf86ScrnToScreen(scrn), TRUE);
- intel->force_fallback = TRUE;
- once = 1;
+ } else {
+ /* The driver is broken. */
+ xf86DrvMsg(scrn->scrnIndex, X_ERROR,
+ "Failed to submit batch buffer, expect rendering corruption: %s.\n",
+ strerror(-ret));
}
- } else {
- xf86DrvMsg(scrn->scrnIndex, X_ERROR,
- "Failed to submit batch buffer, expect rendering corruption "
- "or even a frozen display: %s.\n",
- strerror(-ret));
+ uxa_set_force_fallback(xf86ScrnToScreen(scrn), TRUE);
+ intel->force_fallback = TRUE;
+ once = 1;
}
}
@@ -273,8 +294,7 @@ void intel_batch_submit(ScrnInfoPtr scrn)
if (intel->debug_flush & DEBUG_FLUSH_WAIT)
drm_intel_bo_wait_rendering(intel->batch_bo);
- dri_bo_unreference(intel->batch_bo);
- intel_next_batch(scrn);
+ intel_next_batch(scrn, intel->current_batch == I915_EXEC_BLT);
if (intel->batch_commit_notify)
intel->batch_commit_notify(intel);
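Rather than unreferencing the finished batch buffer and allocating a fresh one on every submit, intel_next_batch() now keeps one spare buffer object per ring (render and BLT), swaps the retiring batch with the spare, and clears its stale relocation entries via drm_intel_gem_bo_clear_relocs(). That avoids a kernel allocation round trip per batch while keeping the two rings from trampling each other's buffers. A minimal sketch of the rotation with the buffer objects reduced to plain structs (the libdrm calls are assumed, not shown):

#include <stdio.h>

struct bo { int id; };

static struct bo *batch, *spare[2];	/* [0] render, [1] blt */

static struct bo *swap_batch(int ring)
{
	struct bo *next = spare[ring];

	/* The retiring batch becomes the spare; in the driver its
	 * relocations are cleared with drm_intel_gem_bo_clear_relocs(). */
	spare[ring] = batch;
	return next;
}

int main(void)
{
	struct bo a = {0}, b = {1}, c = {2};

	batch = &a; spare[0] = &b; spare[1] = &c;
	batch = swap_batch(0);	/* render submit: a and b trade places */
	printf("batch=%d spare[0]=%d\n", batch->id, spare[0]->id);
	return 0;
}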
diff --git a/src/intel_display.c b/src/intel_display.c
index d58e6e0b6..5ee955ee6 100644
--- a/src/intel_display.c
+++ b/src/intel_display.c
@@ -31,6 +31,7 @@
#include <sys/types.h>
#include <sys/stat.h>
+#include <sys/poll.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
@@ -270,6 +271,7 @@ intel_output_backlight_init(xf86OutputPtr output)
intel_output->backlight_iface = str;
intel_output->backlight_max = intel_output_backlight_get_max(output);
if (intel_output->backlight_max > 0) {
+ intel_output->backlight_active_level = intel_output_backlight_get(output);
xf86DrvMsg(output->scrn->scrnIndex, X_CONFIG,
"found backlight control interface %s\n", path);
return;
@@ -493,6 +495,8 @@ intel_crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode,
ErrorF("failed to add fb\n");
return FALSE;
}
+
+ drm_intel_bo_disable_reuse(intel->front_buffer);
}
saved_mode = crtc->mode;
@@ -597,6 +601,8 @@ intel_crtc_shadow_allocate(xf86CrtcPtr crtc, int width, int height)
return NULL;
}
+ drm_intel_bo_disable_reuse(intel_crtc->rotate_bo);
+
intel_crtc->rotate_pitch = rotate_pitch;
return intel_crtc->rotate_bo;
}
@@ -723,6 +729,8 @@ intel_set_scanout_pixmap(xf86CrtcPtr crtc, PixmapPtr ppix)
ErrorF("have front buffer\n");
}
+ drm_intel_bo_disable_reuse(bo);
+
intel_crtc->scanout_pixmap = ppix;
return drmModeAddFB(intel->drmSubFD, ppix->drawable.width,
ppix->drawable.height, ppix->drawable.depth,
@@ -1494,6 +1502,7 @@ intel_xf86crtc_resize(ScrnInfoPtr scrn, int width, int height)
if (ret)
goto fail;
+ drm_intel_bo_disable_reuse(intel->front_buffer);
intel->front_pitch = pitch;
intel->front_tiling = tiling;
@@ -1555,6 +1564,7 @@ intel_do_pageflip(intel_screen_private *intel,
new_front->handle, &new_fb_id))
goto error_out;
+ drm_intel_bo_disable_reuse(new_front);
intel_glamor_flush(intel);
intel_batch_submit(scrn);
@@ -1822,6 +1832,26 @@ intel_mode_remove_fb(intel_screen_private *intel)
}
}
+static Bool has_pending_events(int fd)
+{
+ struct pollfd pfd;
+ pfd.fd = fd;
+ pfd.events = POLLIN;
+ return poll(&pfd, 1, 0) == 1;
+}
+
+void
+intel_mode_close(intel_screen_private *intel)
+{
+ struct intel_mode *mode = intel->modes;
+
+ if (mode == NULL)
+ return;
+
+ while (has_pending_events(mode->fd))
+ drmHandleEvent(mode->fd, &mode->event_context);
+}
+
void
intel_mode_fini(intel_screen_private *intel)
{
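intel_mode_close() drains any vblank or page-flip events still queued on the DRM file descriptor before the screen is torn down, so that no handler fires later against freed state; poll() with a zero timeout makes the check non-blocking. A self-contained sketch of the zero-timeout drain idiom, with an ordinary read() standing in for drmHandleEvent():

#include <poll.h>
#include <unistd.h>

/* Non-blocking: returns 1 only while fd has data ready right now. */
static int fd_has_pending(int fd)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	return poll(&pfd, 1, 0) == 1;
}

static void drain(int fd)
{
	char buf[256];

	while (fd_has_pending(fd))
		if (read(fd, buf, sizeof(buf)) <= 0)	/* drmHandleEvent() */
			break;
}

int main(void)
{
	int p[2];

	if (pipe(p) == 0) {
		write(p[1], "x", 1);
		drain(p[0]);
	}
	return 0;
}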
diff --git a/src/intel_dri.c b/src/intel_dri.c
index 867a4653f..f3512034a 100644
--- a/src/intel_dri.c
+++ b/src/intel_dri.c
@@ -451,7 +451,7 @@ I830DRI2CopyRegion(DrawablePtr drawable, RegionPtr pRegion,
/* Wait for the scanline to be outside the region to be copied */
if (scrn->vtSema &&
pixmap_is_scanout(get_drawable_pixmap(dst)) &&
- intel->swapbuffers_wait && INTEL_INFO(intel)->gen < 60) {
+ intel->swapbuffers_wait && INTEL_INFO(intel)->gen < 060) {
BoxPtr box;
BoxRec crtcbox;
int y1, y2;
@@ -485,20 +485,20 @@ I830DRI2CopyRegion(DrawablePtr drawable, RegionPtr pRegion,
* of extra time for the blitter to start up and
* do its job for a full height blit
*/
- if (full_height && INTEL_INFO(intel)->gen < 40)
+ if (full_height && INTEL_INFO(intel)->gen < 040)
y2 -= 2;
if (pipe == 0) {
event = MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW;
load_scan_lines_pipe =
MI_LOAD_SCAN_LINES_DISPLAY_PIPEA;
- if (full_height && INTEL_INFO(intel)->gen >= 40)
+ if (full_height && INTEL_INFO(intel)->gen >= 040)
event = MI_WAIT_FOR_PIPEA_SVBLANK;
} else {
event = MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW;
load_scan_lines_pipe =
MI_LOAD_SCAN_LINES_DISPLAY_PIPEB;
- if (full_height && INTEL_INFO(intel)->gen >= 40)
+ if (full_height && INTEL_INFO(intel)->gen >= 040)
event = MI_WAIT_FOR_PIPEB_SVBLANK;
}
@@ -547,6 +547,23 @@ I830DRI2CopyRegion(DrawablePtr drawable, RegionPtr pRegion,
intel_batch_submit(scrn);
}
+static void
+I830DRI2FallbackBlitSwap(DrawablePtr drawable,
+ DRI2BufferPtr dst,
+ DRI2BufferPtr src)
+{
+ BoxRec box;
+ RegionRec region;
+
+ box.x1 = 0;
+ box.y1 = 0;
+ box.x2 = drawable->width;
+ box.y2 = drawable->height;
+ REGION_INIT(pScreen, &region, &box, 0);
+
+ I830DRI2CopyRegion(drawable, &region, dst, src);
+}
+
#if DRI2INFOREC_VERSION >= 4
static void I830DRI2ReferenceBuffer(DRI2Buffer2Ptr buffer)
@@ -996,17 +1013,8 @@ void I830DRI2FrameEventHandler(unsigned int frame, unsigned int tv_sec,
/* else fall through to exchange/blit */
case DRI2_SWAP: {
- BoxRec box;
- RegionRec region;
-
- box.x1 = 0;
- box.y1 = 0;
- box.x2 = drawable->width;
- box.y2 = drawable->height;
- REGION_INIT(pScreen, &region, &box, 0);
-
- I830DRI2CopyRegion(drawable,
- &region, swap_info->front, swap_info->back);
+ I830DRI2FallbackBlitSwap(drawable,
+ swap_info->front, swap_info->back);
DRI2SwapComplete(swap_info->client, drawable, frame, tv_sec, tv_usec,
DRI2_BLIT_COMPLETE,
swap_info->client ? swap_info->event_complete : NULL,
@@ -1089,17 +1097,10 @@ void I830DRI2FlipEventHandler(unsigned int frame, unsigned int tv_sec,
i830_dri2_del_frame_event(chain_drawable, chain);
} else if (!can_exchange(chain_drawable, chain->front, chain->back) ||
!I830DRI2ScheduleFlip(intel, chain_drawable, chain)) {
- BoxRec box;
- RegionRec region;
-
- box.x1 = 0;
- box.y1 = 0;
- box.x2 = chain_drawable->width;
- box.y2 = chain_drawable->height;
- REGION_INIT(pScreen, &region, &box, 0);
+ I830DRI2FallbackBlitSwap(chain_drawable,
+ chain->front,
+ chain->back);
- I830DRI2CopyRegion(chain_drawable, &region,
- chain->front, chain->back);
DRI2SwapComplete(chain->client, chain_drawable, frame, tv_sec, tv_usec,
DRI2_BLIT_COMPLETE,
chain->client ? chain->event_complete : NULL,
@@ -1162,8 +1163,6 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
DRI2FrameEventPtr swap_info = NULL;
enum DRI2FrameEventType swap_type = DRI2_SWAP;
CARD64 current_msc;
- BoxRec box;
- RegionRec region;
/* Drawable not displayed... just complete the swap */
if (pipe == -1)
@@ -1231,7 +1230,13 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
* the swap.
*/
if (divisor == 0 || current_msc < *target_msc) {
- if (flip && I830DRI2ScheduleFlip(intel, draw, swap_info))
+ /*
+ * If we can, schedule the flip directly from here rather
+ * than waiting for an event from the kernel for the current
+ * (or a past) MSC.
+ */
+ if (flip && divisor == 0 && current_msc >= *target_msc &&
+ I830DRI2ScheduleFlip(intel, draw, swap_info))
return TRUE;
vbl.request.type =
@@ -1313,14 +1318,7 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
return TRUE;
blit_fallback:
- box.x1 = 0;
- box.y1 = 0;
- box.x2 = draw->width;
- box.y2 = draw->height;
- REGION_INIT(pScreen, &region, &box, 0);
-
- I830DRI2CopyRegion(draw, &region, front, back);
-
+ I830DRI2FallbackBlitSwap(draw, front, back);
DRI2SwapComplete(client, draw, 0, 0, 0, DRI2_BLIT_COMPLETE, func, data);
if (swap_info)
i830_dri2_del_frame_event(draw, swap_info);
@@ -1515,6 +1513,17 @@ out_complete:
static int dri2_server_generation;
#endif
+static const char *dri_driver_name(intel_screen_private *intel)
+{
+ const char *s = xf86GetOptValString(intel->Options, OPTION_DRI);
+ Bool dummy;
+
+ if (s == NULL || xf86getBoolValue(&dummy, s))
+ return INTEL_INFO(intel)->gen < 040 ? "i915" : "i965";
+
+ return s;
+}
+
Bool I830DRI2ScreenInit(ScreenPtr screen)
{
ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
@@ -1564,7 +1573,7 @@ Bool I830DRI2ScreenInit(ScreenPtr screen)
intel->deviceName = drmGetDeviceNameFromFd(intel->drmSubFD);
memset(&info, '\0', sizeof(info));
info.fd = intel->drmSubFD;
- info.driverName = INTEL_INFO(intel)->gen < 40 ? "i915" : "i965";
+ info.driverName = dri_driver_name(intel);
info.deviceName = intel->deviceName;
#if DRI2INFOREC_VERSION == 1
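With OPTION_DRI now parsed as a string (see intel_options.c below), dri_driver_name() keeps the old behaviour for boolean-style values, falling back to the gen-based choice between i915 and i965, while any other string is taken verbatim as the DRI driver name. A small sketch of that decision, where parse_bool() stands in for xf86getBoolValue() and recognises only a few spellings for brevity:

#include <stdio.h>
#include <strings.h>

static int parse_bool(const char *s)	/* stand-in for xf86getBoolValue() */
{
	return !strcasecmp(s, "true") || !strcasecmp(s, "false") ||
	       !strcasecmp(s, "on")   || !strcasecmp(s, "off");
}

static const char *dri_name(const char *opt, int gen)
{
	if (opt == NULL || parse_bool(opt))
		return gen < 040 ? "i915" : "i965";
	return opt;	/* e.g. Option "DRI" "i915" forces a driver */
}

int main(void)
{
	printf("%s\n", dri_name(NULL, 030));	/* i915 */
	printf("%s\n", dri_name("true", 075));	/* i965 */
	printf("%s\n", dri_name("i915", 075));	/* i915, forced */
	return 0;
}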
diff --git a/src/intel_driver.c b/src/intel_driver.c
index 65a50088e..780710624 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -221,11 +221,19 @@ static Bool I830GetEarlyOptions(ScrnInfoPtr scrn)
return TRUE;
}
+static Bool intel_option_cast_string_to_bool(intel_screen_private *intel,
+ int id, Bool val)
+{
+ xf86getBoolValue(&val, xf86GetOptValString(intel->Options, id));
+ return val;
+}
+
static void intel_check_dri_option(ScrnInfoPtr scrn)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
+
intel->directRenderingType = DRI_NONE;
- if (!xf86ReturnOptValBool(intel->Options, OPTION_DRI, TRUE))
+ if (!intel_option_cast_string_to_bool(intel, OPTION_DRI, TRUE))
intel->directRenderingType = DRI_DISABLED;
if (scrn->depth != 16 && scrn->depth != 24 && scrn->depth != 30) {
@@ -317,7 +325,7 @@ static int intel_init_bufmgr(intel_screen_private *intel)
list_init(&intel->batch_pixmaps);
- if ((INTEL_INFO(intel)->gen == 60)) {
+ if ((INTEL_INFO(intel)->gen == 060)) {
intel->wa_scratch_bo =
drm_intel_bo_alloc(intel->bufmgr, "wa scratch",
4096, 4096);
@@ -397,13 +405,14 @@ static Bool can_accelerate_blt(struct intel_screen_private *intel)
if (INTEL_INFO(intel)->gen == -1)
return FALSE;
- if (xf86ReturnOptValBool(intel->Options, OPTION_ACCEL_DISABLE, FALSE)) {
+ if (xf86ReturnOptValBool(intel->Options, OPTION_ACCEL_DISABLE, FALSE) ||
+ !intel_option_cast_string_to_bool(intel, OPTION_ACCEL_METHOD, TRUE)) {
xf86DrvMsg(intel->scrn->scrnIndex, X_CONFIG,
"Disabling hardware acceleration.\n");
return FALSE;
}
- if (INTEL_INFO(intel)->gen == 60) {
+ if (INTEL_INFO(intel)->gen == 060) {
struct pci_device *const device = intel->PciInfo;
/* Sandybridge rev07 locks up easily, even with the
@@ -418,7 +427,7 @@ static Bool can_accelerate_blt(struct intel_screen_private *intel)
}
}
- if (INTEL_INFO(intel)->gen >= 60) {
+ if (INTEL_INFO(intel)->gen >= 060) {
drm_i915_getparam_t gp;
int value;
@@ -579,7 +588,7 @@ static Bool I830PreInit(ScrnInfoPtr scrn, int flags)
intel->has_relaxed_fencing =
xf86ReturnOptValBool(intel->Options,
OPTION_RELAXED_FENCING,
- INTEL_INFO(intel)->gen >= 33);
+ INTEL_INFO(intel)->gen >= 033);
/* And override the user if there is no kernel support */
if (intel->has_relaxed_fencing)
intel->has_relaxed_fencing = has_relaxed_fencing(intel);
@@ -677,7 +686,7 @@ void IntelEmitInvarientState(ScrnInfoPtr scrn)
}
#ifdef INTEL_PIXMAP_SHARING
-static Bool
+static void
redisplay_dirty(ScreenPtr screen, PixmapDirtyUpdatePtr dirty)
{
ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
@@ -686,8 +695,19 @@ redisplay_dirty(ScreenPtr screen, PixmapDirtyUpdatePtr dirty)
int was_blocked;
PixmapRegionInit(&pixregion, dirty->slave_dst->master_pixmap);
+ RegionTranslate(&pixregion, dirty->x, dirty->y);
+ RegionIntersect(&pixregion, &pixregion, DamageRegion(dirty->damage));
+ RegionTranslate(&pixregion, -dirty->x, -dirty->y);
+ was_blocked = RegionNil(&pixregion);
+ DamageRegionAppend(&dirty->slave_dst->drawable, &pixregion);
+ RegionUninit(&pixregion);
+ if (was_blocked)
+ return;
+ PixmapRegionInit(&pixregion, dirty->slave_dst->master_pixmap);
PixmapSyncDirtyHelper(dirty, &pixregion);
+ RegionUninit(&pixregion);
+
intel_batch_submit(scrn);
if (!intel->has_prime_vmap_flush) {
drm_intel_bo *bo = intel_get_pixmap_bo(dirty->slave_dst->master_pixmap);
@@ -695,10 +715,10 @@ redisplay_dirty(ScreenPtr screen, PixmapDirtyUpdatePtr dirty)
drm_intel_bo_map(bo, FALSE);
drm_intel_bo_unmap(bo);
xf86UnblockSIGIO(was_blocked);
- }
- DamageRegionAppend(&dirty->slave_dst->drawable, &pixregion);
- RegionUninit(&pixregion);
- return 0;
+ }
+
+ DamageRegionProcessPending(&dirty->slave_dst->drawable);
+ return;
}
static void
@@ -710,7 +730,6 @@ intel_dirty_update(ScreenPtr screen)
if (xorg_list_is_empty(&screen->pixmap_dirty_list))
return;
- ErrorF("list is not empty\n");
xorg_list_for_each_entry(ent, &screen->pixmap_dirty_list, ent) {
region = DamageRegion(ent->damage);
if (RegionNotEmpty(region)) {
@@ -921,7 +940,7 @@ I830ScreenInit(SCREEN_INIT_ARGS_DECL)
intel_batch_init(scrn);
- if (INTEL_INFO(intel)->gen >= 40)
+ if (INTEL_INFO(intel)->gen >= 040)
gen4_render_state_init(scrn);
miClearVisualTypes();
@@ -1014,7 +1033,7 @@ I830ScreenInit(SCREEN_INIT_ARGS_DECL)
xf86DPMSInit(screen, xf86DPMSSet, 0);
#ifdef INTEL_XVMC
- if (INTEL_INFO(intel)->gen >= 40)
+ if (INTEL_INFO(intel)->gen >= 040)
intel->XvMCEnabled = TRUE;
from = ((intel->directRenderingType == DRI_DRI2) &&
xf86GetOptValBool(intel->Options, OPTION_XVMC,
@@ -1139,6 +1158,8 @@ static Bool I830CloseScreen(CLOSE_SCREEN_ARGS_DECL)
I830UeventFini(scrn);
#endif
+ intel_mode_close(intel);
+
DeleteCallback(&FlushCallback, intel_flush_callback, scrn);
intel_glamor_close_screen(screen);
@@ -1174,7 +1195,7 @@ static Bool I830CloseScreen(CLOSE_SCREEN_ARGS_DECL)
intel_batch_teardown(scrn);
- if (INTEL_INFO(intel)->gen >= 40)
+ if (INTEL_INFO(intel)->gen >= 040)
gen4_render_state_cleanup(scrn);
xf86_cursors_fini(screen);
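redisplay_dirty() used to push the entire slave pixmap on every call; it now translates the pixmap region into damage coordinates, intersects it with the accumulated damage, translates back, and returns early when nothing overlaps, so only genuinely dirty rows are copied and flushed. A reduced sketch of the translate/intersect test using single rectangles in place of full regions:

#include <stdio.h>

struct box { int x1, y1, x2, y2; };

static struct box intersect(struct box a, struct box b)
{
	struct box r = {
		a.x1 > b.x1 ? a.x1 : b.x1, a.y1 > b.y1 ? a.y1 : b.y1,
		a.x2 < b.x2 ? a.x2 : b.x2, a.y2 < b.y2 ? a.y2 : b.y2,
	};

	if (r.x1 >= r.x2 || r.y1 >= r.y2)
		r = (struct box){0, 0, 0, 0};	/* empty: skip the sync */
	return r;
}

int main(void)
{
	struct box pixmap = {0, 0, 1024, 768};	/* dirty->x/y assumed 0 */
	struct box damage = {100, 100, 200, 200};
	struct box r = intersect(pixmap, damage);

	printf("update %dx%d\n", r.x2 - r.x1, r.y2 - r.y1);	/* 100x100 */
	return 0;
}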
diff --git a/src/intel_driver.h b/src/intel_driver.h
index b7190620d..c98025bac 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -230,6 +230,9 @@
#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D3A
#define PCI_CHIP_VALLEYVIEW_PO 0x0f30
+#define PCI_CHIP_VALLEYVIEW_1 0x0f31
+#define PCI_CHIP_VALLEYVIEW_2 0x0f32
+#define PCI_CHIP_VALLEYVIEW_3 0x0f33
#endif
@@ -249,7 +252,7 @@
#define CHIP_REVISION(p) (p)->revision
#define INTEL_INFO(intel) ((intel)->info)
-#define IS_GENx(intel, X) (INTEL_INFO(intel)->gen >= 10*(X) && INTEL_INFO(intel)->gen < 10*((X)+1))
+#define IS_GENx(intel, X) (INTEL_INFO(intel)->gen >= 8*(X) && INTEL_INFO(intel)->gen < 8*((X)+1))
#define IS_GEN1(intel) IS_GENx(intel, 1)
#define IS_GEN2(intel) IS_GENx(intel, 2)
#define IS_GEN3(intel) IS_GENx(intel, 3)
@@ -257,7 +260,7 @@
#define IS_GEN5(intel) IS_GENx(intel, 5)
#define IS_GEN6(intel) IS_GENx(intel, 6)
#define IS_GEN7(intel) IS_GENx(intel, 7)
-#define IS_HSW(intel) (INTEL_INFO(intel)->gen == 75)
+#define IS_HSW(intel) (INTEL_INFO(intel)->gen == 075)
/* Some chips have specific errata (or limits) that we need to workaround. */
#define IS_I830(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I830_M)
@@ -270,8 +273,8 @@
#define IS_965_Q(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I965_Q)
/* supports Y tiled surfaces (pre-965 Mesa isn't ready yet) */
-#define SUPPORTS_YTILING(pI810) (INTEL_INFO(intel)->gen >= 40)
-#define HAS_BLT(pI810) (INTEL_INFO(intel)->gen >= 60)
+#define SUPPORTS_YTILING(pI810) (INTEL_INFO(intel)->gen >= 040)
+#define HAS_BLT(pI810) (INTEL_INFO(intel)->gen >= 060)
struct intel_device_info {
int gen;
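This IS_GENx() rework is what forces the octal literals throughout the patch: with eight minor steps per generation, 070 through 077 are all gen7, which lets Ivybridge (070), the new ValleyView IDs (071) and Haswell (075) share IS_GEN7 while IS_HSW remains an exact match on 075. A standalone check of the arithmetic:

#include <assert.h>

#define IS_GENx(gen, X) ((gen) >= 8*(X) && (gen) < 8*((X)+1))

int main(void)
{
	assert(IS_GENx(070, 7));	/* Ivybridge, decimal 56 */
	assert(IS_GENx(071, 7));	/* ValleyView, decimal 57 */
	assert(IS_GENx(075, 7));	/* Haswell, decimal 61 */
	assert(IS_GENx(045, 4));	/* G4x, decimal 37 */
	assert(!IS_GENx(075, 6));	/* and Haswell is not gen6 */
	return 0;
}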
diff --git a/src/intel_hwmc.c b/src/intel_hwmc.c
index af8bd8134..25978d22a 100644
--- a/src/intel_hwmc.c
+++ b/src/intel_hwmc.c
@@ -75,11 +75,11 @@ static int create_context(ScrnInfoPtr scrn, XvMCContextPtr pContext,
contextRec->type = XVMC_I915_MPEG2_MC;
contextRec->i915.use_phys_addr = 0;
} else {
- if (INTEL_INFO(intel)->gen >= 45)
+ if (INTEL_INFO(intel)->gen >= 045)
contextRec->type = XVMC_I965_MPEG2_VLD;
else
contextRec->type = XVMC_I965_MPEG2_MC;
- contextRec->i965.is_g4x = INTEL_INFO(intel)->gen == 45;
+ contextRec->i965.is_g4x = INTEL_INFO(intel)->gen == 045;
contextRec->i965.is_965_q = IS_965_Q(intel);
contextRec->i965.is_igdng = IS_GEN5(intel);
}
@@ -227,7 +227,7 @@ Bool intel_xvmc_adaptor_init(ScreenPtr pScreen)
name = "i915_xvmc",
pAdapt->num_surfaces = ARRAY_SIZE(surface_info_i915);
pAdapt->surfaces = surface_info_i915;
- } else if (INTEL_INFO(intel)->gen >= 45) {
+ } else if (INTEL_INFO(intel)->gen >= 045) {
name = "xvmc_vld",
pAdapt->num_surfaces = ARRAY_SIZE(surface_info_vld);
pAdapt->surfaces = surface_info_vld;
diff --git a/src/intel_memory.c b/src/intel_memory.c
index f08ebdd01..e51fa33a9 100644
--- a/src/intel_memory.c
+++ b/src/intel_memory.c
@@ -94,7 +94,7 @@ unsigned long intel_get_fence_size(intel_screen_private *intel, unsigned long si
unsigned long i;
unsigned long start;
- if (INTEL_INFO(intel)->gen >= 40 || intel->has_relaxed_fencing) {
+ if (INTEL_INFO(intel)->gen >= 040 || intel->has_relaxed_fencing) {
/* The 965 can have fences at any page boundary. */
return ALIGN(size, 4096);
} else {
@@ -127,7 +127,7 @@ intel_get_fence_pitch(intel_screen_private *intel, unsigned long pitch,
return pitch;
/* 965+ is flexible */
- if (INTEL_INFO(intel)->gen >= 40)
+ if (INTEL_INFO(intel)->gen >= 040)
return ALIGN(pitch, tile_width);
/* Pre-965 needs power of two tile width */
@@ -173,7 +173,7 @@ static inline int intel_pad_drawable_width(int width)
static size_t
agp_aperture_size(struct pci_device *dev, int gen)
{
- return dev->regions[gen < 30 ? 0 : 2].size;
+ return dev->regions[gen < 030 ? 0 : 2].size;
}
static void intel_set_gem_max_sizes(ScrnInfoPtr scrn)
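intel_get_fence_size() needs only page alignment on gen4 and later (or with relaxed fencing), since 965-class hardware can place fences at any page boundary; older parts instead round the object up to a power-of-two fence region, which the loop elided from this hunk computes. A sketch of the two policies, with the pre-965 branch written as an assumption about that elided loop:

#include <stdio.h>

#define ALIGN(v, a) (((v) + (a) - 1) & ~((unsigned long)(a) - 1))

static unsigned long fence_size(int gen, unsigned long size)
{
	unsigned long i;

	if (gen >= 040)
		return ALIGN(size, 4096);	/* any page boundary */

	/* Assumed pre-965 behaviour: next power of two. */
	for (i = 4096; i < size; i <<= 1)
		;
	return i;
}

int main(void)
{
	printf("%lu %lu\n", fence_size(040, 150000), fence_size(030, 150000));
	/* 151552 (37 pages) vs 262144 (256 KiB) */
	return 0;
}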
diff --git a/src/intel_module.c b/src/intel_module.c
index e6ca964d6..141f77afe 100644
--- a/src/intel_module.c
+++ b/src/intel_module.c
@@ -56,62 +56,62 @@ static const struct intel_device_info intel_generic_info = {
};
static const struct intel_device_info intel_i81x_info = {
- .gen = 10,
+ .gen = 010,
};
static const struct intel_device_info intel_i830_info = {
- .gen = 20,
+ .gen = 020,
};
static const struct intel_device_info intel_i845_info = {
- .gen = 20,
+ .gen = 020,
};
static const struct intel_device_info intel_i855_info = {
- .gen = 21,
+ .gen = 021,
};
static const struct intel_device_info intel_i865_info = {
- .gen = 22,
+ .gen = 022,
};
static const struct intel_device_info intel_i915_info = {
- .gen = 30,
+ .gen = 030,
};
static const struct intel_device_info intel_i945_info = {
- .gen = 31,
+ .gen = 031,
};
static const struct intel_device_info intel_g33_info = {
- .gen = 33,
+ .gen = 033,
};
static const struct intel_device_info intel_i965_info = {
- .gen = 40,
+ .gen = 040,
};
static const struct intel_device_info intel_g4x_info = {
- .gen = 45,
+ .gen = 045,
};
static const struct intel_device_info intel_ironlake_info = {
- .gen = 50,
+ .gen = 050,
};
static const struct intel_device_info intel_sandybridge_info = {
- .gen = 60,
+ .gen = 060,
};
static const struct intel_device_info intel_ivybridge_info = {
- .gen = 70,
+ .gen = 070,
};
static const struct intel_device_info intel_valleyview_info = {
- .gen = 70,
+ .gen = 071,
};
static const struct intel_device_info intel_haswell_info = {
- .gen = 75,
+ .gen = 075,
};
-static const SymTabRec _intel_chipsets[] = {
+static const SymTabRec intel_chipsets[] = {
{PCI_CHIP_I810, "i810"},
{PCI_CHIP_I810_DC100, "i810-dc100"},
{PCI_CHIP_I810_E, "i810e"},
@@ -199,9 +199,7 @@ static const SymTabRec _intel_chipsets[] = {
{PCI_CHIP_VALLEYVIEW_PO, "ValleyView PO board" },
{-1, NULL}
};
-#define NUM_CHIPSETS (sizeof(_intel_chipsets) / sizeof(_intel_chipsets[0]))
-
-static SymTabRec *intel_chipsets = (SymTabRec *) _intel_chipsets;
+#define NUM_CHIPSETS (sizeof(intel_chipsets) / sizeof(intel_chipsets[0]))
#define INTEL_DEVICE_MATCH(d,i) \
{ 0x8086, (d), PCI_MATCH_ANY, PCI_MATCH_ANY, 0x3 << 16, 0xff << 16, (intptr_t)(i) }
@@ -308,6 +306,9 @@ static const struct pci_id_match intel_device_match[] = {
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_PO, &intel_valleyview_info ),
+ INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_1, &intel_valleyview_info ),
+ INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_2, &intel_valleyview_info ),
+ INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_3, &intel_valleyview_info ),
INTEL_DEVICE_MATCH (PCI_MATCH_ANY, &intel_generic_info ),
#endif
@@ -383,7 +384,7 @@ static Bool intel_driver_func(ScrnInfoPtr pScrn,
}
}
-static Bool has_kernel_mode_setting(struct pci_device *dev)
+static Bool has_kernel_mode_setting(const struct pci_device *dev)
{
char id[20];
int ret, fd;
@@ -418,7 +419,6 @@ static Bool has_kernel_mode_setting(struct pci_device *dev)
if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
ret = FALSE;
}
-
close(fd);
}
@@ -465,50 +465,15 @@ static enum accel_method { UXA, SNA } get_accel_method(void)
}
#endif
-/*
- * intel_pci_probe --
- *
- * Look through the PCI bus to find cards that are intel boards.
- * Setup the dispatch table for the rest of the driver functions.
- *
- */
-static Bool intel_pci_probe(DriverPtr driver,
- int entity_num,
- struct pci_device *device,
- intptr_t match_data)
+static Bool
+intel_scrn_create(DriverPtr driver,
+ int entity_num,
+ intptr_t match_data,
+ unsigned flags)
{
ScrnInfoPtr scrn;
- PciChipsets intel_pci_chipsets[NUM_CHIPSETS];
- unsigned i;
-
- if (!has_kernel_mode_setting(device)) {
-#if KMS_ONLY
- return FALSE;
-#else
- switch (DEVICE_ID(device)) {
- case PCI_CHIP_I810:
- case PCI_CHIP_I810_DC100:
- case PCI_CHIP_I810_E:
- case PCI_CHIP_I815:
- break;
- default:
- return FALSE;
- }
-#endif
- }
- for (i = 0; i < NUM_CHIPSETS; i++) {
- intel_pci_chipsets[i].numChipset = intel_chipsets[i].token;
- intel_pci_chipsets[i].PCIid = intel_chipsets[i].token;
-#if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,6,99,0,0)
- intel_pci_chipsets[i].resList = RES_SHARED_VGA;
-#else
- intel_pci_chipsets[i].dummy = NULL;
-#endif
- }
-
- scrn = xf86ConfigPciEntity(NULL, 0, entity_num, intel_pci_chipsets,
- NULL, NULL, NULL, NULL, NULL);
+ scrn = xf86AllocateScreen(driver, flags);
if (scrn == NULL)
return FALSE;
@@ -518,14 +483,13 @@ static Bool intel_pci_probe(DriverPtr driver,
scrn->driverPrivate = (void *)(match_data | 1);
scrn->Probe = NULL;
+ if (xf86IsEntitySharable(entity_num))
+ xf86SetEntityShared(entity_num);
+ xf86AddEntityToScreen(scrn, entity_num);
+
#if !KMS_ONLY
- switch (DEVICE_ID(device)) {
- case PCI_CHIP_I810:
- case PCI_CHIP_I810_DC100:
- case PCI_CHIP_I810_E:
- case PCI_CHIP_I815:
+ if ((unsigned)((struct intel_device_info *)match_data)->gen < 020)
return lg_i810_init(scrn);
- }
#endif
#if !UMS_ONLY
@@ -533,7 +497,6 @@ static Bool intel_pci_probe(DriverPtr driver,
#if USE_SNA
case SNA: return sna_init_scrn(scrn, entity_num);
#endif
-
#if USE_UXA
case UXA: return intel_init_scrn(scrn);
#endif
@@ -545,6 +508,37 @@ static Bool intel_pci_probe(DriverPtr driver,
return FALSE;
}
+/*
+ * intel_pci_probe --
+ *
+ * Look through the PCI bus to find cards that are intel boards.
+ * Setup the dispatch table for the rest of the driver functions.
+ *
+ */
+static Bool intel_pci_probe(DriverPtr driver,
+ int entity_num,
+ struct pci_device *device,
+ intptr_t match_data)
+{
+ if (!has_kernel_mode_setting(device)) {
+#if KMS_ONLY
+ return FALSE;
+#else
+ switch (DEVICE_ID(device)) {
+ case PCI_CHIP_I810:
+ case PCI_CHIP_I810_DC100:
+ case PCI_CHIP_I810_E:
+ case PCI_CHIP_I815:
+ break;
+ default:
+ return FALSE;
+ }
+#endif
+ }
+
+ return intel_scrn_create(driver, entity_num, match_data, 0);
+}
+
#ifdef XSERVER_PLATFORM_BUS
static Bool
intel_platform_probe(DriverPtr driver,
@@ -552,13 +546,14 @@ intel_platform_probe(DriverPtr driver,
struct xf86_platform_device *dev,
intptr_t match_data)
{
- ScrnInfoPtr scrn = NULL;
- char *path = xf86_get_platform_device_attrib(dev, ODEV_ATTRIB_PATH);
unsigned scrn_flags = 0;
if (!dev->pdev)
return FALSE;
+ if (!has_kernel_mode_setting(dev->pdev))
+ return FALSE;
+
/* Allow ourselves to act as a slaved output if not primary */
if (flags & PLATFORM_PROBE_GPU_SCREEN) {
flags &= ~PLATFORM_PROBE_GPU_SCREEN;
@@ -569,37 +564,7 @@ intel_platform_probe(DriverPtr driver,
if (flags)
return FALSE;
- scrn = xf86AllocateScreen(driver, scrn_flags);
- if (scrn == NULL)
- return FALSE;
-
- scrn->driverVersion = INTEL_VERSION;
- scrn->driverName = INTEL_DRIVER_NAME;
- scrn->name = INTEL_NAME;
- scrn->driverPrivate = (void *)(match_data | 1);
- scrn->Probe = NULL;
-
- if (xf86IsEntitySharable(entity_num))
- xf86SetEntityShared(entity_num);
- xf86AddEntityToScreen(scrn, entity_num);
-
- xf86DrvMsg(scrn->scrnIndex, X_INFO,
- "using device path '%s'\n", path ? path : "Default device");
-
-#if !UMS_ONLY
- switch (get_accel_method()) {
-#if USE_SNA
- case SNA: return sna_init_scrn(scrn, entity_num);
-#endif
-#if USE_UXA
- case UXA: return intel_init_scrn(scrn);
-#endif
-
- default: break;
- }
-#endif
-
- return FALSE;
+ return intel_scrn_create(driver, entity_num, match_data, scrn_flags);
}
#endif
diff --git a/src/intel_options.c b/src/intel_options.c
index dcab9e729..fda2e8b0f 100644
--- a/src/intel_options.c
+++ b/src/intel_options.c
@@ -8,12 +8,13 @@ const OptionInfoRec intel_options[] = {
{OPTION_ACCEL_DISABLE, "NoAccel", OPTV_BOOLEAN, {0}, 0},
{OPTION_ACCEL_METHOD, "AccelMethod", OPTV_STRING, {0}, 0},
{OPTION_BACKLIGHT, "Backlight", OPTV_STRING, {0}, 0},
- {OPTION_DRI, "DRI", OPTV_BOOLEAN, {0}, 1},
+ {OPTION_DRI, "DRI", OPTV_STRING, {0}, 0},
{OPTION_COLOR_KEY, "ColorKey", OPTV_INTEGER, {0}, 0},
{OPTION_VIDEO_KEY, "VideoKey", OPTV_INTEGER, {0}, 0},
{OPTION_TILING_2D, "Tiling", OPTV_BOOLEAN, {0}, 1},
{OPTION_TILING_FB, "LinearFramebuffer", OPTV_BOOLEAN, {0}, 0},
{OPTION_SWAPBUFFERS_WAIT, "SwapbuffersWait", OPTV_BOOLEAN, {0}, 1},
+ {OPTION_TRIPLE_BUFFER, "TripleBuffer", OPTV_BOOLEAN, {0}, 1},
{OPTION_PREFER_OVERLAY, "XvPreferOverlay", OPTV_BOOLEAN, {0}, 0},
{OPTION_HOTPLUG, "HotPlug", OPTV_BOOLEAN, {0}, 1},
{OPTION_RELAXED_FENCING,"RelaxedFencing", OPTV_BOOLEAN, {0}, 1},
@@ -21,9 +22,7 @@ const OptionInfoRec intel_options[] = {
{OPTION_XVMC, "XvMC", OPTV_BOOLEAN, {0}, 1},
#endif
#ifdef USE_SNA
- {OPTION_THROTTLE, "Throttle", OPTV_BOOLEAN, {0}, 1},
{OPTION_ZAPHOD, "ZaphodHeads", OPTV_STRING, {0}, 0},
- {OPTION_DELAYED_FLUSH, "DelayedFlush", OPTV_BOOLEAN, {0}, 1},
{OPTION_TEAR_FREE, "TearFree", OPTV_BOOLEAN, {0}, 0},
{OPTION_CRTC_PIXMAPS, "PerCrtcPixmaps", OPTV_BOOLEAN, {0}, 0},
#endif
@@ -33,7 +32,6 @@ const OptionInfoRec intel_options[] = {
{OPTION_DEBUG_FLUSH_CACHES, "DebugFlushCaches", OPTV_BOOLEAN, {0}, 0},
{OPTION_DEBUG_WAIT, "DebugWait", OPTV_BOOLEAN, {0}, 0},
{OPTION_BUFFER_CACHE, "BufferCache", OPTV_BOOLEAN, {0}, 1},
- {OPTION_TRIPLE_BUFFER, "TripleBuffer", OPTV_BOOLEAN, {0}, 1},
#endif
{-1, NULL, OPTV_NONE, {0}, 0}
};
diff --git a/src/intel_options.h b/src/intel_options.h
index 3b5262a55..8fa7a8fb8 100644
--- a/src/intel_options.h
+++ b/src/intel_options.h
@@ -20,6 +20,7 @@ enum intel_options {
OPTION_TILING_2D,
OPTION_TILING_FB,
OPTION_SWAPBUFFERS_WAIT,
+ OPTION_TRIPLE_BUFFER,
OPTION_PREFER_OVERLAY,
OPTION_HOTPLUG,
OPTION_RELAXED_FENCING,
@@ -28,9 +29,7 @@ enum intel_options {
#define INTEL_XVMC 1
#endif
#ifdef USE_SNA
- OPTION_THROTTLE,
OPTION_ZAPHOD,
- OPTION_DELAYED_FLUSH,
OPTION_TEAR_FREE,
OPTION_CRTC_PIXMAPS,
#endif
@@ -40,7 +39,6 @@ enum intel_options {
OPTION_DEBUG_FLUSH_CACHES,
OPTION_DEBUG_WAIT,
OPTION_BUFFER_CACHE,
- OPTION_TRIPLE_BUFFER,
#endif
NUM_OPTIONS,
};
diff --git a/src/intel_uxa.c b/src/intel_uxa.c
index 6d202c776..2f141735d 100644
--- a/src/intel_uxa.c
+++ b/src/intel_uxa.c
@@ -170,7 +170,7 @@ intel_uxa_pixmap_compute_size(PixmapPtr pixmap,
pitch = (w * pixmap->drawable.bitsPerPixel + 7) / 8;
pitch = ALIGN(pitch, 64);
size = pitch * ALIGN (h, 2);
- if (INTEL_INFO(intel)->gen < 40) {
+ if (INTEL_INFO(intel)->gen < 040) {
/* Gen 2/3 has a maximum stride for tiling of
* 8192 bytes.
*/
@@ -209,7 +209,7 @@ intel_uxa_pixmap_compute_size(PixmapPtr pixmap,
tile_height = 8;
else
tile_height = 32;
- aligned_h = ALIGN(h, tile_height);
+ aligned_h = ALIGN(h, 2*tile_height);
*stride = intel_get_fence_pitch(intel,
ALIGN(pitch, 512),
@@ -331,7 +331,7 @@ static void intel_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2)
cmd |=
XY_COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
- if (INTEL_INFO(intel)->gen >= 40 && intel_pixmap_tiled(pixmap)) {
+ if (INTEL_INFO(intel)->gen >= 040 && intel_pixmap_tiled(pixmap)) {
assert((pitch % 512) == 0);
pitch >>= 2;
cmd |= XY_COLOR_BLT_TILED;
@@ -470,7 +470,7 @@ intel_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1,
XY_SRC_COPY_BLT_WRITE_ALPHA |
XY_SRC_COPY_BLT_WRITE_RGB;
- if (INTEL_INFO(intel)->gen >= 40) {
+ if (INTEL_INFO(intel)->gen >= 040) {
if (intel_pixmap_tiled(dest)) {
assert((dst_pitch % 512) == 0);
dst_pitch >>= 2;
@@ -1281,7 +1281,7 @@ intel_limits_init(intel_screen_private *intel)
* the front, which will have an appropriate pitch/offset already set up,
* so UXA doesn't need to worry.
*/
- if (INTEL_INFO(intel)->gen >= 40) {
+ if (INTEL_INFO(intel)->gen >= 040) {
intel->accel_pixmap_offset_alignment = 4 * 2;
intel->accel_max_x = 8192;
intel->accel_max_y = 8192;
@@ -1292,6 +1292,17 @@ intel_limits_init(intel_screen_private *intel)
}
}
+static Bool intel_option_accel_blt(intel_screen_private *intel)
+{
+ const char *s;
+
+ s = xf86GetOptValString(intel->Options, OPTION_ACCEL_METHOD);
+ if (s == NULL)
+ return FALSE;
+
+ return strcasecmp(s, "blt") == 0;
+}
+
Bool intel_uxa_init(ScreenPtr screen)
{
ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
@@ -1338,7 +1349,8 @@ Bool intel_uxa_init(ScreenPtr screen)
intel->uxa_driver->done_copy = intel_uxa_done;
/* Composite */
- if (IS_GEN2(intel)) {
+ if (intel_option_accel_blt(intel)) {
+ } else if (IS_GEN2(intel)) {
intel->uxa_driver->check_composite = i830_check_composite;
intel->uxa_driver->check_composite_target = i830_check_composite_target;
intel->uxa_driver->check_composite_texture = i830_check_composite_texture;
diff --git a/src/intel_video.c b/src/intel_video.c
index 09782aa5d..6cce18240 100644
--- a/src/intel_video.c
+++ b/src/intel_video.c
@@ -353,7 +353,7 @@ void I830InitVideo(ScreenPtr screen)
* supported hardware.
*/
if (scrn->bitsPerPixel >= 16 &&
- INTEL_INFO(intel)->gen >= 30) {
+ INTEL_INFO(intel)->gen >= 030) {
texturedAdaptor = I830SetupImageVideoTextured(screen);
if (texturedAdaptor != NULL) {
xf86DrvMsg(scrn->scrnIndex, X_INFO,
@@ -436,7 +436,7 @@ static XF86VideoAdaptorPtr I830SetupImageVideoOverlay(ScreenPtr screen)
adapt->pPortPrivates[0].ptr = (pointer) (adaptor_priv);
adapt->nAttributes = NUM_ATTRIBUTES;
- if (INTEL_INFO(intel)->gen >= 30)
+ if (INTEL_INFO(intel)->gen >= 030)
adapt->nAttributes += GAMMA_ATTRIBUTES; /* has gamma */
adapt->pAttributes =
xnfalloc(sizeof(XF86AttributeRec) * adapt->nAttributes);
@@ -445,7 +445,7 @@ static XF86VideoAdaptorPtr I830SetupImageVideoOverlay(ScreenPtr screen)
memcpy((char *)att, (char *)Attributes,
sizeof(XF86AttributeRec) * NUM_ATTRIBUTES);
att += NUM_ATTRIBUTES;
- if (INTEL_INFO(intel)->gen >= 30) {
+ if (INTEL_INFO(intel)->gen >= 030) {
memcpy((char *)att, (char *)GammaAttributes,
sizeof(XF86AttributeRec) * GAMMA_ATTRIBUTES);
}
@@ -495,7 +495,7 @@ static XF86VideoAdaptorPtr I830SetupImageVideoOverlay(ScreenPtr screen)
/* Allow the pipe to be switched from pipe A to B when in clone mode */
xvPipe = MAKE_ATOM("XV_PIPE");
- if (INTEL_INFO(intel)->gen >= 30) {
+ if (INTEL_INFO(intel)->gen >= 030) {
xvGamma0 = MAKE_ATOM("XV_GAMMA0");
xvGamma1 = MAKE_ATOM("XV_GAMMA1");
xvGamma2 = MAKE_ATOM("XV_GAMMA2");
@@ -681,17 +681,17 @@ I830SetPortAttributeOverlay(ScrnInfoPtr scrn,
adaptor_priv->desired_crtc = NULL;
else
adaptor_priv->desired_crtc = xf86_config->crtc[value];
- } else if (attribute == xvGamma0 && (INTEL_INFO(intel)->gen >= 30)) {
+ } else if (attribute == xvGamma0 && (INTEL_INFO(intel)->gen >= 030)) {
adaptor_priv->gamma0 = value;
- } else if (attribute == xvGamma1 && (INTEL_INFO(intel)->gen >= 30)) {
+ } else if (attribute == xvGamma1 && (INTEL_INFO(intel)->gen >= 030)) {
adaptor_priv->gamma1 = value;
- } else if (attribute == xvGamma2 && (INTEL_INFO(intel)->gen >= 30)) {
+ } else if (attribute == xvGamma2 && (INTEL_INFO(intel)->gen >= 030)) {
adaptor_priv->gamma2 = value;
- } else if (attribute == xvGamma3 && (INTEL_INFO(intel)->gen >= 30)) {
+ } else if (attribute == xvGamma3 && (INTEL_INFO(intel)->gen >= 030)) {
adaptor_priv->gamma3 = value;
- } else if (attribute == xvGamma4 && (INTEL_INFO(intel)->gen >= 30)) {
+ } else if (attribute == xvGamma4 && (INTEL_INFO(intel)->gen >= 030)) {
adaptor_priv->gamma4 = value;
- } else if (attribute == xvGamma5 && (INTEL_INFO(intel)->gen >= 30)) {
+ } else if (attribute == xvGamma5 && (INTEL_INFO(intel)->gen >= 030)) {
adaptor_priv->gamma5 = value;
} else if (attribute == xvColorKey) {
adaptor_priv->colorKey = value;
@@ -704,7 +704,7 @@ I830SetPortAttributeOverlay(ScrnInfoPtr scrn,
attribute == xvGamma2 ||
attribute == xvGamma3 ||
attribute == xvGamma4 ||
- attribute == xvGamma5) && (INTEL_INFO(intel)->gen >= 30)) {
+ attribute == xvGamma5) && (INTEL_INFO(intel)->gen >= 030)) {
OVERLAY_DEBUG("GAMMA\n");
}
@@ -739,17 +739,17 @@ I830GetPortAttribute(ScrnInfoPtr scrn,
if (c == xf86_config->num_crtc)
c = -1;
*value = c;
- } else if (attribute == xvGamma0 && (INTEL_INFO(intel)->gen >= 30)) {
+ } else if (attribute == xvGamma0 && (INTEL_INFO(intel)->gen >= 030)) {
*value = adaptor_priv->gamma0;
- } else if (attribute == xvGamma1 && (INTEL_INFO(intel)->gen >= 30)) {
+ } else if (attribute == xvGamma1 && (INTEL_INFO(intel)->gen >= 030)) {
*value = adaptor_priv->gamma1;
- } else if (attribute == xvGamma2 && (INTEL_INFO(intel)->gen >= 30)) {
+ } else if (attribute == xvGamma2 && (INTEL_INFO(intel)->gen >= 030)) {
*value = adaptor_priv->gamma2;
- } else if (attribute == xvGamma3 && (INTEL_INFO(intel)->gen >= 30)) {
+ } else if (attribute == xvGamma3 && (INTEL_INFO(intel)->gen >= 030)) {
*value = adaptor_priv->gamma3;
- } else if (attribute == xvGamma4 && (INTEL_INFO(intel)->gen >= 30)) {
+ } else if (attribute == xvGamma4 && (INTEL_INFO(intel)->gen >= 030)) {
*value = adaptor_priv->gamma4;
- } else if (attribute == xvGamma5 && (INTEL_INFO(intel)->gen >= 30)) {
+ } else if (attribute == xvGamma5 && (INTEL_INFO(intel)->gen >= 030)) {
*value = adaptor_priv->gamma5;
} else if (attribute == xvColorKey) {
*value = adaptor_priv->colorKey;
@@ -1313,18 +1313,18 @@ intel_wait_for_scanline(ScrnInfoPtr scrn, PixmapPtr pixmap,
* of extra time for the blitter to start up and
* do its job for a full height blit
*/
- if (full_height && INTEL_INFO(intel)->gen < 40)
+ if (full_height && INTEL_INFO(intel)->gen < 040)
y2 -= 2;
if (pipe == 0) {
pipe = MI_LOAD_SCAN_LINES_DISPLAY_PIPEA;
event = MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW;
- if (full_height && INTEL_INFO(intel)->gen >= 40)
+ if (full_height && INTEL_INFO(intel)->gen >= 040)
event = MI_WAIT_FOR_PIPEA_SVBLANK;
} else {
pipe = MI_LOAD_SCAN_LINES_DISPLAY_PIPEB;
event = MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW;
- if (full_height && INTEL_INFO(intel)->gen >= 40)
+ if (full_height && INTEL_INFO(intel)->gen >= 040)
event = MI_WAIT_FOR_PIPEB_SVBLANK;
}
@@ -1381,7 +1381,7 @@ intel_setup_dst_params(ScrnInfoPtr scrn, intel_adaptor_private *adaptor_priv, sh
if (adaptor_priv->textured) {
pitchAlign = 4;
} else {
- if (INTEL_INFO(intel)->gen >= 40)
+ if (INTEL_INFO(intel)->gen >= 040)
/* Actually the alignment is 64 bytes, too. But the
* stride must be at least 512 bytes. Take the easy fix
* and align on 512 bytes unconditionally. */
@@ -1561,16 +1561,16 @@ I830PutImageTextured(ScrnInfoPtr scrn,
return BadAlloc;
}
- if (crtc && adaptor_priv->SyncToVblank != 0 && INTEL_INFO(intel)->gen < 60) {
+ if (crtc && adaptor_priv->SyncToVblank != 0 && INTEL_INFO(intel)->gen < 060) {
intel_wait_for_scanline(scrn, pixmap, crtc, clipBoxes);
}
- if (INTEL_INFO(intel)->gen >= 60) {
+ if (INTEL_INFO(intel)->gen >= 060) {
Gen6DisplayVideoTextured(scrn, adaptor_priv, id, clipBoxes,
width, height, dstPitch, dstPitch2,
src_w, src_h,
drw_w, drw_h, pixmap);
- } else if (INTEL_INFO(intel)->gen >= 40) {
+ } else if (INTEL_INFO(intel)->gen >= 040) {
I965DisplayVideoTextured(scrn, adaptor_priv, id, clipBoxes,
width, height, dstPitch, dstPitch2,
src_w, src_h,
diff --git a/src/legacy/Makefile.in b/src/legacy/Makefile.in
index a086138e1..928cf6401 100644
--- a/src/legacy/Makefile.in
+++ b/src/legacy/Makefile.in
@@ -218,7 +218,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MAN_SUBSTS = @MAN_SUBSTS@
@@ -257,6 +256,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@
VERSION = @VERSION@
X11_CFLAGS = @X11_CFLAGS@
X11_LIBS = @X11_LIBS@
+XCB_CFLAGS = @XCB_CFLAGS@
+XCB_LIBS = @XCB_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_LIBS = @XORG_LIBS@
XORG_MAN_PAGE = @XORG_MAN_PAGE@
@@ -324,7 +325,7 @@ EXTRA_DIST = README
all: all-recursive
.SUFFIXES:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -349,9 +350,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
diff --git a/src/legacy/i810/Makefile.in b/src/legacy/i810/Makefile.in
index 8f339e3fc..c992feb5f 100644
--- a/src/legacy/i810/Makefile.in
+++ b/src/legacy/i810/Makefile.in
@@ -237,7 +237,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MAN_SUBSTS = @MAN_SUBSTS@
@@ -276,6 +275,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@
VERSION = @VERSION@
X11_CFLAGS = @X11_CFLAGS@
X11_LIBS = @X11_LIBS@
+XCB_CFLAGS = @XCB_CFLAGS@
+XCB_LIBS = @XCB_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_LIBS = @XORG_LIBS@
XORG_MAN_PAGE = @XORG_MAN_PAGE@
@@ -349,7 +350,7 @@ all: all-recursive
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -374,9 +375,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
diff --git a/src/legacy/i810/xvmc/Makefile.in b/src/legacy/i810/xvmc/Makefile.in
index 6e03d10c7..830d67767 100644
--- a/src/legacy/i810/xvmc/Makefile.in
+++ b/src/legacy/i810/xvmc/Makefile.in
@@ -206,7 +206,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MAN_SUBSTS = @MAN_SUBSTS@
@@ -245,6 +244,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@
VERSION = @VERSION@
X11_CFLAGS = @X11_CFLAGS@
X11_LIBS = @X11_LIBS@
+XCB_CFLAGS = @XCB_CFLAGS@
+XCB_LIBS = @XCB_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_LIBS = @XORG_LIBS@
XORG_MAN_PAGE = @XORG_MAN_PAGE@
@@ -314,7 +315,7 @@ all: all-am
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -339,9 +340,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
install-libLTLIBRARIES: $(lib_LTLIBRARIES)
diff --git a/src/render_program/Makefile.am b/src/render_program/Makefile.am
index 8b82b2e0e..1298625fd 100644
--- a/src/render_program/Makefile.am
+++ b/src/render_program/Makefile.am
@@ -191,21 +191,21 @@ if HAVE_GEN4ASM
SUFFIXES = .g4a .g4b .g5a .g5b .g6a .g6b .g7a .g7b
.g4a.g4b:
- m4 -I$(srcdir) -s $< > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m
+ $(AM_V_GEN)m4 -I$(srcdir) -s $< > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m
.g5a.g5b:
- m4 -I$(srcdir) -s $< > $*.g5m && @INTEL_GEN4ASM@ -g 5 -o $@ $*.g5m && rm $*.g5m
+ $(AM_V_GEN)m4 -I$(srcdir) -s $< > $*.g5m && @INTEL_GEN4ASM@ -g 5 -o $@ $*.g5m && rm $*.g5m
.g6a.g6b:
- m4 -I$(srcdir) -s $< > $*.g6m && @INTEL_GEN4ASM@ -g 6 -o $@ $*.g6m && rm $*.g6m
+ $(AM_V_GEN)m4 -I$(srcdir) -s $< > $*.g6m && @INTEL_GEN4ASM@ -g 6 -o $@ $*.g6m && rm $*.g6m
.g7a.g7b:
- m4 -I$(srcdir) -s $< > $*.g7m && @INTEL_GEN4ASM@ -g 7 -o $@ $*.g7m && rm $*.g7m
+ $(AM_V_GEN)m4 -I$(srcdir) -s $< > $*.g7m && @INTEL_GEN4ASM@ -g 7 -o $@ $*.g7m && rm $*.g7m
-$(INTEL_G4B): $(INTEL_G4I)
-$(INTEL_G5B): $(INTEL_G4I)
-$(INTEL_G6B): $(INTEL_G4I) $(INTEL_G6I)
-$(INTEL_G7B): $(INTEL_G4I) $(INTEL_G6I)
+$(INTEL_G4B): $(INTEL_GEN4ASM) $(INTEL_G4I)
+$(INTEL_G5B): $(INTEL_GEN4ASM) $(INTEL_G4I)
+$(INTEL_G6B): $(INTEL_GEN4ASM) $(INTEL_G4I) $(INTEL_G6I)
+$(INTEL_G7B): $(INTEL_GEN4ASM) $(INTEL_G4I) $(INTEL_G6I)
BUILT_SOURCES= $(INTEL_G4B) $(INTEL_G5B) $(INTEL_G6B) $(INTEL_G7B)
diff --git a/src/render_program/Makefile.in b/src/render_program/Makefile.in
index c079921f3..c941f240c 100644
--- a/src/render_program/Makefile.in
+++ b/src/render_program/Makefile.in
@@ -143,7 +143,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MAN_SUBSTS = @MAN_SUBSTS@
@@ -182,6 +181,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@
VERSION = @VERSION@
X11_CFLAGS = @X11_CFLAGS@
X11_LIBS = @X11_LIBS@
+XCB_CFLAGS = @XCB_CFLAGS@
+XCB_LIBS = @XCB_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_LIBS = @XORG_LIBS@
XORG_MAN_PAGE = @XORG_MAN_PAGE@
@@ -435,7 +436,7 @@ all: $(BUILT_SOURCES)
.SUFFIXES:
.SUFFIXES: .g4a .g4b .g5a .g5b .g6a .g6b .g7a .g7b
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -460,9 +461,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
@@ -627,21 +628,21 @@ uninstall-am:
uninstall uninstall-am
@HAVE_GEN4ASM_TRUE@.g4a.g4b:
-@HAVE_GEN4ASM_TRUE@ m4 -I$(srcdir) -s $< > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m
+@HAVE_GEN4ASM_TRUE@ $(AM_V_GEN)m4 -I$(srcdir) -s $< > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m
@HAVE_GEN4ASM_TRUE@.g5a.g5b:
-@HAVE_GEN4ASM_TRUE@ m4 -I$(srcdir) -s $< > $*.g5m && @INTEL_GEN4ASM@ -g 5 -o $@ $*.g5m && rm $*.g5m
+@HAVE_GEN4ASM_TRUE@ $(AM_V_GEN)m4 -I$(srcdir) -s $< > $*.g5m && @INTEL_GEN4ASM@ -g 5 -o $@ $*.g5m && rm $*.g5m
@HAVE_GEN4ASM_TRUE@.g6a.g6b:
-@HAVE_GEN4ASM_TRUE@ m4 -I$(srcdir) -s $< > $*.g6m && @INTEL_GEN4ASM@ -g 6 -o $@ $*.g6m && rm $*.g6m
+@HAVE_GEN4ASM_TRUE@ $(AM_V_GEN)m4 -I$(srcdir) -s $< > $*.g6m && @INTEL_GEN4ASM@ -g 6 -o $@ $*.g6m && rm $*.g6m
@HAVE_GEN4ASM_TRUE@.g7a.g7b:
-@HAVE_GEN4ASM_TRUE@ m4 -I$(srcdir) -s $< > $*.g7m && @INTEL_GEN4ASM@ -g 7 -o $@ $*.g7m && rm $*.g7m
+@HAVE_GEN4ASM_TRUE@ $(AM_V_GEN)m4 -I$(srcdir) -s $< > $*.g7m && @INTEL_GEN4ASM@ -g 7 -o $@ $*.g7m && rm $*.g7m
-@HAVE_GEN4ASM_TRUE@$(INTEL_G4B): $(INTEL_G4I)
-@HAVE_GEN4ASM_TRUE@$(INTEL_G5B): $(INTEL_G4I)
-@HAVE_GEN4ASM_TRUE@$(INTEL_G6B): $(INTEL_G4I) $(INTEL_G6I)
-@HAVE_GEN4ASM_TRUE@$(INTEL_G7B): $(INTEL_G4I) $(INTEL_G6I)
+@HAVE_GEN4ASM_TRUE@$(INTEL_G4B): $(INTEL_GEN4ASM) $(INTEL_G4I)
+@HAVE_GEN4ASM_TRUE@$(INTEL_G5B): $(INTEL_GEN4ASM) $(INTEL_G4I)
+@HAVE_GEN4ASM_TRUE@$(INTEL_G6B): $(INTEL_GEN4ASM) $(INTEL_G4I) $(INTEL_G6I)
+@HAVE_GEN4ASM_TRUE@$(INTEL_G7B): $(INTEL_GEN4ASM) $(INTEL_G4I) $(INTEL_G6I)
@HAVE_GEN4ASM_TRUE@clean-local:
@HAVE_GEN4ASM_TRUE@ -rm -f $(INTEL_G4B) $(INTEL_G4B_GEN5)
diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am
index 306996b57..c74c904dc 100644
--- a/src/sna/Makefile.am
+++ b/src/sna/Makefile.am
@@ -34,9 +34,11 @@ AM_CFLAGS += @VALGRIND_CFLAGS@
endif
noinst_LTLIBRARIES = libsna.la
+libsna_la_LDFLAGS = -pthread
libsna_la_LIBADD = @UDEV_LIBS@ -lm @DRM_LIBS@ brw/libbrw.la fb/libfb.la
libsna_la_SOURCES = \
+ atomic.h \
blt.c \
compiler.h \
kgem.c \
@@ -62,6 +64,8 @@ libsna_la_SOURCES = \
sna_trapezoids.c \
sna_tiling.c \
sna_transform.c \
+ sna_threads.c \
+ sna_vertex.c \
sna_video.c \
sna_video.h \
sna_video_overlay.c \
@@ -73,6 +77,10 @@ libsna_la_SOURCES = \
gen3_render.h \
gen4_render.c \
gen4_render.h \
+ gen4_source.c \
+ gen4_source.h \
+ gen4_vertex.c \
+ gen4_vertex.h \
gen5_render.c \
gen5_render.h \
gen6_render.c \
diff --git a/src/sna/Makefile.in b/src/sna/Makefile.in
index 978d36e1d..7b80b60be 100644
--- a/src/sna/Makefile.in
+++ b/src/sna/Makefile.in
@@ -106,18 +106,19 @@ am__DEPENDENCIES_1 =
@DRI2_TRUE@am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1)
libsna_la_DEPENDENCIES = brw/libbrw.la fb/libfb.la \
$(am__DEPENDENCIES_2)
-am__libsna_la_SOURCES_DIST = blt.c compiler.h kgem.c kgem.h rop.h \
- sna.h sna_accel.c sna_blt.c sna_composite.c sna_damage.c \
+am__libsna_la_SOURCES_DIST = atomic.h blt.c compiler.h kgem.c kgem.h \
+ rop.h sna.h sna_accel.c sna_blt.c sna_composite.c sna_damage.c \
sna_damage.h sna_display.c sna_driver.c sna_glyphs.c \
sna_gradient.c sna_io.c sna_module.h sna_render.c sna_render.h \
sna_render_inline.h sna_reg.h sna_stream.c sna_trapezoids.c \
- sna_tiling.c sna_transform.c sna_video.c sna_video.h \
- sna_video_overlay.c sna_video_sprite.c sna_video_textured.c \
- gen2_render.c gen2_render.h gen3_render.c gen3_render.h \
- gen4_render.c gen4_render.h gen5_render.c gen5_render.h \
- gen6_render.c gen6_render.h gen7_render.c gen7_render.h \
- sna_dri.c sna_video_hwmc.h sna_video_hwmc.c kgem_debug.c \
- kgem_debug.h kgem_debug_gen2.c kgem_debug_gen3.c \
+ sna_tiling.c sna_transform.c sna_threads.c sna_vertex.c \
+ sna_video.c sna_video.h sna_video_overlay.c sna_video_sprite.c \
+ sna_video_textured.c gen2_render.c gen2_render.h gen3_render.c \
+ gen3_render.h gen4_render.c gen4_render.h gen4_source.c \
+ gen4_source.h gen4_vertex.c gen4_vertex.h gen5_render.c \
+ gen5_render.h gen6_render.c gen6_render.h gen7_render.c \
+ gen7_render.h sna_dri.c sna_video_hwmc.h sna_video_hwmc.c \
+ kgem_debug.c kgem_debug.h kgem_debug_gen2.c kgem_debug_gen3.c \
kgem_debug_gen4.c kgem_debug_gen5.c kgem_debug_gen6.c \
kgem_debug_gen7.c
@DRI2_TRUE@am__objects_1 = sna_dri.lo
@@ -130,14 +131,18 @@ am_libsna_la_OBJECTS = blt.lo kgem.lo sna_accel.lo sna_blt.lo \
sna_composite.lo sna_damage.lo sna_display.lo sna_driver.lo \
sna_glyphs.lo sna_gradient.lo sna_io.lo sna_render.lo \
sna_stream.lo sna_trapezoids.lo sna_tiling.lo sna_transform.lo \
- sna_video.lo sna_video_overlay.lo sna_video_sprite.lo \
- sna_video_textured.lo gen2_render.lo gen3_render.lo \
- gen4_render.lo gen5_render.lo gen6_render.lo gen7_render.lo \
- $(am__objects_1) $(am__objects_2) $(am__objects_3)
+ sna_threads.lo sna_vertex.lo sna_video.lo sna_video_overlay.lo \
+ sna_video_sprite.lo sna_video_textured.lo gen2_render.lo \
+ gen3_render.lo gen4_render.lo gen4_source.lo gen4_vertex.lo \
+ gen5_render.lo gen6_render.lo gen7_render.lo $(am__objects_1) \
+ $(am__objects_2) $(am__objects_3)
libsna_la_OBJECTS = $(am_libsna_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
+libsna_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(libsna_la_LDFLAGS) $(LDFLAGS) -o $@
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
@@ -281,7 +286,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MAN_SUBSTS = @MAN_SUBSTS@
@@ -320,6 +324,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@
VERSION = @VERSION@
X11_CFLAGS = @X11_CFLAGS@
X11_LIBS = @X11_LIBS@
+XCB_CFLAGS = @XCB_CFLAGS@
+XCB_LIBS = @XCB_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_LIBS = @XORG_LIBS@
XORG_MAN_PAGE = @XORG_MAN_PAGE@
@@ -383,24 +389,27 @@ AM_CFLAGS = @CWARNFLAGS@ -I$(top_srcdir)/src \
-I$(top_srcdir)/src/render_program @XORG_CFLAGS@ @UDEV_CFLAGS@ \
@DRM_CFLAGS@ $(NULL) $(am__append_1) $(am__append_2)
noinst_LTLIBRARIES = libsna.la
+libsna_la_LDFLAGS = -pthread
libsna_la_LIBADD = @UDEV_LIBS@ -lm @DRM_LIBS@ brw/libbrw.la \
fb/libfb.la $(am__append_4)
-libsna_la_SOURCES = blt.c compiler.h kgem.c kgem.h rop.h sna.h \
- sna_accel.c sna_blt.c sna_composite.c sna_damage.c \
+libsna_la_SOURCES = atomic.h blt.c compiler.h kgem.c kgem.h rop.h \
+ sna.h sna_accel.c sna_blt.c sna_composite.c sna_damage.c \
sna_damage.h sna_display.c sna_driver.c sna_glyphs.c \
sna_gradient.c sna_io.c sna_module.h sna_render.c sna_render.h \
sna_render_inline.h sna_reg.h sna_stream.c sna_trapezoids.c \
- sna_tiling.c sna_transform.c sna_video.c sna_video.h \
- sna_video_overlay.c sna_video_sprite.c sna_video_textured.c \
- gen2_render.c gen2_render.h gen3_render.c gen3_render.h \
- gen4_render.c gen4_render.h gen5_render.c gen5_render.h \
- gen6_render.c gen6_render.h gen7_render.c gen7_render.h \
- $(NULL) $(am__append_3) $(am__append_5) $(am__append_6)
+ sna_tiling.c sna_transform.c sna_threads.c sna_vertex.c \
+ sna_video.c sna_video.h sna_video_overlay.c sna_video_sprite.c \
+ sna_video_textured.c gen2_render.c gen2_render.h gen3_render.c \
+ gen3_render.h gen4_render.c gen4_render.h gen4_source.c \
+ gen4_source.h gen4_vertex.c gen4_vertex.h gen5_render.c \
+ gen5_render.h gen6_render.c gen6_render.h gen7_render.c \
+ gen7_render.h $(NULL) $(am__append_3) $(am__append_5) \
+ $(am__append_6)
all: all-recursive
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -425,9 +434,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
@@ -440,7 +449,7 @@ clean-noinstLTLIBRARIES:
rm -f "$${dir}/so_locations"; \
done
libsna.la: $(libsna_la_OBJECTS) $(libsna_la_DEPENDENCIES) $(EXTRA_libsna_la_DEPENDENCIES)
- $(AM_V_CCLD)$(LINK) $(libsna_la_OBJECTS) $(libsna_la_LIBADD) $(LIBS)
+ $(AM_V_CCLD)$(libsna_la_LINK) $(libsna_la_OBJECTS) $(libsna_la_LIBADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
@@ -452,6 +461,8 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen2_render.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen3_render.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen4_render.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen4_source.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen4_vertex.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen5_render.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen6_render.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen7_render.Plo@am__quote@
@@ -475,9 +486,11 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_io.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_render.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_stream.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_threads.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_tiling.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_transform.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_trapezoids.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_vertex.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_video.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_video_hwmc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_video_overlay.Plo@am__quote@
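
The Makefile.in changes above track two new sources, sna_threads.c and sna_vertex.c, and set libsna_la_LDFLAGS = -pthread; defining a per-target LDFLAGS is also what makes Automake generate the dedicated libsna_la_LINK rule seen in this hunk. A minimal sketch of the kind of pthread worker that motivates the new link flag — names here are illustrative, not SNA's actual sna_threads.c API:

    #include <pthread.h>

    struct job { void (*fn)(void *); void *arg; };

    static void *run_job(void *data)
    {
        struct job *job = data;
        job->fn(job->arg);
        return NULL;
    }

    /* Spawn a helper thread for one job and wait for it; if thread
     * creation fails, run the job inline on the caller's thread. */
    static void spawn_and_wait(struct job *job)
    {
        pthread_t tid;
        if (pthread_create(&tid, NULL, run_job, job) == 0)
            pthread_join(tid, NULL);
        else
            job->fn(job->arg);
    }
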
diff --git a/src/sna/atomic.h b/src/sna/atomic.h
new file mode 100644
index 000000000..306dc6db8
--- /dev/null
+++ b/src/sna/atomic.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifndef ATOMIC_H
+#define ATOMIC_H
+
+#if HAVE_ATOMIC_PRIMITIVES
+
+#define HAS_ATOMIC_OPS 1
+
+typedef struct {
+ int atomic;
+} atomic_t;
+
+# define atomic_read(x) ((x)->atomic)
+# define atomic_set(x, val) ((x)->atomic = (val))
+# define atomic_inc(x) ((void) __sync_fetch_and_add (&(x)->atomic, 1))
+# define atomic_dec_and_test(x) (__sync_fetch_and_add (&(x)->atomic, -1) == 1)
+# define atomic_add(x, v) ((void) __sync_add_and_fetch(&(x)->atomic, (v)))
+# define atomic_dec(x, v) ((void) __sync_sub_and_fetch(&(x)->atomic, (v)))
+# define atomic_cmpxchg(x, oldv, newv) __sync_val_compare_and_swap (&(x)->atomic, oldv, newv)
+
+#endif
+
+#if HAVE_LIB_ATOMIC_OPS
+#include <atomic_ops.h>
+
+#define HAS_ATOMIC_OPS 1
+
+typedef struct {
+ AO_t atomic;
+} atomic_t;
+
+# define atomic_read(x) AO_load_full(&(x)->atomic)
+# define atomic_set(x, val) AO_store_full(&(x)->atomic, (val))
+# define atomic_inc(x) ((void) AO_fetch_and_add1_full(&(x)->atomic))
+# define atomic_add(x, v) ((void) AO_fetch_and_add_full(&(x)->atomic, (v)))
+# define atomic_dec(x, v) ((void) AO_fetch_and_add_full(&(x)->atomic, -(v)))
+# define atomic_dec_and_test(x) (AO_fetch_and_sub1_full(&(x)->atomic) == 1)
+# define atomic_cmpxchg(x, oldv, newv) AO_compare_and_swap_full(&(x)->atomic, oldv, newv)
+
+#endif
+
+#if defined(__sun) && !defined(HAS_ATOMIC_OPS) /* Solaris & OpenSolaris */
+
+#include <sys/atomic.h>
+#define HAS_ATOMIC_OPS 1
+
+typedef struct { uint_t atomic; } atomic_t;
+
+# define atomic_read(x) (int) ((x)->atomic)
+# define atomic_set(x, val) ((x)->atomic = (uint_t)(val))
+# define atomic_inc(x) (atomic_inc_uint (&(x)->atomic))
+# define atomic_dec_and_test(x) (atomic_dec_uint_nv(&(x)->atomic) == 1)
+# define atomic_add(x, v) (atomic_add_int(&(x)->atomic, (v)))
+# define atomic_dec(x, v) (atomic_add_int(&(x)->atomic, -(v)))
+# define atomic_cmpxchg(x, oldv, newv) atomic_cas_uint (&(x)->atomic, oldv, newv)
+
+#endif
+
+#if ! HAS_ATOMIC_OPS
+#error xf86-video-intel requires atomic operations, please define them for your CPU/compiler.
+#endif
+
+#endif
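
Each of the three backends above (GCC __sync builtins, libatomic_ops, and Solaris <sys/atomic.h>) defines the same small API — atomic_read/set/inc/dec/add, atomic_dec_and_test and atomic_cmpxchg — so callers can reference-count objects without knowing which backend configure picked. A minimal usage sketch, with an illustrative object type:

    #include <stdlib.h>
    #include "atomic.h"

    struct object {
        atomic_t refcnt;
        /* payload */
    };

    static void object_ref(struct object *obj)
    {
        atomic_inc(&obj->refcnt);
    }

    static void object_unref(struct object *obj)
    {
        /* atomic_dec_and_test() is true when the old count was 1,
         * i.e. this call dropped the last reference. */
        if (atomic_dec_and_test(&obj->refcnt))
            free(obj);
    }
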
diff --git a/src/sna/brw/Makefile.in b/src/sna/brw/Makefile.in
index 1c8fbf3dd..8d70764e8 100644
--- a/src/sna/brw/Makefile.in
+++ b/src/sna/brw/Makefile.in
@@ -203,7 +203,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MAN_SUBSTS = @MAN_SUBSTS@
@@ -242,6 +241,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@
VERSION = @VERSION@
X11_CFLAGS = @X11_CFLAGS@
X11_LIBS = @X11_LIBS@
+XCB_CFLAGS = @XCB_CFLAGS@
+XCB_LIBS = @XCB_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_LIBS = @XORG_LIBS@
XORG_MAN_PAGE = @XORG_MAN_PAGE@
@@ -331,7 +332,7 @@ all: all-am
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -356,9 +357,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
diff --git a/src/sna/brw/brw_disasm.c b/src/sna/brw/brw_disasm.c
index e6da17454..ea6155c8e 100644
--- a/src/sna/brw/brw_disasm.c
+++ b/src/sna/brw/brw_disasm.c
@@ -875,16 +875,17 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen)
string(file, ")");
}
- if (inst->header.opcode == BRW_OPCODE_SEND && gen < 60)
+ if (inst->header.opcode == BRW_OPCODE_SEND && gen < 060)
format(file, " %d", inst->header.destreg__conditionalmod);
if (opcode[inst->header.opcode].ndst > 0) {
pad(file, 16);
dest(file, inst);
- } else if (gen >= 60 && (inst->header.opcode == BRW_OPCODE_IF ||
- inst->header.opcode == BRW_OPCODE_ELSE ||
- inst->header.opcode == BRW_OPCODE_ENDIF ||
- inst->header.opcode == BRW_OPCODE_WHILE)) {
+ } else if (gen >= 060 &&
+ (inst->header.opcode == BRW_OPCODE_IF ||
+ inst->header.opcode == BRW_OPCODE_ELSE ||
+ inst->header.opcode == BRW_OPCODE_ENDIF ||
+ inst->header.opcode == BRW_OPCODE_WHILE)) {
format(file, " %d", inst->bits1.branch_gen6.jump_count);
}
@@ -901,9 +902,9 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen)
inst->header.opcode == BRW_OPCODE_SENDC) {
enum brw_message_target target;
- if (gen >= 60)
+ if (gen >= 060)
target = inst->header.destreg__conditionalmod;
- else if (gen >= 50)
+ else if (gen >= 050)
target = inst->bits2.send_gen5.sfid;
else
target = inst->bits3.generic.msg_target;
@@ -912,7 +913,7 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen)
pad (file, 16);
space = 0;
- if (gen >= 60) {
+ if (gen >= 060) {
control (file, "target function", target_function_gen6,
target, &space);
} else {
@@ -934,19 +935,19 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen)
inst->bits3.math.precision, &space);
break;
case BRW_SFID_SAMPLER:
- if (gen >= 70) {
+ if (gen >= 070) {
format (file, " (%d, %d, %d, %d)",
inst->bits3.sampler_gen7.binding_table_index,
inst->bits3.sampler_gen7.sampler,
inst->bits3.sampler_gen7.msg_type,
inst->bits3.sampler_gen7.simd_mode);
- } else if (gen >= 50) {
+ } else if (gen >= 050) {
format (file, " (%d, %d, %d, %d)",
inst->bits3.sampler_gen5.binding_table_index,
inst->bits3.sampler_gen5.sampler,
inst->bits3.sampler_gen5.msg_type,
inst->bits3.sampler_gen5.simd_mode);
- } else if (gen >= 45) {
+ } else if (gen >= 045) {
format (file, " (%d, %d)",
inst->bits3.sampler_g4x.binding_table_index,
inst->bits3.sampler_g4x.sampler);
@@ -961,13 +962,13 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen)
}
break;
case BRW_SFID_DATAPORT_READ:
- if (gen >= 60) {
+ if (gen >= 060) {
format (file, " (%d, %d, %d, %d)",
inst->bits3.gen6_dp.binding_table_index,
inst->bits3.gen6_dp.msg_control,
inst->bits3.gen6_dp.msg_type,
inst->bits3.gen6_dp.send_commit_msg);
- } else if (gen >= 45) {
+ } else if (gen >= 045) {
format (file, " (%d, %d, %d)",
inst->bits3.dp_read_gen5.binding_table_index,
inst->bits3.dp_read_gen5.msg_control,
@@ -981,7 +982,7 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen)
break;
case BRW_SFID_DATAPORT_WRITE:
- if (gen >= 70) {
+ if (gen >= 070) {
format (file, " (");
control (file, "DP rc message type",
@@ -992,7 +993,7 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen)
inst->bits3.gen7_dp.binding_table_index,
inst->bits3.gen7_dp.msg_control,
inst->bits3.gen7_dp.msg_type);
- } else if (gen >= 60) {
+ } else if (gen >= 060) {
format (file, " (");
control (file, "DP rc message type",
@@ -1015,14 +1016,14 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen)
break;
case BRW_SFID_URB:
- if (gen >= 50) {
+ if (gen >= 050) {
format (file, " %d", inst->bits3.urb_gen5.offset);
} else {
format (file, " %d", inst->bits3.urb.offset);
}
space = 1;
- if (gen >= 50) {
+ if (gen >= 050) {
control (file, "urb opcode", urb_opcode,
inst->bits3.urb_gen5.opcode, &space);
}
@@ -1051,7 +1052,7 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen)
}
if (space)
string (file, " ");
- if (gen >= 50) {
+ if (gen >= 050) {
format (file, "mlen %d",
inst->bits3.generic_gen5.msg_length);
format (file, " rlen %d",
@@ -1068,13 +1069,13 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen)
string(file, "{");
space = 1;
control(file, "access mode", access_mode, inst->header.access_mode, &space);
- if (gen >= 60)
+ if (gen >= 060)
control(file, "write enable control", wectrl, inst->header.mask_control, &space);
else
control(file, "mask control", mask_ctrl, inst->header.mask_control, &space);
control(file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
- if (gen >= 60)
+ if (gen >= 060)
qtr_ctrl(file, inst);
else {
if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED &&
@@ -1089,7 +1090,7 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen)
}
control(file, "thread control", thread_ctrl, inst->header.thread_control, &space);
- if (gen >= 60)
+ if (gen >= 060)
control(file, "acc write control", accwr, inst->header.acc_wr_control, &space);
if (inst->header.opcode == BRW_OPCODE_SEND ||
inst->header.opcode == BRW_OPCODE_SENDC)
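
The systematic change through brw_disasm.c — and through every file below — replaces decimal gen literals with octal ones: the generation is encoded as one octal digit of major revision and one of minor, so 045 is gen4.5 (G4x), 050 gen5, 060 gen6 and 070 gen7, and half-generation steps order correctly under ordinary integer comparison. A small worked example, assuming that reading of the encoding:

    /* gen = (major << 3) | minor, conveniently written in octal */
    #define GEN(major, minor) (((major) << 3) | (minor))

    static const int gen45 = GEN(4, 5);   /* 045 == 37: gen4.5 (G4x) */
    static const int gen6  = GEN(6, 0);   /* 060 == 48: gen6 */

    /* 045 < 050 < 060 < 070, so a test such as "gen >= 045"
     * admits G4x and every later generation. */
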
diff --git a/src/sna/brw/brw_eu.c b/src/sna/brw/brw_eu.c
index 7c32ea191..9bd8ba5dc 100644
--- a/src/sna/brw/brw_eu.c
+++ b/src/sna/brw/brw_eu.c
@@ -79,7 +79,7 @@ void brw_set_compression_control(struct brw_compile *p,
{
p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED);
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
/* Since we don't use the 32-wide support in gen6, we translate
* the pre-gen6 compression control here.
*/
diff --git a/src/sna/brw/brw_eu.h b/src/sna/brw/brw_eu.h
index 65e66d5ec..24ab599ad 100644
--- a/src/sna/brw/brw_eu.h
+++ b/src/sna/brw/brw_eu.h
@@ -1862,7 +1862,7 @@ static inline void brw_set_saturate(struct brw_compile *p, unsigned value)
static inline void brw_set_acc_write_control(struct brw_compile *p, unsigned value)
{
- if (p->gen >= 60)
+ if (p->gen >= 060)
p->current->header.acc_wr_control = value;
}
@@ -1938,7 +1938,7 @@ static inline void brw_##OP(struct brw_compile *p, \
rnd = brw_next_insn(p, BRW_OPCODE_##OP); \
brw_set_dest(p, rnd, dest); \
brw_set_src0(p, rnd, src); \
- if (p->gen < 60) { \
+ if (p->gen < 060) { \
/* turn on round-increments */ \
rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
diff --git a/src/sna/brw/brw_eu_emit.c b/src/sna/brw/brw_eu_emit.c
index 3f01ae7b7..5c0b30654 100644
--- a/src/sna/brw/brw_eu_emit.c
+++ b/src/sna/brw/brw_eu_emit.c
@@ -61,7 +61,7 @@ gen6_resolve_implied_move(struct brw_compile *p,
struct brw_reg *src,
unsigned msg_reg_nr)
{
- if (p->gen < 60)
+ if (p->gen < 060)
return;
if (src->file == BRW_MESSAGE_REGISTER_FILE)
@@ -88,7 +88,7 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
* Since we're pretending to have 16 MRFs anyway, we may as well use the
* registers required for messages with EOT.
*/
- if (p->gen >= 70 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
+ if (p->gen >= 070 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
reg->file = BRW_GENERAL_REGISTER_FILE;
reg->nr += 111;
}
@@ -378,13 +378,13 @@ brw_set_message_descriptor(struct brw_compile *p,
{
brw_set_src1(p, inst, brw_imm_d(0));
- if (p->gen >= 50) {
+ if (p->gen >= 050) {
inst->bits3.generic_gen5.header_present = header_present;
inst->bits3.generic_gen5.response_length = response_length;
inst->bits3.generic_gen5.msg_length = msg_length;
inst->bits3.generic_gen5.end_of_thread = end_of_thread;
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
/* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
inst->header.destreg__conditionalmod = sfid;
} else {
@@ -439,7 +439,7 @@ static void brw_set_math_message(struct brw_compile *p,
brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
msg_length, response_length,
false, false);
- if (p->gen == 50) {
+ if (p->gen == 050) {
insn->bits3.math_gen5.function = function;
insn->bits3.math_gen5.int_type = integer_type;
insn->bits3.math_gen5.precision = low_precision;
@@ -485,7 +485,7 @@ static void brw_set_urb_message(struct brw_compile *p,
{
brw_set_message_descriptor(p, insn, BRW_SFID_URB,
msg_length, response_length, true, end_of_thread);
- if (p->gen >= 70) {
+ if (p->gen >= 070) {
insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */
insn->bits3.urb_gen7.offset = offset;
assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
@@ -493,7 +493,7 @@ static void brw_set_urb_message(struct brw_compile *p,
/* per_slot_offset = 0 makes it ignore offsets in message header */
insn->bits3.urb_gen7.per_slot_offset = 0;
insn->bits3.urb_gen7.complete = complete;
- } else if (p->gen >= 50) {
+ } else if (p->gen >= 050) {
insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
insn->bits3.urb_gen5.offset = offset;
insn->bits3.urb_gen5.swizzle_control = swizzle_control;
@@ -525,13 +525,13 @@ brw_set_dp_write_message(struct brw_compile *p,
{
unsigned sfid;
- if (p->gen >= 70) {
+ if (p->gen >= 070) {
/* Use the Render Cache for RT writes; otherwise use the Data Cache */
if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
else
sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
- } else if (p->gen >= 60) {
+ } else if (p->gen >= 060) {
/* Use the render cache for all write messages. */
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
} else {
@@ -542,18 +542,18 @@ brw_set_dp_write_message(struct brw_compile *p,
msg_length, response_length,
header_present, end_of_thread);
- if (p->gen >= 70) {
+ if (p->gen >= 070) {
insn->bits3.gen7_dp.binding_table_index = binding_table_index;
insn->bits3.gen7_dp.msg_control = msg_control;
insn->bits3.gen7_dp.last_render_target = last_render_target;
insn->bits3.gen7_dp.msg_type = msg_type;
- } else if (p->gen >= 60) {
+ } else if (p->gen >= 060) {
insn->bits3.gen6_dp.binding_table_index = binding_table_index;
insn->bits3.gen6_dp.msg_control = msg_control;
insn->bits3.gen6_dp.last_render_target = last_render_target;
insn->bits3.gen6_dp.msg_type = msg_type;
insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
- } else if (p->gen >= 50) {
+ } else if (p->gen >= 050) {
insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
insn->bits3.dp_write_gen5.msg_control = msg_control;
insn->bits3.dp_write_gen5.last_render_target = last_render_target;
@@ -580,9 +580,9 @@ brw_set_dp_read_message(struct brw_compile *p,
{
unsigned sfid;
- if (p->gen >= 70) {
+ if (p->gen >= 070) {
sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
- } else if (p->gen >= 60) {
+ } else if (p->gen >= 060) {
if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
else
@@ -595,23 +595,23 @@ brw_set_dp_read_message(struct brw_compile *p,
msg_length, response_length,
true, false);
- if (p->gen >= 70) {
+ if (p->gen >= 070) {
insn->bits3.gen7_dp.binding_table_index = binding_table_index;
insn->bits3.gen7_dp.msg_control = msg_control;
insn->bits3.gen7_dp.last_render_target = 0;
insn->bits3.gen7_dp.msg_type = msg_type;
- } else if (p->gen >= 60) {
+ } else if (p->gen >= 060) {
insn->bits3.gen6_dp.binding_table_index = binding_table_index;
insn->bits3.gen6_dp.msg_control = msg_control;
insn->bits3.gen6_dp.last_render_target = 0;
insn->bits3.gen6_dp.msg_type = msg_type;
insn->bits3.gen6_dp.send_commit_msg = 0;
- } else if (p->gen >= 50) {
+ } else if (p->gen >= 050) {
insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
insn->bits3.dp_read_gen5.msg_control = msg_control;
insn->bits3.dp_read_gen5.msg_type = msg_type;
insn->bits3.dp_read_gen5.target_cache = target_cache;
- } else if (p->gen >= 45) {
+ } else if (p->gen >= 045) {
insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
@@ -638,17 +638,17 @@ static void brw_set_sampler_message(struct brw_compile *p,
msg_length, response_length,
header_present, false);
- if (p->gen >= 70) {
+ if (p->gen >= 070) {
insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
insn->bits3.sampler_gen7.sampler = sampler;
insn->bits3.sampler_gen7.msg_type = msg_type;
insn->bits3.sampler_gen7.simd_mode = simd_mode;
- } else if (p->gen >= 50) {
+ } else if (p->gen >= 050) {
insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
insn->bits3.sampler_gen5.sampler = sampler;
insn->bits3.sampler_gen5.msg_type = msg_type;
insn->bits3.sampler_gen5.simd_mode = simd_mode;
- } else if (p->gen >= 45) {
+ } else if (p->gen >= 045) {
insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
insn->bits3.sampler_g4x.sampler = sampler;
insn->bits3.sampler_g4x.msg_type = msg_type;
@@ -706,11 +706,11 @@ brw_IF(struct brw_compile *p, unsigned execute_size)
insn = brw_next_insn(p, BRW_OPCODE_IF);
/* Override the defaults for this instruction: */
- if (p->gen < 60) {
+ if (p->gen < 060) {
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
- } else if (p->gen < 70) {
+ } else if (p->gen < 070) {
brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
@@ -827,7 +827,7 @@ patch_IF_ELSE(struct brw_compile *p,
/* Jump count is for 64bit data chunk each, so one 128bit instruction
* requires 2 chunks.
*/
- if (p->gen >= 50)
+ if (p->gen >= 050)
br = 2;
assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
@@ -835,7 +835,7 @@ patch_IF_ELSE(struct brw_compile *p,
if (else_inst == NULL) {
/* Patch IF -> ENDIF */
- if (p->gen < 60) {
+ if (p->gen < 060) {
/* Turn it into an IFF, which means no mask stack operations for
* all-false and jumping past the ENDIF.
*/
@@ -843,7 +843,7 @@ patch_IF_ELSE(struct brw_compile *p,
if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
if_inst->bits3.if_else.pop_count = 0;
if_inst->bits3.if_else.pad0 = 0;
- } else if (p->gen < 70) {
+ } else if (p->gen < 070) {
/* As of gen6, there is no IFF and IF must point to the ENDIF. */
if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
} else {
@@ -854,23 +854,23 @@ patch_IF_ELSE(struct brw_compile *p,
else_inst->header.execution_size = if_inst->header.execution_size;
/* Patch IF -> ELSE */
- if (p->gen < 60) {
+ if (p->gen < 060) {
if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
if_inst->bits3.if_else.pop_count = 0;
if_inst->bits3.if_else.pad0 = 0;
- } else if (p->gen <= 70) {
+ } else if (p->gen <= 070) {
if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
}
/* Patch ELSE -> ENDIF */
- if (p->gen < 60) {
+ if (p->gen < 060) {
/* BRW_OPCODE_ELSE pre-gen6 should point just past the
* matching ENDIF.
*/
else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
else_inst->bits3.if_else.pop_count = 1;
else_inst->bits3.if_else.pad0 = 0;
- } else if (p->gen < 70) {
+ } else if (p->gen < 070) {
/* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
} else {
@@ -890,11 +890,11 @@ brw_ELSE(struct brw_compile *p)
insn = brw_next_insn(p, BRW_OPCODE_ELSE);
- if (p->gen < 60) {
+ if (p->gen < 060) {
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
- } else if (p->gen < 70) {
+ } else if (p->gen < 070) {
brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
@@ -938,11 +938,11 @@ brw_ENDIF(struct brw_compile *p)
insn = brw_next_insn(p, BRW_OPCODE_ENDIF);
- if (p->gen < 60) {
+ if (p->gen < 060) {
brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
brw_set_src1(p, insn, brw_imm_d(0x0));
- } else if (p->gen < 70) {
+ } else if (p->gen < 070) {
brw_set_dest(p, insn, brw_imm_w(0));
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, __retype_d(brw_null_reg()));
@@ -957,11 +957,11 @@ brw_ENDIF(struct brw_compile *p)
insn->header.thread_control = BRW_THREAD_SWITCH;
/* Also pop item off the stack in the endif instruction: */
- if (p->gen < 60) {
+ if (p->gen < 060) {
insn->bits3.if_else.jump_count = 0;
insn->bits3.if_else.pop_count = 1;
insn->bits3.if_else.pad0 = 0;
- } else if (p->gen < 70) {
+ } else if (p->gen < 070) {
insn->bits1.branch_gen6.jump_count = 2;
} else {
insn->bits3.break_cont.jip = 2;
@@ -974,7 +974,7 @@ struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
struct brw_instruction *insn;
insn = brw_next_insn(p, BRW_OPCODE_BREAK);
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, brw_imm_d(0x0));
@@ -1041,7 +1041,7 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
*/
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
{
- if (p->gen >= 60 || p->single_program_flow) {
+ if (p->gen >= 060 || p->single_program_flow) {
return &p->store[p->nr_insn];
} else {
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_DO);
@@ -1068,10 +1068,10 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
struct brw_instruction *insn;
unsigned br = 1;
- if (p->gen >= 50)
+ if (p->gen >= 050)
br = 2;
- if (p->gen >= 70) {
+ if (p->gen >= 070) {
insn = brw_next_insn(p, BRW_OPCODE_WHILE);
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
@@ -1080,7 +1080,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
insn->bits3.break_cont.jip = br * (do_insn - insn);
insn->header.execution_size = BRW_EXECUTE_8;
- } else if (p->gen >= 60) {
+ } else if (p->gen >= 060) {
insn = brw_next_insn(p, BRW_OPCODE_WHILE);
brw_set_dest(p, insn, brw_imm_w(0));
@@ -1126,7 +1126,7 @@ void brw_land_fwd_jump(struct brw_compile *p,
struct brw_instruction *landing = &p->store[p->nr_insn];
unsigned jmpi = 1;
- if (p->gen >= 50)
+ if (p->gen >= 050)
jmpi = 2;
assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
@@ -1195,7 +1195,7 @@ void brw_math(struct brw_compile *p,
unsigned data_type,
unsigned precision)
{
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
assert(dest.file == BRW_GENERAL_REGISTER_FILE);
@@ -1294,7 +1294,7 @@ void brw_math_16(struct brw_compile *p,
{
struct brw_instruction *insn;
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
insn = brw_next_insn(p, BRW_OPCODE_MATH);
/* Math is the same ISA format as other opcodes, except that CondModifier
@@ -1362,7 +1362,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
uint32_t msg_control, msg_type;
int mlen;
- if (p->gen >= 60)
+ if (p->gen >= 060)
offset /= 16;
mrf = __retype_ud(mrf);
@@ -1418,7 +1418,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
* protection. Our use of DP writes is all about register
* spilling within a thread.
*/
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
dest = __retype_uw(vec16(brw_null_reg()));
send_commit_msg = 0;
} else {
@@ -1427,13 +1427,13 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
}
brw_set_dest(p, insn, dest);
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
brw_set_src0(p, insn, mrf);
} else {
brw_set_src0(p, insn, brw_null_reg());
}
- if (p->gen >= 60)
+ if (p->gen >= 060)
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
else
msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
@@ -1470,7 +1470,7 @@ brw_oword_block_read_scratch(struct brw_compile *p,
uint32_t msg_control;
int rlen;
- if (p->gen >= 60)
+ if (p->gen >= 060)
offset /= 16;
mrf = __retype_ud(mrf);
@@ -1507,7 +1507,7 @@ brw_oword_block_read_scratch(struct brw_compile *p,
insn->header.destreg__conditionalmod = mrf.nr;
brw_set_dest(p, insn, dest); /* UW? */
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
brw_set_src0(p, insn, mrf);
} else {
brw_set_src0(p, insn, brw_null_reg());
@@ -1538,7 +1538,7 @@ void brw_oword_block_read(struct brw_compile *p,
struct brw_instruction *insn;
/* On newer hardware, offset is in units of owords. */
- if (p->gen >= 60)
+ if (p->gen >= 060)
offset /= 16;
mrf = __retype_ud(mrf);
@@ -1562,7 +1562,7 @@ void brw_oword_block_read(struct brw_compile *p,
dest = __retype_uw(vec8(dest));
brw_set_dest(p, insn, dest);
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
brw_set_src0(p, insn, mrf);
} else {
brw_set_src0(p, insn, brw_null_reg());
@@ -1634,7 +1634,7 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
struct brw_instruction *insn;
unsigned msg_reg_nr = 1;
- if (p->gen >= 60)
+ if (p->gen >= 060)
location /= 16;
/* Setup MRF[1] with location/offset into const buffer */
@@ -1655,7 +1655,7 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
insn->header.mask_control = BRW_MASK_DISABLE;
brw_set_dest(p, insn, dest);
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
} else {
brw_set_src0(p, insn, brw_null_reg());
@@ -1710,9 +1710,9 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
- if (p->gen >= 60)
+ if (p->gen >= 060)
msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
- else if (p->gen >= 45)
+ else if (p->gen >= 045)
msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
else
msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
@@ -1747,7 +1747,7 @@ void brw_fb_WRITE(struct brw_compile *p,
else
dest = __retype_uw(vec8(brw_null_reg()));
- if (p->gen >= 60 && binding_table_index == 0) {
+ if (p->gen >= 060 && binding_table_index == 0) {
insn = brw_next_insn(p, BRW_OPCODE_SENDC);
} else {
insn = brw_next_insn(p, BRW_OPCODE_SEND);
@@ -1756,7 +1756,7 @@ void brw_fb_WRITE(struct brw_compile *p,
insn->header.predicate_control = 0;
insn->header.compression_control = BRW_COMPRESSION_NONE;
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
/* headerless version, just submit color payload */
src0 = brw_message_reg(msg_reg_nr);
@@ -1802,7 +1802,7 @@ void brw_SAMPLE(struct brw_compile *p,
{
assert(writemask);
- if (p->gen < 50 || writemask != WRITEMASK_XYZW) {
+ if (p->gen < 050 || writemask != WRITEMASK_XYZW) {
struct brw_reg m1 = brw_message_reg(msg_reg_nr);
writemask = ~writemask & WRITEMASK_XYZW;
@@ -1828,7 +1828,7 @@ void brw_SAMPLE(struct brw_compile *p,
insn = brw_next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
- if (p->gen < 60)
+ if (p->gen < 060)
insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(p, insn, dest);
@@ -1865,7 +1865,7 @@ void brw_urb_WRITE(struct brw_compile *p,
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
- if (p->gen >= 70) {
+ if (p->gen >= 070) {
/* Enable Channel Masks in the URB_WRITE_HWORD message header */
brw_push_insn_state(p);
brw_set_access_mode(p, BRW_ALIGN_1);
@@ -1883,7 +1883,7 @@ void brw_urb_WRITE(struct brw_compile *p,
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, brw_imm_d(0));
- if (p->gen <= 60)
+ if (p->gen <= 060)
insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_urb_message(p,
@@ -1931,7 +1931,7 @@ brw_find_loop_end(struct brw_compile *p, int start)
struct brw_instruction *insn = &p->store[ip];
if (insn->header.opcode == BRW_OPCODE_WHILE) {
- int jip = p->gen <= 70 ? insn->bits1.branch_gen6.jump_count
+ int jip = p->gen <= 070 ? insn->bits1.branch_gen6.jump_count
: insn->bits3.break_cont.jip;
if (ip + jip / br <= start)
return ip;
@@ -1950,7 +1950,7 @@ brw_set_uip_jip(struct brw_compile *p)
int ip;
int br = 2;
- if (p->gen <= 60)
+ if (p->gen <= 060)
return;
for (ip = 0; ip < p->nr_insn; ip++) {
@@ -1961,7 +1961,7 @@ brw_set_uip_jip(struct brw_compile *p)
insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
/* Gen7 UIP points to WHILE; Gen6 points just after it */
insn->bits3.break_cont.uip =
- br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 70 ? 1 : 0));
+ br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 070 ? 1 : 0));
break;
case BRW_OPCODE_CONTINUE:
insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
@@ -1991,7 +1991,7 @@ void brw_ff_sync(struct brw_compile *p,
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, brw_imm_d(0));
- if (p->gen < 60)
+ if (p->gen < 060)
insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_ff_sync_message(p,
diff --git a/src/sna/brw/brw_wm.c b/src/sna/brw/brw_wm.c
index f54e55efe..e8dc6ac47 100644
--- a/src/sna/brw/brw_wm.c
+++ b/src/sna/brw/brw_wm.c
@@ -41,15 +41,15 @@ static void brw_wm_affine_st(struct brw_compile *p, int dw,
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- uv = p->gen >= 60 ? 6 : 3;
+ uv = p->gen >= 060 ? 6 : 3;
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- uv = p->gen >= 60 ? 4 : 3;
+ uv = p->gen >= 060 ? 4 : 3;
}
uv += 2*channel;
msg++;
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
brw_PLN(p,
brw_message_reg(msg),
brw_vec1_grf(uv, 0),
@@ -96,7 +96,7 @@ static int brw_wm_sample(struct brw_compile *p, int dw,
int len;
len = dw == 16 ? 4 : 2;
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
header = false;
src0 = brw_message_reg(++msg);
} else {
@@ -125,7 +125,7 @@ static int brw_wm_sample__alpha(struct brw_compile *p, int dw,
rlen = 2;
}
- if (p->gen >= 60)
+ if (p->gen >= 060)
src0 = brw_message_reg(msg);
else
src0 = brw_vec8_grf(0, 0);
@@ -182,7 +182,7 @@ static void brw_fb_write(struct brw_compile *p, int dw)
msg_len = 4;
}
- if (p->gen < 60) {
+ if (p->gen < 060) {
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
@@ -197,7 +197,7 @@ static void brw_fb_write(struct brw_compile *p, int dw)
insn->header.predicate_control = 0;
insn->header.compression_control = BRW_COMPRESSION_NONE;
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
src0 = brw_message_reg(2);
header = false;
@@ -219,7 +219,7 @@ static void brw_wm_write(struct brw_compile *p, int dw, int src)
{
int n;
- if (dw == 8 && p->gen >= 60) {
+ if (dw == 8 && p->gen >= 060) {
/* XXX pixel execution mask? */
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
@@ -233,11 +233,11 @@ static void brw_wm_write(struct brw_compile *p, int dw, int src)
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
for (n = 0; n < 4; n++) {
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
brw_MOV(p,
brw_message_reg(2 + 2*n),
brw_vec8_grf(src + 2*n, 0));
- } else if (p->gen >= 45 && dw == 16) {
+ } else if (p->gen >= 045 && dw == 16) {
brw_MOV(p,
brw_message_reg(2 + n + BRW_MRF_COMPR4),
brw_vec8_grf(src + 2*n, 0));
@@ -265,7 +265,7 @@ static void brw_wm_write__mask(struct brw_compile *p, int dw,
{
int n;
- if (dw == 8 && p->gen >= 60) {
+ if (dw == 8 && p->gen >= 060) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MUL(p,
@@ -291,12 +291,12 @@ static void brw_wm_write__mask(struct brw_compile *p, int dw,
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
for (n = 0; n < 4; n++) {
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
brw_MUL(p,
brw_message_reg(2 + 2*n),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask, 0));
- } else if (p->gen >= 45 && dw == 16) {
+ } else if (p->gen >= 045 && dw == 16) {
brw_MUL(p,
brw_message_reg(2 + n + BRW_MRF_COMPR4),
brw_vec8_grf(src + 2*n, 0),
@@ -327,7 +327,7 @@ static void brw_wm_write__opacity(struct brw_compile *p, int dw,
{
int n;
- if (dw == 8 && p->gen >= 60) {
+ if (dw == 8 && p->gen >= 060) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MUL(p,
@@ -353,12 +353,12 @@ static void brw_wm_write__opacity(struct brw_compile *p, int dw,
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
for (n = 0; n < 4; n++) {
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
brw_MUL(p,
brw_message_reg(2 + 2*n),
brw_vec8_grf(src + 2*n, 0),
brw_vec1_grf(mask, 3));
- } else if (p->gen >= 45 && dw == 16) {
+ } else if (p->gen >= 045 && dw == 16) {
brw_MUL(p,
brw_message_reg(2 + n + BRW_MRF_COMPR4),
brw_vec8_grf(src + 2*n, 0),
@@ -389,7 +389,7 @@ static void brw_wm_write__mask_ca(struct brw_compile *p, int dw,
{
int n;
- if (dw == 8 && p->gen >= 60) {
+ if (dw == 8 && p->gen >= 060) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MUL(p,
@@ -415,12 +415,12 @@ static void brw_wm_write__mask_ca(struct brw_compile *p, int dw,
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
for (n = 0; n < 4; n++) {
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
brw_MUL(p,
brw_message_reg(2 + 2*n),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask + 2*n, 0));
- } else if (p->gen >= 45 && dw == 16) {
+ } else if (p->gen >= 045 && dw == 16) {
brw_MUL(p,
brw_message_reg(2 + n + BRW_MRF_COMPR4),
brw_vec8_grf(src + 2*n, 0),
@@ -449,7 +449,7 @@ done:
bool
brw_wm_kernel__affine(struct brw_compile *p, int dispatch)
{
- if (p->gen < 60)
+ if (p->gen < 060)
brw_wm_xy(p, dispatch);
brw_wm_write(p, dispatch, brw_wm_affine(p, dispatch, 0, 1, 12));
@@ -461,7 +461,7 @@ brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch)
{
int src, mask;
- if (p->gen < 60)
+ if (p->gen < 060)
brw_wm_xy(p, dispatch);
src = brw_wm_affine(p, dispatch, 0, 1, 12);
@@ -476,7 +476,7 @@ brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch)
{
int src, mask;
- if (p->gen < 60)
+ if (p->gen < 060)
brw_wm_xy(p, dispatch);
src = brw_wm_affine(p, dispatch, 0, 1, 12);
@@ -491,7 +491,7 @@ brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch)
{
int src, mask;
- if (p->gen < 60)
+ if (p->gen < 060)
brw_wm_xy(p, dispatch);
src = brw_wm_affine__alpha(p, dispatch, 0, 1, 12);
@@ -510,15 +510,15 @@ static void brw_wm_projective_st(struct brw_compile *p, int dw,
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- uv = p->gen >= 60 ? 6 : 3;
+ uv = p->gen >= 060 ? 6 : 3;
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- uv = p->gen >= 60 ? 4 : 3;
+ uv = p->gen >= 060 ? 4 : 3;
}
uv += 2*channel;
msg++;
- if (p->gen >= 60) {
+ if (p->gen >= 060) {
/* First compute 1/z */
brw_PLN(p,
brw_message_reg(msg),
@@ -594,7 +594,7 @@ static int brw_wm_projective__alpha(struct brw_compile *p, int dw,
bool
brw_wm_kernel__projective(struct brw_compile *p, int dispatch)
{
- if (p->gen < 60)
+ if (p->gen < 060)
brw_wm_xy(p, dispatch);
brw_wm_write(p, dispatch, brw_wm_projective(p, dispatch, 0, 1, 12));
@@ -606,7 +606,7 @@ brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch)
{
int src, mask;
- if (p->gen < 60)
+ if (p->gen < 060)
brw_wm_xy(p, dispatch);
src = brw_wm_projective(p, dispatch, 0, 1, 12);
@@ -621,7 +621,7 @@ brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch)
{
int src, mask;
- if (p->gen < 60)
+ if (p->gen < 060)
brw_wm_xy(p, dispatch);
src = brw_wm_projective(p, dispatch, 0, 1, 12);
@@ -636,7 +636,7 @@ brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch)
{
int src, mask;
- if (p->gen < 60)
+ if (p->gen < 060)
brw_wm_xy(p, dispatch);
src = brw_wm_projective__alpha(p, dispatch, 0, 1, 12);
@@ -651,9 +651,9 @@ brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch)
{
int src, mask;
- if (p->gen < 60) {
+ if (p->gen < 060) {
brw_wm_xy(p, dispatch);
- mask = 4;
+ mask = 5;
} else
mask = dispatch == 16 ? 8 : 6;
@@ -668,9 +668,9 @@ brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch)
{
int src, mask;
- if (p->gen < 60) {
+ if (p->gen < 060) {
brw_wm_xy(p, dispatch);
- mask = 4;
+ mask = 5;
} else
mask = dispatch == 16 ? 8 : 6;
diff --git a/src/sna/compiler.h b/src/sna/compiler.h
index ff80365eb..b985f2bca 100644
--- a/src/sna/compiler.h
+++ b/src/sna/compiler.h
@@ -36,6 +36,7 @@
#define fastcall __attribute__((regparm(3)))
#define must_check __attribute__((warn_unused_result))
#define constant __attribute__((const))
+#define __packed__ __attribute__((__packed__))
#else
#define likely(expr) (expr)
#define unlikely(expr) (expr)
@@ -44,6 +45,7 @@
#define fastcall
#define must_check
#define constant
+#define __packed__
#endif
#ifdef HAVE_VALGRIND
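
The new __packed__ wrapper follows the pattern of the existing macros in compiler.h: it expands to __attribute__((__packed__)) under GCC and to nothing otherwise, so structs that must match a hardware or wire layout can be declared portably. An illustrative use (the struct itself is hypothetical):

    #include <stdint.h>

    /* Unpacked, the compiler would pad cmd out to a 4-byte
     * boundary; packed, the struct occupies exactly 5 bytes. */
    struct hw_command {
        uint8_t  cmd;
        uint32_t payload;
    } __packed__;
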
diff --git a/src/sna/fb/Makefile.in b/src/sna/fb/Makefile.in
index d9ca271f0..d21411b96 100644
--- a/src/sna/fb/Makefile.in
+++ b/src/sna/fb/Makefile.in
@@ -182,7 +182,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MAN_SUBSTS = @MAN_SUBSTS@
@@ -221,6 +220,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@
VERSION = @VERSION@
X11_CFLAGS = @X11_CFLAGS@
X11_LIBS = @X11_LIBS@
+XCB_CFLAGS = @XCB_CFLAGS@
+XCB_LIBS = @XCB_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_LIBS = @XORG_LIBS@
XORG_MAN_PAGE = @XORG_MAN_PAGE@
@@ -319,7 +320,7 @@ all: all-am
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -344,9 +345,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
diff --git a/src/sna/fb/fb.h b/src/sna/fb/fb.h
index e58e03967..d99453da6 100644
--- a/src/sna/fb/fb.h
+++ b/src/sna/fb/fb.h
@@ -33,6 +33,7 @@
#include <gcstruct.h>
#include <colormap.h>
#include <windowstr.h>
+#include <regionstr.h>
#include <stdbool.h>
#include <pixman.h>
@@ -45,6 +46,8 @@
#include "sfb.h"
+#include "../../compat-api.h"
+
#define WRITE(ptr, val) (*(ptr) = (val))
#define READ(ptr) (*(ptr))
@@ -294,12 +297,12 @@ extern DevPrivateKeyRec sna_window_key;
static inline FbGCPrivate *fb_gc(GCPtr gc)
{
- return dixGetPrivateAddr(&gc->devPrivates, &sna_gc_key);
+ return (FbGCPrivate *)__get_private(gc, sna_gc_key);
}
static inline PixmapPtr fbGetWindowPixmap(WindowPtr window)
{
- return *(PixmapPtr *)dixGetPrivateAddr(&window->devPrivates, &sna_window_key);
+ return *(PixmapPtr *)__get_private(window, sna_window_key);
}
#ifdef ROOTLESS
@@ -360,8 +363,14 @@ static inline PixmapPtr fbGetWindowPixmap(WindowPtr window)
* XFree86 empties the root BorderClip when the VT is inactive,
* here's a macro which uses that to disable GetImage and GetSpans
*/
+
+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,10,0,0,0)
#define fbWindowEnabled(pWin) \
- RegionNotEmpty(&(pWin)->drawable.pScreen->root->borderClip)
+ RegionNotEmpty(&(pWin)->drawable.pScreen->root->borderClip)
+#else
+#define fbWindowEnabled(pWin) \
+ RegionNotEmpty(&WindowTable[(pWin)->drawable.pScreen->myNum]->borderClip)
+#endif
#define fbDrawableEnabled(drawable) \
((drawable)->type == DRAWABLE_PIXMAP ? \
TRUE : fbWindowEnabled((WindowPtr) drawable))
diff --git a/src/sna/fb/fbbitmap.c b/src/sna/fb/fbbitmap.c
index 7c037fe36..2ea92a992 100644
--- a/src/sna/fb/fbbitmap.c
+++ b/src/sna/fb/fbbitmap.c
@@ -25,21 +25,50 @@
#include "fb.h"
+static Bool region_grow(RegionPtr region)
+{
+ RegDataPtr data;
+ int n;
+
+ n = 16;
+ if (!region->data) {
+ region->data = malloc(RegionSizeof(n));
+ if (!region->data)
+ return RegionBreak(region);
+ region->data->numRects = 1;
+ *RegionBoxptr(region) = region->extents;
+ } else if (!region->data->size) {
+ region->data = malloc(RegionSizeof(n));
+ if (!region->data)
+ return RegionBreak(region);
+ region->data->numRects = 0;
+ } else {
+ n = 2 * region->data->numRects;
+ data = (RegDataPtr) realloc(region->data, RegionSizeof(n));
+ if (!data)
+ return RegionBreak(region);
+ region->data = data;
+ }
+ region->data->size = n;
+ return TRUE;
+}
+
static inline void add(RegionPtr region,
int16_t x1, int16_t y1, int16_t x2, int16_t y2)
{
BoxPtr r;
- if (region->data->numRects == region->data->size)
- RegionRectAlloc(region, 1);
+ if (region->data->numRects == region->data->size &&
+ !region_grow(region))
+ return;
r = RegionBoxptr(region) + region->data->numRects++;
r->x1 = x1; r->y1 = y1;
r->x2 = x2; r->y2 = y2;
- DBG(("%s[%d/%d]: (%d, %d), (%d, %d)\n",
+ DBG(("%s[%ld/%ld]: (%d, %d), (%d, %d)\n",
__FUNCTION__,
- region->data->numRects, region->data->size,
+ (long)region->data->numRects, (long)region->data->size,
x1, y1, x2, y2));
if (x1 < region->extents.x1)
@@ -149,11 +178,11 @@ fbBitmapToRegion(PixmapPtr pixmap)
} else
region->extents.x1 = region->extents.x2 = 0;
- DBG(("%s: region extents=(%d, %d), (%d, %d) x %d\n",
+ DBG(("%s: region extents=(%d, %d), (%d, %d) x %ld\n",
__FUNCTION__,
region->extents.x1, region->extents.y1,
region->extents.x2, region->extents.y2,
- RegionNumRects(region)));
+ (long)RegionNumRects(region)));
return region;
}
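
region_grow() above replaces the previous call into the server's RegionRectAlloc() with a local helper: the box array starts at 16 entries, doubles whenever add() fills it, and falls back to RegionBreak() on allocation failure. The same amortized-doubling pattern in a generic, self-contained form (not tied to the Region types):

    #include <stdlib.h>

    /* Doubling append: capacities run 16, 32, 64, ..., so n appends
     * cost O(log n) reallocations, as in region_grow() above. */
    static int append(int **items, int *len, int *cap, int value)
    {
        if (*len == *cap) {
            int n = *cap ? 2 * *cap : 16;
            int *tmp = realloc(*items, n * sizeof(int));
            if (!tmp)
                return 0; /* caller bails out, cf. RegionBreak() */
            *items = tmp;
            *cap = n;
        }
        (*items)[(*len)++] = value;
        return 1;
    }
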
diff --git a/src/sna/fb/fbblt.c b/src/sna/fb/fbblt.c
index d4d20b68b..5ad2e2e25 100644
--- a/src/sna/fb/fbblt.c
+++ b/src/sna/fb/fbblt.c
@@ -270,7 +270,7 @@ fbBlt(FbBits *srcLine, FbStride srcStride, int srcX,
int alu, FbBits pm, int bpp,
Bool reverse, Bool upsidedown)
{
- DBG(("%s %dx%d, alu=%d, pm=%d, bpp=%d (reverse=%d, upsidedown=%d)\n",
+ DBG(("%s %dx%d, alu=%d, pm=%x, bpp=%d (reverse=%d, upsidedown=%d)\n",
__FUNCTION__, width, height, alu, pm, bpp, reverse, upsidedown));
if (alu == GXcopy && pm == FB_ALLONES && ((srcX|dstX|width) & 7) == 0) {
@@ -285,9 +285,9 @@ fbBlt(FbBits *srcLine, FbStride srcStride, int srcX,
s += srcX >> 3;
d += dstX >> 3;
- DBG(("%s fast blt, src_stride=%d, dst_stride=%d, width=%d (offset=%d)\n",
+ DBG(("%s fast blt, src_stride=%d, dst_stride=%d, width=%d (offset=%ld)\n",
__FUNCTION__,
- srcStride, dstStride, width, s - d));
+ srcStride, dstStride, width, (long)(s - d)));
if (width == srcStride && width == dstStride) {
width *= height;
diff --git a/src/sna/fb/fbpict.c b/src/sna/fb/fbpict.c
index a2038518e..906a5f316 100644
--- a/src/sna/fb/fbpict.c
+++ b/src/sna/fb/fbpict.c
@@ -26,25 +26,19 @@
#include <string.h>
#include "fb.h"
-
-#include <picturestr.h>
-#include <mipict.h>
#include "fbpict.h"
static void
SourceValidateOnePicture(PicturePtr picture)
{
DrawablePtr drawable = picture->pDrawable;
- ScreenPtr screen;
if (!drawable)
return;
- screen = drawable->pScreen;
- if (screen->SourceValidate)
- screen->SourceValidate(drawable,
- 0, 0, drawable->width, drawable->height,
- picture->subWindowMode);
+ SourceValidate(drawable,
+ 0, 0, drawable->width, drawable->height,
+ picture->subWindowMode);
}
static void
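
SourceValidateOnePicture() now calls a SourceValidate() wrapper rather than poking screen->SourceValidate directly; given the new '#include "../../compat-api.h"' in fb.h above, this is presumably a compatibility shim smoothing over the hook's signature changes across Xorg server versions. A hypothetical shape for such a wrapper (not the actual compat-api.h code):

    /* Hypothetical compat shim: forward to the screen hook when set. */
    static inline void
    SourceValidate(DrawablePtr drawable,
                   int x, int y, int width, int height,
                   unsigned int subWindowMode)
    {
        ScreenPtr screen = drawable->pScreen;
        if (screen->SourceValidate)
            screen->SourceValidate(drawable, x, y, width, height,
                                   subWindowMode);
    }
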
diff --git a/src/sna/fb/fbpict.h b/src/sna/fb/fbpict.h
index 1ce09df25..560138533 100644
--- a/src/sna/fb/fbpict.h
+++ b/src/sna/fb/fbpict.h
@@ -24,17 +24,23 @@
#ifndef FBPICT_H
#define FBPICT_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <picturestr.h>
+
#include "sfb.h"
extern void
fbComposite(CARD8 op,
- PicturePtr pSrc,
- PicturePtr pMask,
- PicturePtr pDst,
- INT16 xSrc,
- INT16 ySrc,
- INT16 xMask,
- INT16 yMask, INT16 xDst, INT16 yDst, CARD16 width, CARD16 height);
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc, INT16 ySrc,
+ INT16 xMask, INT16 yMask,
+ INT16 xDst, INT16 yDst,
+ CARD16 width, CARD16 height);
extern pixman_image_t *image_from_pict(PicturePtr pict,
Bool has_clip,
diff --git a/src/sna/fb/fbpoint.c b/src/sna/fb/fbpoint.c
index 3df79a261..c5f0f876f 100644
--- a/src/sna/fb/fbpoint.c
+++ b/src/sna/fb/fbpoint.c
@@ -93,10 +93,10 @@ fbPolyPoint(DrawablePtr drawable, GCPtr gc,
int xoff, int yoff,
FbBits and, FbBits xor);
- DBG(("%s x %d, clip=[(%d, %d), (%d, %d)]x%d\n", __FUNCTION__, n,
+ DBG(("%s x %d, clip=[(%d, %d), (%d, %d)]x%ld\n", __FUNCTION__, n,
gc->pCompositeClip->extents.x1, gc->pCompositeClip->extents.y1,
gc->pCompositeClip->extents.x2, gc->pCompositeClip->extents.y2,
- RegionNumRects(gc->pCompositeClip)));
+ (long)RegionNumRects(gc->pCompositeClip)));
if (mode == CoordModePrevious)
fbFixCoordModePrevious(n, pt);
diff --git a/src/sna/fb/fbseg.c b/src/sna/fb/fbseg.c
index 5b8173f08..67ad38958 100644
--- a/src/sna/fb/fbseg.c
+++ b/src/sna/fb/fbseg.c
@@ -353,7 +353,8 @@ fbSelectBres(DrawablePtr drawable, GCPtr gc)
FbBres *bres;
DBG(("%s: line=%d, fill=%d, and=%lx, bgand=%lx\n",
- __FUNCTION__, gc->lineStyle, gc->fillStyle, pgc->and, pgc->bgand));
+ __FUNCTION__, gc->lineStyle, gc->fillStyle,
+ (long)pgc->and, (long)pgc->bgand));
assert(gc->lineWidth == 0);
if (gc->lineStyle == LineSolid) {
diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 501266241..4d92adcf7 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -46,9 +46,6 @@
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0
-#define PREFER_BLT_FILL 1
-#define PREFER_BLT_COPY 1
-
#define MAX_3D_SIZE 2048
#define MAX_3D_PITCH 8192
@@ -175,7 +172,7 @@ gen2_get_card_format(struct sna *sna, uint32_t format)
if (i8xx_tex_formats[i].fmt == format)
return i8xx_tex_formats[i].card_fmt;
- if (sna->kgem.gen < 21) {
+ if (sna->kgem.gen < 021) {
/* Whilst these are not directly supported on 830/845,
* we only enable them when we can implicitly convert
* them to a supported variant through the texture
@@ -203,7 +200,7 @@ gen2_check_format(struct sna *sna, PicturePtr p)
if (i8xx_tex_formats[i].fmt == p->format)
return true;
- if (sna->kgem.gen > 21) {
+ if (sna->kgem.gen > 021) {
for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++)
if (i85x_tex_formats[i].fmt == p->format)
return true;
@@ -396,6 +393,15 @@ gen2_get_blend_factors(const struct sna_composite_op *op,
cblend |= TB0C_OP_MODULATE;
ablend |= TB0A_OP_MODULATE;
+ } else if (op->mask.is_solid) {
+ cblend |= TB0C_ARG2_SEL_DIFFUSE;
+ ablend |= TB0A_ARG2_SEL_DIFFUSE;
+
+ if (op->dst.format == PICT_a8 || !op->has_component_alpha)
+ cblend |= TB0C_ARG2_REPLICATE_ALPHA;
+
+ cblend |= TB0C_OP_MODULATE;
+ ablend |= TB0A_OP_MODULATE;
} else {
cblend |= TB0C_OP_ARG1;
ablend |= TB0A_OP_ARG1;
@@ -504,6 +510,7 @@ static void gen2_emit_invariant(struct sna *sna)
ENABLE_TEX_CACHE);
BATCH(_3DSTATE_STIPPLE);
+ BATCH(0);
BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) |
TEXPIPE_COLOR |
@@ -536,9 +543,9 @@ static void gen2_emit_invariant(struct sna *sna)
}
static void
-gen2_get_batch(struct sna *sna)
+gen2_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
if (!kgem_check_batch(&sna->kgem, INVARIANT_SIZE+40)) {
DBG(("%s: flushing batch: size %d > %d\n",
@@ -574,7 +581,7 @@ static void gen2_emit_target(struct sna *sna, const struct sna_composite_op *op)
{
assert(!too_large(op->dst.width, op->dst.height));
assert(op->dst.bo->pitch >= 8 && op->dst.bo->pitch <= MAX_3D_PITCH);
- assert(sna->render_state.gen2.vertex_offset == 0);
+ assert(sna->render.vertex_offset == 0);
if (sna->render_state.gen2.target == op->dst.bo->unique_id) {
kgem_bo_mark_dirty(op->dst.bo);
@@ -662,7 +669,7 @@ static void gen2_emit_composite_state(struct sna *sna,
uint32_t cblend, ablend;
int tex;
- gen2_get_batch(sna);
+ gen2_get_batch(sna, op);
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
@@ -726,6 +733,12 @@ static void gen2_emit_composite_state(struct sna *sna,
else
texcoordfmt |= TEXCOORDFMT_3D << (2*tex);
gen2_emit_texture(sna, &op->mask, tex++);
+ } else if (op->mask.is_solid) {
+ if (op->mask.u.gen2.pixel != sna->render_state.gen2.diffuse) {
+ BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
+ BATCH(op->mask.u.gen2.pixel);
+ sna->render_state.gen2.diffuse = op->mask.u.gen2.pixel;
+ }
}
v = _3DSTATE_VERTEX_FORMAT_2_CMD | texcoordfmt;
@@ -749,9 +762,9 @@ gen2_emit_composite_linear(struct sna *sna,
{
float v;
- v = (x * channel->u.gen2.linear_dx +
- y * channel->u.gen2.linear_dy +
- channel->u.gen2.linear_offset);
+ v = (x * channel->u.linear.dx +
+ y * channel->u.linear.dy +
+ channel->u.linear.offset);
DBG(("%s: (%d, %d) -> %f\n", __FUNCTION__, x, y, v));
VERTEX(v);
VERTEX(v);
@@ -895,33 +908,30 @@ gen2_emit_composite_primitive_affine(struct sna *sna,
const struct sna_composite_rectangles *r)
{
PictTransform *transform = op->src.transform;
- int16_t dst_x = r->dst.x + op->dst.x;
- int16_t dst_y = r->dst.y + op->dst.y;
int src_x = r->src.x + (int)op->src.offset[0];
int src_y = r->src.y + (int)op->src.offset[1];
- float sx, sy;
+ float *v;
- _sna_get_transformed_coordinates(src_x + r->width, src_y + r->height,
- transform,
- &sx, &sy);
+ v = (float *)sna->kgem.batch + sna->kgem.nbatch;
+ sna->kgem.nbatch += 12;
- gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
- VERTEX(sx * op->src.scale[0]);
- VERTEX(sy * op->src.scale[1]);
+ v[8] = v[4] = r->dst.x + op->dst.x;
+ v[0] = v[4] + r->width;
- _sna_get_transformed_coordinates(src_x, src_y + r->height,
- transform,
- &sx, &sy);
- gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
- VERTEX(sx * op->src.scale[0]);
- VERTEX(sy * op->src.scale[1]);
+ v[9] = r->dst.y + op->dst.y;
+ v[5] = v[1] = v[9] + r->height;
- _sna_get_transformed_coordinates(src_x, src_y,
- transform,
- &sx, &sy);
- gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
- VERTEX(sx * op->src.scale[0]);
- VERTEX(sy * op->src.scale[1]);
+ _sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
+ transform, op->src.scale,
+ &v[2], &v[3]);
+
+ _sna_get_transformed_scaled(src_x, src_y + r->height,
+ transform, op->src.scale,
+ &v[6], &v[7]);
+
+ _sna_get_transformed_scaled(src_x, src_y,
+ transform, op->src.scale,
+ &v[10], &v[11]);
}
fastcall static void
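
The rewritten affine primitive above reserves twelve floats directly in the batch and fills them as three four-float (x, y, s, t) vertices — the bottom-right, bottom-left and top-left corners of a PRIM3D_RECTLIST rectangle — with _sna_get_transformed_scaled() folding the source transform and texture scale into a single step. The layout it writes, restated as a generic helper:

    /* Three (x, y, s, t) vertices in the order the hunk above emits:
     * v[0..3] bottom-right, v[4..7] bottom-left, v[8..11] top-left. */
    static void emit_rect(float *v, float x, float y, float w, float h,
                          const float st[6])
    {
        v[0] = x + w; v[1]  = y + h; v[2]  = st[0]; v[3]  = st[1];
        v[4] = x;     v[5]  = y + h; v[6]  = st[2]; v[7]  = st[3];
        v[8] = x;     v[9]  = y;     v[10] = st[4]; v[11] = st[5];
    }
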
@@ -959,7 +969,7 @@ static void gen2_magic_ca_pass(struct sna *sna,
return;
DBG(("%s: batch=%x, vertex=%x\n", __FUNCTION__,
- sna->kgem.nbatch, sna->render_state.gen2.vertex_offset));
+ sna->kgem.nbatch, sna->render.vertex_offset));
assert(op->mask.bo);
assert(op->has_component_alpha);
@@ -978,7 +988,7 @@ static void gen2_magic_ca_pass(struct sna *sna,
BATCH(ablend);
sna->render_state.gen2.ls2 = 0;
- src = sna->kgem.batch + sna->render_state.gen2.vertex_offset;
+ src = sna->kgem.batch + sna->render.vertex_offset;
dst = sna->kgem.batch + sna->kgem.nbatch;
n = 1 + sna->render.vertex_index;
sna->kgem.nbatch += n;
@@ -993,12 +1003,12 @@ static void gen2_vertex_flush(struct sna *sna,
if (sna->render.vertex_index == 0)
return;
- sna->kgem.batch[sna->render_state.gen2.vertex_offset] |=
+ sna->kgem.batch[sna->render.vertex_offset] |=
sna->render.vertex_index - 1;
gen2_magic_ca_pass(sna, op);
- sna->render_state.gen2.vertex_offset = 0;
+ sna->render.vertex_offset = 0;
sna->render.vertex_index = 0;
}
@@ -1006,7 +1016,6 @@ inline static int gen2_get_rectangles(struct sna *sna,
const struct sna_composite_op *op,
int want)
{
- struct gen2_render_state *state = &sna->render_state.gen2;
int rem = batch_space(sna), size, need;
DBG(("%s: want=%d, floats_per_vertex=%d, rem=%d\n",
@@ -1030,16 +1039,17 @@ inline static int gen2_get_rectangles(struct sna *sna,
}
rem -= need;
- if (state->vertex_offset == 0) {
+ if (sna->render.vertex_offset == 0) {
if ((sna->kgem.batch[sna->kgem.nbatch-1] & ~0xffff) ==
(PRIM3D_INLINE | PRIM3D_RECTLIST)) {
uint32_t *b = &sna->kgem.batch[sna->kgem.nbatch-1];
+ assert(*b & 0xffff);
sna->render.vertex_index = 1 + (*b & 0xffff);
*b = PRIM3D_INLINE | PRIM3D_RECTLIST;
- state->vertex_offset = sna->kgem.nbatch - 1;
+ sna->render.vertex_offset = sna->kgem.nbatch - 1;
assert(!op->need_magic_ca_pass);
} else {
- state->vertex_offset = sna->kgem.nbatch;
+ sna->render.vertex_offset = sna->kgem.nbatch;
BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST);
}
}
@@ -1144,6 +1154,7 @@ gen2_composite_solid_init(struct sna *sna,
channel->filter = PictFilterNearest;
channel->repeat = RepeatNormal;
channel->is_solid = true;
+ channel->is_affine = true;
channel->width = 1;
channel->height = 1;
channel->pict_format = PICT_a8r8g8b8;
@@ -1251,12 +1262,12 @@ gen2_composite_linear_init(struct sna *sna,
dx /= sf;
dy /= sf;
- channel->u.gen2.linear_dx = dx;
- channel->u.gen2.linear_dy = dy;
- channel->u.gen2.linear_offset = -dx*(x0+dst_x-x) + -dy*(y0+dst_y-y);
+ channel->u.linear.dx = dx;
+ channel->u.linear.dy = dy;
+ channel->u.linear.offset = -dx*(x0+dst_x-x) + -dy*(y0+dst_y-y);
DBG(("%s: dx=%f, dy=%f, offset=%f\n",
- __FUNCTION__, dx, dy, channel->u.gen2.linear_offset));
+ __FUNCTION__, dx, dy, channel->u.linear.offset));
return channel->bo != NULL;
}
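
With the rename from u.gen2.linear_* to u.linear.*, the channel stores a normalized gradient direction (dx, dy) and a precomputed offset, so the gradient coordinate of any pixel reduces to a dot product. A sketch of the per-pixel evaluation these fields imply (hypothetical helper, not part of the diff):

	static float
	linear_gradient_coord(float px, float py,
			      float dx, float dy, float offset)
	{
		/* 0 at the gradient start point, 1 at the end point */
		return dx * px + dy * py + offset;
	}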
@@ -1304,7 +1315,8 @@ static bool
gen2_check_card_format(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
- int x, int y, int w, int h)
+ int x, int y, int w, int h,
+ bool *fixup_alpha)
{
uint32_t format = picture->format;
unsigned int i;
@@ -1316,7 +1328,7 @@ gen2_check_card_format(struct sna *sna,
for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) {
if (i85x_tex_formats[i].fmt == format) {
- if (sna->kgem.gen >= 21)
+ if (sna->kgem.gen >= 021)
return true;
if (source_is_covered(picture, x, y, w,h)) {
@@ -1324,10 +1336,12 @@ gen2_check_card_format(struct sna *sna,
return true;
}
+ *fixup_alpha = true;
return false;
}
}
+ *fixup_alpha = false;
return false;
}
@@ -1343,6 +1357,7 @@ gen2_composite_picture(struct sna *sna,
PixmapPtr pixmap;
uint32_t color;
int16_t dx, dy;
+ bool fixup_alpha;
DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
__FUNCTION__, x, y, w, h, dst_x, dst_y));
@@ -1417,9 +1432,9 @@ gen2_composite_picture(struct sna *sna,
} else
channel->transform = picture->transform;
- if (!gen2_check_card_format(sna, picture, channel, x, y, w ,h))
+ if (!gen2_check_card_format(sna, picture, channel, x, y, w, h, &fixup_alpha))
return sna_render_picture_convert(sna, picture, channel, pixmap,
- x, y, w, h, dst_x, dst_y);
+ x, y, w, h, dst_x, dst_y, fixup_alpha);
channel->pict_format = picture->format;
if (too_large(pixmap->drawable.width, pixmap->drawable.height))
@@ -1499,49 +1514,6 @@ gen2_composite_set_target(struct sna *sna,
}
static bool
-try_blt(struct sna *sna,
- PicturePtr dst,
- PicturePtr src,
- int width, int height)
-{
- uint32_t color;
-
- if (sna->kgem.mode != KGEM_RENDER) {
- DBG(("%s: already performing BLT\n", __FUNCTION__));
- return true;
- }
-
- if (too_large(width, height)) {
- DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
- __FUNCTION__, width, height));
- return true;
- }
-
- if (too_large(dst->pDrawable->width, dst->pDrawable->height)) {
- DBG(("%s: target too large for 3D pipe (%d, %d)\n",
- __FUNCTION__,
- dst->pDrawable->width, dst->pDrawable->height));
- return true;
- }
-
- /* If it is a solid, try to use the BLT paths */
- if (sna_picture_is_solid(src, &color))
- return true;
-
- if (!src->pDrawable)
- return false;
-
- if (too_large(src->pDrawable->width, src->pDrawable->height)) {
- DBG(("%s: source too large for 3D pipe (%d, %d)\n",
- __FUNCTION__,
- src->pDrawable->width, src->pDrawable->height));
- return true;
- }
-
- return !is_gpu(src->pDrawable);
-}
-
-static bool
is_unhandled_gradient(PicturePtr picture)
{
if (picture->pDrawable)
@@ -1563,12 +1535,6 @@ has_alphamap(PicturePtr p)
}
static bool
-untransformed(PicturePtr p)
-{
- return !p->transform || pixman_transform_is_int_translate(p->transform);
-}
-
-static bool
need_upload(PicturePtr p)
{
return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
@@ -1614,7 +1580,6 @@ gen2_composite_fallback(struct sna *sna,
PicturePtr mask,
PicturePtr dst)
{
- struct sna_pixmap *priv;
PixmapPtr src_pixmap;
PixmapPtr mask_pixmap;
PixmapPtr dst_pixmap;
@@ -1653,8 +1618,7 @@ gen2_composite_fallback(struct sna *sna,
}
/* If anything is on the GPU, push everything out to the GPU */
- priv = sna_pixmap(dst_pixmap);
- if (priv && priv->gpu_damage && !priv->clear) {
+ if (dst_use_gpu(dst_pixmap)) {
DBG(("%s: dst is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
@@ -1689,14 +1653,14 @@ gen2_composite_fallback(struct sna *sna,
if (too_large(dst_pixmap->drawable.width,
dst_pixmap->drawable.height) &&
- (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage))) {
+ dst_is_cpu(dst_pixmap)) {
DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
return true;
}
DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
__FUNCTION__));
- return false;
+ return dst_use_cpu(dst_pixmap);
}
static int
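
The open-coded damage inspection (priv->gpu_damage && !priv->clear) gives way to the dst_use_gpu()/dst_is_cpu()/dst_use_cpu() predicates, centralizing the fallback policy. A sketch of the shape such a predicate takes — the field names are illustrative, not the driver's exact layout:

	struct pixmap_priv {
		void *gpu_damage, *cpu_damage;
		int clear;
	};

	static int
	dst_prefers_gpu(const struct pixmap_priv *priv)
	{
		/* Anything already damaged on the GPU (and not merely a
		 * pending clear) pulls the whole operation onto the GPU. */
		return priv && priv->gpu_damage && !priv->clear;
	}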
@@ -1709,6 +1673,12 @@ reuse_source(struct sna *sna,
if (src_x != msk_x || src_y != msk_y)
return false;
+ if (sna_picture_is_solid(mask, &color))
+ return gen2_composite_solid_init(sna, mc, color);
+
+ if (sc->is_solid)
+ return false;
+
if (src == mask) {
DBG(("%s: mask is source\n", __FUNCTION__));
*mc = *sc;
@@ -1716,12 +1686,6 @@ reuse_source(struct sna *sna,
return true;
}
- if (sna_picture_is_solid(mask, &color))
- return gen2_composite_solid_init(sna, mc, color);
-
- if (sc->is_solid)
- return false;
-
if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
return false;
@@ -1773,13 +1737,8 @@ gen2_render_composite(struct sna *sna,
return false;
}
- /* Try to use the BLT engine unless it implies a
- * 3D -> 2D context switch.
- */
if (mask == NULL &&
- try_blt(sna, dst, src, width, height) &&
- sna_blt_composite(sna,
- op, src, dst,
+ sna_blt_composite(sna, op, src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
@@ -1805,6 +1764,8 @@ gen2_render_composite(struct sna *sna,
}
tmp->op = op;
+
+ sna_render_composite_redirect_init(tmp);
if (too_large(tmp->dst.width, tmp->dst.height) ||
tmp->dst.bo->pitch > MAX_3D_PITCH) {
if (!sna_render_composite_redirect(sna, tmp,
@@ -1818,6 +1779,8 @@ gen2_render_composite(struct sna *sna,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
+ DBG(("%s: fallback -- unable to prepare source\n",
+ __FUNCTION__));
goto cleanup_dst;
case 0:
gen2_composite_solid_init(sna, &tmp->src, 0);
@@ -1841,6 +1804,8 @@ gen2_render_composite(struct sna *sna,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
+ DBG(("%s: fallback -- unable to prepare mask\n",
+ __FUNCTION__));
goto cleanup_src;
case 0:
gen2_composite_solid_init(sna, &tmp->mask, 0);
@@ -1857,8 +1822,12 @@ gen2_render_composite(struct sna *sna,
tmp->has_component_alpha = true;
if (gen2_blend_op[op].src_alpha &&
(gen2_blend_op[op].src_blend != BLENDFACTOR_ZERO)) {
- if (op != PictOpOver)
- return false;
+ if (op != PictOpOver) {
+ DBG(("%s: fallback -- unsupported CA blend (src_blend=%d)\n",
+ __FUNCTION__,
+ gen2_blend_op[op].src_blend));
+ goto cleanup_src;
+ }
tmp->need_magic_ca_pass = true;
tmp->op = PictOpOutReverse;
@@ -1866,8 +1835,12 @@ gen2_render_composite(struct sna *sna,
}
/* convert solid to a texture (pure convenience) */
- if (tmp->mask.is_solid)
+ if (tmp->mask.is_solid && tmp->src.is_solid) {
+ assert(tmp->mask.is_affine);
tmp->mask.bo = sna_render_get_solid(sna, tmp->mask.u.gen2.pixel);
+ if (!tmp->mask.bo)
+ goto cleanup_src;
+ }
}
tmp->floats_per_vertex = 2;
@@ -1880,18 +1853,27 @@ gen2_render_composite(struct sna *sna,
tmp->prim_emit = gen2_emit_composite_primitive;
if (tmp->mask.bo) {
if (tmp->mask.transform == NULL) {
- if (tmp->src.is_solid)
+ if (tmp->src.is_solid) {
+ assert(tmp->floats_per_rect == 12);
tmp->prim_emit = gen2_emit_composite_primitive_constant_identity_mask;
+ }
}
} else {
- if (tmp->src.is_solid)
+ if (tmp->src.is_solid) {
+ assert(tmp->floats_per_rect == 6);
tmp->prim_emit = gen2_emit_composite_primitive_constant;
- else if (tmp->src.is_linear)
+ } else if (tmp->src.is_linear) {
+ assert(tmp->floats_per_rect == 12);
tmp->prim_emit = gen2_emit_composite_primitive_linear;
- else if (tmp->src.transform == NULL)
+ } else if (tmp->src.transform == NULL) {
+ assert(tmp->floats_per_rect == 12);
tmp->prim_emit = gen2_emit_composite_primitive_identity;
- else if (tmp->src.is_affine)
+ } else if (tmp->src.is_affine) {
+ assert(tmp->floats_per_rect == 12);
+ tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
+ tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
tmp->prim_emit = gen2_emit_composite_primitive_affine;
+ }
}
tmp->blt = gen2_render_composite_blt;
@@ -1905,8 +1887,11 @@ gen2_render_composite(struct sna *sna,
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
- NULL))
+ NULL)) {
+ DBG(("%s: fallback, operation does not fit into GTT\n",
+ __FUNCTION__));
goto cleanup_mask;
+ }
}
gen2_emit_composite_state(sna, tmp);
@@ -2016,8 +2001,8 @@ gen2_emit_composite_spans_primitive_affine_source(struct sna *sna,
{
PictTransform *transform = op->base.src.transform;
uint32_t alpha = (uint8_t)(255 * opacity) << 24;
- float x, y, *v;
-
+ float *v;
+
v = (float *)sna->kgem.batch + sna->kgem.nbatch;
sna->kgem.nbatch += 15;
@@ -2029,26 +2014,20 @@ gen2_emit_composite_spans_primitive_affine_source(struct sna *sna,
*((uint32_t *)v + 7) = alpha;
*((uint32_t *)v + 12) = alpha;
- _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x2,
- (int)op->base.src.offset[1] + box->y2,
- transform,
- &x, &y);
- v[3] = x * op->base.src.scale[0];
- v[4] = y * op->base.src.scale[1];
+ _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
+ (int)op->base.src.offset[1] + box->y2,
+ transform, op->base.src.scale,
+ &v[3], &v[4]);
- _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
- (int)op->base.src.offset[1] + box->y2,
- transform,
- &x, &y);
- v[8] = x * op->base.src.scale[0];
- v[9] = y * op->base.src.scale[1];
+ _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
+ (int)op->base.src.offset[1] + box->y2,
+ transform, op->base.src.scale,
+ &v[8], &v[9]);
- _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
- (int)op->base.src.offset[1] + box->y1,
- transform,
- &x, &y);
- v[13] = x * op->base.src.scale[0];
- v[14] = y * op->base.src.scale[1];
+ _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
+ (int)op->base.src.offset[1] + box->y1,
+ transform, op->base.src.scale,
+ &v[13], &v[14]);
}
static void
@@ -2131,7 +2110,7 @@ static void gen2_emit_composite_spans_state(struct sna *sna,
{
uint32_t unwind;
- gen2_get_batch(sna);
+ gen2_get_batch(sna, &op->base);
gen2_emit_target(sna, &op->base);
unwind = sna->kgem.nbatch;
@@ -2248,7 +2227,7 @@ gen2_check_composite_spans(struct sna *sna,
return false;
if (need_tiling(sna, width, height)) {
- if (!is_gpu(dst->pDrawable)) {
+ if (!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
DBG(("%s: fallback, tiled operation not on GPU\n",
__FUNCTION__));
return false;
@@ -2289,6 +2268,8 @@ gen2_render_composite_spans(struct sna *sna,
}
tmp->base.op = op;
+
+ sna_render_composite_redirect_init(&tmp->base);
if (too_large(tmp->base.dst.width, tmp->base.dst.height) ||
tmp->base.dst.bo->pitch > MAX_3D_PITCH) {
if (!sna_render_composite_redirect(sna, &tmp->base,
@@ -2321,8 +2302,11 @@ gen2_render_composite_spans(struct sna *sna,
tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
if (tmp->base.src.transform == NULL)
tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source;
- else if (tmp->base.src.is_affine)
+ else if (tmp->base.src.is_affine) {
+ tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
+ tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source;
+ }
}
tmp->base.mask.bo = NULL;
tmp->base.floats_per_rect = 3*tmp->base.floats_per_vertex;
@@ -2387,7 +2371,7 @@ static void gen2_emit_fill_composite_state(struct sna *sna,
{
uint32_t ls1;
- gen2_get_batch(sna);
+ gen2_get_batch(sna, op);
gen2_emit_target(sna, op);
ls1 = sna->kgem.nbatch;
@@ -2443,24 +2427,6 @@ gen2_render_fill_boxes_try_blt(struct sna *sna,
pixel, box, n);
}
-static inline bool prefer_blt_fill(struct sna *sna)
-{
-#if PREFER_BLT_FILL
- return true;
-#else
- return sna->kgem.mode != KGEM_RENDER;
-#endif
-}
-
-static inline bool prefer_blt_copy(struct sna *sna, unsigned flags)
-{
-#if PREFER_BLT_COPY
- return true;
-#else
- return sna->kgem.mode != KGEM_RENDER;
-#endif
-}
-
static bool
gen2_render_fill_boxes(struct sna *sna,
CARD8 op,
@@ -2483,6 +2449,11 @@ gen2_render_fill_boxes(struct sna *sna,
dst, dst_bo,
box, n);
#endif
+ if (gen2_render_fill_boxes_try_blt(sna, op, format, color,
+ dst, dst_bo,
+ box, n))
+ return true;
+
DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n",
__FUNCTION__, op, (int)format,
@@ -2493,11 +2464,6 @@ gen2_render_fill_boxes(struct sna *sna,
!gen2_check_dst_format(format)) {
DBG(("%s: try blt, too large or incompatible destination\n",
__FUNCTION__));
- if (gen2_render_fill_boxes_try_blt(sna, op, format, color,
- dst, dst_bo,
- box, n))
- return true;
-
if (!gen2_check_dst_format(format))
return false;
@@ -2506,12 +2472,6 @@ gen2_render_fill_boxes(struct sna *sna,
dst, dst_bo, box, n);
}
- if (prefer_blt_fill(sna) &&
- gen2_render_fill_boxes_try_blt(sna, op, format, color,
- dst, dst_bo,
- box, n))
- return true;
-
if (op == PictOpClear)
pixel = 0;
else if (!sna_get_pixel_from_rgba(&pixel,
@@ -2572,7 +2532,7 @@ static void gen2_emit_fill_state(struct sna *sna,
{
uint32_t ls1;
- gen2_get_batch(sna);
+ gen2_get_batch(sna, op);
gen2_emit_target(sna, op);
ls1 = sna->kgem.nbatch;
@@ -2683,8 +2643,7 @@ gen2_render_fill(struct sna *sna, uint8_t alu,
#endif
/* Prefer to use the BLT if already engaged */
- if (prefer_blt_fill(sna) &&
- sna_blt_fill(sna, alu,
+ if (sna_blt_fill(sna, alu,
dst_bo, dst->drawable.bitsPerPixel,
color,
tmp))
@@ -2693,10 +2652,7 @@ gen2_render_fill(struct sna *sna, uint8_t alu,
/* Must use the BLT if we can't RENDER... */
if (too_large(dst->drawable.width, dst->drawable.height) ||
dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH)
- return sna_blt_fill(sna, alu,
- dst_bo, dst->drawable.bitsPerPixel,
- color,
- tmp);
+ return false;
tmp->base.op = alu;
tmp->base.dst.pixmap = dst;
@@ -2761,16 +2717,14 @@ gen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
#endif
/* Prefer to use the BLT if already engaged */
- if (prefer_blt_fill(sna) &&
- gen2_render_fill_one_try_blt(sna, dst, bo, color,
+ if (gen2_render_fill_one_try_blt(sna, dst, bo, color,
x1, y1, x2, y2, alu))
return true;
/* Must use the BLT if we can't RENDER... */
if (too_large(dst->drawable.width, dst->drawable.height) ||
bo->pitch < 8 || bo->pitch > MAX_3D_PITCH)
- return gen2_render_fill_one_try_blt(sna, dst, bo, color,
- x1, y1, x2, y2, alu);
+ return false;
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
kgem_submit(&sna->kgem);
@@ -2865,7 +2819,7 @@ static void gen2_emit_copy_state(struct sna *sna, const struct sna_composite_op
{
uint32_t ls1, v;
- gen2_get_batch(sna);
+ gen2_get_batch(sna, op);
if (kgem_bo_is_dirty(op->src.bo)) {
if (op->src.bo == op->dst.bo)
@@ -2925,8 +2879,7 @@ gen2_render_copy_boxes(struct sna *sna, uint8_t alu,
DBG(("%s (%d, %d)->(%d, %d) x %d\n",
__FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
- if (prefer_blt_copy(sna, flags) &&
- sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
+ if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
@@ -3091,8 +3044,7 @@ gen2_render_copy(struct sna *sna, uint8_t alu,
#endif
/* Prefer to use the BLT */
- if (prefer_blt_copy(sna, 0) &&
- sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
+ if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy(sna, alu,
src_bo, dst_bo,
dst->drawable.bitsPerPixel,
@@ -3145,7 +3097,6 @@ gen2_render_reset(struct sna *sna)
{
sna->render_state.gen2.need_invariant = true;
sna->render_state.gen2.logic_op_enabled = 0;
- sna->render_state.gen2.vertex_offset = 0;
sna->render_state.gen2.target = 0;
sna->render_state.gen2.ls1 = 0;
@@ -3160,6 +3111,7 @@ static void
gen2_render_flush(struct sna *sna)
{
assert(sna->render.vertex_index == 0);
+ assert(sna->render.vertex_offset == 0);
}
static void
@@ -3168,13 +3120,13 @@ gen2_render_context_switch(struct kgem *kgem,
{
struct sna *sna = container_of(kgem, struct sna, kgem);
- if (!kgem->mode)
+ if (!kgem->nbatch)
return;
/* Reload BLT registers following a lost context */
sna->blt_state.fill_bo = 0;
- if (kgem_is_idle(kgem)) {
+ if (kgem_ring_is_idle(kgem, kgem->ring)) {
DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
_kgem_submit(kgem);
}
@@ -3191,10 +3143,12 @@ bool gen2_render_init(struct sna *sna)
*/
#if !NO_COMPOSITE
render->composite = gen2_render_composite;
+ render->prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
render->check_composite_spans = gen2_check_composite_spans;
render->composite_spans = gen2_render_composite_spans;
+ render->prefer_gpu |= PREFER_GPU_SPANS;
#endif
render->fill_boxes = gen2_render_fill_boxes;
render->fill = gen2_render_fill;
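
A theme running through this file (and gen3_render.c below) is the migration of the open-primitive bookkeeping out of the per-generation state and into the shared render state, which is what lets the common flush/finish/close paths assert on and reset it. Schematically, with the unrelated fields elided:

	/* before: duplicated per generation */
	struct gen2_render_state { unsigned vertex_offset; /* ... */ };
	struct gen3_render_state { unsigned vertex_offset; /* ... */ };

	/* after: one copy in the shared state */
	struct sna_render {
		unsigned vertex_offset;	/* batch index of the open primitive */
		int vertex_index;	/* vertices emitted so far */
		/* ... */
	};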
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 7c303f419..95d44ab56 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -431,6 +431,26 @@ gen3_emit_composite_primitive_constant(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_boxes_constant(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ v[0] = box->x2;
+ v[1] = box->y2;
+
+ v[2] = box->x1;
+ v[3] = box->y2;
+
+ v[4] = box->x1;
+ v[5] = box->y1;
+
+ box++;
+ v += 6;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_primitive_identity_gradient(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -457,6 +477,32 @@ gen3_emit_composite_primitive_identity_gradient(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ v[0] = box->x2;
+ v[1] = box->y2;
+ v[2] = box->x2 + op->src.offset[0];
+ v[3] = box->y2 + op->src.offset[1];
+
+ v[4] = box->x1;
+ v[5] = box->y2;
+ v[6] = box->x1 + op->src.offset[0];
+ v[7] = box->y2 + op->src.offset[1];
+
+ v[8] = box->x1;
+ v[9] = box->y1;
+ v[10] = box->x1 + op->src.offset[0];
+ v[11] = box->y1 + op->src.offset[1];
+
+ v += 12;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_primitive_affine_gradient(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -494,6 +540,40 @@ gen3_emit_composite_primitive_affine_gradient(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ const PictTransform *transform = op->src.transform;
+
+ do {
+ v[0] = box->x2;
+ v[1] = box->y2;
+ sna_get_transformed_coordinates(box->x2 + op->src.offset[0],
+ box->y2 + op->src.offset[1],
+ transform,
+ &v[2], &v[3]);
+
+ v[4] = box->x1;
+ v[5] = box->y2;
+ sna_get_transformed_coordinates(box->x1 + op->src.offset[0],
+ box->y2 + op->src.offset[1],
+ transform,
+ &v[6], &v[7]);
+
+ v[8] = box->x1;
+ v[9] = box->y1;
+ sna_get_transformed_coordinates(box->x1 + op->src.offset[0],
+ box->y1 + op->src.offset[1],
+ transform,
+ &v[10], &v[11]);
+
+ box++;
+ v += 12;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_primitive_identity_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -519,6 +599,28 @@ gen3_emit_composite_primitive_identity_source(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ v[0] = box->x2 + op->dst.x;
+ v[8] = v[4] = box->x1 + op->dst.x;
+ v[5] = v[1] = box->y2 + op->dst.y;
+ v[9] = box->y1 + op->dst.y;
+
+ v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
+ v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
+
+ v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
+ v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
+
+ v += 12;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -544,6 +646,28 @@ gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_boxes_identity_source_no_offset(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ v[0] = box->x2;
+ v[8] = v[4] = box->x1;
+ v[5] = v[1] = box->y2;
+ v[9] = box->y1;
+
+ v[10] = v[6] = box->x1 * op->src.scale[0];
+ v[2] = box->x2 * op->src.scale[0];
+
+ v[11] = box->y1 * op->src.scale[1];
+ v[7] = v[3] = box->y2 * op->src.scale[1];
+
+ v += 12;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_primitive_affine_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -553,29 +677,60 @@ gen3_emit_composite_primitive_affine_source(struct sna *sna,
int16_t dst_y = r->dst.y + op->dst.y;
int src_x = r->src.x + (int)op->src.offset[0];
int src_y = r->src.y + (int)op->src.offset[1];
- float sx, sy;
+ float *v;
- _sna_get_transformed_coordinates(src_x + r->width, src_y + r->height,
- transform,
- &sx, &sy);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
- gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
- OUT_VERTEX(sx * op->src.scale[0]);
- OUT_VERTEX(sy * op->src.scale[1]);
+ v[0] = dst_x + r->width;
+ v[5] = v[1] = dst_y + r->height;
+ v[8] = v[4] = dst_x;
+ v[9] = dst_y;
- _sna_get_transformed_coordinates(src_x, src_y + r->height,
- transform,
- &sx, &sy);
- gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
- OUT_VERTEX(sx * op->src.scale[0]);
- OUT_VERTEX(sy * op->src.scale[1]);
+ _sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
+ transform, op->src.scale,
+ &v[2], &v[3]);
- _sna_get_transformed_coordinates(src_x, src_y,
- transform,
- &sx, &sy);
- gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
- OUT_VERTEX(sx * op->src.scale[0]);
- OUT_VERTEX(sy * op->src.scale[1]);
+ _sna_get_transformed_scaled(src_x, src_y + r->height,
+ transform, op->src.scale,
+ &v[6], &v[7]);
+
+ _sna_get_transformed_scaled(src_x, src_y,
+ transform, op->src.scale,
+ &v[10], &v[11]);
+}
+
+fastcall static void
+gen3_emit_composite_boxes_affine_source(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ const PictTransform *transform = op->src.transform;
+
+ do {
+ v[0] = box->x2;
+ v[5] = v[1] = box->y2;
+ v[8] = v[4] = box->x1;
+ v[9] = box->y1;
+
+ _sna_get_transformed_scaled(box->x2 + op->src.offset[0],
+ box->y2 + op->src.offset[1],
+ transform, op->src.scale,
+ &v[2], &v[3]);
+
+ _sna_get_transformed_scaled(box->x1 + op->src.offset[0],
+ box->y2 + op->src.offset[1],
+ transform, op->src.scale,
+ &v[6], &v[7]);
+
+ _sna_get_transformed_scaled(box->x1 + op->src.offset[0],
+ box->y1 + op->src.offset[1],
+ transform, op->src.scale,
+ &v[10], &v[11]);
+
+ v += 12;
+ box++;
+ } while (--nbox);
}
fastcall static void
@@ -929,13 +1084,6 @@ gen3_composite_emit_shader(struct sna *sna,
if (mask->u.gen3.type == SHADER_NONE)
mask = NULL;
- if (mask && src->is_opaque &&
- gen3_blend_op[blend].src_alpha &&
- op->has_component_alpha) {
- src = mask;
- mask = NULL;
- }
-
id = (src->u.gen3.type |
src->is_affine << 4 |
src->alpha_fixup << 5 |
@@ -1298,9 +1446,9 @@ static void gen3_emit_invariant(struct sna *sna)
#define MAX_OBJECTS 3 /* worst case: dst + src + mask */
static void
-gen3_get_batch(struct sna *sna)
+gen3_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
if (!kgem_check_batch(&sna->kgem, 200)) {
DBG(("%s: flushing batch: size %d > %d\n",
@@ -1389,7 +1537,7 @@ static void gen3_emit_composite_state(struct sna *sna,
unsigned int tex_count, n;
uint32_t ss2;
- gen3_get_batch(sna);
+ gen3_get_batch(sna, op);
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
@@ -1578,11 +1726,11 @@ static void gen3_emit_composite_state(struct sna *sna,
gen3_composite_emit_shader(sna, op, op->op);
}
-static void gen3_magic_ca_pass(struct sna *sna,
+static bool gen3_magic_ca_pass(struct sna *sna,
const struct sna_composite_op *op)
{
if (!op->need_magic_ca_pass)
- return;
+ return false;
DBG(("%s(%d)\n", __FUNCTION__,
sna->render.vertex_index - sna->render.vertex_start));
@@ -1596,23 +1744,24 @@ static void gen3_magic_ca_pass(struct sna *sna,
OUT_BATCH(sna->render.vertex_start);
sna->render_state.gen3.last_blend = 0;
+ return true;
}
static void gen3_vertex_flush(struct sna *sna)
{
- assert(sna->render_state.gen3.vertex_offset);
+ assert(sna->render.vertex_offset);
DBG(("%s[%x] = %d\n", __FUNCTION__,
- 4*sna->render_state.gen3.vertex_offset,
+ 4*sna->render.vertex_offset,
sna->render.vertex_index - sna->render.vertex_start));
- sna->kgem.batch[sna->render_state.gen3.vertex_offset] =
+ sna->kgem.batch[sna->render.vertex_offset] =
PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
(sna->render.vertex_index - sna->render.vertex_start);
- sna->kgem.batch[sna->render_state.gen3.vertex_offset + 1] =
+ sna->kgem.batch[sna->render.vertex_offset + 1] =
sna->render.vertex_start;
- sna->render_state.gen3.vertex_offset = 0;
+ sna->render.vertex_offset = 0;
}
static int gen3_vertex_finish(struct sna *sna)
@@ -1622,24 +1771,27 @@ static int gen3_vertex_finish(struct sna *sna)
DBG(("%s: used=%d/%d, vbo active? %d\n",
__FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
sna->render.vbo ? sna->render.vbo->handle : 0));
+ assert(sna->render.vertex_offset == 0);
assert(sna->render.vertex_used);
assert(sna->render.vertex_used <= sna->render.vertex_size);
+ sna_vertex_wait__locked(&sna->render);
+
bo = sna->render.vbo;
if (bo) {
- if (sna->render_state.gen3.vertex_offset)
- gen3_vertex_flush(sna);
-
DBG(("%s: reloc = %d\n", __FUNCTION__,
sna->render.vertex_reloc[0]));
- sna->kgem.batch[sna->render.vertex_reloc[0]] =
- kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
- bo, I915_GEM_DOMAIN_VERTEX << 16, 0);
+ if (sna->render.vertex_reloc[0]) {
+ sna->kgem.batch[sna->render.vertex_reloc[0]] =
+ kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
+ bo, I915_GEM_DOMAIN_VERTEX << 16, 0);
- sna->render.vertex_reloc[0] = 0;
+ sna->render.vertex_reloc[0] = 0;
+ }
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
+ sna->render.vbo = NULL;
kgem_bo_destroy(&sna->kgem, bo);
}
@@ -1671,15 +1823,14 @@ static void gen3_vertex_close(struct sna *sna)
struct kgem_bo *bo, *free_bo = NULL;
unsigned int delta = 0;
- assert(sna->render_state.gen3.vertex_offset == 0);
+ assert(sna->render.vertex_offset == 0);
+ if (sna->render.vertex_reloc[0] == 0)
+ return;
DBG(("%s: used=%d/%d, vbo active? %d\n",
__FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
sna->render.vbo ? sna->render.vbo->handle : 0));
- if (sna->render.vertex_used == 0)
- return;
-
bo = sna->render.vbo;
if (bo) {
if (sna->render.vertex_size - sna->render.vertex_used < 64) {
@@ -1713,7 +1864,8 @@ static void gen3_vertex_close(struct sna *sna)
DBG(("%s: new vbo: %d\n", __FUNCTION__,
sna->render.vertex_used));
bo = kgem_create_linear(&sna->kgem,
- 4*sna->render.vertex_used, 0);
+ 4*sna->render.vertex_used,
+ CREATE_NO_THROTTLE);
if (bo) {
assert(bo->snoop == false);
kgem_bo_write(&sna->kgem, bo,
@@ -1724,15 +1876,11 @@ static void gen3_vertex_close(struct sna *sna)
}
}
- DBG(("%s: reloc = %d\n", __FUNCTION__,
- sna->render.vertex_reloc[0]));
-
- if (sna->render.vertex_reloc[0]) {
- sna->kgem.batch[sna->render.vertex_reloc[0]] =
- kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
- bo, I915_GEM_DOMAIN_VERTEX << 16, delta);
- sna->render.vertex_reloc[0] = 0;
- }
+ DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0]));
+ sna->kgem.batch[sna->render.vertex_reloc[0]] =
+ kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
+ bo, I915_GEM_DOMAIN_VERTEX << 16, delta);
+ sna->render.vertex_reloc[0] = 0;
if (sna->render.vbo == NULL) {
DBG(("%s: resetting vbo\n", __FUNCTION__));
@@ -1752,6 +1900,9 @@ static bool gen3_rectangle_begin(struct sna *sna,
struct gen3_render_state *state = &sna->render_state.gen3;
int ndwords, i1_cmd = 0, i1_len = 0;
+ if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
+ return true;
+
ndwords = 2;
if (op->need_magic_ca_pass)
ndwords += 100;
@@ -1774,14 +1925,15 @@ static bool gen3_rectangle_begin(struct sna *sna,
}
}
- if (sna->kgem.nbatch == 2 + state->last_vertex_offset) {
- state->vertex_offset = state->last_vertex_offset;
+ if (sna->kgem.nbatch == 2 + state->last_vertex_offset &&
+ !op->need_magic_ca_pass) {
+ sna->render.vertex_offset = state->last_vertex_offset;
} else {
- state->vertex_offset = sna->kgem.nbatch;
+ sna->render.vertex_offset = sna->kgem.nbatch;
OUT_BATCH(MI_NOOP); /* to be filled later */
OUT_BATCH(MI_NOOP);
sna->render.vertex_start = sna->render.vertex_index;
- state->last_vertex_offset = state->vertex_offset;
+ state->last_vertex_offset = sna->render.vertex_offset;
}
return true;
@@ -1790,13 +1942,28 @@ static bool gen3_rectangle_begin(struct sna *sna,
static int gen3_get_rectangles__flush(struct sna *sna,
const struct sna_composite_op *op)
{
+ /* Prevent the new vbo from being discarded after lock contention */
+ if (sna_vertex_wait__locked(&sna->render)) {
+ int rem = vertex_space(sna);
+ if (rem > op->floats_per_rect)
+ return rem;
+ }
+
if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 105: 5))
return 0;
if (!kgem_check_reloc_and_exec(&sna->kgem, 1))
return 0;
- if (op->need_magic_ca_pass && sna->render.vbo)
- return 0;
+ if (sna->render.vertex_offset) {
+ gen3_vertex_flush(sna);
+ if (gen3_magic_ca_pass(sna, op)) {
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
+ OUT_BATCH(gen3_get_blend_cntl(op->op,
+ op->has_component_alpha,
+ op->dst.format));
+ gen3_composite_emit_shader(sna, op, op->op);
+ }
+ }
return gen3_vertex_finish(sna);
}
@@ -1822,7 +1989,7 @@ start:
goto flush;
}
- if (unlikely(sna->render_state.gen3.vertex_offset == 0 &&
+ if (unlikely(sna->render.vertex_offset == 0 &&
!gen3_rectangle_begin(sna, op)))
goto flush;
@@ -1836,12 +2003,15 @@ start:
flush:
DBG(("%s: flushing batch\n", __FUNCTION__));
- if (sna->render_state.gen3.vertex_offset) {
+ if (sna->render.vertex_offset) {
gen3_vertex_flush(sna);
gen3_magic_ca_pass(sna, op);
}
+ sna_vertex_wait__locked(&sna->render);
_kgem_submit(&sna->kgem);
gen3_emit_composite_state(sna, op);
+ assert(sna->render.vertex_offset == 0);
+ assert(sna->render.vertex_reloc[0] == 0);
goto start;
}
@@ -1886,9 +2056,9 @@ gen3_render_composite_box(struct sna *sna,
}
static void
-gen3_render_composite_boxes(struct sna *sna,
- const struct sna_composite_op *op,
- const BoxRec *box, int nbox)
+gen3_render_composite_boxes__blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
__FUNCTION__, nbox,
@@ -1922,12 +2092,66 @@ gen3_render_composite_boxes(struct sna *sna,
}
static void
+gen3_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen3_get_rectangles(sna, op, nbox);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+ } while (nbox);
+}
+
+static void
+gen3_render_composite_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen3_get_rectangles(sna, op, nbox);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
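+
+/* The __thread emitters reserve space in the shared vertex buffer under
+ * sna_vertex_lock(), mark the reservation in flight, then fill it outside
+ * the lock so several threads can pack boxes into one VBO concurrently;
+ * flushing waits until no reservation is outstanding. A sketch of that
+ * reserve-then-fill pattern using pthreads directly (the sna_vertex_*
+ * helpers are assumed to wrap an equivalent mutex, condition variable
+ * and active count):
+ *
+ *	struct vertex_pool {
+ *		pthread_mutex_t lock;
+ *		pthread_cond_t wait;
+ *		float *vertices;
+ *		int used, active;
+ *	};
+ *
+ *	static float *reserve(struct vertex_pool *p, int nfloats)
+ *	{
+ *		float *v;
+ *		pthread_mutex_lock(&p->lock);
+ *		v = p->vertices + p->used;	// claim the space...
+ *		p->used += nfloats;
+ *		p->active++;			// ...and mark it in flight
+ *		pthread_mutex_unlock(&p->lock);
+ *		return v;			// filled outside the lock
+ *	}
+ *
+ *	static void release(struct vertex_pool *p)
+ *	{
+ *		pthread_mutex_lock(&p->lock);
+ *		if (--p->active == 0)
+ *			pthread_cond_broadcast(&p->wait);
+ *		pthread_mutex_unlock(&p->lock);
+ *	}
+ *
+ *	static void wait_idle(struct vertex_pool *p)	// flush side
+ *	{
+ *		pthread_mutex_lock(&p->lock);
+ *		while (p->active)
+ *			pthread_cond_wait(&p->wait, &p->lock);
+ *		pthread_mutex_unlock(&p->lock);
+ *	}
+ */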
+
+static void
gen3_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
DBG(("%s()\n", __FUNCTION__));
- if (sna->render_state.gen3.vertex_offset) {
+ if (sna->render.vertex_offset) {
gen3_vertex_flush(sna);
gen3_magic_ca_pass(sna, op);
}
@@ -1971,7 +2195,6 @@ gen3_render_reset(struct sna *sna)
state->floats_per_vertex = 0;
state->last_floats_per_vertex = 0;
state->last_vertex_offset = 0;
- state->vertex_offset = 0;
if (sna->render.vbo != NULL &&
!kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
@@ -1979,6 +2202,9 @@ gen3_render_reset(struct sna *sna)
__FUNCTION__, sna->render.vbo->presumed_offset));
discard_vbo(sna);
}
+
+ sna->render.vertex_reloc[0] = 0;
+ sna->render.vertex_offset = 0;
}
static void
@@ -2401,7 +2627,8 @@ gen3_composite_picture(struct sna *sna,
if (!gen3_composite_channel_set_format(channel, picture->format) &&
!gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
return sna_render_picture_convert(sna, picture, channel, pixmap,
- x, y, w, h, dst_x, dst_y);
+ x, y, w, h, dst_x, dst_y,
+ false);
if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n",
@@ -2431,7 +2658,7 @@ source_use_blt(struct sna *sna, PicturePtr picture)
if (too_large(picture->pDrawable->width, picture->pDrawable->height))
return true;
- return !is_gpu(picture->pDrawable);
+ return !is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER);
}
static bool
@@ -2589,12 +2816,6 @@ has_alphamap(PicturePtr p)
}
static bool
-untransformed(PicturePtr p)
-{
- return !p->transform || pixman_transform_is_int_translate(p->transform);
-}
-
-static bool
need_upload(PicturePtr p)
{
return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
@@ -2641,7 +2862,6 @@ gen3_composite_fallback(struct sna *sna,
PicturePtr mask,
PicturePtr dst)
{
- struct sna_pixmap *priv;
PixmapPtr src_pixmap;
PixmapPtr mask_pixmap;
PixmapPtr dst_pixmap;
@@ -2681,17 +2901,16 @@ gen3_composite_fallback(struct sna *sna,
if (mask &&
mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
- op != PictOpOver &&
- gen3_blend_op[op].src_blend != BLENDFACT_ZERO)
- {
+ gen3_blend_op[op].src_alpha &&
+ gen3_blend_op[op].src_blend != BLENDFACT_ZERO &&
+ op != PictOpOver) {
DBG(("%s: component-alpha mask with op=%d, should fallback\n",
__FUNCTION__, op));
return true;
}
/* If anything is on the GPU, push everything out to the GPU */
- priv = sna_pixmap(dst_pixmap);
- if (priv && priv->gpu_damage && !priv->clear) {
+ if (dst_use_gpu(dst_pixmap)) {
DBG(("%s: dst is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
@@ -2726,14 +2945,14 @@ gen3_composite_fallback(struct sna *sna,
if (too_large(dst_pixmap->drawable.width,
dst_pixmap->drawable.height) &&
- (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage))) {
+ dst_is_cpu(dst_pixmap)) {
DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
return true;
}
- DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
- __FUNCTION__));
- return false;
+ DBG(("%s: dst is not on the GPU and the operation should not fallback: use-cpu? %d\n",
+ __FUNCTION__, dst_use_cpu(dst_pixmap)));
+ return dst_use_cpu(dst_pixmap);
}
static int
@@ -2922,13 +3141,12 @@ gen3_render_composite(struct sna *sna,
tmp->mask.u.gen3.type = SHADER_NONE;
tmp->has_component_alpha = false;
} else if (gen3_blend_op[op].src_alpha &&
- (gen3_blend_op[op].src_blend != BLENDFACT_ZERO)) {
+ gen3_blend_op[op].src_blend != BLENDFACT_ZERO) {
if (op != PictOpOver)
goto cleanup_mask;
tmp->need_magic_ca_pass = true;
tmp->op = PictOpOutReverse;
- sna->render.vertex_start = sna->render.vertex_index;
}
} else {
if (tmp->mask.is_opaque) {
@@ -2978,22 +3196,33 @@ gen3_render_composite(struct sna *sna,
case SHADER_WHITE:
case SHADER_CONSTANT:
tmp->prim_emit = gen3_emit_composite_primitive_constant;
+ tmp->emit_boxes = gen3_emit_composite_boxes_constant;
break;
case SHADER_LINEAR:
case SHADER_RADIAL:
- if (tmp->src.transform == NULL)
+ if (tmp->src.transform == NULL) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
- else if (tmp->src.is_affine)
+ tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient;
+ } else if (tmp->src.is_affine) {
tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
+ tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient;
+ }
break;
case SHADER_TEXTURE:
if (tmp->src.transform == NULL) {
- if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0)
+ if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset;
- else
+ tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset;
+ } else {
tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
- } else if (tmp->src.is_affine)
+ tmp->emit_boxes = gen3_emit_composite_boxes_identity_source;
+ }
+ } else if (tmp->src.is_affine) {
+ tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
+ tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
+ tmp->emit_boxes = gen3_emit_composite_boxes_affine_source;
+ }
break;
}
} else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) {
@@ -3024,7 +3253,11 @@ gen3_render_composite(struct sna *sna,
tmp->blt = gen3_render_composite_blt;
tmp->box = gen3_render_composite_box;
- tmp->boxes = gen3_render_composite_boxes;
+ tmp->boxes = gen3_render_composite_boxes__blt;
+ if (tmp->emit_boxes) {
+ tmp->boxes = gen3_render_composite_boxes;
+ tmp->thread_boxes = gen3_render_composite_boxes__thread;
+ }
tmp->done = gen3_render_composite_done;
if (!kgem_check_bo(&sna->kgem,
@@ -3084,6 +3317,26 @@ gen3_emit_composite_spans_primitive_zero(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_zero__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox, float *v)
+{
+ do {
+ v[0] = op->base.dst.x + b->box.x2;
+ v[1] = op->base.dst.y + b->box.y2;
+
+ v[2] = op->base.dst.x + b->box.x1;
+ v[3] = v[1];
+
+ v[4] = v[2];
+ v[5] = op->base.dst.y + b->box.y1;
+
+ v += 6;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3099,6 +3352,22 @@ gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_zero_no_offset__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox, float *v)
+{
+ do {
+ v[0] = b->box.x2;
+ v[3] = v[1] = b->box.y2;
+ v[4] = v[2] = b->box.x1;
+ v[5] = b->box.y1;
+
+ b++;
+ v += 6;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_constant(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3115,6 +3384,24 @@ gen3_emit_composite_spans_primitive_constant(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_constant__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox,
+ float *v)
+{
+ do {
+ v[0] = op->base.dst.x + b->box.x2;
+ v[6] = v[3] = op->base.dst.x + b->box.x1;
+ v[4] = v[1] = op->base.dst.y + b->box.y2;
+ v[7] = op->base.dst.y + b->box.y1;
+ v[8] = v[5] = v[2] = b->alpha;
+
+ v += 9;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3131,6 +3418,23 @@ gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_constant_no_offset__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox, float *v)
+{
+ do {
+ v[0] = b->box.x2;
+ v[6] = v[3] = b->box.x1;
+ v[4] = v[1] = b->box.y2;
+ v[7] = b->box.y1;
+ v[8] = v[5] = v[2] = b->alpha;
+
+ v += 9;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_identity_source(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3159,13 +3463,43 @@ gen3_emit_composite_spans_primitive_identity_source(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_identity_source__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox,
+ float *v)
+{
+ do {
+ v[0] = op->base.dst.x + b->box.x2;
+ v[1] = op->base.dst.y + b->box.y2;
+ v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0];
+ v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1];
+ v[4] = b->alpha;
+
+ v[5] = op->base.dst.x + b->box.x1;
+ v[6] = v[1];
+ v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0];
+ v[8] = v[3];
+ v[9] = b->alpha;
+
+ v[10] = v[5];
+ v[11] = op->base.dst.y + b->box.y1;
+ v[12] = v[7];
+ v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1];
+ v[14] = b->alpha;
+
+ v += 15;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_affine_source(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
{
PictTransform *transform = op->base.src.transform;
- float x, y, *v;
+ float *v;
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 15;
@@ -3174,30 +3508,56 @@ gen3_emit_composite_spans_primitive_affine_source(struct sna *sna,
v[6] = v[1] = op->base.dst.y + box->y2;
v[10] = v[5] = op->base.dst.x + box->x1;
v[11] = op->base.dst.y + box->y1;
- v[4] = opacity;
- v[9] = opacity;
- v[14] = opacity;
-
- _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x2,
- (int)op->base.src.offset[1] + box->y2,
- transform,
- &x, &y);
- v[2] = x * op->base.src.scale[0];
- v[3] = y * op->base.src.scale[1];
+ v[14] = v[9] = v[4] = opacity;
+
+ _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
+ (int)op->base.src.offset[1] + box->y2,
+ transform, op->base.src.scale,
+ &v[2], &v[3]);
+
+ _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
+ (int)op->base.src.offset[1] + box->y2,
+ transform, op->base.src.scale,
+ &v[7], &v[8]);
+
+ _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
+ (int)op->base.src.offset[1] + box->y1,
+ transform, op->base.src.scale,
+ &v[12], &v[13]);
+}
- _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
- (int)op->base.src.offset[1] + box->y2,
- transform,
- &x, &y);
- v[7] = x * op->base.src.scale[0];
- v[8] = y * op->base.src.scale[1];
+fastcall static void
+gen3_emit_composite_spans_primitive_affine_source__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox,
+ float *v)
+{
+ PictTransform *transform = op->base.src.transform;
- _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
- (int)op->base.src.offset[1] + box->y1,
- transform,
- &x, &y);
- v[12] = x * op->base.src.scale[0];
- v[13] = y * op->base.src.scale[1];
+ do {
+ v[0] = op->base.dst.x + b->box.x2;
+ v[6] = v[1] = op->base.dst.y + b->box.y2;
+ v[10] = v[5] = op->base.dst.x + b->box.x1;
+ v[11] = op->base.dst.y + b->box.y1;
+ v[14] = v[9] = v[4] = b->alpha;
+
+ _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2,
+ (int)op->base.src.offset[1] + b->box.y2,
+ transform, op->base.src.scale,
+ &v[2], &v[3]);
+
+ _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
+ (int)op->base.src.offset[1] + b->box.y2,
+ transform, op->base.src.scale,
+ &v[7], &v[8]);
+
+ _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
+ (int)op->base.src.offset[1] + b->box.y1,
+ transform, op->base.src.scale,
+ &v[12], &v[13]);
+ v += 15;
+ b++;
+ } while (--nbox);
}
fastcall static void
@@ -3229,6 +3589,36 @@ gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_identity_gradient__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox,
+ float *v)
+{
+ do {
+ v[0] = op->base.dst.x + b->box.x2;
+ v[1] = op->base.dst.y + b->box.y2;
+ v[2] = op->base.src.offset[0] + b->box.x2;
+ v[3] = op->base.src.offset[1] + b->box.y2;
+ v[4] = b->alpha;
+
+ v[5] = op->base.dst.x + b->box.x1;
+ v[6] = v[1];
+ v[7] = op->base.src.offset[0] + b->box.x1;
+ v[8] = v[3];
+ v[9] = b->alpha;
+
+ v[10] = v[5];
+ v[11] = op->base.dst.y + b->box.y1;
+ v[12] = v[7];
+ v[13] = op->base.src.offset[1] + b->box.y1;
+ v[14] = b->alpha;
+
+ v += 15;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3264,6 +3654,43 @@ gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_affine_gradient__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox,
+ float *v)
+{
+ PictTransform *transform = op->base.src.transform;
+
+ do {
+ v[0] = op->base.dst.x + b->box.x2;
+ v[1] = op->base.dst.y + b->box.y2;
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + b->box.x2,
+ (int)op->base.src.offset[1] + b->box.y2,
+ transform,
+ &v[2], &v[3]);
+ v[4] = b->alpha;
+
+ v[5] = op->base.dst.x + b->box.x1;
+ v[6] = v[1];
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + b->box.x1,
+ (int)op->base.src.offset[1] + b->box.y2,
+ transform,
+ &v[7], &v[8]);
+ v[9] = b->alpha;
+
+ v[10] = v[5];
+ v[11] = op->base.dst.y + b->box.y1;
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + b->box.x1,
+ (int)op->base.src.offset[1] + b->box.y1,
+ transform,
+ &v[12], &v[13]);
+ v[14] = b->alpha;
+ v += 15;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3308,6 +3735,48 @@ gen3_render_composite_spans_constant_box(struct sna *sna,
}
fastcall static void
+gen3_render_composite_spans_constant_thread_boxes(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ op->base.dst.x, op->base.dst.y));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * 9;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ do {
+ v[0] = box->box.x2;
+ v[6] = v[3] = box->box.x1;
+ v[4] = v[1] = box->box.y2;
+ v[7] = box->box.y1;
+ v[8] = v[5] = v[2] = box->alpha;
+ v += 9;
+ box++;
+ } while (--nbox_this_time);
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
+fastcall static void
gen3_render_composite_spans_box(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, float opacity)
@@ -3355,10 +3824,45 @@ gen3_render_composite_spans_boxes(struct sna *sna,
}
fastcall static void
+gen3_render_composite_spans_boxes__thread(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ op->base.dst.x, op->base.dst.y));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
+fastcall static void
gen3_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
- if (sna->render_state.gen3.vertex_offset)
+ if (sna->render.vertex_offset)
gen3_vertex_flush(sna);
DBG(("%s()\n", __FUNCTION__));
@@ -3380,12 +3884,11 @@ gen3_check_composite_spans(struct sna *sna,
if (gen3_composite_fallback(sna, op, src, NULL, dst))
return false;
- if (need_tiling(sna, width, height)) {
- if (!is_gpu(dst->pDrawable)) {
- DBG(("%s: fallback, tiled operation not on GPU\n",
- __FUNCTION__));
- return false;
- }
+ if (need_tiling(sna, width, height) &&
+ !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
+ DBG(("%s: fallback, tiled operation not on GPU\n",
+ __FUNCTION__));
+ return false;
}
return true;
@@ -3458,37 +3961,58 @@ gen3_render_composite_spans(struct sna *sna,
no_offset = tmp->base.dst.x == 0 && tmp->base.dst.y == 0;
tmp->box = gen3_render_composite_spans_box;
tmp->boxes = gen3_render_composite_spans_boxes;
+ tmp->thread_boxes = gen3_render_composite_spans_boxes__thread;
tmp->done = gen3_render_composite_spans_done;
tmp->prim_emit = gen3_emit_composite_spans_primitive;
switch (tmp->base.src.u.gen3.type) {
case SHADER_NONE:
assert(0);
case SHADER_ZERO:
- tmp->prim_emit = no_offset ? gen3_emit_composite_spans_primitive_zero_no_offset : gen3_emit_composite_spans_primitive_zero;
+ if (no_offset) {
+ tmp->prim_emit = gen3_emit_composite_spans_primitive_zero_no_offset;
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero_no_offset__boxes;
+ } else {
+ tmp->prim_emit = gen3_emit_composite_spans_primitive_zero;
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero__boxes;
+ }
break;
case SHADER_BLACK:
case SHADER_WHITE:
case SHADER_CONSTANT:
if (no_offset) {
tmp->box = gen3_render_composite_spans_constant_box;
+ tmp->thread_boxes = gen3_render_composite_spans_constant_thread_boxes;
tmp->prim_emit = gen3_emit_composite_spans_primitive_constant_no_offset;
- } else
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant_no_offset__boxes;
+ } else {
tmp->prim_emit = gen3_emit_composite_spans_primitive_constant;
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__boxes;
+ }
break;
case SHADER_LINEAR:
case SHADER_RADIAL:
- if (tmp->base.src.transform == NULL)
+ if (tmp->base.src.transform == NULL) {
tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient;
- else if (tmp->base.src.is_affine)
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__boxes;
+ } else if (tmp->base.src.is_affine) {
tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient;
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__boxes;
+ }
break;
case SHADER_TEXTURE:
- if (tmp->base.src.transform == NULL)
+ if (tmp->base.src.transform == NULL) {
tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source;
- else if (tmp->base.src.is_affine)
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__boxes;
+ } else if (tmp->base.src.is_affine) {
+ tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
+ tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source;
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__boxes;
+ }
break;
}
+ if (tmp->emit_boxes == NULL)
+ tmp->thread_boxes = NULL;
tmp->base.mask.bo = NULL;
@@ -3528,7 +4052,8 @@ gen3_emit_video_state(struct sna *sna,
struct sna_video_frame *frame,
PixmapPtr pixmap,
struct kgem_bo *dst_bo,
- int width, int height)
+ int width, int height,
+ bool bilinear)
{
struct gen3_render_state *state = &sna->render_state.gen3;
uint32_t id, ms3, rewind;
@@ -3841,9 +4366,9 @@ gen3_emit_video_state(struct sna *sna,
}
static void
-gen3_video_get_batch(struct sna *sna)
+gen3_video_get_batch(struct sna *sna, struct kgem_bo *bo)
{
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
if (!kgem_check_batch(&sna->kgem, 120) ||
!kgem_check_reloc(&sna->kgem, 4) ||
@@ -3875,18 +4400,18 @@ gen3_render_video(struct sna *sna,
RegionPtr dstRegion,
short src_w, short src_h,
short drw_w, short drw_h,
+ short dx, short dy,
PixmapPtr pixmap)
{
struct sna_pixmap *priv = sna_pixmap(pixmap);
BoxPtr pbox = REGION_RECTS(dstRegion);
int nbox = REGION_NUM_RECTS(dstRegion);
- int dxo = dstRegion->extents.x1;
- int dyo = dstRegion->extents.y1;
- int width = dstRegion->extents.x2 - dxo;
- int height = dstRegion->extents.y2 - dyo;
+ int width = dstRegion->extents.x2 - dstRegion->extents.x1;
+ int height = dstRegion->extents.y2 - dstRegion->extents.y1;
float src_scale_x, src_scale_y;
int pix_xoff, pix_yoff;
struct kgem_bo *dst_bo;
+ bool bilinear;
int copy = 0;
DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, src_w, src_h, drw_w, drw_h));
@@ -3908,8 +4433,8 @@ gen3_render_video(struct sna *sna,
if (!dst_bo)
return false;
- pix_xoff = -dxo;
- pix_yoff = -dyo;
+ pix_xoff = -dstRegion->extents.x1;
+ pix_yoff = -dstRegion->extents.y1;
copy = 1;
} else {
width = pixmap->drawable.width;
@@ -3927,22 +4452,24 @@ gen3_render_video(struct sna *sna,
#endif
}
+ bilinear = src_w != drw_w || src_h != drw_h;
+
src_scale_x = ((float)src_w / frame->width) / drw_w;
src_scale_y = ((float)src_h / frame->height) / drw_h;
DBG(("%s: src offset=(%d, %d), scale=(%f, %f), dst offset=(%d, %d)\n",
__FUNCTION__,
- dxo, dyo, src_scale_x, src_scale_y, pix_xoff, pix_yoff));
+ dx, dy, src_scale_x, src_scale_y, pix_xoff, pix_yoff));
- gen3_video_get_batch(sna);
+ gen3_video_get_batch(sna, dst_bo);
gen3_emit_video_state(sna, video, frame, pixmap,
- dst_bo, width, height);
+ dst_bo, width, height, bilinear);
do {
int nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
if (nbox_this_time == 0) {
- gen3_video_get_batch(sna);
+ gen3_video_get_batch(sna, dst_bo);
gen3_emit_video_state(sna, video, frame, pixmap,
- dst_bo, width, height);
+ dst_bo, width, height, bilinear);
nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
}
nbox -= nbox_this_time;
@@ -3962,20 +4489,20 @@ gen3_render_video(struct sna *sna,
/* bottom right */
OUT_BATCH_F(box_x2 + pix_xoff);
OUT_BATCH_F(box_y2 + pix_yoff);
- OUT_BATCH_F((box_x2 - dxo) * src_scale_x);
- OUT_BATCH_F((box_y2 - dyo) * src_scale_y);
+ OUT_BATCH_F((box_x2 - dx) * src_scale_x);
+ OUT_BATCH_F((box_y2 - dy) * src_scale_y);
/* bottom left */
OUT_BATCH_F(box_x1 + pix_xoff);
OUT_BATCH_F(box_y2 + pix_yoff);
- OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
- OUT_BATCH_F((box_y2 - dyo) * src_scale_y);
+ OUT_BATCH_F((box_x1 - dx) * src_scale_x);
+ OUT_BATCH_F((box_y2 - dy) * src_scale_y);
/* top left */
OUT_BATCH_F(box_x1 + pix_xoff);
OUT_BATCH_F(box_y1 + pix_yoff);
- OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
- OUT_BATCH_F((box_y1 - dyo) * src_scale_y);
+ OUT_BATCH_F((box_x1 - dx) * src_scale_x);
+ OUT_BATCH_F((box_y1 - dy) * src_scale_y);
}
} while (nbox);
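
The dxo/dyo pair derived from the region extents is replaced by the dx/dy arguments supplied by the caller, and the texture coordinates map each destination pixel back into normalized texel space. The per-vertex mapping performed by the OUT_BATCH_F() calls above, as a standalone sketch:

	static float
	video_tex_coord(int box_x, int dx, int src_w, int frame_width, int drw_w)
	{
		/* (src_w / frame_width) spans the valid texels; dividing
		 * by drw_w spreads drw_w destination pixels across it. */
		float src_scale = ((float)src_w / frame_width) / drw_w;
		return (box_x - dx) * src_scale;
	}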
@@ -3988,7 +4515,7 @@ gen3_render_video(struct sna *sna,
pix_yoff = 0;
#endif
sna_blt_copy_boxes(sna, GXcopy,
- dst_bo, -dxo, -dyo,
+ dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1,
priv->gpu_bo, pix_xoff, pix_yoff,
pixmap->drawable.bitsPerPixel,
REGION_RECTS(dstRegion),
@@ -4207,7 +4734,7 @@ gen3_render_copy_blt(struct sna *sna,
static void
gen3_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
- if (sna->render_state.gen3.vertex_offset)
+ if (sna->render.vertex_offset)
gen3_vertex_flush(sna);
}
@@ -4500,7 +5027,7 @@ gen3_render_fill_op_boxes(struct sna *sna,
static void
gen3_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
{
- if (sna->render_state.gen3.vertex_offset)
+ if (sna->render.vertex_offset)
gen3_vertex_flush(sna);
}
@@ -4661,6 +5188,9 @@ gen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
static void gen3_render_flush(struct sna *sna)
{
gen3_vertex_close(sna);
+
+ assert(sna->render.vertex_reloc[0] == 0);
+ assert(sna->render.vertex_offset == 0);
}
static void
@@ -4674,10 +5204,12 @@ bool gen3_render_init(struct sna *sna)
#if !NO_COMPOSITE
render->composite = gen3_render_composite;
+ render->prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
render->check_composite_spans = gen3_check_composite_spans;
render->composite_spans = gen3_render_composite_spans;
+ render->prefer_gpu |= PREFER_GPU_SPANS;
#endif
render->video = gen3_render_video;
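Note: gen3_render_init now advertises which operation classes it wants on the GPU through the prefer_gpu bitmask, set alongside the corresponding hooks above. A small sketch of that pattern; the flag encodings here are assumptions for illustration, not the real PREFER_GPU_* values:

	/* Sketch of the prefer_gpu bitmask set up in gen3_render_init.
	 * Flag values are assumed, standing in for PREFER_GPU_RENDER
	 * and PREFER_GPU_SPANS. */
	#include <stdio.h>

	#define PREFER_GPU_RENDER (1 << 0)	/* assumed encoding */
	#define PREFER_GPU_SPANS  (1 << 1)	/* assumed encoding */

	int main(void)
	{
		unsigned prefer_gpu = 0;

		prefer_gpu |= PREFER_GPU_RENDER; /* composite hook enabled */
		prefer_gpu |= PREFER_GPU_SPANS;	 /* spans hook enabled */

		if (prefer_gpu & PREFER_GPU_SPANS)
			printf("span operations preferred on the GPU\n");
		return 0;
	}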
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index ceef528f0..53fe52a92 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -42,13 +42,15 @@
#include "brw/brw.h"
#include "gen4_render.h"
+#include "gen4_source.h"
+#include "gen4_vertex.h"
/* gen4 has a serious issue with its shaders: we need to flush
 * after every rectangle... So until that is resolved, prefer
 * the BLT engine.
 */
-#define PREFER_BLT 1
-#define FLUSH_EVERY_VERTEX 1
+#define FORCE_SPANS 0
+#define FORCE_NONRECTILINEAR_SPANS -1
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
@@ -59,19 +61,6 @@
#define NO_FILL_BOXES 0
#define NO_VIDEO 0
-#if FLUSH_EVERY_VERTEX
-#define _FLUSH() do { \
- gen4_vertex_flush(sna); \
- OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); \
-} while (0)
-#define FLUSH(OP) do { \
- if ((OP)->mask.bo == NULL) _FLUSH(); \
-} while (0)
-#else
-#define _FLUSH()
-#define FLUSH(OP)
-#endif
-
#define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
/* Set up a default static partitioning of the URB, which is supposed to
@@ -100,15 +89,9 @@
#define SF_KERNEL_NUM_GRF 16
#define PS_KERNEL_NUM_GRF 32
-static const struct gt_info {
- uint32_t max_sf_threads;
- uint32_t max_wm_threads;
- uint32_t urb_size;
-} gen4_gt_info = {
- 24, 32, 256,
-}, g4x_gt_info = {
- 24, 50, 384,
-};
+#define GEN4_MAX_SF_THREADS 24
+#define GEN4_MAX_WM_THREADS 32
+#define G4X_MAX_WM_THREADS 50
static const uint32_t ps_kernel_packed_static[][4] = {
#include "exa_wm_xy.g4b"
@@ -225,21 +208,20 @@ gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
return base + !is_affine;
}
-static void gen4_magic_ca_pass(struct sna *sna,
+static bool gen4_magic_ca_pass(struct sna *sna,
const struct sna_composite_op *op)
{
struct gen4_render_state *state = &sna->render_state.gen4;
if (!op->need_magic_ca_pass)
- return;
+ return false;
+
+ assert(sna->render.vertex_index > sna->render.vertex_start);
DBG(("%s: CA fixup\n", __FUNCTION__));
assert(op->mask.bo != NULL);
assert(op->has_component_alpha);
- if (FLUSH_EVERY_VERTEX)
- OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
-
gen4_emit_pipelined_pointers(sna, op, PictOpAdd,
gen4_choose_composite_kernel(PictOpAdd,
true, true, op->is_affine));
@@ -256,154 +238,9 @@ static void gen4_magic_ca_pass(struct sna *sna,
OUT_BATCH(0); /* index buffer offset, ignored */
state->last_primitive = sna->kgem.nbatch;
+ return true;
}
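Note: gen4_magic_ca_pass now reports whether it emitted the fixup, so callers know the pipelined-pointer state was clobbered and must be re-emitted. Conceptually the pass replays the vertex range already written with an ADD blend to complete component-alpha compositing. A toy sketch of that two-pass replay (pseudo-driver code, not the actual emitters):

	/* Sketch of the two-pass component-alpha fixup: the same vertex
	 * range is drawn a second time with an ADD blend. The blend
	 * names stand in for the real pipelined-pointer state. */
	#include <stdio.h>

	struct render { int vertex_start, vertex_index; };

	static void draw(const struct render *r, const char *blend)
	{
		printf("3DPRIMITIVE: vertices [%d, %d) with %s blend\n",
		       r->vertex_start, r->vertex_index, blend);
	}

	int main(void)
	{
		struct render r = { 0, 3 }; /* one RECTLIST rect = 3 vertices */

		draw(&r, "component-alpha"); /* first pass */
		draw(&r, "ADD");	     /* CA fixup replays the range */
		return 0;
	}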
-static void gen4_vertex_flush(struct sna *sna)
-{
- if (sna->render_state.gen4.vertex_offset == 0)
- return;
-
- DBG(("%s[%x] = %d\n", __FUNCTION__,
- 4*sna->render_state.gen4.vertex_offset,
- sna->render.vertex_index - sna->render.vertex_start));
- sna->kgem.batch[sna->render_state.gen4.vertex_offset] =
- sna->render.vertex_index - sna->render.vertex_start;
- sna->render_state.gen4.vertex_offset = 0;
-}
-
-static int gen4_vertex_finish(struct sna *sna)
-{
- struct kgem_bo *bo;
- unsigned int i;
-
- assert(sna->render.vertex_used);
- assert(sna->render.nvertex_reloc);
-
- /* Note: we only need dword alignment (currently) */
-
- bo = sna->render.vbo;
- if (bo) {
- gen4_vertex_flush(sna);
-
- for (i = 0; i < sna->render.nvertex_reloc; i++) {
- DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
- i, sna->render.vertex_reloc[i]));
-
- sna->kgem.batch[sna->render.vertex_reloc[i]] =
- kgem_add_reloc(&sna->kgem,
- sna->render.vertex_reloc[i], bo,
- I915_GEM_DOMAIN_VERTEX << 16,
- 0);
- }
-
- sna->render.nvertex_reloc = 0;
- sna->render.vertex_used = 0;
- sna->render.vertex_index = 0;
- sna->render_state.gen4.vb_id = 0;
-
- kgem_bo_destroy(&sna->kgem, bo);
- }
-
- sna->render.vertices = NULL;
- sna->render.vbo = kgem_create_linear(&sna->kgem,
- 256*1024, CREATE_GTT_MAP);
- if (sna->render.vbo)
- sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
- if (sna->render.vertices == NULL) {
- if (sna->render.vbo)
- kgem_bo_destroy(&sna->kgem, sna->render.vbo);
- sna->render.vbo = NULL;
- return 0;
- }
-
- if (sna->render.vertex_used) {
- memcpy(sna->render.vertices,
- sna->render.vertex_data,
- sizeof(float)*sna->render.vertex_used);
- }
- sna->render.vertex_size = 64 * 1024 - 1;
- return sna->render.vertex_size - sna->render.vertex_used;
-}
-
-static void gen4_vertex_close(struct sna *sna)
-{
- struct kgem_bo *bo, *free_bo = NULL;
- unsigned int i, delta = 0;
-
- assert(sna->render_state.gen4.vertex_offset == 0);
- if (!sna->render_state.gen4.vb_id)
- return;
-
- DBG(("%s: used=%d, vbo active? %d\n",
- __FUNCTION__, sna->render.vertex_used, sna->render.vbo != NULL));
-
- bo = sna->render.vbo;
- if (bo) {
- if (sna->render.vertex_size - sna->render.vertex_used < 64) {
- DBG(("%s: discarding full vbo\n", __FUNCTION__));
- sna->render.vbo = NULL;
- sna->render.vertices = sna->render.vertex_data;
- sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
- free_bo = bo;
- } else if (IS_CPU_MAP(bo->map)) {
- DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
- sna->render.vertices =
- kgem_bo_map__gtt(&sna->kgem, sna->render.vbo);
- if (sna->render.vertices == NULL) {
- sna->render.vbo = NULL;
- sna->render.vertices = sna->render.vertex_data;
- sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
- free_bo = bo;
- }
- }
- } else {
- if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
- DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
- sna->render.vertex_used, sna->kgem.nbatch));
- memcpy(sna->kgem.batch + sna->kgem.nbatch,
- sna->render.vertex_data,
- sna->render.vertex_used * 4);
- delta = sna->kgem.nbatch * 4;
- bo = NULL;
- sna->kgem.nbatch += sna->render.vertex_used;
- } else {
- bo = kgem_create_linear(&sna->kgem,
- 4*sna->render.vertex_used, 0);
- if (bo && !kgem_bo_write(&sna->kgem, bo,
- sna->render.vertex_data,
- 4*sna->render.vertex_used)) {
- kgem_bo_destroy(&sna->kgem, bo);
- bo = NULL;
- }
- DBG(("%s: new vbo: %d\n", __FUNCTION__,
- sna->render.vertex_used));
- free_bo = bo;
- }
- }
-
- assert(sna->render.nvertex_reloc);
- for (i = 0; i < sna->render.nvertex_reloc; i++) {
- DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
- i, sna->render.vertex_reloc[i]));
-
- sna->kgem.batch[sna->render.vertex_reloc[i]] =
- kgem_add_reloc(&sna->kgem,
- sna->render.vertex_reloc[i], bo,
- I915_GEM_DOMAIN_VERTEX << 16,
- delta);
- }
- sna->render.nvertex_reloc = 0;
-
- if (sna->render.vbo == NULL) {
- sna->render.vertex_used = 0;
- sna->render.vertex_index = 0;
- }
-
- if (free_bo)
- kgem_bo_destroy(&sna->kgem, free_bo);
-}
-
-
static uint32_t gen4_get_blend(int op,
bool has_component_alpha,
uint32_t dst_format)
@@ -635,6 +472,17 @@ static bool gen4_check_repeat(PicturePtr picture)
}
}
+static uint32_t
+gen4_tiling_bits(uint32_t tiling)
+{
+ switch (tiling) {
+ default: assert(0);
+ case I915_TILING_NONE: return 0;
+ case I915_TILING_X: return GEN4_SURFACE_TILED;
+ case I915_TILING_Y: return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y;
+ }
+}
+
/**
 * Sets up the common fields of a surface state buffer for the given
 * picture.
@@ -647,11 +495,11 @@ gen4_bind_bo(struct sna *sna,
uint32_t format,
bool is_dst)
{
- struct gen4_surface_state *ss;
uint32_t domains;
uint16_t offset;
+ uint32_t *ss;
- assert(!kgem_bo_is_snoop(bo));
+ assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));
/* After the first bind, we manage the cache domains within the batch */
offset = kgem_bo_get_binding(bo, format);
@@ -663,340 +511,58 @@ gen4_bind_bo(struct sna *sna,
offset = sna->kgem.surface -=
sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
- ss = memset(sna->kgem.batch + offset, 0, sizeof(*ss));
+ ss = sna->kgem.batch + offset;
- ss->ss0.surface_type = GEN4_SURFACE_2D;
- ss->ss0.surface_format = format;
+ ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT |
+ GEN4_SURFACE_BLEND_ENABLED |
+ format << GEN4_SURFACE_FORMAT_SHIFT);
if (is_dst)
domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
+ ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
- ss->ss0.data_return_format = GEN4_SURFACERETURNFORMAT_FLOAT32;
- ss->ss0.color_blend = 1;
- ss->ss1.base_addr =
- kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
-
- ss->ss2.height = height - 1;
- ss->ss2.width = width - 1;
- ss->ss3.pitch = bo->pitch - 1;
- ss->ss3.tiled_surface = bo->tiling != I915_TILING_NONE;
- ss->ss3.tile_walk = bo->tiling == I915_TILING_Y;
+ ss[2] = ((width - 1) << GEN4_SURFACE_WIDTH_SHIFT |
+ (height - 1) << GEN4_SURFACE_HEIGHT_SHIFT);
+ ss[3] = (gen4_tiling_bits(bo->tiling) |
+ (bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT);
+ ss[4] = 0;
+ ss[5] = 0;
kgem_bo_set_binding(bo, format, offset);
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
- offset, bo->handle, ss->ss1.base_addr,
- ss->ss0.surface_format, width, height, bo->pitch, bo->tiling,
+ offset, bo->handle, ss[1],
+ format, width, height, bo->pitch, bo->tiling,
domains & 0xffff ? "render" : "sampler"));
return offset * sizeof(uint32_t);
}
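Note: the rewrite of gen4_bind_bo drops the bitfield struct in favour of packing the surface-state dwords by hand, as in ss[2] and ss[3] above. A hedged sketch of the size-dword packing; the shift values below are placeholders standing in for the GEN4_SURFACE_* constants in gen4_render.h:

	/* Sketch of the hand-packed size dword (ss[2] above). The
	 * shift values are assumptions for illustration only. */
	#include <assert.h>
	#include <stdint.h>

	#define SURFACE_WIDTH_SHIFT   6	/* assumed */
	#define SURFACE_HEIGHT_SHIFT 19	/* assumed */

	static uint32_t pack_size(int width, int height)
	{
		/* Hardware stores size minus one, as in the diff. */
		assert(width >= 1 && height >= 1);
		return (uint32_t)(width - 1) << SURFACE_WIDTH_SHIFT |
		       (uint32_t)(height - 1) << SURFACE_HEIGHT_SHIFT;
	}

	int main(void)
	{
		return pack_size(1024, 768) ? 0 : 1;
	}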
-fastcall static void
-gen4_emit_composite_primitive_solid(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- float *v;
- union {
- struct sna_coordinate p;
- float f;
- } dst;
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 9;
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- v[1] = 1.;
- v[2] = 1.;
-
- dst.p.x = r->dst.x;
- v[3] = dst.f;
- v[4] = 0.;
- v[5] = 1.;
-
- dst.p.y = r->dst.y;
- v[6] = dst.f;
- v[7] = 0.;
- v[8] = 0.;
-}
-
-fastcall static void
-gen4_emit_composite_primitive_identity_source(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- const float *sf = op->src.scale;
- float sx, sy, *v;
- union {
- struct sna_coordinate p;
- float f;
- } dst;
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 9;
-
- sx = r->src.x + op->src.offset[0];
- sy = r->src.y + op->src.offset[1];
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- v[1] = (sx + r->width) * sf[0];
- v[2] = (sy + r->height) * sf[1];
-
- dst.p.x = r->dst.x;
- v[3] = dst.f;
- v[4] = sx * sf[0];
- v[5] = v[2];
-
- dst.p.y = r->dst.y;
- v[6] = dst.f;
- v[7] = v[4];
- v[8] = sy * sf[1];
-}
-
-fastcall static void
-gen4_emit_composite_primitive_affine_source(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- union {
- struct sna_coordinate p;
- float f;
- } dst;
- float *v;
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 9;
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x + r->width,
- op->src.offset[1] + r->src.y + r->height,
- op->src.transform,
- &v[1], &v[2]);
- v[1] *= op->src.scale[0];
- v[2] *= op->src.scale[1];
-
- dst.p.x = r->dst.x;
- v[3] = dst.f;
- _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
- op->src.offset[1] + r->src.y + r->height,
- op->src.transform,
- &v[4], &v[5]);
- v[4] *= op->src.scale[0];
- v[5] *= op->src.scale[1];
-
- dst.p.y = r->dst.y;
- v[6] = dst.f;
- _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
- op->src.offset[1] + r->src.y,
- op->src.transform,
- &v[7], &v[8]);
- v[7] *= op->src.scale[0];
- v[8] *= op->src.scale[1];
-}
-
-fastcall static void
-gen4_emit_composite_primitive_identity_source_mask(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- union {
- struct sna_coordinate p;
- float f;
- } dst;
- float src_x, src_y;
- float msk_x, msk_y;
- float w, h;
- float *v;
-
- src_x = r->src.x + op->src.offset[0];
- src_y = r->src.y + op->src.offset[1];
- msk_x = r->mask.x + op->mask.offset[0];
- msk_y = r->mask.y + op->mask.offset[1];
- w = r->width;
- h = r->height;
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 15;
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- v[1] = (src_x + w) * op->src.scale[0];
- v[2] = (src_y + h) * op->src.scale[1];
- v[3] = (msk_x + w) * op->mask.scale[0];
- v[4] = (msk_y + h) * op->mask.scale[1];
-
- dst.p.x = r->dst.x;
- v[5] = dst.f;
- v[6] = src_x * op->src.scale[0];
- v[7] = v[2];
- v[8] = msk_x * op->mask.scale[0];
- v[9] = v[4];
-
- dst.p.y = r->dst.y;
- v[10] = dst.f;
- v[11] = v[6];
- v[12] = src_y * op->src.scale[1];
- v[13] = v[8];
- v[14] = msk_y * op->mask.scale[1];
-}
-
-fastcall static void
-gen4_emit_composite_primitive(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
- bool is_affine = op->is_affine;
- const float *src_sf = op->src.scale;
- const float *mask_sf = op->mask.scale;
-
- if (is_affine) {
- sna_get_transformed_coordinates(r->src.x + op->src.offset[0],
- r->src.y + op->src.offset[1],
- op->src.transform,
- &src_x[0],
- &src_y[0]);
-
- sna_get_transformed_coordinates(r->src.x + op->src.offset[0],
- r->src.y + op->src.offset[1] + r->height,
- op->src.transform,
- &src_x[1],
- &src_y[1]);
-
- sna_get_transformed_coordinates(r->src.x + op->src.offset[0] + r->width,
- r->src.y + op->src.offset[1] + r->height,
- op->src.transform,
- &src_x[2],
- &src_y[2]);
- } else {
- sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0],
- r->src.y + op->src.offset[1],
- op->src.transform,
- &src_x[0],
- &src_y[0],
- &src_w[0]);
- sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0],
- r->src.y + op->src.offset[1] + r->height,
- op->src.transform,
- &src_x[1],
- &src_y[1],
- &src_w[1]);
- sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0] + r->width,
- r->src.y + op->src.offset[1] + r->height,
- op->src.transform,
- &src_x[2],
- &src_y[2],
- &src_w[2]);
- }
-
- if (op->mask.bo) {
- if (is_affine) {
- sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0],
- r->mask.y + op->mask.offset[1],
- op->mask.transform,
- &mask_x[0],
- &mask_y[0]);
-
- sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0],
- r->mask.y + op->mask.offset[1] + r->height,
- op->mask.transform,
- &mask_x[1],
- &mask_y[1]);
-
- sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0] + r->width,
- r->mask.y + op->mask.offset[1] + r->height,
- op->mask.transform,
- &mask_x[2],
- &mask_y[2]);
- } else {
- sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0],
- r->mask.y + op->mask.offset[1],
- op->mask.transform,
- &mask_x[0],
- &mask_y[0],
- &mask_w[0]);
- sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0],
- r->mask.y + op->mask.offset[1] + r->height,
- op->mask.transform,
- &mask_x[1],
- &mask_y[1],
- &mask_w[1]);
- sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0] + r->width,
- r->mask.y + op->mask.offset[1] + r->height,
- op->mask.transform,
- &mask_x[2],
- &mask_y[2],
- &mask_w[2]);
- }
- }
-
- OUT_VERTEX(r->dst.x + r->width, r->dst.y + r->height);
- OUT_VERTEX_F(src_x[2] * src_sf[0]);
- OUT_VERTEX_F(src_y[2] * src_sf[1]);
- if (!is_affine)
- OUT_VERTEX_F(src_w[2]);
- if (op->mask.bo) {
- OUT_VERTEX_F(mask_x[2] * mask_sf[0]);
- OUT_VERTEX_F(mask_y[2] * mask_sf[1]);
- if (!is_affine)
- OUT_VERTEX_F(mask_w[2]);
- }
-
- OUT_VERTEX(r->dst.x, r->dst.y + r->height);
- OUT_VERTEX_F(src_x[1] * src_sf[0]);
- OUT_VERTEX_F(src_y[1] * src_sf[1]);
- if (!is_affine)
- OUT_VERTEX_F(src_w[1]);
- if (op->mask.bo) {
- OUT_VERTEX_F(mask_x[1] * mask_sf[0]);
- OUT_VERTEX_F(mask_y[1] * mask_sf[1]);
- if (!is_affine)
- OUT_VERTEX_F(mask_w[1]);
- }
-
- OUT_VERTEX(r->dst.x, r->dst.y);
- OUT_VERTEX_F(src_x[0] * src_sf[0]);
- OUT_VERTEX_F(src_y[0] * src_sf[1]);
- if (!is_affine)
- OUT_VERTEX_F(src_w[0]);
- if (op->mask.bo) {
- OUT_VERTEX_F(mask_x[0] * mask_sf[0]);
- OUT_VERTEX_F(mask_y[0] * mask_sf[1]);
- if (!is_affine)
- OUT_VERTEX_F(mask_w[0]);
- }
-}
-
static void gen4_emit_vertex_buffer(struct sna *sna,
const struct sna_composite_op *op)
{
int id = op->u.gen4.ve_id;
+ assert((sna->render.vb_id & (1 << id)) == 0);
+
OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
(4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
+ assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
- sna->render_state.gen4.vb_id |= 1 << id;
+ sna->render.vb_id |= 1 << id;
}
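Note: vertex-buffer state is now tracked in the shared sna->render.vb_id bitmask rather than per-generation state, so 3DSTATE_VERTEX_BUFFERS is emitted at most once per vertex layout per batch. A minimal sketch of that bind-once pattern:

	/* Sketch of the "emit the vertex buffer only once per batch"
	 * pattern used by gen4_rectangle_begin above. */
	#include <stdint.h>
	#include <stdio.h>

	static uint32_t vb_id;	/* cleared whenever the batch is reset */

	static void bind_vertex_buffer(int id)
	{
		if (vb_id & (1u << id))
			return;	/* already emitted for this batch */

		printf("emit 3DSTATE_VERTEX_BUFFERS for ve_id=%d\n", id);
		vb_id |= 1u << id;
	}

	int main(void)
	{
		bind_vertex_buffer(2);
		bind_vertex_buffer(2);	/* second call is a no-op */
		return 0;
	}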
static void gen4_emit_primitive(struct sna *sna)
{
if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) {
- sna->render_state.gen4.vertex_offset = sna->kgem.nbatch - 5;
+ sna->render.vertex_offset = sna->kgem.nbatch - 5;
return;
}
@@ -1005,7 +571,7 @@ static void gen4_emit_primitive(struct sna *sna)
(_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
(0 << 9) |
4);
- sna->render_state.gen4.vertex_offset = sna->kgem.nbatch;
+ sna->render.vertex_offset = sna->kgem.nbatch;
OUT_BATCH(0); /* vertex count, to be filled in later */
OUT_BATCH(sna->render.vertex_index);
OUT_BATCH(1); /* single instance */
@@ -1022,19 +588,20 @@ static bool gen4_rectangle_begin(struct sna *sna,
int id = op->u.gen4.ve_id;
int ndwords;
+ if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
+ return true;
+
/* 7xpipelined pointers + 6xprimitive + 1xflush */
ndwords = op->need_magic_ca_pass? 20 : 6;
- if (FLUSH_EVERY_VERTEX)
- ndwords += 1;
- if ((sna->render_state.gen4.vb_id & (1 << id)) == 0)
+ if ((sna->render.vb_id & (1 << id)) == 0)
ndwords += 5;
if (!kgem_check_batch(&sna->kgem, ndwords))
return false;
- if ((sna->render_state.gen4.vb_id & (1 << id)) == 0)
+ if ((sna->render.vb_id & (1 << id)) == 0)
gen4_emit_vertex_buffer(sna, op);
- if (sna->render_state.gen4.vertex_offset == 0)
+ if (sna->render.vertex_offset == 0)
gen4_emit_primitive(sna);
return true;
@@ -1043,14 +610,28 @@ static bool gen4_rectangle_begin(struct sna *sna,
static int gen4_get_rectangles__flush(struct sna *sna,
const struct sna_composite_op *op)
{
- if (!kgem_check_batch(&sna->kgem, (FLUSH_EVERY_VERTEX || op->need_magic_ca_pass) ? 25 : 6))
+ /* Prevent the new vbo from being discarded after lock contention */
+ if (sna_vertex_wait__locked(&sna->render)) {
+ int rem = vertex_space(sna);
+ if (rem > op->floats_per_rect)
+ return rem;
+ }
+
+ if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 25 : 6))
return 0;
- if (!kgem_check_reloc_and_exec(&sna->kgem, 1))
+ if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
return 0;
if (op->need_magic_ca_pass && sna->render.vbo)
return 0;
+ if (sna->render.vertex_offset) {
+ gen4_vertex_flush(sna);
+ if (gen4_magic_ca_pass(sna, op))
+ gen4_emit_pipelined_pointers(sna, op, op->op,
+ op->u.gen4.wm_kernel);
+ }
+
return gen4_vertex_finish(sna);
}
@@ -1063,7 +644,7 @@ inline static int gen4_get_rectangles(struct sna *sna,
start:
rem = vertex_space(sna);
- if (rem < op->floats_per_rect) {
+ if (unlikely(rem < op->floats_per_rect)) {
DBG(("flushing vbo for %s: %d < %d\n",
__FUNCTION__, rem, op->floats_per_rect));
rem = gen4_get_rectangles__flush(sna, op);
@@ -1071,7 +652,7 @@ start:
goto flush;
}
- if (unlikely(sna->render_state.gen4.vertex_offset == 0 &&
+ if (unlikely(sna->render.vertex_offset == 0 &&
!gen4_rectangle_begin(sna, op)))
goto flush;
@@ -1082,18 +663,18 @@ start:
return want;
flush:
- if (sna->render_state.gen4.vertex_offset) {
+ if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen4_magic_ca_pass(sna, op);
}
+ sna_vertex_wait__locked(&sna->render);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
}
static uint32_t *
-gen4_composite_get_binding_table(struct sna *sna,
- uint16_t *offset)
+gen4_composite_get_binding_table(struct sna *sna, uint16_t *offset)
{
sna->kgem.surface -=
sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
@@ -1129,6 +710,9 @@ gen4_emit_urb(struct sna *sna)
urb_cs_start = urb_sf_start + urb_sf_size;
urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
+ while ((sna->kgem.nbatch & 15) > 12)
+ OUT_BATCH(MI_NOOP);
+
OUT_BATCH(GEN4_URB_FENCE |
UF0_CS_REALLOC |
UF0_SF_REALLOC |
@@ -1176,7 +760,7 @@ gen4_emit_invariant(struct sna *sna)
{
assert(sna->kgem.surface == sna->kgem.batch_size);
- if (sna->kgem.gen >= 45)
+ if (sna->kgem.gen >= 045)
OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
else
OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D);
@@ -1187,9 +771,9 @@ gen4_emit_invariant(struct sna *sna)
}
static void
-gen4_get_batch(struct sna *sna)
+gen4_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
DBG(("%s: flushing batch: %d < %d+%d\n",
@@ -1245,11 +829,11 @@ gen4_emit_pipelined_pointers(struct sna *sna,
const struct sna_composite_op *op,
int blend, int kernel)
{
- uint32_t key;
uint16_t sp, bp;
+ uint32_t key;
DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n",
- __FUNCTION__, op->mask.bo != NULL,
+ __FUNCTION__, op->u.gen4.ve_id & 2,
op->src.filter, op->src.repeat,
op->mask.filter, op->mask.repeat,
kernel, blend, op->has_component_alpha, (int)op->dst.format));
@@ -1260,8 +844,7 @@ gen4_emit_pipelined_pointers(struct sna *sna,
bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format);
DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
-
- key = sp | bp << 16;
+ key = sp | (uint32_t)bp << 16;
if (key == sna->render_state.gen4.last_pipelined_pointers)
return;
@@ -1269,7 +852,7 @@ gen4_emit_pipelined_pointers(struct sna *sna,
OUT_BATCH(sna->render_state.gen4.vs);
OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */
OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */
- OUT_BATCH(sna->render_state.gen4.sf[op->mask.bo != NULL]);
+ OUT_BATCH(sna->render_state.gen4.sf);
OUT_BATCH(sna->render_state.gen4.wm + sp);
OUT_BATCH(sna->render_state.gen4.cc + bp);
@@ -1277,7 +860,7 @@ gen4_emit_pipelined_pointers(struct sna *sna,
gen4_emit_urb(sna);
}
-static void
+static bool
gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
@@ -1288,7 +871,8 @@ gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
if (sna->render_state.gen4.drawrect_limit == limit &&
sna->render_state.gen4.drawrect_offset == offset)
- return;
+ return true;
+
sna->render_state.gen4.drawrect_offset = offset;
sna->render_state.gen4.drawrect_limit = limit;
@@ -1296,6 +880,7 @@ gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
OUT_BATCH(0);
OUT_BATCH(limit);
OUT_BATCH(offset);
+ return false;
}
static void
@@ -1309,65 +894,108 @@ gen4_emit_vertex_elements(struct sna *sna,
* texture coordinate 1 if (has_mask is true): same as above
*/
struct gen4_render_state *render = &sna->render_state.gen4;
- bool has_mask = op->mask.bo != NULL;
- int nelem = has_mask ? 2 : 1;
- int selem;
- uint32_t w_component;
- uint32_t src_format;
+ uint32_t src_format, dw;
int id = op->u.gen4.ve_id;
if (render->ve_id == id)
return;
-
render->ve_id = id;
- if (op->is_affine) {
- src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
- w_component = GEN4_VFCOMPONENT_STORE_1_FLT;
- selem = 2;
- } else {
- src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
- w_component = GEN4_VFCOMPONENT_STORE_SRC;
- selem = 3;
- }
-
/* The VUE layout
* dword 0-3: position (x, y, 1.0, 1.0),
* dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
* [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0)
*/
- OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + nelem) - 1));
+ OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1));
/* x,y */
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
- 0 << VE0_OFFSET_SHIFT); /* offsets vb in bytes */
- OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
- GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
- GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
- GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
- (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */
+ 0 << VE0_OFFSET_SHIFT);
+ OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
+ VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
+ VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
+ VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
+ (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
/* u0, v0, w0 */
+ DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
+ dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
+ switch (id & 3) {
+ default:
+ assert(0);
+ case 0:
+ src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+ dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
+ break;
+ case 1:
+ src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+ dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
+ dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
+ break;
+ case 2:
+ src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+ dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
+ break;
+ case 3:
+ src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
+ break;
+ }
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
src_format << VE0_FORMAT_SHIFT |
- 4 << VE0_OFFSET_SHIFT); /* offset vb in bytes */
- OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
- GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
- w_component << VE1_VFCOMPONENT_2_SHIFT |
- GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
- (2*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */
+ 4 << VE0_OFFSET_SHIFT);
+ OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
/* u1, v1, w1 */
- if (has_mask) {
+ if (id >> 2) {
+ unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float);
+ DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__,
+ id >> 2, src_offset));
+ dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
+ switch (id >> 2) {
+ case 1:
+ src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+ dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
+ dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
+ break;
+ default:
+ assert(0);
+ case 2:
+ src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+ dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
+ break;
+ case 3:
+ src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
+ break;
+ }
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
src_format << VE0_FORMAT_SHIFT |
- ((1 + selem) * 4) << VE0_OFFSET_SHIFT); /* vb offset in bytes */
- OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
- GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
- w_component << VE1_VFCOMPONENT_2_SHIFT |
- GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
- (3*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */
+ src_offset << VE0_OFFSET_SHIFT);
+ OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
+ } else {
+ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+ GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
+ 0 << VE0_OFFSET_SHIFT);
+ OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
+ VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
+ VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
+ VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
+ 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
}
}
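Note: the rewritten element emission derives both texture channels from the ve_id bitfield: the low two bits select the first channel's float count (0 reuses the x,y position), and the bits above select the second channel's. A small sketch of decoding that encoding, assuming it matches the switch statements above:

	/* Sketch: decoding the ve_id bitfield used above. Illustrative
	 * only; the mapping is inferred from the emitters in the hunk. */
	#include <stdio.h>

	static void decode_ve_id(int id)
	{
		int chan0 = id & 3; /* 0: reuse x,y; 1: u; 2: u,v; 3: u,v,w */
		int chan1 = id >> 2; /* 0: none; 1: u; 2: u,v; 3: u,v,w */

		printf("ve_id=%d: first channel %d floats, second %d floats\n",
		       id, chan0, chan1);
	}

	int main(void)
	{
		decode_ve_id(2); /* e.g. the video path: affine u,v, no mask */
		decode_ve_id(2 | (2 << 2));
		return 0;
	}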
@@ -1376,32 +1004,37 @@ gen4_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t wm_binding_table)
{
- if (FLUSH_EVERY_VERTEX)
- OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
-
- gen4_emit_drawing_rectangle(sna, op);
- gen4_emit_binding_table(sna, wm_binding_table);
- gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel);
- gen4_emit_vertex_elements(sna, op);
+ bool flush;
+ flush = wm_binding_table & 1;
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
- DBG(("%s: flushing dirty (%d, %d)\n", __FUNCTION__,
+ DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
kgem_bo_is_dirty(op->src.bo),
- kgem_bo_is_dirty(op->mask.bo)));
+ kgem_bo_is_dirty(op->mask.bo),
+ flush));
OUT_BATCH(MI_FLUSH);
kgem_clear_dirty(&sna->kgem);
kgem_bo_mark_dirty(op->dst.bo);
+ flush = false;
}
+ flush &= gen4_emit_drawing_rectangle(sna, op);
+ if (flush && op->op > PictOpSrc)
+ OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
+
+ gen4_emit_binding_table(sna, wm_binding_table & ~1);
+ gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel);
+ gen4_emit_vertex_elements(sna, op);
}
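Note: gen4_emit_state now receives the destination-dirty flag smuggled into bit 0 of wm_binding_table (callers pass offset | dirty, and the table is emitted with the bit masked off). The offset counts whole padded surface-state entries, so it is assumed here to be even and the low bit free. A generic sketch of that low-bit tagging trick:

	/* Sketch of carrying a boolean in the low bit of an aligned
	 * offset, as gen4_bind_surfaces does with "offset | dirty". */
	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>

	static uint16_t tag(uint16_t offset, bool dirty)
	{
		assert((offset & 1) == 0); /* low bit must be free */
		return offset | dirty;
	}

	static uint16_t untag(uint16_t tagged, bool *dirty)
	{
		*dirty = tagged & 1;
		return tagged & ~1;
	}

	int main(void)
	{
		bool dirty;
		return untag(tag(64, true), &dirty) == 64 && dirty ? 0 : 1;
	}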
static void
gen4_bind_surfaces(struct sna *sna,
const struct sna_composite_op *op)
{
+ bool dirty = kgem_bo_is_dirty(op->dst.bo);
uint32_t *binding_table;
uint16_t offset;
- gen4_get_batch(sna);
+ gen4_get_batch(sna, op);
binding_table = gen4_composite_get_binding_table(sna, &offset);
@@ -1415,7 +1048,8 @@ gen4_bind_surfaces(struct sna *sna,
op->src.bo, op->src.width, op->src.height,
op->src.card_format,
false);
- if (op->mask.bo)
+ if (op->mask.bo) {
+ assert(op->u.gen4.ve_id >> 2);
binding_table[2] =
gen4_bind_bo(sna,
op->mask.bo,
@@ -1423,6 +1057,7 @@ gen4_bind_surfaces(struct sna *sna,
op->mask.height,
op->mask.card_format,
false);
+ }
if (sna->kgem.surface == offset &&
*(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table &&
@@ -1432,7 +1067,7 @@ gen4_bind_surfaces(struct sna *sna,
offset = sna->render_state.gen4.surface_table;
}
- gen4_emit_state(sna, op, offset);
+ gen4_emit_state(sna, op, offset | dirty);
}
fastcall static void
@@ -1449,9 +1084,6 @@ gen4_render_composite_blt(struct sna *sna,
gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
op->prim_emit(sna, op, r);
-
- /* XXX are the shaders fubar? */
- FLUSH(op);
}
fastcall static void
@@ -1461,19 +1093,25 @@ gen4_render_composite_box(struct sna *sna,
{
struct sna_composite_rectangles r;
+ DBG((" %s: (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1, box->x2, box->y2));
+
+ gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
+
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.mask = r.src = r.dst;
- gen4_render_composite_blt(sna, op, &r);
+ op->prim_emit(sna, op, &r);
}
static void
-gen4_render_composite_boxes(struct sna *sna,
- const struct sna_composite_op *op,
- const BoxRec *box, int nbox)
+gen4_render_composite_boxes__blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
{
DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
__FUNCTION__, nbox, op->dst.x, op->dst.y,
@@ -1483,16 +1121,84 @@ gen4_render_composite_boxes(struct sna *sna,
op->mask.width, op->mask.height));
do {
- struct sna_composite_rectangles r;
-
- r.dst.x = box->x1;
- r.dst.y = box->y1;
- r.width = box->x2 - box->x1;
- r.height = box->y2 - box->y1;
- r.mask = r.src = r.dst;
- gen4_render_composite_blt(sna, op, &r);
- box++;
- } while (--nbox);
+ int nbox_this_time;
+
+ nbox_this_time = gen4_get_rectangles(sna, op, nbox,
+ gen4_bind_surfaces);
+ nbox -= nbox_this_time;
+
+ do {
+ struct sna_composite_rectangles r;
+
+ DBG((" %s: (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1, box->x2, box->y2));
+
+ r.dst.x = box->x1;
+ r.dst.y = box->y1;
+ r.width = box->x2 - box->x1;
+ r.height = box->y2 - box->y1;
+ r.mask = r.src = r.dst;
+ op->prim_emit(sna, op, &r);
+ box++;
+ } while (--nbox_this_time);
+ } while (nbox);
+}
+
+static void
+gen4_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen4_get_rectangles(sna, op, nbox,
+ gen4_bind_surfaces);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+ } while (nbox);
+}
+
+static void
+gen4_render_composite_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen4_get_rectangles(sna, op, nbox,
+ gen4_bind_surfaces);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
}
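Note: the new __thread box emitters reserve a private slice of the vertex buffer while holding the render lock, then drop it so other threads can fill their slices concurrently; sna_vertex_acquire/release track the outstanding writers. A rough stand-in for that reserve-then-fill pattern using a plain pthread mutex (illustrative only):

	/* Rough sketch of the reserve-under-lock, fill-outside-lock
	 * pattern in gen4_render_composite_boxes__thread. */
	#include <pthread.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static float vertices[4096];
	static int vertex_used;

	static void emit_boxes(int nfloats)
	{
		float *v;

		pthread_mutex_lock(&lock);
		v = vertices + vertex_used; /* reserve a private range */
		vertex_used += nfloats;
		pthread_mutex_unlock(&lock);

		for (int i = 0; i < nfloats; i++) /* fill without the lock */
			v[i] = 0.0f;
	}

	int main(void)
	{
		emit_boxes(9);	/* one rectangle's worth of floats */
		return vertex_used == 9 ? 0 : 1;
	}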
#ifndef MAX
@@ -1533,6 +1239,7 @@ static uint32_t gen4_bind_video_source(struct sna *sna,
static void gen4_video_bind_surfaces(struct sna *sna,
const struct sna_composite_op *op)
{
+ bool dirty = kgem_bo_is_dirty(op->dst.bo);
struct sna_video_frame *frame = op->priv;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
@@ -1574,10 +1281,9 @@ static void gen4_video_bind_surfaces(struct sna *sna,
n_src = 1;
}
- gen4_get_batch(sna);
+ gen4_get_batch(sna, op);
binding_table = gen4_composite_get_binding_table(sna, &offset);
-
binding_table[0] =
gen4_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
@@ -1594,7 +1300,7 @@ static void gen4_video_bind_surfaces(struct sna *sna,
src_surf_format);
}
- gen4_emit_state(sna, op, offset);
+ gen4_emit_state(sna, op, offset | dirty);
}
static bool
@@ -1604,10 +1310,11 @@ gen4_render_video(struct sna *sna,
RegionPtr dstRegion,
short src_w, short src_h,
short drw_w, short drw_h,
+ short dx, short dy,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
- int nbox, dxo, dyo, pix_xoff, pix_yoff;
+ int nbox, pix_xoff, pix_yoff;
float src_scale_x, src_scale_y;
struct sna_pixmap *priv;
BoxPtr box;
@@ -1627,13 +1334,16 @@ gen4_render_video(struct sna *sna,
tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;
- tmp.src.filter = SAMPLER_FILTER_BILINEAR;
+ if (src_w == drw_w && src_h == drw_h)
+ tmp.src.filter = SAMPLER_FILTER_NEAREST;
+ else
+ tmp.src.filter = SAMPLER_FILTER_BILINEAR;
tmp.src.repeat = SAMPLER_EXTEND_PAD;
tmp.src.bo = frame->bo;
tmp.mask.bo = NULL;
tmp.u.gen4.wm_kernel =
is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
- tmp.u.gen4.ve_id = 1;
+ tmp.u.gen4.ve_id = 2;
tmp.is_affine = true;
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
@@ -1658,9 +1368,6 @@ gen4_render_video(struct sna *sna,
pix_yoff = 0;
#endif
- dxo = dstRegion->extents.x1;
- dyo = dstRegion->extents.y1;
-
/* Use normalized texture coordinates */
src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
src_scale_y = ((float)src_h / frame->height) / (float)drw_h;
@@ -1678,18 +1385,16 @@ gen4_render_video(struct sna *sna,
gen4_get_rectangles(sna, &tmp, 1, gen4_video_bind_surfaces);
OUT_VERTEX(r.x2, r.y2);
- OUT_VERTEX_F((box->x2 - dxo) * src_scale_x);
- OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
+ OUT_VERTEX_F((box->x2 - dx) * src_scale_x);
+ OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
OUT_VERTEX(r.x1, r.y2);
- OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
- OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
+ OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
+ OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
OUT_VERTEX(r.x1, r.y1);
- OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
- OUT_VERTEX_F((box->y1 - dyo) * src_scale_y);
-
- _FLUSH();
+ OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
+ OUT_VERTEX_F((box->y1 - dy) * src_scale_y);
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
@@ -1703,141 +1408,6 @@ gen4_render_video(struct sna *sna,
return true;
}
-static bool
-gen4_composite_solid_init(struct sna *sna,
- struct sna_composite_channel *channel,
- uint32_t color)
-{
- channel->filter = PictFilterNearest;
- channel->repeat = RepeatNormal;
- channel->is_affine = true;
- channel->is_solid = true;
- channel->transform = NULL;
- channel->width = 1;
- channel->height = 1;
- channel->card_format = GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
-
- channel->bo = sna_render_get_solid(sna, color);
-
- channel->scale[0] = channel->scale[1] = 1;
- channel->offset[0] = channel->offset[1] = 0;
- return channel->bo != NULL;
-}
-
-static bool
-gen4_composite_linear_init(struct sna *sna,
- PicturePtr picture,
- struct sna_composite_channel *channel,
- int x, int y,
- int w, int h,
- int dst_x, int dst_y)
-{
- PictLinearGradient *linear =
- (PictLinearGradient *)picture->pSourcePict;
- pixman_fixed_t tx, ty;
- float x0, y0, sf;
- float dx, dy;
-
- DBG(("%s: p1=(%f, %f), p2=(%f, %f), src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
- __FUNCTION__,
- pixman_fixed_to_double(linear->p1.x), pixman_fixed_to_double(linear->p1.y),
- pixman_fixed_to_double(linear->p2.x), pixman_fixed_to_double(linear->p2.y),
- x, y, dst_x, dst_y, w, h));
-
- if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
- return 0;
-
- if (!sna_transform_is_affine(picture->transform)) {
- DBG(("%s: fallback due to projective transform\n",
- __FUNCTION__));
- return sna_render_picture_fixup(sna, picture, channel,
- x, y, w, h, dst_x, dst_y);
- }
-
- channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear);
- if (!channel->bo)
- return 0;
-
- channel->filter = PictFilterNearest;
- channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
- channel->width = channel->bo->pitch / 4;
- channel->height = 1;
- channel->pict_format = PICT_a8r8g8b8;
-
- channel->scale[0] = channel->scale[1] = 1;
- channel->offset[0] = channel->offset[1] = 0;
-
- if (sna_transform_is_translation(picture->transform, &tx, &ty)) {
- dx = pixman_fixed_to_double(linear->p2.x - linear->p1.x);
- dy = pixman_fixed_to_double(linear->p2.y - linear->p1.y);
-
- x0 = pixman_fixed_to_double(linear->p1.x);
- y0 = pixman_fixed_to_double(linear->p1.y);
-
- if (tx | ty) {
- x0 -= pixman_fixed_to_double(tx);
- y0 -= pixman_fixed_to_double(ty);
- }
- } else {
- struct pixman_f_vector p1, p2;
- struct pixman_f_transform m, inv;
-
- pixman_f_transform_from_pixman_transform(&m, picture->transform);
- DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n",
- __FUNCTION__,
- m.m[0][0], m.m[0][1], m.m[0][2],
- m.m[1][0], m.m[1][1], m.m[1][2],
- m.m[2][0], m.m[2][1], m.m[2][2]));
- if (!pixman_f_transform_invert(&inv, &m))
- return 0;
-
- p1.v[0] = pixman_fixed_to_double(linear->p1.x);
- p1.v[1] = pixman_fixed_to_double(linear->p1.y);
- p1.v[2] = 1.;
- pixman_f_transform_point(&inv, &p1);
-
- p2.v[0] = pixman_fixed_to_double(linear->p2.x);
- p2.v[1] = pixman_fixed_to_double(linear->p2.y);
- p2.v[2] = 1.;
- pixman_f_transform_point(&inv, &p2);
-
- DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n",
- __FUNCTION__,
- p1.v[0], p1.v[1], p1.v[2],
- p2.v[0], p2.v[1], p2.v[2]));
-
- dx = p2.v[0] - p1.v[0];
- dy = p2.v[1] - p1.v[1];
-
- x0 = p1.v[0];
- y0 = p1.v[1];
- }
-
- sf = dx*dx + dy*dy;
- dx /= sf;
- dy /= sf;
-
- channel->embedded_transform.matrix[0][0] = pixman_double_to_fixed(dx);
- channel->embedded_transform.matrix[0][1] = pixman_double_to_fixed(dy);
- channel->embedded_transform.matrix[0][2] = -pixman_double_to_fixed(dx*(x0+dst_x-x) + dy*(y0+dst_y-y));
-
- channel->embedded_transform.matrix[1][0] = 0;
- channel->embedded_transform.matrix[1][1] = 0;
- channel->embedded_transform.matrix[1][2] = pixman_double_to_fixed(.5);
-
- channel->embedded_transform.matrix[2][0] = 0;
- channel->embedded_transform.matrix[2][1] = 0;
- channel->embedded_transform.matrix[2][2] = pixman_fixed_1;
-
- channel->transform = &channel->embedded_transform;
- channel->is_affine = 1;
-
- DBG(("%s: dx=%f, dy=%f, offset=%f\n",
- __FUNCTION__, dx, dy, -dx*(x0-x+dst_x) + -dy*(y0-y+dst_y)));
-
- return channel->bo != NULL;
-}
-
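Note: the removed gen4_composite_linear_init (its replacement, gen4_channel_init_linear, lives in the new gen4_source.c) builds a one-row transform that projects each pixel onto the gradient axis: t = ((x - x0)*dx + (y - y0)*dy) / (dx*dx + dy*dy), which is why dx and dy are divided by the squared length sf above. A standalone sketch of that projection:

	/* Sketch of the gradient projection computed by the removed
	 * code: (x, y) maps to t in [0, 1] along p1->p2 by dividing the
	 * dot product by the squared axis length (the sf term above). */
	#include <stdio.h>

	static double gradient_t(double x, double y,
				 double x0, double y0,	/* p1 */
				 double x1, double y1)	/* p2 */
	{
		double dx = x1 - x0, dy = y1 - y0;
		double sf = dx * dx + dy * dy;

		return ((x - x0) * dx + (y - y0) * dy) / sf;
	}

	int main(void)
	{
		/* Midpoint of the axis maps to t = 0.5. */
		printf("t=%f\n", gradient_t(5, 5, 0, 0, 10, 10));
		return 0;
	}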
static int
gen4_composite_picture(struct sna *sna,
PicturePtr picture,
@@ -1858,16 +1428,16 @@ gen4_composite_picture(struct sna *sna,
channel->card_format = -1;
if (sna_picture_is_solid(picture, &color))
- return gen4_composite_solid_init(sna, channel, color);
+ return gen4_channel_init_solid(sna, channel, color);
if (picture->pDrawable == NULL) {
int ret;
if (picture->pSourcePict->type == SourcePictTypeLinear)
- return gen4_composite_linear_init(sna, picture, channel,
- x, y,
- w, h,
- dst_x, dst_y);
+ return gen4_channel_init_linear(sna, picture, channel,
+ x, y,
+ w, h,
+ dst_x, dst_y);
DBG(("%s -- fixup, gradient\n", __FUNCTION__));
ret = -1;
@@ -1922,7 +1492,8 @@ gen4_composite_picture(struct sna *sna,
channel->card_format = gen4_get_card_format(picture->format);
if (channel->card_format == -1)
return sna_render_picture_convert(sna, picture, channel, pixmap,
- x, y, w, h, dst_x, dst_y);
+ x, y, w, h, dst_x, dst_y,
+ false);
if (too_large(pixmap->drawable.width, pixmap->drawable.height))
return sna_render_picture_extract(sna, picture, channel,
@@ -1950,7 +1521,7 @@ gen4_render_composite_done(struct sna *sna,
{
DBG(("%s()\n", __FUNCTION__));
- if (sna->render_state.gen4.vertex_offset) {
+ if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen4_magic_ca_pass(sna, op);
}
@@ -1964,54 +1535,49 @@ gen4_render_composite_done(struct sna *sna,
}
static bool
-gen4_composite_set_target(PicturePtr dst, struct sna_composite_op *op)
+gen4_composite_set_target(struct sna *sna,
+ struct sna_composite_op *op,
+ PicturePtr dst,
+ int x, int y, int w, int h)
{
- struct sna_pixmap *priv;
-
- if (!gen4_check_dst_format(dst->format)) {
- DBG(("%s: incompatible render target format %08x\n",
- __FUNCTION__, dst->format));
- return false;
- }
+ BoxRec box;
op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
op->dst.width = op->dst.pixmap->drawable.width;
op->dst.height = op->dst.pixmap->drawable.height;
op->dst.format = dst->format;
- priv = sna_pixmap_force_to_gpu(op->dst.pixmap, MOVE_READ | MOVE_WRITE);
- if (priv == NULL)
- return false;
+ if (w && h) {
+ box.x1 = x;
+ box.y1 = y;
+ box.x2 = x + w;
+ box.y2 = y + h;
+ } else
+ sna_render_picture_extents(dst, &box);
- op->dst.bo = priv->gpu_bo;
- op->damage = &priv->gpu_damage;
- if (sna_damage_is_all(&priv->gpu_damage, op->dst.width, op->dst.height))
- op->damage = NULL;
- DBG(("%s: all-damaged=%d, damage=%p\n", __FUNCTION__,
- sna_damage_is_all(&priv->gpu_damage, op->dst.width, op->dst.height),
- op->damage));
+ op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
+ PREFER_GPU | FORCE_GPU | RENDER_GPU,
+ &box, &op->damage);
+ if (op->dst.bo == NULL)
+ return false;
get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
&op->dst.x, &op->dst.y);
- return true;
-}
-static inline bool
-picture_is_cpu(PicturePtr picture)
-{
- if (!picture->pDrawable)
- return false;
+ DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
+ __FUNCTION__,
+ op->dst.pixmap, (int)op->dst.format,
+ op->dst.width, op->dst.height,
+ op->dst.bo->pitch,
+ op->dst.x, op->dst.y,
+ op->damage ? *op->damage : (void *)-1));
- return !is_gpu(picture->pDrawable);
-}
+ assert(op->dst.bo->proxy == NULL);
+
+ if (too_large(op->dst.width, op->dst.height) &&
+ !sna_render_composite_redirect(sna, op, x, y, w, h))
+ return false;
-static inline bool prefer_blt(struct sna *sna)
-{
-#if PREFER_BLT
return true;
- (void)sna;
-#else
- return sna->kgem.mode != KGEM_RENDER;
-#endif
}
static bool
@@ -2019,7 +1585,7 @@ try_blt(struct sna *sna,
PicturePtr dst, PicturePtr src,
int width, int height)
{
- if (prefer_blt(sna)) {
+ if (sna->kgem.mode != KGEM_RENDER) {
DBG(("%s: already performing BLT\n", __FUNCTION__));
return true;
}
@@ -2038,7 +1604,7 @@ try_blt(struct sna *sna,
return true;
/* is the source picture only in cpu memory e.g. a shm pixmap? */
- return picture_is_cpu(src);
+ return picture_is_cpu(sna, src);
}
static bool
@@ -2060,15 +1626,10 @@ has_alphamap(PicturePtr p)
}
static bool
-untransformed(PicturePtr p)
+need_upload(struct sna *sna, PicturePtr p)
{
- return !p->transform || pixman_transform_is_int_translate(p->transform);
-}
-
-static bool
-need_upload(PicturePtr p)
-{
- return p->pDrawable && untransformed(p) && !is_gpu(p->pDrawable);
+ return p->pDrawable && untransformed(p) &&
+ !is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
}
static bool
@@ -2084,11 +1645,14 @@ source_is_busy(PixmapPtr pixmap)
if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
return true;
+ if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
+ return true;
+
return priv->gpu_damage && !priv->cpu_damage;
}
static bool
-source_fallback(PicturePtr p, PixmapPtr pixmap)
+source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap)
{
if (sna_picture_is_solid(p, NULL))
return false;
@@ -2103,7 +1667,7 @@ source_fallback(PicturePtr p, PixmapPtr pixmap)
if (pixmap && source_is_busy(pixmap))
return false;
- return has_alphamap(p) || !gen4_check_filter(p) || need_upload(p);
+ return has_alphamap(p) || !gen4_check_filter(p) || need_upload(sna, p);
}
static bool
@@ -2112,7 +1676,6 @@ gen4_composite_fallback(struct sna *sna,
PicturePtr mask,
PicturePtr dst)
{
- struct sna_pixmap *priv;
PixmapPtr src_pixmap;
PixmapPtr mask_pixmap;
PixmapPtr dst_pixmap;
@@ -2127,11 +1690,11 @@ gen4_composite_fallback(struct sna *sna,
dst_pixmap = get_drawable_pixmap(dst->pDrawable);
src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
- src_fallback = source_fallback(src, src_pixmap);
+ src_fallback = source_fallback(sna, src, src_pixmap);
if (mask) {
mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
- mask_fallback = source_fallback(mask, mask_pixmap);
+ mask_fallback = source_fallback(sna, mask, mask_pixmap);
} else {
mask_pixmap = NULL;
mask_fallback = false;
@@ -2151,8 +1714,7 @@ gen4_composite_fallback(struct sna *sna,
}
/* If anything is on the GPU, push everything out to the GPU */
- priv = sna_pixmap(dst_pixmap);
- if (priv && priv->gpu_damage && !priv->clear) {
+ if (dst_use_gpu(dst_pixmap)) {
DBG(("%s: dst is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
@@ -2187,14 +1749,14 @@ gen4_composite_fallback(struct sna *sna,
if (too_large(dst_pixmap->drawable.width,
dst_pixmap->drawable.height) &&
- (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage))) {
+ dst_is_cpu(dst_pixmap)) {
DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
return true;
}
DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
__FUNCTION__));
- return false;
+ return dst_use_cpu(dst_pixmap);
}
static int
@@ -2215,7 +1777,7 @@ reuse_source(struct sna *sna,
}
if (sna_picture_is_solid(mask, &color))
- return gen4_composite_solid_init(sna, mc, color);
+ return gen4_channel_init_solid(sna, mc, color);
if (sc->is_solid)
return false;
@@ -2291,15 +1853,13 @@ gen4_render_composite(struct sna *sna,
width, height,
tmp);
- if (!gen4_composite_set_target(dst, tmp))
- return false;
- sna_render_reduce_damage(tmp, dst_x, dst_y, width, height);
-
- if (too_large(tmp->dst.width, tmp->dst.height) &&
- !sna_render_composite_redirect(sna, tmp,
- dst_x, dst_y, width, height))
+ if (!gen4_composite_set_target(sna, tmp, dst,
+ dst_x, dst_y, width, height)) {
+ DBG(("%s: failed to set composite target\n", __FUNCTION__));
return false;
+ }
+ tmp->op = op;
switch (gen4_composite_picture(sna, src, &tmp->src,
src_x, src_y,
width, height,
@@ -2309,7 +1869,7 @@ gen4_render_composite(struct sna *sna,
DBG(("%s: failed to prepare source\n", __FUNCTION__));
goto cleanup_dst;
case 0:
- if (!gen4_composite_solid_init(sna, &tmp->src, 0))
+ if (!gen4_channel_init_solid(sna, &tmp->src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
@@ -2323,12 +1883,10 @@ gen4_render_composite(struct sna *sna,
break;
}
- tmp->op = op;
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
- tmp->prim_emit = gen4_emit_composite_primitive;
if (mask) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
tmp->has_component_alpha = true;
@@ -2363,7 +1921,7 @@ gen4_render_composite(struct sna *sna,
DBG(("%s: failed to prepare mask\n", __FUNCTION__));
goto cleanup_src;
case 0:
- if (!gen4_composite_solid_init(sna, &tmp->mask, 0))
+ if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
goto cleanup_src;
/* fall through to fixup */
case 1:
@@ -2373,33 +1931,22 @@ gen4_render_composite(struct sna *sna,
}
tmp->is_affine &= tmp->mask.is_affine;
-
- if (tmp->src.transform == NULL && tmp->mask.transform == NULL)
- tmp->prim_emit = gen4_emit_composite_primitive_identity_source_mask;
-
- tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine;
- } else {
- if (tmp->src.is_solid)
- tmp->prim_emit = gen4_emit_composite_primitive_solid;
- else if (tmp->src.transform == NULL)
- tmp->prim_emit = gen4_emit_composite_primitive_identity_source;
- else if (tmp->src.is_affine)
- tmp->prim_emit = gen4_emit_composite_primitive_affine_source;
-
- tmp->floats_per_vertex = 3 + !tmp->is_affine;
}
- tmp->floats_per_rect = 3*tmp->floats_per_vertex;
tmp->u.gen4.wm_kernel =
gen4_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
- tmp->u.gen4.ve_id = (tmp->mask.bo != NULL) << 1 | tmp->is_affine;
+ tmp->u.gen4.ve_id = gen4_choose_composite_emitter(tmp);
tmp->blt = gen4_render_composite_blt;
tmp->box = gen4_render_composite_box;
- tmp->boxes = gen4_render_composite_boxes;
+ tmp->boxes = gen4_render_composite_boxes__blt;
+ if (tmp->emit_boxes) {
+ tmp->boxes = gen4_render_composite_boxes;
+ tmp->thread_boxes = gen4_render_composite_boxes__thread;
+ }
tmp->done = gen4_render_composite_done;
if (!kgem_check_bo(&sna->kgem,
@@ -2428,127 +1975,7 @@ cleanup_dst:
return false;
}
-/* A poor man's span interface. But better than nothing? */
#if !NO_COMPOSITE_SPANS
-inline static void
-gen4_emit_composite_texcoord(struct sna *sna,
- const struct sna_composite_channel *channel,
- int16_t x, int16_t y)
-{
- float t[3];
-
- if (channel->is_affine) {
- sna_get_transformed_coordinates(x + channel->offset[0],
- y + channel->offset[1],
- channel->transform,
- &t[0], &t[1]);
- OUT_VERTEX_F(t[0] * channel->scale[0]);
- OUT_VERTEX_F(t[1] * channel->scale[1]);
- } else {
- t[0] = t[1] = 0; t[2] = 1;
- sna_get_transformed_coordinates_3d(x + channel->offset[0],
- y + channel->offset[1],
- channel->transform,
- &t[0], &t[1], &t[2]);
- OUT_VERTEX_F(t[0] * channel->scale[0]);
- OUT_VERTEX_F(t[1] * channel->scale[1]);
- OUT_VERTEX_F(t[2]);
- }
-}
-
-inline static void
-gen4_emit_composite_texcoord_affine(struct sna *sna,
- const struct sna_composite_channel *channel,
- int16_t x, int16_t y)
-{
- float t[2];
-
- sna_get_transformed_coordinates(x + channel->offset[0],
- y + channel->offset[1],
- channel->transform,
- &t[0], &t[1]);
- OUT_VERTEX_F(t[0] * channel->scale[0]);
- OUT_VERTEX_F(t[1] * channel->scale[1]);
-}
-
-inline static void
-gen4_emit_composite_spans_vertex(struct sna *sna,
- const struct sna_composite_spans_op *op,
- int16_t x, int16_t y)
-{
- OUT_VERTEX(x, y);
- gen4_emit_composite_texcoord(sna, &op->base.src, x, y);
-}
-
-fastcall static void
-gen4_emit_composite_spans_primitive(struct sna *sna,
- const struct sna_composite_spans_op *op,
- const BoxRec *box,
- float opacity)
-{
- gen4_emit_composite_spans_vertex(sna, op, box->x2, box->y2);
- OUT_VERTEX_F(opacity);
- OUT_VERTEX_F(1);
- if (!op->base.is_affine)
- OUT_VERTEX_F(1);
-
- gen4_emit_composite_spans_vertex(sna, op, box->x1, box->y2);
- OUT_VERTEX_F(opacity);
- OUT_VERTEX_F(1);
- if (!op->base.is_affine)
- OUT_VERTEX_F(1);
-
- gen4_emit_composite_spans_vertex(sna, op, box->x1, box->y1);
- OUT_VERTEX_F(opacity);
- OUT_VERTEX_F(0);
- if (!op->base.is_affine)
- OUT_VERTEX_F(1);
-}
-
-fastcall static void
-gen4_emit_composite_spans_solid(struct sna *sna,
- const struct sna_composite_spans_op *op,
- const BoxRec *box,
- float opacity)
-{
- OUT_VERTEX(box->x2, box->y2);
- OUT_VERTEX_F(1); OUT_VERTEX_F(1);
- OUT_VERTEX_F(opacity); OUT_VERTEX_F(1);
-
- OUT_VERTEX(box->x1, box->y2);
- OUT_VERTEX_F(0); OUT_VERTEX_F(1);
- OUT_VERTEX_F(opacity); OUT_VERTEX_F(1);
-
- OUT_VERTEX(box->x1, box->y1);
- OUT_VERTEX_F(0); OUT_VERTEX_F(0);
- OUT_VERTEX_F(opacity); OUT_VERTEX_F(0);
-}
-
-fastcall static void
-gen4_emit_composite_spans_affine(struct sna *sna,
- const struct sna_composite_spans_op *op,
- const BoxRec *box,
- float opacity)
-{
- OUT_VERTEX(box->x2, box->y2);
- gen4_emit_composite_texcoord_affine(sna, &op->base.src,
- box->x2, box->y2);
- OUT_VERTEX_F(opacity);
- OUT_VERTEX_F(1);
-
- OUT_VERTEX(box->x1, box->y2);
- gen4_emit_composite_texcoord_affine(sna, &op->base.src,
- box->x1, box->y2);
- OUT_VERTEX_F(opacity);
- OUT_VERTEX_F(1);
-
- OUT_VERTEX(box->x1, box->y1);
- gen4_emit_composite_texcoord_affine(sna, &op->base.src,
- box->x1, box->y1);
- OUT_VERTEX_F(opacity);
- OUT_VERTEX_F(0);
-}
-
fastcall static void
gen4_render_composite_spans_box(struct sna *sna,
const struct sna_composite_spans_op *op,
@@ -2580,22 +2007,69 @@ gen4_render_composite_spans_boxes(struct sna *sna,
op->base.dst.x, op->base.dst.y));
do {
- gen4_render_composite_spans_box(sna, op, box++, opacity);
- } while (--nbox);
+ int nbox_this_time;
+
+ nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox,
+ gen4_bind_surfaces);
+ nbox -= nbox_this_time;
+
+ do {
+ DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1,
+ box->y2 - box->y1));
+
+ op->prim_emit(sna, op, box++, opacity);
+ } while (--nbox_this_time);
+ } while (nbox);
+}
+
+fastcall static void
+gen4_render_composite_spans_boxes__thread(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ op->base.dst.x, op->base.dst.y));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox,
+ gen4_bind_surfaces);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
}
fastcall static void
gen4_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
- gen4_vertex_flush(sna);
+ if (sna->render.vertex_offset)
+ gen4_vertex_flush(sna);
DBG(("%s()\n", __FUNCTION__));
- kgem_bo_destroy(&sna->kgem, op->base.mask.bo);
- if (op->base.src.bo)
- kgem_bo_destroy(&sna->kgem, op->base.src.bo);
-
+ kgem_bo_destroy(&sna->kgem, op->base.src.bo);
sna_render_composite_redirect_done(sna, &op->base);
}
@@ -2605,17 +2079,43 @@ gen4_check_composite_spans(struct sna *sna,
int16_t width, int16_t height,
unsigned flags)
{
- if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
- return false;
+ DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
+ __FUNCTION__, op, width, height, flags));
if (op >= ARRAY_SIZE(gen4_blend_op))
return false;
- if (gen4_composite_fallback(sna, src, NULL, dst))
+ if (gen4_composite_fallback(sna, src, NULL, dst)) {
+ DBG(("%s: operation would fallback\n", __FUNCTION__));
return false;
+ }
- if (need_tiling(sna, width, height) && !is_gpu(dst->pDrawable))
+ if (need_tiling(sna, width, height) &&
+ !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
+ DBG(("%s: fallback, tiled operation not on GPU\n",
+ __FUNCTION__));
return false;
+ }
+
+ if (FORCE_SPANS)
+ return FORCE_SPANS > 0;
+
+ if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
+ struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
+ assert(priv);
+
+ if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
+ return true;
+
+ if (flags & COMPOSITE_SPANS_INPLACE_HINT)
+ return false;
+
+ if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0 &&
+ dst->format == PICT_a8)
+ return false;
+
+ return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
+ }
return true;
}
@@ -2645,15 +2145,9 @@ gen4_render_composite_spans(struct sna *sna,
}
tmp->base.op = op;
- if (!gen4_composite_set_target(dst, &tmp->base))
+ if (!gen4_composite_set_target(sna, &tmp->base, dst,
+ dst_x, dst_y, width, height))
return false;
- sna_render_reduce_damage(&tmp->base, dst_x, dst_y, width, height);
-
- if (too_large(tmp->base.dst.width, tmp->base.dst.height)) {
- if (!sna_render_composite_redirect(sna, &tmp->base,
- dst_x, dst_y, width, height))
- return false;
- }
switch (gen4_composite_picture(sna, src, &tmp->base.src,
src_x, src_y,
@@ -2663,7 +2157,7 @@ gen4_render_composite_spans(struct sna *sna,
case -1:
goto cleanup_dst;
case 0:
- if (!gen4_composite_solid_init(sna, &tmp->base.src, 0))
+ if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
@@ -2671,27 +2165,21 @@ gen4_render_composite_spans(struct sna *sna,
break;
}
- tmp->base.mask.bo = sna_render_get_solid(sna, 0);
- if (tmp->base.mask.bo == NULL)
- goto cleanup_src;
+ tmp->base.mask.bo = NULL;
+ tmp->base.mask.filter = SAMPLER_FILTER_NEAREST;
+ tmp->base.mask.repeat = SAMPLER_EXTEND_NONE;
tmp->base.is_affine = tmp->base.src.is_affine;
tmp->base.has_component_alpha = false;
tmp->base.need_magic_ca_pass = false;
- tmp->prim_emit = gen4_emit_composite_spans_primitive;
- if (tmp->base.src.is_solid)
- tmp->prim_emit = gen4_emit_composite_spans_solid;
- else if (tmp->base.is_affine)
- tmp->prim_emit = gen4_emit_composite_spans_affine;
- tmp->base.floats_per_vertex = 5 + 2*!tmp->base.is_affine;
- tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;
-
- tmp->base.u.gen5.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine;
- tmp->base.u.gen4.ve_id = 1 << 1 | tmp->base.is_affine;
+ tmp->base.u.gen4.ve_id = gen4_choose_spans_emitter(tmp);
+ tmp->base.u.gen4.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine;
tmp->box = gen4_render_composite_spans_box;
tmp->boxes = gen4_render_composite_spans_boxes;
+ if (tmp->emit_boxes)
+ tmp->thread_boxes = gen4_render_composite_spans_boxes__thread;
tmp->done = gen4_render_composite_spans_done;
if (!kgem_check_bo(&sna->kgem,
@@ -2721,10 +2209,11 @@ cleanup_dst:
static void
gen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
{
+ bool dirty = kgem_bo_is_dirty(op->dst.bo);
uint32_t *binding_table;
uint16_t offset;
- gen4_get_batch(sna);
+ gen4_get_batch(sna, op);
binding_table = gen4_composite_get_binding_table(sna, &offset);
@@ -2745,7 +2234,7 @@ gen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
offset = sna->render_state.gen4.surface_table;
}
- gen4_emit_state(sna, op, offset);
+ gen4_emit_state(sna, op, offset | dirty);
}
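
[annotation] Note the `offset | dirty` in the final call: surface-state entries are padded, so binding-table offsets always have bit 0 clear, which lets the dirty flag (destination bo already written, flush before reuse) ride along into gen4_emit_state() without widening its signature. A small sketch of the pack/unpack; the alignment is the one assumption:

	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>

	/* padded surface-state entries leave bit 0 of the offset free */
	static uint16_t pack_offset(uint16_t offset, bool dirty)
	{
		assert((offset & 1) == 0);
		return offset | (uint16_t)dirty;
	}

	static uint16_t unpack_offset(uint16_t packed, bool *dirty)
	{
		*dirty = packed & 1;   /* emit a flush before reusing the dst */
		return packed & ~1;    /* the real binding-table offset */
	}
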
static void
@@ -2768,19 +2257,6 @@ gen4_render_copy_one(struct sna *sna,
OUT_VERTEX(dx, dy);
OUT_VERTEX_F(sx*op->src.scale[0]);
OUT_VERTEX_F(sy*op->src.scale[1]);
-
- _FLUSH();
-}
-
-static inline bool prefer_blt_copy(struct sna *sna, unsigned flags)
-{
-#if PREFER_BLT
- return true;
- (void)sna;
-#else
- return sna->kgem.mode != KGEM_RENDER;
-#endif
- (void)flags;
}
static bool
@@ -2793,8 +2269,7 @@ gen4_render_copy_boxes(struct sna *sna, uint8_t alu,
DBG(("%s x %d\n", __FUNCTION__, n));
- if (prefer_blt_copy(sna, flags) &&
- sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
+ if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
@@ -2899,7 +2374,7 @@ fallback_blt:
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
tmp.u.gen4.wm_kernel = WM_KERNEL;
- tmp.u.gen4.ve_id = 1;
+ tmp.u.gen4.ve_id = 2;
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
kgem_submit(&sna->kgem);
@@ -2936,6 +2411,14 @@ fallback_tiled_dst:
if (tmp.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
+ if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
+ sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n))
+ return true;
+
return sna_tiling_copy_boxes(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
@@ -2955,7 +2438,8 @@ gen4_render_copy_blt(struct sna *sna,
static void
gen4_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
- gen4_vertex_flush(sna);
+ if (sna->render.vertex_offset)
+ gen4_vertex_flush(sna);
}
static bool
@@ -2970,8 +2454,7 @@ gen4_render_copy(struct sna *sna, uint8_t alu,
dst->drawable.serialNumber,
alu));
- if (prefer_blt(sna) &&
- sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
+ if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy(sna, alu,
src_bo, dst_bo,
dst->drawable.bitsPerPixel,
@@ -3021,7 +2504,7 @@ fallback:
op->base.floats_per_vertex = 3;
op->base.floats_per_rect = 9;
op->base.u.gen4.wm_kernel = WM_KERNEL;
- op->base.u.gen4.ve_id = 1;
+ op->base.u.gen4.ve_id = 2;
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
kgem_submit(&sna->kgem);
@@ -3047,56 +2530,20 @@ fallback:
}
static void
-gen4_fill_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
-{
- uint32_t *binding_table;
- uint16_t offset;
-
- gen4_get_batch(sna);
-
- binding_table = gen4_composite_get_binding_table(sna, &offset);
-
- binding_table[0] =
- gen4_bind_bo(sna,
- op->dst.bo, op->dst.width, op->dst.height,
- gen4_get_dest_format(op->dst.format),
- true);
- binding_table[1] =
- gen4_bind_bo(sna,
- op->src.bo, 1, 1,
- GEN4_SURFACEFORMAT_B8G8R8A8_UNORM,
- false);
-
- if (sna->kgem.surface == offset &&
- *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table) {
- sna->kgem.surface +=
- sizeof(struct gen4_surface_state_padded)/sizeof(uint32_t);
- offset = sna->render_state.gen4.surface_table;
- }
-
- gen4_emit_state(sna, op, offset);
-}
-
-static void
gen4_render_fill_rectangle(struct sna *sna,
const struct sna_composite_op *op,
int x, int y, int w, int h)
{
- gen4_get_rectangles(sna, op, 1, gen4_fill_bind_surfaces);
+ gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
OUT_VERTEX(x+w, y+h);
- OUT_VERTEX_F(1);
- OUT_VERTEX_F(1);
+ OUT_VERTEX_F(.5);
OUT_VERTEX(x, y+h);
- OUT_VERTEX_F(0);
- OUT_VERTEX_F(1);
+ OUT_VERTEX_F(.5);
OUT_VERTEX(x, y);
- OUT_VERTEX_F(0);
- OUT_VERTEX_F(0);
-
- _FLUSH();
+ OUT_VERTEX_F(.5);
}
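
[annotation] With the source bound as a 1x1 solid-colour bo, per-corner texture coordinates are redundant: every corner samples the same texel, so a single constant 0.5 (the texel centre) replaces the old (s, t) pair and the rectangle shrinks from 9 floats to 6. The count, spelled out under that layout:

	/* old solid-fill layout: 3 vertices x { packed(x,y), s, t }  = 9 floats
	 * new solid-fill layout: 3 vertices x { packed(x,y), 0.5f }  = 6 floats
	 * 0.5 addresses the centre of the 1x1 solid bo from every corner. */
	enum { OLD_FLOATS_PER_RECT = 3 * 3, NEW_FLOATS_PER_RECT = 3 * 2 };
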
static bool
@@ -3116,10 +2563,7 @@ gen4_render_fill_boxes(struct sna *sna,
return false;
}
- if (op <= PictOpSrc &&
- (prefer_blt(sna) ||
- too_large(dst->drawable.width, dst->drawable.height) ||
- !gen4_check_dst_format(format))) {
+ if (op <= PictOpSrc) {
uint8_t alu = GXinvalid;
pixel = 0;
@@ -3170,13 +2614,11 @@ gen4_render_fill_boxes(struct sna *sna,
tmp.dst.format = format;
tmp.dst.bo = dst_bo;
- tmp.src.bo = sna_render_get_solid(sna, pixel);
- tmp.src.filter = SAMPLER_FILTER_NEAREST;
- tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
+ gen4_channel_init_solid(sna, &tmp.src, pixel);
tmp.is_affine = true;
- tmp.floats_per_vertex = 3;
- tmp.floats_per_rect = 9;
+ tmp.floats_per_vertex = 2;
+ tmp.floats_per_rect = 6;
tmp.u.gen4.wm_kernel = WM_KERNEL;
tmp.u.gen4.ve_id = 1;
@@ -3185,7 +2627,7 @@ gen4_render_fill_boxes(struct sna *sna,
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
}
- gen4_fill_bind_surfaces(sna, &tmp);
+ gen4_bind_surfaces(sna, &tmp);
gen4_align_vertex(sna, &tmp);
do {
@@ -3235,7 +2677,8 @@ gen4_render_fill_op_boxes(struct sna *sna,
static void
gen4_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
{
- gen4_vertex_flush(sna);
+ if (sna->render.vertex_offset)
+ gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
}
@@ -3245,8 +2688,7 @@ gen4_render_fill(struct sna *sna, uint8_t alu,
uint32_t color,
struct sna_fill_op *op)
{
- if (prefer_blt(sna) &&
- sna_blt_fill(sna, alu,
+ if (sna_blt_fill(sna, alu,
dst_bo, dst->drawable.bitsPerPixel,
color,
op))
@@ -3274,20 +2716,14 @@ gen4_render_fill(struct sna *sna, uint8_t alu,
op->base.need_magic_ca_pass = 0;
op->base.has_component_alpha = 0;
- op->base.src.bo =
- sna_render_get_solid(sna,
- sna_rgba_for_color(color,
- dst->drawable.depth));
- op->base.src.filter = SAMPLER_FILTER_NEAREST;
- op->base.src.repeat = SAMPLER_EXTEND_REPEAT;
-
+ gen4_channel_init_solid(sna, &op->base.src,
+ sna_rgba_for_color(color,
+ dst->drawable.depth));
op->base.mask.bo = NULL;
- op->base.mask.filter = SAMPLER_FILTER_NEAREST;
- op->base.mask.repeat = SAMPLER_EXTEND_NONE;
op->base.is_affine = true;
- op->base.floats_per_vertex = 3;
- op->base.floats_per_rect = 9;
+ op->base.floats_per_vertex = 2;
+ op->base.floats_per_rect = 6;
op->base.u.gen4.wm_kernel = WM_KERNEL;
op->base.u.gen4.ve_id = 1;
@@ -3296,7 +2732,7 @@ gen4_render_fill(struct sna *sna, uint8_t alu,
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
}
- gen4_fill_bind_surfaces(sna, &op->base);
+ gen4_bind_surfaces(sna, &op->base);
gen4_align_vertex(sna, &op->base);
op->blt = gen4_render_fill_op_blt;
@@ -3356,32 +2792,29 @@ gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
tmp.dst.bo = bo;
tmp.dst.x = tmp.dst.y = 0;
- tmp.src.bo =
- sna_render_get_solid(sna,
- sna_rgba_for_color(color,
- dst->drawable.depth));
- tmp.src.filter = SAMPLER_FILTER_NEAREST;
- tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
-
+ gen4_channel_init_solid(sna, &tmp.src,
+ sna_rgba_for_color(color,
+ dst->drawable.depth));
tmp.mask.bo = NULL;
- tmp.mask.filter = SAMPLER_FILTER_NEAREST;
- tmp.mask.repeat = SAMPLER_EXTEND_NONE;
tmp.is_affine = true;
- tmp.floats_per_vertex = 3;
- tmp.floats_per_rect = 9;
- tmp.has_component_alpha = 0;
+ tmp.floats_per_vertex = 2;
+ tmp.floats_per_rect = 6;
+ tmp.has_component_alpha = false;
tmp.need_magic_ca_pass = false;
tmp.u.gen4.wm_kernel = WM_KERNEL;
tmp.u.gen4.ve_id = 1;
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
- _kgem_submit(&sna->kgem);
- assert(kgem_check_bo(&sna->kgem, bo, NULL));
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
+ kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+ return false;
+ }
}
- gen4_fill_bind_surfaces(sna, &tmp);
+ gen4_bind_surfaces(sna, &tmp);
gen4_align_vertex(sna, &tmp);
gen4_render_fill_rectangle(sna, &tmp, x1, y1, x2 - x1, y2 - y1);
@@ -3396,6 +2829,9 @@ static void
gen4_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
+
+ assert(sna->render.vb_id == 0);
+ assert(sna->render.vertex_offset == 0);
}
static void
@@ -3438,7 +2874,6 @@ static void gen4_render_reset(struct sna *sna)
{
sna->render_state.gen4.needs_invariant = true;
sna->render_state.gen4.needs_urb = true;
- sna->render_state.gen4.vb_id = 0;
sna->render_state.gen4.ve_id = -1;
sna->render_state.gen4.last_primitive = -1;
sna->render_state.gen4.last_pipelined_pointers = -1;
@@ -3452,6 +2887,10 @@ static void gen4_render_reset(struct sna *sna)
DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
discard_vbo(sna);
}
+
+ sna->render.vertex_offset = 0;
+ sna->render.nvertex_reloc = 0;
+ sna->render.vb_id = 0;
}
static void gen4_render_fini(struct sna *sna)
@@ -3473,8 +2912,7 @@ static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream)
}
static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
- const struct gt_info *info,
- uint32_t kernel)
+ int gen, uint32_t kernel)
{
struct gen4_sf_unit_state *sf;
@@ -3488,7 +2926,7 @@ static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
/* don't smash vertex header, read start from dw8 */
sf->thread3.urb_entry_read_offset = 1;
sf->thread3.dispatch_grf_start_reg = 3;
- sf->thread4.max_threads = info->max_sf_threads - 1;
+ sf->thread4.max_threads = GEN4_MAX_SF_THREADS - 1;
sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
sf->thread4.nr_urb_entries = URB_SF_ENTRIES;
sf->sf5.viewport_transform = false; /* skip viewport */
@@ -3519,7 +2957,7 @@ static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream,
}
static void gen4_init_wm_state(struct gen4_wm_unit_state *wm,
- const struct gt_info *info,
+ int gen,
bool has_mask,
uint32_t kernel,
uint32_t sampler)
@@ -3540,7 +2978,7 @@ static void gen4_init_wm_state(struct gen4_wm_unit_state *wm,
wm->wm4.sampler_state_pointer = sampler >> 5;
wm->wm4.sampler_count = 1;
- wm->wm5.max_threads = info->max_wm_threads - 1;
+ wm->wm5.max_threads = gen >= 045 ? G4X_MAX_WM_THREADS - 1 : GEN4_MAX_WM_THREADS - 1;
wm->wm5.transposed_urb_read = 0;
wm->wm5.thread_dispatch_enable = 1;
/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
@@ -3560,23 +2998,11 @@ static void gen4_init_wm_state(struct gen4_wm_unit_state *wm,
}
}
-static uint32_t gen4_create_cc_viewport(struct sna_static_stream *stream)
-{
- struct gen4_cc_viewport vp;
-
- vp.min_depth = -1.e35;
- vp.max_depth = 1.e35;
-
- return sna_static_stream_add(stream, &vp, sizeof(vp), 32);
-}
-
static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream)
{
uint8_t *ptr, *base;
- uint32_t vp;
int i, j;
- vp = gen4_create_cc_viewport(stream);
base = ptr =
sna_static_stream_map(stream,
GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64,
@@ -3589,7 +3015,6 @@ static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream)
state->cc3.blend_enable =
!(j == GEN4_BLENDFACTOR_ZERO && i == GEN4_BLENDFACTOR_ONE);
- state->cc4.cc_viewport_state_offset = vp >> 5;
state->cc5.logicop_func = 0xc; /* COPY */
state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD;
@@ -3616,15 +3041,9 @@ static bool gen4_render_setup(struct sna *sna)
struct gen4_render_state *state = &sna->render_state.gen4;
struct sna_static_stream general;
struct gen4_wm_unit_state_padded *wm_state;
- const struct gt_info *info;
- uint32_t sf[2], wm[KERNEL_COUNT];
+ uint32_t sf, wm[KERNEL_COUNT];
int i, j, k, l, m;
- if (sna->kgem.gen == 45)
- info = &g4x_gt_info;
- else
- info = &gen4_gt_info;
-
sna_static_stream_init(&general);
/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
@@ -3632,8 +3051,7 @@ static bool gen4_render_setup(struct sna *sna)
*/
null_create(&general);
- sf[0] = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__nomask);
- sf[1] = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
+ sf = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
for (m = 0; m < KERNEL_COUNT; m++) {
if (wm_kernels[m].size) {
wm[m] = sna_static_stream_add(&general,
@@ -3648,8 +3066,7 @@ static bool gen4_render_setup(struct sna *sna)
}
state->vs = gen4_create_vs_unit_state(&general);
- state->sf[0] = gen4_create_sf_state(&general, info, sf[0]);
- state->sf[1] = gen4_create_sf_state(&general, info, sf[1]);
+ state->sf = gen4_create_sf_state(&general, sna->kgem.gen, sf);
wm_state = sna_static_stream_map(&general,
sizeof(*wm_state) * KERNEL_COUNT *
@@ -3669,7 +3086,8 @@ static bool gen4_render_setup(struct sna *sna)
k, l);
for (m = 0; m < KERNEL_COUNT; m++) {
- gen4_init_wm_state(&wm_state->state, info,
+ gen4_init_wm_state(&wm_state->state,
+ sna->kgem.gen,
wm_kernels[m].has_mask,
wm[m], sampler_state);
wm_state++;
@@ -3695,10 +3113,13 @@ bool gen4_render_init(struct sna *sna)
#if !NO_COMPOSITE
sna->render.composite = gen4_render_composite;
+ sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen4_check_composite_spans;
sna->render.composite_spans = gen4_render_composite_spans;
+ if (0)
+ sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
#if !NO_VIDEO
diff --git a/src/sna/gen4_render.h b/src/sna/gen4_render.h
index 49d232e88..53c7fc2f7 100644
--- a/src/sna/gen4_render.h
+++ b/src/sna/gen4_render.h
@@ -25,8 +25,8 @@
*
**************************************************************************/
-#ifndef GEN5_RENDER_H
-#define GEN5_RENDER_H
+#ifndef GEN4_RENDER_H
+#define GEN4_RENDER_H
#define GEN4_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
((Pipeline) << 27) | \
@@ -661,15 +661,14 @@
#define GEN4_VERTEXBUFFER_ACCESS_VERTEXDATA 0
#define GEN4_VERTEXBUFFER_ACCESS_INSTANCEDATA 1
-#define GEN4_VFCOMPONENT_NOSTORE 0
-#define GEN4_VFCOMPONENT_STORE_SRC 1
-#define GEN4_VFCOMPONENT_STORE_0 2
-#define GEN4_VFCOMPONENT_STORE_1_FLT 3
-#define GEN4_VFCOMPONENT_STORE_1_INT 4
-#define GEN4_VFCOMPONENT_STORE_VID 5
-#define GEN4_VFCOMPONENT_STORE_IID 6
-#define GEN4_VFCOMPONENT_STORE_PID 7
-
+#define VFCOMPONENT_NOSTORE 0
+#define VFCOMPONENT_STORE_SRC 1
+#define VFCOMPONENT_STORE_0 2
+#define VFCOMPONENT_STORE_1_FLT 3
+#define VFCOMPONENT_STORE_1_INT 4
+#define VFCOMPONENT_STORE_VID 5
+#define VFCOMPONENT_STORE_IID 6
+#define VFCOMPONENT_STORE_PID 7
/* Execution Unit (EU) defines
@@ -725,8 +724,8 @@
#define GEN4_INSTRUCTION_NORMAL 0
#define GEN4_INSTRUCTION_SATURATE 1
-#define GEN4_MASK_ENABLE 0
-#define GEN4_MASK_DISABLE 1
+#define _MASK_ENABLE 0
+#define _MASK_DISABLE 1
#define GEN4_OPCODE_MOV 1
#define GEN4_OPCODE_SEL 2
@@ -2043,6 +2042,54 @@ struct gen4_surface_state
} ss5;
};
+/* Surface state DW0 */
+#define GEN4_SURFACE_RC_READ_WRITE (1 << 8)
+#define GEN4_SURFACE_MIPLAYOUT_SHIFT 10
+#define GEN4_SURFACE_MIPMAPLAYOUT_BELOW 0
+#define GEN4_SURFACE_MIPMAPLAYOUT_RIGHT 1
+#define GEN4_SURFACE_CUBEFACE_ENABLES 0x3f
+#define GEN4_SURFACE_BLEND_ENABLED (1 << 13)
+#define GEN4_SURFACE_WRITEDISABLE_B_SHIFT 14
+#define GEN4_SURFACE_WRITEDISABLE_G_SHIFT 15
+#define GEN4_SURFACE_WRITEDISABLE_R_SHIFT 16
+#define GEN4_SURFACE_WRITEDISABLE_A_SHIFT 17
+#define GEN4_SURFACE_FORMAT_SHIFT 18
+#define GEN4_SURFACE_FORMAT_MASK _MASK(26, 18)
+
+#define GEN4_SURFACE_TYPE_SHIFT 29
+#define GEN4_SURFACE_TYPE_MASK _MASK(31, 29)
+#define GEN4_SURFACE_1D 0
+#define GEN4_SURFACE_2D 1
+#define GEN4_SURFACE_3D 2
+#define GEN4_SURFACE_CUBE 3
+#define GEN4_SURFACE_BUFFER 4
+#define GEN4_SURFACE_NULL 7
+
+/* Surface state DW2 */
+#define GEN4_SURFACE_HEIGHT_SHIFT 19
+#define GEN4_SURFACE_HEIGHT_MASK _MASK(31, 19)
+#define GEN4_SURFACE_WIDTH_SHIFT 6
+#define GEN4_SURFACE_WIDTH_MASK _MASK(18, 6)
+#define GEN4_SURFACE_LOD_SHIFT 2
+#define GEN4_SURFACE_LOD_MASK _MASK(5, 2)
+
+/* Surface state DW3 */
+#define GEN4_SURFACE_DEPTH_SHIFT 21
+#define GEN4_SURFACE_DEPTH_MASK _MASK(31, 21)
+#define GEN4_SURFACE_PITCH_SHIFT 3
+#define GEN4_SURFACE_PITCH_MASK _MASK(19, 3)
+#define GEN4_SURFACE_TILED (1 << 1)
+#define GEN4_SURFACE_TILED_Y (1 << 0)
+
+/* Surface state DW4 */
+#define GEN4_SURFACE_MIN_LOD_SHIFT 28
+#define GEN4_SURFACE_MIN_LOD_MASK _MASK(31, 28)
+
+/* Surface state DW5 */
+#define GEN4_SURFACE_X_OFFSET_SHIFT 25
+#define GEN4_SURFACE_X_OFFSET_MASK _MASK(31, 25)
+#define GEN4_SURFACE_Y_OFFSET_SHIFT 20
+#define GEN4_SURFACE_Y_OFFSET_MASK _MASK(23, 20)
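
[annotation] These follow the usual shift/mask pattern for hand-packed hardware state. A hedged sketch of composing DW0 and DW2 for a 2D surface, assuming _MASK(high, low) builds an inclusive bit range as its uses above suggest:

	#include <assert.h>
	#include <stdint.h>

	/* assumed helper: inclusive bit range [low, high] */
	#define _MASK(high, low) (((1u << ((high) - (low) + 1)) - 1) << (low))

	static uint32_t surface_dw0(uint32_t format)
	{
		uint32_t dw0 = 1u /* GEN4_SURFACE_2D */ << 29 /* TYPE_SHIFT */;
		assert(((format << 18) & ~_MASK(26, 18)) == 0);
		return dw0 | format << 18 /* FORMAT_SHIFT */;
	}

	static uint32_t surface_dw2(uint32_t width, uint32_t height)
	{
		/* the hardware stores size minus one */
		return (height - 1) << 19 | (width - 1) << 6;
	}
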
struct gen4_vertex_buffer_state
diff --git a/src/sna/gen4_source.c b/src/sna/gen4_source.c
new file mode 100644
index 000000000..749de8d60
--- /dev/null
+++ b/src/sna/gen4_source.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright © 2011,2012,2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_render.h"
+#include "sna_render_inline.h"
+#include "gen4_source.h"
+#include "gen4_render.h"
+
+bool
+gen4_channel_init_solid(struct sna *sna,
+ struct sna_composite_channel *channel,
+ uint32_t color)
+{
+ channel->filter = PictFilterNearest;
+ channel->repeat = RepeatNormal;
+ channel->is_affine = true;
+ channel->is_solid = true;
+ channel->is_opaque = (color >> 24) == 0xff;
+ channel->transform = NULL;
+ channel->width = 1;
+ channel->height = 1;
+ channel->pict_format = PICT_a8r8g8b8;
+ channel->card_format = GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+ channel->bo = sna_render_get_solid(sna, color);
+
+ channel->scale[0] = channel->scale[1] = 1;
+ channel->offset[0] = channel->offset[1] = 0;
+ return channel->bo != NULL;
+}
+
+bool
+gen4_channel_init_linear(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_channel *channel,
+ int x, int y,
+ int w, int h,
+ int dst_x, int dst_y)
+{
+ PictLinearGradient *linear =
+ (PictLinearGradient *)picture->pSourcePict;
+ pixman_fixed_t tx, ty;
+ float x0, y0, sf;
+ float dx, dy;
+
+ DBG(("%s: p1=(%f, %f), p2=(%f, %f), src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
+ __FUNCTION__,
+ pixman_fixed_to_double(linear->p1.x), pixman_fixed_to_double(linear->p1.y),
+ pixman_fixed_to_double(linear->p2.x), pixman_fixed_to_double(linear->p2.y),
+ x, y, dst_x, dst_y, w, h));
+
+ if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
+ return false;
+
+ if (!sna_transform_is_affine(picture->transform)) {
+ DBG(("%s: fallback due to projective transform\n",
+ __FUNCTION__));
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+ }
+
+ channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear);
+ if (!channel->bo)
+ return false;
+
+ channel->filter = PictFilterNearest;
+ channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
+ channel->width = channel->bo->pitch / 4;
+ channel->height = 1;
+ channel->pict_format = PICT_a8r8g8b8;
+ channel->card_format = GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
+ channel->is_linear = 1;
+ channel->is_affine = 1;
+
+ channel->scale[0] = channel->scale[1] = 1;
+ channel->offset[0] = channel->offset[1] = 0;
+
+ if (sna_transform_is_translation(picture->transform, &tx, &ty)) {
+ dx = pixman_fixed_to_double(linear->p2.x - linear->p1.x);
+ dy = pixman_fixed_to_double(linear->p2.y - linear->p1.y);
+
+ x0 = pixman_fixed_to_double(linear->p1.x);
+ y0 = pixman_fixed_to_double(linear->p1.y);
+
+ if (tx | ty) {
+ x0 -= pixman_fixed_to_double(tx);
+ y0 -= pixman_fixed_to_double(ty);
+ }
+ } else {
+ struct pixman_f_vector p1, p2;
+ struct pixman_f_transform m, inv;
+
+ pixman_f_transform_from_pixman_transform(&m, picture->transform);
+ DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n",
+ __FUNCTION__,
+ m.m[0][0], m.m[0][1], m.m[0][2],
+ m.m[1][0], m.m[1][1], m.m[1][2],
+ m.m[2][0], m.m[2][1], m.m[2][2]));
+ if (!pixman_f_transform_invert(&inv, &m))
+ return false;
+
+ p1.v[0] = pixman_fixed_to_double(linear->p1.x);
+ p1.v[1] = pixman_fixed_to_double(linear->p1.y);
+ p1.v[2] = 1.;
+ pixman_f_transform_point(&inv, &p1);
+
+ p2.v[0] = pixman_fixed_to_double(linear->p2.x);
+ p2.v[1] = pixman_fixed_to_double(linear->p2.y);
+ p2.v[2] = 1.;
+ pixman_f_transform_point(&inv, &p2);
+
+ DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n",
+ __FUNCTION__,
+ p1.v[0], p1.v[1], p1.v[2],
+ p2.v[0], p2.v[1], p2.v[2]));
+
+ dx = p2.v[0] - p1.v[0];
+ dy = p2.v[1] - p1.v[1];
+
+ x0 = p1.v[0];
+ y0 = p1.v[1];
+ }
+
+ sf = dx*dx + dy*dy;
+ dx /= sf;
+ dy /= sf;
+
+ channel->u.linear.dx = dx;
+ channel->u.linear.dy = dy;
+ channel->u.linear.offset = -dx*(x0+dst_x-x) + -dy*(y0+dst_y-y);
+
+ channel->embedded_transform.matrix[0][0] = pixman_double_to_fixed(dx);
+ channel->embedded_transform.matrix[0][1] = pixman_double_to_fixed(dy);
+ channel->embedded_transform.matrix[0][2] = pixman_double_to_fixed(channel->u.linear.offset);
+
+ channel->embedded_transform.matrix[1][0] = 0;
+ channel->embedded_transform.matrix[1][1] = 0;
+ channel->embedded_transform.matrix[1][2] = pixman_double_to_fixed(.5);
+
+ channel->embedded_transform.matrix[2][0] = 0;
+ channel->embedded_transform.matrix[2][1] = 0;
+ channel->embedded_transform.matrix[2][2] = pixman_fixed_1;
+
+ channel->transform = &channel->embedded_transform;
+
+ DBG(("%s: dx=%f, dy=%f, offset=%f\n",
+ __FUNCTION__, dx, dy, channel->u.linear.offset));
+
+ return channel->bo != NULL;
+}
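
[annotation] The normalisation above implements the standard linear-gradient projection: for d = p2 - p1, the gradient parameter of a point P is t = (P - p1) . d / |d|^2, so dividing dx and dy by sf = dx*dx + dy*dy folds the division into the vector. The embedded transform then maps (x, y, 1) to (t, 0.5), sampling the 1-D gradient ramp along its centre line. A worked check with assumed endpoints:

	/* assumed gradient: p1 = (0, 0), p2 = (100, 0)
	 *   dx = 100, dy = 0, sf = 100*100 + 0*0 = 10000
	 *   dx /= sf -> 0.01, dy -> 0.0
	 * pixel (50, y): t = 0.01*50 + 0.0*y + offset(=0) = 0.5,
	 * i.e. halfway along the ramp, independent of y, as expected. */
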
diff --git a/src/sna/gen4_source.h b/src/sna/gen4_source.h
new file mode 100644
index 000000000..c73afaca9
--- /dev/null
+++ b/src/sna/gen4_source.h
@@ -0,0 +1,22 @@
+#ifndef GEN4_SOURCE_H
+#define GEN4_SOURCE_H
+
+#include "compiler.h"
+
+#include "sna.h"
+#include "sna_render.h"
+
+bool
+gen4_channel_init_solid(struct sna *sna,
+ struct sna_composite_channel *channel,
+ uint32_t color);
+
+bool
+gen4_channel_init_linear(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_channel *channel,
+ int x, int y,
+ int w, int h,
+ int dst_x, int dst_y);
+
+#endif /* GEN4_SOURCE_H */
diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
new file mode 100644
index 000000000..5062ebdf0
--- /dev/null
+++ b/src/sna/gen4_vertex.c
@@ -0,0 +1,1543 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_render.h"
+#include "sna_render_inline.h"
+#include "gen4_vertex.h"
+
+void gen4_vertex_flush(struct sna *sna)
+{
+ DBG(("%s[%x] = %d\n", __FUNCTION__,
+ 4*sna->render.vertex_offset,
+ sna->render.vertex_index - sna->render.vertex_start));
+
+ assert(sna->render.vertex_offset);
+ assert(sna->render.vertex_index > sna->render.vertex_start);
+
+ sna->kgem.batch[sna->render.vertex_offset] =
+ sna->render.vertex_index - sna->render.vertex_start;
+ sna->render.vertex_offset = 0;
+}
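
[annotation] gen4_vertex_flush() back-patches the batch: when a primitive is opened its vertex count is unknown, so a placeholder dword is emitted and its index kept in vertex_offset, to be overwritten here once the count is final. The same deferred-patch idiom in isolation (all names hypothetical):

	#include <stdint.h>

	struct batch {
		uint32_t buf[1024];
		int      n;            /* next free dword */
		int      count_slot;   /* index of the deferred count, 0 = none */
		int      start, index; /* first and next vertex of the primitive */
	};

	static void begin_primitive(struct batch *b)
	{
		b->count_slot = b->n;   /* remember where the count goes */
		b->buf[b->n++] = 0;     /* placeholder, patched at flush */
		b->start = b->index;
	}

	static void flush_primitive(struct batch *b)
	{
		b->buf[b->count_slot] = b->index - b->start;  /* back-patch */
		b->count_slot = 0;
	}
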
+
+int gen4_vertex_finish(struct sna *sna)
+{
+ struct kgem_bo *bo;
+ unsigned int i;
+ unsigned hint, size;
+
+ DBG(("%s: used=%d / %d\n", __FUNCTION__,
+ sna->render.vertex_used, sna->render.vertex_size));
+ assert(sna->render.vertex_offset == 0);
+ assert(sna->render.vertex_used);
+
+ sna_vertex_wait__locked(&sna->render);
+
+ /* Note: we only need dword alignment (currently) */
+
+ bo = sna->render.vbo;
+ if (bo) {
+ for (i = 0; i < sna->render.nvertex_reloc; i++) {
+ DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
+ i, sna->render.vertex_reloc[i]));
+
+ sna->kgem.batch[sna->render.vertex_reloc[i]] =
+ kgem_add_reloc(&sna->kgem,
+ sna->render.vertex_reloc[i], bo,
+ I915_GEM_DOMAIN_VERTEX << 16,
+ 0);
+ }
+
+ assert(!sna->render.active);
+ sna->render.nvertex_reloc = 0;
+ sna->render.vertex_used = 0;
+ sna->render.vertex_index = 0;
+ sna->render.vbo = NULL;
+ sna->render.vb_id = 0;
+
+ kgem_bo_destroy(&sna->kgem, bo);
+ }
+
+ hint = CREATE_GTT_MAP;
+ if (bo)
+ hint |= CREATE_CACHED | CREATE_NO_THROTTLE;
+
+ size = 256*1024;
+ assert(!sna->render.active);
+ sna->render.vertices = NULL;
+ sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
+ while (sna->render.vbo == NULL && size > 16*1024) {
+ size /= 2;
+ sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
+ }
+ if (sna->render.vbo == NULL)
+ sna->render.vbo = kgem_create_linear(&sna->kgem,
+ 256*1024, CREATE_GTT_MAP);
+ if (sna->render.vbo)
+ sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
+ if (sna->render.vertices == NULL) {
+ if (sna->render.vbo) {
+ kgem_bo_destroy(&sna->kgem, sna->render.vbo);
+ sna->render.vbo = NULL;
+ }
+ sna->render.vertices = sna->render.vertex_data;
+ sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
+ return 0;
+ }
+
+ if (sna->render.vertex_used) {
+ DBG(("%s: copying initial buffer x %d to handle=%d\n",
+ __FUNCTION__,
+ sna->render.vertex_used,
+ sna->render.vbo->handle));
+ assert(sizeof(float)*sna->render.vertex_used <=
+ __kgem_bo_size(sna->render.vbo));
+ memcpy(sna->render.vertices,
+ sna->render.vertex_data,
+ sizeof(float)*sna->render.vertex_used);
+ }
+
+ size = __kgem_bo_size(sna->render.vbo)/4;
+ if (size >= UINT16_MAX)
+ size = UINT16_MAX - 1;
+
+ DBG(("%s: create vbo handle=%d, size=%d\n",
+ __FUNCTION__, sna->render.vbo->handle, size));
+
+ sna->render.vertex_size = size;
+ return sna->render.vertex_size - sna->render.vertex_used;
+}
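
[annotation] The allocation policy above is a geometric back-off: try 256 KiB with relaxed hints, halve on failure down to 16 KiB, then retry 256 KiB with no hints (which may stall) before surrendering to the small static vertex_data array. The same shape in isolation, with a hypothetical allocator standing in for kgem_create_linear():

	#include <stdlib.h>

	/* hypothetical stand-in for kgem_create_linear(); may return NULL */
	static void *try_alloc(size_t size, unsigned relaxed)
	{
		(void)relaxed;
		return malloc(size);
	}

	static void *alloc_vbo(size_t *out_size)
	{
		size_t size = 256 * 1024;
		void *bo = try_alloc(size, 1);

		while (bo == NULL && size > 16 * 1024) {
			size /= 2;                     /* geometric back-off */
			bo = try_alloc(size, 1);
		}
		if (bo == NULL)                        /* last resort: may stall */
			bo = try_alloc(size = 256 * 1024, 0);

		*out_size = bo ? size : 0;             /* 0 => use static storage */
		return bo;
	}
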
+
+void gen4_vertex_close(struct sna *sna)
+{
+ struct kgem_bo *bo, *free_bo = NULL;
+ unsigned int i, delta = 0;
+
+ assert(sna->render.vertex_offset == 0);
+ if (!sna->render.vb_id)
+ return;
+
+ DBG(("%s: used=%d, vbo active? %d, vb=%x, nreloc=%d\n",
+ __FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0,
+ sna->render.vb_id, sna->render.nvertex_reloc));
+
+ assert(!sna->render.active);
+
+ bo = sna->render.vbo;
+ if (bo) {
+ if (sna->render.vertex_size - sna->render.vertex_used < 64) {
+ DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
+ sna->render.vbo = NULL;
+ sna->render.vertices = sna->render.vertex_data;
+ sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
+ free_bo = bo;
+ } else if (IS_CPU_MAP(bo->map) && !sna->kgem.has_llc) {
+ DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
+ sna->render.vertices =
+ kgem_bo_map__gtt(&sna->kgem, sna->render.vbo);
+ if (sna->render.vertices == NULL) {
+ sna->render.vbo = NULL;
+ sna->render.vertices = sna->render.vertex_data;
+ sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
+ free_bo = bo;
+ }
+ }
+ } else {
+ if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
+ DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
+ sna->render.vertex_used, sna->kgem.nbatch));
+ memcpy(sna->kgem.batch + sna->kgem.nbatch,
+ sna->render.vertex_data,
+ sna->render.vertex_used * 4);
+ delta = sna->kgem.nbatch * 4;
+ bo = NULL;
+ sna->kgem.nbatch += sna->render.vertex_used;
+ } else {
+ bo = kgem_create_linear(&sna->kgem,
+ 4*sna->render.vertex_used,
+ CREATE_NO_THROTTLE);
+ if (bo && !kgem_bo_write(&sna->kgem, bo,
+ sna->render.vertex_data,
+ 4*sna->render.vertex_used)) {
+ kgem_bo_destroy(&sna->kgem, bo);
+ bo = NULL;
+ }
+ DBG(("%s: new vbo: %d\n", __FUNCTION__,
+ sna->render.vertex_used));
+ free_bo = bo;
+ }
+ }
+
+ assert(sna->render.nvertex_reloc);
+ for (i = 0; i < sna->render.nvertex_reloc; i++) {
+ DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
+ i, sna->render.vertex_reloc[i]));
+
+ sna->kgem.batch[sna->render.vertex_reloc[i]] =
+ kgem_add_reloc(&sna->kgem,
+ sna->render.vertex_reloc[i], bo,
+ I915_GEM_DOMAIN_VERTEX << 16,
+ delta);
+ }
+ sna->render.nvertex_reloc = 0;
+ sna->render.vb_id = 0;
+
+ if (sna->render.vbo == NULL) {
+ assert(!sna->render.active);
+ sna->render.vertex_used = 0;
+ sna->render.vertex_index = 0;
+ assert(sna->render.vertices == sna->render.vertex_data);
+ assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
+ }
+
+ if (free_bo)
+ kgem_bo_destroy(&sna->kgem, free_bo);
+}
+
+/* specialised vertex emission routines */
+
+#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) /* XXX assert(!too_large(x, y)); */
+#define OUT_VERTEX_F(v) vertex_emit(sna, v)
+
+inline static float
+compute_linear(const struct sna_composite_channel *channel,
+ int16_t x, int16_t y)
+{
+ return ((x+channel->offset[0]) * channel->u.linear.dx +
+ (y+channel->offset[1]) * channel->u.linear.dy +
+ channel->u.linear.offset);
+}
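
[annotation] compute_linear() only needs evaluating at a rectangle's three emitted corners; being affine, the hardware interpolates it across the triangle. A usage sketch with assumed channel values (dx = 0.01, dy = 0, offset = 0, channel->offset zeroed, i.e. a gradient running 0..1 over x = 0..100):

	static void linear_corner_example(const struct sna_composite_channel *ch)
	{
		float t_left  = compute_linear(ch, 25, 0);   /* = 0.25 */
		float t_right = compute_linear(ch, 75, 0);   /* = 0.75 */
		(void)t_left;
		(void)t_right;
	}
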
+
+inline static void
+emit_texcoord(struct sna *sna,
+ const struct sna_composite_channel *channel,
+ int16_t x, int16_t y)
+{
+ if (channel->is_solid) {
+ OUT_VERTEX_F(x);
+ return;
+ }
+
+ x += channel->offset[0];
+ y += channel->offset[1];
+
+ if (channel->is_affine) {
+ float s, t;
+
+ sna_get_transformed_coordinates(x, y,
+ channel->transform,
+ &s, &t);
+ OUT_VERTEX_F(s * channel->scale[0]);
+ OUT_VERTEX_F(t * channel->scale[1]);
+ } else {
+ float s, t, w;
+
+ sna_get_transformed_coordinates_3d(x, y,
+ channel->transform,
+ &s, &t, &w);
+ OUT_VERTEX_F(s * channel->scale[0]);
+ OUT_VERTEX_F(t * channel->scale[1]);
+ OUT_VERTEX_F(w);
+ }
+}
+
+inline static void
+emit_vertex(struct sna *sna,
+ const struct sna_composite_op *op,
+ int16_t srcX, int16_t srcY,
+ int16_t mskX, int16_t mskY,
+ int16_t dstX, int16_t dstY)
+{
+ OUT_VERTEX(dstX, dstY);
+ emit_texcoord(sna, &op->src, srcX, srcY);
+}
+
+fastcall static void
+emit_primitive(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ emit_vertex(sna, op,
+ r->src.x + r->width, r->src.y + r->height,
+ r->mask.x + r->width, r->mask.y + r->height,
+ r->dst.x + r->width, r->dst.y + r->height);
+ emit_vertex(sna, op,
+ r->src.x, r->src.y + r->height,
+ r->mask.x, r->mask.y + r->height,
+ r->dst.x, r->dst.y + r->height);
+ emit_vertex(sna, op,
+ r->src.x, r->src.y,
+ r->mask.x, r->mask.y,
+ r->dst.x, r->dst.y);
+}
+
+inline static void
+emit_vertex_mask(struct sna *sna,
+ const struct sna_composite_op *op,
+ int16_t srcX, int16_t srcY,
+ int16_t mskX, int16_t mskY,
+ int16_t dstX, int16_t dstY)
+{
+ OUT_VERTEX(dstX, dstY);
+ emit_texcoord(sna, &op->src, srcX, srcY);
+ emit_texcoord(sna, &op->mask, mskX, mskY);
+}
+
+fastcall static void
+emit_primitive_mask(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ emit_vertex_mask(sna, op,
+ r->src.x + r->width, r->src.y + r->height,
+ r->mask.x + r->width, r->mask.y + r->height,
+ r->dst.x + r->width, r->dst.y + r->height);
+ emit_vertex_mask(sna, op,
+ r->src.x, r->src.y + r->height,
+ r->mask.x, r->mask.y + r->height,
+ r->dst.x, r->dst.y + r->height);
+ emit_vertex_mask(sna, op,
+ r->src.x, r->src.y,
+ r->mask.x, r->mask.y,
+ r->dst.x, r->dst.y);
+}
+
+fastcall static void
+emit_primitive_solid(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ assert(op->floats_per_rect == 6);
+ assert((sna->render.vertex_used % 2) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 6;
+ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ dst.p.x = r->dst.x;
+ v[2] = dst.f;
+ dst.p.y = r->dst.y;
+ v[4] = dst.f;
+
+ v[5] = v[3] = v[1] = .5;
+}
+
+fastcall static void
+emit_boxes_solid(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ dst.p.x = box->x1;
+ v[2] = dst.f;
+ dst.p.y = box->y1;
+ v[4] = dst.f;
+
+ v[5] = v[3] = v[1] = .5;
+ box++;
+ v += 6;
+ } while (--nbox);
+}
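
[annotation] The dst.f assignments above are a type pun, not a conversion: the two int16 pixel coordinates are stored into the bit pattern of one float, matching the packed SHORT2 position element the vertex fetcher is programmed with. A self-contained illustration, with a stand-in for struct sna_coordinate:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	struct coord { int16_t x, y; };      /* stand-in for sna_coordinate */

	int main(void)
	{
		union { struct coord p; float f; } u;
		float vertex[2];
		uint32_t bits;

		u.p.x = 300;
		u.p.y = 200;
		vertex[0] = u.f;     /* bit copy, never used as a number */
		vertex[1] = 0.5f;    /* constant texcoord (solid fill) */

		memcpy(&bits, &vertex[0], sizeof(bits));
		printf("packed bits: 0x%08x\n", bits); /* GPU reads (300, 200) */
		return 0;
	}
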
+
+fastcall static void
+emit_primitive_linear(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ assert(op->floats_per_rect == 6);
+ assert((sna->render.vertex_used % 2) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 6;
+ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ dst.p.x = r->dst.x;
+ v[2] = dst.f;
+ dst.p.y = r->dst.y;
+ v[4] = dst.f;
+
+ v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
+ v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
+ v[5] = compute_linear(&op->src, r->src.x, r->src.y);
+}
+
+fastcall static void
+emit_boxes_linear(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ do {
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ dst.p.x = box->x1;
+ v[2] = dst.f;
+ dst.p.y = box->y1;
+ v[4] = dst.f;
+
+ v[1] = compute_linear(&op->src, box->x2, box->y2);
+ v[3] = compute_linear(&op->src, box->x1, box->y2);
+ v[5] = compute_linear(&op->src, box->x1, box->y1);
+
+ v += 6;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
+emit_primitive_identity_source(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float *v;
+
+ assert(op->floats_per_rect == 9);
+ assert((sna->render.vertex_used % 3) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+
+ v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
+ v[1] = v[4] + r->width * op->src.scale[0];
+
+ v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
+ v[5] = v[2] = v[8] + r->height * op->src.scale[1];
+}
+
+fastcall static void
+emit_boxes_identity_source(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ dst.p.x = box->x1;
+ v[3] = dst.f;
+ dst.p.y = box->y1;
+ v[6] = dst.f;
+
+ v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
+ v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
+
+ v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
+ v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
+
+ v += 9;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
+emit_primitive_simple_source(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ float xx = op->src.transform->matrix[0][0];
+ float x0 = op->src.transform->matrix[0][2];
+ float yy = op->src.transform->matrix[1][1];
+ float y0 = op->src.transform->matrix[1][2];
+ float sx = op->src.scale[0];
+ float sy = op->src.scale[1];
+ int16_t tx = op->src.offset[0];
+ int16_t ty = op->src.offset[1];
+
+ assert(op->floats_per_rect == 9);
+ assert((sna->render.vertex_used % 3) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 3*3;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
+ v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
+
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
+
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+ v[8] = ((r->src.y + ty) * yy + y0) * sy;
+}
+
+fastcall static void
+emit_boxes_simple_source(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ float xx = op->src.transform->matrix[0][0];
+ float x0 = op->src.transform->matrix[0][2];
+ float yy = op->src.transform->matrix[1][1];
+ float y0 = op->src.transform->matrix[1][2];
+ float sx = op->src.scale[0];
+ float sy = op->src.scale[1];
+ int16_t tx = op->src.offset[0];
+ int16_t ty = op->src.offset[1];
+
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ v[1] = ((box->x2 + tx) * xx + x0) * sx;
+ v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
+
+ dst.p.x = box->x1;
+ v[3] = dst.f;
+ v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;
+
+ dst.p.y = box->y1;
+ v[6] = dst.f;
+ v[8] = ((box->y1 + ty) * yy + y0) * sy;
+
+ v += 9;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
+emit_primitive_affine_source(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float *v;
+
+ assert(op->floats_per_rect == 9);
+ assert((sna->render.vertex_used % 3) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ _sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width,
+ op->src.offset[1] + r->src.y + r->height,
+ op->src.transform, op->src.scale,
+ &v[1], &v[2]);
+
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ _sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
+ op->src.offset[1] + r->src.y + r->height,
+ op->src.transform, op->src.scale,
+ &v[4], &v[5]);
+
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+ _sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
+ op->src.offset[1] + r->src.y,
+ op->src.transform, op->src.scale,
+ &v[7], &v[8]);
+}
+
+fastcall static void
+emit_boxes_affine_source(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ _sna_get_transformed_scaled(op->src.offset[0] + box->x2,
+ op->src.offset[1] + box->y2,
+ op->src.transform, op->src.scale,
+ &v[1], &v[2]);
+
+ dst.p.x = box->x1;
+ v[3] = dst.f;
+ _sna_get_transformed_scaled(op->src.offset[0] + box->x1,
+ op->src.offset[1] + box->y2,
+ op->src.transform, op->src.scale,
+ &v[4], &v[5]);
+
+ dst.p.y = box->y1;
+ v[6] = dst.f;
+ _sna_get_transformed_scaled(op->src.offset[0] + box->x1,
+ op->src.offset[1] + box->y1,
+ op->src.transform, op->src.scale,
+ &v[7], &v[8]);
+ box++;
+ v += 9;
+ } while (--nbox);
+}
+
+fastcall static void
+emit_primitive_identity_mask(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float msk_x, msk_y;
+ float w, h;
+ float *v;
+
+ msk_x = r->mask.x + op->mask.offset[0];
+ msk_y = r->mask.y + op->mask.offset[1];
+ w = r->width;
+ h = r->height;
+
+ DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
+ __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
+
+ assert(op->floats_per_rect == 12);
+ assert((sna->render.vertex_used % 4) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[2] = (msk_x + w) * op->mask.scale[0];
+ v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
+
+ dst.p.x = r->dst.x;
+ v[4] = dst.f;
+ v[10] = v[6] = msk_x * op->mask.scale[0];
+
+ dst.p.y = r->dst.y;
+ v[8] = dst.f;
+ v[11] = msk_y * op->mask.scale[1];
+
+ v[9] = v[5] = v[1] = .5;
+}
+
+fastcall static void
+emit_boxes_identity_mask(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ float msk_x = op->mask.offset[0];
+ float msk_y = op->mask.offset[1];
+
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ v[2] = (msk_x + box->x2) * op->mask.scale[0];
+ v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
+
+ dst.p.x = box->x1;
+ v[4] = dst.f;
+ v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
+
+ dst.p.y = box->y1;
+ v[8] = dst.f;
+ v[11] = (msk_y + box->y1) * op->mask.scale[1];
+
+ v[9] = v[5] = v[1] = .5;
+ v += 12;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
+emit_primitive_linear_identity_mask(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float msk_x, msk_y;
+ float w, h;
+ float *v;
+
+ msk_x = r->mask.x + op->mask.offset[0];
+ msk_y = r->mask.y + op->mask.offset[1];
+ w = r->width;
+ h = r->height;
+
+ DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
+ __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
+
+ assert(op->floats_per_rect == 12);
+ assert((sna->render.vertex_used % 4) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[2] = (msk_x + w) * op->mask.scale[0];
+ v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
+
+ dst.p.x = r->dst.x;
+ v[4] = dst.f;
+ v[10] = v[6] = msk_x * op->mask.scale[0];
+
+ dst.p.y = r->dst.y;
+ v[8] = dst.f;
+ v[11] = msk_y * op->mask.scale[1];
+
+ v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
+ v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
+ v[9] = compute_linear(&op->src, r->src.x, r->src.y);
+}
+
+fastcall static void
+emit_boxes_linear_identity_mask(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ float msk_x = op->mask.offset[0];
+ float msk_y = op->mask.offset[1];
+
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ v[2] = (msk_x + box->x2) * op->mask.scale[0];
+ v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
+
+ dst.p.x = box->x1;
+ v[4] = dst.f;
+ v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
+
+ dst.p.y = box->y1;
+ v[8] = dst.f;
+ v[11] = (msk_y + box->y1) * op->mask.scale[1];
+
+ v[1] = compute_linear(&op->src, box->x2, box->y2);
+ v[5] = compute_linear(&op->src, box->x1, box->y2);
+ v[9] = compute_linear(&op->src, box->x1, box->y1);
+
+ v += 12;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
+emit_primitive_identity_source_mask(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float src_x, src_y;
+ float msk_x, msk_y;
+ float w, h;
+ float *v;
+
+ src_x = r->src.x + op->src.offset[0];
+ src_y = r->src.y + op->src.offset[1];
+ msk_x = r->mask.x + op->mask.offset[0];
+ msk_y = r->mask.y + op->mask.offset[1];
+ w = r->width;
+ h = r->height;
+
+ assert(op->floats_per_rect == 15);
+ assert((sna->render.vertex_used % 5) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 15;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[1] = (src_x + w) * op->src.scale[0];
+ v[2] = (src_y + h) * op->src.scale[1];
+ v[3] = (msk_x + w) * op->mask.scale[0];
+ v[4] = (msk_y + h) * op->mask.scale[1];
+
+ dst.p.x = r->dst.x;
+ v[5] = dst.f;
+ v[6] = src_x * op->src.scale[0];
+ v[7] = v[2];
+ v[8] = msk_x * op->mask.scale[0];
+ v[9] = v[4];
+
+ dst.p.y = r->dst.y;
+ v[10] = dst.f;
+ v[11] = v[6];
+ v[12] = src_y * op->src.scale[1];
+ v[13] = v[8];
+ v[14] = msk_y * op->mask.scale[1];
+}
+
+fastcall static void
+emit_primitive_simple_source_identity(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ float xx = op->src.transform->matrix[0][0];
+ float x0 = op->src.transform->matrix[0][2];
+ float yy = op->src.transform->matrix[1][1];
+ float y0 = op->src.transform->matrix[1][2];
+ float sx = op->src.scale[0];
+ float sy = op->src.scale[1];
+ int16_t tx = op->src.offset[0];
+ int16_t ty = op->src.offset[1];
+ float msk_x = r->mask.x + op->mask.offset[0];
+ float msk_y = r->mask.y + op->mask.offset[1];
+ float w = r->width, h = r->height;
+
+ assert(op->floats_per_rect == 15);
+ assert((sna->render.vertex_used % 5) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 3*5;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
+ v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
+ v[3] = (msk_x + w) * op->mask.scale[0];
+ v[4] = (msk_y + h) * op->mask.scale[1];
+
+ dst.p.x = r->dst.x;
+ v[5] = dst.f;
+ v[6] = ((r->src.x + tx) * xx + x0) * sx;
+ v[7] = v[2];
+ v[8] = msk_x * op->mask.scale[0];
+ v[9] = v[4];
+
+ dst.p.y = r->dst.y;
+ v[10] = dst.f;
+ v[11] = v[6];
+ v[12] = ((r->src.y + ty) * yy + y0) * sy;
+ v[13] = v[8];
+ v[14] = msk_y * op->mask.scale[1];
+}
+
+fastcall static void
+emit_primitive_affine_source_identity(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float msk_x = r->mask.x + op->mask.offset[0];
+ float msk_y = r->mask.y + op->mask.offset[1];
+ float w = r->width, h = r->height;
+
+ assert(op->floats_per_rect == 15);
+ assert((sna->render.vertex_used % 5) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 3*5;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ _sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width,
+ op->src.offset[1] + r->src.y + r->height,
+ op->src.transform, op->src.scale,
+ &v[1], &v[2]);
+ v[3] = (msk_x + w) * op->mask.scale[0];
+ v[4] = (msk_y + h) * op->mask.scale[1];
+
+ dst.p.x = r->dst.x;
+ v[5] = dst.f;
+ _sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
+ op->src.offset[1] + r->src.y + r->height,
+ op->src.transform, op->src.scale,
+ &v[6], &v[7]);
+ v[8] = msk_x * op->mask.scale[0];
+ v[9] = v[4];
+
+ dst.p.y = r->dst.y;
+ v[10] = dst.f;
+ _sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
+ op->src.offset[1] + r->src.y,
+ op->src.transform, op->src.scale,
+ &v[11], &v[12]);
+ v[13] = v[8];
+ v[14] = msk_y * op->mask.scale[1];
+}
+
+inline static void
+emit_composite_texcoord_affine(struct sna *sna,
+ const struct sna_composite_channel *channel,
+ int16_t x, int16_t y)
+{
+ float t[2];
+
+ sna_get_transformed_coordinates(x + channel->offset[0],
+ y + channel->offset[1],
+ channel->transform,
+ &t[0], &t[1]);
+ OUT_VERTEX_F(t[0] * channel->scale[0]);
+ OUT_VERTEX_F(t[1] * channel->scale[1]);
+}
+
+unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp)
+{
+ unsigned vb;
+
+ if (tmp->mask.bo) {
+ if (tmp->mask.transform == NULL) {
+ if (tmp->src.is_solid) {
+ DBG(("%s: solid, identity mask\n", __FUNCTION__));
+ tmp->prim_emit = emit_primitive_identity_mask;
+ tmp->emit_boxes = emit_boxes_identity_mask;
+ tmp->floats_per_vertex = 4;
+ vb = 1 | 2 << 2;
+ } else if (tmp->src.is_linear) {
+ DBG(("%s: linear, identity mask\n", __FUNCTION__));
+ tmp->prim_emit = emit_primitive_linear_identity_mask;
+ tmp->emit_boxes = emit_boxes_linear_identity_mask;
+ tmp->floats_per_vertex = 4;
+ vb = 1 | 2 << 2;
+ } else if (tmp->src.transform == NULL) {
+ DBG(("%s: identity source, identity mask\n", __FUNCTION__));
+ tmp->prim_emit = emit_primitive_identity_source_mask;
+ tmp->floats_per_vertex = 5;
+ vb = 2 << 2 | 2;
+ } else if (tmp->src.is_affine) {
+ tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
+ tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
+ if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
+ DBG(("%s: simple src, identity mask\n", __FUNCTION__));
+ tmp->prim_emit = emit_primitive_simple_source_identity;
+ } else {
+ DBG(("%s: affine src, identity mask\n", __FUNCTION__));
+ tmp->prim_emit = emit_primitive_affine_source_identity;
+ }
+ tmp->floats_per_vertex = 5;
+ vb = 2 << 2 | 2;
+ } else {
+ DBG(("%s: projective source, identity mask\n", __FUNCTION__));
+ tmp->prim_emit = emit_primitive_mask;
+ tmp->floats_per_vertex = 6;
+ vb = 2 << 2 | 3;
+ }
+ } else {
+ tmp->prim_emit = emit_primitive_mask;
+ tmp->floats_per_vertex = 1;
+ vb = 0;
+ if (tmp->mask.is_solid) {
+ tmp->floats_per_vertex += 1;
+ vb |= 1 << 2;
+ } else if (tmp->mask.is_affine) {
+ tmp->floats_per_vertex += 2;
+ vb |= 2 << 2;
+ } else {
+ tmp->floats_per_vertex += 3;
+ vb |= 3 << 2;
+ }
+ if (tmp->src.is_solid) {
+ tmp->floats_per_vertex += 1;
+ vb |= 1;
+ } else if (tmp->src.is_affine) {
+ tmp->floats_per_vertex += 2;
+ vb |= 2;
+ } else {
+ tmp->floats_per_vertex += 3;
+ vb |= 3;
+ }
+ DBG(("%s: general mask: floats-per-vertex=%d, vb=%x\n",
+ __FUNCTION__, tmp->floats_per_vertex, vb));
+ }
+ } else {
+ if (tmp->src.is_solid) {
+ DBG(("%s: solid, no mask\n", __FUNCTION__));
+ tmp->prim_emit = emit_primitive_solid;
+ tmp->emit_boxes = emit_boxes_solid;
+ if (tmp->src.is_opaque && tmp->op == PictOpOver)
+ tmp->op = PictOpSrc;
+ tmp->floats_per_vertex = 2;
+ vb = 1;
+ } else if (tmp->src.is_linear) {
+ DBG(("%s: linear, no mask\n", __FUNCTION__));
+ tmp->prim_emit = emit_primitive_linear;
+ tmp->emit_boxes = emit_boxes_linear;
+ tmp->floats_per_vertex = 2;
+ vb = 1;
+ } else if (tmp->src.transform == NULL) {
+ DBG(("%s: identity src, no mask\n", __FUNCTION__));
+ tmp->prim_emit = emit_primitive_identity_source;
+ tmp->emit_boxes = emit_boxes_identity_source;
+ tmp->floats_per_vertex = 3;
+ vb = 2;
+ } else if (tmp->src.is_affine) {
+ tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
+ tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
+ if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
+ DBG(("%s: simple src, no mask\n", __FUNCTION__));
+ tmp->prim_emit = emit_primitive_simple_source;
+ tmp->emit_boxes = emit_boxes_simple_source;
+ } else {
+ DBG(("%s: affine src, no mask\n", __FUNCTION__));
+ tmp->prim_emit = emit_primitive_affine_source;
+ tmp->emit_boxes = emit_boxes_affine_source;
+ }
+ tmp->floats_per_vertex = 3;
+ vb = 2;
+ } else {
+ DBG(("%s: projective src, no mask\n", __FUNCTION__));
+ assert(!tmp->src.is_solid);
+ tmp->prim_emit = emit_primitive;
+ tmp->floats_per_vertex = 4;
+ vb = 3;
+ }
+ }
+ tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
+
+ return vb;
+}
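
[annotation] The returned vb lands in u.gen4.ve_id; judging by the assignments above, the low two bits give the source texcoord width (1 solid/linear, 2 affine, 3 projective) and bits 3:2 the mask's, so floats_per_vertex is always 1 (the packed position) plus the two widths. A decoding sketch under that reading:

	/* decode the ve_id produced by gen4_choose_composite_emitter() */
	static inline int src_floats(unsigned ve_id)  { return ve_id & 3; }
	static inline int mask_floats(unsigned ve_id) { return (ve_id >> 2) & 3; }

	static inline int floats_per_vertex(unsigned ve_id)
	{
		/* 1 packed (x, y) position + per-channel texcoords */
		return 1 + src_floats(ve_id) + mask_floats(ve_id);
	}
	/* e.g. identity source + identity mask: vb = 2 << 2 | 2 -> 1+2+2 = 5,
	 * solid source, no mask: vb = 1 -> 1+1+0 = 2, matching the values set above. */
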
+
+inline static void
+emit_span_vertex(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ int16_t x, int16_t y)
+{
+ OUT_VERTEX(x, y);
+ emit_texcoord(sna, &op->base.src, x, y);
+}
+
+fastcall static void
+emit_composite_spans_primitive(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ emit_span_vertex(sna, op, box->x2, box->y2);
+ OUT_VERTEX_F(opacity);
+
+ emit_span_vertex(sna, op, box->x1, box->y2);
+ OUT_VERTEX_F(opacity);
+
+ emit_span_vertex(sna, op, box->x1, box->y1);
+ OUT_VERTEX_F(opacity);
+}
+
+fastcall static void
+emit_span_solid(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ float *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ assert(op->base.floats_per_rect == 9);
+ assert((sna->render.vertex_used % 3) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 3*3;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+
+ dst.p.x = box->x1;
+ v[3] = dst.f;
+
+ dst.p.y = box->y1;
+ v[6] = dst.f;
+
+ v[7] = v[4] = v[1] = .5;
+ v[8] = v[5] = v[2] = opacity;
+}
+
+fastcall static void
+emit_span_boxes_solid(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox, float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = b->box.x2;
+ dst.p.y = b->box.y2;
+ v[0] = dst.f;
+
+ dst.p.x = b->box.x1;
+ v[3] = dst.f;
+
+ dst.p.y = b->box.y1;
+ v[6] = dst.f;
+
+ v[7] = v[4] = v[1] = .5;
+ v[8] = v[5] = v[2] = b->alpha;
+
+ v += 9;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
+emit_span_identity(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ float *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ float sx = op->base.src.scale[0];
+ float sy = op->base.src.scale[1];
+ int16_t tx = op->base.src.offset[0];
+ int16_t ty = op->base.src.offset[1];
+
+ assert(op->base.floats_per_rect == 12);
+ assert((sna->render.vertex_used % 4) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 3*4;
+ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ v[1] = (box->x2 + tx) * sx;
+ v[6] = v[2] = (box->y2 + ty) * sy;
+
+ dst.p.x = box->x1;
+ v[4] = dst.f;
+ v[9] = v[5] = (box->x1 + tx) * sx;
+
+ dst.p.y = box->y1;
+ v[8] = dst.f;
+ v[10] = (box->y1 + ty) * sy;
+
+ v[11] = v[7] = v[3] = opacity;
+}
+
+fastcall static void
+emit_span_boxes_identity(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ float sx = op->base.src.scale[0];
+ float sy = op->base.src.scale[1];
+ int16_t tx = op->base.src.offset[0];
+ int16_t ty = op->base.src.offset[1];
+
+ dst.p.x = b->box.x2;
+ dst.p.y = b->box.y2;
+ v[0] = dst.f;
+ v[1] = (b->box.x2 + tx) * sx;
+ v[6] = v[2] = (b->box.y2 + ty) * sy;
+
+ dst.p.x = b->box.x1;
+ v[4] = dst.f;
+ v[9] = v[5] = (b->box.x1 + tx) * sx;
+
+ dst.p.y = b->box.y1;
+ v[8] = dst.f;
+ v[10] = (b->box.y1 + ty) * sy;
+
+ v[11] = v[7] = v[3] = b->alpha;
+
+ v += 12;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
+emit_span_simple(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ float *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ float xx = op->base.src.transform->matrix[0][0];
+ float x0 = op->base.src.transform->matrix[0][2];
+ float yy = op->base.src.transform->matrix[1][1];
+ float y0 = op->base.src.transform->matrix[1][2];
+ float sx = op->base.src.scale[0];
+ float sy = op->base.src.scale[1];
+ int16_t tx = op->base.src.offset[0];
+ int16_t ty = op->base.src.offset[1];
+
+ assert(op->base.floats_per_rect == 12);
+ assert((sna->render.vertex_used % 4) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 3*4;
+ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ v[1] = ((box->x2 + tx) * xx + x0) * sx;
+ v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
+
+ dst.p.x = box->x1;
+ v[4] = dst.f;
+ v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx;
+
+ dst.p.y = box->y1;
+ v[8] = dst.f;
+ v[10] = ((box->y1 + ty) * yy + y0) * sy;
+
+ v[11] = v[7] = v[3] = opacity;
+}
+
+fastcall static void
+emit_span_boxes_simple(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b, int nbox,
+ float *v)
+{
+ float xx = op->base.src.transform->matrix[0][0];
+ float x0 = op->base.src.transform->matrix[0][2];
+ float yy = op->base.src.transform->matrix[1][1];
+ float y0 = op->base.src.transform->matrix[1][2];
+ float sx = op->base.src.scale[0];
+ float sy = op->base.src.scale[1];
+ int16_t tx = op->base.src.offset[0];
+ int16_t ty = op->base.src.offset[1];
+
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = b->box.x2;
+ dst.p.y = b->box.y2;
+ v[0] = dst.f;
+ v[1] = ((b->box.x2 + tx) * xx + x0) * sx;
+ v[6] = v[2] = ((b->box.y2 + ty) * yy + y0) * sy;
+
+ dst.p.x = b->box.x1;
+ v[4] = dst.f;
+ v[9] = v[5] = ((b->box.x1 + tx) * xx + x0) * sx;
+
+ dst.p.y = b->box.y1;
+ v[8] = dst.f;
+ v[10] = ((b->box.y1 + ty) * yy + y0) * sy;
+
+ v[11] = v[7] = v[3] = b->alpha;
+
+ v += 12;
+ b++;
+ } while (--nbox);
+}
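
The "simple" variants handle affine transforms without rotation or shear,
so only the matrix diagonal and translation column are read and each axis
costs one multiply-add before normalization:

    /* Sketch of the scale/translate-only path in emit_span_simple(). */
    static inline float texcoord_simple(int16_t x, int16_t offset,
                                        float xx, float x0, float scale)
    {
        return ((x + offset) * xx + x0) * scale;
    }
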
+
+fastcall static void
+emit_span_affine(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float *v;
+
+ assert(op->base.floats_per_rect == 12);
+ assert((sna->render.vertex_used % 4) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ _sna_get_transformed_scaled(op->base.src.offset[0] + box->x2,
+ op->base.src.offset[1] + box->y2,
+ op->base.src.transform,
+ op->base.src.scale,
+ &v[1], &v[2]);
+
+ dst.p.x = box->x1;
+ v[4] = dst.f;
+ _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
+ op->base.src.offset[1] + box->y2,
+ op->base.src.transform,
+ op->base.src.scale,
+ &v[5], &v[6]);
+
+ dst.p.y = box->y1;
+ v[8] = dst.f;
+ _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
+ op->base.src.offset[1] + box->y1,
+ op->base.src.transform,
+ op->base.src.scale,
+ &v[9], &v[10]);
+
+ v[11] = v[7] = v[3] = opacity;
+}
+
+fastcall static void
+emit_span_boxes_affine(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = b->box.x2;
+ dst.p.y = b->box.y2;
+ v[0] = dst.f;
+ _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
+ op->base.src.offset[1] + b->box.y2,
+ op->base.src.transform,
+ op->base.src.scale,
+ &v[1], &v[2]);
+
+ dst.p.x = b->box.x1;
+ v[4] = dst.f;
+ _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
+ op->base.src.offset[1] + b->box.y2,
+ op->base.src.transform,
+ op->base.src.scale,
+ &v[5], &v[6]);
+
+ dst.p.y = b->box.y1;
+ v[8] = dst.f;
+ _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
+ op->base.src.offset[1] + b->box.y1,
+ op->base.src.transform,
+ op->base.src.scale,
+ &v[9], &v[10]);
+
+ v[11] = v[7] = v[3] = b->alpha;
+
+ v += 12;
+ b++;
+ } while (--nbox);
+}
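
General affine transforms defer to _sna_get_transformed_scaled(), which is
defined elsewhere; judging from the call sites it plausibly applies the
full 2x3 affine matrix and then the per-axis normalization, roughly:

    /* A guess at the affine case of _sna_get_transformed_scaled(); the
     * real helper works on the driver's fixed-point PictTransform, plain
     * floats here are for illustration only. */
    static void transformed_scaled_sketch(int x, int y,
                                          const float m[2][3],
                                          const float scale[2],
                                          float *u, float *v)
    {
        *u = (m[0][0] * x + m[0][1] * y + m[0][2]) * scale[0];
        *v = (m[1][0] * x + m[1][1] * y + m[1][2]) * scale[1];
    }
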
+
+fastcall static void
+emit_span_linear(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float *v;
+
+ assert(op->base.floats_per_rect == 9);
+ assert((sna->render.vertex_used % 3) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ dst.p.x = box->x1;
+ v[3] = dst.f;
+ dst.p.y = box->y1;
+ v[6] = dst.f;
+
+ v[1] = compute_linear(&op->base.src, box->x2, box->y2);
+ v[4] = compute_linear(&op->base.src, box->x1, box->y2);
+ v[7] = compute_linear(&op->base.src, box->x1, box->y1);
+
+ v[8] = v[5] = v[2] = opacity;
+}
+
+fastcall static void
+emit_span_boxes_linear(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = b->box.x2;
+ dst.p.y = b->box.y2;
+ v[0] = dst.f;
+ dst.p.x = b->box.x1;
+ v[3] = dst.f;
+ dst.p.y = b->box.y1;
+ v[6] = dst.f;
+
+ v[1] = compute_linear(&op->base.src, b->box.x2, b->box.y2);
+ v[4] = compute_linear(&op->base.src, b->box.x1, b->box.y2);
+ v[7] = compute_linear(&op->base.src, b->box.x1, b->box.y1);
+
+ v[8] = v[5] = v[2] = b->alpha;
+
+ v += 9;
+ b++;
+ } while (--nbox);
+}
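
compute_linear() is also defined elsewhere; given that the (now deleted)
gen5_composite_linear_init() later in this patch folds the gradient
direction and offset into the first row of an embedded transform, the
per-vertex value is presumably a dot product with that row:

    /* A guess at compute_linear(): project (x, y) onto the gradient axis. */
    static inline float compute_linear_sketch(float dx, float dy, float offset,
                                              int16_t x, int16_t y)
    {
        return x * dx + y * dy + offset;
    }
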
+
+inline static uint32_t
+gen4_choose_spans_vertex_buffer(const struct sna_composite_op *op)
+{
+ int id = op->src.is_solid ? 1 : 2 + !op->src.is_affine;
+ DBG(("%s: id=%x (%d, 1)\n", __FUNCTION__, 1 << 2 | id, id));
+ return 1 << 2 | id;
+}
+
+unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp)
+{
+ unsigned vb;
+
+ if (tmp->base.src.is_solid) {
+ tmp->prim_emit = emit_span_solid;
+ tmp->emit_boxes = emit_span_boxes_solid;
+ tmp->base.floats_per_vertex = 3;
+ vb = 1 << 2 | 1;
+ } else if (tmp->base.src.is_linear) {
+ tmp->prim_emit = emit_span_linear;
+ tmp->emit_boxes = emit_span_boxes_linear;
+ tmp->base.floats_per_vertex = 3;
+ vb = 1 << 2 | 1;
+ } else if (tmp->base.src.transform == NULL) {
+ tmp->prim_emit = emit_span_identity;
+ tmp->emit_boxes = emit_span_boxes_identity;
+ tmp->base.floats_per_vertex = 4;
+ vb = 1 << 2 | 2;
+ } else if (tmp->base.is_affine) {
+ tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
+ tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
+ if (!sna_affine_transform_is_rotation(tmp->base.src.transform)) {
+ tmp->prim_emit = emit_span_simple;
+ tmp->emit_boxes = emit_span_boxes_simple;
+ } else {
+ tmp->prim_emit = emit_span_affine;
+ tmp->emit_boxes = emit_span_boxes_affine;
+ }
+ tmp->base.floats_per_vertex = 4;
+ vb = 1 << 2 | 2;
+ } else {
+ tmp->prim_emit = emit_composite_spans_primitive;
+ tmp->base.floats_per_vertex = 5;
+ vb = 1 << 2 | 3;
+ }
+ tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;
+ return vb;
+}
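
Both choosers pack the vertex layout into one small id: bits 0-1 carry the
float count of the source texcoord channel, the bits above carry the
mask/opacity channel. gen5_emit_vertex_elements() below unpacks it as
id & 3 and id >> 2; hypothetical decode helpers for reference:

    /* Decode the packed ve_id produced by the choosers above. */
    static inline int ve_id_src_floats(unsigned id)  { return id & 3; }
    static inline int ve_id_mask_floats(unsigned id) { return id >> 2; }
    static inline int ve_id_has_mask(unsigned id)    { return (id >> 2) != 0; }
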
diff --git a/src/sna/gen4_vertex.h b/src/sna/gen4_vertex.h
new file mode 100644
index 000000000..431b545eb
--- /dev/null
+++ b/src/sna/gen4_vertex.h
@@ -0,0 +1,16 @@
+#ifndef GEN4_VERTEX_H
+#define GEN4_VERTEX_H
+
+#include "compiler.h"
+
+#include "sna.h"
+#include "sna_render.h"
+
+void gen4_vertex_flush(struct sna *sna);
+int gen4_vertex_finish(struct sna *sna);
+void gen4_vertex_close(struct sna *sna);
+
+unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp);
+unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp);
+
+#endif /* GEN4_VERTEX_H */
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 5d559377b..6e1199638 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -42,7 +42,10 @@
#include "brw/brw.h"
#include "gen5_render.h"
+#include "gen4_source.h"
+#include "gen4_vertex.h"
+#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define PREFER_BLT_FILL 1
@@ -196,17 +199,19 @@ gen5_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
return base + !is_affine;
}
-static void gen5_magic_ca_pass(struct sna *sna,
+static bool gen5_magic_ca_pass(struct sna *sna,
const struct sna_composite_op *op)
{
struct gen5_render_state *state = &sna->render_state.gen5;
if (!op->need_magic_ca_pass)
- return;
+ return false;
assert(sna->render.vertex_index > sna->render.vertex_start);
DBG(("%s: CA fixup\n", __FUNCTION__));
+ assert(op->mask.bo != NULL);
+ assert(op->has_component_alpha);
gen5_emit_pipelined_pointers
(sna, op, PictOpAdd,
@@ -225,162 +230,7 @@ static void gen5_magic_ca_pass(struct sna *sna,
OUT_BATCH(0); /* index buffer offset, ignored */
state->last_primitive = sna->kgem.nbatch;
-}
-
-static void gen5_vertex_flush(struct sna *sna)
-{
- assert(sna->render_state.gen5.vertex_offset);
- assert(sna->render.vertex_index > sna->render.vertex_start);
-
- DBG(("%s[%x] = %d\n", __FUNCTION__,
- 4*sna->render_state.gen5.vertex_offset,
- sna->render.vertex_index - sna->render.vertex_start));
- sna->kgem.batch[sna->render_state.gen5.vertex_offset] =
- sna->render.vertex_index - sna->render.vertex_start;
- sna->render_state.gen5.vertex_offset = 0;
-}
-
-static int gen5_vertex_finish(struct sna *sna)
-{
- struct kgem_bo *bo;
- unsigned int i;
-
- assert(sna->render.vertex_used);
- assert(sna->render.nvertex_reloc);
-
- /* Note: we only need dword alignment (currently) */
-
- bo = sna->render.vbo;
- if (bo) {
- if (sna->render_state.gen5.vertex_offset)
- gen5_vertex_flush(sna);
-
- for (i = 0; i < sna->render.nvertex_reloc; i++) {
- DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
- i, sna->render.vertex_reloc[i]));
-
- sna->kgem.batch[sna->render.vertex_reloc[i]] =
- kgem_add_reloc(&sna->kgem,
- sna->render.vertex_reloc[i], bo,
- I915_GEM_DOMAIN_VERTEX << 16,
- 0);
- sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
- kgem_add_reloc(&sna->kgem,
- sna->render.vertex_reloc[i]+1, bo,
- I915_GEM_DOMAIN_VERTEX << 16,
- sna->render.vertex_used * 4 - 1);
- }
-
- sna->render.nvertex_reloc = 0;
- sna->render.vertex_used = 0;
- sna->render.vertex_index = 0;
- sna->render_state.gen5.vb_id = 0;
-
- kgem_bo_destroy(&sna->kgem, bo);
- }
-
- sna->render.vertices = NULL;
- sna->render.vbo = kgem_create_linear(&sna->kgem,
- 256*1024, CREATE_GTT_MAP);
- if (sna->render.vbo)
- sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
- if (sna->render.vertices == NULL) {
- if (sna->render.vbo)
- kgem_bo_destroy(&sna->kgem, sna->render.vbo);
- sna->render.vbo = NULL;
- return 0;
- }
-
- if (sna->render.vertex_used) {
- memcpy(sna->render.vertices,
- sna->render.vertex_data,
- sizeof(float)*sna->render.vertex_used);
- }
- sna->render.vertex_size = 64 * 1024 - 1;
- return sna->render.vertex_size - sna->render.vertex_used;
-}
-
-static void gen5_vertex_close(struct sna *sna)
-{
- struct kgem_bo *bo, *free_bo = NULL;
- unsigned int i, delta = 0;
-
- assert(sna->render_state.gen5.vertex_offset == 0);
- if (!sna->render_state.gen5.vb_id)
- return;
-
- DBG(("%s: used=%d, vbo active? %d\n",
- __FUNCTION__, sna->render.vertex_used, sna->render.vbo != NULL));
-
- bo = sna->render.vbo;
- if (bo) {
- if (sna->render.vertex_size - sna->render.vertex_used < 64) {
- DBG(("%s: discarding full vbo\n", __FUNCTION__));
- sna->render.vbo = NULL;
- sna->render.vertices = sna->render.vertex_data;
- sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
- free_bo = bo;
- } else if (IS_CPU_MAP(bo->map)) {
- DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
- sna->render.vertices =
- kgem_bo_map__gtt(&sna->kgem, sna->render.vbo);
- if (sna->render.vertices == NULL) {
- sna->render.vbo = NULL;
- sna->render.vertices = sna->render.vertex_data;
- sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
- free_bo = bo;
- }
- }
- } else {
- if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
- DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
- sna->render.vertex_used, sna->kgem.nbatch));
- memcpy(sna->kgem.batch + sna->kgem.nbatch,
- sna->render.vertex_data,
- sna->render.vertex_used * 4);
- delta = sna->kgem.nbatch * 4;
- bo = NULL;
- sna->kgem.nbatch += sna->render.vertex_used;
- } else {
- bo = kgem_create_linear(&sna->kgem,
- 4*sna->render.vertex_used, 0);
- if (bo && !kgem_bo_write(&sna->kgem, bo,
- sna->render.vertex_data,
- 4*sna->render.vertex_used)) {
- kgem_bo_destroy(&sna->kgem, bo);
- bo = NULL;
- }
- DBG(("%s: new vbo: %d\n", __FUNCTION__,
- sna->render.vertex_used));
- free_bo = bo;
- }
- }
-
- assert(sna->render.nvertex_reloc);
- for (i = 0; i < sna->render.nvertex_reloc; i++) {
- DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
- i, sna->render.vertex_reloc[i]));
-
- sna->kgem.batch[sna->render.vertex_reloc[i]] =
- kgem_add_reloc(&sna->kgem,
- sna->render.vertex_reloc[i], bo,
- I915_GEM_DOMAIN_VERTEX << 16,
- delta);
- sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
- kgem_add_reloc(&sna->kgem,
- sna->render.vertex_reloc[i]+1, bo,
- I915_GEM_DOMAIN_VERTEX << 16,
- delta + sna->render.vertex_used * 4 - 1);
- }
- sna->render.nvertex_reloc = 0;
-
- if (sna->render.vbo == NULL) {
- sna->render.vertex_used = 0;
- sna->render.vertex_index = 0;
- }
-
- if (free_bo)
- kgem_bo_destroy(&sna->kgem, free_bo);
+ return true;
}
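
gen5_magic_ca_pass() now reports whether it clobbered the blend state, so
the flush path added to gen5_get_rectangles__flush() below can restore the
operation's own pipelined pointers only when the CA pass actually ran:

    if (sna->render.vertex_offset) {
        gen4_vertex_flush(sna);
        if (gen5_magic_ca_pass(sna, op))
            gen5_emit_pipelined_pointers(sna, op, op->op,
                                         op->u.gen5.wm_kernel);
    }
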
static uint32_t gen5_get_blend(int op,
@@ -679,310 +529,29 @@ gen5_bind_bo(struct sna *sna,
return offset * sizeof(uint32_t);
}
-fastcall static void
-gen5_emit_composite_primitive_solid(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- float *v;
- union {
- struct sna_coordinate p;
- float f;
- } dst;
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 9;
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- v[1] = 1.;
- v[2] = 1.;
-
- dst.p.x = r->dst.x;
- v[3] = dst.f;
- v[4] = 0.;
- v[5] = 1.;
-
- dst.p.y = r->dst.y;
- v[6] = dst.f;
- v[7] = 0.;
- v[8] = 0.;
-}
-
-fastcall static void
-gen5_emit_composite_primitive_identity_source(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- const float *sf = op->src.scale;
- float sx, sy, *v;
- union {
- struct sna_coordinate p;
- float f;
- } dst;
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 9;
-
- sx = r->src.x + op->src.offset[0];
- sy = r->src.y + op->src.offset[1];
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- v[1] = (sx + r->width) * sf[0];
- v[5] = v[2] = (sy + r->height) * sf[1];
-
- dst.p.x = r->dst.x;
- v[3] = dst.f;
- v[7] = v[4] = sx * sf[0];
-
- dst.p.y = r->dst.y;
- v[6] = dst.f;
- v[8] = sy * sf[1];
-}
-
-fastcall static void
-gen5_emit_composite_primitive_affine_source(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- union {
- struct sna_coordinate p;
- float f;
- } dst;
- float *v;
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 9;
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x + r->width,
- op->src.offset[1] + r->src.y + r->height,
- op->src.transform,
- &v[1], &v[2]);
- v[1] *= op->src.scale[0];
- v[2] *= op->src.scale[1];
-
- dst.p.x = r->dst.x;
- v[3] = dst.f;
- _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
- op->src.offset[1] + r->src.y + r->height,
- op->src.transform,
- &v[4], &v[5]);
- v[4] *= op->src.scale[0];
- v[5] *= op->src.scale[1];
-
- dst.p.y = r->dst.y;
- v[6] = dst.f;
- _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
- op->src.offset[1] + r->src.y,
- op->src.transform,
- &v[7], &v[8]);
- v[7] *= op->src.scale[0];
- v[8] *= op->src.scale[1];
-}
-
-fastcall static void
-gen5_emit_composite_primitive_identity_source_mask(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- union {
- struct sna_coordinate p;
- float f;
- } dst;
- float src_x, src_y;
- float msk_x, msk_y;
- float w, h;
- float *v;
-
- src_x = r->src.x + op->src.offset[0];
- src_y = r->src.y + op->src.offset[1];
- msk_x = r->mask.x + op->mask.offset[0];
- msk_y = r->mask.y + op->mask.offset[1];
- w = r->width;
- h = r->height;
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 15;
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- v[1] = (src_x + w) * op->src.scale[0];
- v[2] = (src_y + h) * op->src.scale[1];
- v[3] = (msk_x + w) * op->mask.scale[0];
- v[4] = (msk_y + h) * op->mask.scale[1];
-
- dst.p.x = r->dst.x;
- v[5] = dst.f;
- v[6] = src_x * op->src.scale[0];
- v[7] = v[2];
- v[8] = msk_x * op->mask.scale[0];
- v[9] = v[4];
-
- dst.p.y = r->dst.y;
- v[10] = dst.f;
- v[11] = v[6];
- v[12] = src_y * op->src.scale[1];
- v[13] = v[8];
- v[14] = msk_y * op->mask.scale[1];
-}
-
-fastcall static void
-gen5_emit_composite_primitive(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
- bool is_affine = op->is_affine;
- const float *src_sf = op->src.scale;
- const float *mask_sf = op->mask.scale;
-
- if (is_affine) {
- sna_get_transformed_coordinates(r->src.x + op->src.offset[0],
- r->src.y + op->src.offset[1],
- op->src.transform,
- &src_x[0],
- &src_y[0]);
-
- sna_get_transformed_coordinates(r->src.x + op->src.offset[0],
- r->src.y + op->src.offset[1] + r->height,
- op->src.transform,
- &src_x[1],
- &src_y[1]);
-
- sna_get_transformed_coordinates(r->src.x + op->src.offset[0] + r->width,
- r->src.y + op->src.offset[1] + r->height,
- op->src.transform,
- &src_x[2],
- &src_y[2]);
- } else {
- sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0],
- r->src.y + op->src.offset[1],
- op->src.transform,
- &src_x[0],
- &src_y[0],
- &src_w[0]);
- sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0],
- r->src.y + op->src.offset[1] + r->height,
- op->src.transform,
- &src_x[1],
- &src_y[1],
- &src_w[1]);
- sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0] + r->width,
- r->src.y + op->src.offset[1] + r->height,
- op->src.transform,
- &src_x[2],
- &src_y[2],
- &src_w[2]);
- }
-
- if (op->mask.bo) {
- if (is_affine) {
- sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0],
- r->mask.y + op->mask.offset[1],
- op->mask.transform,
- &mask_x[0],
- &mask_y[0]);
-
- sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0],
- r->mask.y + op->mask.offset[1] + r->height,
- op->mask.transform,
- &mask_x[1],
- &mask_y[1]);
-
- sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0] + r->width,
- r->mask.y + op->mask.offset[1] + r->height,
- op->mask.transform,
- &mask_x[2],
- &mask_y[2]);
- } else {
- sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0],
- r->mask.y + op->mask.offset[1],
- op->mask.transform,
- &mask_x[0],
- &mask_y[0],
- &mask_w[0]);
-
- sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0],
- r->mask.y + op->mask.offset[1] + r->height,
- op->mask.transform,
- &mask_x[1],
- &mask_y[1],
- &mask_w[1]);
- sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0] + r->width,
- r->mask.y + op->mask.offset[1] + r->height,
- op->mask.transform,
- &mask_x[2],
- &mask_y[2],
- &mask_w[2]);
- }
- }
-
- OUT_VERTEX(r->dst.x + r->width, r->dst.y + r->height);
- OUT_VERTEX_F(src_x[2] * src_sf[0]);
- OUT_VERTEX_F(src_y[2] * src_sf[1]);
- if (!is_affine)
- OUT_VERTEX_F(src_w[2]);
- if (op->mask.bo) {
- OUT_VERTEX_F(mask_x[2] * mask_sf[0]);
- OUT_VERTEX_F(mask_y[2] * mask_sf[1]);
- if (!is_affine)
- OUT_VERTEX_F(mask_w[2]);
- }
-
- OUT_VERTEX(r->dst.x, r->dst.y + r->height);
- OUT_VERTEX_F(src_x[1] * src_sf[0]);
- OUT_VERTEX_F(src_y[1] * src_sf[1]);
- if (!is_affine)
- OUT_VERTEX_F(src_w[1]);
- if (op->mask.bo) {
- OUT_VERTEX_F(mask_x[1] * mask_sf[0]);
- OUT_VERTEX_F(mask_y[1] * mask_sf[1]);
- if (!is_affine)
- OUT_VERTEX_F(mask_w[1]);
- }
-
- OUT_VERTEX(r->dst.x, r->dst.y);
- OUT_VERTEX_F(src_x[0] * src_sf[0]);
- OUT_VERTEX_F(src_y[0] * src_sf[1]);
- if (!is_affine)
- OUT_VERTEX_F(src_w[0]);
- if (op->mask.bo) {
- OUT_VERTEX_F(mask_x[0] * mask_sf[0]);
- OUT_VERTEX_F(mask_y[0] * mask_sf[1]);
- if (!is_affine)
- OUT_VERTEX_F(mask_w[0]);
- }
-}
-
static void gen5_emit_vertex_buffer(struct sna *sna,
const struct sna_composite_op *op)
{
int id = op->u.gen5.ve_id;
- assert((unsigned)id <= 3);
+ assert((sna->render.vb_id & (1 << id)) == 0);
OUT_BATCH(GEN5_3DSTATE_VERTEX_BUFFERS | 3);
- OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
+ OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
(4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
+ assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
OUT_BATCH(0);
- OUT_BATCH(0);
+ OUT_BATCH(~0); /* max address: disabled */
OUT_BATCH(0);
- sna->render_state.gen5.vb_id |= 1 << id;
+ sna->render.vb_id |= 1 << id;
}
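
The OUT_BATCH() placeholders after the recorded relocation index are
rewritten once the final vbo is known; the removed gen5_vertex_close()
above shows the pattern, condensed here (the shared gen4_vertex.c
presumably does the same):

    /* Condensed from the removed gen5_vertex_close(): patch each recorded
     * batch slot with the real buffer address via a relocation. */
    for (i = 0; i < sna->render.nvertex_reloc; i++)
        sna->kgem.batch[sna->render.vertex_reloc[i]] =
            kgem_add_reloc(&sna->kgem,
                           sna->render.vertex_reloc[i], bo,
                           I915_GEM_DOMAIN_VERTEX << 16,
                           delta);
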
static void gen5_emit_primitive(struct sna *sna)
{
if (sna->kgem.nbatch == sna->render_state.gen5.last_primitive) {
- sna->render_state.gen5.vertex_offset = sna->kgem.nbatch - 5;
+ sna->render.vertex_offset = sna->kgem.nbatch - 5;
return;
}
@@ -991,7 +560,7 @@ static void gen5_emit_primitive(struct sna *sna)
(_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) |
(0 << 9) |
4);
- sna->render_state.gen5.vertex_offset = sna->kgem.nbatch;
+ sna->render.vertex_offset = sna->kgem.nbatch;
OUT_BATCH(0); /* vertex count, to be filled in later */
OUT_BATCH(sna->render.vertex_index);
OUT_BATCH(1); /* single instance */
@@ -1008,18 +577,19 @@ static bool gen5_rectangle_begin(struct sna *sna,
int id = op->u.gen5.ve_id;
int ndwords;
- assert((unsigned)id <= 3);
+ if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
+ return true;
ndwords = op->need_magic_ca_pass ? 20 : 6;
- if ((sna->render_state.gen5.vb_id & (1 << id)) == 0)
+ if ((sna->render.vb_id & (1 << id)) == 0)
ndwords += 5;
if (!kgem_check_batch(&sna->kgem, ndwords))
return false;
- if ((sna->render_state.gen5.vb_id & (1 << id)) == 0)
+ if ((sna->render.vb_id & (1 << id)) == 0)
gen5_emit_vertex_buffer(sna, op);
- if (sna->render_state.gen5.vertex_offset == 0)
+ if (sna->render.vertex_offset == 0)
gen5_emit_primitive(sna);
return true;
@@ -1028,17 +598,26 @@ static bool gen5_rectangle_begin(struct sna *sna,
static int gen5_get_rectangles__flush(struct sna *sna,
const struct sna_composite_op *op)
{
+ /* Prevent discarding a new vbo after lock contention */
+ if (sna_vertex_wait__locked(&sna->render)) {
+ int rem = vertex_space(sna);
+ if (rem > op->floats_per_rect)
+ return rem;
+ }
+
if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 20 : 6))
return 0;
- if (!kgem_check_exec(&sna->kgem, 1))
- return 0;
- if (!kgem_check_reloc(&sna->kgem, 2))
+ if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
return 0;
- if (op->need_magic_ca_pass && sna->render.vbo)
- return 0;
+ if (sna->render.vertex_offset) {
+ gen4_vertex_flush(sna);
+ if (gen5_magic_ca_pass(sna, op))
+ gen5_emit_pipelined_pointers(sna, op, op->op,
+ op->u.gen5.wm_kernel);
+ }
- return gen5_vertex_finish(sna);
+ return gen4_vertex_finish(sna);
}
inline static int gen5_get_rectangles(struct sna *sna,
@@ -1051,7 +630,7 @@ inline static int gen5_get_rectangles(struct sna *sna,
start:
rem = vertex_space(sna);
- if (rem < op->floats_per_rect) {
+ if (unlikely(rem < op->floats_per_rect)) {
DBG(("flushing vbo for %s: %d < %d\n",
__FUNCTION__, rem, op->floats_per_rect));
rem = gen5_get_rectangles__flush(sna, op);
@@ -1059,21 +638,22 @@ start:
goto flush;
}
- if (unlikely(sna->render_state.gen5.vertex_offset == 0 &&
+ if (unlikely(sna->render.vertex_offset == 0 &&
!gen5_rectangle_begin(sna, op)))
goto flush;
- if (want * op->floats_per_rect > rem)
+ if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
sna->render.vertex_index += 3*want;
return want;
flush:
- if (sna->render_state.gen5.vertex_offset) {
- gen5_vertex_flush(sna);
+ if (sna->render.vertex_offset) {
+ gen4_vertex_flush(sna);
gen5_magic_ca_pass(sna, op);
}
+ sna_vertex_wait__locked(&sna->render);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
@@ -1083,18 +663,15 @@ static uint32_t *
gen5_composite_get_binding_table(struct sna *sna,
uint16_t *offset)
{
- uint32_t *table;
-
sna->kgem.surface -=
sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
- /* Clear all surplus entries to zero in case of prefetch */
- table = memset(sna->kgem.batch + sna->kgem.surface,
- 0, sizeof(struct gen5_surface_state_padded));
- *offset = sna->kgem.surface;
DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
- return table;
+ /* Clear all surplus entries to zero in case of prefetch */
+ *offset = sna->kgem.surface;
+ return memset(sna->kgem.batch + sna->kgem.surface,
+ 0, sizeof(struct gen5_surface_state_padded));
}
static void
@@ -1181,9 +758,9 @@ gen5_emit_invariant(struct sna *sna)
}
static void
-gen5_get_batch(struct sna *sna)
+gen5_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
DBG(("%s: flushing batch: %d < %d+%d\n",
@@ -1200,9 +777,10 @@ gen5_get_batch(struct sna *sna)
static void
gen5_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
+ assert(op->floats_per_rect == 3*op->floats_per_vertex);
if (op->floats_per_vertex != sna->render_state.gen5.floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
- gen5_vertex_finish(sna);
+ gen4_vertex_finish(sna);
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen5.floats_per_vertex,
@@ -1215,12 +793,12 @@ gen5_align_vertex(struct sna *sna, const struct sna_composite_op *op)
}
}
-static bool
+static void
gen5_emit_binding_table(struct sna *sna, uint16_t offset)
{
if (!DBG_NO_STATE_CACHE &&
sna->render_state.gen5.surface_table == offset)
- return false;
+ return;
sna->render_state.gen5.surface_table = offset;
@@ -1232,8 +810,6 @@ gen5_emit_binding_table(struct sna *sna, uint16_t offset)
OUT_BATCH(0); /* sf */
/* Only the PS uses the binding table */
OUT_BATCH(offset*4);
-
- return true;
}
static bool
@@ -1241,33 +817,36 @@ gen5_emit_pipelined_pointers(struct sna *sna,
const struct sna_composite_op *op,
int blend, int kernel)
{
- uint16_t offset = sna->kgem.nbatch, last;
+ uint16_t sp, bp;
+ uint32_t key;
+
+ DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n",
+ __FUNCTION__, op->u.gen5.ve_id & 2,
+ op->src.filter, op->src.repeat,
+ op->mask.filter, op->mask.repeat,
+ kernel, blend, op->has_component_alpha, (int)op->dst.format));
+
+ sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
+ op->mask.filter, op->mask.repeat,
+ kernel);
+ bp = gen5_get_blend(blend, op->has_component_alpha, op->dst.format);
+
+ DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
+ key = sp | (uint32_t)bp << 16 | (op->mask.bo != NULL) << 31;
+ if (key == sna->render_state.gen5.last_pipelined_pointers)
+ return false;
+
OUT_BATCH(GEN5_3DSTATE_PIPELINED_POINTERS | 5);
OUT_BATCH(sna->render_state.gen5.vs);
OUT_BATCH(GEN5_GS_DISABLE); /* passthrough */
OUT_BATCH(GEN5_CLIP_DISABLE); /* passthrough */
OUT_BATCH(sna->render_state.gen5.sf[op->mask.bo != NULL]);
- OUT_BATCH(sna->render_state.gen5.wm +
- SAMPLER_OFFSET(op->src.filter, op->src.repeat,
- op->mask.filter, op->mask.repeat,
- kernel));
- OUT_BATCH(sna->render_state.gen5.cc +
- gen5_get_blend(blend, op->has_component_alpha, op->dst.format));
-
- last = sna->render_state.gen5.last_pipelined_pointers;
- if (!DBG_NO_STATE_CACHE && last &&
- sna->kgem.batch[offset + 1] == sna->kgem.batch[last + 1] &&
- sna->kgem.batch[offset + 3] == sna->kgem.batch[last + 3] &&
- sna->kgem.batch[offset + 4] == sna->kgem.batch[last + 4] &&
- sna->kgem.batch[offset + 5] == sna->kgem.batch[last + 5] &&
- sna->kgem.batch[offset + 6] == sna->kgem.batch[last + 6]) {
- sna->kgem.nbatch = offset;
- return false;
- } else {
- sna->render_state.gen5.last_pipelined_pointers = offset;
- return true;
- }
+ OUT_BATCH(sna->render_state.gen5.wm + sp);
+ OUT_BATCH(sna->render_state.gen5.cc + bp);
+
+ sna->render_state.gen5.last_pipelined_pointers = key;
+ return true;
}
static void
@@ -1304,28 +883,16 @@ gen5_emit_vertex_elements(struct sna *sna,
* texture coordinate 1 if (has_mask is true): same as above
*/
struct gen5_render_state *render = &sna->render_state.gen5;
- bool has_mask = op->mask.bo != NULL;
- bool is_affine = op->is_affine;
- int nelem = has_mask ? 2 : 1;
- int selem = is_affine ? 2 : 3;
- uint32_t w_component;
- uint32_t src_format;
int id = op->u.gen5.ve_id;
+ bool has_mask = id >> 2;
+ uint32_t format, dw;
- assert((unsigned)id <= 3);
if (!DBG_NO_STATE_CACHE && render->ve_id == id)
return;
+ DBG(("%s: changing %d -> %d\n", __FUNCTION__, render->ve_id, id));
render->ve_id = id;
- if (is_affine) {
- src_format = GEN5_SURFACEFORMAT_R32G32_FLOAT;
- w_component = GEN5_VFCOMPONENT_STORE_1_FLT;
- } else {
- src_format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT;
- w_component = GEN5_VFCOMPONENT_STORE_SRC;
- }
-
/* The VUE layout
* dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
* dword 4-7: position (x, y, 1.0, 1.0),
@@ -1335,43 +902,92 @@ gen5_emit_vertex_elements(struct sna *sna,
* dword 4-15 are fetched from vertex buffer
*/
OUT_BATCH(GEN5_3DSTATE_VERTEX_ELEMENTS |
- ((2 * (2 + nelem)) + 1 - 2));
+ ((2 * (has_mask ? 4 : 3)) + 1 - 2));
OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
(GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT) |
(0 << VE0_OFFSET_SHIFT));
- OUT_BATCH((GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
- (GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
- (GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
- (GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
+ OUT_BATCH((VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
+ (VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
+ (VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
+ (VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
/* x,y */
- OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
- (GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT) |
- (0 << VE0_OFFSET_SHIFT)); /* offsets vb in bytes */
- OUT_BATCH((GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
- (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+ GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
+ 0 << VE0_OFFSET_SHIFT);
+ OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
+ VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
+ VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
+ VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
/* u0, v0, w0 */
- OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
- (src_format << VE0_FORMAT_SHIFT) |
- (4 << VE0_OFFSET_SHIFT)); /* offset vb in bytes */
- OUT_BATCH((GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (w_component << VE1_VFCOMPONENT_2_SHIFT) |
- (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+ DBG(("%s: id=%d, first channel %d floats, offset=4b\n", __FUNCTION__,
+ id, id & 3));
+ dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
+ switch (id & 3) {
+ default:
+ assert(0);
+ case 0:
+ format = GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+ dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
+ break;
+ case 1:
+ format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+ dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
+ dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
+ break;
+ case 2:
+ format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+ dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
+ break;
+ case 3:
+ format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT << VE0_FORMAT_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
+ break;
+ }
+ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+ format | 4 << VE0_OFFSET_SHIFT);
+ OUT_BATCH(dw);
/* u1, v1, w1 */
if (has_mask) {
- OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
- (src_format << VE0_FORMAT_SHIFT) |
- (((1 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */
- OUT_BATCH((GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (w_component << VE1_VFCOMPONENT_2_SHIFT) |
- (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+ unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
+ DBG(("%s: id=%x, second channel %d floats, offset=%db\n", __FUNCTION__,
+ id, id >> 2, offset));
+ dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
+ switch (id >> 2) {
+ case 1:
+ format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+ dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
+ dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
+ break;
+ default:
+ assert(0);
+ case 2:
+ format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+ dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
+ break;
+ case 3:
+ format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT << VE0_FORMAT_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+ dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
+ break;
+ }
+ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+ format | offset << VE0_OFFSET_SHIFT);
+ OUT_BATCH(dw);
}
}
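
Both switches above map a channel's float count onto a vertex-element
surface format; condensed into one hypothetical helper:

    /* VE format per texcoord float count, as selected above. */
    static uint32_t ve_format_for_floats(int nfloats)
    {
        switch (nfloats) {
        case 1: return GEN5_SURFACEFORMAT_R32_FLOAT;
        case 2: return GEN5_SURFACEFORMAT_R32G32_FLOAT;
        case 3: return GEN5_SURFACEFORMAT_R32G32B32_FLOAT;
        default: return GEN5_SURFACEFORMAT_R16G16_SSCALED; /* packed x,y */
        }
    }
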
@@ -1380,23 +996,21 @@ gen5_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t offset)
{
- bool flush;
+ if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
+ DBG(("%s: flushing dirty (%d, %d)\n", __FUNCTION__,
+ kgem_bo_is_dirty(op->src.bo),
+ kgem_bo_is_dirty(op->mask.bo)));
+ OUT_BATCH(MI_FLUSH);
+ kgem_clear_dirty(&sna->kgem);
+ kgem_bo_mark_dirty(op->dst.bo);
+ }
/* drawrect must be first for Ironlake BLT workaround */
gen5_emit_drawing_rectangle(sna, op);
-
- flush = gen5_emit_binding_table(sna, offset);
- if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel)) {
+ gen5_emit_binding_table(sna, offset);
+ if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel))
gen5_emit_urb(sna);
- flush = true;
- }
gen5_emit_vertex_elements(sna, op);
-
- if (flush || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
- OUT_BATCH(MI_FLUSH);
- kgem_clear_dirty(&sna->kgem);
- kgem_bo_mark_dirty(op->dst.bo);
- }
}
static void gen5_bind_surfaces(struct sna *sna,
@@ -1405,7 +1019,7 @@ static void gen5_bind_surfaces(struct sna *sna,
uint32_t *binding_table;
uint16_t offset;
- gen5_get_batch(sna);
+ gen5_get_batch(sna, op);
binding_table = gen5_composite_get_binding_table(sna, &offset);
@@ -1419,7 +1033,8 @@ static void gen5_bind_surfaces(struct sna *sna,
op->src.bo, op->src.width, op->src.height,
op->src.card_format,
false);
- if (op->mask.bo)
+ if (op->mask.bo) {
+ assert(op->u.gen5.ve_id >> 2);
binding_table[2] =
gen5_bind_bo(sna,
op->mask.bo,
@@ -1427,6 +1042,7 @@ static void gen5_bind_surfaces(struct sna *sna,
op->mask.height,
op->mask.card_format,
false);
+ }
if (sna->kgem.surface == offset &&
*(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t*)binding_table &&
@@ -1478,9 +1094,9 @@ gen5_render_composite_box(struct sna *sna,
}
static void
-gen5_render_composite_boxes(struct sna *sna,
- const struct sna_composite_op *op,
- const BoxRec *box, int nbox)
+gen5_render_composite_boxes__blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
{
DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
__FUNCTION__, nbox, op->dst.x, op->dst.y,
@@ -1514,6 +1130,62 @@ gen5_render_composite_boxes(struct sna *sna,
} while (nbox);
}
+static void
+gen5_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen5_get_rectangles(sna, op, nbox,
+ gen5_bind_surfaces);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+ } while (nbox);
+}
+
+static void
+gen5_render_composite_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen5_get_rectangles(sna, op, nbox,
+ gen5_bind_surfaces);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
@@ -1559,9 +1231,8 @@ static void gen5_video_bind_surfaces(struct sna *sna,
int src_height[6];
int src_pitch[6];
uint32_t *binding_table;
- int n_src, n;
uint16_t offset;
-
+ int n_src, n;
src_surf_base[0] = 0;
src_surf_base[1] = 0;
@@ -1594,9 +1265,9 @@ static void gen5_video_bind_surfaces(struct sna *sna,
n_src = 1;
}
- gen5_get_batch(sna);
- binding_table = gen5_composite_get_binding_table(sna, &offset);
+ gen5_get_batch(sna, op);
+ binding_table = gen5_composite_get_binding_table(sna, &offset);
binding_table[0] =
gen5_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
@@ -1623,10 +1294,11 @@ gen5_render_video(struct sna *sna,
RegionPtr dstRegion,
short src_w, short src_h,
short drw_w, short drw_h,
+ short dx, short dy,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
- int nbox, dxo, dyo, pix_xoff, pix_yoff;
+ int nbox, pix_xoff, pix_yoff;
float src_scale_x, src_scale_y;
struct sna_pixmap *priv;
BoxPtr box;
@@ -1646,13 +1318,16 @@ gen5_render_video(struct sna *sna,
tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;
- tmp.src.filter = SAMPLER_FILTER_BILINEAR;
+ if (src_w == drw_w && src_h == drw_h)
+ tmp.src.filter = SAMPLER_FILTER_NEAREST;
+ else
+ tmp.src.filter = SAMPLER_FILTER_BILINEAR;
tmp.src.repeat = SAMPLER_EXTEND_PAD;
tmp.src.bo = frame->bo;
tmp.mask.bo = NULL;
tmp.u.gen5.wm_kernel =
is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
- tmp.u.gen5.ve_id = 1;
+ tmp.u.gen5.ve_id = 2;
tmp.is_affine = true;
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
@@ -1677,9 +1352,6 @@ gen5_render_video(struct sna *sna,
pix_yoff = 0;
#endif
- dxo = dstRegion->extents.x1;
- dyo = dstRegion->extents.y1;
-
/* Use normalized texture coordinates */
src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
src_scale_y = ((float)src_h / frame->height) / (float)drw_h;
@@ -1697,16 +1369,16 @@ gen5_render_video(struct sna *sna,
gen5_get_rectangles(sna, &tmp, 1, gen5_video_bind_surfaces);
OUT_VERTEX(r.x2, r.y2);
- OUT_VERTEX_F((box->x2 - dxo) * src_scale_x);
- OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
+ OUT_VERTEX_F((box->x2 - dx) * src_scale_x);
+ OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
OUT_VERTEX(r.x1, r.y2);
- OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
- OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
+ OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
+ OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
OUT_VERTEX(r.x1, r.y1);
- OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
- OUT_VERTEX_F((box->y1 - dyo) * src_scale_y);
+ OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
+ OUT_VERTEX_F((box->y1 - dy) * src_scale_y);
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
@@ -1716,146 +1388,11 @@ gen5_render_video(struct sna *sna,
}
priv->clear = false;
- gen5_vertex_flush(sna);
+ gen4_vertex_flush(sna);
return true;
}
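
The video path folds the src-to-drawable scaling and the normalization
into a single factor per axis; a sketch of the texcoord math used above:

    /* Map a destination box edge to a normalized source coordinate. */
    static inline float video_texcoord(int box_edge, int dst_origin,
                                       int src_size, int frame_size,
                                       int drw_size)
    {
        float scale = ((float)src_size / frame_size) / (float)drw_size;
        return (box_edge - dst_origin) * scale;
    }
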
static int
-gen5_composite_solid_init(struct sna *sna,
- struct sna_composite_channel *channel,
- uint32_t color)
-{
- channel->filter = PictFilterNearest;
- channel->repeat = RepeatNormal;
- channel->is_affine = true;
- channel->is_solid = true;
- channel->transform = NULL;
- channel->width = 1;
- channel->height = 1;
- channel->card_format = GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
-
- channel->bo = sna_render_get_solid(sna, color);
-
- channel->scale[0] = channel->scale[1] = 1;
- channel->offset[0] = channel->offset[1] = 0;
- return channel->bo != NULL;
-}
-
-static bool
-gen5_composite_linear_init(struct sna *sna,
- PicturePtr picture,
- struct sna_composite_channel *channel,
- int x, int y,
- int w, int h,
- int dst_x, int dst_y)
-{
- PictLinearGradient *linear =
- (PictLinearGradient *)picture->pSourcePict;
- pixman_fixed_t tx, ty;
- float x0, y0, sf;
- float dx, dy;
-
- DBG(("%s: p1=(%f, %f), p2=(%f, %f), src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
- __FUNCTION__,
- pixman_fixed_to_double(linear->p1.x), pixman_fixed_to_double(linear->p1.y),
- pixman_fixed_to_double(linear->p2.x), pixman_fixed_to_double(linear->p2.y),
- x, y, dst_x, dst_y, w, h));
-
- if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
- return 0;
-
- if (!sna_transform_is_affine(picture->transform)) {
- DBG(("%s: fallback due to projective transform\n",
- __FUNCTION__));
- return sna_render_picture_fixup(sna, picture, channel,
- x, y, w, h, dst_x, dst_y);
- }
-
- channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear);
- if (!channel->bo)
- return 0;
-
- channel->filter = PictFilterNearest;
- channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
- channel->width = channel->bo->pitch / 4;
- channel->height = 1;
- channel->pict_format = PICT_a8r8g8b8;
-
- channel->scale[0] = channel->scale[1] = 1;
- channel->offset[0] = channel->offset[1] = 0;
-
- if (sna_transform_is_translation(picture->transform, &tx, &ty)) {
- dx = pixman_fixed_to_double(linear->p2.x - linear->p1.x);
- dy = pixman_fixed_to_double(linear->p2.y - linear->p1.y);
-
- x0 = pixman_fixed_to_double(linear->p1.x);
- y0 = pixman_fixed_to_double(linear->p1.y);
-
- if (tx | ty) {
- x0 -= pixman_fixed_to_double(tx);
- y0 -= pixman_fixed_to_double(ty);
- }
- } else {
- struct pixman_f_vector p1, p2;
- struct pixman_f_transform m, inv;
-
- pixman_f_transform_from_pixman_transform(&m, picture->transform);
- DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n",
- __FUNCTION__,
- m.m[0][0], m.m[0][1], m.m[0][2],
- m.m[1][0], m.m[1][1], m.m[1][2],
- m.m[2][0], m.m[2][1], m.m[2][2]));
- if (!pixman_f_transform_invert(&inv, &m))
- return 0;
-
- p1.v[0] = pixman_fixed_to_double(linear->p1.x);
- p1.v[1] = pixman_fixed_to_double(linear->p1.y);
- p1.v[2] = 1.;
- pixman_f_transform_point(&inv, &p1);
-
- p2.v[0] = pixman_fixed_to_double(linear->p2.x);
- p2.v[1] = pixman_fixed_to_double(linear->p2.y);
- p2.v[2] = 1.;
- pixman_f_transform_point(&inv, &p2);
-
- DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n",
- __FUNCTION__,
- p1.v[0], p1.v[1], p1.v[2],
- p2.v[0], p2.v[1], p2.v[2]));
-
- dx = p2.v[0] - p1.v[0];
- dy = p2.v[1] - p1.v[1];
-
- x0 = p1.v[0];
- y0 = p1.v[1];
- }
-
- sf = dx*dx + dy*dy;
- dx /= sf;
- dy /= sf;
-
- channel->embedded_transform.matrix[0][0] = pixman_double_to_fixed(dx);
- channel->embedded_transform.matrix[0][1] = pixman_double_to_fixed(dy);
- channel->embedded_transform.matrix[0][2] = -pixman_double_to_fixed(dx*(x0+dst_x-x) + dy*(y0+dst_y-y));
-
- channel->embedded_transform.matrix[1][0] = 0;
- channel->embedded_transform.matrix[1][1] = 0;
- channel->embedded_transform.matrix[1][2] = pixman_double_to_fixed(.5);
-
- channel->embedded_transform.matrix[2][0] = 0;
- channel->embedded_transform.matrix[2][1] = 0;
- channel->embedded_transform.matrix[2][2] = pixman_fixed_1;
-
- channel->transform = &channel->embedded_transform;
- channel->is_affine = 1;
-
- DBG(("%s: dx=%f, dy=%f, offset=%f\n",
- __FUNCTION__, dx, dy, -dx*(x0-x+dst_x) + -dy*(y0-y+dst_y)));
-
- return channel->bo != NULL;
-}
-
-static int
gen5_composite_picture(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
@@ -1875,16 +1412,16 @@ gen5_composite_picture(struct sna *sna,
channel->card_format = -1;
if (sna_picture_is_solid(picture, &color))
- return gen5_composite_solid_init(sna, channel, color);
+ return gen4_channel_init_solid(sna, channel, color);
if (picture->pDrawable == NULL) {
int ret;
if (picture->pSourcePict->type == SourcePictTypeLinear)
- return gen5_composite_linear_init(sna, picture, channel,
- x, y,
- w, h,
- dst_x, dst_y);
+ return gen4_channel_init_linear(sna, picture, channel,
+ x, y,
+ w, h,
+ dst_x, dst_y);
DBG(("%s -- fixup, gradient\n", __FUNCTION__));
ret = -1;
@@ -1935,7 +1472,8 @@ gen5_composite_picture(struct sna *sna,
channel->card_format = gen5_get_card_format(picture->format);
if (channel->card_format == -1)
return sna_render_picture_convert(sna, picture, channel, pixmap,
- x, y, w, h, dst_x, dst_y);
+ x, y, w, h, dst_x, dst_y,
+ false);
if (too_large(pixmap->drawable.width, pixmap->drawable.height))
return sna_render_picture_extract(sna, picture, channel,
@@ -1957,8 +1495,8 @@ static void
gen5_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
- if (sna->render_state.gen5.vertex_offset) {
- gen5_vertex_flush(sna);
+ if (sna->render.vertex_offset) {
+ gen4_vertex_flush(sna);
gen5_magic_ca_pass(sna,op);
}
@@ -1981,10 +1519,9 @@ gen5_composite_set_target(struct sna *sna,
BoxRec box;
op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
- op->dst.format = dst->format;
- op->dst.width = op->dst.pixmap->drawable.width;
+ op->dst.width = op->dst.pixmap->drawable.width;
op->dst.height = op->dst.pixmap->drawable.height;
-
+ op->dst.format = dst->format;
if (w && h) {
box.x1 = x;
box.y1 = y;
@@ -2019,15 +1556,6 @@ gen5_composite_set_target(struct sna *sna,
return true;
}
-static inline bool
-picture_is_cpu(PicturePtr picture)
-{
- if (!picture->pDrawable)
- return false;
-
- return !is_gpu(picture->pDrawable);
-}
-
static bool
try_blt(struct sna *sna,
PicturePtr dst, PicturePtr src,
@@ -2052,7 +1580,7 @@ try_blt(struct sna *sna,
return true;
/* is the source picture only in cpu memory e.g. a shm pixmap? */
- return picture_is_cpu(src);
+ return picture_is_cpu(sna, src);
}
static bool
@@ -2077,15 +1605,10 @@ has_alphamap(PicturePtr p)
}
static bool
-untransformed(PicturePtr p)
+need_upload(struct sna *sna, PicturePtr p)
{
- return !p->transform || pixman_transform_is_int_translate(p->transform);
-}
-
-static bool
-need_upload(PicturePtr p)
-{
- return p->pDrawable && untransformed(p) && !is_gpu(p->pDrawable);
+ return p->pDrawable && untransformed(p) &&
+ !is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
}
static bool
@@ -2108,7 +1631,7 @@ source_is_busy(PixmapPtr pixmap)
}
static bool
-source_fallback(PicturePtr p, PixmapPtr pixmap)
+source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap)
{
if (sna_picture_is_solid(p, NULL))
return false;
@@ -2121,7 +1644,7 @@ source_fallback(PicturePtr p, PixmapPtr pixmap)
if (pixmap && source_is_busy(pixmap))
return false;
- return has_alphamap(p) || !gen5_check_filter(p) || need_upload(p);
+ return has_alphamap(p) || !gen5_check_filter(p) || need_upload(sna, p);
}
static bool
@@ -2130,7 +1653,6 @@ gen5_composite_fallback(struct sna *sna,
PicturePtr mask,
PicturePtr dst)
{
- struct sna_pixmap *priv;
PixmapPtr src_pixmap;
PixmapPtr mask_pixmap;
PixmapPtr dst_pixmap;
@@ -2145,11 +1667,11 @@ gen5_composite_fallback(struct sna *sna,
dst_pixmap = get_drawable_pixmap(dst->pDrawable);
src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
- src_fallback = source_fallback(src, src_pixmap);
+ src_fallback = source_fallback(sna, src, src_pixmap);
if (mask) {
mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
- mask_fallback = source_fallback(mask, mask_pixmap);
+ mask_fallback = source_fallback(sna, mask, mask_pixmap);
} else {
mask_pixmap = NULL;
mask_fallback = false;
@@ -2169,8 +1691,7 @@ gen5_composite_fallback(struct sna *sna,
}
/* If anything is on the GPU, push everything out to the GPU */
- priv = sna_pixmap(dst_pixmap);
- if (priv && priv->gpu_damage && !priv->clear) {
+ if (dst_use_gpu(dst_pixmap)) {
DBG(("%s: dst is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
@@ -2205,14 +1726,14 @@ gen5_composite_fallback(struct sna *sna,
if (too_large(dst_pixmap->drawable.width,
dst_pixmap->drawable.height) &&
- (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage))) {
+ dst_is_cpu(dst_pixmap)) {
DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
return true;
}
DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
__FUNCTION__));
- return false;
+ return dst_use_cpu(dst_pixmap);
}
static int
@@ -2233,7 +1754,7 @@ reuse_source(struct sna *sna,
}
if (sna_picture_is_solid(mask, &color))
- return gen5_composite_solid_init(sna, mc, color);
+ return gen4_channel_init_solid(sna, mc, color);
if (sc->is_solid)
return false;
@@ -2318,6 +1839,7 @@ gen5_render_composite(struct sna *sna,
}
DBG(("%s: preparing source\n", __FUNCTION__));
+ tmp->op = op;
switch (gen5_composite_picture(sna, src, &tmp->src,
src_x, src_y,
width, height,
@@ -2327,7 +1849,7 @@ gen5_render_composite(struct sna *sna,
DBG(("%s: failed to prepare source picture\n", __FUNCTION__));
goto cleanup_dst;
case 0:
- if (!gen5_composite_solid_init(sna, &tmp->src, 0))
+ if (!gen4_channel_init_solid(sna, &tmp->src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
@@ -2341,12 +1863,10 @@ gen5_render_composite(struct sna *sna,
break;
}
- tmp->op = op;
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
- tmp->prim_emit = gen5_emit_composite_primitive;
if (mask) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
tmp->has_component_alpha = true;
@@ -2380,7 +1900,7 @@ gen5_render_composite(struct sna *sna,
DBG(("%s: failed to prepare mask picture\n", __FUNCTION__));
goto cleanup_src;
case 0:
- if (!gen5_composite_solid_init(sna, &tmp->mask, 0))
+ if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
goto cleanup_src;
/* fall through to fixup */
case 1:
@@ -2390,33 +1910,22 @@ gen5_render_composite(struct sna *sna,
}
tmp->is_affine &= tmp->mask.is_affine;
-
- if (tmp->src.transform == NULL && tmp->mask.transform == NULL)
- tmp->prim_emit = gen5_emit_composite_primitive_identity_source_mask;
-
- tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine;
- } else {
- if (tmp->src.is_solid)
- tmp->prim_emit = gen5_emit_composite_primitive_solid;
- else if (tmp->src.transform == NULL)
- tmp->prim_emit = gen5_emit_composite_primitive_identity_source;
- else if (tmp->src.is_affine)
- tmp->prim_emit = gen5_emit_composite_primitive_affine_source;
-
- tmp->floats_per_vertex = 3 + !tmp->is_affine;
}
- tmp->floats_per_rect = 3*tmp->floats_per_vertex;
tmp->u.gen5.wm_kernel =
gen5_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
- tmp->u.gen5.ve_id = (tmp->mask.bo != NULL) << 1 | tmp->is_affine;
+ tmp->u.gen5.ve_id = gen4_choose_composite_emitter(tmp);
tmp->blt = gen5_render_composite_blt;
tmp->box = gen5_render_composite_box;
- tmp->boxes = gen5_render_composite_boxes;
+ tmp->boxes = gen5_render_composite_boxes__blt;
+ if (tmp->emit_boxes) {
+ tmp->boxes = gen5_render_composite_boxes;
+ tmp->thread_boxes = gen5_render_composite_boxes__thread;
+ }
tmp->done = gen5_render_composite_done;
if (!kgem_check_bo(&sna->kgem,
@@ -2444,125 +1953,6 @@ cleanup_dst:
}
#if !NO_COMPOSITE_SPANS
-inline static void
-gen5_emit_composite_texcoord(struct sna *sna,
- const struct sna_composite_channel *channel,
- int16_t x, int16_t y)
-{
- float t[3];
-
- if (channel->is_affine) {
- sna_get_transformed_coordinates(x + channel->offset[0],
- y + channel->offset[1],
- channel->transform,
- &t[0], &t[1]);
- OUT_VERTEX_F(t[0] * channel->scale[0]);
- OUT_VERTEX_F(t[1] * channel->scale[1]);
- } else {
- t[0] = t[1] = 0; t[2] = 1;
- sna_get_transformed_coordinates_3d(x + channel->offset[0],
- y + channel->offset[1],
- channel->transform,
- &t[0], &t[1], &t[2]);
- OUT_VERTEX_F(t[0] * channel->scale[0]);
- OUT_VERTEX_F(t[1] * channel->scale[1]);
- OUT_VERTEX_F(t[2]);
- }
-}
-
-inline static void
-gen5_emit_composite_texcoord_affine(struct sna *sna,
- const struct sna_composite_channel *channel,
- int16_t x, int16_t y)
-{
- float t[2];
-
- sna_get_transformed_coordinates(x + channel->offset[0],
- y + channel->offset[1],
- channel->transform,
- &t[0], &t[1]);
- OUT_VERTEX_F(t[0] * channel->scale[0]);
- OUT_VERTEX_F(t[1] * channel->scale[1]);
-}
-
-inline static void
-gen5_emit_composite_spans_vertex(struct sna *sna,
- const struct sna_composite_spans_op *op,
- int16_t x, int16_t y)
-{
- OUT_VERTEX(x, y);
- gen5_emit_composite_texcoord(sna, &op->base.src, x, y);
-}
-
-fastcall static void
-gen5_emit_composite_spans_primitive(struct sna *sna,
- const struct sna_composite_spans_op *op,
- const BoxRec *box,
- float opacity)
-{
- gen5_emit_composite_spans_vertex(sna, op, box->x2, box->y2);
- OUT_VERTEX_F(opacity);
- OUT_VERTEX_F(1);
- if (!op->base.is_affine)
- OUT_VERTEX_F(1);
-
- gen5_emit_composite_spans_vertex(sna, op, box->x1, box->y2);
- OUT_VERTEX_F(opacity);
- OUT_VERTEX_F(1);
- if (!op->base.is_affine)
- OUT_VERTEX_F(1);
-
- gen5_emit_composite_spans_vertex(sna, op, box->x1, box->y1);
- OUT_VERTEX_F(opacity);
- OUT_VERTEX_F(0);
- if (!op->base.is_affine)
- OUT_VERTEX_F(1);
-}
-
-fastcall static void
-gen5_emit_composite_spans_solid(struct sna *sna,
- const struct sna_composite_spans_op *op,
- const BoxRec *box,
- float opacity)
-{
- OUT_VERTEX(box->x2, box->y2);
- OUT_VERTEX_F(1); OUT_VERTEX_F(1);
- OUT_VERTEX_F(opacity); OUT_VERTEX_F(1);
-
- OUT_VERTEX(box->x1, box->y2);
- OUT_VERTEX_F(0); OUT_VERTEX_F(1);
- OUT_VERTEX_F(opacity); OUT_VERTEX_F(1);
-
- OUT_VERTEX(box->x1, box->y1);
- OUT_VERTEX_F(0); OUT_VERTEX_F(0);
- OUT_VERTEX_F(opacity); OUT_VERTEX_F(0);
-}
-
-fastcall static void
-gen5_emit_composite_spans_affine(struct sna *sna,
- const struct sna_composite_spans_op *op,
- const BoxRec *box,
- float opacity)
-{
- OUT_VERTEX(box->x2, box->y2);
- gen5_emit_composite_texcoord_affine(sna, &op->base.src,
- box->x2, box->y2);
- OUT_VERTEX_F(opacity);
- OUT_VERTEX_F(1);
-
- OUT_VERTEX(box->x1, box->y2);
- gen5_emit_composite_texcoord_affine(sna, &op->base.src,
- box->x1, box->y2);
- OUT_VERTEX_F(opacity);
- OUT_VERTEX_F(1);
-
- OUT_VERTEX(box->x1, box->y1);
- gen5_emit_composite_texcoord_affine(sna, &op->base.src,
- box->x1, box->y1);
- OUT_VERTEX_F(opacity);
- OUT_VERTEX_F(0);
-}
-
fastcall static void
gen5_render_composite_spans_box(struct sna *sna,
const struct sna_composite_spans_op *op,
@@ -2612,18 +2002,51 @@ gen5_render_composite_spans_boxes(struct sna *sna,
}
fastcall static void
+gen5_render_composite_spans_boxes__thread(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ op->base.dst.x, op->base.dst.y));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
+ gen5_bind_surfaces);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
+fastcall static void
gen5_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
- if (sna->render_state.gen5.vertex_offset)
- gen5_vertex_flush(sna);
+ if (sna->render.vertex_offset)
+ gen4_vertex_flush(sna);
DBG(("%s()\n", __FUNCTION__));
- kgem_bo_destroy(&sna->kgem, op->base.mask.bo);
- if (op->base.src.bo)
- kgem_bo_destroy(&sna->kgem, op->base.src.bo);
-
+ kgem_bo_destroy(&sna->kgem, op->base.src.bo);
sna_render_composite_redirect_done(sna, &op->base);
}
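
/* The __thread span emitter added above reserves space in the shared vertex
 * buffer while holding sna_vertex_lock, then drops the lock for the actual
 * vertex writes, bracketing them with acquire/release so a concurrent flush
 * can wait for outstanding writers. A minimal sketch of that protocol in
 * plain pthreads (hypothetical, simplified types; the real counters live in
 * struct sna_render): */

#include <pthread.h>

struct render_shared {
	pthread_mutex_t lock;	/* PTHREAD_MUTEX_INITIALIZER */
	pthread_cond_t wait;	/* PTHREAD_COND_INITIALIZER */
	int active;		/* writers currently emitting vertices */
	int vertex_used;	/* floats reserved so far */
	float vertices[64*1024];
};

static float *reserve_vertices(struct render_shared *r, int nfloats)
{
	float *v;

	pthread_mutex_lock(&r->lock);
	v = r->vertices + r->vertex_used;	/* reserve under the lock */
	r->vertex_used += nfloats;
	r->active++;				/* acquire: writer is live */
	pthread_mutex_unlock(&r->lock);

	return v;	/* filled in outside the lock */
}

static void release_vertices(struct render_shared *r)
{
	pthread_mutex_lock(&r->lock);
	if (--r->active == 0)
		pthread_cond_broadcast(&r->wait);	/* wake any flusher */
	pthread_mutex_unlock(&r->lock);
}

static void wait_for_writers(struct render_shared *r)
{
	pthread_mutex_lock(&r->lock);
	while (r->active)
		pthread_cond_wait(&r->wait, &r->lock);
	pthread_mutex_unlock(&r->lock);
}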
@@ -2633,21 +2056,39 @@ gen5_check_composite_spans(struct sna *sna,
int16_t width, int16_t height,
unsigned flags)
{
- if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
- return false;
+ DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
+ __FUNCTION__, op, width, height, flags));
if (op >= ARRAY_SIZE(gen5_blend_op))
return false;
- if (gen5_composite_fallback(sna, src, NULL, dst))
+ if (gen5_composite_fallback(sna, src, NULL, dst)) {
+ DBG(("%s: operation would fallback\n", __FUNCTION__));
return false;
+ }
- if (need_tiling(sna, width, height)) {
- if (!is_gpu(dst->pDrawable)) {
- DBG(("%s: fallback, tiled operation not on GPU\n",
- __FUNCTION__));
+ if (need_tiling(sna, width, height) &&
+ !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
+ DBG(("%s: fallback, tiled operation not on GPU\n",
+ __FUNCTION__));
+ return false;
+ }
+
+ if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
+ struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
+ assert(priv);
+
+ if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
+ return true;
+
+ if (flags & COMPOSITE_SPANS_INPLACE_HINT)
return false;
- }
+
+ if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0 &&
+ dst->format == PICT_a8)
+ return false;
+
+ return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
}
return true;
@@ -2690,7 +2131,7 @@ gen5_render_composite_spans(struct sna *sna,
case -1:
goto cleanup_dst;
case 0:
- if (!gen5_composite_solid_init(sna, &tmp->base.src, 0))
+ if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
@@ -2698,27 +2139,19 @@ gen5_render_composite_spans(struct sna *sna,
break;
}
- tmp->base.mask.bo = sna_render_get_solid(sna, 0);
- if (tmp->base.mask.bo == NULL)
- goto cleanup_src;
+ tmp->base.mask.bo = NULL;
tmp->base.is_affine = tmp->base.src.is_affine;
tmp->base.has_component_alpha = false;
tmp->base.need_magic_ca_pass = false;
- tmp->prim_emit = gen5_emit_composite_spans_primitive;
- if (tmp->base.src.is_solid)
- tmp->prim_emit = gen5_emit_composite_spans_solid;
- else if (tmp->base.is_affine)
- tmp->prim_emit = gen5_emit_composite_spans_affine;
- tmp->base.floats_per_vertex = 5 + 2*!tmp->base.is_affine;
- tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;
-
+ tmp->base.u.gen5.ve_id = gen4_choose_spans_emitter(tmp);
tmp->base.u.gen5.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine;
- tmp->base.u.gen5.ve_id = 1 << 1 | tmp->base.is_affine;
tmp->box = gen5_render_composite_spans_box;
tmp->boxes = gen5_render_composite_spans_boxes;
+ if (tmp->emit_boxes)
+ tmp->thread_boxes = gen5_render_composite_spans_boxes__thread;
tmp->done = gen5_render_composite_spans_done;
if (!kgem_check_bo(&sna->kgem,
@@ -2752,7 +2185,7 @@ gen5_copy_bind_surfaces(struct sna *sna,
uint32_t *binding_table;
uint16_t offset;
- gen5_get_batch(sna);
+ gen5_get_batch(sna, op);
binding_table = gen5_composite_get_binding_table(sna, &offset);
@@ -2846,7 +2279,6 @@ fallback_blt:
if (box[i].y2 > extents.y2)
extents.y2 = box[i].y2;
}
-
if (!sna_render_composite_redirect(sna, &tmp,
extents.x1 + dst_dx,
extents.y1 + dst_dy,
@@ -2893,7 +2325,7 @@ fallback_blt:
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
tmp.u.gen5.wm_kernel = WM_KERNEL;
- tmp.u.gen5.ve_id = 1;
+ tmp.u.gen5.ve_id = 2;
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
kgem_submit(&sna->kgem);
@@ -2939,7 +2371,7 @@ fallback_blt:
} while (--n_this_time);
} while (n);
- gen5_vertex_flush(sna);
+ gen4_vertex_flush(sna);
sna_render_composite_redirect_done(sna, &tmp);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return true;
@@ -2950,6 +2382,14 @@ fallback_tiled_dst:
if (tmp.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
+ if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
+ sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n))
+ return true;
+
return sna_tiling_copy_boxes(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
@@ -2985,8 +2425,8 @@ static void
gen5_render_copy_done(struct sna *sna,
const struct sna_copy_op *op)
{
- if (sna->render_state.gen5.vertex_offset)
- gen5_vertex_flush(sna);
+ if (sna->render.vertex_offset)
+ gen4_vertex_flush(sna);
DBG(("%s()\n", __FUNCTION__));
}
@@ -3049,9 +2489,9 @@ fallback:
op->base.floats_per_vertex = 3;
op->base.floats_per_rect = 9;
op->base.u.gen5.wm_kernel = WM_KERNEL;
- op->base.u.gen5.ve_id = 1;
+ op->base.u.gen5.ve_id = 2;
- if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
+ if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
goto fallback;
@@ -3081,7 +2521,7 @@ gen5_fill_bind_surfaces(struct sna *sna,
uint32_t *binding_table;
uint16_t offset;
- gen5_get_batch(sna);
+ gen5_get_batch(sna, op);
binding_table = gen5_composite_get_binding_table(sna, &offset);
@@ -3168,16 +2608,19 @@ gen5_render_fill_boxes(struct sna *sna,
dst, dst_bo, box, n);
}
- if (op == PictOpClear)
+ if (op == PictOpClear) {
pixel = 0;
- else if (!sna_get_pixel_from_rgba(&pixel,
- color->red,
- color->green,
- color->blue,
- color->alpha,
- PICT_a8r8g8b8))
+ op = PictOpSrc;
+ } else if (!sna_get_pixel_from_rgba(&pixel,
+ color->red,
+ color->green,
+ color->blue,
+ color->alpha,
+ PICT_a8r8g8b8))
return false;
+ DBG(("%s(%08x x %d)\n", __FUNCTION__, pixel, n));
+
memset(&tmp, 0, sizeof(tmp));
tmp.op = op;
@@ -3193,8 +2636,8 @@ gen5_render_fill_boxes(struct sna *sna,
tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
tmp.is_affine = true;
- tmp.floats_per_vertex = 3;
- tmp.floats_per_rect = 9;
+ tmp.floats_per_vertex = 2;
+ tmp.floats_per_rect = 6;
tmp.u.gen5.wm_kernel = WM_KERNEL;
tmp.u.gen5.ve_id = 1;
@@ -3217,22 +2660,19 @@ gen5_render_fill_boxes(struct sna *sna,
DBG((" (%d, %d), (%d, %d)\n",
box->x1, box->y1, box->x2, box->y2));
OUT_VERTEX(box->x2, box->y2);
- OUT_VERTEX_F(1);
- OUT_VERTEX_F(1);
+ OUT_VERTEX_F(.5);
OUT_VERTEX(box->x1, box->y2);
- OUT_VERTEX_F(0);
- OUT_VERTEX_F(1);
+ OUT_VERTEX_F(.5);
OUT_VERTEX(box->x1, box->y1);
- OUT_VERTEX_F(0);
- OUT_VERTEX_F(0);
+ OUT_VERTEX_F(.5);
box++;
} while (--n_this_time);
} while (n);
- gen5_vertex_flush(sna);
+ gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return true;
}
@@ -3247,16 +2687,13 @@ gen5_render_fill_op_blt(struct sna *sna,
gen5_get_rectangles(sna, &op->base, 1, gen5_fill_bind_surfaces);
OUT_VERTEX(x+w, y+h);
- OUT_VERTEX_F(1);
- OUT_VERTEX_F(1);
+ OUT_VERTEX_F(.5);
OUT_VERTEX(x, y+h);
- OUT_VERTEX_F(0);
- OUT_VERTEX_F(1);
+ OUT_VERTEX_F(.5);
OUT_VERTEX(x, y);
- OUT_VERTEX_F(0);
- OUT_VERTEX_F(0);
+ OUT_VERTEX_F(.5);
}
fastcall static void
@@ -3270,16 +2707,13 @@ gen5_render_fill_op_box(struct sna *sna,
gen5_get_rectangles(sna, &op->base, 1, gen5_fill_bind_surfaces);
OUT_VERTEX(box->x2, box->y2);
- OUT_VERTEX_F(1);
- OUT_VERTEX_F(1);
+ OUT_VERTEX_F(.5);
OUT_VERTEX(box->x1, box->y2);
- OUT_VERTEX_F(0);
- OUT_VERTEX_F(1);
+ OUT_VERTEX_F(.5);
OUT_VERTEX(box->x1, box->y1);
- OUT_VERTEX_F(0);
- OUT_VERTEX_F(0);
+ OUT_VERTEX_F(.5);
}
fastcall static void
@@ -3300,16 +2734,13 @@ gen5_render_fill_op_boxes(struct sna *sna,
do {
OUT_VERTEX(box->x2, box->y2);
- OUT_VERTEX_F(1);
- OUT_VERTEX_F(1);
+ OUT_VERTEX_F(.5);
OUT_VERTEX(box->x1, box->y2);
- OUT_VERTEX_F(0);
- OUT_VERTEX_F(1);
+ OUT_VERTEX_F(.5);
OUT_VERTEX(box->x1, box->y1);
- OUT_VERTEX_F(0);
- OUT_VERTEX_F(0);
+ OUT_VERTEX_F(.5);
box++;
} while (--nbox_this_time);
} while (nbox);
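
/* The fill emitters above shrink from three floats per vertex to two: one
 * dword holding the destination (x, y) packed as two int16s, plus a single
 * texture coordinate. Since the solid colour is a 1x1 surface sampled with
 * SAMPLER_EXTEND_REPEAT, any in-texel coordinate works, and .5 addresses the
 * texel centre. A sketch of the layout (hypothetical helper; in the driver
 * OUT_VERTEX performs the int16 packing): */

#include <stdint.h>

union packed_xy { struct { int16_t x, y; } p; float f; };

static void emit_fill_vertex(float *v, int16_t x, int16_t y)
{
	union packed_xy dst;

	dst.p.x = x;
	dst.p.y = y;
	v[0] = dst.f;	/* bit pattern of two int16s, read as R16G16_SSCALED */
	v[1] = .5f;	/* u: centre of the 1x1 solid-colour texel */
}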
@@ -3319,8 +2750,8 @@ static void
gen5_render_fill_op_done(struct sna *sna,
const struct sna_fill_op *op)
{
- if (sna->render_state.gen5.vertex_offset)
- gen5_vertex_flush(sna);
+ if (sna->render.vertex_offset)
+ gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
DBG(("%s()\n", __FUNCTION__));
@@ -3375,8 +2806,8 @@ gen5_render_fill(struct sna *sna, uint8_t alu,
op->base.mask.repeat = SAMPLER_EXTEND_NONE;
op->base.is_affine = true;
- op->base.floats_per_vertex = 3;
- op->base.floats_per_rect = 9;
+ op->base.floats_per_vertex = 2;
+ op->base.floats_per_rect = 6;
op->base.u.gen5.wm_kernel = WM_KERNEL;
op->base.u.gen5.ve_id = 1;
@@ -3463,8 +2894,8 @@ gen5_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
tmp.mask.repeat = SAMPLER_EXTEND_NONE;
tmp.is_affine = true;
- tmp.floats_per_vertex = 3;
- tmp.floats_per_rect = 9;
+ tmp.floats_per_vertex = 2;
+ tmp.floats_per_rect = 6;
tmp.has_component_alpha = 0;
tmp.need_magic_ca_pass = false;
@@ -3472,7 +2903,11 @@ gen5_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
tmp.u.gen5.ve_id = 1;
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
+ kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+ return false;
+ }
assert(kgem_check_bo(&sna->kgem, bo, NULL));
}
@@ -3483,18 +2918,15 @@ gen5_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2));
OUT_VERTEX(x2, y2);
- OUT_VERTEX_F(1);
- OUT_VERTEX_F(1);
+ OUT_VERTEX_F(.5);
OUT_VERTEX(x1, y2);
- OUT_VERTEX_F(0);
- OUT_VERTEX_F(1);
+ OUT_VERTEX_F(.5);
OUT_VERTEX(x1, y1);
- OUT_VERTEX_F(0);
- OUT_VERTEX_F(0);
+ OUT_VERTEX_F(.5);
- gen5_vertex_flush(sna);
+ gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return true;
@@ -3503,14 +2935,17 @@ gen5_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
static void
gen5_render_flush(struct sna *sna)
{
- gen5_vertex_close(sna);
+ gen4_vertex_close(sna);
+
+ assert(sna->render.vb_id == 0);
+ assert(sna->render.vertex_offset == 0);
}
static void
gen5_render_context_switch(struct kgem *kgem,
int new_mode)
{
- if (!kgem->mode)
+ if (!kgem->nbatch)
return;
/* WaNonPipelinedStateCommandFlush
@@ -3529,7 +2964,7 @@ gen5_render_context_switch(struct kgem *kgem,
sna->render_state.gen5.drawrect_limit = -1;
}
- if (kgem_is_idle(kgem)) {
+ if (kgem_ring_is_idle(kgem, kgem->ring)) {
DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
_kgem_submit(kgem);
}
@@ -3574,7 +3009,6 @@ gen5_render_expire(struct kgem *kgem)
static void gen5_render_reset(struct sna *sna)
{
sna->render_state.gen5.needs_invariant = true;
- sna->render_state.gen5.vb_id = 0;
sna->render_state.gen5.ve_id = -1;
sna->render_state.gen5.last_primitive = -1;
sna->render_state.gen5.last_pipelined_pointers = 0;
@@ -3588,6 +3022,10 @@ static void gen5_render_reset(struct sna *sna)
DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
discard_vbo(sna);
}
+
+ sna->render.vertex_offset = 0;
+ sna->render.nvertex_reloc = 0;
+ sna->render.vb_id = 0;
}
static void gen5_render_fini(struct sna *sna)
@@ -3703,23 +3141,11 @@ static void gen5_init_wm_state(struct gen5_wm_unit_state *state,
state->thread1.binding_table_entry_count = 0;
}
-static uint32_t gen5_create_cc_viewport(struct sna_static_stream *stream)
-{
- struct gen5_cc_viewport vp;
-
- vp.min_depth = -1.e35;
- vp.max_depth = 1.e35;
-
- return sna_static_stream_add(stream, &vp, sizeof(vp), 32);
-}
-
static uint32_t gen5_create_cc_unit_state(struct sna_static_stream *stream)
{
uint8_t *ptr, *base;
- uint32_t vp;
int i, j;
- vp = gen5_create_cc_viewport(stream);
base = ptr =
sna_static_stream_map(stream,
GEN5_BLENDFACTOR_COUNT*GEN5_BLENDFACTOR_COUNT*64,
@@ -3732,7 +3158,6 @@ static uint32_t gen5_create_cc_unit_state(struct sna_static_stream *stream)
state->cc3.blend_enable =
!(j == GEN5_BLENDFACTOR_ZERO && i == GEN5_BLENDFACTOR_ONE);
- state->cc4.cc_viewport_state_offset = vp >> 5;
state->cc5.logicop_func = 0xc; /* COPY */
state->cc5.ia_blend_function = GEN5_BLENDFUNCTION_ADD;
@@ -3816,8 +3241,7 @@ static bool gen5_render_setup(struct sna *sna)
for (m = 0; m < KERNEL_COUNT; m++) {
gen5_init_wm_state(&wm_state->state,
wm_kernels[m].has_mask,
- wm[m],
- sampler_state);
+ wm[m], sampler_state);
wm_state++;
}
}
@@ -3840,10 +3264,15 @@ bool gen5_render_init(struct sna *sna)
sna->kgem.retire = gen5_render_retire;
sna->kgem.expire = gen5_render_expire;
+#if !NO_COMPOSITE
sna->render.composite = gen5_render_composite;
+ sna->render.prefer_gpu |= PREFER_GPU_RENDER;
+#endif
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen5_check_composite_spans;
sna->render.composite_spans = gen5_render_composite_spans;
+ if (DEVICE_ID(sna->PciInfo) == 0x0044)
+ sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
sna->render.video = gen5_render_video;
diff --git a/src/sna/gen5_render.h b/src/sna/gen5_render.h
index b6e5b0c2e..0f6bae6b6 100644
--- a/src/sna/gen5_render.h
+++ b/src/sna/gen5_render.h
@@ -749,15 +749,14 @@
#define GEN5_VERTEXBUFFER_ACCESS_VERTEXDATA 0
#define GEN5_VERTEXBUFFER_ACCESS_INSTANCEDATA 1
-#define GEN5_VFCOMPONENT_NOSTORE 0
-#define GEN5_VFCOMPONENT_STORE_SRC 1
-#define GEN5_VFCOMPONENT_STORE_0 2
-#define GEN5_VFCOMPONENT_STORE_1_FLT 3
-#define GEN5_VFCOMPONENT_STORE_1_INT 4
-#define GEN5_VFCOMPONENT_STORE_VID 5
-#define GEN5_VFCOMPONENT_STORE_IID 6
-#define GEN5_VFCOMPONENT_STORE_PID 7
-
+#define VFCOMPONENT_NOSTORE 0
+#define VFCOMPONENT_STORE_SRC 1
+#define VFCOMPONENT_STORE_0 2
+#define VFCOMPONENT_STORE_1_FLT 3
+#define VFCOMPONENT_STORE_1_INT 4
+#define VFCOMPONENT_STORE_VID 5
+#define VFCOMPONENT_STORE_IID 6
+#define VFCOMPONENT_STORE_PID 7
/* Execution Unit (EU) defines
@@ -1990,50 +1989,43 @@ struct gen5_sampler_legacy_border_color {
uint8_t color[4];
};
-struct gen5_sampler_state
-{
-
- struct
- {
- unsigned int shadow_function:3;
- unsigned int lod_bias:11;
- unsigned int min_filter:3;
- unsigned int mag_filter:3;
- unsigned int mip_filter:2;
- unsigned int base_level:5;
+struct gen5_sampler_state {
+ struct {
+ unsigned int shadow_function:3;
+ unsigned int lod_bias:11;
+ unsigned int min_filter:3;
+ unsigned int mag_filter:3;
+ unsigned int mip_filter:2;
+ unsigned int base_level:5;
unsigned int pad:1;
- unsigned int lod_preclamp:1;
- unsigned int border_color_mode:1;
+ unsigned int lod_preclamp:1;
+ unsigned int border_color_mode:1;
unsigned int pad0:1;
- unsigned int disable:1;
+ unsigned int disable:1;
} ss0;
- struct
- {
- unsigned int r_wrap_mode:3;
- unsigned int t_wrap_mode:3;
- unsigned int s_wrap_mode:3;
+ struct {
+ unsigned int r_wrap_mode:3;
+ unsigned int t_wrap_mode:3;
+ unsigned int s_wrap_mode:3;
unsigned int pad:3;
- unsigned int max_lod:10;
- unsigned int min_lod:10;
+ unsigned int max_lod:10;
+ unsigned int min_lod:10;
} ss1;
-
- struct
- {
+ struct {
unsigned int pad:5;
- unsigned int border_color_pointer:27;
+ unsigned int border_color_pointer:27;
} ss2;
-
- struct
- {
- unsigned int pad:19;
- unsigned int max_aniso:3;
- unsigned int chroma_key_mode:1;
- unsigned int chroma_key_index:2;
- unsigned int chroma_key_enable:1;
- unsigned int monochrome_filter_width:3;
- unsigned int monochrome_filter_height:3;
+
+ struct {
+ uint32_t pad:13;
+ uint32_t address_round:6;
+ uint32_t max_aniso:3;
+ uint32_t chroma_key_mode:1;
+ uint32_t chroma_key_index:2;
+ uint32_t chroma_key_enable:1;
+ uint32_t mbz:6;
} ss3;
};

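/* The rewritten ss3 above adds the address_round field and an explicit mbz
 * tail; the widths must still sum to one 32-bit dword
 * (13+6+3+1+2+1+6 == 32). A compile-time check, as a sketch using C11
 * _Static_assert: */

#include <stdint.h>

struct ss3 {
	uint32_t pad:13;
	uint32_t address_round:6;
	uint32_t max_aniso:3;
	uint32_t chroma_key_mode:1;
	uint32_t chroma_key_index:2;
	uint32_t chroma_key_enable:1;
	uint32_t mbz:6;
};
_Static_assert(sizeof(struct ss3) == sizeof(uint32_t),
	       "sampler ss3 must occupy exactly one dword");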
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index fd7f2958b..3855f0449 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -42,6 +42,8 @@
#include "brw/brw.h"
#include "gen6_render.h"
+#include "gen4_source.h"
+#include "gen4_vertex.h"
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
@@ -186,10 +188,6 @@ static const struct blendinfo {
#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
-#define VIDEO_SAMPLER \
- SAMPLER_OFFSET(SAMPLER_FILTER_BILINEAR, SAMPLER_EXTEND_PAD, \
- SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE)
-
#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0)
#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0)
#define GEN6_KERNEL(f) (((f) >> 16) & 0xf)
@@ -437,7 +435,7 @@ gen6_emit_viewports(struct sna *sna)
(4 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
- OUT_BATCH(sna->render_state.gen6.cc_vp);
+ OUT_BATCH(0);
}
static void
@@ -734,7 +732,7 @@ gen6_emit_vertex_elements(struct sna *sna,
* texture coordinate 1 if (has_mask is true): same as above
*/
struct gen6_render_state *render = &sna->render_state.gen6;
- uint32_t src_format, dw, offset;
+ uint32_t src_format, dw;
int id = GEN6_VERTEX(op->u.gen6.flags);
bool has_mask;
@@ -744,40 +742,6 @@ gen6_emit_vertex_elements(struct sna *sna,
return;
render->ve_id = id;
- if (id == VERTEX_2s2s) {
- DBG(("%s: setup COPY\n", __FUNCTION__));
-
- OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
- ((2 * (1 + 2)) + 1 - 2));
-
- OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
- GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
- 0 << VE0_OFFSET_SHIFT);
- OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
- GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
- GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
- GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);
-
- /* x,y */
- OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
- GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
- 0 << VE0_OFFSET_SHIFT);
- OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
- GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
- GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
- GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
-
- /* u0, v0, w0 */
- OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
- GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
- 4 << VE0_OFFSET_SHIFT);
- OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
- GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
- GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
- GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
- return;
- }
-
/* The VUE layout
* dword 0-3: pad (0.0, 0.0, 0.0, 0.0)
* dword 4-7: position (x, y, 1.0, 1.0),
@@ -806,20 +770,25 @@ gen6_emit_vertex_elements(struct sna *sna,
GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
- offset = 4;
/* u0, v0, w0 */
- DBG(("%s: first channel %d floats, offset=%d\n", __FUNCTION__, id & 3, offset));
+ DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
switch (id & 3) {
+ default:
+ assert(0);
+ case 0:
+ src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED;
+ dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+ dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+ dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
+ break;
case 1:
src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
break;
- default:
- assert(0);
case 2:
src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
@@ -835,15 +804,15 @@ gen6_emit_vertex_elements(struct sna *sna,
}
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
src_format << VE0_FORMAT_SHIFT |
- offset << VE0_OFFSET_SHIFT);
+ 4 << VE0_OFFSET_SHIFT);
OUT_BATCH(dw);
- offset += (id & 3) * sizeof(float);
/* u1, v1, w1 */
if (has_mask) {
- DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset));
+ unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
+ DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
- switch ((id >> 2) & 3) {
+ switch (id >> 2) {
case 1:
src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
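
/* In the reworked element setup above, the vertex-element id doubles as a
 * layout descriptor: bits 0-1 give the float count of the first texcoord
 * channel (0 now selecting the short2 copy layout that used to be the
 * VERTEX_2s2s special case), bits 2-3 the second channel. A sketch of
 * deriving floats_per_vertex from it (an inference from the switch
 * statements, not a helper in the driver): */

static int floats_per_vertex_from_id(unsigned id)
{
	int src = id & 3;	/* channel 0: u,v[,w] float count */
	int msk = id >> 2;	/* channel 1: 0 when there is no mask */

	if (src == 0)		/* copy layout: short2 coords share a dword */
		return 2;

	return 1 + src + msk;	/* packed (x,y) dword + texcoord floats */
}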
@@ -917,13 +886,13 @@ gen6_emit_state(struct sna *sna,
sna->render_state.gen6.first_state_packet = false;
}
-static void gen6_magic_ca_pass(struct sna *sna,
+static bool gen6_magic_ca_pass(struct sna *sna,
const struct sna_composite_op *op)
{
struct gen6_render_state *state = &sna->render_state.gen6;
if (!op->need_magic_ca_pass)
- return;
+ return false;
DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
sna->render.vertex_start, sna->render.vertex_index));
@@ -949,163 +918,7 @@ static void gen6_magic_ca_pass(struct sna *sna,
OUT_BATCH(0); /* index buffer offset, ignored */
state->last_primitive = sna->kgem.nbatch;
-}
-
-static void gen6_vertex_flush(struct sna *sna)
-{
- assert(sna->render_state.gen6.vertex_offset);
-
- DBG(("%s[%x] = %d\n", __FUNCTION__,
- 4*sna->render_state.gen6.vertex_offset,
- sna->render.vertex_index - sna->render.vertex_start));
- sna->kgem.batch[sna->render_state.gen6.vertex_offset] =
- sna->render.vertex_index - sna->render.vertex_start;
- sna->render_state.gen6.vertex_offset = 0;
-}
-
-static int gen6_vertex_finish(struct sna *sna)
-{
- struct kgem_bo *bo;
- unsigned int i;
-
- DBG(("%s: used=%d / %d\n", __FUNCTION__,
- sna->render.vertex_used, sna->render.vertex_size));
- assert(sna->render.vertex_used);
- assert(sna->render.nvertex_reloc);
-
- /* Note: we only need dword alignment (currently) */
-
- bo = sna->render.vbo;
- if (bo) {
- if (sna->render_state.gen6.vertex_offset)
- gen6_vertex_flush(sna);
-
- for (i = 0; i < sna->render.nvertex_reloc; i++) {
- DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
- i, sna->render.vertex_reloc[i]));
-
- sna->kgem.batch[sna->render.vertex_reloc[i]] =
- kgem_add_reloc(&sna->kgem,
- sna->render.vertex_reloc[i], bo,
- I915_GEM_DOMAIN_VERTEX << 16,
- 0);
- sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
- kgem_add_reloc(&sna->kgem,
- sna->render.vertex_reloc[i]+1, bo,
- I915_GEM_DOMAIN_VERTEX << 16,
- sna->render.vertex_used * 4 - 1);
- }
-
- sna->render.nvertex_reloc = 0;
- sna->render.vertex_used = 0;
- sna->render.vertex_index = 0;
- sna->render_state.gen6.vb_id = 0;
-
- kgem_bo_destroy(&sna->kgem, bo);
- }
-
- sna->render.vertices = NULL;
- sna->render.vbo = kgem_create_linear(&sna->kgem,
- 256*1024, CREATE_GTT_MAP);
- if (sna->render.vbo)
- sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
- if (sna->render.vertices == NULL) {
- if (sna->render.vbo)
- kgem_bo_destroy(&sna->kgem, sna->render.vbo);
- sna->render.vbo = NULL;
- return 0;
- }
-
- DBG(("%s: create vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
-
- kgem_bo_sync__cpu(&sna->kgem, sna->render.vbo);
- if (sna->render.vertex_used) {
- DBG(("%s: copying initial buffer x %d to handle=%d\n",
- __FUNCTION__,
- sna->render.vertex_used,
- sna->render.vbo->handle));
- memcpy(sna->render.vertices,
- sna->render.vertex_data,
- sizeof(float)*sna->render.vertex_used);
- }
- sna->render.vertex_size = 64 * 1024 - 1;
- return sna->render.vertex_size - sna->render.vertex_used;
-}
-
-static void gen6_vertex_close(struct sna *sna)
-{
- struct kgem_bo *bo, *free_bo = NULL;
- unsigned int i, delta = 0;
-
- assert(sna->render_state.gen6.vertex_offset == 0);
-
- if (!sna->render_state.gen6.vb_id)
- return;
-
- DBG(("%s: used=%d, vbo active? %d\n",
- __FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0));
-
- bo = sna->render.vbo;
- if (bo) {
- if (sna->render.vertex_size - sna->render.vertex_used < 64) {
- DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
- sna->render.vbo = NULL;
- sna->render.vertices = sna->render.vertex_data;
- sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
- free_bo = bo;
- }
- } else {
- if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
- DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
- sna->render.vertex_used, sna->kgem.nbatch));
- memcpy(sna->kgem.batch + sna->kgem.nbatch,
- sna->render.vertex_data,
- sna->render.vertex_used * 4);
- delta = sna->kgem.nbatch * 4;
- bo = NULL;
- sna->kgem.nbatch += sna->render.vertex_used;
- } else {
- bo = kgem_create_linear(&sna->kgem,
- 4*sna->render.vertex_used, 0);
- if (bo && !kgem_bo_write(&sna->kgem, bo,
- sna->render.vertex_data,
- 4*sna->render.vertex_used)) {
- kgem_bo_destroy(&sna->kgem, bo);
- bo = NULL;
- }
- DBG(("%s: new vbo: %d\n", __FUNCTION__,
- sna->render.vertex_used));
- free_bo = bo;
- }
- }
-
- assert(sna->render.nvertex_reloc);
- for (i = 0; i < sna->render.nvertex_reloc; i++) {
- DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
- i, sna->render.vertex_reloc[i]));
-
- sna->kgem.batch[sna->render.vertex_reloc[i]] =
- kgem_add_reloc(&sna->kgem,
- sna->render.vertex_reloc[i], bo,
- I915_GEM_DOMAIN_VERTEX << 16,
- delta);
- sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
- kgem_add_reloc(&sna->kgem,
- sna->render.vertex_reloc[i]+1, bo,
- I915_GEM_DOMAIN_VERTEX << 16,
- delta + sna->render.vertex_used * 4 - 1);
- }
- sna->render.nvertex_reloc = 0;
-
- if (sna->render.vbo == NULL) {
- sna->render.vertex_used = 0;
- sna->render.vertex_index = 0;
- assert(sna->render.vertices == sna->render.vertex_data);
- assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
- }
-
- if (free_bo)
- kgem_bo_destroy(&sna->kgem, free_bo);
+ return true;
}
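
/* The vertex-buffer management deleted above moved, essentially verbatim,
 * into the shared gen4_vertex.c. Its close path picks the cheapest backing
 * store: if the gap between the emitted commands and the surface-state heap
 * can hold the accumulated vertices, they are memcpy'd inline and the
 * relocations resolve against the batch itself; otherwise they are uploaded
 * to a throwaway linear bo. A sketch of that decision (hypothetical,
 * simplified struct): */

#include <stdint.h>
#include <string.h>

struct batch {
	uint32_t *cmds;
	int nbatch;	/* dwords of commands emitted so far */
	int surface;	/* dword offset the surface state grows down from */
};

/* Returns the byte offset of the inlined vertices, or -1 when the caller
 * must upload them to a separate linear bo instead. */
static int inline_vertices(struct batch *b, const float *data, int used)
{
	if (b->nbatch + used > b->surface)
		return -1;

	memcpy(b->cmds + b->nbatch, data, used * sizeof(*data));
	b->nbatch += used;
	return (b->nbatch - used) * 4;
}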
typedef struct gen6_surface_state_padded {
@@ -1193,16 +1006,6 @@ sampler_fill_init(struct gen6_sampler_state *ss)
sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
-static uint32_t gen6_create_cc_viewport(struct sna_static_stream *stream)
-{
- struct gen6_cc_viewport vp;
-
- vp.min_depth = -1.e35;
- vp.max_depth = 1.e35;
-
- return sna_static_stream_add(stream, &vp, sizeof(vp), 32);
-}
-
static uint32_t
gen6_tiling_bits(uint32_t tiling)
{
@@ -1229,9 +1032,10 @@ gen6_bind_bo(struct sna *sna,
uint32_t *ss;
uint32_t domains;
uint16_t offset;
+ uint32_t is_scanout = is_dst && bo->scanout;
/* After the first bind, we manage the cache domains within the batch */
- offset = kgem_bo_get_binding(bo, format);
+ offset = kgem_bo_get_binding(bo, format | is_scanout << 31);
if (offset) {
DBG(("[%x] bo(handle=%d), format=%d, reuse %s binding\n",
offset, bo->handle, format,
@@ -1258,9 +1062,9 @@ gen6_bind_bo(struct sna *sna,
ss[3] = (gen6_tiling_bits(bo->tiling) |
(bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT);
ss[4] = 0;
- ss[5] = 0;
+ ss[5] = is_scanout ? 0 : 3 << 16;
- kgem_bo_set_binding(bo, format, offset);
+ kgem_bo_set_binding(bo, format | is_scanout << 31, offset);
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
@@ -1270,254 +1074,6 @@ gen6_bind_bo(struct sna *sna,
return offset * sizeof(uint32_t);
}
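
/* gen6_bind_bo above starts keying its binding-table cache on
 * format | is_scanout << 31 because the surface state now differs by target:
 * ordinary surfaces get the cacheable bits (3 << 16) in ss[5], while scanout
 * buffers must stay uncached for the display engine, so the two variants
 * need distinct cache slots. A sketch mirroring the two expressions in the
 * hunk (hypothetical helpers): */

#include <stdint.h>

static uint32_t binding_key(uint32_t format, int is_dst, int is_scanout)
{
	/* one cache slot per (format, scanout) pair, since ss[5] differs */
	return format | (uint32_t)(is_dst && is_scanout) << 31;
}

static uint32_t surface_dword5(int is_scanout)
{
	return is_scanout ? 0 : 3u << 16;	/* cacheable unless scanned out */
}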
-fastcall static void
-gen6_emit_composite_primitive_solid(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- float *v;
- union {
- struct sna_coordinate p;
- float f;
- } dst;
-
- DBG(("%s: [%d+9] = (%d, %d)x(%d, %d)\n", __FUNCTION__,
- sna->render.vertex_used, r->dst.x, r->dst.y, r->width, r->height));
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 9;
- assert(sna->render.vertex_used <= sna->render.vertex_size);
- assert(!too_large(op->dst.x + r->dst.x + r->width,
- op->dst.y + r->dst.y + r->height));
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- dst.p.x = r->dst.x;
- v[3] = dst.f;
- dst.p.y = r->dst.y;
- v[6] = dst.f;
-
- v[5] = v[2] = v[1] = 1.;
- v[8] = v[7] = v[4] = 0.;
-}
-
-fastcall static void
-gen6_emit_composite_primitive_identity_source(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- union {
- struct sna_coordinate p;
- float f;
- } dst;
- float *v;
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 9;
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- dst.p.x = r->dst.x;
- v[3] = dst.f;
- dst.p.y = r->dst.y;
- v[6] = dst.f;
-
- v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
- v[1] = v[4] + r->width * op->src.scale[0];
-
- v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
- v[5] = v[2] = v[8] + r->height * op->src.scale[1];
-}
-
-fastcall static void
-gen6_emit_composite_primitive_simple_source(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- float *v;
- union {
- struct sna_coordinate p;
- float f;
- } dst;
-
- float xx = op->src.transform->matrix[0][0];
- float x0 = op->src.transform->matrix[0][2];
- float yy = op->src.transform->matrix[1][1];
- float y0 = op->src.transform->matrix[1][2];
- float sx = op->src.scale[0];
- float sy = op->src.scale[1];
- int16_t tx = op->src.offset[0];
- int16_t ty = op->src.offset[1];
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 3*3;
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
- v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
-
- dst.p.x = r->dst.x;
- v[3] = dst.f;
- v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
-
- dst.p.y = r->dst.y;
- v[6] = dst.f;
- v[8] = ((r->src.y + ty) * yy + y0) * sy;
-}
-
-fastcall static void
-gen6_emit_composite_primitive_affine_source(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- union {
- struct sna_coordinate p;
- float f;
- } dst;
- float *v;
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 9;
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x + r->width,
- op->src.offset[1] + r->src.y + r->height,
- op->src.transform,
- &v[1], &v[2]);
- v[1] *= op->src.scale[0];
- v[2] *= op->src.scale[1];
-
- dst.p.x = r->dst.x;
- v[3] = dst.f;
- _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
- op->src.offset[1] + r->src.y + r->height,
- op->src.transform,
- &v[4], &v[5]);
- v[4] *= op->src.scale[0];
- v[5] *= op->src.scale[1];
-
- dst.p.y = r->dst.y;
- v[6] = dst.f;
- _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
- op->src.offset[1] + r->src.y,
- op->src.transform,
- &v[7], &v[8]);
- v[7] *= op->src.scale[0];
- v[8] *= op->src.scale[1];
-}
-
-fastcall static void
-gen6_emit_composite_primitive_identity_source_mask(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- union {
- struct sna_coordinate p;
- float f;
- } dst;
- float src_x, src_y;
- float msk_x, msk_y;
- float w, h;
- float *v;
-
- src_x = r->src.x + op->src.offset[0];
- src_y = r->src.y + op->src.offset[1];
- msk_x = r->mask.x + op->mask.offset[0];
- msk_y = r->mask.y + op->mask.offset[1];
- w = r->width;
- h = r->height;
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 15;
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- v[1] = (src_x + w) * op->src.scale[0];
- v[2] = (src_y + h) * op->src.scale[1];
- v[3] = (msk_x + w) * op->mask.scale[0];
- v[4] = (msk_y + h) * op->mask.scale[1];
-
- dst.p.x = r->dst.x;
- v[5] = dst.f;
- v[6] = src_x * op->src.scale[0];
- v[7] = v[2];
- v[8] = msk_x * op->mask.scale[0];
- v[9] = v[4];
-
- dst.p.y = r->dst.y;
- v[10] = dst.f;
- v[11] = v[6];
- v[12] = src_y * op->src.scale[1];
- v[13] = v[8];
- v[14] = msk_y * op->mask.scale[1];
-}
-
-inline static void
-gen6_emit_composite_texcoord(struct sna *sna,
- const struct sna_composite_channel *channel,
- int16_t x, int16_t y)
-{
- x += channel->offset[0];
- y += channel->offset[1];
-
- if (channel->is_affine) {
- float s, t;
-
- sna_get_transformed_coordinates(x, y,
- channel->transform,
- &s, &t);
- OUT_VERTEX_F(s * channel->scale[0]);
- OUT_VERTEX_F(t * channel->scale[1]);
- } else {
- float s, t, w;
-
- sna_get_transformed_coordinates_3d(x, y,
- channel->transform,
- &s, &t, &w);
- OUT_VERTEX_F(s * channel->scale[0]);
- OUT_VERTEX_F(t * channel->scale[1]);
- OUT_VERTEX_F(w);
- }
-}
-
-static void
-gen6_emit_composite_vertex(struct sna *sna,
- const struct sna_composite_op *op,
- int16_t srcX, int16_t srcY,
- int16_t mskX, int16_t mskY,
- int16_t dstX, int16_t dstY)
-{
- OUT_VERTEX(dstX, dstY);
- gen6_emit_composite_texcoord(sna, &op->src, srcX, srcY);
- gen6_emit_composite_texcoord(sna, &op->mask, mskX, mskY);
-}
-
-fastcall static void
-gen6_emit_composite_primitive(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- gen6_emit_composite_vertex(sna, op,
- r->src.x + r->width, r->src.y + r->height,
- r->mask.x + r->width, r->mask.y + r->height,
- r->dst.x + r->width, r->dst.y + r->height);
- gen6_emit_composite_vertex(sna, op,
- r->src.x, r->src.y + r->height,
- r->mask.x, r->mask.y + r->height,
- r->dst.x, r->dst.y + r->height);
- gen6_emit_composite_vertex(sna, op,
- r->src.x, r->src.y,
- r->mask.x, r->mask.y,
- r->dst.x, r->dst.y);
-}
-
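
/* The emitters deleted above were consolidated into gen4_vertex.c and are
 * now selected via gen4_choose_composite_emitter. They all normalise texture
 * coordinates the same way: channel->scale[] holds the reciprocal of the
 * surface size, so an identity-transform source needs only an add and a
 * multiply per component. A standalone sketch: */

/* e.g. a 256x128 source sampled at pixel (64, 32) with no offset:
 *   u = (64 + 0) * (1/256.f) = 0.25f
 *   v = (32 + 0) * (1/128.f) = 0.25f */
static void identity_texcoord(float x, float y,
			      float off_x, float off_y,
			      float scale_x, float scale_y,
			      float *u, float *v)
{
	*u = (x + off_x) * scale_x;
	*v = (y + off_y) * scale_y;
}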
static void gen6_emit_vertex_buffer(struct sna *sna,
const struct sna_composite_op *op)
{
@@ -1528,10 +1084,10 @@ static void gen6_emit_vertex_buffer(struct sna *sna,
4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT);
sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
OUT_BATCH(0);
- OUT_BATCH(0);
+ OUT_BATCH(~0); /* max address: disabled */
OUT_BATCH(0);
- sna->render_state.gen6.vb_id |= 1 << id;
+ sna->render.vb_id |= 1 << id;
}
static void gen6_emit_primitive(struct sna *sna)
@@ -1541,7 +1097,7 @@ static void gen6_emit_primitive(struct sna *sna)
__FUNCTION__,
sna->render.vertex_start,
sna->render.vertex_index));
- sna->render_state.gen6.vertex_offset = sna->kgem.nbatch - 5;
+ sna->render.vertex_offset = sna->kgem.nbatch - 5;
return;
}
@@ -1550,7 +1106,7 @@ static void gen6_emit_primitive(struct sna *sna)
_3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
0 << 9 |
4);
- sna->render_state.gen6.vertex_offset = sna->kgem.nbatch;
+ sna->render.vertex_offset = sna->kgem.nbatch;
OUT_BATCH(0); /* vertex count, to be filled in later */
OUT_BATCH(sna->render.vertex_index);
OUT_BATCH(1); /* single instance */
@@ -1569,13 +1125,16 @@ static bool gen6_rectangle_begin(struct sna *sna,
int id = 1 << GEN6_VERTEX(op->u.gen6.flags);
int ndwords;
+ if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
+ return true;
+
ndwords = op->need_magic_ca_pass ? 60 : 6;
- if ((sna->render_state.gen6.vb_id & id) == 0)
+ if ((sna->render.vb_id & id) == 0)
ndwords += 5;
if (!kgem_check_batch(&sna->kgem, ndwords))
return false;
- if ((sna->render_state.gen6.vb_id & id) == 0)
+ if ((sna->render.vb_id & id) == 0)
gen6_emit_vertex_buffer(sna, op);
gen6_emit_primitive(sna);
@@ -1585,17 +1144,30 @@ static bool gen6_rectangle_begin(struct sna *sna,
static int gen6_get_rectangles__flush(struct sna *sna,
const struct sna_composite_op *op)
{
+	/* Prevent discarding the new vbo after lock contention */
+ if (sna_vertex_wait__locked(&sna->render)) {
+ int rem = vertex_space(sna);
+ if (rem > op->floats_per_rect)
+ return rem;
+ }
+
if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5))
return 0;
- if (!kgem_check_exec(&sna->kgem, 1))
- return 0;
- if (!kgem_check_reloc(&sna->kgem, 2))
+ if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
return 0;
- if (op->need_magic_ca_pass && sna->render.vbo)
- return 0;
+ if (sna->render.vertex_offset) {
+ gen4_vertex_flush(sna);
+ if (gen6_magic_ca_pass(sna, op)) {
+ gen6_emit_flush(sna);
+ gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
+ gen6_emit_wm(sna,
+ GEN6_KERNEL(op->u.gen6.flags),
+ GEN6_VERTEX(op->u.gen6.flags) >> 2);
+ }
+ }
- return gen6_vertex_finish(sna);
+ return gen4_vertex_finish(sna);
}
inline static int gen6_get_rectangles(struct sna *sna,
@@ -1607,7 +1179,7 @@ inline static int gen6_get_rectangles(struct sna *sna,
start:
rem = vertex_space(sna);
- if (rem < op->floats_per_rect) {
+ if (unlikely(rem < op->floats_per_rect)) {
DBG(("flushing vbo for %s: %d < %d\n",
__FUNCTION__, rem, op->floats_per_rect));
rem = gen6_get_rectangles__flush(sna, op);
@@ -1615,7 +1187,7 @@ start:
goto flush;
}
- if (unlikely(sna->render_state.gen6.vertex_offset == 0 &&
+ if (unlikely(sna->render.vertex_offset == 0 &&
!gen6_rectangle_begin(sna, op)))
goto flush;
@@ -1627,10 +1199,11 @@ start:
return want;
flush:
- if (sna->render_state.gen6.vertex_offset) {
- gen6_vertex_flush(sna);
+ if (sna->render.vertex_offset) {
+ gen4_vertex_flush(sna);
gen6_magic_ca_pass(sna, op);
}
+ sna_vertex_wait__locked(&sna->render);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
@@ -1653,20 +1226,10 @@ inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna,
return table;
}
-static uint32_t
-gen6_choose_composite_vertex_buffer(const struct sna_composite_op *op)
-{
- int id = 2 + !op->is_affine;
- if (op->mask.bo)
- id |= id << 2;
- assert(id > 0 && id < 16);
- return id;
-}
-
-static void
-gen6_get_batch(struct sna *sna)
+static bool
+gen6_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
DBG(("%s: flushing batch: %d < %d+%d\n",
@@ -1678,6 +1241,8 @@ gen6_get_batch(struct sna *sna)
if (sna->render_state.gen6.needs_invariant)
gen6_emit_invariant(sna);
+
+ return kgem_bo_is_dirty(op->dst.bo);
}
static void gen6_emit_composite_state(struct sna *sna,
@@ -1687,8 +1252,7 @@ static void gen6_emit_composite_state(struct sna *sna,
uint16_t offset;
bool dirty;
- gen6_get_batch(sna);
- dirty = kgem_bo_is_dirty(op->dst.bo);
+ dirty = gen6_get_batch(sna, op);
binding_table = gen6_composite_get_binding_table(sna, &offset);
@@ -1726,11 +1290,10 @@ static void gen6_emit_composite_state(struct sna *sna,
static void
gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
- assert (sna->render_state.gen6.vertex_offset == 0);
+ assert (sna->render.vertex_offset == 0);
if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
- /* XXX propagate failure */
- gen6_vertex_finish(sna);
+ gen4_vertex_finish(sna);
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen6.floats_per_vertex,
@@ -1741,6 +1304,7 @@ gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op)
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex;
}
+ assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
}
fastcall static void
@@ -1775,9 +1339,9 @@ gen6_render_composite_box(struct sna *sna,
}
static void
-gen6_render_composite_boxes(struct sna *sna,
- const struct sna_composite_op *op,
- const BoxRec *box, int nbox)
+gen6_render_composite_boxes__blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
{
DBG(("composite_boxes(%d)\n", nbox));
@@ -1807,6 +1371,62 @@ gen6_render_composite_boxes(struct sna *sna,
} while (nbox);
}
+static void
+gen6_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen6_get_rectangles(sna, op, nbox,
+ gen6_emit_composite_state);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+ } while (nbox);
+}
+
+static void
+gen6_render_composite_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen6_get_rectangles(sna, op, nbox,
+ gen6_emit_composite_state);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
@@ -1887,8 +1507,7 @@ static void gen6_emit_video_state(struct sna *sna,
bool dirty;
int n_src, n;
- gen6_get_batch(sna);
- dirty = kgem_bo_is_dirty(op->dst.bo);
+ dirty = gen6_get_batch(sna, op);
src_surf_base[0] = 0;
src_surf_base[1] = 0;
@@ -1949,12 +1568,14 @@ gen6_render_video(struct sna *sna,
RegionPtr dstRegion,
short src_w, short src_h,
short drw_w, short drw_h,
+ short dx, short dy,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
- int nbox, dxo, dyo, pix_xoff, pix_yoff;
+ int nbox, pix_xoff, pix_yoff;
float src_scale_x, src_scale_y;
struct sna_pixmap *priv;
+ unsigned filter;
BoxPtr box;
DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
@@ -1983,15 +1604,22 @@ gen6_render_video(struct sna *sna,
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
+ if (src_w == drw_w && src_h == drw_h)
+ filter = SAMPLER_FILTER_NEAREST;
+ else
+ filter = SAMPLER_FILTER_BILINEAR;
+
tmp.u.gen6.flags =
- GEN6_SET_FLAGS(VIDEO_SAMPLER, NO_BLEND,
+ GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
+ SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
+ NO_BLEND,
is_planar_fourcc(frame->id) ?
GEN6_WM_KERNEL_VIDEO_PLANAR :
GEN6_WM_KERNEL_VIDEO_PACKED,
2);
tmp.priv = frame;
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
@@ -2012,9 +1640,6 @@ gen6_render_video(struct sna *sna,
pix_yoff = 0;
#endif
- dxo = dstRegion->extents.x1;
- dyo = dstRegion->extents.y1;
-
/* Use normalized texture coordinates */
src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
src_scale_y = ((float)src_h / frame->height) / (float)drw_h;
@@ -2032,16 +1657,16 @@ gen6_render_video(struct sna *sna,
gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state);
OUT_VERTEX(r.x2, r.y2);
- OUT_VERTEX_F((box->x2 - dxo) * src_scale_x);
- OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
+ OUT_VERTEX_F((box->x2 - dx) * src_scale_x);
+ OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
OUT_VERTEX(r.x1, r.y2);
- OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
- OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
+ OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
+ OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
OUT_VERTEX(r.x1, r.y1);
- OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
- OUT_VERTEX_F((box->y1 - dyo) * src_scale_y);
+ OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
+ OUT_VERTEX_F((box->y1 - dy) * src_scale_y);
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
@@ -2051,148 +1676,10 @@ gen6_render_video(struct sna *sna,
}
priv->clear = false;
- gen6_vertex_flush(sna);
+ gen4_vertex_flush(sna);
return true;
}
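
/* The video path above now derives the sampler filter from whether any
 * scaling actually happens, and each scale factor folds two mappings into
 * one multiply: destination pixels to source pixels (src_w / drw_w) and
 * source pixels to normalised texels (1 / frame->width). A worked sketch,
 * assuming a 640x360 frame drawn into a 1280x720 region: */

static float video_texcoord_u(short box_x, short dx,
			      short src_w, short frame_width, short drw_w)
{
	/* e.g. src_w = 640, frame_width = 640, drw_w = 1280:
	 * scale = (640/640.f)/1280 = 1/1280,
	 * so box_x - dx == 1280 samples u = 1.0f, the right edge */
	float scale = ((float)src_w / frame_width) / (float)drw_w;
	return (box_x - dx) * scale;
}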
-static bool
-gen6_composite_solid_init(struct sna *sna,
- struct sna_composite_channel *channel,
- uint32_t color)
-{
- DBG(("%s: color=%x\n", __FUNCTION__, color));
-
- channel->filter = PictFilterNearest;
- channel->repeat = RepeatNormal;
- channel->is_affine = true;
- channel->is_solid = true;
- channel->is_opaque = (color >> 24) == 0xff;
- channel->transform = NULL;
- channel->width = 1;
- channel->height = 1;
- channel->card_format = GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
-
- channel->bo = sna_render_get_solid(sna, color);
-
- channel->scale[0] = channel->scale[1] = 1;
- channel->offset[0] = channel->offset[1] = 0;
- return channel->bo != NULL;
-}
-
-static bool
-gen6_composite_linear_init(struct sna *sna,
- PicturePtr picture,
- struct sna_composite_channel *channel,
- int x, int y,
- int w, int h,
- int dst_x, int dst_y)
-{
- PictLinearGradient *linear =
- (PictLinearGradient *)picture->pSourcePict;
- pixman_fixed_t tx, ty;
- float x0, y0, sf;
- float dx, dy;
-
- DBG(("%s: p1=(%f, %f), p2=(%f, %f), src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
- __FUNCTION__,
- pixman_fixed_to_double(linear->p1.x), pixman_fixed_to_double(linear->p1.y),
- pixman_fixed_to_double(linear->p2.x), pixman_fixed_to_double(linear->p2.y),
- x, y, dst_x, dst_y, w, h));
-
- if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
- return 0;
-
- if (!sna_transform_is_affine(picture->transform)) {
- DBG(("%s: fallback due to projective transform\n",
- __FUNCTION__));
- return sna_render_picture_fixup(sna, picture, channel,
- x, y, w, h, dst_x, dst_y);
- }
-
- channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear);
- if (!channel->bo)
- return 0;
-
- channel->filter = PictFilterNearest;
- channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
- channel->width = channel->bo->pitch / 4;
- channel->height = 1;
- channel->pict_format = PICT_a8r8g8b8;
-
- channel->scale[0] = channel->scale[1] = 1;
- channel->offset[0] = channel->offset[1] = 0;
-
- if (sna_transform_is_translation(picture->transform, &tx, &ty)) {
- dx = pixman_fixed_to_double(linear->p2.x - linear->p1.x);
- dy = pixman_fixed_to_double(linear->p2.y - linear->p1.y);
-
- x0 = pixman_fixed_to_double(linear->p1.x);
- y0 = pixman_fixed_to_double(linear->p1.y);
-
- if (tx | ty) {
- x0 -= pixman_fixed_to_double(tx);
- y0 -= pixman_fixed_to_double(ty);
- }
- } else {
- struct pixman_f_vector p1, p2;
- struct pixman_f_transform m, inv;
-
- pixman_f_transform_from_pixman_transform(&m, picture->transform);
- DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n",
- __FUNCTION__,
- m.m[0][0], m.m[0][1], m.m[0][2],
- m.m[1][0], m.m[1][1], m.m[1][2],
- m.m[2][0], m.m[2][1], m.m[2][2]));
- if (!pixman_f_transform_invert(&inv, &m))
- return 0;
-
- p1.v[0] = pixman_fixed_to_double(linear->p1.x);
- p1.v[1] = pixman_fixed_to_double(linear->p1.y);
- p1.v[2] = 1.;
- pixman_f_transform_point(&inv, &p1);
-
- p2.v[0] = pixman_fixed_to_double(linear->p2.x);
- p2.v[1] = pixman_fixed_to_double(linear->p2.y);
- p2.v[2] = 1.;
- pixman_f_transform_point(&inv, &p2);
-
- DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n",
- __FUNCTION__,
- p1.v[0], p1.v[1], p1.v[2],
- p2.v[0], p2.v[1], p2.v[2]));
-
- dx = p2.v[0] - p1.v[0];
- dy = p2.v[1] - p1.v[1];
-
- x0 = p1.v[0];
- y0 = p1.v[1];
- }
-
- sf = dx*dx + dy*dy;
- dx /= sf;
- dy /= sf;
-
- channel->embedded_transform.matrix[0][0] = pixman_double_to_fixed(dx);
- channel->embedded_transform.matrix[0][1] = pixman_double_to_fixed(dy);
- channel->embedded_transform.matrix[0][2] = -pixman_double_to_fixed(dx*(x0+dst_x-x) + dy*(y0+dst_y-y));
-
- channel->embedded_transform.matrix[1][0] = 0;
- channel->embedded_transform.matrix[1][1] = 0;
- channel->embedded_transform.matrix[1][2] = pixman_double_to_fixed(.5);
-
- channel->embedded_transform.matrix[2][0] = 0;
- channel->embedded_transform.matrix[2][1] = 0;
- channel->embedded_transform.matrix[2][2] = pixman_fixed_1;
-
- channel->transform = &channel->embedded_transform;
- channel->is_affine = 1;
-
- DBG(("%s: dx=%f, dy=%f, offset=%f\n",
- __FUNCTION__, dx, dy, -dx*(x0-x+dst_x) + -dy*(y0-y+dst_y)));
-
- return channel->bo != NULL;
-}
-
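
/* The removed linear-gradient setup (now gen4_channel_init_linear) reduces
 * the gradient to a 1D texture lookup: with g = p2 - p1 and s = g / |g|^2,
 * the texcoord is t = s . (P - p1), which is 0 at p1 and 1 at p2; the code
 * above bakes s and the constant term into an embedded transform. A
 * standalone sketch of the projection: */

static float gradient_t(float px, float py,
			float p1x, float p1y, float p2x, float p2y)
{
	float dx = p2x - p1x, dy = p2y - p1y;
	float norm = dx * dx + dy * dy;	/* |p2 - p1|^2 */

	dx /= norm;
	dy /= norm;
	return dx * (px - p1x) + dy * (py - p1y);
}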
static int
gen6_composite_picture(struct sna *sna,
PicturePtr picture,
@@ -2213,16 +1700,16 @@ gen6_composite_picture(struct sna *sna,
channel->card_format = -1;
if (sna_picture_is_solid(picture, &color))
- return gen6_composite_solid_init(sna, channel, color);
+ return gen4_channel_init_solid(sna, channel, color);
if (picture->pDrawable == NULL) {
int ret;
if (picture->pSourcePict->type == SourcePictTypeLinear)
- return gen6_composite_linear_init(sna, picture, channel,
- x, y,
- w, h,
- dst_x, dst_y);
+ return gen4_channel_init_linear(sna, picture, channel,
+ x, y,
+ w, h,
+ dst_x, dst_y);
DBG(("%s -- fixup, gradient\n", __FUNCTION__));
ret = -1;
@@ -2273,7 +1760,8 @@ gen6_composite_picture(struct sna *sna,
channel->card_format = gen6_get_card_format(picture->format);
if (channel->card_format == (unsigned)-1)
return sna_render_picture_convert(sna, picture, channel, pixmap,
- x, y, w, h, dst_x, dst_y);
+ x, y, w, h, dst_x, dst_y,
+ false);
if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
@@ -2300,8 +1788,9 @@ static void gen6_render_composite_done(struct sna *sna,
{
DBG(("%s\n", __FUNCTION__));
- if (sna->render_state.gen6.vertex_offset) {
- gen6_vertex_flush(sna);
+ assert(!sna->render.active);
+ if (sna->render.vertex_offset) {
+ gen4_vertex_flush(sna);
gen6_magic_ca_pass(sna, op);
}
@@ -2360,17 +1849,11 @@ gen6_composite_set_target(struct sna *sna,
return true;
}
-static bool prefer_blt_ring(struct sna *sna)
-{
- if (PREFER_RENDER)
- return PREFER_RENDER < 0;
-
- return sna->kgem.ring != KGEM_RENDER;
-}
-
-static bool can_switch_to_blt(struct sna *sna)
+inline static bool can_switch_to_blt(struct sna *sna,
+ struct kgem_bo *bo,
+ unsigned flags)
{
- if (sna->kgem.ring == KGEM_BLT)
+ if (sna->kgem.ring != KGEM_RENDER)
return true;
if (NO_RING_SWITCH)
@@ -2379,7 +1862,13 @@ static bool can_switch_to_blt(struct sna *sna)
if (!sna->kgem.has_semaphores)
return false;
- return sna->kgem.mode == KGEM_NONE || kgem_is_idle(&sna->kgem);
+ if (flags & COPY_LAST)
+ return true;
+
+ if (bo && RQ_IS_BLT(bo->rq))
+ return true;
+
+ return kgem_ring_is_idle(&sna->kgem, KGEM_BLT);
}
static inline bool untiled_tlb_miss(struct kgem_bo *bo)
@@ -2387,9 +1876,19 @@ static inline bool untiled_tlb_miss(struct kgem_bo *bo)
return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096;
}
-static bool prefer_blt_bo(struct sna *sna, struct kgem_bo *bo)
+static int prefer_blt_bo(struct sna *sna, struct kgem_bo *bo)
{
- return untiled_tlb_miss(bo) && bo->pitch < MAXSHORT;
+ if (bo->rq)
+ return RQ_IS_BLT(bo->rq) ? 1 : -1;
+
+ return bo->tiling == I915_TILING_NONE || bo->scanout;
+}
+
+inline static bool prefer_blt_ring(struct sna *sna,
+ struct kgem_bo *bo,
+ unsigned flags)
+{
+ return can_switch_to_blt(sna, bo, flags);
}
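
/* The heuristics above replace the old "is the GPU idle?" test with cheaper,
 * more targeted questions: stay put if the batch is already on the BLT ring,
 * follow a buffer to whichever ring it is still busy on (crossing rings
 * costs a semaphore wait), and otherwise switch only when the BLT ring is
 * idle. A compressed sketch of that decision (hypothetical enum; RQ_IS_BLT
 * and kgem_ring_is_idle are the driver's real tests): */

enum ring { RING_RENDER, RING_BLT };

static int switch_to_blt_p(enum ring current, int bo_busy,
			   enum ring bo_ring, int copy_last, int blt_idle)
{
	if (current != RING_RENDER)
		return 1;	/* already off the render ring */
	if (copy_last)
		return 1;	/* final op: the switch cost is hidden */
	if (bo_busy)
		return bo_ring == RING_BLT;	/* avoid a cross-ring wait */
	return blt_idle;
}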
static bool
@@ -2397,7 +1896,7 @@ try_blt(struct sna *sna,
PicturePtr dst, PicturePtr src,
int width, int height)
{
- if (prefer_blt_ring(sna)) {
+ if (sna->kgem.ring == KGEM_BLT) {
DBG(("%s: already performing BLT\n", __FUNCTION__));
return true;
}
@@ -2408,7 +1907,7 @@ try_blt(struct sna *sna,
return true;
}
- if (can_switch_to_blt(sna) && sna_picture_is_solid(src, NULL))
+ if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, NULL, 0))
return true;
return false;
@@ -2436,12 +1935,6 @@ has_alphamap(PicturePtr p)
}
static bool
-untransformed(PicturePtr p)
-{
- return !p->transform || pixman_transform_is_int_translate(p->transform);
-}
-
-static bool
need_upload(PicturePtr p)
{
return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
@@ -2487,7 +1980,6 @@ gen6_composite_fallback(struct sna *sna,
PicturePtr mask,
PicturePtr dst)
{
- struct sna_pixmap *priv;
PixmapPtr src_pixmap;
PixmapPtr mask_pixmap;
PixmapPtr dst_pixmap;
@@ -2526,10 +2018,7 @@ gen6_composite_fallback(struct sna *sna,
}
/* If anything is on the GPU, push everything out to the GPU */
- priv = sna_pixmap(dst_pixmap);
- if (priv &&
- ((priv->gpu_damage && !priv->clear) ||
- (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)))) {
+ if (dst_use_gpu(dst_pixmap)) {
DBG(("%s: dst is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
@@ -2564,14 +2053,14 @@ gen6_composite_fallback(struct sna *sna,
if (too_large(dst_pixmap->drawable.width,
dst_pixmap->drawable.height) &&
- (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage))) {
+ dst_is_cpu(dst_pixmap)) {
DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
return true;
}
DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
__FUNCTION__));
- return false;
+ return dst_use_cpu(dst_pixmap);
}
static int
@@ -2592,7 +2081,7 @@ reuse_source(struct sna *sna,
}
if (sna_picture_is_solid(mask, &color))
- return gen6_composite_solid_init(sna, mc, color);
+ return gen4_channel_init_solid(sna, mc, color);
if (sc->is_solid)
return false;
@@ -2635,11 +2124,14 @@ prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp)
if (sna->kgem.ring == KGEM_BLT)
return true;
- if (!prefer_blt_ring(sna))
+ if (untiled_tlb_miss(tmp->dst.bo) ||
+ untiled_tlb_miss(tmp->src.bo))
+ return true;
+
+ if (!prefer_blt_ring(sna, tmp->dst.bo, 0))
return false;
- return (prefer_blt_bo(sna, tmp->dst.bo) ||
- prefer_blt_bo(sna, tmp->src.bo));
+ return (prefer_blt_bo(sna, tmp->dst.bo) | prefer_blt_bo(sna, tmp->src.bo)) > 0;
}
static bool
@@ -2696,7 +2188,7 @@ gen6_render_composite(struct sna *sna,
case -1:
goto cleanup_dst;
case 0:
- if (!gen6_composite_solid_init(sna, &tmp->src, 0))
+ if (!gen4_channel_init_solid(sna, &tmp->src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
@@ -2720,7 +2212,6 @@ gen6_render_composite(struct sna *sna,
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
- tmp->prim_emit = gen6_emit_composite_primitive;
if (mask) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
tmp->has_component_alpha = true;
@@ -2750,7 +2241,7 @@ gen6_render_composite(struct sna *sna,
case -1:
goto cleanup_src;
case 0:
- if (!gen6_composite_solid_init(sna, &tmp->mask, 0))
+ if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
goto cleanup_src;
/* fall through to fixup */
case 1:
@@ -2760,40 +2251,7 @@ gen6_render_composite(struct sna *sna,
}
tmp->is_affine &= tmp->mask.is_affine;
-
- if (tmp->src.transform == NULL && tmp->mask.transform == NULL)
- tmp->prim_emit = gen6_emit_composite_primitive_identity_source_mask;
-
- tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine;
- } else {
- if (tmp->src.is_solid) {
- DBG(("%s: choosing gen6_emit_composite_primitive_solid\n",
- __FUNCTION__));
- tmp->prim_emit = gen6_emit_composite_primitive_solid;
- if (tmp->src.is_opaque && op == PictOpOver)
- tmp->op = PictOpSrc;
- } else if (tmp->src.transform == NULL) {
- DBG(("%s: choosing gen6_emit_composite_primitive_identity_source\n",
- __FUNCTION__));
- tmp->prim_emit = gen6_emit_composite_primitive_identity_source;
- } else if (tmp->src.is_affine) {
- if (tmp->src.transform->matrix[0][1] == 0 &&
- tmp->src.transform->matrix[1][0] == 0) {
- tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
- tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
- DBG(("%s: choosing gen6_emit_composite_primitive_simple_source\n",
- __FUNCTION__));
- tmp->prim_emit = gen6_emit_composite_primitive_simple_source;
- } else {
- DBG(("%s: choosing gen6_emit_composite_primitive_affine_source\n",
- __FUNCTION__));
- tmp->prim_emit = gen6_emit_composite_primitive_affine_source;
- }
- }
-
- tmp->floats_per_vertex = 3 + !tmp->is_affine;
}
- tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
tmp->u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
@@ -2807,14 +2265,18 @@ gen6_render_composite(struct sna *sna,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine),
- gen6_choose_composite_vertex_buffer(tmp));
+ gen4_choose_composite_emitter(tmp));
tmp->blt = gen6_render_composite_blt;
tmp->box = gen6_render_composite_box;
- tmp->boxes = gen6_render_composite_boxes;
+ tmp->boxes = gen6_render_composite_boxes__blt;
+ if (tmp->emit_boxes) {
+ tmp->boxes = gen6_render_composite_boxes;
+ tmp->thread_boxes = gen6_render_composite_boxes__thread;
+ }
tmp->done = gen6_render_composite_done;
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
@@ -2843,167 +2305,6 @@ cleanup_dst:
}
#if !NO_COMPOSITE_SPANS
-inline static void
-gen6_emit_composite_texcoord_affine(struct sna *sna,
- const struct sna_composite_channel *channel,
- int16_t x, int16_t y)
-{
- float t[2];
-
- sna_get_transformed_coordinates(x + channel->offset[0],
- y + channel->offset[1],
- channel->transform,
- &t[0], &t[1]);
- OUT_VERTEX_F(t[0] * channel->scale[0]);
- OUT_VERTEX_F(t[1] * channel->scale[1]);
-}
-
-inline static void
-gen6_emit_composite_spans_vertex(struct sna *sna,
- const struct sna_composite_spans_op *op,
- int16_t x, int16_t y)
-{
- OUT_VERTEX(x, y);
- gen6_emit_composite_texcoord(sna, &op->base.src, x, y);
-}
-
-fastcall static void
-gen6_emit_composite_spans_primitive(struct sna *sna,
- const struct sna_composite_spans_op *op,
- const BoxRec *box,
- float opacity)
-{
- gen6_emit_composite_spans_vertex(sna, op, box->x2, box->y2);
- OUT_VERTEX_F(opacity);
-
- gen6_emit_composite_spans_vertex(sna, op, box->x1, box->y2);
- OUT_VERTEX_F(opacity);
-
- gen6_emit_composite_spans_vertex(sna, op, box->x1, box->y1);
- OUT_VERTEX_F(opacity);
-}
-
-fastcall static void
-gen6_emit_composite_spans_solid(struct sna *sna,
- const struct sna_composite_spans_op *op,
- const BoxRec *box,
- float opacity)
-{
- OUT_VERTEX(box->x2, box->y2);
- OUT_VERTEX_F(1); OUT_VERTEX_F(1);
- OUT_VERTEX_F(opacity);
-
- OUT_VERTEX(box->x1, box->y2);
- OUT_VERTEX_F(0); OUT_VERTEX_F(1);
- OUT_VERTEX_F(opacity);
-
- OUT_VERTEX(box->x1, box->y1);
- OUT_VERTEX_F(0); OUT_VERTEX_F(0);
- OUT_VERTEX_F(opacity);
-}
-
-fastcall static void
-gen6_emit_composite_spans_identity(struct sna *sna,
- const struct sna_composite_spans_op *op,
- const BoxRec *box,
- float opacity)
-{
- float *v;
- union {
- struct sna_coordinate p;
- float f;
- } dst;
-
- float sx = op->base.src.scale[0];
- float sy = op->base.src.scale[1];
- int16_t tx = op->base.src.offset[0];
- int16_t ty = op->base.src.offset[1];
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 3*4;
- assert(sna->render.vertex_used <= sna->render.vertex_size);
-
- dst.p.x = box->x2;
- dst.p.y = box->y2;
- v[0] = dst.f;
- v[1] = (box->x2 + tx) * sx;
- v[6] = v[2] = (box->y2 + ty) * sy;
-
- dst.p.x = box->x1;
- v[4] = dst.f;
- v[9] = v[5] = (box->x1 + tx) * sx;
-
- dst.p.y = box->y1;
- v[8] = dst.f;
- v[10] = (box->y1 + ty) * sy;
-
- v[11] = v[7] = v[3] = opacity;
-}
-
-fastcall static void
-gen6_emit_composite_spans_simple(struct sna *sna,
- const struct sna_composite_spans_op *op,
- const BoxRec *box,
- float opacity)
-{
- float *v;
- union {
- struct sna_coordinate p;
- float f;
- } dst;
-
- float xx = op->base.src.transform->matrix[0][0];
- float x0 = op->base.src.transform->matrix[0][2];
- float yy = op->base.src.transform->matrix[1][1];
- float y0 = op->base.src.transform->matrix[1][2];
- float sx = op->base.src.scale[0];
- float sy = op->base.src.scale[1];
- int16_t tx = op->base.src.offset[0];
- int16_t ty = op->base.src.offset[1];
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 3*4;
- assert(sna->render.vertex_used <= sna->render.vertex_size);
-
- dst.p.x = box->x2;
- dst.p.y = box->y2;
- v[0] = dst.f;
- v[1] = ((box->x2 + tx) * xx + x0) * sx;
- v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
-
- dst.p.x = box->x1;
- v[4] = dst.f;
- v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx;
-
- dst.p.y = box->y1;
- v[8] = dst.f;
- v[10] = ((box->y1 + ty) * yy + y0) * sy;
-
- v[11] = v[7] = v[3] = opacity;
-}
-
-fastcall static void
-gen6_emit_composite_spans_affine(struct sna *sna,
- const struct sna_composite_spans_op *op,
- const BoxRec *box,
- float opacity)
-{
- OUT_VERTEX(box->x2, box->y2);
- gen6_emit_composite_texcoord_affine(sna, &op->base.src,
- box->x2, box->y2);
- OUT_VERTEX_F(opacity);
-
- OUT_VERTEX(box->x1, box->y2);
- gen6_emit_composite_texcoord_affine(sna, &op->base.src,
- box->x1, box->y2);
- OUT_VERTEX_F(opacity);
-
- OUT_VERTEX(box->x1, box->y1);
- gen6_emit_composite_texcoord_affine(sna, &op->base.src,
- box->x1, box->y1);
- OUT_VERTEX_F(opacity);
-}
-
fastcall static void
gen6_render_composite_spans_box(struct sna *sna,
const struct sna_composite_spans_op *op,
@@ -3053,13 +2354,50 @@ gen6_render_composite_spans_boxes(struct sna *sna,
}
fastcall static void
+gen6_render_composite_spans_boxes__thread(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ op->base.dst.x, op->base.dst.y));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
+ gen6_emit_composite_state);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
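
Editorial note: the __thread box emitter above is the first gen6 consumer of the new vertex lock. A sketch of the helpers it leans on follows; the shape is inferred from these call sites and from the sna_vertex.c added elsewhere in this import, so treat the names and struct fields as assumptions rather than quotations:

/* Assumed shape of the vertex lock protocol: a mutex serializes
 * allocation of space in the vbo, an "active" counter tracks writers
 * filling their slice outside the lock, and a condition variable lets
 * the flush path drain all writers before recycling the vbo. */
static inline void sna_vertex_acquire__locked(struct sna_render *r)
{
	r->active++;				/* writer owns its slice */
}

static inline void sna_vertex_release__locked(struct sna_render *r)
{
	if (--r->active == 0)
		pthread_cond_signal(&r->wait);	/* last writer out */
}

static inline bool sna_vertex_wait__locked(struct sna_render *r)
{
	bool was_active = r->active;
	while (r->active)			/* drain concurrent writers */
		pthread_cond_wait(&r->wait, &r->lock);
	return was_active;			/* true if we had to block */
}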
+
+fastcall static void
gen6_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
DBG(("%s()\n", __FUNCTION__));
+ assert(!sna->render.active);
- if (sna->render_state.gen6.vertex_offset)
- gen6_vertex_flush(sna);
+ if (sna->render.vertex_offset)
+ gen4_vertex_flush(sna);
if (op->base.src.bo)
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
@@ -3070,23 +2408,38 @@ gen6_render_composite_spans_done(struct sna *sna,
static bool
gen6_check_composite_spans(struct sna *sna,
uint8_t op, PicturePtr src, PicturePtr dst,
- int16_t width, int16_t height, unsigned flags)
+ int16_t width, int16_t height,
+ unsigned flags)
{
- if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
- return false;
+ DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
+ __FUNCTION__, op, width, height, flags));
if (op >= ARRAY_SIZE(gen6_blend_op))
return false;
- if (gen6_composite_fallback(sna, src, NULL, dst))
+ if (gen6_composite_fallback(sna, src, NULL, dst)) {
+ DBG(("%s: operation would fallback\n", __FUNCTION__));
return false;
+ }
- if (need_tiling(sna, width, height)) {
- if (!is_gpu(dst->pDrawable)) {
- DBG(("%s: fallback, tiled operation not on GPU\n",
- __FUNCTION__));
+ if (need_tiling(sna, width, height) &&
+ !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
+ DBG(("%s: fallback, tiled operation not on GPU\n",
+ __FUNCTION__));
+ return false;
+ }
+
+ if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
+ struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
+ assert(priv);
+
+ if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
+ return true;
+
+ if (flags & COMPOSITE_SPANS_INPLACE_HINT)
return false;
- }
+
+ return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
}
return true;
@@ -3129,7 +2482,7 @@ gen6_render_composite_spans(struct sna *sna,
case -1:
goto cleanup_dst;
case 0:
- if (!gen6_composite_solid_init(sna, &tmp->base.src, 0))
+ if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
@@ -3141,23 +2494,6 @@ gen6_render_composite_spans(struct sna *sna,
tmp->base.is_affine = tmp->base.src.is_affine;
tmp->base.need_magic_ca_pass = false;
- tmp->prim_emit = gen6_emit_composite_spans_primitive;
- if (tmp->base.src.is_solid) {
- tmp->prim_emit = gen6_emit_composite_spans_solid;
- } else if (tmp->base.src.transform == NULL) {
- tmp->prim_emit = gen6_emit_composite_spans_identity;
- } else if (tmp->base.is_affine) {
- if (tmp->base.src.transform->matrix[0][1] == 0 &&
- tmp->base.src.transform->matrix[1][0] == 0) {
- tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
- tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
- tmp->prim_emit = gen6_emit_composite_spans_simple;
- } else
- tmp->prim_emit = gen6_emit_composite_spans_affine;
- }
- tmp->base.floats_per_vertex = 4 + !tmp->base.is_affine;
- tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;
-
tmp->base.u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
tmp->base.src.repeat,
@@ -3165,13 +2501,15 @@ gen6_render_composite_spans(struct sna *sna,
SAMPLER_EXTEND_PAD),
gen6_get_blend(tmp->base.op, false, tmp->base.dst.format),
GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine,
- 1 << 2 | (2+!tmp->base.is_affine));
+ gen4_choose_spans_emitter(tmp));
tmp->box = gen6_render_composite_spans_box;
tmp->boxes = gen6_render_composite_spans_boxes;
+ if (tmp->emit_boxes)
+ tmp->thread_boxes = gen6_render_composite_spans_boxes__thread;
tmp->done = gen6_render_composite_spans_done;
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->base.dst.bo, tmp->base.src.bo,
NULL)) {
@@ -3205,8 +2543,7 @@ gen6_emit_copy_state(struct sna *sna,
uint16_t offset;
bool dirty;
- gen6_get_batch(sna);
- dirty = kgem_bo_is_dirty(op->dst.bo);
+ dirty = gen6_get_batch(sna, op);
binding_table = gen6_composite_get_binding_table(sna, &offset);
@@ -3235,13 +2572,27 @@ static inline bool prefer_blt_copy(struct sna *sna,
struct kgem_bo *dst_bo,
unsigned flags)
{
+ if (flags & COPY_SYNC)
+ return false;
+
if (PREFER_RENDER)
return PREFER_RENDER > 0;
- return (sna->kgem.ring == KGEM_BLT ||
- (flags & COPY_LAST && sna->kgem.mode == KGEM_NONE) ||
- prefer_blt_bo(sna, src_bo) ||
- prefer_blt_bo(sna, dst_bo));
+ if (sna->kgem.ring == KGEM_BLT)
+ return true;
+
+ if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags))
+ return true;
+
+ if (untiled_tlb_miss(src_bo) ||
+ untiled_tlb_miss(dst_bo))
+ return true;
+
+ if (!prefer_blt_ring(sna, dst_bo, flags))
+ return false;
+
+ return (prefer_blt_bo(sna, src_bo) >= 0 &&
+ prefer_blt_bo(sna, dst_bo) > 0);
}
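
The rewritten heuristic is a chain of early returns; for the reader, the decision order (modulo the compile-time PREFER_RENDER override) is summarized here:

/* Editorial summary, not patch content:
 * 1. COPY_SYNC never moves to the blitter.
 * 2. Already on the BLT ring -> stay there.
 * 3. Self-copy (src_bo == dst_bo) and a ring switch is cheap -> switch.
 * 4. Either bo is untiled with a TLB-hostile pitch -> switch.
 * 5. Otherwise consult per-bo ring affinity: the source must merely
 *    not object (>= 0) while the destination must actively prefer
 *    the blitter (> 0). */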
inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents)
@@ -3322,7 +2673,7 @@ fallback_blt:
if (too_large(extents.x2-extents.x1, extents.y2-extents.y1))
goto fallback_blt;
- if ((flags & COPY_LAST || can_switch_to_blt(sna)) &&
+ if (can_switch_to_blt(sna, dst_bo, flags) &&
sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
@@ -3429,7 +2780,7 @@ fallback_blt:
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER);
assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX);
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
@@ -3472,7 +2823,7 @@ fallback_blt:
} while (--n_this_time);
} while (n);
- gen6_vertex_flush(sna);
+ gen4_vertex_flush(sna);
sna_render_composite_redirect_done(sna, &tmp);
if (tmp.src.bo != src_bo)
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
@@ -3485,6 +2836,14 @@ fallback_tiled_dst:
if (tmp.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
+ if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
+ sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n))
+ return true;
+
return sna_tiling_copy_boxes(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
@@ -3519,8 +2878,9 @@ gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
DBG(("%s()\n", __FUNCTION__));
- if (sna->render_state.gen6.vertex_offset)
- gen6_vertex_flush(sna);
+ assert(!sna->render.active);
+ if (sna->render.vertex_offset)
+ gen4_vertex_flush(sna);
}
static bool
@@ -3585,7 +2945,7 @@ fallback:
assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER);
assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX);
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
@@ -3608,8 +2968,7 @@ gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
uint16_t offset;
bool dirty;
- gen6_get_batch(sna);
- dirty = kgem_bo_is_dirty(op->dst.bo);
+ dirty = gen6_get_batch(sna, op);
binding_table = gen6_composite_get_binding_table(sna, &offset);
@@ -3640,9 +2999,10 @@ static inline bool prefer_blt_fill(struct sna *sna,
if (PREFER_RENDER)
return PREFER_RENDER < 0;
- return (can_switch_to_blt(sna) ||
- prefer_blt_ring(sna) ||
- untiled_tlb_miss(bo));
+ if (untiled_tlb_miss(bo))
+ return true;
+
+ return prefer_blt_ring(sna, bo, 0) || prefer_blt_bo(sna, bo) >= 0;
}
static bool
@@ -3773,7 +3133,7 @@ gen6_render_fill_boxes(struct sna *sna,
} while (--n_this_time);
} while (n);
- gen6_vertex_flush(sna);
+ gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
sna_render_composite_redirect_done(sna, &tmp);
return true;
@@ -3866,8 +3226,9 @@ gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op)
{
DBG(("%s()\n", __FUNCTION__));
- if (sna->render_state.gen6.vertex_offset)
- gen6_vertex_flush(sna);
+ assert(!sna->render.active);
+ if (sna->render.vertex_offset)
+ gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
}
@@ -3999,8 +3360,11 @@ gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
- _kgem_submit(&sna->kgem);
- assert(kgem_check_bo(&sna->kgem, bo, NULL));
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
+ kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+ return false;
+ }
}
gen6_emit_fill_state(sna, &tmp);
@@ -4021,7 +3385,7 @@ gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
v[7] = v[2] = v[3] = 1;
v[6] = v[10] = v[11] = 0;
- gen6_vertex_flush(sna);
+ gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return true;
@@ -4082,8 +3446,11 @@ gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
- _kgem_submit(&sna->kgem);
- assert(kgem_check_bo(&sna->kgem, bo, NULL));
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
+ kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+ return false;
+ }
}
gen6_emit_fill_state(sna, &tmp);
@@ -4103,7 +3470,7 @@ gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
v[7] = v[2] = v[3] = 1;
v[6] = v[10] = v[11] = 0;
- gen6_vertex_flush(sna);
+ gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return true;
@@ -4111,20 +3478,20 @@ gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
static void gen6_render_flush(struct sna *sna)
{
- gen6_vertex_close(sna);
+ gen4_vertex_close(sna);
+
+ assert(sna->render.vb_id == 0);
+ assert(sna->render.vertex_offset == 0);
}
static void
gen6_render_context_switch(struct kgem *kgem,
int new_mode)
{
- if (!new_mode)
- return;
-
- DBG(("%s: from %d to %d\n", __FUNCTION__, kgem->mode, new_mode));
-
- if (kgem->mode)
- kgem_submit(kgem);
+ if (kgem->nbatch) {
+ DBG(("%s: from %d to %d\n", __FUNCTION__, kgem->mode, new_mode));
+ _kgem_submit(kgem);
+ }
kgem->ring = new_mode;
}
@@ -4154,6 +3521,7 @@ gen6_render_expire(struct kgem *kgem)
if (sna->render.vbo && !sna->render.vertex_used) {
DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
kgem_bo_destroy(kgem, sna->render.vbo);
+ assert(!sna->render.active);
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
@@ -4166,7 +3534,6 @@ static void gen6_render_reset(struct sna *sna)
{
sna->render_state.gen6.needs_invariant = true;
sna->render_state.gen6.first_state_packet = true;
- sna->render_state.gen6.vb_id = 0;
sna->render_state.gen6.ve_id = 3 << 2;
sna->render_state.gen6.last_primitive = -1;
@@ -4177,6 +3544,10 @@ static void gen6_render_reset(struct sna *sna)
sna->render_state.gen6.drawrect_offset = -1;
sna->render_state.gen6.drawrect_limit = -1;
sna->render_state.gen6.surface_table = -1;
+
+ sna->render.vertex_offset = 0;
+ sna->render.nvertex_reloc = 0;
+ sna->render.vb_id = 0;
}
static void gen6_render_fini(struct sna *sna)
@@ -4184,6 +3555,16 @@ static void gen6_render_fini(struct sna *sna)
kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo);
}
+static bool is_gt2(struct sna *sna)
+{
+ return DEVICE_ID(sna->PciInfo) & 0x30;
+}
+
+static bool is_mobile(struct sna *sna)
+{
+ return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6;
+}
+
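A worked example of the two predicates against well-known Sandy Bridge PCI IDs (illustrative values supplied by the editor, not taken from the patch):

/* is_gt2: DEVICE_ID & 0x30 -- GT2/GT2+ SKUs set bit 4 or 5.
 * is_mobile: (DEVICE_ID & 0xf) == 0x6 -- mobile SKUs end in 6.
 *
 *   0x0102 GT1 desktop    0x0102 & 0x30 == 0x00  -> gt1
 *   0x0112 GT2 desktop    0x0112 & 0x30 == 0x10  -> gt2
 *   0x0122 GT2+ desktop   0x0122 & 0x30 == 0x20  -> gt2
 *   0x0106 GT1 mobile     0x0106 & 0x0f == 0x06  -> mobile
 *   0x0116 GT2 mobile     gt2 and mobile
 */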
static bool gen6_render_setup(struct sna *sna)
{
struct gen6_render_state *state = &sna->render_state.gen6;
@@ -4192,7 +3573,7 @@ static bool gen6_render_setup(struct sna *sna)
int i, j, k, l, m;
state->info = &gt1_info;
- if (DEVICE_ID(sna->PciInfo) & 0x20)
+ if (is_gt2(sna))
state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */
sna_static_stream_init(&general);
@@ -4256,7 +3637,6 @@ static bool gen6_render_setup(struct sna *sna)
}
}
- state->cc_vp = gen6_create_cc_viewport(&general);
state->cc_blend = gen6_composite_create_blend_state(&general);
state->general_bo = sna_static_stream_fini(sna, &general);
@@ -4274,10 +3654,14 @@ bool gen6_render_init(struct sna *sna)
#if !NO_COMPOSITE
sna->render.composite = gen6_render_composite;
+ sna->render.prefer_gpu |= PREFER_GPU_RENDER;
+
#endif
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen6_check_composite_spans;
sna->render.composite_spans = gen6_render_composite_spans;
+ if (is_mobile(sna))
+ sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
sna->render.video = gen6_render_video;
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 4d94c803c..f05d6f926 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -42,6 +42,8 @@
#include "brw/brw.h"
#include "gen7_render.h"
+#include "gen4_source.h"
+#include "gen4_vertex.h"
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
@@ -113,6 +115,24 @@ static const struct gt_info hsw_gt_info = {
.urb = { 128, 64, 64 },
};
+static const struct gt_info hsw_gt1_info = {
+ .max_vs_threads = 70,
+ .max_gs_threads = 70,
+ .max_wm_threads =
+ (102 - 1) << HSW_PS_MAX_THREADS_SHIFT |
+ 1 << HSW_PS_SAMPLE_MASK_SHIFT,
+ .urb = { 128, 640, 256 },
+};
+
+static const struct gt_info hsw_gt2_info = {
+ .max_vs_threads = 280,
+ .max_gs_threads = 280,
+ .max_wm_threads =
+ (204 - 1) << HSW_PS_MAX_THREADS_SHIFT |
+ 1 << HSW_PS_SAMPLE_MASK_SHIFT,
+ .urb = { 256, 1664, 640 },
+};
+
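Worth noting about the tables above (an editorial observation, not new patch content):

/* max_wm_threads is stored pre-packed: the thread count is biased by
 * one ((n - 1) << HSW_PS_MAX_THREADS_SHIFT) with the sample mask ORed
 * into the same dword, so the value can presumably be ORed straight
 * into 3DSTATE_PS at emission time rather than recomputed per batch.
 * The split into hsw_gt1_info/hsw_gt2_info replaces the single
 * hsw_gt_info placeholder with per-SKU thread and URB limits. */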
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
@@ -209,10 +229,6 @@ static const struct blendinfo {
#define FILL_FLAGS(op, format) GEN7_SET_FLAGS(FILL_SAMPLER, gen7_get_blend((op), false, (format)), GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN7_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
-#define VIDEO_SAMPLER \
- SAMPLER_OFFSET(SAMPLER_FILTER_BILINEAR, SAMPLER_EXTEND_PAD, \
- SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE)
-
#define GEN7_SAMPLER(f) (((f) >> 16) & 0xfff0)
#define GEN7_BLEND(f) (((f) >> 0) & 0x7ff0)
#define GEN7_READS_DST(f) (((f) >> 15) & 1)
@@ -627,9 +643,9 @@ gen7_emit_cc_invariant(struct sna *sna)
OUT_BATCH(0);
#endif
- assert(is_aligned(sna->render_state.gen7.cc_vp, 32));
+ /* XXX clear to be safe */
OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
- OUT_BATCH(sna->render_state.gen7.cc_vp);
+ OUT_BATCH(0);
}
static void
@@ -865,7 +881,7 @@ gen7_emit_vertex_elements(struct sna *sna,
* texture coordinate 1 if (has_mask is true): same as above
*/
struct gen7_render_state *render = &sna->render_state.gen7;
- uint32_t src_format, dw, offset;
+ uint32_t src_format, dw;
int id = GEN7_VERTEX(op->u.gen7.flags);
bool has_mask;
@@ -875,39 +891,6 @@ gen7_emit_vertex_elements(struct sna *sna,
return;
render->ve_id = id;
- if (id == VERTEX_2s2s) {
- DBG(("%s: setup COPY\n", __FUNCTION__));
-
- OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS |
- ((2 * (1 + 2)) + 1 - 2));
-
- OUT_BATCH(VERTEX_2s2s << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
- GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT |
- 0 << GEN7_VE0_OFFSET_SHIFT);
- OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT |
- GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT |
- GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
- GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT);
-
- /* x,y */
- OUT_BATCH(VERTEX_2s2s << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
- GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT |
- 0 << GEN7_VE0_OFFSET_SHIFT); /* offsets vb in bytes */
- OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
- GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
- GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
- GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);
-
- OUT_BATCH(VERTEX_2s2s << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
- GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT |
- 4 << GEN7_VE0_OFFSET_SHIFT); /* offset vb in bytes */
- OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
- GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
- GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
- GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);
- return;
- }
-
/* The VUE layout
	 * dword 0-3: pad (0.0, 0.0, 0.0, 0.0)
* dword 4-7: position (x, y, 1.0, 1.0),
@@ -936,20 +919,25 @@ gen7_emit_vertex_elements(struct sna *sna,
GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);
- offset = 4;
/* u0, v0, w0 */
- DBG(("%s: first channel %d floats, offset=%d\n", __FUNCTION__, id & 3, offset));
+ DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
switch (id & 3) {
+ default:
+ assert(0);
+ case 0:
+ src_format = GEN7_SURFACEFORMAT_R16G16_SSCALED;
+ dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
+ dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
+ dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
+ break;
case 1:
src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
break;
- default:
- assert(0);
case 2:
src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
@@ -965,15 +953,15 @@ gen7_emit_vertex_elements(struct sna *sna,
}
OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
src_format << GEN7_VE0_FORMAT_SHIFT |
- offset << GEN7_VE0_OFFSET_SHIFT);
+ 4 << GEN7_VE0_OFFSET_SHIFT);
OUT_BATCH(dw);
- offset += (id & 3) * sizeof(float);
/* u1, v1, w1 */
if (has_mask) {
- DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset));
+ unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
+ DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
- switch ((id >> 2) & 3) {
+ switch (id >> 2) {
case 1:
src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
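
An editorial gloss on the offset expression a few lines up, since it is subtle:

/* The position element is two SSCALED int16s, i.e. 4 bytes, so the
 * mask channel starts at byte 4 plus the size of the first texcoord
 * channel.  (id & 3) is that channel's float count; the GNU ?:
 * operator maps the VERTEX_2s2s case (id & 3 == 0, two int16s) to one
 * float-sized slot, keeping the arithmetic uniform now that the
 * dedicated COPY setup block has been folded into the common path. */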
@@ -1064,20 +1052,22 @@ gen7_emit_state(struct sna *sna,
sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags);
}
-static void gen7_magic_ca_pass(struct sna *sna,
+static bool gen7_magic_ca_pass(struct sna *sna,
const struct sna_composite_op *op)
{
struct gen7_render_state *state = &sna->render_state.gen7;
if (!op->need_magic_ca_pass)
- return;
+ return false;
DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
sna->render.vertex_start, sna->render.vertex_index));
gen7_emit_pipe_invalidate(sna);
- gen7_emit_cc(sna, gen7_get_blend(PictOpAdd, true, op->dst.format));
+ gen7_emit_cc(sna,
+ GEN7_BLEND(gen7_get_blend(PictOpAdd, true,
+ op->dst.format)));
gen7_emit_wm(sna,
gen7_choose_composite_kernel(PictOpAdd,
true, true,
@@ -1092,155 +1082,7 @@ static void gen7_magic_ca_pass(struct sna *sna,
OUT_BATCH(0); /* index buffer offset, ignored */
state->last_primitive = sna->kgem.nbatch;
-}
-
-static void gen7_vertex_flush(struct sna *sna)
-{
- assert(sna->render_state.gen7.vertex_offset);
-
- DBG(("%s[%x] = %d\n", __FUNCTION__,
- 4*sna->render_state.gen7.vertex_offset,
- sna->render.vertex_index - sna->render.vertex_start));
- sna->kgem.batch[sna->render_state.gen7.vertex_offset] =
- sna->render.vertex_index - sna->render.vertex_start;
- sna->render_state.gen7.vertex_offset = 0;
-}
-
-static int gen7_vertex_finish(struct sna *sna)
-{
- struct kgem_bo *bo;
- unsigned int i;
-
- assert(sna->render.vertex_used);
- assert(sna->render.nvertex_reloc);
-
- /* Note: we only need dword alignment (currently) */
-
- bo = sna->render.vbo;
- if (bo) {
- if (sna->render_state.gen7.vertex_offset)
- gen7_vertex_flush(sna);
-
- for (i = 0; i < sna->render.nvertex_reloc; i++) {
- DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
- i, sna->render.vertex_reloc[i]));
-
- sna->kgem.batch[sna->render.vertex_reloc[i]] =
- kgem_add_reloc(&sna->kgem,
- sna->render.vertex_reloc[i], bo,
- I915_GEM_DOMAIN_VERTEX << 16,
- 0);
- sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
- kgem_add_reloc(&sna->kgem,
- sna->render.vertex_reloc[i]+1, bo,
- I915_GEM_DOMAIN_VERTEX << 16,
- sna->render.vertex_used * 4 - 1);
- }
-
- sna->render.nvertex_reloc = 0;
- sna->render.vertex_used = 0;
- sna->render.vertex_index = 0;
- sna->render_state.gen7.vb_id = 0;
-
- kgem_bo_destroy(&sna->kgem, bo);
- }
-
- sna->render.vertices = NULL;
- sna->render.vbo = kgem_create_linear(&sna->kgem,
- 256*1024, CREATE_GTT_MAP);
- if (sna->render.vbo)
- sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
- if (sna->render.vertices == NULL) {
- if (sna->render.vbo)
- kgem_bo_destroy(&sna->kgem, sna->render.vbo);
- sna->render.vbo = NULL;
- return 0;
- }
-
- kgem_bo_sync__cpu(&sna->kgem, sna->render.vbo);
- if (sna->render.vertex_used) {
- memcpy(sna->render.vertices,
- sna->render.vertex_data,
- sizeof(float)*sna->render.vertex_used);
- }
- sna->render.vertex_size = 64 * 1024 - 1;
- return sna->render.vertex_size - sna->render.vertex_used;
-}
-
-static void gen7_vertex_close(struct sna *sna)
-{
- struct kgem_bo *bo, *free_bo = NULL;
- unsigned int i, delta = 0;
-
- assert(sna->render_state.gen7.vertex_offset == 0);
-
- if (!sna->render_state.gen7.vb_id)
- return;
-
- DBG(("%s: used=%d, vbo active? %d\n",
- __FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0));
-
- bo = sna->render.vbo;
- if (bo) {
- if (sna->render.vertex_size - sna->render.vertex_used < 64) {
- DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
- sna->render.vbo = NULL;
- sna->render.vertices = sna->render.vertex_data;
- sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
- free_bo = bo;
- }
- } else {
- if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
- DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
- sna->render.vertex_used, sna->kgem.nbatch));
- memcpy(sna->kgem.batch + sna->kgem.nbatch,
- sna->render.vertex_data,
- sna->render.vertex_used * 4);
- delta = sna->kgem.nbatch * 4;
- bo = NULL;
- sna->kgem.nbatch += sna->render.vertex_used;
- } else {
- bo = kgem_create_linear(&sna->kgem,
- 4*sna->render.vertex_used, 0);
- if (bo && !kgem_bo_write(&sna->kgem, bo,
- sna->render.vertex_data,
- 4*sna->render.vertex_used)) {
- kgem_bo_destroy(&sna->kgem, bo);
- bo = NULL;
- }
- DBG(("%s: new vbo: %d\n", __FUNCTION__,
- sna->render.vertex_used));
- free_bo = bo;
- }
- }
-
- assert(sna->render.nvertex_reloc);
- for (i = 0; i < sna->render.nvertex_reloc; i++) {
- DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
- i, sna->render.vertex_reloc[i]));
-
- sna->kgem.batch[sna->render.vertex_reloc[i]] =
- kgem_add_reloc(&sna->kgem,
- sna->render.vertex_reloc[i], bo,
- I915_GEM_DOMAIN_VERTEX << 16,
- delta);
- sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
- kgem_add_reloc(&sna->kgem,
- sna->render.vertex_reloc[i]+1, bo,
- I915_GEM_DOMAIN_VERTEX << 16,
- delta + sna->render.vertex_used * 4 - 1);
- }
- sna->render.nvertex_reloc = 0;
-
- if (sna->render.vbo == NULL) {
- sna->render.vertex_used = 0;
- sna->render.vertex_index = 0;
- assert(sna->render.vertices == sna->render.vertex_data);
- assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
- }
-
- if (free_bo)
- kgem_bo_destroy(&sna->kgem, free_bo);
+ return true;
}
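
Since the bool return is the visible change here, a short note on why the pass exists:

/* Editorial note: component-alpha OVER cannot be expressed as a single
 * fixed-function blend, so SNA draws the batched rectangles twice --
 * the normal pass, then this "magic" replay of the same vertex range
 * with a PictOpAdd blend.  Returning whether the replay happened lets
 * callers that keep batching (see gen7_get_rectangles__flush later in
 * this patch) know the cc/wm state was clobbered and must be
 * re-emitted. */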
static void null_create(struct sna_static_stream *stream)
@@ -1315,16 +1157,6 @@ sampler_fill_init(struct gen7_sampler_state *ss)
sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
-static uint32_t gen7_create_cc_viewport(struct sna_static_stream *stream)
-{
- struct gen7_cc_viewport vp;
-
- vp.min_depth = -1.e35;
- vp.max_depth = 1.e35;
-
- return sna_static_stream_add(stream, &vp, sizeof(vp), 32);
-}
-
static uint32_t
gen7_tiling_bits(uint32_t tiling)
{
@@ -1351,11 +1183,12 @@ gen7_bind_bo(struct sna *sna,
uint32_t *ss;
uint32_t domains;
int offset;
+ uint32_t is_scanout = is_dst && bo->scanout;
COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32);
/* After the first bind, we manage the cache domains within the batch */
- offset = kgem_bo_get_binding(bo, format);
+ offset = kgem_bo_get_binding(bo, format | is_scanout << 31);
if (offset) {
if (is_dst)
kgem_bo_mark_dirty(bo);
@@ -1377,13 +1210,13 @@ gen7_bind_bo(struct sna *sna,
(height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
ss[4] = 0;
- ss[5] = 0;
+ ss[5] = is_scanout ? 0 : 3 << 16;
ss[6] = 0;
ss[7] = 0;
- if (sna->kgem.gen == 75)
+ if (sna->kgem.gen == 075)
ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
- kgem_bo_set_binding(bo, format, offset);
+ kgem_bo_set_binding(bo, format | is_scanout << 31, offset);
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
@@ -1393,251 +1226,6 @@ gen7_bind_bo(struct sna *sna,
return offset * sizeof(uint32_t);
}
-fastcall static void
-gen7_emit_composite_primitive_solid(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- float *v;
- union {
- struct sna_coordinate p;
- float f;
- } dst;
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 9;
- assert(sna->render.vertex_used <= sna->render.vertex_size);
- assert(!too_large(op->dst.x + r->dst.x + r->width,
- op->dst.y + r->dst.y + r->height));
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- dst.p.x = r->dst.x;
- v[3] = dst.f;
- dst.p.y = r->dst.y;
- v[6] = dst.f;
-
- v[5] = v[2] = v[1] = 1.;
- v[8] = v[7] = v[4] = 0.;
-}
-
-fastcall static void
-gen7_emit_composite_primitive_identity_source(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- union {
- struct sna_coordinate p;
- float f;
- } dst;
- float *v;
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 9;
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- dst.p.x = r->dst.x;
- v[3] = dst.f;
- dst.p.y = r->dst.y;
- v[6] = dst.f;
-
- v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
- v[1] = v[4] + r->width * op->src.scale[0];
-
- v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
- v[5] = v[2] = v[8] + r->height * op->src.scale[1];
-}
-
-fastcall static void
-gen7_emit_composite_primitive_simple_source(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- float *v;
- union {
- struct sna_coordinate p;
- float f;
- } dst;
-
- float xx = op->src.transform->matrix[0][0];
- float x0 = op->src.transform->matrix[0][2];
- float yy = op->src.transform->matrix[1][1];
- float y0 = op->src.transform->matrix[1][2];
- float sx = op->src.scale[0];
- float sy = op->src.scale[1];
- int16_t tx = op->src.offset[0];
- int16_t ty = op->src.offset[1];
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 3*3;
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
- v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
-
- dst.p.x = r->dst.x;
- v[3] = dst.f;
- v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
-
- dst.p.y = r->dst.y;
- v[6] = dst.f;
- v[8] = ((r->src.y + ty) * yy + y0) * sy;
-}
-
-fastcall static void
-gen7_emit_composite_primitive_affine_source(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- union {
- struct sna_coordinate p;
- float f;
- } dst;
- float *v;
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 9;
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x + r->width,
- op->src.offset[1] + r->src.y + r->height,
- op->src.transform,
- &v[1], &v[2]);
- v[1] *= op->src.scale[0];
- v[2] *= op->src.scale[1];
-
- dst.p.x = r->dst.x;
- v[3] = dst.f;
- _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
- op->src.offset[1] + r->src.y + r->height,
- op->src.transform,
- &v[4], &v[5]);
- v[4] *= op->src.scale[0];
- v[5] *= op->src.scale[1];
-
- dst.p.y = r->dst.y;
- v[6] = dst.f;
- _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
- op->src.offset[1] + r->src.y,
- op->src.transform,
- &v[7], &v[8]);
- v[7] *= op->src.scale[0];
- v[8] *= op->src.scale[1];
-}
-
-fastcall static void
-gen7_emit_composite_primitive_identity_source_mask(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- union {
- struct sna_coordinate p;
- float f;
- } dst;
- float src_x, src_y;
- float msk_x, msk_y;
- float w, h;
- float *v;
-
- src_x = r->src.x + op->src.offset[0];
- src_y = r->src.y + op->src.offset[1];
- msk_x = r->mask.x + op->mask.offset[0];
- msk_y = r->mask.y + op->mask.offset[1];
- w = r->width;
- h = r->height;
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 15;
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- v[1] = (src_x + w) * op->src.scale[0];
- v[2] = (src_y + h) * op->src.scale[1];
- v[3] = (msk_x + w) * op->mask.scale[0];
- v[4] = (msk_y + h) * op->mask.scale[1];
-
- dst.p.x = r->dst.x;
- v[5] = dst.f;
- v[6] = src_x * op->src.scale[0];
- v[7] = v[2];
- v[8] = msk_x * op->mask.scale[0];
- v[9] = v[4];
-
- dst.p.y = r->dst.y;
- v[10] = dst.f;
- v[11] = v[6];
- v[12] = src_y * op->src.scale[1];
- v[13] = v[8];
- v[14] = msk_y * op->mask.scale[1];
-}
-
-inline static void
-gen7_emit_composite_texcoord(struct sna *sna,
- const struct sna_composite_channel *channel,
- int16_t x, int16_t y)
-{
- x += channel->offset[0];
- y += channel->offset[1];
-
- if (channel->is_affine) {
- float s, t;
-
- sna_get_transformed_coordinates(x, y,
- channel->transform,
- &s, &t);
- OUT_VERTEX_F(s * channel->scale[0]);
- OUT_VERTEX_F(t * channel->scale[1]);
- } else {
- float s, t, w;
-
- sna_get_transformed_coordinates_3d(x, y,
- channel->transform,
- &s, &t, &w);
- OUT_VERTEX_F(s * channel->scale[0]);
- OUT_VERTEX_F(t * channel->scale[1]);
- OUT_VERTEX_F(w);
- }
-}
-
-static void
-gen7_emit_composite_vertex(struct sna *sna,
- const struct sna_composite_op *op,
- int16_t srcX, int16_t srcY,
- int16_t mskX, int16_t mskY,
- int16_t dstX, int16_t dstY)
-{
- OUT_VERTEX(dstX, dstY);
- gen7_emit_composite_texcoord(sna, &op->src, srcX, srcY);
- gen7_emit_composite_texcoord(sna, &op->mask, mskX, mskY);
-}
-
-fastcall static void
-gen7_emit_composite_primitive(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- gen7_emit_composite_vertex(sna, op,
- r->src.x + r->width, r->src.y + r->height,
- r->mask.x + r->width, r->mask.y + r->height,
- r->dst.x + r->width, r->dst.y + r->height);
- gen7_emit_composite_vertex(sna, op,
- r->src.x, r->src.y + r->height,
- r->mask.x, r->mask.y + r->height,
- r->dst.x, r->dst.y + r->height);
- gen7_emit_composite_vertex(sna, op,
- r->src.x, r->src.y,
- r->mask.x, r->mask.y,
- r->dst.x, r->dst.y);
-}
-
static void gen7_emit_vertex_buffer(struct sna *sna,
const struct sna_composite_op *op)
{
@@ -1650,22 +1238,22 @@ static void gen7_emit_vertex_buffer(struct sna *sna,
4*op->floats_per_vertex << GEN7_VB0_BUFFER_PITCH_SHIFT);
sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
OUT_BATCH(0);
- OUT_BATCH(0);
+ OUT_BATCH(~0); /* max address: disabled */
OUT_BATCH(0);
- sna->render_state.gen7.vb_id |= 1 << id;
+ sna->render.vb_id |= 1 << id;
}
static void gen7_emit_primitive(struct sna *sna)
{
if (sna->kgem.nbatch == sna->render_state.gen7.last_primitive) {
- sna->render_state.gen7.vertex_offset = sna->kgem.nbatch - 5;
+ sna->render.vertex_offset = sna->kgem.nbatch - 5;
return;
}
OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
- sna->render_state.gen7.vertex_offset = sna->kgem.nbatch;
+ sna->render.vertex_offset = sna->kgem.nbatch;
OUT_BATCH(0); /* vertex count, to be filled in later */
OUT_BATCH(sna->render.vertex_index);
OUT_BATCH(1); /* single instance */
@@ -1682,13 +1270,16 @@ static bool gen7_rectangle_begin(struct sna *sna,
int id = 1 << GEN7_VERTEX(op->u.gen7.flags);
int ndwords;
+ if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
+ return true;
+
ndwords = op->need_magic_ca_pass ? 60 : 6;
- if ((sna->render_state.gen7.vb_id & id) == 0)
+ if ((sna->render.vb_id & id) == 0)
ndwords += 5;
if (!kgem_check_batch(&sna->kgem, ndwords))
return false;
- if ((sna->render_state.gen7.vb_id & id) == 0)
+ if ((sna->render.vb_id & id) == 0)
gen7_emit_vertex_buffer(sna, op);
gen7_emit_primitive(sna);
@@ -1698,17 +1289,28 @@ static bool gen7_rectangle_begin(struct sna *sna,
static int gen7_get_rectangles__flush(struct sna *sna,
const struct sna_composite_op *op)
{
+	/* Prevent discarding a new vbo filled while we waited on the lock */
+ if (sna_vertex_wait__locked(&sna->render)) {
+ int rem = vertex_space(sna);
+ if (rem > op->floats_per_rect)
+ return rem;
+ }
+
if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6))
return 0;
- if (!kgem_check_exec(&sna->kgem, 1))
- return 0;
- if (!kgem_check_reloc(&sna->kgem, 2))
+ if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
return 0;
- if (op->need_magic_ca_pass && sna->render.vbo)
- return 0;
+ if (sna->render.vertex_offset) {
+ gen4_vertex_flush(sna);
+ if (gen7_magic_ca_pass(sna, op)) {
+ gen7_emit_pipe_invalidate(sna);
+ gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
+ gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
+ }
+ }
- return gen7_vertex_finish(sna);
+ return gen4_vertex_finish(sna);
}
inline static int gen7_get_rectangles(struct sna *sna,
@@ -1720,7 +1322,7 @@ inline static int gen7_get_rectangles(struct sna *sna,
start:
rem = vertex_space(sna);
- if (rem < op->floats_per_rect) {
+ if (unlikely(rem < op->floats_per_rect)) {
DBG(("flushing vbo for %s: %d < %d\n",
__FUNCTION__, rem, op->floats_per_rect));
rem = gen7_get_rectangles__flush(sna, op);
@@ -1728,7 +1330,7 @@ start:
goto flush;
}
- if (unlikely(sna->render_state.gen7.vertex_offset == 0 &&
+ if (unlikely(sna->render.vertex_offset == 0 &&
!gen7_rectangle_begin(sna, op)))
goto flush;
@@ -1740,10 +1342,11 @@ start:
return want;
flush:
- if (sna->render_state.gen7.vertex_offset) {
- gen7_vertex_flush(sna);
+ if (sna->render.vertex_offset) {
+ gen4_vertex_flush(sna);
gen7_magic_ca_pass(sna, op);
}
+ sna_vertex_wait__locked(&sna->render);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
@@ -1766,20 +1369,10 @@ inline static uint32_t *gen7_composite_get_binding_table(struct sna *sna,
return table;
}
-static uint32_t
-gen7_choose_composite_vertex_buffer(const struct sna_composite_op *op)
-{
- int id = 2 + !op->is_affine;
- if (op->mask.bo)
- id |= id << 2;
- assert(id > 0 && id < 16);
- return id;
-}
-
static void
-gen7_get_batch(struct sna *sna)
+gen7_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
DBG(("%s: flushing batch: %d < %d+%d\n",
@@ -1802,7 +1395,7 @@ static void gen7_emit_composite_state(struct sna *sna,
uint32_t *binding_table;
uint16_t offset;
- gen7_get_batch(sna);
+ gen7_get_batch(sna, op);
binding_table = gen7_composite_get_binding_table(sna, &offset);
@@ -1842,7 +1435,7 @@ gen7_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
if (op->floats_per_vertex != sna->render_state.gen7.floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
- gen7_vertex_finish(sna);
+ gen4_vertex_finish(sna);
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen7.floats_per_vertex,
@@ -1887,9 +1480,9 @@ gen7_render_composite_box(struct sna *sna,
}
static void
-gen7_render_composite_boxes(struct sna *sna,
- const struct sna_composite_op *op,
- const BoxRec *box, int nbox)
+gen7_render_composite_boxes__blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
{
DBG(("composite_boxes(%d)\n", nbox));
@@ -1919,6 +1512,62 @@ gen7_render_composite_boxes(struct sna *sna,
} while (nbox);
}
+static void
+gen7_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen7_get_rectangles(sna, op, nbox,
+ gen7_emit_composite_state);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+ } while (nbox);
+}
+
+static void
+gen7_render_composite_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen7_get_rectangles(sna, op, nbox,
+ gen7_emit_composite_state);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
@@ -1998,7 +1647,7 @@ static void gen7_emit_video_state(struct sna *sna,
uint16_t offset;
int n_src, n;
- gen7_get_batch(sna);
+ gen7_get_batch(sna, op);
src_surf_base[0] = 0;
src_surf_base[1] = 0;
@@ -2059,12 +1708,14 @@ gen7_render_video(struct sna *sna,
RegionPtr dstRegion,
short src_w, short src_h,
short drw_w, short drw_h,
+ short dx, short dy,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
- int nbox, dxo, dyo, pix_xoff, pix_yoff;
+ int nbox, pix_xoff, pix_yoff;
float src_scale_x, src_scale_y;
struct sna_pixmap *priv;
+ unsigned filter;
BoxPtr box;
DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
@@ -2093,15 +1744,22 @@ gen7_render_video(struct sna *sna,
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
+ if (src_w == drw_w && src_h == drw_h)
+ filter = SAMPLER_FILTER_NEAREST;
+ else
+ filter = SAMPLER_FILTER_BILINEAR;
+
tmp.u.gen7.flags =
- GEN7_SET_FLAGS(VIDEO_SAMPLER, NO_BLEND,
+ GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
+ SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
+ NO_BLEND,
is_planar_fourcc(frame->id) ?
GEN7_WM_KERNEL_VIDEO_PLANAR :
GEN7_WM_KERNEL_VIDEO_PACKED,
2);
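Two related simplifications in this hunk, noted for the reader:

/* Editorial note: the fixed VIDEO_SAMPLER (always bilinear on the
 * source plane) becomes a per-operation choice -- nearest when the
 * video is unscaled (src_w == drw_w && src_h == drw_h), bilinear
 * otherwise -- skipping filter cost on the 1:1 path.  Likewise the
 * dxo/dyo pair previously derived from dstRegion->extents is replaced
 * by caller-supplied dx/dy in the new signature. */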
tmp.priv = frame;
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
@@ -2122,9 +1780,6 @@ gen7_render_video(struct sna *sna,
pix_yoff = 0;
#endif
- dxo = dstRegion->extents.x1;
- dyo = dstRegion->extents.y1;
-
/* Use normalized texture coordinates */
src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
src_scale_y = ((float)src_h / frame->height) / (float)drw_h;
@@ -2142,16 +1797,16 @@ gen7_render_video(struct sna *sna,
gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state);
OUT_VERTEX(r.x2, r.y2);
- OUT_VERTEX_F((box->x2 - dxo) * src_scale_x);
- OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
+ OUT_VERTEX_F((box->x2 - dx) * src_scale_x);
+ OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
OUT_VERTEX(r.x1, r.y2);
- OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
- OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
+ OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
+ OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
OUT_VERTEX(r.x1, r.y1);
- OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
- OUT_VERTEX_F((box->y1 - dyo) * src_scale_y);
+ OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
+ OUT_VERTEX_F((box->y1 - dy) * src_scale_y);
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
@@ -2161,148 +1816,10 @@ gen7_render_video(struct sna *sna,
}
priv->clear = false;
- gen7_vertex_flush(sna);
+ gen4_vertex_flush(sna);
return true;
}
-static bool
-gen7_composite_solid_init(struct sna *sna,
- struct sna_composite_channel *channel,
- uint32_t color)
-{
- DBG(("%s: color=%x\n", __FUNCTION__, color));
-
- channel->filter = PictFilterNearest;
- channel->repeat = RepeatNormal;
- channel->is_affine = true;
- channel->is_solid = true;
- channel->is_opaque = (color >> 24) == 0xff;
- channel->transform = NULL;
- channel->width = 1;
- channel->height = 1;
- channel->card_format = GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
-
- channel->bo = sna_render_get_solid(sna, color);
-
- channel->scale[0] = channel->scale[1] = 1;
- channel->offset[0] = channel->offset[1] = 0;
- return channel->bo != NULL;
-}
-
-static bool
-gen7_composite_linear_init(struct sna *sna,
- PicturePtr picture,
- struct sna_composite_channel *channel,
- int x, int y,
- int w, int h,
- int dst_x, int dst_y)
-{
- PictLinearGradient *linear =
- (PictLinearGradient *)picture->pSourcePict;
- pixman_fixed_t tx, ty;
- float x0, y0, sf;
- float dx, dy;
-
- DBG(("%s: p1=(%f, %f), p2=(%f, %f), src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
- __FUNCTION__,
- pixman_fixed_to_double(linear->p1.x), pixman_fixed_to_double(linear->p1.y),
- pixman_fixed_to_double(linear->p2.x), pixman_fixed_to_double(linear->p2.y),
- x, y, dst_x, dst_y, w, h));
-
- if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
- return 0;
-
- if (!sna_transform_is_affine(picture->transform)) {
- DBG(("%s: fallback due to projective transform\n",
- __FUNCTION__));
- return sna_render_picture_fixup(sna, picture, channel,
- x, y, w, h, dst_x, dst_y);
- }
-
- channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear);
- if (!channel->bo)
- return 0;
-
- channel->filter = PictFilterNearest;
- channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
- channel->width = channel->bo->pitch / 4;
- channel->height = 1;
- channel->pict_format = PICT_a8r8g8b8;
-
- channel->scale[0] = channel->scale[1] = 1;
- channel->offset[0] = channel->offset[1] = 0;
-
- if (sna_transform_is_translation(picture->transform, &tx, &ty)) {
- dx = pixman_fixed_to_double(linear->p2.x - linear->p1.x);
- dy = pixman_fixed_to_double(linear->p2.y - linear->p1.y);
-
- x0 = pixman_fixed_to_double(linear->p1.x);
- y0 = pixman_fixed_to_double(linear->p1.y);
-
- if (tx | ty) {
- x0 -= pixman_fixed_to_double(tx);
- y0 -= pixman_fixed_to_double(ty);
- }
- } else {
- struct pixman_f_vector p1, p2;
- struct pixman_f_transform m, inv;
-
- pixman_f_transform_from_pixman_transform(&m, picture->transform);
- DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n",
- __FUNCTION__,
- m.m[0][0], m.m[0][1], m.m[0][2],
- m.m[1][0], m.m[1][1], m.m[1][2],
- m.m[2][0], m.m[2][1], m.m[2][2]));
- if (!pixman_f_transform_invert(&inv, &m))
- return 0;
-
- p1.v[0] = pixman_fixed_to_double(linear->p1.x);
- p1.v[1] = pixman_fixed_to_double(linear->p1.y);
- p1.v[2] = 1.;
- pixman_f_transform_point(&inv, &p1);
-
- p2.v[0] = pixman_fixed_to_double(linear->p2.x);
- p2.v[1] = pixman_fixed_to_double(linear->p2.y);
- p2.v[2] = 1.;
- pixman_f_transform_point(&inv, &p2);
-
- DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n",
- __FUNCTION__,
- p1.v[0], p1.v[1], p1.v[2],
- p2.v[0], p2.v[1], p2.v[2]));
-
- dx = p2.v[0] - p1.v[0];
- dy = p2.v[1] - p1.v[1];
-
- x0 = p1.v[0];
- y0 = p1.v[1];
- }
-
- sf = dx*dx + dy*dy;
- dx /= sf;
- dy /= sf;
-
- channel->embedded_transform.matrix[0][0] = pixman_double_to_fixed(dx);
- channel->embedded_transform.matrix[0][1] = pixman_double_to_fixed(dy);
- channel->embedded_transform.matrix[0][2] = -pixman_double_to_fixed(dx*(x0+dst_x-x) + dy*(y0+dst_y-y));
-
- channel->embedded_transform.matrix[1][0] = 0;
- channel->embedded_transform.matrix[1][1] = 0;
- channel->embedded_transform.matrix[1][2] = pixman_double_to_fixed(.5);
-
- channel->embedded_transform.matrix[2][0] = 0;
- channel->embedded_transform.matrix[2][1] = 0;
- channel->embedded_transform.matrix[2][2] = pixman_fixed_1;
-
- channel->transform = &channel->embedded_transform;
- channel->is_affine = 1;
-
- DBG(("%s: dx=%f, dy=%f, offset=%f\n",
- __FUNCTION__, dx, dy, -dx*(x0-x+dst_x) + -dy*(y0-y+dst_y)));
-
- return channel->bo != NULL;
-}
-
static int
gen7_composite_picture(struct sna *sna,
PicturePtr picture,
@@ -2323,16 +1840,16 @@ gen7_composite_picture(struct sna *sna,
channel->card_format = -1;
if (sna_picture_is_solid(picture, &color))
- return gen7_composite_solid_init(sna, channel, color);
+ return gen4_channel_init_solid(sna, channel, color);
if (picture->pDrawable == NULL) {
int ret;
if (picture->pSourcePict->type == SourcePictTypeLinear)
- return gen7_composite_linear_init(sna, picture, channel,
- x, y,
- w, h,
- dst_x, dst_y);
+ return gen4_channel_init_linear(sna, picture, channel,
+ x, y,
+ w, h,
+ dst_x, dst_y);
DBG(("%s -- fixup, gradient\n", __FUNCTION__));
ret = -1;
@@ -2383,7 +1900,8 @@ gen7_composite_picture(struct sna *sna,
channel->card_format = gen7_get_card_format(picture->format);
if (channel->card_format == (unsigned)-1)
return sna_render_picture_convert(sna, picture, channel, pixmap,
- x, y, w, h, dst_x, dst_y);
+ x, y, w, h, dst_x, dst_y,
+ false);
if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
@@ -2408,8 +1926,8 @@ static void gen7_composite_channel_convert(struct sna_composite_channel *channel
static void gen7_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
- if (sna->render_state.gen7.vertex_offset) {
- gen7_vertex_flush(sna);
+ if (sna->render.vertex_offset) {
+ gen4_vertex_flush(sna);
gen7_magic_ca_pass(sna, op);
}
@@ -2469,9 +1987,11 @@ gen7_composite_set_target(struct sna *sna,
return true;
}
-inline static bool can_switch_to_blt(struct sna *sna)
+inline static bool can_switch_to_blt(struct sna *sna,
+ struct kgem_bo *bo,
+ unsigned flags)
{
- if (sna->kgem.ring == KGEM_BLT)
+ if (sna->kgem.ring != KGEM_RENDER)
return true;
if (NO_RING_SWITCH)
@@ -2480,7 +2000,13 @@ inline static bool can_switch_to_blt(struct sna *sna)
if (!sna->kgem.has_semaphores)
return false;
- return sna->kgem.mode == KGEM_NONE || kgem_is_idle(&sna->kgem);
+ if (flags & COPY_LAST)
+ return true;
+
+ if (bo && RQ_IS_BLT(bo->rq))
+ return true;
+
+ return kgem_ring_is_idle(&sna->kgem, KGEM_BLT);
}
static inline bool untiled_tlb_miss(struct kgem_bo *bo)
@@ -2488,14 +2014,19 @@ static inline bool untiled_tlb_miss(struct kgem_bo *bo)
return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096;
}
-static bool prefer_blt_bo(struct sna *sna, struct kgem_bo *bo)
+static int prefer_blt_bo(struct sna *sna, struct kgem_bo *bo)
{
- return untiled_tlb_miss(bo) && bo->pitch < MAXSHORT;
+ if (bo->rq)
+ return RQ_IS_BLT(bo->rq) ? 1 : -1;
+
+ return bo->tiling == I915_TILING_NONE || bo->scanout;
}
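
The int return deserves a gloss; a hedged reading of the tri-state and of how callers combine it:

/* Editorial note: prefer_blt_bo is now a tri-state hint rather than a
 * bool -- -1 (busy on the render ring, keep it there), 0 (idle and
 * tiled, indifferent), +1 (busy on BLT, or idle and linear/scanout).
 * Callers combine the hints, e.g. prefer_blt_composite below uses
 *   (prefer_blt_bo(sna, dst) | prefer_blt_bo(sna, src)) > 0
 * where a single -1 forces the ORed value negative (two's complement),
 * so one render-busy bo vetoes the switch to the blitter. */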
-inline static bool prefer_blt_ring(struct sna *sna)
+inline static bool prefer_blt_ring(struct sna *sna,
+ struct kgem_bo *bo,
+ unsigned flags)
{
- return sna->kgem.ring != KGEM_RENDER || can_switch_to_blt(sna);
+ return can_switch_to_blt(sna, bo, flags);
}
static bool
@@ -2514,17 +2045,8 @@ try_blt(struct sna *sna,
return true;
}
- if (can_switch_to_blt(sna)) {
- if (sna_picture_is_solid(src, NULL))
- return true;
-
- if (dst->pDrawable == src->pDrawable)
- return true;
-
- if (src->pDrawable &&
- get_drawable_pixmap(dst->pDrawable) == get_drawable_pixmap(src->pDrawable))
- return true;
- }
+ if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, NULL, 0))
+ return true;
return false;
}
@@ -2551,12 +2073,6 @@ has_alphamap(PicturePtr p)
}
static bool
-untransformed(PicturePtr p)
-{
- return !p->transform || pixman_transform_is_int_translate(p->transform);
-}
-
-static bool
need_upload(PicturePtr p)
{
return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
@@ -2602,7 +2118,6 @@ gen7_composite_fallback(struct sna *sna,
PicturePtr mask,
PicturePtr dst)
{
- struct sna_pixmap *priv;
PixmapPtr src_pixmap;
PixmapPtr mask_pixmap;
PixmapPtr dst_pixmap;
@@ -2641,10 +2156,7 @@ gen7_composite_fallback(struct sna *sna,
}
/* If anything is on the GPU, push everything out to the GPU */
- priv = sna_pixmap(dst_pixmap);
- if (priv &&
- ((priv->gpu_damage && !priv->clear) ||
- (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)))) {
+ if (dst_use_gpu(dst_pixmap)) {
DBG(("%s: dst is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
@@ -2679,14 +2191,14 @@ gen7_composite_fallback(struct sna *sna,
if (too_large(dst_pixmap->drawable.width,
dst_pixmap->drawable.height) &&
- (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage))) {
+ dst_is_cpu(dst_pixmap)) {
DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
return true;
}
DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
__FUNCTION__));
- return false;
+ return dst_use_cpu(dst_pixmap);
}
static int
@@ -2707,7 +2219,7 @@ reuse_source(struct sna *sna,
}
if (sna_picture_is_solid(mask, &color))
- return gen7_composite_solid_init(sna, mc, color);
+ return gen4_channel_init_solid(sna, mc, color);
if (sc->is_solid)
return false;
@@ -2750,11 +2262,14 @@ prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp)
if (sna->kgem.ring == KGEM_BLT)
return true;
- if (!prefer_blt_ring(sna))
+ if (untiled_tlb_miss(tmp->dst.bo) ||
+ untiled_tlb_miss(tmp->src.bo))
+ return true;
+
+ if (!prefer_blt_ring(sna, tmp->dst.bo, 0))
return false;
- return (prefer_blt_bo(sna, tmp->dst.bo) ||
- prefer_blt_bo(sna, tmp->src.bo));
+ return (prefer_blt_bo(sna, tmp->dst.bo) | prefer_blt_bo(sna, tmp->src.bo)) > 0;
}
static bool
@@ -2811,7 +2326,7 @@ gen7_render_composite(struct sna *sna,
case -1:
goto cleanup_dst;
case 0:
- if (!gen7_composite_solid_init(sna, &tmp->src, 0))
+ if (!gen4_channel_init_solid(sna, &tmp->src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
@@ -2835,7 +2350,6 @@ gen7_render_composite(struct sna *sna,
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
- tmp->prim_emit = gen7_emit_composite_primitive;
if (mask) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
tmp->has_component_alpha = true;
@@ -2865,7 +2379,7 @@ gen7_render_composite(struct sna *sna,
case -1:
goto cleanup_src;
case 0:
- if (!gen7_composite_solid_init(sna, &tmp->mask, 0))
+ if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
goto cleanup_src;
/* fall through to fixup */
case 1:
@@ -2875,31 +2389,7 @@ gen7_render_composite(struct sna *sna,
}
tmp->is_affine &= tmp->mask.is_affine;
-
- if (tmp->src.transform == NULL && tmp->mask.transform == NULL)
- tmp->prim_emit = gen7_emit_composite_primitive_identity_source_mask;
-
- tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine;
- } else {
- if (tmp->src.is_solid) {
- tmp->prim_emit = gen7_emit_composite_primitive_solid;
- if (tmp->src.is_opaque && op == PictOpOver)
- tmp->op = PictOpSrc;
- } else if (tmp->src.transform == NULL)
- tmp->prim_emit = gen7_emit_composite_primitive_identity_source;
- else if (tmp->src.is_affine) {
- if (tmp->src.transform->matrix[0][1] == 0 &&
- tmp->src.transform->matrix[1][0] == 0) {
- tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
- tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
- tmp->prim_emit = gen7_emit_composite_primitive_simple_source;
- } else
- tmp->prim_emit = gen7_emit_composite_primitive_affine_source;
- }
-
- tmp->floats_per_vertex = 3 + !tmp->is_affine;
}
- tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
tmp->u.gen7.flags =
GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
@@ -2913,14 +2403,18 @@ gen7_render_composite(struct sna *sna,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine),
- gen7_choose_composite_vertex_buffer(tmp));
+ gen4_choose_composite_emitter(tmp));
tmp->blt = gen7_render_composite_blt;
tmp->box = gen7_render_composite_box;
- tmp->boxes = gen7_render_composite_boxes;
+ tmp->boxes = gen7_render_composite_boxes__blt;
+	if (tmp->emit_boxes) {
+ tmp->boxes = gen7_render_composite_boxes;
+ tmp->thread_boxes = gen7_render_composite_boxes__thread;
+ }
tmp->done = gen7_render_composite_done;
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
@@ -2949,167 +2443,6 @@ cleanup_dst:
}
#if !NO_COMPOSITE_SPANS
-inline static void
-gen7_emit_composite_texcoord_affine(struct sna *sna,
- const struct sna_composite_channel *channel,
- int16_t x, int16_t y)
-{
- float t[2];
-
- sna_get_transformed_coordinates(x + channel->offset[0],
- y + channel->offset[1],
- channel->transform,
- &t[0], &t[1]);
- OUT_VERTEX_F(t[0] * channel->scale[0]);
- OUT_VERTEX_F(t[1] * channel->scale[1]);
-}
-
-inline static void
-gen7_emit_composite_spans_vertex(struct sna *sna,
- const struct sna_composite_spans_op *op,
- int16_t x, int16_t y)
-{
- OUT_VERTEX(x, y);
- gen7_emit_composite_texcoord(sna, &op->base.src, x, y);
-}
-
-fastcall static void
-gen7_emit_composite_spans_primitive(struct sna *sna,
- const struct sna_composite_spans_op *op,
- const BoxRec *box,
- float opacity)
-{
- gen7_emit_composite_spans_vertex(sna, op, box->x2, box->y2);
- OUT_VERTEX_F(opacity);
-
- gen7_emit_composite_spans_vertex(sna, op, box->x1, box->y2);
- OUT_VERTEX_F(opacity);
-
- gen7_emit_composite_spans_vertex(sna, op, box->x1, box->y1);
- OUT_VERTEX_F(opacity);
-}
-
-fastcall static void
-gen7_emit_composite_spans_solid(struct sna *sna,
- const struct sna_composite_spans_op *op,
- const BoxRec *box,
- float opacity)
-{
- OUT_VERTEX(box->x2, box->y2);
- OUT_VERTEX_F(1); OUT_VERTEX_F(1);
- OUT_VERTEX_F(opacity);
-
- OUT_VERTEX(box->x1, box->y2);
- OUT_VERTEX_F(0); OUT_VERTEX_F(1);
- OUT_VERTEX_F(opacity);
-
- OUT_VERTEX(box->x1, box->y1);
- OUT_VERTEX_F(0); OUT_VERTEX_F(0);
- OUT_VERTEX_F(opacity);
-}
-
-fastcall static void
-gen7_emit_composite_spans_identity(struct sna *sna,
- const struct sna_composite_spans_op *op,
- const BoxRec *box,
- float opacity)
-{
- float *v;
- union {
- struct sna_coordinate p;
- float f;
- } dst;
-
- float sx = op->base.src.scale[0];
- float sy = op->base.src.scale[1];
- int16_t tx = op->base.src.offset[0];
- int16_t ty = op->base.src.offset[1];
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 3*4;
- assert(sna->render.vertex_used <= sna->render.vertex_size);
-
- dst.p.x = box->x2;
- dst.p.y = box->y2;
- v[0] = dst.f;
- v[1] = (box->x2 + tx) * sx;
- v[6] = v[2] = (box->y2 + ty) * sy;
-
- dst.p.x = box->x1;
- v[4] = dst.f;
- v[9] = v[5] = (box->x1 + tx) * sx;
-
- dst.p.y = box->y1;
- v[8] = dst.f;
- v[10] = (box->y1 + ty) * sy;
-
- v[11] = v[7] = v[3] = opacity;
-}
-
-fastcall static void
-gen7_emit_composite_spans_simple(struct sna *sna,
- const struct sna_composite_spans_op *op,
- const BoxRec *box,
- float opacity)
-{
- float *v;
- union {
- struct sna_coordinate p;
- float f;
- } dst;
-
- float xx = op->base.src.transform->matrix[0][0];
- float x0 = op->base.src.transform->matrix[0][2];
- float yy = op->base.src.transform->matrix[1][1];
- float y0 = op->base.src.transform->matrix[1][2];
- float sx = op->base.src.scale[0];
- float sy = op->base.src.scale[1];
- int16_t tx = op->base.src.offset[0];
- int16_t ty = op->base.src.offset[1];
-
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 3*4;
- assert(sna->render.vertex_used <= sna->render.vertex_size);
-
- dst.p.x = box->x2;
- dst.p.y = box->y2;
- v[0] = dst.f;
- v[1] = ((box->x2 + tx) * xx + x0) * sx;
- v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
-
- dst.p.x = box->x1;
- v[4] = dst.f;
- v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx;
-
- dst.p.y = box->y1;
- v[8] = dst.f;
- v[10] = ((box->y1 + ty) * yy + y0) * sy;
-
- v[11] = v[7] = v[3] = opacity;
-}
-
-fastcall static void
-gen7_emit_composite_spans_affine(struct sna *sna,
- const struct sna_composite_spans_op *op,
- const BoxRec *box,
- float opacity)
-{
- OUT_VERTEX(box->x2, box->y2);
- gen7_emit_composite_texcoord_affine(sna, &op->base.src,
- box->x2, box->y2);
- OUT_VERTEX_F(opacity);
-
- OUT_VERTEX(box->x1, box->y2);
- gen7_emit_composite_texcoord_affine(sna, &op->base.src,
- box->x1, box->y2);
- OUT_VERTEX_F(opacity);
-
- OUT_VERTEX(box->x1, box->y1);
- gen7_emit_composite_texcoord_affine(sna, &op->base.src,
- box->x1, box->y1);
- OUT_VERTEX_F(opacity);
-}
-
fastcall static void
gen7_render_composite_spans_box(struct sna *sna,
const struct sna_composite_spans_op *op,
@@ -3159,11 +2492,47 @@ gen7_render_composite_spans_boxes(struct sna *sna,
}
fastcall static void
+gen7_render_composite_spans_boxes__thread(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ op->base.dst.x, op->base.dst.y));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox,
+ gen7_emit_composite_state);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
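
The threaded-boxes path above reserves space in the shared vertex buffer under the render lock, pins the buffer, then emits the vertices with the lock dropped so other threads can reserve concurrently. A minimal sketch of that protocol, assuming the sna_vertex_* primitives behave as their names suggest:

static float *reserve_vertices(struct sna *sna, int floats)
{
	float *v;

	sna_vertex_lock(&sna->render);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += floats;
	sna_vertex_acquire__locked(&sna->render); /* pin vbo against a flush */
	sna_vertex_unlock(&sna->render);
	return v; /* fill v[0..floats) without holding the lock */
}

static void release_vertices(struct sna *sna)
{
	sna_vertex_lock(&sna->render);
	sna_vertex_release__locked(&sna->render); /* unpin once written */
	sna_vertex_unlock(&sna->render);
}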
+
+fastcall static void
gen7_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
- if (sna->render_state.gen7.vertex_offset)
- gen7_vertex_flush(sna);
+ if (sna->render.vertex_offset)
+ gen4_vertex_flush(sna);
DBG(("%s()\n", __FUNCTION__));
@@ -3184,12 +2553,11 @@ gen7_check_composite_spans(struct sna *sna,
if (gen7_composite_fallback(sna, src, NULL, dst))
return false;
- if (need_tiling(sna, width, height)) {
- if (!is_gpu(dst->pDrawable)) {
- DBG(("%s: fallback, tiled operation not on GPU\n",
- __FUNCTION__));
- return false;
- }
+ if (need_tiling(sna, width, height) &&
+ !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
+ DBG(("%s: fallback, tiled operation not on GPU\n",
+ __FUNCTION__));
+ return false;
}
return true;
@@ -3232,7 +2600,7 @@ gen7_render_composite_spans(struct sna *sna,
case -1:
goto cleanup_dst;
case 0:
- if (!gen7_composite_solid_init(sna, &tmp->base.src, 0))
+ if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
@@ -3244,23 +2612,6 @@ gen7_render_composite_spans(struct sna *sna,
tmp->base.is_affine = tmp->base.src.is_affine;
tmp->base.need_magic_ca_pass = false;
- tmp->prim_emit = gen7_emit_composite_spans_primitive;
- if (tmp->base.src.is_solid) {
- tmp->prim_emit = gen7_emit_composite_spans_solid;
- } else if (tmp->base.src.transform == NULL) {
- tmp->prim_emit = gen7_emit_composite_spans_identity;
- } else if (tmp->base.is_affine) {
- if (tmp->base.src.transform->matrix[0][1] == 0 &&
- tmp->base.src.transform->matrix[1][0] == 0) {
- tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
- tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
- tmp->prim_emit = gen7_emit_composite_spans_simple;
- } else
- tmp->prim_emit = gen7_emit_composite_spans_affine;
- }
- tmp->base.floats_per_vertex = 4 + !tmp->base.is_affine;
- tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;
-
tmp->base.u.gen7.flags =
GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
tmp->base.src.repeat,
@@ -3268,13 +2619,15 @@ gen7_render_composite_spans(struct sna *sna,
SAMPLER_EXTEND_PAD),
gen7_get_blend(tmp->base.op, false, tmp->base.dst.format),
GEN7_WM_KERNEL_OPACITY | !tmp->base.is_affine,
- 1 << 2 | (2+!tmp->base.is_affine));
+ gen4_choose_spans_emitter(tmp));
tmp->box = gen7_render_composite_spans_box;
tmp->boxes = gen7_render_composite_spans_boxes;
+ if (tmp->emit_boxes)
+ tmp->thread_boxes = gen7_render_composite_spans_boxes__thread;
tmp->done = gen7_render_composite_spans_done;
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->base.dst.bo, tmp->base.src.bo,
NULL)) {
@@ -3307,7 +2660,7 @@ gen7_emit_copy_state(struct sna *sna,
uint32_t *binding_table;
uint16_t offset;
- gen7_get_batch(sna);
+ gen7_get_batch(sna, op);
binding_table = gen7_composite_get_binding_table(sna, &offset);
@@ -3337,10 +2690,23 @@ static inline bool prefer_blt_copy(struct sna *sna,
struct kgem_bo *dst_bo,
unsigned flags)
{
- return (sna->kgem.ring == KGEM_BLT ||
- (flags & COPY_LAST && sna->kgem.mode == KGEM_NONE) ||
- prefer_blt_bo(sna, src_bo) ||
- prefer_blt_bo(sna, dst_bo));
+ if (sna->kgem.ring == KGEM_BLT)
+ return true;
+
+ assert((flags & COPY_SYNC) == 0);
+
+ if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags))
+ return true;
+
+ if (untiled_tlb_miss(src_bo) ||
+ untiled_tlb_miss(dst_bo))
+ return true;
+
+ if (!prefer_blt_ring(sna, dst_bo, flags))
+ return false;
+
+ return (prefer_blt_bo(sna, src_bo) >= 0 &&
+ prefer_blt_bo(sna, dst_bo) > 0);
}
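
The rewritten heuristic treats prefer_blt_bo() as tri-state rather than boolean. Reading the final test under that assumption (>0 favours BLT, 0 neutral, <0 favours RENDER):

/* src >= 0 && dst > 0 -> take the BLT ring: the destination actively
 *                        prefers the blitter and the source tolerates it
 * src < 0             -> stay on RENDER: the source objects
 * dst <= 0            -> stay on RENDER: the destination is neutral at best
 */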
inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents)
@@ -3386,8 +2752,8 @@ gen7_render_copy_boxes(struct sna *sna, uint8_t alu,
struct sna_composite_op tmp;
BoxRec extents;
- DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n",
- __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu,
+ DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? %d\n",
+ __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags,
src_bo == dst_bo,
overlaps(sna,
src_bo, src_dx, src_dy,
@@ -3421,7 +2787,7 @@ fallback_blt:
if (too_large(extents.x2-extents.x1, extents.y2-extents.y1))
goto fallback_blt;
- if ((flags & COPY_LAST || can_switch_to_blt(sna)) &&
+ if (can_switch_to_blt(sna, dst_bo, flags) &&
sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
@@ -3523,7 +2889,7 @@ fallback_blt:
tmp.u.gen7.flags = COPY_FLAGS(alu);
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL))
@@ -3563,7 +2929,7 @@ fallback_blt:
} while (--n_this_time);
} while (n);
- gen7_vertex_flush(sna);
+ gen4_vertex_flush(sna);
sna_render_composite_redirect_done(sna, &tmp);
if (tmp.src.bo != src_bo)
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
@@ -3576,6 +2942,14 @@ fallback_tiled_dst:
if (tmp.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
+ if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
+ sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n))
+ return true;
+
return sna_tiling_copy_boxes(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
@@ -3608,8 +2982,8 @@ gen7_render_copy_blt(struct sna *sna,
static void
gen7_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
- if (sna->render_state.gen7.vertex_offset)
- gen7_vertex_flush(sna);
+ if (sna->render.vertex_offset)
+ gen4_vertex_flush(sna);
}
static bool
@@ -3671,7 +3045,7 @@ fallback:
op->base.u.gen7.flags = COPY_FLAGS(alu);
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
@@ -3699,7 +3073,7 @@ gen7_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
* specific kernel.
*/
- gen7_get_batch(sna);
+ gen7_get_batch(sna, op);
binding_table = gen7_composite_get_binding_table(sna, &offset);
@@ -3727,7 +3101,10 @@ gen7_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
static inline bool prefer_blt_fill(struct sna *sna,
struct kgem_bo *bo)
{
- return prefer_blt_ring(sna) || untiled_tlb_miss(bo);
+ if (untiled_tlb_miss(bo))
+ return true;
+
+ return prefer_blt_ring(sna, bo, 0) || prefer_blt_bo(sna, bo) >= 0;
}
static bool
@@ -3822,6 +3199,7 @@ gen7_render_fill_boxes(struct sna *sna,
tmp.u.gen7.flags = FILL_FLAGS(op, format);
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
@@ -3855,7 +3233,7 @@ gen7_render_fill_boxes(struct sna *sna,
} while (--n_this_time);
} while (n);
- gen7_vertex_flush(sna);
+ gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
sna_render_composite_redirect_done(sna, &tmp);
return true;
@@ -3946,8 +3324,8 @@ gen7_render_fill_op_boxes(struct sna *sna,
static void
gen7_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
{
- if (sna->render_state.gen7.vertex_offset)
- gen7_vertex_flush(sna);
+ if (sna->render.vertex_offset)
+ gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
}
@@ -3995,6 +3373,7 @@ gen7_render_fill(struct sna *sna, uint8_t alu,
op->base.u.gen7.flags = FILL_FLAGS_NOBLEND;
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
@@ -4072,9 +3451,13 @@ gen7_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
tmp.u.gen7.flags = FILL_FLAGS_NOBLEND;
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
- _kgem_submit(&sna->kgem);
- assert(kgem_check_bo(&sna->kgem, bo, NULL));
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
+ kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+ return false;
+ }
}
gen7_emit_fill_state(sna, &tmp);
@@ -4095,7 +3478,7 @@ gen7_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
v[7] = v[2] = v[3] = 1;
v[6] = v[10] = v[11] = 0;
- gen7_vertex_flush(sna);
+ gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return true;
@@ -4152,9 +3535,13 @@ gen7_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
tmp.u.gen7.flags = FILL_FLAGS_NOBLEND;
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
- _kgem_submit(&sna->kgem);
- assert(kgem_check_bo(&sna->kgem, bo, NULL));
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
+ kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+ return false;
+ }
}
gen7_emit_fill_state(sna, &tmp);
@@ -4174,7 +3561,7 @@ gen7_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
v[7] = v[2] = v[3] = 1;
v[6] = v[10] = v[11] = 0;
- gen7_vertex_flush(sna);
+ gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return true;
@@ -4182,20 +3569,20 @@ gen7_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
static void gen7_render_flush(struct sna *sna)
{
- gen7_vertex_close(sna);
+ gen4_vertex_close(sna);
+
+ assert(sna->render.vb_id == 0);
+ assert(sna->render.vertex_offset == 0);
}
static void
gen7_render_context_switch(struct kgem *kgem,
int new_mode)
{
- if (!new_mode)
- return;
-
- if (kgem->mode) {
+ if (kgem->nbatch) {
DBG(("%s: switch rings %d -> %d\n",
__FUNCTION__, kgem->mode, new_mode));
- kgem_submit(kgem);
+ _kgem_submit(kgem);
}
kgem->ring = new_mode;
@@ -4238,7 +3625,6 @@ static void gen7_render_reset(struct sna *sna)
{
sna->render_state.gen7.emit_flush = false;
sna->render_state.gen7.needs_invariant = true;
- sna->render_state.gen7.vb_id = 0;
sna->render_state.gen7.ve_id = 3 << 2;
sna->render_state.gen7.last_primitive = -1;
@@ -4249,6 +3635,10 @@ static void gen7_render_reset(struct sna *sna)
sna->render_state.gen7.drawrect_offset = -1;
sna->render_state.gen7.drawrect_limit = -1;
sna->render_state.gen7.surface_table = -1;
+
+ sna->render.vertex_offset = 0;
+ sna->render.nvertex_reloc = 0;
+ sna->render.vb_id = 0;
}
static void gen7_render_fini(struct sna *sna)
@@ -4256,6 +3646,16 @@ static void gen7_render_fini(struct sna *sna)
kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo);
}
+static bool is_gt2(struct sna *sna)
+{
+ return DEVICE_ID(sna->PciInfo) & 0x20;
+}
+
+static bool is_mobile(struct sna *sna)
+{
+ return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6;
+}
+
static bool gen7_render_setup(struct sna *sna)
{
struct gen7_render_state *state = &sna->render_state.gen7;
@@ -4263,15 +3663,20 @@ static bool gen7_render_setup(struct sna *sna)
struct gen7_sampler_state *ss;
int i, j, k, l, m;
- if (sna->kgem.gen == 70) {
+ if (sna->kgem.gen == 070) {
state->info = &ivb_gt_info;
if (DEVICE_ID(sna->PciInfo) & 0xf) {
state->info = &ivb_gt1_info;
- if (DEVICE_ID(sna->PciInfo) & 0x20)
+ if (is_gt2(sna))
state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
}
- } else if (sna->kgem.gen == 75) {
+ } else if (sna->kgem.gen == 075) {
state->info = &hsw_gt_info;
+ if (DEVICE_ID(sna->PciInfo) & 0xf) {
+ state->info = &hsw_gt1_info;
+ if (is_gt2(sna))
+ state->info = &hsw_gt2_info;
+ }
} else
return false;
@@ -4331,7 +3736,6 @@ static bool gen7_render_setup(struct sna *sna)
}
}
- state->cc_vp = gen7_create_cc_viewport(&general);
state->cc_blend = gen7_composite_create_blend_state(&general);
state->general_bo = sna_static_stream_fini(sna, &general);
@@ -4349,10 +3753,13 @@ bool gen7_render_init(struct sna *sna)
#if !NO_COMPOSITE
sna->render.composite = gen7_render_composite;
+ sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen7_check_composite_spans;
sna->render.composite_spans = gen7_render_composite_spans;
+ if (is_mobile(sna))
+ sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
sna->render.video = gen7_render_video;
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 46c898f79..86a2dfcde 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -69,10 +69,26 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define DBG_NO_UPLOAD_ACTIVE 0
#define DBG_NO_MAP_UPLOAD 0
#define DBG_NO_RELAXED_FENCING 0
+#define DBG_NO_SECURE_BATCHES 0
+#define DBG_NO_PINNED_BATCHES 0
+#define DBG_NO_FAST_RELOC 0
+#define DBG_NO_HANDLE_LUT 0
#define DBG_DUMP 0
+#ifndef DEBUG_SYNC
+#define DEBUG_SYNC 0
+#endif
+
#define SHOW_BATCH 0
+#if 0
+#define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__))
+#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__))
+#else
+#define ASSERT_IDLE(kgem__, handle__)
+#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__)
+#endif
+
/* Worst case seems to be 965gm where we cannot write within a cacheline that
* is simultaneously being read by the GPU, or within the sampler
* prefetch. In general, the chipsets seem to have a requirement that sampler
@@ -93,7 +109,20 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 2)
#define __MAP_TYPE(ptr) ((uintptr_t)(ptr) & 3)
-#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20
+#define MAKE_REQUEST(rq, ring) ((struct kgem_request *)((uintptr_t)(rq) | (ring)))
+
+#define LOCAL_I915_PARAM_HAS_BLT 11
+#define LOCAL_I915_PARAM_HAS_RELAXED_FENCING 12
+#define LOCAL_I915_PARAM_HAS_RELAXED_DELTA 15
+#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20
+#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23
+#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24
+#define LOCAL_I915_PARAM_HAS_NO_RELOC 25
+#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26
+
+#define LOCAL_I915_EXEC_IS_PINNED (1<<10)
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
#define LOCAL_I915_GEM_USERPTR 0x32
#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr)
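
MAKE_REQUEST() packs the submission ring into the low bits of the request pointer, relying on the pointer's natural alignment; the RQ() accessor seen later in this patch strips the tag again. A sketch of the idiom (the untag/ring helpers written out here are illustrative; the real definitions live in the kgem headers):

#define RQ(rq)      ((struct kgem_request *)((uintptr_t)(rq) & ~3))
#define RQ_RING(rq) ((uintptr_t)(rq) & 3)

bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring);
assert(RQ(bo->rq) == kgem->next_request);
assert(RQ_RING(bo->rq) == kgem->ring);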
@@ -101,7 +130,8 @@ struct local_i915_gem_userptr {
uint64_t user_ptr;
uint32_t user_size;
uint32_t flags;
-#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_READ_ONLY (1<<0)
+#define I915_USERPTR_UNSYNCHRONIZED (1<<31)
uint32_t handle;
};
@@ -170,13 +200,13 @@ static void kgem_sna_flush(struct kgem *kgem)
sna_render_flush_solid(sna);
}
-static int gem_set_tiling(int fd, uint32_t handle, int tiling, int stride)
+static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride)
{
struct drm_i915_gem_set_tiling set_tiling;
int ret;
if (DBG_NO_TILING)
- return I915_TILING_NONE;
+ return false;
VG_CLEAR(set_tiling);
do {
@@ -186,7 +216,7 @@ static int gem_set_tiling(int fd, uint32_t handle, int tiling, int stride)
ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
- return set_tiling.tiling_mode;
+ return ret == 0;
}
static bool gem_set_cacheing(int fd, uint32_t handle, int cacheing)
@@ -206,14 +236,17 @@ static uint32_t gem_userptr(int fd, void *ptr, int size, int read_only)
VG_CLEAR(arg);
arg.user_ptr = (uintptr_t)ptr;
arg.user_size = size;
- arg.flags = 0;
+ arg.flags = I915_USERPTR_UNSYNCHRONIZED;
if (read_only)
arg.flags |= I915_USERPTR_READ_ONLY;
if (drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) {
- DBG(("%s: failed to map %p + %d bytes: %d\n",
- __FUNCTION__, ptr, size, errno));
- return 0;
+ arg.flags &= ~I915_USERPTR_UNSYNCHRONIZED;
+ if (drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) {
+ DBG(("%s: failed to map %p + %d bytes: %d\n",
+ __FUNCTION__, ptr, size, errno));
+ return 0;
+ }
}
return arg.handle;
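
gem_userptr() now asks for an unsynchronized mapping first and silently retries without the flag when the kernel rejects it (the unsynchronized variant is restricted on some kernels). A hypothetical caller wrapping a page-aligned allocation:

void *ptr = NULL;
uint32_t handle;

if (posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE) == 0) {
	handle = gem_userptr(kgem->fd, ptr, PAGE_SIZE, false);
	if (handle) {
		/* ... use handle like any other bo ... */
		gem_close(kgem->fd, handle);
	}
	free(ptr); /* only once the GPU has finished with the pages */
}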
@@ -262,6 +295,11 @@ retry_gtt:
if (kgem_expire_cache(kgem))
goto retry_gtt;
+ if (kgem->need_expire) {
+ kgem_cleanup_cache(kgem);
+ goto retry_gtt;
+ }
+
return NULL;
}
@@ -274,6 +312,11 @@ retry_mmap:
if (__kgem_throttle_retire(kgem, 0))
goto retry_mmap;
+ if (kgem->need_expire) {
+ kgem_cleanup_cache(kgem);
+ goto retry_mmap;
+ }
+
ptr = NULL;
}
@@ -345,8 +388,7 @@ static int gem_read(int fd, uint32_t handle, const void *dst,
return 0;
}
-static bool
-kgem_busy(struct kgem *kgem, int handle)
+bool __kgem_busy(struct kgem *kgem, int handle)
{
struct drm_i915_gem_busy busy;
@@ -360,26 +402,23 @@ kgem_busy(struct kgem *kgem, int handle)
return busy.busy;
}
-void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo)
+static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo)
{
- DBG(("%s: handle=%d, domain=%d\n",
- __FUNCTION__, bo->handle, bo->domain));
- assert(bo->flush || !kgem_busy(kgem, bo->handle));
-
- if (bo->rq)
- kgem_retire(kgem);
-
- if (bo->exec == NULL) {
- DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d\n",
- __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL));
- assert(list_is_empty(&bo->vma));
- bo->rq = NULL;
- list_del(&bo->request);
+ DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n",
+ __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL,
+ __kgem_busy(kgem, bo->handle)));
+ assert(bo->exec == NULL);
+ assert(list_is_empty(&bo->vma));
- bo->needs_flush = false;
+ if (bo->rq) {
+ if (!__kgem_busy(kgem, bo->handle)) {
+ __kgem_bo_clear_busy(bo);
+ kgem_retire(kgem);
+ }
+ } else {
+ assert(!bo->needs_flush);
+ ASSERT_IDLE(kgem, bo->handle);
}
-
- bo->domain = DOMAIN_NONE;
}
bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
@@ -387,15 +426,18 @@ bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
{
assert(bo->refcnt);
assert(!bo->purged);
- assert(bo->flush || !kgem_busy(kgem, bo->handle));
assert(bo->proxy == NULL);
+ ASSERT_IDLE(kgem, bo->handle);
assert(length <= bytes(bo));
if (gem_write(kgem->fd, bo->handle, 0, length, data))
return false;
DBG(("%s: flush=%d, domain=%d\n", __FUNCTION__, bo->flush, bo->domain));
- kgem_bo_retire(kgem, bo);
+ if (bo->exec == NULL) {
+ kgem_bo_retire(kgem, bo);
+ bo->domain = DOMAIN_NONE;
+ }
return true;
}
@@ -490,10 +532,19 @@ static void gem_close(int fd, uint32_t handle)
constant inline static unsigned long __fls(unsigned long word)
{
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86__) || defined(__x86_64__))
asm("bsr %1,%0"
: "=r" (word)
: "rm" (word));
return word;
+#else
+ unsigned int v = 0;
+
+ while (word >>= 1)
+ v++;
+
+ return v;
+#endif
}
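
The non-x86 branch makes __fls() (index of the most significant set bit, i.e. floor(log2)) portable. Spot checks of the contract, illustration only:

assert(__fls(1) == 0);
assert(__fls(2) == 1 && __fls(3) == 1);
assert(__fls(1ul << 12) == 12); /* one page of bytes */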
constant inline static int cache_bucket(int num_pages)
@@ -509,6 +560,7 @@ static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
bo->refcnt = 1;
bo->handle = handle;
+ bo->target_handle = -1;
num_pages(bo) = num_pages;
bucket(bo) = cache_bucket(num_pages);
bo->reusable = true;
@@ -536,9 +588,7 @@ static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages)
return __kgem_bo_init(bo, handle, num_pages);
}
-static struct kgem_request _kgem_static_request;
-
-static struct kgem_request *__kgem_request_alloc(void)
+static struct kgem_request *__kgem_request_alloc(struct kgem *kgem)
{
struct kgem_request *rq;
@@ -548,7 +598,7 @@ static struct kgem_request *__kgem_request_alloc(void)
} else {
rq = malloc(sizeof(*rq));
if (rq == NULL)
- rq = &_kgem_static_request;
+ rq = &kgem->static_request;
}
list_init(&rq->buffers);
@@ -567,11 +617,15 @@ static void __kgem_request_free(struct kgem_request *rq)
static struct list *inactive(struct kgem *kgem, int num_pages)
{
+ assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
+ assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
return &kgem->inactive[cache_bucket(num_pages)];
}
static struct list *active(struct kgem *kgem, int num_pages, int tiling)
{
+ assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
+ assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
return &kgem->active[cache_bucket(num_pages)][tiling];
}
@@ -581,7 +635,7 @@ agp_aperture_size(struct pci_device *dev, unsigned gen)
/* XXX assume that only future chipsets are unknown and follow
* the post gen2 PCI layout.
*/
- return dev->regions[gen < 30 ? 0 : 2].size;
+ return dev->regions[gen < 030 ? 0 : 2].size;
}
static size_t
@@ -636,6 +690,35 @@ static int gem_param(struct kgem *kgem, int name)
return v;
}
+static bool test_has_execbuffer2(struct kgem *kgem)
+{
+ struct drm_i915_gem_execbuffer2 execbuf;
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffer_count = 1;
+
+ return (drmIoctl(kgem->fd,
+ DRM_IOCTL_I915_GEM_EXECBUFFER2,
+ &execbuf) == -1 &&
+ errno == EFAULT);
+}
+
+static bool test_has_no_reloc(struct kgem *kgem)
+{
+ if (DBG_NO_FAST_RELOC)
+ return false;
+
+ return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0;
+}
+
+static bool test_has_handle_lut(struct kgem *kgem)
+{
+ if (DBG_NO_HANDLE_LUT)
+ return false;
+
+ return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0;
+}
+
static bool test_has_semaphores_enabled(struct kgem *kgem)
{
FILE *file;
@@ -674,6 +757,9 @@ static bool is_hw_supported(struct kgem *kgem,
if (DBG_NO_HW)
return false;
+ if (!test_has_execbuffer2(kgem))
+ return false;
+
if (kgem->gen == (unsigned)-1) /* unknown chipset, assume future gen */
return kgem->has_blt;
@@ -682,12 +768,12 @@ static bool is_hw_supported(struct kgem *kgem,
* hw acceleration.
*/
- if (kgem->gen == 60 && dev->revision < 8) {
+ if (kgem->gen == 060 && dev->revision < 8) {
/* pre-production SNB with dysfunctional BLT */
return false;
}
- if (kgem->gen >= 60) /* Only if the kernel supports the BLT ring */
+ if (kgem->gen >= 060) /* Only if the kernel supports the BLT ring */
return kgem->has_blt;
return true;
@@ -695,11 +781,11 @@ static bool is_hw_supported(struct kgem *kgem,
static bool test_has_relaxed_fencing(struct kgem *kgem)
{
- if (kgem->gen < 40) {
+ if (kgem->gen < 040) {
if (DBG_NO_RELAXED_FENCING)
return false;
- return gem_param(kgem, I915_PARAM_HAS_RELAXED_FENCING) > 0;
+ return gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_FENCING) > 0;
} else
return true;
}
@@ -716,7 +802,7 @@ static bool test_has_llc(struct kgem *kgem)
#endif
if (has_llc == -1) {
DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__));
- has_llc = kgem->gen >= 60;
+ has_llc = kgem->gen >= 060;
}
return has_llc;
@@ -731,7 +817,7 @@ static bool test_has_cacheing(struct kgem *kgem)
return false;
/* Incoherent blt and sampler hangs the GPU */
- if (kgem->gen == 40)
+ if (kgem->gen == 040)
return false;
handle = gem_create(kgem->fd, 1);
@@ -753,7 +839,7 @@ static bool test_has_userptr(struct kgem *kgem)
return false;
/* Incoherent blt and sampler hangs the GPU */
- if (kgem->gen == 40)
+ if (kgem->gen == 040)
return false;
ptr = malloc(PAGE_SIZE);
@@ -767,13 +853,101 @@ static bool test_has_userptr(struct kgem *kgem)
#endif
}
+static bool test_has_secure_batches(struct kgem *kgem)
+{
+ if (DBG_NO_SECURE_BATCHES)
+ return false;
+
+ return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0;
+}
+
+static bool test_has_pinned_batches(struct kgem *kgem)
+{
+ if (DBG_NO_PINNED_BATCHES)
+ return false;
+
+ return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0;
+}
+
static int kgem_get_screen_index(struct kgem *kgem)
{
struct sna *sna = container_of(kgem, struct sna, kgem);
return sna->scrn->scrnIndex;
}
-void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
+static bool kgem_init_pinned_batches(struct kgem *kgem)
+{
+ int count[2] = { 16, 4 };
+ int size[2] = { 1, 4 };
+ int n, i;
+
+ if (kgem->wedged)
+ return true;
+
+ for (n = 0; n < ARRAY_SIZE(count); n++) {
+ for (i = 0; i < count[n]; i++) {
+ struct drm_i915_gem_pin pin;
+ struct kgem_bo *bo;
+
+ VG_CLEAR(pin);
+
+ pin.handle = gem_create(kgem->fd, size[n]);
+ if (pin.handle == 0)
+ goto err;
+
+ DBG(("%s: new handle=%d, num_pages=%d\n",
+ __FUNCTION__, pin.handle, size[n]));
+
+ bo = __kgem_bo_alloc(pin.handle, size[n]);
+ if (bo == NULL) {
+ gem_close(kgem->fd, pin.handle);
+ goto err;
+ }
+
+ pin.alignment = 0;
+ if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin)) {
+ gem_close(kgem->fd, pin.handle);
+ goto err;
+ }
+ bo->presumed_offset = pin.offset;
+ debug_alloc__bo(kgem, bo);
+ list_add(&bo->list, &kgem->pinned_batches[n]);
+ }
+ }
+
+ return true;
+
+err:
+ for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) {
+ while (!list_is_empty(&kgem->pinned_batches[n])) {
+ kgem_bo_destroy(kgem,
+ list_first_entry(&kgem->pinned_batches[n],
+ struct kgem_bo, list));
+ }
+ }
+
+ /* For simplicity populate the lists with a single unpinned bo */
+ for (n = 0; n < ARRAY_SIZE(count); n++) {
+ struct kgem_bo *bo;
+ uint32_t handle;
+
+ handle = gem_create(kgem->fd, size[n]);
+ if (handle == 0)
+ break;
+
+ bo = __kgem_bo_alloc(handle, size[n]);
+ if (bo == NULL) {
+ gem_close(kgem->fd, handle);
+ break;
+ }
+
+ debug_alloc__bo(kgem, bo);
+ list_add(&bo->list, &kgem->pinned_batches[n]);
+ }
+ return false;
+}
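+
+The function above pre-pins two pools of batch bos, sixteen 1-page and four 4-page ones per the count[]/size[] tables, so gen2-class (830/845) hardware can execute batches from fixed offsets; on failure it degrades to ordinary unpinned bos, and gen2 is then wedged in kgem_init() below. A sketch of how a pool entry might be rotated out at submit time (illustrative only; the real selection logic sits elsewhere in kgem.c):
+
+struct kgem_bo *batch;
+int class = npages > 1; /* 0: 1-page pool, 1: 4-page pool */
+
+batch = list_first_entry(&kgem->pinned_batches[class],
+			 struct kgem_bo, list);
+list_move_tail(&batch->list, &kgem->pinned_batches[class]); /* LRU */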
+
+void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
{
struct drm_i915_gem_get_aperture aperture;
size_t totalram;
@@ -787,12 +961,36 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
kgem->fd = fd;
kgem->gen = gen;
- kgem->has_blt = gem_param(kgem, I915_PARAM_HAS_BLT) > 0;
+ list_init(&kgem->requests[0]);
+ list_init(&kgem->requests[1]);
+ list_init(&kgem->batch_buffers);
+ list_init(&kgem->active_buffers);
+ list_init(&kgem->flushing);
+ list_init(&kgem->large);
+ list_init(&kgem->large_inactive);
+ list_init(&kgem->snoop);
+ list_init(&kgem->scanout);
+ for (i = 0; i < ARRAY_SIZE(kgem->pinned_batches); i++)
+ list_init(&kgem->pinned_batches[i]);
+ for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
+ list_init(&kgem->inactive[i]);
+ for (i = 0; i < ARRAY_SIZE(kgem->active); i++) {
+ for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++)
+ list_init(&kgem->active[i][j]);
+ }
+ for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) {
+ for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++)
+ list_init(&kgem->vma[i].inactive[j]);
+ }
+ kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
+ kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;
+
+ kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0;
DBG(("%s: has BLT ring? %d\n", __FUNCTION__,
kgem->has_blt));
kgem->has_relaxed_delta =
- gem_param(kgem, I915_PARAM_HAS_RELAXED_DELTA) > 0;
+ gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0;
DBG(("%s: has relaxed delta? %d\n", __FUNCTION__,
kgem->has_relaxed_delta));
@@ -812,16 +1010,32 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
DBG(("%s: has userptr? %d\n", __FUNCTION__,
kgem->has_userptr));
+ kgem->has_no_reloc = test_has_no_reloc(kgem);
+ DBG(("%s: has no-reloc? %d\n", __FUNCTION__,
+ kgem->has_no_reloc));
+
+ kgem->has_handle_lut = test_has_handle_lut(kgem);
+ DBG(("%s: has handle-lut? %d\n", __FUNCTION__,
+ kgem->has_handle_lut));
+
kgem->has_semaphores = false;
if (kgem->has_blt && test_has_semaphores_enabled(kgem))
kgem->has_semaphores = true;
DBG(("%s: semaphores enabled? %d\n", __FUNCTION__,
kgem->has_semaphores));
- kgem->can_blt_cpu = gen >= 30;
+ kgem->can_blt_cpu = gen >= 030;
DBG(("%s: can blt to cpu? %d\n", __FUNCTION__,
kgem->can_blt_cpu));
+ kgem->has_secure_batches = test_has_secure_batches(kgem);
+ DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__,
+ kgem->has_secure_batches));
+
+ kgem->has_pinned_batches = test_has_pinned_batches(kgem);
+ DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__,
+ kgem->has_pinned_batches));
+
if (!is_hw_supported(kgem, dev)) {
xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
"Detected unsupported/dysfunctional hardware, disabling acceleration.\n");
@@ -833,47 +1047,35 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
}
kgem->batch_size = ARRAY_SIZE(kgem->batch);
- if (gen == 22)
+ if (gen == 020 && !kgem->has_pinned_batches)
+ /* Limited to what we can pin */
+ kgem->batch_size = 4*1024;
+ if (gen == 022)
/* 865g cannot handle a batch spanning multiple pages */
kgem->batch_size = PAGE_SIZE / sizeof(uint32_t);
- if (gen >= 70 && gen < 80)
+ if ((gen >> 3) == 7)
kgem->batch_size = 16*1024;
if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024)
kgem->batch_size = 4*1024;
+ if (!kgem_init_pinned_batches(kgem) && gen == 020) {
+ xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
+ "Unable to reserve memory for GPU, disabling acceleration.\n");
+ kgem->wedged = 1;
+ }
+
DBG(("%s: maximum batch size? %d\n", __FUNCTION__,
kgem->batch_size));
kgem->min_alignment = 4;
- if (gen < 40)
+ if (gen < 040)
kgem->min_alignment = 64;
kgem->half_cpu_cache_pages = cpu_cache_size() >> 13;
DBG(("%s: half cpu cache %d pages\n", __FUNCTION__,
kgem->half_cpu_cache_pages));
- list_init(&kgem->requests[0]);
- list_init(&kgem->requests[1]);
- list_init(&kgem->batch_buffers);
- list_init(&kgem->active_buffers);
- list_init(&kgem->flushing);
- list_init(&kgem->large);
- list_init(&kgem->large_inactive);
- list_init(&kgem->snoop);
- for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
- list_init(&kgem->inactive[i]);
- for (i = 0; i < ARRAY_SIZE(kgem->active); i++) {
- for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++)
- list_init(&kgem->active[i][j]);
- }
- for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) {
- for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++)
- list_init(&kgem->vma[i].inactive[j]);
- }
- kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
- kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;
-
- kgem->next_request = __kgem_request_alloc();
+ kgem->next_request = __kgem_request_alloc(kgem);
DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? %d\n", __FUNCTION__,
!DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_cacheing),
@@ -885,10 +1087,15 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
if (aperture.aper_size == 0)
aperture.aper_size = 64*1024*1024;
+ DBG(("%s: aperture size %lld, available now %lld\n",
+ __FUNCTION__,
+ (long long)aperture.aper_size,
+ (long long)aperture.aper_available_size));
+
kgem->aperture_total = aperture.aper_size;
kgem->aperture_high = aperture.aper_size * 3/4;
kgem->aperture_low = aperture.aper_size * 1/3;
- if (gen < 33) {
+ if (gen < 033) {
/* Severe alignment penalties */
kgem->aperture_high /= 2;
kgem->aperture_low /= 2;
@@ -907,21 +1114,15 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
kgem->buffer_size = 64 * 1024;
while (kgem->buffer_size < kgem->aperture_mappable >> 10)
kgem->buffer_size *= 2;
+ if (kgem->buffer_size >> 12 > kgem->half_cpu_cache_pages)
+ kgem->buffer_size = kgem->half_cpu_cache_pages << 12;
DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__,
kgem->buffer_size, kgem->buffer_size / 1024));
- kgem->max_object_size = 2 * aperture.aper_size / 3;
+ kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10;
kgem->max_gpu_size = kgem->max_object_size;
if (!kgem->has_llc)
kgem->max_gpu_size = MAX_CACHE_SIZE;
- if (gen < 40) {
- /* If we have to use fences for blitting, we have to make
- * sure we can fit them into the aperture.
- */
- kgem->max_gpu_size = kgem->aperture_mappable / 2;
- if (kgem->max_gpu_size > kgem->aperture_low)
- kgem->max_gpu_size = kgem->aperture_low;
- }
totalram = total_ram_size();
if (totalram == 0) {
@@ -935,12 +1136,9 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
if (kgem->max_gpu_size > totalram / 4)
kgem->max_gpu_size = totalram / 4;
- half_gpu_max = kgem->max_gpu_size / 2;
- if (kgem->gen >= 40)
- kgem->max_cpu_size = half_gpu_max;
- else
- kgem->max_cpu_size = kgem->max_object_size;
+ kgem->max_cpu_size = kgem->max_object_size;
+ half_gpu_max = kgem->max_gpu_size / 2;
kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
if (kgem->max_copy_tile_size > half_gpu_max)
kgem->max_copy_tile_size = half_gpu_max;
@@ -981,6 +1179,14 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
if ((int)kgem->fence_max < 0)
kgem->fence_max = 5; /* minimum safe value for all hw */
DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max));
+
+ kgem->batch_flags_base = 0;
+ if (kgem->has_no_reloc)
+ kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC;
+ if (kgem->has_handle_lut)
+ kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
+ if (kgem->has_pinned_batches)
+ kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED;
}
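
batch_flags_base gathers, once at init, the execbuffer2 flags implied by the probed kernel features; each submission then only ORs in per-batch bits. Roughly how it is consumed at submit time (field names are those of struct drm_i915_gem_execbuffer2; the exact code lives in kgem_submit()):

struct drm_i915_gem_execbuffer2 execbuf;

memset(&execbuf, 0, sizeof(execbuf));
execbuf.buffers_ptr = (uintptr_t)kgem->exec;
execbuf.buffer_count = kgem->nexec;
execbuf.batch_len = kgem->nbatch * sizeof(uint32_t);
execbuf.flags = kgem->ring | kgem->batch_flags_base;
drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);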
/* XXX hopefully a good approximation */
@@ -1013,9 +1219,9 @@ static uint32_t kgem_untiled_pitch(struct kgem *kgem,
void kgem_get_tile_size(struct kgem *kgem, int tiling,
int *tile_width, int *tile_height, int *tile_size)
{
- if (kgem->gen <= 30) {
+ if (kgem->gen <= 030) {
if (tiling) {
- if (kgem->gen < 30) {
+ if (kgem->gen < 030) {
*tile_width = 128;
*tile_height = 16;
*tile_size = 2048;
@@ -1064,14 +1270,14 @@ static uint32_t kgem_surface_size(struct kgem *kgem,
assert(width <= MAXSHORT);
assert(height <= MAXSHORT);
- if (kgem->gen <= 30) {
+ if (kgem->gen <= 030) {
if (tiling) {
- if (kgem->gen < 30) {
+ if (kgem->gen < 030) {
tile_width = 128;
- tile_height = 16;
+ tile_height = 32;
} else {
tile_width = 512;
- tile_height = 8;
+ tile_height = 16;
}
} else {
tile_width = 2 * bpp >> 3;
@@ -1087,19 +1293,21 @@ static uint32_t kgem_surface_size(struct kgem *kgem,
kgem_pitch_alignment(kgem, flags));
tile_height = 2;
break;
+
+ /* XXX align to an even tile row */
case I915_TILING_X:
tile_width = 512;
- tile_height = 8;
+ tile_height = 16;
break;
case I915_TILING_Y:
tile_width = 128;
- tile_height = 32;
+ tile_height = 64;
break;
}
*pitch = ALIGN(width * bpp / 8, tile_width);
height = ALIGN(height, tile_height);
- if (kgem->gen >= 40)
+ if (kgem->gen >= 040)
return PAGE_ALIGN(*pitch * height);
/* If it is too wide for the blitter, don't even bother. */
@@ -1120,7 +1328,7 @@ static uint32_t kgem_surface_size(struct kgem *kgem,
return PAGE_ALIGN(size);
/* We need to allocate a pot fence region for a tiled buffer. */
- if (kgem->gen < 30)
+ if (kgem->gen < 030)
tile_width = 512 * 1024;
else
tile_width = 1024 * 1024;
@@ -1134,18 +1342,19 @@ static uint32_t kgem_aligned_height(struct kgem *kgem,
{
uint32_t tile_height;
- if (kgem->gen <= 30) {
- tile_height = tiling ? kgem->gen < 30 ? 16 : 8 : 1;
+ if (kgem->gen <= 030) {
+ tile_height = tiling ? kgem->gen < 030 ? 32 : 16 : 1;
} else switch (tiling) {
+ /* XXX align to an even tile row */
default:
case I915_TILING_NONE:
- tile_height = 2;
+ tile_height = 1;
break;
case I915_TILING_X:
- tile_height = 8;
+ tile_height = 16;
break;
case I915_TILING_Y:
- tile_height = 32;
+ tile_height = 64;
break;
}
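
The doubled tile heights pad every surface to an even number of tile rows (see the XXX notes above). Worked examples of the new rounding on gen4+, illustration only:

/* kgem_aligned_height(kgem, 100, I915_TILING_X) -> ALIGN(100, 16) == 112
 * kgem_aligned_height(kgem, 100, I915_TILING_Y) -> ALIGN(100, 64) == 128
 * (previously 8 and 32 rows, i.e. 104 and 128) */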
@@ -1161,6 +1370,7 @@ kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo)
__FUNCTION__, bo->handle, kgem->nexec));
assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
+ bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;
exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec));
exec->handle = bo->handle;
exec->offset = bo->presumed_offset;
@@ -1170,10 +1380,10 @@ kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo)
return exec;
}
-void _kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
+static void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
{
bo->exec = kgem_add_handle(kgem, bo);
- bo->rq = kgem->next_request;
+ bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring);
list_move_tail(&bo->request, &kgem->next_request->buffers);
@@ -1194,14 +1404,30 @@ static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo)
{
int n;
- for (n = 0; n < kgem->nreloc; n++) {
- if (kgem->reloc[n].target_handle == 0) {
- kgem->reloc[n].target_handle = bo->handle;
- kgem->reloc[n].presumed_offset = bo->presumed_offset;
- kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
- kgem->reloc[n].delta + bo->presumed_offset;
+ if (kgem->nreloc__self == 0)
+ return;
+
+ for (n = 0; n < kgem->nreloc__self; n++) {
+ int i = kgem->reloc__self[n];
+ assert(kgem->reloc[i].target_handle == ~0U);
+ kgem->reloc[i].target_handle = bo->target_handle;
+ kgem->reloc[i].presumed_offset = bo->presumed_offset;
+ kgem->batch[kgem->reloc[i].offset/sizeof(kgem->batch[0])] =
+ kgem->reloc[i].delta + bo->presumed_offset;
+ }
+
+ if (n == 256) {
+ for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) {
+ if (kgem->reloc[n].target_handle == ~0U) {
+ kgem->reloc[n].target_handle = bo->target_handle;
+ kgem->reloc[n].presumed_offset = bo->presumed_offset;
+ kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
+ kgem->reloc[n].delta + bo->presumed_offset;
+ }
}
+
}
+
}
static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo)
@@ -1284,11 +1510,12 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
assert(bo->rq == NULL);
assert(bo->exec == NULL);
assert(bo->domain != DOMAIN_GPU);
- assert(!kgem_busy(kgem, bo->handle));
assert(!bo->proxy);
assert(!bo->io);
+ assert(!bo->scanout);
assert(!bo->needs_flush);
assert(list_is_empty(&bo->vma));
+ ASSERT_IDLE(kgem, bo->handle);
kgem->need_expire = true;
@@ -1302,7 +1529,7 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
if (bo->map) {
int type = IS_CPU_MAP(bo->map);
if (bucket(bo) >= NUM_CACHE_BUCKETS ||
- (!type && !kgem_bo_is_mappable(kgem, bo))) {
+ (!type && !__kgem_bo_is_mappable(kgem, bo))) {
munmap(MAP(bo->map), bytes(bo));
bo->map = NULL;
}
@@ -1313,6 +1540,32 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
}
}
+static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo)
+{
+ struct kgem_bo *base;
+
+ if (!bo->io)
+ return bo;
+
+ assert(!bo->snoop);
+ base = malloc(sizeof(*base));
+ if (base) {
+ DBG(("%s: transferring io handle=%d to bo\n",
+ __FUNCTION__, bo->handle));
+ /* transfer the handle to a minimum bo */
+ memcpy(base, bo, sizeof(*base));
+ base->io = false;
+ list_init(&base->list);
+ list_replace(&bo->request, &base->request);
+ list_replace(&bo->vma, &base->vma);
+ free(bo);
+ bo = base;
+ } else
+ bo->reusable = false;
+
+ return bo;
+}
+
inline static void kgem_bo_remove_from_inactive(struct kgem *kgem,
struct kgem_bo *bo)
{
@@ -1335,16 +1588,14 @@ inline static void kgem_bo_remove_from_active(struct kgem *kgem,
list_del(&bo->list);
assert(bo->rq != NULL);
- if (bo->rq == &_kgem_static_request)
+ if (bo->rq == (void *)kgem)
list_del(&bo->request);
assert(list_is_empty(&bo->vma));
}
static void kgem_bo_clear_scanout(struct kgem *kgem, struct kgem_bo *bo)
{
- if (!bo->scanout)
- return;
-
+ assert(bo->scanout);
assert(bo->proxy == NULL);
DBG(("%s: handle=%d, fb=%d (reusable=%d)\n",
@@ -1356,7 +1607,6 @@ static void kgem_bo_clear_scanout(struct kgem *kgem, struct kgem_bo *bo)
}
bo->scanout = false;
- bo->needs_flush = true;
bo->flush = false;
bo->reusable = true;
@@ -1376,6 +1626,20 @@ static void _kgem_bo_delete_buffer(struct kgem *kgem, struct kgem_bo *bo)
io->used = bo->delta;
}
+static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo)
+{
+ assert(bo->refcnt == 0);
+ assert(bo->exec == NULL);
+ assert(bo->scanout);
+ assert(bo->delta);
+ assert(!bo->snoop);
+ assert(!bo->io);
+
+ DBG(("%s: moving %d [fb %d] to scanout cachee\n", __FUNCTION__,
+ bo->handle, bo->delta));
+ list_move(&bo->list, &kgem->scanout);
+}
+
static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->refcnt == 0);
@@ -1416,6 +1680,7 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
list_for_each_entry(bo, &kgem->snoop, list) {
assert(bo->refcnt == 0);
assert(bo->snoop);
+ assert(!bo->scanout);
assert(bo->proxy == NULL);
assert(bo->tiling == I915_TILING_NONE);
assert(bo->rq == NULL);
@@ -1462,7 +1727,6 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
assert(bo->proxy == NULL);
bo->binding.offset = 0;
- kgem_bo_clear_scanout(kgem, bo);
if (DBG_NO_CACHE)
goto destroy;
@@ -1471,39 +1735,22 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
DBG(("%s: handle=%d is snooped\n", __FUNCTION__, bo->handle));
assert(!bo->flush);
assert(list_is_empty(&bo->list));
+ if (bo->exec == NULL && bo->rq && !__kgem_busy(kgem, bo->handle))
+ __kgem_bo_clear_busy(bo);
if (bo->rq == NULL) {
- if (bo->needs_flush && kgem_busy(kgem, bo->handle)) {
- DBG(("%s: handle=%d is snooped, tracking until free\n",
- __FUNCTION__, bo->handle));
- list_add(&bo->request, &kgem->flushing);
- bo->rq = &_kgem_static_request;
- }
- }
- if (bo->rq == NULL)
+ assert(!bo->needs_flush);
kgem_bo_move_to_snoop(kgem, bo);
+ }
return;
}
- if (bo->io) {
- struct kgem_bo *base;
-
- assert(!bo->snoop);
- base = malloc(sizeof(*base));
- if (base) {
- DBG(("%s: transferring io handle=%d to bo\n",
- __FUNCTION__, bo->handle));
- /* transfer the handle to a minimum bo */
- memcpy(base, bo, sizeof(*base));
- base->io = false;
- list_init(&base->list);
- list_replace(&bo->request, &base->request);
- list_replace(&bo->vma, &base->vma);
- free(bo);
- bo = base;
- } else
- bo->reusable = false;
+ if (bo->scanout) {
+ kgem_bo_move_to_scanout(kgem, bo);
+ return;
}
+ if (bo->io)
+ bo = kgem_bo_replace_io(bo);
if (!bo->reusable) {
DBG(("%s: handle=%d, not reusable\n",
__FUNCTION__, bo->handle));
@@ -1519,6 +1766,20 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
assert(bo->io == false);
assert(bo->scanout == false);
+ if (bo->exec && kgem->nexec == 1) {
+ DBG(("%s: only handle in batch, discarding last operations\n",
+ __FUNCTION__));
+ assert(bo->exec == &kgem->exec[0]);
+ assert(kgem->exec[0].handle == bo->handle);
+ assert(RQ(bo->rq) == kgem->next_request);
+ bo->refcnt = 1;
+ kgem_reset(kgem);
+ bo->refcnt = 0;
+ }
+
+ if (bo->rq && bo->exec == NULL && !__kgem_busy(kgem, bo->handle))
+ __kgem_bo_clear_busy(bo);
+
if (bo->rq) {
struct list *cache;
@@ -1534,26 +1795,6 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
assert(bo->exec == NULL);
assert(list_is_empty(&bo->request));
- if (bo->needs_flush) {
- if ((bo->needs_flush = kgem_busy(kgem, bo->handle))) {
- struct list *cache;
-
- DBG(("%s: handle=%d -> flushing\n",
- __FUNCTION__, bo->handle));
-
- list_add(&bo->request, &kgem->flushing);
- if (bucket(bo) < NUM_CACHE_BUCKETS)
- cache = &kgem->active[bucket(bo)][bo->tiling];
- else
- cache = &kgem->large;
- list_add(&bo->list, cache);
- bo->rq = &_kgem_static_request;
- return;
- }
-
- bo->domain = DOMAIN_NONE;
- }
-
if (!IS_CPU_MAP(bo->map)) {
if (!kgem_bo_set_purgeable(kgem, bo))
goto destroy;
@@ -1627,27 +1868,27 @@ static bool kgem_retire__flushing(struct kgem *kgem)
bool retired = false;
list_for_each_entry_safe(bo, next, &kgem->flushing, request) {
- assert(bo->rq == &_kgem_static_request);
+ assert(bo->rq == (void *)kgem);
assert(bo->exec == NULL);
- if (kgem_busy(kgem, bo->handle))
+ if (__kgem_busy(kgem, bo->handle))
break;
- bo->needs_flush = false;
- bo->domain = DOMAIN_NONE;
- bo->rq = NULL;
- list_del(&bo->request);
+ __kgem_bo_clear_busy(bo);
- if (!bo->refcnt) {
- if (bo->snoop) {
- kgem_bo_move_to_snoop(kgem, bo);
- } else if (kgem_bo_set_purgeable(kgem, bo)) {
- assert(bo->reusable);
- kgem_bo_move_to_inactive(kgem, bo);
- retired = true;
- } else
- kgem_bo_free(kgem, bo);
- }
+ if (bo->refcnt)
+ continue;
+
+ if (bo->snoop) {
+ kgem_bo_move_to_snoop(kgem, bo);
+ } else if (bo->scanout) {
+ kgem_bo_move_to_scanout(kgem, bo);
+ } else if ((bo = kgem_bo_replace_io(bo))->reusable &&
+ kgem_bo_set_purgeable(kgem, bo)) {
+ kgem_bo_move_to_inactive(kgem, bo);
+ retired = true;
+ } else
+ kgem_bo_free(kgem, bo);
}
#if HAS_DEBUG_FULL
{
@@ -1658,149 +1899,143 @@ static bool kgem_retire__flushing(struct kgem *kgem)
}
#endif
+ kgem->need_retire |= !list_is_empty(&kgem->flushing);
+
return retired;
}
-static bool kgem_retire__requests(struct kgem *kgem)
+
+static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
{
- struct kgem_bo *bo;
bool retired = false;
- int n;
- for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
- while (!list_is_empty(&kgem->requests[n])) {
- struct kgem_request *rq;
+ DBG(("%s: request %d complete\n",
+ __FUNCTION__, rq->bo->handle));
- rq = list_first_entry(&kgem->requests[n],
- struct kgem_request,
- list);
- if (kgem_busy(kgem, rq->bo->handle))
- break;
-
- DBG(("%s: request %d complete\n",
- __FUNCTION__, rq->bo->handle));
+ while (!list_is_empty(&rq->buffers)) {
+ struct kgem_bo *bo;
- while (!list_is_empty(&rq->buffers)) {
- bo = list_first_entry(&rq->buffers,
- struct kgem_bo,
- request);
+ bo = list_first_entry(&rq->buffers,
+ struct kgem_bo,
+ request);
- assert(bo->rq == rq);
- assert(bo->exec == NULL);
- assert(bo->domain == DOMAIN_GPU);
-
- list_del(&bo->request);
-
- if (bo->needs_flush)
- bo->needs_flush = kgem_busy(kgem, bo->handle);
- if (bo->needs_flush) {
- DBG(("%s: moving %d to flushing\n",
- __FUNCTION__, bo->handle));
- list_add(&bo->request, &kgem->flushing);
- bo->rq = &_kgem_static_request;
- } else {
- bo->domain = DOMAIN_NONE;
- bo->rq = NULL;
- }
+ assert(RQ(bo->rq) == rq);
+ assert(bo->exec == NULL);
+ assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);
- if (bo->refcnt)
- continue;
+ list_del(&bo->request);
- if (bo->snoop) {
- if (bo->needs_flush) {
- list_add(&bo->request, &kgem->flushing);
- bo->rq = &_kgem_static_request;
- } else {
- kgem_bo_move_to_snoop(kgem, bo);
- }
- continue;
- }
+ if (bo->needs_flush)
+ bo->needs_flush = __kgem_busy(kgem, bo->handle);
+ if (bo->needs_flush) {
+ DBG(("%s: moving %d to flushing\n",
+ __FUNCTION__, bo->handle));
+ list_add(&bo->request, &kgem->flushing);
+ bo->rq = (void *)kgem;
+ continue;
+ }
- if (!bo->reusable) {
- DBG(("%s: closing %d\n",
- __FUNCTION__, bo->handle));
- kgem_bo_free(kgem, bo);
- continue;
- }
+ bo->domain = DOMAIN_NONE;
+ bo->rq = NULL;
+ if (bo->refcnt)
+ continue;
- if (!bo->needs_flush) {
- if (kgem_bo_set_purgeable(kgem, bo)) {
- kgem_bo_move_to_inactive(kgem, bo);
- retired = true;
- } else {
- DBG(("%s: closing %d\n",
- __FUNCTION__, bo->handle));
- kgem_bo_free(kgem, bo);
- }
- }
- }
+ if (bo->snoop) {
+ kgem_bo_move_to_snoop(kgem, bo);
+ } else if (bo->scanout) {
+ kgem_bo_move_to_scanout(kgem, bo);
+ } else if ((bo = kgem_bo_replace_io(bo))->reusable &&
+ kgem_bo_set_purgeable(kgem, bo)) {
+ kgem_bo_move_to_inactive(kgem, bo);
+ retired = true;
+ } else {
+ DBG(("%s: closing %d\n",
+ __FUNCTION__, bo->handle));
+ kgem_bo_free(kgem, bo);
+ }
+ }
- assert(rq->bo->rq == NULL);
- assert(list_is_empty(&rq->bo->request));
-
- if (--rq->bo->refcnt == 0) {
- if (kgem_bo_set_purgeable(kgem, rq->bo)) {
- kgem_bo_move_to_inactive(kgem, rq->bo);
- retired = true;
- } else {
- DBG(("%s: closing %d\n",
- __FUNCTION__, rq->bo->handle));
- kgem_bo_free(kgem, rq->bo);
- }
- }
+ assert(rq->bo->rq == NULL);
+ assert(list_is_empty(&rq->bo->request));
- __kgem_request_free(rq);
- kgem->num_requests--;
+ if (--rq->bo->refcnt == 0) {
+ if (kgem_bo_set_purgeable(kgem, rq->bo)) {
+ kgem_bo_move_to_inactive(kgem, rq->bo);
+ retired = true;
+ } else {
+ DBG(("%s: closing %d\n",
+ __FUNCTION__, rq->bo->handle));
+ kgem_bo_free(kgem, rq->bo);
}
+ }
-#if HAS_DEBUG_FULL
- {
- int count = 0;
+ __kgem_request_free(rq);
+ return retired;
+}
- list_for_each_entry(bo, &kgem->requests[n], request)
- count++;
+static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
+{
+ bool retired = false;
- bo = NULL;
- if (!list_is_empty(&kgem->requests[n]))
- bo = list_first_entry(&kgem->requests[n],
- struct kgem_request,
- list)->bo;
+ while (!list_is_empty(&kgem->requests[ring])) {
+ struct kgem_request *rq;
- ErrorF("%s: ring=%d, %d outstanding requests, oldest=%d\n",
- __FUNCTION__, n, count, bo ? bo->handle : 0);
- }
-#endif
+ rq = list_first_entry(&kgem->requests[ring],
+ struct kgem_request,
+ list);
+ if (__kgem_busy(kgem, rq->bo->handle))
+ break;
+
+ retired |= __kgem_retire_rq(kgem, rq);
}
#if HAS_DEBUG_FULL
{
+ struct kgem_bo *bo;
int count = 0;
- for (n = 0; n < ARRAY_SIZE(kgem->requests); n++)
- list_for_each_entry(bo, &kgem->requests[n], request)
- count++;
+ list_for_each_entry(bo, &kgem->requests[ring], request)
+ count++;
- assert(count == kgem->num_requests);
+ bo = NULL;
+ if (!list_is_empty(&kgem->requests[ring]))
+ bo = list_first_entry(&kgem->requests[ring],
+ struct kgem_request,
+ list)->bo;
+
+ ErrorF("%s: ring=%d, %d outstanding requests, oldest=%d\n",
+ __FUNCTION__, ring, count, bo ? bo->handle : 0);
}
#endif
return retired;
}
+static bool kgem_retire__requests(struct kgem *kgem)
+{
+ bool retired = false;
+ int n;
+
+ for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
+ retired |= kgem_retire__requests_ring(kgem, n);
+ kgem->need_retire |= !list_is_empty(&kgem->requests[n]);
+ }
+
+ return retired;
+}
+
bool kgem_retire(struct kgem *kgem)
{
bool retired = false;
DBG(("%s\n", __FUNCTION__));
+ kgem->need_retire = false;
+
retired |= kgem_retire__flushing(kgem);
- if (kgem->num_requests)
- retired |= kgem_retire__requests(kgem);
+ retired |= kgem_retire__requests(kgem);
retired |= kgem_retire__buffers(kgem);
- kgem->need_retire =
- kgem->num_requests ||
- !list_is_empty(&kgem->flushing);
DBG(("%s -- retired=%d, need_retire=%d\n",
__FUNCTION__, retired, kgem->need_retire));
@@ -1809,31 +2044,25 @@ bool kgem_retire(struct kgem *kgem)
return retired;
}
-bool __kgem_is_idle(struct kgem *kgem)
+bool __kgem_ring_is_idle(struct kgem *kgem, int ring)
{
- int n;
+ struct kgem_request *rq;
- assert(kgem->num_requests);
+ assert(!list_is_empty(&kgem->requests[ring]));
- for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
- struct kgem_request *rq;
-
- if (list_is_empty(&kgem->requests[n]))
- continue;
+ rq = list_last_entry(&kgem->requests[ring],
+ struct kgem_request, list);
+ if (__kgem_busy(kgem, rq->bo->handle)) {
+ DBG(("%s: last requests handle=%d still busy\n",
+ __FUNCTION__, rq->bo->handle));
+ return false;
+ }
- rq = list_last_entry(&kgem->requests[n],
- struct kgem_request, list);
- if (kgem_busy(kgem, rq->bo->handle)) {
- DBG(("%s: last requests handle=%d still busy\n",
- __FUNCTION__, rq->bo->handle));
- return false;
- }
+ DBG(("%s: ring=%d idle (handle=%d)\n",
+ __FUNCTION__, ring, rq->bo->handle));
- DBG(("%s: ring=%d idle (handle=%d)\n",
- __FUNCTION__, n, rq->bo->handle));
- }
- kgem_retire__requests(kgem);
- assert(kgem->num_requests == 0);
+ kgem_retire__requests_ring(kgem, ring);
+ assert(list_is_empty(&kgem->requests[ring]));
return true;
}
@@ -1853,10 +2082,11 @@ static void kgem_commit(struct kgem *kgem)
assert(!bo->purged);
assert(bo->exec);
assert(bo->proxy == NULL || bo->exec == &_kgem_dummy_exec);
- assert(bo->rq == rq || (bo->proxy->rq == rq));
+ assert(RQ(bo->rq) == rq || (RQ(bo->proxy->rq) == rq));
bo->presumed_offset = bo->exec->offset;
bo->exec = NULL;
+ bo->target_handle = -1;
if (!bo->refcnt && !bo->reusable) {
assert(!bo->snoop);
@@ -1870,13 +2100,14 @@ static void kgem_commit(struct kgem *kgem)
if (bo->proxy) {
/* proxies are not used for domain tracking */
- list_del(&bo->request);
- bo->rq = NULL;
bo->exec = NULL;
+ __kgem_bo_clear_busy(bo);
}
+
+ kgem->scanout_busy |= bo->scanout;
}
- if (rq == &_kgem_static_request) {
+ if (rq == &kgem->static_request) {
struct drm_i915_gem_set_domain set_domain;
DBG(("%s: syncing due to allocation failure\n", __FUNCTION__));
@@ -1894,10 +2125,10 @@ static void kgem_commit(struct kgem *kgem)
assert(list_is_empty(&rq->buffers));
gem_close(kgem->fd, rq->bo->handle);
+ kgem_cleanup_cache(kgem);
} else {
list_add_tail(&rq->list, &kgem->requests[rq->ring]);
kgem->need_throttle = kgem->need_retire = 1;
- kgem->num_requests++;
}
kgem->next_request = NULL;
@@ -1946,13 +2177,12 @@ static void kgem_finish_buffers(struct kgem *kgem)
assert(!bo->need_io);
- used = ALIGN(bo->used + PAGE_SIZE-1, PAGE_SIZE);
+ used = ALIGN(bo->used, PAGE_SIZE);
if (!DBG_NO_UPLOAD_ACTIVE &&
used + PAGE_SIZE <= bytes(&bo->base) &&
- (kgem->has_llc || !IS_CPU_MAP(bo->base.map))) {
+ (kgem->has_llc || !IS_CPU_MAP(bo->base.map) || bo->base.snoop)) {
DBG(("%s: retaining upload buffer (%d/%d)\n",
__FUNCTION__, bo->used, bytes(&bo->base)));
- assert(!bo->base.snoop);
bo->used = used;
list_move(&bo->base.list,
&kgem->active_buffers);
@@ -1973,16 +2203,65 @@ static void kgem_finish_buffers(struct kgem *kgem)
}
assert(bo->need_io);
- assert(bo->base.rq == kgem->next_request);
+ assert(bo->base.rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
assert(bo->base.domain != DOMAIN_GPU);
if (bo->base.refcnt == 1 &&
bo->base.size.pages.count > 1 &&
bo->used < bytes(&bo->base) / 2) {
struct kgem_bo *shrink;
+ unsigned alloc = NUM_PAGES(bo->used);
+
+ shrink = search_snoop_cache(kgem, alloc,
+ CREATE_INACTIVE | CREATE_NO_RETIRE);
+ if (shrink) {
+ void *map;
+ int n;
- shrink = search_linear_cache(kgem,
- PAGE_ALIGN(bo->used),
+ DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
+ __FUNCTION__,
+ bo->used, bytes(&bo->base), bytes(shrink),
+ bo->base.handle, shrink->handle));
+
+ assert(bo->used <= bytes(shrink));
+ map = kgem_bo_map__cpu(kgem, shrink);
+ if (map) {
+ kgem_bo_sync__cpu(kgem, shrink);
+ memcpy(map, bo->mem, bo->used);
+
+ shrink->target_handle =
+ kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
+ for (n = 0; n < kgem->nreloc; n++) {
+ if (kgem->reloc[n].target_handle == bo->base.target_handle) {
+ kgem->reloc[n].target_handle = shrink->target_handle;
+ kgem->reloc[n].presumed_offset = shrink->presumed_offset;
+ kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
+ kgem->reloc[n].delta + shrink->presumed_offset;
+ }
+ }
+
+ bo->base.exec->handle = shrink->handle;
+ bo->base.exec->offset = shrink->presumed_offset;
+ shrink->exec = bo->base.exec;
+ shrink->rq = bo->base.rq;
+ list_replace(&bo->base.request,
+ &shrink->request);
+ list_init(&bo->base.request);
+ shrink->needs_flush = bo->base.dirty;
+
+ bo->base.exec = NULL;
+ bo->base.rq = NULL;
+ bo->base.dirty = false;
+ bo->base.needs_flush = false;
+ bo->used = 0;
+
+ goto decouple;
+ }
+
+ __kgem_bo_destroy(kgem, shrink);
+ }
+
+ shrink = search_linear_cache(kgem, alloc,
CREATE_INACTIVE | CREATE_NO_RETIRE);
if (shrink) {
int n;
@@ -1993,40 +2272,44 @@ static void kgem_finish_buffers(struct kgem *kgem)
bo->base.handle, shrink->handle));
assert(bo->used <= bytes(shrink));
- gem_write(kgem->fd, shrink->handle,
- 0, bo->used, bo->mem);
-
- for (n = 0; n < kgem->nreloc; n++) {
- if (kgem->reloc[n].target_handle == bo->base.handle) {
- kgem->reloc[n].target_handle = shrink->handle;
- kgem->reloc[n].presumed_offset = shrink->presumed_offset;
- kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
- kgem->reloc[n].delta + shrink->presumed_offset;
+ if (gem_write(kgem->fd, shrink->handle,
+ 0, bo->used, bo->mem) == 0) {
+ shrink->target_handle =
+ kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
+ for (n = 0; n < kgem->nreloc; n++) {
+ if (kgem->reloc[n].target_handle == bo->base.target_handle) {
+ kgem->reloc[n].target_handle = shrink->target_handle;
+ kgem->reloc[n].presumed_offset = shrink->presumed_offset;
+ kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
+ kgem->reloc[n].delta + shrink->presumed_offset;
+ }
}
+
+ bo->base.exec->handle = shrink->handle;
+ bo->base.exec->offset = shrink->presumed_offset;
+ shrink->exec = bo->base.exec;
+ shrink->rq = bo->base.rq;
+ list_replace(&bo->base.request,
+ &shrink->request);
+ list_init(&bo->base.request);
+ shrink->needs_flush = bo->base.dirty;
+
+ bo->base.exec = NULL;
+ bo->base.rq = NULL;
+ bo->base.dirty = false;
+ bo->base.needs_flush = false;
+ bo->used = 0;
+
+ goto decouple;
}
- bo->base.exec->handle = shrink->handle;
- bo->base.exec->offset = shrink->presumed_offset;
- shrink->exec = bo->base.exec;
- shrink->rq = bo->base.rq;
- list_replace(&bo->base.request,
- &shrink->request);
- list_init(&bo->base.request);
- shrink->needs_flush = bo->base.dirty;
-
- bo->base.exec = NULL;
- bo->base.rq = NULL;
- bo->base.dirty = false;
- bo->base.needs_flush = false;
- bo->used = 0;
-
- goto decouple;
+ __kgem_bo_destroy(kgem, shrink);
}
}
DBG(("%s: handle=%d, uploading %d/%d\n",
__FUNCTION__, bo->base.handle, bo->used, bytes(&bo->base)));
- assert(!kgem_busy(kgem, bo->base.handle));
+ ASSERT_IDLE(kgem, bo->base.handle);
assert(bo->used <= bytes(&bo->base));
gem_write(kgem->fd, bo->base.handle,
0, bo->used, bo->mem);
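The two shrink paths above (snoopable cache first, then the linear cache) migrate a half-empty upload buffer into a smaller bo; the subtle step is retargeting every relocation that referenced the old buffer so the already-emitted batch stays valid. A condensed restatement of that fix-up, where old_target is a hypothetical stand-in for bo->base.target_handle:

	for (n = 0; n < kgem->nreloc; n++) {
		if (kgem->reloc[n].target_handle != old_target)
			continue;

		/* point the reloc at the replacement bo and refresh the
		 * presumed offset already written into the batch */
		kgem->reloc[n].target_handle = shrink->target_handle;
		kgem->reloc[n].presumed_offset = shrink->presumed_offset;
		kgem->batch[kgem->reloc[n].offset / sizeof(kgem->batch[0])] =
			kgem->reloc[n].delta + shrink->presumed_offset;
	}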
@@ -2058,11 +2341,9 @@ static void kgem_cleanup(struct kgem *kgem)
struct kgem_bo,
request);
- list_del(&bo->request);
- bo->rq = NULL;
bo->exec = NULL;
- bo->domain = DOMAIN_NONE;
bo->dirty = false;
+ __kgem_bo_clear_busy(bo);
if (bo->refcnt == 0)
kgem_bo_free(kgem, bo);
}
@@ -2071,7 +2352,6 @@ static void kgem_cleanup(struct kgem *kgem)
}
}
- kgem->num_requests = 0;
kgem_close_inactive(kgem);
}
@@ -2079,7 +2359,7 @@ static int kgem_batch_write(struct kgem *kgem, uint32_t handle, uint32_t size)
{
int ret;
- assert(!kgem_busy(kgem, handle));
+ ASSERT_IDLE(kgem, handle);
/* If there is no surface data, just upload the batch */
if (kgem->surface == kgem->batch_size)
@@ -2122,34 +2402,46 @@ void kgem_reset(struct kgem *kgem)
request);
list_del(&bo->request);
+ assert(RQ(bo->rq) == rq);
+
bo->binding.offset = 0;
bo->exec = NULL;
+ bo->target_handle = -1;
bo->dirty = false;
- bo->rq = NULL;
- bo->domain = DOMAIN_NONE;
- if (!bo->refcnt) {
+ if (bo->needs_flush && __kgem_busy(kgem, bo->handle)) {
+ list_add(&bo->request, &kgem->flushing);
+ bo->rq = (void *)kgem;
+ } else
+ __kgem_bo_clear_busy(bo);
+
+ if (!bo->refcnt && !bo->reusable) {
+ assert(!bo->snoop);
DBG(("%s: discarding handle=%d\n",
__FUNCTION__, bo->handle));
kgem_bo_free(kgem, bo);
}
}
- if (kgem->next_request != &_kgem_static_request)
- free(kgem->next_request);
+ if (rq != &kgem->static_request) {
+ list_init(&rq->list);
+ __kgem_request_free(rq);
+ }
}
kgem->nfence = 0;
kgem->nexec = 0;
kgem->nreloc = 0;
+ kgem->nreloc__self = 0;
kgem->aperture = 0;
kgem->aperture_fenced = 0;
kgem->nbatch = 0;
kgem->surface = kgem->batch_size;
kgem->mode = KGEM_NONE;
kgem->flush = 0;
+ kgem->batch_flags = kgem->batch_flags_base;
- kgem->next_request = __kgem_request_alloc();
+ kgem->next_request = __kgem_request_alloc(kgem);
kgem_sna_reset(kgem);
}
@@ -2173,7 +2465,7 @@ static int compact_batch_surface(struct kgem *kgem)
shrink *= sizeof(uint32_t);
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION &&
- kgem->reloc[n].target_handle == 0)
+ kgem->reloc[n].target_handle == ~0U)
kgem->reloc[n].delta -= shrink;
if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
@@ -2184,6 +2476,74 @@ static int compact_batch_surface(struct kgem *kgem)
return size * sizeof(uint32_t);
}
+static struct kgem_bo *
+kgem_create_batch(struct kgem *kgem, int size)
+{
+ struct drm_i915_gem_set_domain set_domain;
+ struct kgem_bo *bo;
+
+ if (size <= 4096) {
+ bo = list_first_entry(&kgem->pinned_batches[0],
+ struct kgem_bo,
+ list);
+ if (!bo->rq) {
+out_4096:
+ list_move_tail(&bo->list, &kgem->pinned_batches[0]);
+ return kgem_bo_reference(bo);
+ }
+
+ if (!__kgem_busy(kgem, bo->handle)) {
+ assert(RQ(bo->rq)->bo == bo);
+ __kgem_retire_rq(kgem, RQ(bo->rq));
+ goto out_4096;
+ }
+ }
+
+ if (size <= 16384) {
+ bo = list_first_entry(&kgem->pinned_batches[1],
+ struct kgem_bo,
+ list);
+ if (!bo->rq) {
+out_16384:
+ list_move_tail(&bo->list, &kgem->pinned_batches[1]);
+ return kgem_bo_reference(bo);
+ }
+
+ if (!__kgem_busy(kgem, bo->handle)) {
+ assert(RQ(bo->rq)->bo == bo);
+ __kgem_retire_rq(kgem, RQ(bo->rq));
+ goto out_16384;
+ }
+ }
+
+ if (kgem->gen == 020 && !kgem->has_pinned_batches) {
+ assert(size <= 16384);
+
+ bo = list_first_entry(&kgem->pinned_batches[size > 4096],
+ struct kgem_bo,
+ list);
+ list_move_tail(&bo->list, &kgem->pinned_batches[size > 4096]);
+
+ DBG(("%s: syncing due to busy batches\n", __FUNCTION__));
+
+ VG_CLEAR(set_domain);
+ set_domain.handle = bo->handle;
+ set_domain.read_domains = I915_GEM_DOMAIN_GTT;
+ set_domain.write_domain = I915_GEM_DOMAIN_GTT;
+ if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
+ DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
+ kgem_throttle(kgem);
+ return NULL;
+ }
+
+ kgem_retire(kgem);
+ assert(bo->rq == NULL);
+ return kgem_bo_reference(bo);
+ }
+
+ return kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
+}
+
void _kgem_submit(struct kgem *kgem)
{
struct kgem_request *rq;
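kgem_create_batch() above prefers recycling a pinned batch buffer and only allocates afresh when the matching pool is busy (gen2 without pinned-batch support instead stalls on set-domain). A compressed sketch of the selection policy, eliding the gen2 path and the fallback from the 4KiB to the 16KiB pool:

	static struct kgem_bo *pick_batch_sketch(struct kgem *kgem, int size)
	{
		int pool = size > 4096; /* 0: 4KiB batches, 1: 16KiB batches */
		struct kgem_bo *bo;

		bo = list_first_entry(&kgem->pinned_batches[pool],
				      struct kgem_bo, list);
		if (!bo->rq || !__kgem_busy(kgem, bo->handle)) {
			if (bo->rq) /* idle, but not yet retired */
				__kgem_retire_rq(kgem, RQ(bo->rq));
			list_move_tail(&bo->list, &kgem->pinned_batches[pool]);
			return kgem_bo_reference(bo);
		}

		/* all pinned batches in flight: fall back to a fresh bo */
		return kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
	}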
@@ -2212,7 +2572,7 @@ void _kgem_submit(struct kgem *kgem)
kgem_finish_buffers(kgem);
-#if HAS_DEBUG_FULL && SHOW_BATCH
+#if SHOW_BATCH
__kgem_batch_debug(kgem, batch_end);
#endif
@@ -2221,7 +2581,7 @@ void _kgem_submit(struct kgem *kgem)
size = compact_batch_surface(kgem);
else
size = kgem->nbatch * sizeof(kgem->batch[0]);
- rq->bo = kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
+ rq->bo = kgem_create_batch(kgem, size);
if (rq->bo) {
uint32_t handle = rq->bo->handle;
int i;
@@ -2233,13 +2593,14 @@ void _kgem_submit(struct kgem *kgem)
kgem->exec[i].relocation_count = kgem->nreloc;
kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc;
kgem->exec[i].alignment = 0;
- kgem->exec[i].offset = 0;
+ kgem->exec[i].offset = rq->bo->presumed_offset;
kgem->exec[i].flags = 0;
kgem->exec[i].rsvd1 = 0;
kgem->exec[i].rsvd2 = 0;
+ rq->bo->target_handle = kgem->has_handle_lut ? i : handle;
rq->bo->exec = &kgem->exec[i];
- rq->bo->rq = rq; /* useful sanity check */
+ rq->bo->rq = MAKE_REQUEST(rq, kgem->ring); /* useful sanity check */
list_add(&rq->bo->request, &rq->buffers);
rq->ring = kgem->ring == KGEM_BLT;
@@ -2258,7 +2619,7 @@ void _kgem_submit(struct kgem *kgem)
execbuf.num_cliprects = 0;
execbuf.DR1 = 0;
execbuf.DR4 = 0;
- execbuf.flags = kgem->ring;
+ execbuf.flags = kgem->ring | kgem->batch_flags;
execbuf.rsvd1 = 0;
execbuf.rsvd2 = 0;
@@ -2281,13 +2642,23 @@ void _kgem_submit(struct kgem *kgem)
DRM_IOCTL_I915_GEM_EXECBUFFER2,
&execbuf);
}
- if (ret == -1 && (errno == EIO || errno == EBUSY)) {
- DBG(("%s: GPU hang detected\n", __FUNCTION__));
- kgem_throttle(kgem);
- ret = 0;
+ if (DEBUG_SYNC && ret == 0) {
+ struct drm_i915_gem_set_domain set_domain;
+
+ VG_CLEAR(set_domain);
+ set_domain.handle = handle;
+ set_domain.read_domains = I915_GEM_DOMAIN_GTT;
+ set_domain.write_domain = I915_GEM_DOMAIN_GTT;
+
+ ret = drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
}
+ if (ret == -1) {
+ DBG(("%s: GPU hang detected [%d]\n",
+ __FUNCTION__, errno));
+ kgem_throttle(kgem);
+ kgem->wedged = true;
+
#if !NDEBUG
- if (ret < 0) {
ret = errno;
ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d: errno=%d\n",
kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
@@ -2323,33 +2694,16 @@ void _kgem_submit(struct kgem *kgem)
(int)kgem->reloc[i].presumed_offset);
}
- i = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
- if (i != -1) {
- i = write(i, kgem->batch, batch_end*sizeof(uint32_t));
- (void)i;
- }
-
- FatalError("SNA: failed to submit batchbuffer, errno=%d\n", ret);
- }
-#endif
-
- if (DEBUG_FLUSH_SYNC) {
- struct drm_i915_gem_set_domain set_domain;
-
- DBG(("%s: debug sync, starting\n", __FUNCTION__));
-
- VG_CLEAR(set_domain);
- set_domain.handle = handle;
- set_domain.read_domains = I915_GEM_DOMAIN_GTT;
- set_domain.write_domain = I915_GEM_DOMAIN_GTT;
+ if (DEBUG_SYNC) {
+ int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
+ if (fd != -1) {
+ write(fd, kgem->batch, batch_end*sizeof(uint32_t));
+ close(fd);
+ }
- ret = drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
- if (ret == -1) {
- DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
- kgem_throttle(kgem);
+ FatalError("SNA: failed to submit batchbuffer, errno=%d\n", ret);
}
-
- DBG(("%s: debug sync, completed\n", __FUNCTION__));
+#endif
}
}
@@ -2425,6 +2779,13 @@ bool kgem_expire_cache(struct kgem *kgem)
}
+ while (!list_is_empty(&kgem->scanout)) {
+ bo = list_first_entry(&kgem->scanout, struct kgem_bo, list);
+ list_del(&bo->list);
+ kgem_bo_clear_scanout(kgem, bo);
+ __kgem_bo_destroy(kgem, bo);
+ }
+
expire = 0;
list_for_each_entry(bo, &kgem->snoop, list) {
if (bo->delta) {
@@ -2619,7 +2980,7 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
return NULL;
}
- if (!__kgem_throttle_retire(kgem, 0)) {
+ if (!__kgem_throttle_retire(kgem, flags)) {
DBG(("%s: nothing retired\n", __FUNCTION__));
return NULL;
}
@@ -2642,6 +3003,7 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
assert(bo->proxy == NULL);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
+ assert(!bo->scanout);
if (num_pages > num_pages(bo)) {
DBG(("inactive too small: %d < %d\n",
@@ -2655,8 +3017,8 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
}
if (I915_TILING_NONE != bo->tiling &&
- gem_set_tiling(kgem->fd, bo->handle,
- I915_TILING_NONE, 0) != I915_TILING_NONE)
+ !gem_set_tiling(kgem->fd, bo->handle,
+ I915_TILING_NONE, 0))
continue;
kgem_bo_remove_from_inactive(kgem, bo);
@@ -2668,12 +3030,15 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
__FUNCTION__, bo->handle, num_pages(bo)));
assert(use_active || bo->domain != DOMAIN_GPU);
assert(!bo->needs_flush);
- //assert(!kgem_busy(kgem, bo->handle));
+ ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
return bo;
}
if (flags & CREATE_EXACT)
return NULL;
+
+ if (flags & CREATE_CPU_MAP && !kgem->has_llc)
+ return NULL;
}
cache = use_active ? active(kgem, num_pages, I915_TILING_NONE) : inactive(kgem, num_pages);
@@ -2682,12 +3047,13 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
assert(bo->reusable);
assert(!!bo->rq == !!use_active);
assert(bo->proxy == NULL);
+ assert(!bo->scanout);
if (num_pages > num_pages(bo))
continue;
if (use_active &&
- kgem->gen <= 40 &&
+ kgem->gen <= 040 &&
bo->tiling != I915_TILING_NONE)
continue;
@@ -2703,11 +3069,12 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
if (first)
continue;
- if (gem_set_tiling(kgem->fd, bo->handle,
- I915_TILING_NONE, 0) != I915_TILING_NONE)
+ if (!gem_set_tiling(kgem->fd, bo->handle,
+ I915_TILING_NONE, 0))
continue;
bo->tiling = I915_TILING_NONE;
+ bo->pitch = 0;
}
if (bo->map) {
@@ -2751,7 +3118,7 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
assert(list_is_empty(&bo->list));
assert(use_active || bo->domain != DOMAIN_GPU);
assert(!bo->needs_flush || use_active);
- //assert(use_active || !kgem_busy(kgem, bo->handle));
+ ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
return bo;
}
@@ -2771,7 +3138,7 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
assert(list_is_empty(&first->list));
assert(use_active || first->domain != DOMAIN_GPU);
assert(!first->needs_flush || use_active);
- //assert(use_active || !kgem_busy(kgem, first->handle));
+ ASSERT_MAYBE_IDLE(kgem, first->handle, !use_active);
return first;
}
@@ -2878,10 +3245,15 @@ struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags)
size = (size + PAGE_SIZE - 1) / PAGE_SIZE;
bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags);
if (bo) {
+ assert(bo->domain != DOMAIN_GPU);
+ ASSERT_IDLE(kgem, bo->handle);
bo->refcnt = 1;
return bo;
}
+ if (flags & CREATE_CACHED)
+ return NULL;
+
handle = gem_create(kgem->fd, size);
if (handle == 0)
return NULL;
@@ -2902,7 +3274,7 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int
if (DBG_NO_TILING)
return tiling < 0 ? tiling : I915_TILING_NONE;
- if (kgem->gen < 40) {
+ if (kgem->gen < 040) {
if (tiling && width * bpp > 8192 * 8) {
DBG(("%s: pitch too large for tliing [%d]\n",
__FUNCTION__, width*bpp/8));
@@ -2910,13 +3282,17 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int
goto done;
}
} else {
+ /* XXX rendering to I915_TILING_Y seems broken? */
+ if (kgem->gen < 050 && tiling == I915_TILING_Y)
+ tiling = I915_TILING_X;
+
if (width*bpp > (MAXSHORT-512) * 8) {
- DBG(("%s: large pitch [%d], forcing TILING_X\n",
- __FUNCTION__, width*bpp/8));
if (tiling > 0)
tiling = -tiling;
else if (tiling == 0)
tiling = -I915_TILING_X;
+ DBG(("%s: large pitch [%d], forcing TILING [%d]\n",
+ __FUNCTION__, width*bpp/8, tiling));
} else if (tiling && (width|height) > 8192) {
DBG(("%s: large tiled buffer [%dx%d], forcing TILING_X\n",
__FUNCTION__, width, height));
@@ -2927,9 +3303,9 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int
if (tiling < 0)
return tiling;
- if (tiling && height == 1) {
- DBG(("%s: disabling tiling [%d] for single row\n",
- __FUNCTION__,height));
+ if (tiling && (height == 1 || width == 1)) {
+ DBG(("%s: disabling tiling [%dx%d] for single row/col\n",
+ __FUNCTION__, width, height));
tiling = I915_TILING_NONE;
goto done;
}
@@ -3004,6 +3380,7 @@ unsigned kgem_can_create_2d(struct kgem *kgem,
{
uint32_t pitch, size;
unsigned flags = 0;
+ int tiling;
int bpp;
DBG(("%s: %dx%d @ %d\n", __FUNCTION__, width, height, depth));
@@ -3023,33 +3400,41 @@ unsigned kgem_can_create_2d(struct kgem *kgem,
size = kgem_surface_size(kgem, false, 0,
width, height, bpp,
I915_TILING_NONE, &pitch);
- if (size > 0 && size <= kgem->max_cpu_size)
- flags |= KGEM_CAN_CREATE_CPU | KGEM_CAN_CREATE_GPU;
- if (size > 0 && size <= kgem->aperture_mappable/4)
- flags |= KGEM_CAN_CREATE_GTT;
- if (size > kgem->large_object_size)
- flags |= KGEM_CAN_CREATE_LARGE;
- if (size > kgem->max_object_size) {
- DBG(("%s: too large (untiled) %d > %d\n",
- __FUNCTION__, size, kgem->max_object_size));
- return 0;
+ DBG(("%s: untiled size=%d\n", __FUNCTION__, size));
+ if (size > 0) {
+ if (size <= kgem->max_cpu_size)
+ flags |= KGEM_CAN_CREATE_CPU;
+ if (size <= kgem->max_gpu_size)
+ flags |= KGEM_CAN_CREATE_GPU;
+ if (size <= kgem->aperture_mappable/4)
+ flags |= KGEM_CAN_CREATE_GTT;
+ if (size > kgem->large_object_size)
+ flags |= KGEM_CAN_CREATE_LARGE;
+ if (size > kgem->max_object_size) {
+ DBG(("%s: too large (untiled) %d > %d\n",
+ __FUNCTION__, size, kgem->max_object_size));
+ return 0;
+ }
}
- size = kgem_surface_size(kgem, false, 0,
- width, height, bpp,
- kgem_choose_tiling(kgem, I915_TILING_X,
- width, height, bpp),
- &pitch);
- if (size > 0 && size <= kgem->max_gpu_size)
- flags |= KGEM_CAN_CREATE_GPU;
- if (size > 0 && size <= kgem->aperture_mappable/4)
- flags |= KGEM_CAN_CREATE_GTT;
- if (size > kgem->large_object_size)
- flags |= KGEM_CAN_CREATE_LARGE;
- if (size > kgem->max_object_size) {
- DBG(("%s: too large (tiled) %d > %d\n",
- __FUNCTION__, size, kgem->max_object_size));
- return 0;
+ tiling = kgem_choose_tiling(kgem, I915_TILING_X,
+ width, height, bpp);
+ if (tiling != I915_TILING_NONE) {
+ size = kgem_surface_size(kgem, false, 0,
+ width, height, bpp, tiling,
+ &pitch);
+ DBG(("%s: tiled[%d] size=%d\n", __FUNCTION__, tiling, size));
+ if (size > 0 && size <= kgem->max_gpu_size)
+ flags |= KGEM_CAN_CREATE_GPU;
+ if (size > 0 && size <= kgem->aperture_mappable/4)
+ flags |= KGEM_CAN_CREATE_GTT;
+ if (size > kgem->large_object_size)
+ flags |= KGEM_CAN_CREATE_LARGE;
+ if (size > kgem->max_object_size) {
+ DBG(("%s: too large (tiled) %d > %d\n",
+ __FUNCTION__, size, kgem->max_object_size));
+ return 0;
+ }
}
return flags;
@@ -3060,9 +3445,9 @@ inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo)
unsigned int size;
assert(bo->tiling);
- assert(kgem->gen < 40);
+ assert(kgem->gen < 040);
- if (kgem->gen < 30)
+ if (kgem->gen < 030)
size = 512 * 1024;
else
size = 1024 * 1024;
@@ -3104,6 +3489,36 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
size /= PAGE_SIZE;
bucket = cache_bucket(size);
+ if (flags & CREATE_SCANOUT) {
+ list_for_each_entry(bo, &kgem->scanout, list) {
+ assert(bo->scanout);
+ assert(bo->delta);
+ assert(!bo->purged);
+
+ if (size > num_pages(bo) || num_pages(bo) > 2*size)
+ continue;
+
+ if (bo->tiling != tiling ||
+ (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
+ if (!gem_set_tiling(kgem->fd, bo->handle,
+ tiling, pitch))
+ continue;
+
+ bo->tiling = tiling;
+ bo->pitch = pitch;
+ }
+
+ list_del(&bo->list);
+
+ bo->unique_id = kgem_get_unique_id(kgem);
+ DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
+ bo->pitch, bo->tiling, bo->handle, bo->unique_id));
+ assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
+ bo->refcnt = 1;
+ return bo;
+ }
+ }
+
if (bucket >= NUM_CACHE_BUCKETS) {
DBG(("%s: large bo num pages=%d, bucket=%d\n",
__FUNCTION__, size, bucket));
@@ -3116,10 +3531,12 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
list_for_each_entry(bo, &kgem->large, list) {
assert(!bo->purged);
+ assert(!bo->scanout);
assert(bo->refcnt == 0);
assert(bo->reusable);
+ assert(bo->flush == true);
- if (kgem->gen < 40) {
+ if (kgem->gen < 040) {
if (bo->pitch < pitch) {
DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
bo->tiling, tiling,
@@ -3134,11 +3551,12 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
continue;
if (bo->pitch != pitch || bo->tiling != tiling) {
- if (gem_set_tiling(kgem->fd, bo->handle,
- tiling, pitch) != tiling)
+ if (!gem_set_tiling(kgem->fd, bo->handle,
+ tiling, pitch))
continue;
bo->pitch = pitch;
+ bo->tiling = tiling;
}
}
@@ -3157,16 +3575,19 @@ large_inactive:
list_for_each_entry(bo, &kgem->large_inactive, list) {
assert(bo->refcnt == 0);
assert(bo->reusable);
+ assert(!bo->scanout);
if (size > num_pages(bo))
continue;
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
- if (tiling != gem_set_tiling(kgem->fd,
- bo->handle,
- tiling, pitch))
+ if (!gem_set_tiling(kgem->fd, bo->handle,
+ tiling, pitch))
continue;
+
+ bo->tiling = tiling;
+ bo->pitch = pitch;
}
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
@@ -3201,10 +3622,12 @@ large_inactive:
list_for_each_entry(bo, cache, vma) {
assert(bucket(bo) == bucket);
assert(bo->refcnt == 0);
+ assert(!bo->scanout);
assert(bo->map);
assert(IS_CPU_MAP(bo->map) == for_cpu);
assert(bo->rq == NULL);
assert(list_is_empty(&bo->request));
+ assert(bo->flush == false);
if (size > num_pages(bo)) {
DBG(("inactive too small: %d < %d\n",
@@ -3233,13 +3656,17 @@ large_inactive:
DBG((" from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->reusable);
- assert(bo->domain != DOMAIN_GPU && !kgem_busy(kgem, bo->handle));
+ assert(bo->domain != DOMAIN_GPU);
+ ASSERT_IDLE(kgem, bo->handle);
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
} while (!list_is_empty(cache) &&
__kgem_throttle_retire(kgem, flags));
+
+ if (flags & CREATE_CPU_MAP && !kgem->has_llc)
+ goto create;
}
if (flags & CREATE_INACTIVE)
@@ -3260,8 +3687,10 @@ search_again:
assert(bucket(bo) == bucket);
assert(bo->reusable);
assert(bo->tiling == tiling);
+ assert(bo->flush == false);
+ assert(!bo->scanout);
- if (kgem->gen < 40) {
+ if (kgem->gen < 040) {
if (bo->pitch < pitch) {
DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
bo->tiling, tiling,
@@ -3276,9 +3705,10 @@ search_again:
continue;
if (bo->pitch != pitch) {
- gem_set_tiling(kgem->fd,
- bo->handle,
- tiling, pitch);
+ if (!gem_set_tiling(kgem->fd,
+ bo->handle,
+ tiling, pitch))
+ continue;
bo->pitch = pitch;
}
@@ -3300,7 +3730,9 @@ search_again:
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bo->reusable);
+ assert(!bo->scanout);
assert(bo->tiling == tiling);
+ assert(bo->flush == false);
if (num_pages(bo) < size)
continue;
@@ -3319,7 +3751,7 @@ search_again:
}
if (--retry && flags & CREATE_EXACT) {
- if (kgem->gen >= 40) {
+ if (kgem->gen >= 040) {
for (i = I915_TILING_NONE; i <= I915_TILING_Y; i++) {
if (i == tiling)
continue;
@@ -3329,13 +3761,15 @@ search_again:
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bo->reusable);
+ assert(!bo->scanout);
+ assert(bo->flush == false);
if (num_pages(bo) < size)
continue;
- if (tiling != gem_set_tiling(kgem->fd,
- bo->handle,
- tiling, pitch))
+ if (!gem_set_tiling(kgem->fd,
+ bo->handle,
+ tiling, pitch))
continue;
kgem_bo_remove_from_active(kgem, bo);
@@ -3369,6 +3803,8 @@ search_again:
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bo->reusable);
+ assert(!bo->scanout);
+ assert(bo->flush == false);
if (bo->tiling) {
if (bo->pitch < pitch) {
@@ -3408,6 +3844,8 @@ search_inactive:
list_for_each_entry(bo, cache, list) {
assert(bucket(bo) == bucket);
assert(bo->reusable);
+ assert(!bo->scanout);
+ assert(bo->flush == false);
if (size > num_pages(bo)) {
DBG(("inactive too small: %d < %d\n",
@@ -3417,9 +3855,8 @@ search_inactive:
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
- if (tiling != gem_set_tiling(kgem->fd,
- bo->handle,
- tiling, pitch))
+ if (!gem_set_tiling(kgem->fd, bo->handle,
+ tiling, pitch))
continue;
if (bo->map)
@@ -3444,7 +3881,7 @@ search_inactive:
assert(bo->refcnt == 0);
assert(bo->reusable);
assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU);
- assert((flags & CREATE_INACTIVE) == 0 || !kgem_busy(kgem, bo->handle));
+ ASSERT_MAYBE_IDLE(kgem, bo->handle, flags & CREATE_INACTIVE);
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
@@ -3479,8 +3916,9 @@ create:
bo->domain = DOMAIN_CPU;
bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = pitch;
- if (tiling != I915_TILING_NONE)
- bo->tiling = gem_set_tiling(kgem->fd, handle, tiling, pitch);
+ if (tiling != I915_TILING_NONE &&
+ gem_set_tiling(kgem->fd, handle, tiling, pitch))
+ bo->tiling = tiling;
if (bucket >= NUM_CACHE_BUCKETS) {
DBG(("%s: marking large bo for automatic flushing\n",
__FUNCTION__));
@@ -3611,16 +4049,23 @@ void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
__kgem_bo_destroy(kgem, bo);
}
-bool __kgem_flush(struct kgem *kgem, struct kgem_bo *bo)
+void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo)
{
+ assert(bo->rq);
+ assert(bo->exec == NULL);
+ assert(bo->needs_flush);
+
/* The kernel will emit a flush *and* update its own flushing lists. */
- if (!bo->needs_flush)
- return false;
+ if (!__kgem_busy(kgem, bo->handle))
+ __kgem_bo_clear_busy(bo);
- bo->needs_flush = kgem_busy(kgem, bo->handle);
DBG(("%s: handle=%d, busy?=%d\n",
- __FUNCTION__, bo->handle, bo->needs_flush));
- return bo->needs_flush;
+ __FUNCTION__, bo->handle, bo->rq != NULL));
+}
+
+inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo)
+{
+ return kgem->nreloc && bo->rq && RQ_RING(bo->rq) != kgem->ring;
}
bool kgem_check_bo(struct kgem *kgem, ...)
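needs_semaphore() gates the batch-room checks that follow: adding a bo whose request is still live on the other ring would make the kernel insert a ring-to-ring semaphore wait, so the checks report failure and let the caller flush instead. A sketch of the caller-side shape, mirroring kgem_check_bo() below:

	static bool check_bo_sketch(struct kgem *kgem, struct kgem_bo *bo)
	{
		while (bo->proxy)
			bo = bo->proxy; /* busy state lives on the real bo */

		if (bo->exec)
			return true; /* already in this batch, no new cost */

		if (needs_semaphore(kgem, bo))
			return false; /* would stall on a cross-ring semaphore */

		return true; /* aperture/exec-count checks elided */
	}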
@@ -3629,22 +4074,22 @@ bool kgem_check_bo(struct kgem *kgem, ...)
struct kgem_bo *bo;
int num_exec = 0;
int num_pages = 0;
-
- if (kgem_flush(kgem))
- return false;
+ bool flush = false;
va_start(ap, kgem);
while ((bo = va_arg(ap, struct kgem_bo *))) {
+ while (bo->proxy)
+ bo = bo->proxy;
if (bo->exec)
continue;
- while (bo->proxy) {
- bo = bo->proxy;
- if (bo->exec)
- continue;
- }
+ if (needs_semaphore(kgem, bo))
+ return false;
+
num_pages += num_pages(bo);
num_exec++;
+
+ flush |= bo->flush;
}
va_end(ap);
@@ -3654,7 +4099,11 @@ bool kgem_check_bo(struct kgem *kgem, ...)
if (!num_pages)
return true;
- if (kgem->aperture > kgem->aperture_low && kgem_is_idle(kgem)) {
+ if (kgem_flush(kgem, flush))
+ return false;
+
+ if (kgem->aperture > kgem->aperture_low &&
+ kgem_ring_is_idle(kgem, kgem->ring)) {
DBG(("%s: current aperture usage (%d) is greater than low water mark (%d)\n",
__FUNCTION__, kgem->aperture, kgem->aperture_low));
return false;
@@ -3679,46 +4128,55 @@ bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo)
{
uint32_t size;
- if (kgem_flush(kgem))
- return false;
-
while (bo->proxy)
bo = bo->proxy;
if (bo->exec) {
- if (kgem->gen < 40 &&
+ if (kgem->gen < 040 &&
bo->tiling != I915_TILING_NONE &&
(bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
if (kgem->nfence >= kgem->fence_max)
return false;
+ if (3*kgem->aperture_fenced > kgem->aperture_mappable &&
+ kgem_ring_is_idle(kgem, kgem->ring))
+ return false;
+
size = kgem->aperture_fenced;
size += kgem_bo_fenced_size(kgem, bo);
- if (4*size > 3*kgem->aperture_mappable)
+ if (3*size > 2*kgem->aperture_mappable)
return false;
}
return true;
}
+ if (needs_semaphore(kgem, bo))
+ return false;
+
+ if (kgem_flush(kgem, bo->flush))
+ return false;
+
if (kgem->nexec >= KGEM_EXEC_SIZE(kgem) - 1)
return false;
- if (kgem->aperture > kgem->aperture_low)
+ if (kgem->aperture > kgem->aperture_low &&
+ kgem_ring_is_idle(kgem, kgem->ring))
return false;
if (kgem->aperture + num_pages(bo) > kgem->aperture_high)
return false;
- if (kgem->gen < 40 && bo->tiling != I915_TILING_NONE) {
+ if (kgem->gen < 040 && bo->tiling != I915_TILING_NONE) {
if (kgem->nfence >= kgem->fence_max)
return false;
- if (2*kgem->aperture_fenced > kgem->aperture_mappable)
+ if (3*kgem->aperture_fenced > kgem->aperture_mappable &&
+ kgem_ring_is_idle(kgem, kgem->ring))
return false;
size = kgem->aperture_fenced;
size += kgem_bo_fenced_size(kgem, bo);
- if (4*size > 3*kgem->aperture_mappable)
+ if (3*size > 2*kgem->aperture_mappable)
return false;
}
@@ -3733,16 +4191,14 @@ bool kgem_check_many_bo_fenced(struct kgem *kgem, ...)
int num_exec = 0;
int num_pages = 0;
int fenced_size = 0;
-
- if (kgem_flush(kgem))
- return false;
+ bool flush = false;
va_start(ap, kgem);
while ((bo = va_arg(ap, struct kgem_bo *))) {
while (bo->proxy)
bo = bo->proxy;
if (bo->exec) {
- if (kgem->gen >= 40 || bo->tiling == I915_TILING_NONE)
+ if (kgem->gen >= 040 || bo->tiling == I915_TILING_NONE)
continue;
if ((bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
@@ -3753,12 +4209,17 @@ bool kgem_check_many_bo_fenced(struct kgem *kgem, ...)
continue;
}
+ if (needs_semaphore(kgem, bo))
+ return false;
+
num_pages += num_pages(bo);
num_exec++;
- if (kgem->gen < 40 && bo->tiling) {
+ if (kgem->gen < 040 && bo->tiling) {
fenced_size += kgem_bo_fenced_size(kgem, bo);
num_fence++;
}
+
+ flush |= bo->flush;
}
va_end(ap);
@@ -3766,15 +4227,20 @@ bool kgem_check_many_bo_fenced(struct kgem *kgem, ...)
if (kgem->nfence + num_fence > kgem->fence_max)
return false;
- if (2*kgem->aperture_fenced > kgem->aperture_mappable)
+ if (3*kgem->aperture_fenced > kgem->aperture_mappable &&
+ kgem_ring_is_idle(kgem, kgem->ring))
return false;
- if (4*(fenced_size + kgem->aperture_fenced) > 3*kgem->aperture_mappable)
+ if (3*(fenced_size + kgem->aperture_fenced) > 2*kgem->aperture_mappable)
return false;
}
if (num_pages) {
- if (kgem->aperture > kgem->aperture_low)
+ if (kgem_flush(kgem, flush))
+ return false;
+
+ if (kgem->aperture > kgem->aperture_low &&
+ kgem_ring_is_idle(kgem, kgem->ring))
return false;
if (num_pages + kgem->aperture > kgem->aperture_high)
@@ -3816,20 +4282,25 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
if (bo->exec == NULL) {
list_move_tail(&bo->request,
&kgem->next_request->buffers);
- bo->rq = kgem->next_request;
+ bo->rq = MAKE_REQUEST(kgem->next_request,
+ kgem->ring);
bo->exec = &_kgem_dummy_exec;
}
+ if (read_write_domain & 0x7fff && !bo->dirty)
+ __kgem_bo_mark_dirty(bo);
+
bo = bo->proxy;
assert(bo->refcnt);
assert(!bo->purged);
}
if (bo->exec == NULL)
- _kgem_add_bo(kgem, bo);
- assert(bo->rq == kgem->next_request);
+ kgem_add_bo(kgem, bo);
+ assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
+ assert(RQ_RING(bo->rq) == kgem->ring);
- if (kgem->gen < 40 && read_write_domain & KGEM_RELOC_FENCED) {
+ if (kgem->gen < 040 && read_write_domain & KGEM_RELOC_FENCED) {
if (bo->tiling &&
(bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
assert(kgem->nfence < kgem->fence_max);
@@ -3841,19 +4312,21 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
}
kgem->reloc[index].delta = delta;
- kgem->reloc[index].target_handle = bo->handle;
+ kgem->reloc[index].target_handle = bo->target_handle;
kgem->reloc[index].presumed_offset = bo->presumed_offset;
- if (read_write_domain & 0x7ff) {
+ if (read_write_domain & 0x7fff && !bo->dirty) {
assert(!bo->snoop || kgem->can_blt_cpu);
- kgem_bo_mark_dirty(bo);
+ __kgem_bo_mark_dirty(bo);
}
delta += bo->presumed_offset;
} else {
kgem->reloc[index].delta = delta;
- kgem->reloc[index].target_handle = 0;
+ kgem->reloc[index].target_handle = ~0U;
kgem->reloc[index].presumed_offset = 0;
+ if (kgem->nreloc__self < 256)
+ kgem->reloc__self[kgem->nreloc__self++] = index;
}
kgem->reloc[index].read_domains = read_write_domain >> 16;
kgem->reloc[index].write_domain = read_write_domain & 0x7fff;
@@ -3984,7 +4457,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
ptr = bo->map;
if (ptr == NULL) {
assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2);
- assert(kgem->gen != 21 || bo->tiling != I915_TILING_Y);
+ assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
@@ -4005,7 +4478,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
struct drm_i915_gem_set_domain set_domain;
DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
- bo->needs_flush, bo->domain, kgem_busy(kgem, bo->handle)));
+ bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
/* XXX use PROT_READ to avoid the write flush? */
@@ -4097,6 +4570,11 @@ retry:
if (__kgem_throttle_retire(kgem, 0))
goto retry;
+ if (kgem->need_expire) {
+ kgem_cleanup_cache(kgem);
+ goto retry;
+ }
+
return NULL;
}
@@ -4132,6 +4610,11 @@ retry:
if (__kgem_throttle_retire(kgem, 0))
goto retry;
+ if (kgem->need_expire) {
+ kgem_cleanup_cache(kgem);
+ goto retry;
+ }
+
return NULL;
}
@@ -4180,10 +4663,7 @@ uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo)
*/
bo->reusable = false;
- /* The bo is outside of our control, so presume it is written to */
- bo->needs_flush = true;
- if (bo->domain != DOMAIN_GPU)
- bo->domain = DOMAIN_NONE;
+ kgem_bo_unclean(kgem, bo);
/* Henceforth, we need to broadcast all updates to clients and
* flush our rendering before doing so.
@@ -4231,8 +4711,8 @@ void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
if (bo->domain != DOMAIN_CPU) {
struct drm_i915_gem_set_domain set_domain;
- DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
- bo->needs_flush, bo->domain, kgem_busy(kgem, bo->handle)));
+ DBG(("%s: SYNC: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
+ bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
VG_CLEAR(set_domain);
set_domain.handle = bo->handle;
@@ -4246,6 +4726,30 @@ void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
}
}
+void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write)
+{
+ assert(bo->proxy == NULL);
+ kgem_bo_submit(kgem, bo);
+
+ if (bo->domain != DOMAIN_CPU) {
+ struct drm_i915_gem_set_domain set_domain;
+
+ DBG(("%s: SYNC: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
+ bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
+
+ VG_CLEAR(set_domain);
+ set_domain.handle = bo->handle;
+ set_domain.read_domains = I915_GEM_DOMAIN_CPU;
+ set_domain.write_domain = write ? I915_GEM_DOMAIN_CPU : 0;
+
+ if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) {
+ if (write || bo->needs_flush)
+ kgem_bo_retire(kgem, bo);
+ bo->domain = write ? DOMAIN_CPU : DOMAIN_NONE;
+ }
+ }
+}
+
void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->proxy == NULL);
@@ -4254,8 +4758,8 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo)
if (bo->domain != DOMAIN_GTT) {
struct drm_i915_gem_set_domain set_domain;
- DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
- bo->needs_flush, bo->domain, kgem_busy(kgem, bo->handle)));
+ DBG(("%s: SYNC: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
+ bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
VG_CLEAR(set_domain);
set_domain.handle = bo->handle;
@@ -4271,10 +4775,10 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo)
void kgem_clear_dirty(struct kgem *kgem)
{
- struct kgem_request *rq = kgem->next_request;
+ struct list * const buffers = &kgem->next_request->buffers;
struct kgem_bo *bo;
- list_for_each_entry(bo, &rq->buffers, request) {
+ list_for_each_entry(bo, buffers, request) {
if (!bo->dirty)
break;
@@ -4305,6 +4809,7 @@ struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
bo->tiling = target->tiling;
bo->pitch = target->pitch;
+ assert(!bo->scanout);
bo->proxy = kgem_bo_reference(target);
bo->delta = offset;
@@ -4351,7 +4856,7 @@ static inline bool
use_snoopable_buffer(struct kgem *kgem, uint32_t flags)
{
if ((flags & KGEM_BUFFER_WRITE) == 0)
- return kgem->gen >= 30;
+ return kgem->gen >= 030;
return true;
}
@@ -4425,8 +4930,6 @@ create_snoopable_buffer(struct kgem *kgem, unsigned alloc)
struct kgem_buffer *bo;
uint32_t handle;
- assert(!kgem->has_llc);
-
if (kgem->has_cacheing) {
struct kgem_bo *old;
@@ -4524,9 +5027,6 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
/* we should never be asked to create anything TOO large */
assert(size <= kgem->max_object_size);
- if (kgem->has_llc)
- flags &= ~KGEM_BUFFER_INPLACE;
-
#if !DBG_NO_UPLOAD_CACHE
list_for_each_entry(bo, &kgem->batch_buffers, base.list) {
assert(bo->base.io);
@@ -4580,8 +5080,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
assert(bo->base.io);
assert(bo->base.refcnt >= 1);
assert(bo->mmapped);
- assert(!bo->base.snoop);
- assert(!IS_CPU_MAP(bo->base.map) || kgem->has_llc);
+ assert(!IS_CPU_MAP(bo->base.map) || kgem->has_llc || bo->base.snoop);
if ((bo->write & ~flags) & KGEM_BUFFER_INPLACE) {
DBG(("%s: skip write %x buffer, need %x\n",
@@ -4608,11 +5107,16 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
alloc = ALIGN(size, kgem->buffer_size);
if (alloc > MAX_CACHE_SIZE)
alloc = PAGE_ALIGN(size);
+
+ if (alloc > kgem->aperture_mappable / 4)
+ flags &= ~KGEM_BUFFER_INPLACE;
alloc /= PAGE_SIZE;
- if (kgem->has_llc) {
+
+ if (kgem->has_llc &&
+ (flags & KGEM_BUFFER_WRITE_INPLACE) != KGEM_BUFFER_WRITE_INPLACE) {
bo = buffer_alloc();
if (bo == NULL)
- return NULL;
+ goto skip_llc;
old = NULL;
if ((flags & KGEM_BUFFER_WRITE) == 0)
@@ -4630,7 +5134,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
uint32_t handle = gem_create(kgem->fd, alloc);
if (handle == 0) {
free(bo);
- return NULL;
+ goto skip_llc;
}
__kgem_bo_init(&bo->base, handle, alloc);
DBG(("%s: created LLC handle=%d for buffer\n",
@@ -4646,17 +5150,14 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
if (bo->mem) {
if (flags & KGEM_BUFFER_WRITE)
kgem_bo_sync__cpu(kgem, &bo->base);
-
- alloc = num_pages(&bo->base);
+ flags &= ~KGEM_BUFFER_INPLACE;
goto init;
} else {
bo->base.refcnt = 0; /* for valgrind */
kgem_bo_free(kgem, &bo->base);
}
}
-
- if (PAGE_SIZE * alloc > kgem->aperture_mappable / 4)
- flags &= ~KGEM_BUFFER_INPLACE;
+skip_llc:
if ((flags & KGEM_BUFFER_WRITE_INPLACE) == KGEM_BUFFER_WRITE_INPLACE) {
/* The issue with using a GTT upload buffer is that we may
@@ -4695,7 +5196,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP);
if (old == NULL) {
old = search_linear_cache(kgem, alloc, CREATE_INACTIVE);
- if (old && !kgem_bo_is_mappable(kgem, old)) {
+ if (old && !__kgem_bo_is_mappable(kgem, old)) {
_kgem_bo_destroy(kgem, old);
old = NULL;
}
@@ -4703,7 +5204,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
if (old) {
DBG(("%s: reusing handle=%d for buffer\n",
__FUNCTION__, old->handle));
- assert(kgem_bo_is_mappable(kgem, old));
+ assert(__kgem_bo_is_mappable(kgem, old));
assert(!old->snoop);
assert(old->rq == NULL);
@@ -4719,9 +5220,8 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
bo->mem = kgem_bo_map(kgem, &bo->base);
if (bo->mem) {
- alloc = num_pages(&bo->base);
if (IS_CPU_MAP(bo->base.map))
- flags &= ~KGEM_BUFFER_INPLACE;
+ flags &= ~KGEM_BUFFER_INPLACE;
goto init;
} else {
bo->base.refcnt = 0;
@@ -4742,16 +5242,13 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
if (flags & KGEM_BUFFER_WRITE)
kgem_bo_sync__cpu(kgem, &bo->base);
flags &= ~KGEM_BUFFER_INPLACE;
- alloc = num_pages(&bo->base);
goto init;
}
- if ((flags & KGEM_BUFFER_WRITE_INPLACE) != KGEM_BUFFER_WRITE_INPLACE) {
+ if ((flags & KGEM_BUFFER_INPLACE) == 0) {
bo = create_snoopable_buffer(kgem, alloc);
- if (bo) {
- flags &= ~KGEM_BUFFER_INPLACE;
+ if (bo)
goto init;
- }
}
}
@@ -4765,8 +5262,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
if (old) {
DBG(("%s: reusing ordinary handle %d for io\n",
__FUNCTION__, old->handle));
- alloc = num_pages(old);
- bo = buffer_alloc_with_data(alloc);
+ bo = buffer_alloc_with_data(num_pages(old));
if (bo == NULL)
return NULL;
@@ -4793,7 +5289,6 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
DBG(("%s: reusing handle=%d for buffer\n",
__FUNCTION__, old->handle));
- alloc = num_pages(old);
init_buffer_from_bo(bo, old);
} else {
uint32_t handle = gem_create(kgem->fd, alloc);
@@ -4803,7 +5298,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
}
DBG(("%s: created handle=%d for buffer\n",
- __FUNCTION__, bo->base.handle));
+ __FUNCTION__, handle));
__kgem_bo_init(&bo->base, handle, alloc);
debug_alloc(kgem, alloc * PAGE_SIZE);
@@ -4815,16 +5310,18 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
if (flags & KGEM_BUFFER_WRITE) {
bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
- if (bo->mem != NULL)
+ if (bo->mem != NULL) {
kgem_bo_sync__cpu(kgem, &bo->base);
- goto init;
+ goto init;
+ }
}
DBG(("%s: failing back to new pwrite buffer\n", __FUNCTION__));
old = &bo->base;
- bo = buffer_alloc_with_data(alloc);
+ bo = buffer_alloc_with_data(num_pages(old));
if (bo == NULL) {
- free(old);
+ old->refcnt = 0;
+ kgem_bo_free(kgem, old);
return NULL;
}
@@ -4839,7 +5336,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
init:
bo->base.io = true;
assert(bo->base.refcnt == 1);
- assert(num_pages(&bo->base) == alloc);
+ assert(num_pages(&bo->base) >= NUM_PAGES(size));
assert(!bo->need_io || !bo->base.needs_flush);
assert(!bo->need_io || bo->base.domain != DOMAIN_GPU);
assert(bo->mem);
@@ -4852,8 +5349,8 @@ init:
assert(list_is_empty(&bo->base.list));
list_add(&bo->base.list, &kgem->batch_buffers);
- DBG(("%s(pages=%d) new handle=%d, used=%d, write=%d\n",
- __FUNCTION__, alloc, bo->base.handle, bo->used, bo->write));
+ DBG(("%s(pages=%d [%d]) new handle=%d, used=%d, write=%d\n",
+ __FUNCTION__, num_pages(&bo->base), alloc, bo->base.handle, bo->used, bo->write));
done:
bo->used = ALIGN(bo->used, UPLOAD_ALIGNMENT);
@@ -4919,10 +5416,10 @@ struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
struct kgem_bo *kgem_upload_source_image(struct kgem *kgem,
const void *data,
- BoxPtr box,
+ const BoxRec *box,
int stride, int bpp)
{
- int width = box->x2 - box->x1;
+ int width = box->x2 - box->x1;
int height = box->y2 - box->y1;
struct kgem_bo *bo;
void *dst;
@@ -4987,7 +5484,7 @@ void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
__FUNCTION__,
bo->base.needs_flush,
bo->base.domain,
- kgem_busy(kgem, bo->base.handle)));
+ __kgem_busy(kgem, bo->base.handle)));
assert(!IS_CPU_MAP(bo->base.map) || bo->base.snoop || kgem->has_llc);
@@ -5007,6 +5504,7 @@ void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
return;
}
kgem_bo_retire(kgem, &bo->base);
+ bo->base.domain = DOMAIN_NONE;
}
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format)
@@ -5104,18 +5602,22 @@ kgem_replace_bo(struct kgem *kgem,
dst->unique_id = kgem_get_unique_id(kgem);
dst->refcnt = 1;
- kgem_set_mode(kgem, KGEM_BLT);
+ kgem_set_mode(kgem, KGEM_BLT, dst);
if (!kgem_check_batch(kgem, 8) ||
!kgem_check_reloc(kgem, 2) ||
!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
- _kgem_submit(kgem);
+ kgem_submit(kgem);
+ if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
+ kgem_bo_destroy(kgem, dst);
+ return NULL;
+ }
_kgem_set_mode(kgem, KGEM_BLT);
}
br00 = XY_SRC_COPY_BLT_CMD;
br13 = pitch;
pitch = src->pitch;
- if (kgem->gen >= 40 && src->tiling) {
+ if (kgem->gen >= 040 && src->tiling) {
br00 |= BLT_SRC_TILED;
pitch >>= 2;
}
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index e547215bb..a23194feb 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -43,6 +43,12 @@
#endif
struct kgem_bo {
+ struct kgem_request *rq;
+#define RQ(rq) ((struct kgem_request *)((uintptr_t)(rq) & ~3))
+#define RQ_RING(rq) ((uintptr_t)(rq) & 3)
+#define RQ_IS_BLT(rq) (RQ_RING(rq) == KGEM_BLT)
+ struct drm_i915_gem_exec_object2 *exec;
+
struct kgem_bo *proxy;
struct list list;
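The RQ()/RQ_RING() macros above implement a tagged pointer: the ring a bo was last active on is packed into the two low bits of bo->rq, relying on struct kgem_request allocations being at least 4-byte aligned. kgem_bo_mark_busy() further down ORs the ring in the same way, and MAKE_REQUEST(), used throughout the kgem.c side of this diff but not shown here, is presumably the matching encoder:

	/* sketch of the assumed encoder; decode via RQ()/RQ_RING() above */
	#define MAKE_REQUEST(rq, ring) \
		((struct kgem_request *)((uintptr_t)(rq) | (ring)))

	/* RQ(MAKE_REQUEST(rq, ring))      == rq    */
	/* RQ_RING(MAKE_REQUEST(rq, ring)) == ring  */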
@@ -52,8 +58,6 @@ struct kgem_bo {
void *map;
#define IS_CPU_MAP(ptr) ((uintptr_t)(ptr) & 1)
#define IS_GTT_MAP(ptr) (ptr && ((uintptr_t)(ptr) & 1) == 0)
- struct kgem_request *rq;
- struct drm_i915_gem_exec_object2 *exec;
struct kgem_bo_binding {
struct kgem_bo_binding *next;
@@ -64,6 +68,7 @@ struct kgem_bo {
uint32_t unique_id;
uint32_t refcnt;
uint32_t handle;
+ uint32_t target_handle;
uint32_t presumed_offset;
uint32_t delta;
union {
@@ -126,22 +131,30 @@ struct kgem {
struct list large_inactive;
struct list active[NUM_CACHE_BUCKETS][3];
struct list inactive[NUM_CACHE_BUCKETS];
+ struct list pinned_batches[2];
struct list snoop;
+ struct list scanout;
struct list batch_buffers, active_buffers;
struct list requests[2];
struct kgem_request *next_request;
- uint32_t num_requests;
+ struct kgem_request static_request;
struct {
struct list inactive[NUM_CACHE_BUCKETS];
int16_t count;
} vma[NUM_MAP_TYPES];
+ uint32_t batch_flags;
+ uint32_t batch_flags_base;
+#define I915_EXEC_SECURE (1<<9)
+#define LOCAL_EXEC_OBJECT_WRITE (1<<2)
+
uint16_t nbatch;
uint16_t surface;
uint16_t nexec;
uint16_t nreloc;
+ uint16_t nreloc__self;
uint16_t nfence;
uint16_t batch_size;
uint16_t min_alignment;
@@ -151,6 +164,7 @@ struct kgem {
uint32_t need_purge:1;
uint32_t need_retire:1;
uint32_t need_throttle:1;
+ uint32_t scanout_busy:1;
uint32_t busy:1;
uint32_t has_userptr :1;
@@ -158,8 +172,12 @@ struct kgem {
uint32_t has_relaxed_fencing :1;
uint32_t has_relaxed_delta :1;
uint32_t has_semaphores :1;
+ uint32_t has_secure_batches :1;
+ uint32_t has_pinned_batches :1;
uint32_t has_cacheing :1;
uint32_t has_llc :1;
+ uint32_t has_no_reloc :1;
+ uint32_t has_handle_lut :1;
uint32_t can_blt_cpu :1;
@@ -179,6 +197,7 @@ struct kgem {
uint32_t batch[64*1024-8];
struct drm_i915_gem_exec_object2 exec[256];
struct drm_i915_gem_relocation_entry reloc[4096];
+ uint16_t reloc__self[256];
#ifdef DEBUG_MEMORY
struct {
@@ -200,7 +219,7 @@ struct kgem {
#define KGEM_EXEC_SIZE(K) (int)(ARRAY_SIZE((K)->exec)-KGEM_EXEC_RESERVED)
#define KGEM_RELOC_SIZE(K) (int)(ARRAY_SIZE((K)->reloc)-KGEM_RELOC_RESERVED)
-void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen);
+void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen);
void kgem_reset(struct kgem *kgem);
struct kgem_bo *kgem_create_map(struct kgem *kgem,
@@ -218,7 +237,7 @@ struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
struct kgem_bo *kgem_upload_source_image(struct kgem *kgem,
const void *data,
- BoxPtr box,
+ const BoxRec *box,
int stride, int bpp);
void kgem_proxy_bo_attach(struct kgem_bo *bo, struct kgem_bo **ptr);
@@ -245,8 +264,9 @@ enum {
CREATE_SCANOUT = 0x10,
CREATE_PRIME = 0x20,
CREATE_TEMPORARY = 0x40,
- CREATE_NO_RETIRE = 0x80,
- CREATE_NO_THROTTLE = 0x100,
+ CREATE_CACHED = 0x80,
+ CREATE_NO_RETIRE = 0x100,
+ CREATE_NO_THROTTLE = 0x200,
};
struct kgem_bo *kgem_create_2d(struct kgem *kgem,
int width,
@@ -264,17 +284,25 @@ uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format);
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset);
int kgem_bo_get_swizzling(struct kgem *kgem, struct kgem_bo *bo);
-void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo);
bool kgem_retire(struct kgem *kgem);
-bool __kgem_is_idle(struct kgem *kgem);
+
+bool __kgem_ring_is_idle(struct kgem *kgem, int ring);
+static inline bool kgem_ring_is_idle(struct kgem *kgem, int ring)
+{
+ ring = ring == KGEM_BLT;
+
+ if (list_is_empty(&kgem->requests[ring]))
+ return true;
+
+ return __kgem_ring_is_idle(kgem, ring);
+}
+
static inline bool kgem_is_idle(struct kgem *kgem)
{
- if (kgem->num_requests == 0) {
- DBG(("%s: no outstanding requests\n", __FUNCTION__));
+ if (!kgem->need_retire)
return true;
- }
- return __kgem_is_idle(kgem);
+ return kgem_ring_is_idle(kgem, kgem->ring);
}
void _kgem_submit(struct kgem *kgem);
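kgem_ring_is_idle() first normalises its argument: requests[] holds exactly two lists, and `ring = ring == KGEM_BLT` collapses any ring id to index 1 for the blitter and 0 for everything else, matching `rq->ring = kgem->ring == KGEM_BLT` at submit time. A hypothetical helper spelling that out:

	static inline int ring_index(int ring)
	{
		return ring == KGEM_BLT; /* 1: blitter, 0: render/default */
	}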
@@ -284,9 +312,12 @@ static inline void kgem_submit(struct kgem *kgem)
_kgem_submit(kgem);
}
-static inline bool kgem_flush(struct kgem *kgem)
+static inline bool kgem_flush(struct kgem *kgem, bool flush)
{
- return kgem->flush && kgem_is_idle(kgem);
+ if (kgem->nreloc == 0)
+ return false;
+
+ return (kgem->flush ^ flush) && kgem_ring_is_idle(kgem, kgem->ring);
}
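A non-authoritative reading of the reworked kgem_flush(): kgem->flush records whether the pending batch already contains a flush-requiring bo (e.g. scanout or shared), while the new argument describes the bo about to be used; the XOR asks for an early submit only when the two disagree, and only when the ring is idle so the submit is useful work rather than a stall.

	/* batch flush | bo flush | early submit (given an idle ring)?
	 *    false    |  false   |  no  -- nothing to separate
	 *    false    |  true    |  yes -- isolate the flushy bo
	 *    true     |  false   |  yes -- keep this bo out of that batch
	 *    true     |  true    |  no  -- already accounted for
	 */
	static inline bool flush_state_differs(struct kgem *kgem, bool flush)
	{
		return kgem->flush ^ flush; /* hypothetical helper */
	}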
static inline void kgem_bo_submit(struct kgem *kgem, struct kgem_bo *bo)
@@ -295,7 +326,7 @@ static inline void kgem_bo_submit(struct kgem *kgem, struct kgem_bo *bo)
_kgem_submit(kgem);
}
-bool __kgem_flush(struct kgem *kgem, struct kgem_bo *bo);
+void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo);
static inline void kgem_bo_flush(struct kgem *kgem, struct kgem_bo *bo)
{
kgem_bo_submit(kgem, bo);
@@ -307,7 +338,7 @@ static inline void kgem_bo_flush(struct kgem *kgem, struct kgem_bo *bo)
* we assume direct access. And as the usual failure is EIO, we do
* not actually care.
*/
- (void)__kgem_flush(kgem, bo);
+ __kgem_flush(kgem, bo);
}
static inline struct kgem_bo *kgem_bo_reference(struct kgem_bo *bo)
@@ -327,7 +358,9 @@ static inline void kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
void kgem_clear_dirty(struct kgem *kgem);
-static inline void kgem_set_mode(struct kgem *kgem, enum kgem_mode mode)
+static inline void kgem_set_mode(struct kgem *kgem,
+ enum kgem_mode mode,
+ struct kgem_bo *bo)
{
assert(!kgem->wedged);
@@ -335,6 +368,9 @@ static inline void kgem_set_mode(struct kgem *kgem, enum kgem_mode mode)
kgem_submit(kgem);
#endif
+ if (kgem->nreloc && bo->exec == NULL && kgem_ring_is_idle(kgem, kgem->ring))
+ _kgem_submit(kgem);
+
if (kgem->mode == mode)
return;
@@ -346,6 +382,7 @@ static inline void _kgem_set_mode(struct kgem *kgem, enum kgem_mode mode)
{
assert(kgem->mode == KGEM_NONE);
assert(kgem->nbatch == 0);
+ assert(!kgem->wedged);
kgem->context_switch(kgem, mode);
kgem->mode = mode;
}
@@ -384,33 +421,21 @@ static inline bool kgem_check_batch_with_surfaces(struct kgem *kgem,
kgem_check_exec(kgem, num_surfaces);
}
-static inline uint32_t *kgem_get_batch(struct kgem *kgem, int num_dwords)
+static inline uint32_t *kgem_get_batch(struct kgem *kgem)
{
- if (!kgem_check_batch(kgem, num_dwords))
+ if (kgem->nreloc) {
+ unsigned mode = kgem->mode;
_kgem_submit(kgem);
+ _kgem_set_mode(kgem, mode);
+ }
return kgem->batch + kgem->nbatch;
}
-static inline void kgem_advance_batch(struct kgem *kgem, int num_dwords)
-{
- kgem->nbatch += num_dwords;
-}
-
bool kgem_check_bo(struct kgem *kgem, ...) __attribute__((sentinel(0)));
bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo);
bool kgem_check_many_bo_fenced(struct kgem *kgem, ...) __attribute__((sentinel(0)));
-void _kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo);
-static inline void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
-{
- if (bo->proxy)
- bo = bo->proxy;
-
- if (bo->exec == NULL)
- _kgem_add_bo(kgem, bo);
-}
-
#define KGEM_RELOC_FENCED 0x8000
uint32_t kgem_add_reloc(struct kgem *kgem,
uint32_t pos,
@@ -425,6 +450,7 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo);
+void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write);
void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
void __kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr);
uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo);
@@ -460,7 +486,7 @@ static inline bool kgem_bo_blt_pitch_is_ok(struct kgem *kgem,
struct kgem_bo *bo)
{
int pitch = bo->pitch;
- if (kgem->gen >= 40 && bo->tiling)
+ if (kgem->gen >= 040 && bo->tiling)
pitch /= 4;
if (pitch > MAXSHORT) {
DBG(("%s: can not blt to handle=%d, adjusted pitch=%d\n",
@@ -483,16 +509,13 @@ static inline bool kgem_bo_can_blt(struct kgem *kgem,
return kgem_bo_blt_pitch_is_ok(kgem, bo);
}
-static inline bool kgem_bo_is_mappable(struct kgem *kgem,
- struct kgem_bo *bo)
+static inline bool __kgem_bo_is_mappable(struct kgem *kgem,
+ struct kgem_bo *bo)
{
- DBG(("%s: domain=%d, offset: %d size: %d\n",
- __FUNCTION__, bo->domain, bo->presumed_offset, kgem_bo_size(bo)));
-
if (bo->domain == DOMAIN_GTT)
return true;
- if (kgem->gen < 40 && bo->tiling &&
+ if (kgem->gen < 040 && bo->tiling &&
bo->presumed_offset & (kgem_bo_fenced_size(kgem, bo) - 1))
return false;
@@ -502,17 +525,24 @@ static inline bool kgem_bo_is_mappable(struct kgem *kgem,
return bo->presumed_offset + kgem_bo_size(bo) <= kgem->aperture_mappable;
}
+static inline bool kgem_bo_is_mappable(struct kgem *kgem,
+ struct kgem_bo *bo)
+{
+ DBG(("%s: domain=%d, offset: %d size: %d\n",
+ __FUNCTION__, bo->domain, bo->presumed_offset, kgem_bo_size(bo)));
+ assert(bo->refcnt);
+ return __kgem_bo_is_mappable(kgem, bo);
+}
+
static inline bool kgem_bo_mapped(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: map=%p, tiling=%d, domain=%d\n",
__FUNCTION__, bo->map, bo->tiling, bo->domain));
+ assert(bo->refcnt);
if (bo->map == NULL)
return bo->tiling == I915_TILING_NONE && bo->domain == DOMAIN_CPU;
- if (bo->tiling == I915_TILING_X && !bo->scanout && kgem->has_llc)
- return IS_CPU_MAP(bo->map);
-
return IS_CPU_MAP(bo->map) == !bo->tiling;
}
@@ -524,7 +554,7 @@ static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo)
if (!bo->tiling && kgem->has_llc)
return true;
- if (kgem->gen == 21 && bo->tiling == I915_TILING_Y)
+ if (kgem->gen == 021 && bo->tiling == I915_TILING_Y)
return false;
return kgem_bo_size(bo) <= kgem->aperture_mappable / 4;
@@ -532,15 +562,32 @@ static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo)
static inline bool kgem_bo_is_snoop(struct kgem_bo *bo)
{
+ assert(bo->refcnt);
while (bo->proxy)
bo = bo->proxy;
return bo->snoop;
}
+bool __kgem_busy(struct kgem *kgem, int handle);
+
+static inline void kgem_bo_mark_busy(struct kgem_bo *bo, int ring)
+{
+ bo->rq = (struct kgem_request *)((uintptr_t)bo->rq | ring);
+}
+
+inline static void __kgem_bo_clear_busy(struct kgem_bo *bo)
+{
+ bo->needs_flush = false;
+ list_del(&bo->request);
+ bo->rq = NULL;
+ bo->domain = DOMAIN_NONE;
+}
+
static inline bool kgem_bo_is_busy(struct kgem_bo *bo)
{
DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__,
bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL));
+ assert(bo->refcnt);
return bo->rq;
}
@@ -548,10 +595,17 @@ static inline bool __kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__,
bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL));
- if (kgem_flush(kgem))
+ assert(bo->refcnt);
+
+ if (bo->exec)
+ return true;
+
+ if (kgem_flush(kgem, bo->flush))
kgem_submit(kgem);
- if (bo->rq && !bo->exec)
- kgem_retire(kgem);
+
+ if (bo->rq && !__kgem_busy(kgem, bo->handle))
+ __kgem_bo_clear_busy(bo);
+
return kgem_bo_is_busy(bo);
}
@@ -560,21 +614,42 @@ static inline bool kgem_bo_is_dirty(struct kgem_bo *bo)
if (bo == NULL)
return false;
+ assert(bo->refcnt);
return bo->dirty;
}
+static inline void kgem_bo_unclean(struct kgem *kgem, struct kgem_bo *bo)
+{
+ /* The bo is outside of our control, so presume it is written to */
+ bo->needs_flush = true;
+ if (bo->rq == NULL)
+ bo->rq = (void *)kgem;
+
+ if (bo->domain != DOMAIN_GPU)
+ bo->domain = DOMAIN_NONE;
+}
+
+static inline void __kgem_bo_mark_dirty(struct kgem_bo *bo)
+{
+ DBG(("%s: handle=%d (proxy? %d)\n", __FUNCTION__,
+ bo->handle, bo->proxy != NULL));
+
+ bo->exec->flags |= LOCAL_EXEC_OBJECT_WRITE;
+ bo->needs_flush = bo->dirty = true;
+ list_move(&bo->request, &RQ(bo->rq)->buffers);
+}
+
static inline void kgem_bo_mark_dirty(struct kgem_bo *bo)
{
+ assert(bo->refcnt);
do {
- if (bo->dirty)
- return;
-
- DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
assert(bo->exec);
assert(bo->rq);
- bo->needs_flush = bo->dirty = true;
- list_move(&bo->request, &bo->rq->buffers);
+ if (bo->dirty)
+ return;
+
+ __kgem_bo_mark_dirty(bo);
} while ((bo = bo->proxy));
}
@@ -600,7 +675,7 @@ bool kgem_expire_cache(struct kgem *kgem);
void kgem_purge_cache(struct kgem *kgem);
void kgem_cleanup_cache(struct kgem *kgem);
-#if HAS_EXTRA_DEBUG
+#if HAS_DEBUG_FULL
void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch);
#else
static inline void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch)
diff --git a/src/sna/kgem_debug.c b/src/sna/kgem_debug.c
index 2dc1b4564..48c75889c 100644
--- a/src/sna/kgem_debug.c
+++ b/src/sna/kgem_debug.c
@@ -62,7 +62,7 @@ kgem_debug_get_bo_for_reloc_entry(struct kgem *kgem,
return NULL;
list_for_each_entry(bo, &kgem->next_request->buffers, request)
- if (bo->handle == reloc->target_handle && bo->proxy == NULL)
+ if (bo->target_handle == reloc->target_handle && bo->proxy == NULL)
break;
assert(&bo->request != &kgem->next_request->buffers);
@@ -74,6 +74,9 @@ static int kgem_debug_handle_is_fenced(struct kgem *kgem, uint32_t handle)
{
int i;
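+	/* with I915_EXEC_HANDLE_LUT the reloc target_handle is an index into exec[] */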
+ if (kgem->has_handle_lut)
+ return kgem->exec[handle].flags & EXEC_OBJECT_NEEDS_FENCE;
+
for (i = 0; i < kgem->nexec; i++)
if (kgem->exec[i].handle == handle)
return kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE;
@@ -86,7 +89,7 @@ static int kgem_debug_handle_tiling(struct kgem *kgem, uint32_t handle)
struct kgem_bo *bo;
list_for_each_entry(bo, &kgem->next_request->buffers, request)
- if (bo->handle == handle)
+ if (bo->target_handle == handle)
return bo->tiling;
return 0;
@@ -95,7 +98,7 @@ static int kgem_debug_handle_tiling(struct kgem *kgem, uint32_t handle)
void
kgem_debug_print(const uint32_t *data,
uint32_t offset, unsigned int index,
- char *fmt, ...)
+ const char *fmt, ...)
{
va_list va;
char buf[240];
@@ -273,7 +276,7 @@ decode_2d(struct kgem *kgem, uint32_t offset)
kgem_debug_handle_is_fenced(kgem, reloc->target_handle),
kgem_debug_handle_tiling(kgem, reloc->target_handle));
kgem_debug_print(data, offset, 5, "color\n");
- assert(kgem->gen >= 40 ||
+ assert(kgem->gen >= 040 ||
kgem_debug_handle_is_fenced(kgem, reloc->target_handle));
return len;
@@ -321,7 +324,7 @@ decode_2d(struct kgem *kgem, uint32_t offset)
reloc->read_domains, reloc->write_domain,
kgem_debug_handle_is_fenced(kgem, reloc->target_handle),
kgem_debug_handle_tiling(kgem, reloc->target_handle));
- assert(kgem->gen >= 40 ||
+ assert(kgem->gen >= 040 ||
kgem_debug_handle_is_fenced(kgem, reloc->target_handle));
kgem_debug_print(data, offset, 5, "src (%d,%d)\n",
@@ -336,7 +339,7 @@ decode_2d(struct kgem *kgem, uint32_t offset)
reloc->read_domains, reloc->write_domain,
kgem_debug_handle_is_fenced(kgem, reloc->target_handle),
kgem_debug_handle_tiling(kgem, reloc->target_handle));
- assert(kgem->gen >= 40 ||
+ assert(kgem->gen >= 040 ||
kgem_debug_handle_is_fenced(kgem, reloc->target_handle));
return len;
@@ -368,18 +371,18 @@ decode_2d(struct kgem *kgem, uint32_t offset)
static int (*decode_3d(int gen))(struct kgem*, uint32_t)
{
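+	/* gen is an octal literal: 030 == gen3, 070 == gen7, 0100 == gen8 */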
- if (gen >= 80) {
- } else if (gen >= 70) {
+ if (gen >= 0100) {
+ } else if (gen >= 070) {
return kgem_gen7_decode_3d;
- } else if (gen >= 60) {
+ } else if (gen >= 060) {
return kgem_gen6_decode_3d;
- } else if (gen >= 50) {
+ } else if (gen >= 050) {
return kgem_gen5_decode_3d;
- } else if (gen >= 40) {
+ } else if (gen >= 040) {
return kgem_gen4_decode_3d;
- } else if (gen >= 30) {
+ } else if (gen >= 030) {
return kgem_gen3_decode_3d;
- } else if (gen >= 20) {
+ } else if (gen >= 020) {
return kgem_gen2_decode_3d;
}
assert(0);
@@ -387,18 +390,18 @@ static int (*decode_3d(int gen))(struct kgem*, uint32_t)
static void (*finish_state(int gen))(struct kgem*)
{
- if (gen >= 80) {
- } else if (gen >= 70) {
+ if (gen >= 0100) {
+ } else if (gen >= 070) {
return kgem_gen7_finish_state;
- } else if (gen >= 60) {
+ } else if (gen >= 060) {
return kgem_gen6_finish_state;
- } else if (gen >= 50) {
+ } else if (gen >= 050) {
return kgem_gen5_finish_state;
- } else if (gen >= 40) {
+ } else if (gen >= 040) {
return kgem_gen4_finish_state;
- } else if (gen >= 30) {
+ } else if (gen >= 030) {
return kgem_gen3_finish_state;
- } else if (gen >= 20) {
+ } else if (gen >= 020) {
return kgem_gen2_finish_state;
}
assert(0);
diff --git a/src/sna/kgem_debug.h b/src/sna/kgem_debug.h
index 82d6f6664..a0c9fc177 100644
--- a/src/sna/kgem_debug.h
+++ b/src/sna/kgem_debug.h
@@ -4,7 +4,7 @@
void
kgem_debug_print(const uint32_t *data,
uint32_t offset, unsigned int index,
- char *fmt, ...);
+ const char *fmt, ...);
struct drm_i915_gem_relocation_entry *
kgem_debug_get_reloc_entry(struct kgem *kgem, uint32_t offset);
diff --git a/src/sna/kgem_debug_gen5.c b/src/sna/kgem_debug_gen5.c
index e23ceb1fa..8b55dd919 100644
--- a/src/sna/kgem_debug_gen5.c
+++ b/src/sna/kgem_debug_gen5.c
@@ -73,7 +73,7 @@ static void gen5_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
int i, size;
reloc = kgem_debug_get_reloc_entry(kgem, &data[1] - kgem->batch);
- if (reloc->target_handle == 0) {
+ if (reloc->target_handle == -1) {
base = kgem->batch;
size = kgem->nbatch * sizeof(uint32_t);
} else {
@@ -529,20 +529,19 @@ int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset)
for (i = 1; i < len;) {
gen5_update_vertex_elements(kgem, (i - 1)/2, data + i);
- kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, "
- "src offset 0x%04x bytes\n",
- data[i] >> 27,
- data[i] & (1 << 26) ? "" : "in",
- (data[i] >> 16) & 0x1ff,
- data[i] & 0x07ff);
+ kgem_debug_print(data, offset, i,
+ "buffer %d: %svalid, type 0x%04x, "
+ "src offset 0x%04x bytes\n",
+ data[i] >> 27,
+ data[i] & (1 << 26) ? "" : "in",
+ (data[i] >> 16) & 0x1ff,
+ data[i] & 0x07ff);
i++;
- kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), "
- "dst offset 0x%02x bytes\n",
+ kgem_debug_print(data, offset, i, "(%s, %s, %s, %s)\n",
get_965_element_component(data[i], 0),
get_965_element_component(data[i], 1),
get_965_element_component(data[i], 2),
- get_965_element_component(data[i], 3),
- (data[i] & 0xff) * 4);
+ get_965_element_component(data[i], 3));
i++;
}
state.num_ve = (len - 1) / 2; /* XXX? */
diff --git a/src/sna/kgem_debug_gen6.c b/src/sna/kgem_debug_gen6.c
index e0b09d558..7ef55d38f 100644
--- a/src/sna/kgem_debug_gen6.c
+++ b/src/sna/kgem_debug_gen6.c
@@ -75,11 +75,11 @@ static void gen6_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
assert(i < kgem->nreloc);
reloc = kgem->reloc[i].target_handle;
- if (reloc == 0) {
+ if (reloc == -1) {
base = kgem->batch;
} else {
list_for_each_entry(bo, &kgem->next_request->buffers, request)
- if (bo->handle == reloc)
+ if (bo->target_handle == reloc)
break;
assert(&bo->request != &kgem->next_request->buffers);
base = kgem_bo_map__debug(kgem, bo);
@@ -643,7 +643,7 @@ int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset)
case 0x6101:
i = 0;
kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n");
- if (kgem->gen >= 60) {
+ if (kgem->gen >= 060) {
assert(len == 10);
state_base_out(data, offset, i++, "general");
@@ -658,7 +658,7 @@ int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset)
state_max_out(data, offset, i++, "instruction");
gen6_update_dynamic_buffer(kgem, offset + 3);
- } else if (kgem->gen >= 50) {
+ } else if (kgem->gen >= 050) {
assert(len == 8);
state_base_out(data, offset, i++, "general");
@@ -674,7 +674,7 @@ int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset)
return len;
case 0x7801:
- if (kgem->gen >= 60) {
+ if (kgem->gen >= 060) {
assert(len == 4);
kgem_debug_print(data, offset, 0,
@@ -686,7 +686,7 @@ int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset)
kgem_debug_print(data, offset, 1, "VS binding table\n");
kgem_debug_print(data, offset, 2, "GS binding table\n");
kgem_debug_print(data, offset, 3, "WM binding table\n");
- } else if (kgem->gen >= 40) {
+ } else if (kgem->gen >= 040) {
assert(len == 6);
kgem_debug_print(data, offset, 0,
diff --git a/src/sna/sna.h b/src/sna/sna.h
index 031be7287..b470c48a0 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -42,6 +42,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#endif
#include <stdint.h>
+
#include "compiler.h"
#include <xorg-server.h>
@@ -79,11 +80,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define DBG(x)
#endif
-#define DEBUG_NO_RENDER 0
#define DEBUG_NO_BLT 0
#define DEBUG_FLUSH_BATCH 0
-#define DEBUG_FLUSH_SYNC 0
#define TEST_ALL 0
#define TEST_ACCEL (TEST_ALL || 0)
@@ -112,9 +111,9 @@ struct sna_pixmap {
struct kgem_bo *gpu_bo, *cpu_bo;
struct sna_damage *gpu_damage, *cpu_damage;
void *ptr;
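+/* bit 0 of ptr tags storage the driver does not own; PTR() strips the tag */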
+#define PTR(ptr) ((void*)((uintptr_t)(ptr) & ~1))
struct list list;
- struct list inactive;
uint32_t stride;
uint32_t clear_color;
@@ -127,11 +126,10 @@ struct sna_pixmap {
#define PIN_SCANOUT 0x1
#define PIN_DRI 0x2
#define PIN_PRIME 0x4
+ uint8_t create :4;
uint8_t mapped :1;
uint8_t shm :1;
uint8_t clear :1;
- uint8_t undamaged :1;
- uint8_t create :3;
uint8_t header :1;
uint8_t cpu :1;
};
@@ -143,6 +141,15 @@ struct sna_glyph {
uint16_t size, pos;
};
+static inline WindowPtr root(ScreenPtr screen)
+{
+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,10,0,0,0)
+ return screen->root;
+#else
+ return WindowTable[screen->myNum];
+#endif
+}
+
static inline PixmapPtr get_window_pixmap(WindowPtr window)
{
return fbGetWindowPixmap(window);
@@ -160,7 +167,7 @@ extern DevPrivateKeyRec sna_pixmap_key;
constant static inline struct sna_pixmap *sna_pixmap(PixmapPtr pixmap)
{
- return ((void **)dixGetPrivateAddr(&pixmap->devPrivates, &sna_pixmap_key))[1];
+ return ((void **)__get_private(pixmap, sna_pixmap_key))[1];
}
static inline struct sna_pixmap *sna_pixmap_from_drawable(DrawablePtr drawable)
@@ -178,14 +185,13 @@ struct sna_gc {
static inline struct sna_gc *sna_gc(GCPtr gc)
{
- return dixGetPrivateAddr(&gc->devPrivates, &sna_gc_key);
+ return (struct sna_gc *)__get_private(gc, sna_gc_key);
}
enum {
FLUSH_TIMER = 0,
THROTTLE_TIMER,
EXPIRE_TIMER,
- INACTIVE_TIMER,
#if DEBUG_MEMORY
DEBUG_MEMORY_TIMER,
#endif
@@ -196,10 +202,9 @@ struct sna {
ScrnInfoPtr scrn;
unsigned flags;
-#define SNA_NO_THROTTLE 0x1
-#define SNA_NO_DELAYED_FLUSH 0x2
-#define SNA_NO_WAIT 0x4
-#define SNA_NO_FLIP 0x8
+#define SNA_NO_WAIT 0x1
+#define SNA_NO_FLIP 0x2
+#define SNA_TRIPLE_BUFFER 0x4
#define SNA_TEAR_FREE 0x10
#define SNA_FORCE_SHADOW 0x20
@@ -213,7 +218,6 @@ struct sna {
struct list flush_pixmaps;
struct list active_pixmaps;
- struct list inactive_clock[2];
PixmapPtr front;
PixmapPtr freed_pixmap;
@@ -237,7 +241,6 @@ struct sna {
unsigned int tiling;
#define SNA_TILING_FB 0x1
#define SNA_TILING_2D 0x2
-#define SNA_TILING_3D 0x4
#define SNA_TILING_ALL (~0)
EntityInfoPtr pEnt;
@@ -262,7 +265,6 @@ struct sna {
struct gen6_render_state gen6;
struct gen7_render_state gen7;
} render_state;
- uint32_t have_render;
bool dri_available;
bool dri_open;
@@ -298,6 +300,7 @@ extern void sna_mode_update(struct sna *sna);
extern void sna_mode_disable_unused(struct sna *sna);
extern void sna_mode_wakeup(struct sna *sna);
extern void sna_mode_redisplay(struct sna *sna);
+extern void sna_mode_close(struct sna *sna);
extern void sna_mode_fini(struct sna *sna);
extern int sna_page_flip(struct sna *sna,
@@ -320,7 +323,7 @@ to_sna_from_screen(ScreenPtr screen)
constant static inline struct sna *
to_sna_from_pixmap(PixmapPtr pixmap)
{
- return ((void **)dixGetPrivateAddr(&pixmap->devPrivates, &sna_pixmap_key))[0];
+ return ((void **)__get_private(pixmap, sna_pixmap_key))[0];
}
constant static inline struct sna *
@@ -371,10 +374,11 @@ static inline void sna_dri_vblank_handler(struct sna *sna, struct drm_event_vbla
static inline void sna_dri_destroy_window(WindowPtr win) { }
static inline void sna_dri_close(struct sna *sna, ScreenPtr pScreen) { }
#endif
+void sna_dri_pixmap_update_bo(struct sna *sna, PixmapPtr pixmap);
extern int sna_crtc_to_pipe(xf86CrtcPtr crtc);
-extern int sna_crtc_to_plane(xf86CrtcPtr crtc);
-extern int sna_crtc_id(xf86CrtcPtr crtc);
+extern uint32_t sna_crtc_to_plane(xf86CrtcPtr crtc);
+extern uint32_t sna_crtc_id(xf86CrtcPtr crtc);
CARD32 sna_format_for_depth(int depth);
CARD32 sna_render_format_for_depth(int depth);
@@ -438,6 +442,9 @@ void sna_pixmap_destroy(PixmapPtr pixmap);
#define __MOVE_FORCE 0x40
#define __MOVE_DRI 0x80
+bool
+sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int flags);
+
struct sna_pixmap *sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags);
static inline struct sna_pixmap *
sna_pixmap_force_to_gpu(PixmapPtr pixmap, unsigned flags)
@@ -483,6 +490,24 @@ struct kgem_bo *
sna_drawable_use_bo(DrawablePtr drawable, unsigned flags, const BoxRec *box,
struct sna_damage ***damage);
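+/* saturating int16 addition helpers, hoisted here from sna_accel.c */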
+inline static int16_t bound(int16_t a, uint16_t b)
+{
+ int v = (int)a + (int)b;
+ if (v > MAXSHORT)
+ return MAXSHORT;
+ return v;
+}
+
+inline static int16_t clamp(int16_t a, int16_t b)
+{
+ int v = (int)a + (int)b;
+ if (v > MAXSHORT)
+ return MAXSHORT;
+ if (v < MINSHORT)
+ return MINSHORT;
+ return v;
+}
+
static inline bool
box_inplace(PixmapPtr pixmap, const BoxRec *box)
{
@@ -587,6 +612,20 @@ _sna_get_transformed_coordinates(int x, int y,
*y_out = result[1] / (double)result[2];
}
+static inline void
+_sna_get_transformed_scaled(int x, int y,
+ const PictTransform *transform, const float *sf,
+ float *x_out, float *y_out)
+{
+ *x_out = sf[0] * (transform->matrix[0][0] * x +
+ transform->matrix[0][1] * y +
+ transform->matrix[0][2]);
+
+ *y_out = sf[1] * (transform->matrix[1][0] * x +
+ transform->matrix[1][1] * y +
+ transform->matrix[1][2]);
+}
+
void
sna_get_transformed_coordinates(int x, int y,
const PictTransform *transform,
@@ -602,6 +641,12 @@ bool sna_transform_is_integer_translation(const PictTransform *t,
int16_t *tx, int16_t *ty);
bool sna_transform_is_translation(const PictTransform *t,
pixman_fixed_t *tx, pixman_fixed_t *ty);
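+/* any non-zero off-diagonal term qualifies, so shears also count as rotations */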
+static inline bool
+sna_affine_transform_is_rotation(const PictTransform *t)
+{
+ assert(sna_transform_is_affine(t));
+ return t->matrix[0][1] | t->matrix[1][0];
+}
static inline bool
sna_transform_equal(const PictTransform *a, const PictTransform *b)
@@ -635,7 +680,7 @@ static inline bool wedged(struct sna *sna)
static inline bool can_render(struct sna *sna)
{
- return likely(!sna->kgem.wedged && sna->have_render);
+ return likely(!sna->kgem.wedged && sna->render.prefer_gpu & PREFER_GPU_RENDER);
}
static inline uint32_t pixmap_size(PixmapPtr pixmap)
@@ -665,6 +710,15 @@ void sna_composite(CARD8 op,
INT16 mask_x, INT16 mask_y,
INT16 dst_x, INT16 dst_y,
CARD16 width, CARD16 height);
+void sna_composite_fb(CARD8 op,
+ PicturePtr src,
+ PicturePtr mask,
+ PicturePtr dst,
+ RegionPtr region,
+ INT16 src_x, INT16 src_y,
+ INT16 mask_x, INT16 mask_y,
+ INT16 dst_x, INT16 dst_y,
+ CARD16 width, CARD16 height);
void sna_composite_rectangles(CARD8 op,
PicturePtr dst,
xRenderColor *color,
@@ -787,6 +841,7 @@ memcpy_xor(const void *src, void *dst, int bpp,
#define SNA_CREATE_FB 0x10
#define SNA_CREATE_SCRATCH 0x11
+#define SNA_CREATE_GLYPHS 0x12
inline static bool is_power_of_two(unsigned x)
{
@@ -801,4 +856,22 @@ inline static bool is_clipped(const RegionRec *r,
r->extents.y2 - r->extents.y1 != d->height);
}
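+/* worker-thread helpers used by the software rasterisation paths */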
+void sna_threads_init(void);
+int sna_use_threads(int width, int height, int threshold);
+void sna_threads_run(void (*func)(void *arg), void *arg);
+void sna_threads_wait(void);
+
+void sna_image_composite(pixman_op_t op,
+ pixman_image_t *src,
+ pixman_image_t *mask,
+ pixman_image_t *dst,
+ int16_t src_x,
+ int16_t src_y,
+ int16_t mask_x,
+ int16_t mask_y,
+ int16_t dst_x,
+ int16_t dst_y,
+ uint16_t width,
+ uint16_t height);
+
#endif /* _SNA_H */
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index a8a0c931a..827dcf4ac 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -29,6 +29,7 @@
#include "config.h"
#endif
+#include "intel_options.h"
#include "sna.h"
#include "sna_reg.h"
#include "rop.h"
@@ -51,6 +52,8 @@
#include <sys/mman.h>
#include <unistd.h>
+#define FAULT_INJECTION 0
+
#define FORCE_INPLACE 0
#define FORCE_FALLBACK 0
#define FORCE_FLUSH 0
@@ -60,8 +63,9 @@
#define USE_INPLACE 1
#define USE_WIDE_SPANS 0 /* -1 force CPU, 1 force GPU */
#define USE_ZERO_SPANS 1 /* -1 force CPU, 1 force GPU */
-#define USE_INACTIVE 0
#define USE_CPU_BO 1
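+/* userptr allows kgem to wrap client memory in a bo directly, kernel permitting */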
+#define USE_USERPTR_UPLOADS 1
+#define USE_USERPTR_DOWNLOADS 1
#define MIGRATE_ALL 0
#define DBG_NO_CPU_UPLOAD 0
@@ -92,6 +96,9 @@
#define NO_TILE_8x8 0
#define NO_STIPPLE_8x8 0
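+/* bit 0 of priv->ptr marks storage we must not free (e.g. SHM segments) */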
+#define IS_STATIC_PTR(ptr) ((uintptr_t)(ptr) & 1)
+#define MAKE_STATIC_PTR(ptr) ((void*)((uintptr_t)(ptr) | 1))
+
#if 0
static void __sna_fallback_flush(DrawablePtr d)
{
@@ -318,6 +325,8 @@ static void assert_pixmap_damage(PixmapPtr p)
if (priv == NULL)
return;
+ assert(priv->gpu_damage == NULL || priv->gpu_bo);
+
if (priv->clear) {
assert(DAMAGE_IS_ALL(priv->gpu_damage));
assert(priv->cpu_damage == NULL);
@@ -345,7 +354,7 @@ static void assert_pixmap_damage(PixmapPtr p)
_sna_damage_debug_get_region(DAMAGE_PTR(priv->cpu_damage), &cpu);
RegionIntersect(&reg, &cpu, &gpu);
- assert(!RegionNotEmpty(&reg));
+ assert(RegionNil(&reg));
RegionUninit(&reg);
RegionUninit(&gpu);
@@ -405,8 +414,6 @@ static void sna_pixmap_free_gpu(struct sna *sna, struct sna_pixmap *priv)
priv->mapped = false;
}
- list_del(&priv->inactive);
-
/* and reset the upload counter */
priv->source_count = SOURCE_BIAS;
}
@@ -433,20 +440,20 @@ sna_pixmap_alloc_cpu(struct sna *sna,
pixmap->drawable.width,
pixmap->drawable.height,
pixmap->drawable.bitsPerPixel,
- from_gpu ? 0 : CREATE_CPU_MAP | CREATE_INACTIVE);
+ from_gpu ? 0 : CREATE_CPU_MAP | CREATE_INACTIVE | CREATE_NO_THROTTLE);
if (priv->cpu_bo) {
priv->ptr = kgem_bo_map__cpu(&sna->kgem, priv->cpu_bo);
- priv->stride = priv->cpu_bo->pitch;
if (priv->ptr) {
DBG(("%s: allocated CPU handle=%d (snooped? %d)\n", __FUNCTION__,
priv->cpu_bo->handle, priv->cpu_bo->snoop));
+ priv->stride = priv->cpu_bo->pitch;
#ifdef DEBUG_MEMORY
sna->debug_memory.cpu_bo_allocs++;
sna->debug_memory.cpu_bo_bytes += kgem_bo_size(priv->cpu_bo);
+#endif
} else {
kgem_bo_destroy(&sna->kgem, priv->cpu_bo);
priv->cpu_bo = NULL;
-#endif
}
}
}
@@ -459,17 +466,15 @@ sna_pixmap_alloc_cpu(struct sna *sna,
assert(priv->ptr);
done:
- pixmap->devPrivate.ptr = priv->ptr;
- pixmap->devKind = priv->stride;
assert(priv->stride);
+ assert(!priv->mapped);
+ pixmap->devPrivate.ptr = PTR(priv->ptr);
+ pixmap->devKind = priv->stride;
return priv->ptr != NULL;
}
-static void sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv)
+static void __sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv)
{
- assert(priv->cpu_damage == NULL);
- assert(list_is_empty(&priv->list));
-
if (priv->cpu_bo) {
DBG(("%s: discarding CPU buffer, handle=%d, size=%d\n",
__FUNCTION__, priv->cpu_bo->handle, kgem_bo_size(priv->cpu_bo)));
@@ -477,17 +482,29 @@ static void sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv)
sna->debug_memory.cpu_bo_allocs--;
sna->debug_memory.cpu_bo_bytes -= kgem_bo_size(priv->cpu_bo);
#endif
- if (priv->cpu_bo->flush) {
- assert(priv->cpu_bo->reusable == false);
+ if (!priv->cpu_bo->reusable) {
+ assert(priv->cpu_bo->flush == true);
kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo);
sna_accel_watch_flush(sna, -1);
}
kgem_bo_destroy(&sna->kgem, priv->cpu_bo);
- priv->cpu_bo = NULL;
- } else
+ } else if (!IS_STATIC_PTR(priv->ptr))
free(priv->ptr);
+}
+
+static void sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv)
+{
+ assert(priv->cpu_damage == NULL);
+ assert(list_is_empty(&priv->list));
+
+ if (IS_STATIC_PTR(priv->ptr))
+ return;
+ __sna_pixmap_free_cpu(sna, priv);
+
+ priv->cpu_bo = NULL;
priv->ptr = NULL;
+
if (!priv->mapped)
priv->pixmap->devPrivate.ptr = NULL;
}
@@ -499,14 +516,14 @@ static inline uint32_t default_tiling(PixmapPtr pixmap,
struct sna *sna = to_sna_from_pixmap(pixmap);
/* Try to avoid hitting the Y-tiling GTT mapping bug on 855GM */
- if (sna->kgem.gen == 21)
+ if (sna->kgem.gen == 021)
return I915_TILING_X;
/* Only on later generations was the render pipeline
* more flexible than the BLT. So on gen2/3, prefer to
* keep large objects accessible through the BLT.
*/
- if (sna->kgem.gen < 40 &&
+ if (sna->kgem.gen < 040 &&
(pixmap->drawable.width > sna->render.max_3d_size ||
pixmap->drawable.height > sna->render.max_3d_size))
return I915_TILING_X;
@@ -518,7 +535,6 @@ static inline uint32_t default_tiling(PixmapPtr pixmap,
DBG(("%s: entire source is damaged, using Y-tiling\n",
__FUNCTION__));
sna_damage_destroy(&priv->gpu_damage);
- priv->undamaged = false;
return I915_TILING_Y;
}
@@ -611,7 +627,7 @@ struct kgem_bo *sna_pixmap_change_tiling(PixmapPtr pixmap, uint32_t tiling)
static inline void sna_set_pixmap(PixmapPtr pixmap, struct sna_pixmap *sna)
{
- ((void **)dixGetPrivateAddr(&pixmap->devPrivates, &sna_pixmap_key))[1] = sna;
+ ((void **)__get_private(pixmap, sna_pixmap_key))[1] = sna;
assert(sna_pixmap(pixmap) == sna);
}
@@ -619,7 +635,6 @@ static struct sna_pixmap *
_sna_pixmap_init(struct sna_pixmap *priv, PixmapPtr pixmap)
{
list_init(&priv->list);
- list_init(&priv->inactive);
priv->source_count = SOURCE_BIAS;
priv->pixmap = pixmap;
@@ -664,6 +679,7 @@ bool sna_pixmap_attach_to_bo(PixmapPtr pixmap, struct kgem_bo *bo)
return false;
priv->gpu_bo = kgem_bo_reference(bo);
+ assert(priv->gpu_bo->proxy == NULL);
sna_damage_all(&priv->gpu_damage,
pixmap->drawable.width,
pixmap->drawable.height);
@@ -711,11 +727,13 @@ create_pixmap(struct sna *sna, ScreenPtr screen,
datasize += adjust;
}
+ DBG(("%s: allocating pixmap %dx%d, depth=%d, size=%ld\n",
+ __FUNCTION__, width, height, depth, (long)datasize));
pixmap = AllocatePixmap(screen, datasize);
if (!pixmap)
return NullPixmap;
- ((void **)dixGetPrivateAddr(&pixmap->devPrivates, &sna_pixmap_key))[0] = sna;
+ ((void **)__get_private(pixmap, sna_pixmap_key))[0] = sna;
assert(to_sna_from_pixmap(pixmap) == sna);
pixmap->drawable.type = DRAWABLE_PIXMAP;
@@ -764,7 +782,7 @@ sna_pixmap_create_shm(ScreenPtr screen,
DBG(("%s(%dx%d, depth=%d, bpp=%d, pitch=%d)\n",
__FUNCTION__, width, height, depth, bpp, pitch));
- if (wedged(sna) || bpp == 0 || pitch*height <= 4096) {
+ if (wedged(sna) || bpp == 0 || pitch*height < 4096) {
fallback:
pixmap = sna_pixmap_create_unattached(screen, 0, 0, depth);
if (pixmap == NULL)
@@ -833,6 +851,8 @@ fallback:
priv->cpu = true;
priv->shm = true;
+ priv->stride = pitch;
+ priv->ptr = MAKE_STATIC_PTR(addr);
sna_damage_all(&priv->cpu_damage, width, height);
pixmap->devKind = pitch;
@@ -863,7 +883,8 @@ sna_pixmap_create_scratch(ScreenPtr screen,
width, height, depth, tiling));
bpp = bits_per_pixel(depth);
- if (tiling == I915_TILING_Y && !sna->have_render)
+ if (tiling == I915_TILING_Y &&
+ (sna->render.prefer_gpu & PREFER_GPU_RENDER) == 0)
tiling = I915_TILING_X;
if (tiling == I915_TILING_Y &&
@@ -1108,6 +1129,7 @@ sna_create_pixmap_shared(struct sna *sna, ScreenPtr screen,
assert(priv->gpu_bo->tiling == I915_TILING_NONE);
assert((priv->gpu_bo->pitch & 255) == 0);
+ assert(!priv->mapped);
pixmap->devPrivate.ptr =
kgem_bo_map__async(&sna->kgem, priv->gpu_bo);
if (pixmap->devPrivate.ptr == NULL) {
@@ -1136,8 +1158,10 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
{
struct sna *sna = to_sna_from_screen(screen);
PixmapPtr pixmap;
+ struct sna_pixmap *priv;
unsigned flags;
int pad;
+ void *ptr;
DBG(("%s(%d, %d, %d, usage=%x)\n", __FUNCTION__,
width, height, depth, usage));
@@ -1161,19 +1185,29 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
goto fallback;
}
- if (!can_render(sna))
+ if (unlikely((sna->render.prefer_gpu & PREFER_GPU_RENDER) == 0))
+ flags &= ~KGEM_CAN_CREATE_GPU;
+ if (wedged(sna))
flags = 0;
- if (usage == CREATE_PIXMAP_USAGE_SCRATCH) {
+ switch (usage) {
+ case CREATE_PIXMAP_USAGE_SCRATCH:
if (flags & KGEM_CAN_CREATE_GPU)
return sna_pixmap_create_scratch(screen,
width, height, depth,
I915_TILING_X);
else
goto fallback;
- }
- if (usage == SNA_CREATE_SCRATCH) {
+ case SNA_CREATE_GLYPHS:
+ if (flags & KGEM_CAN_CREATE_GPU)
+ return sna_pixmap_create_scratch(screen,
+ width, height, depth,
+ -I915_TILING_Y);
+ else
+ goto fallback;
+
+ case SNA_CREATE_SCRATCH:
if (flags & KGEM_CAN_CREATE_GPU)
return sna_pixmap_create_scratch(screen,
width, height, depth,
@@ -1188,7 +1222,7 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
usage = 0;
pad = PixmapBytePad(width, depth);
- if (pad * height <= 4096) {
+ if (pad * height < 4096) {
DBG(("%s: small buffer [%d], attaching to shadow pixmap\n",
__FUNCTION__, pad * height));
pixmap = create_pixmap(sna, screen,
@@ -1196,10 +1230,10 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
if (pixmap == NullPixmap)
return NullPixmap;
- sna_pixmap_attach(pixmap);
+ ptr = MAKE_STATIC_PTR(pixmap->devPrivate.ptr);
+ pad = pixmap->devKind;
+ flags &= ~(KGEM_CAN_CREATE_GPU | KGEM_CAN_CREATE_CPU);
} else {
- struct sna_pixmap *priv;
-
DBG(("%s: creating GPU pixmap %dx%d, stride=%d, flags=%x\n",
__FUNCTION__, width, height, pad, flags));
@@ -1212,16 +1246,19 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
pixmap->devKind = pad;
pixmap->devPrivate.ptr = NULL;
- priv = sna_pixmap_attach(pixmap);
- if (priv == NULL) {
- free(pixmap);
- goto fallback;
- }
+ ptr = NULL;
+ }
- priv->stride = pad;
- priv->create = flags;
+ priv = sna_pixmap_attach(pixmap);
+ if (priv == NULL) {
+ free(pixmap);
+ goto fallback;
}
+ priv->stride = pad;
+ priv->create = flags;
+ priv->ptr = ptr;
+
return pixmap;
fallback:
@@ -1235,9 +1272,10 @@ void sna_add_flush_pixmap(struct sna *sna,
DBG(("%s: marking pixmap=%ld for flushing\n",
__FUNCTION__, priv->pixmap->drawable.serialNumber));
assert(bo);
+ assert(bo->flush);
list_move(&priv->list, &sna->flush_pixmaps);
- if (bo->exec == NULL) {
+ if (bo->exec == NULL && kgem_is_idle(&sna->kgem)) {
DBG(("%s: new flush bo, flushin before\n", __FUNCTION__));
kgem_submit(&sna->kgem);
}
@@ -1248,12 +1286,11 @@ static void __sna_free_pixmap(struct sna *sna,
struct sna_pixmap *priv)
{
list_del(&priv->list);
- list_del(&priv->inactive);
sna_damage_destroy(&priv->gpu_damage);
sna_damage_destroy(&priv->cpu_damage);
- sna_pixmap_free_cpu(sna, priv);
+ __sna_pixmap_free_cpu(sna, priv);
if (priv->header) {
assert(!priv->shm);
@@ -1308,7 +1345,8 @@ void sna_pixmap_destroy(PixmapPtr pixmap)
static inline bool pixmap_inplace(struct sna *sna,
PixmapPtr pixmap,
- struct sna_pixmap *priv)
+ struct sna_pixmap *priv,
+ bool write_only)
{
if (FORCE_INPLACE)
return FORCE_INPLACE > 0;
@@ -1317,7 +1355,10 @@ static inline bool pixmap_inplace(struct sna *sna,
return false;
if (priv->mapped)
- return true;
+ return !IS_CPU_MAP(priv->gpu_bo->map);
+
+ if (!write_only && priv->cpu_damage)
+ return false;
return (pixmap->devKind * pixmap->drawable.height >> 12) >
sna->kgem.half_cpu_cache_pages;
@@ -1332,8 +1373,12 @@ sna_pixmap_create_mappable_gpu(PixmapPtr pixmap)
if (wedged(sna))
return false;
+ if ((priv->create & KGEM_CAN_CREATE_GTT) == 0)
+ return false;
+
assert_pixmap_damage(pixmap);
+ assert(priv->gpu_damage == NULL);
assert(priv->gpu_bo == NULL);
priv->gpu_bo =
kgem_create_2d(&sna->kgem,
@@ -1395,21 +1440,43 @@ static inline bool use_cpu_bo_for_upload(struct sna *sna,
kgem_bo_is_busy(priv->gpu_bo),
kgem_bo_is_busy(priv->cpu_bo)));
+ if (!priv->cpu)
+ return true;
+
if (flags & (MOVE_WRITE | MOVE_ASYNC_HINT))
return true;
+ if (priv->gpu_bo->tiling)
+ return true;
+
return kgem_bo_is_busy(priv->gpu_bo) || kgem_bo_is_busy(priv->cpu_bo);
}
static inline bool operate_inplace(struct sna_pixmap *priv, unsigned flags)
{
- if ((flags & MOVE_INPLACE_HINT) == 0 || priv->gpu_bo == NULL)
+ if ((flags & MOVE_INPLACE_HINT) == 0) {
+ DBG(("%s: no, inplace operation not suitable\n", __FUNCTION__));
+ return false;
+ }
+
+ assert((flags & MOVE_ASYNC_HINT) == 0);
+
+ if ((priv->create & KGEM_CAN_CREATE_GTT) == 0) {
+ DBG(("%s: no, not accessible via GTT\n", __FUNCTION__));
return false;
+ }
+
+ if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) {
+ DBG(("%s: yes, CPU is busy\n", __FUNCTION__));
+ return true;
+ }
- if (flags & MOVE_WRITE && kgem_bo_is_busy(priv->gpu_bo))
+ if (flags & MOVE_WRITE && priv->gpu_bo&&kgem_bo_is_busy(priv->gpu_bo)) {
+ DBG(("%s: no, GPU is busy, so stage write\n", __FUNCTION__));
return false;
+ }
- return priv->stride != 0;
+ return true;
}
bool
@@ -1437,13 +1504,15 @@ _sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
priv->gpu_bo ? priv->gpu_bo->handle : 0,
priv->gpu_damage, priv->cpu_damage, priv->clear));
+ assert(priv->gpu_damage == NULL || priv->gpu_bo);
+
if (USE_INPLACE && (flags & MOVE_READ) == 0) {
assert(flags & MOVE_WRITE);
DBG(("%s: no readbck, discarding gpu damage [%d], pending clear[%d]\n",
__FUNCTION__, priv->gpu_damage != NULL, priv->clear));
if (priv->create & KGEM_CAN_CREATE_GPU &&
- pixmap_inplace(sna, pixmap, priv)) {
+ pixmap_inplace(sna, pixmap, priv, true)) {
assert(!priv->shm);
DBG(("%s: write inplace\n", __FUNCTION__));
if (priv->gpu_bo) {
@@ -1460,29 +1529,26 @@ _sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
!sna_pixmap_create_mappable_gpu(pixmap))
goto skip_inplace_map;
- if (!priv->mapped) {
- pixmap->devPrivate.ptr =
- kgem_bo_map(&sna->kgem, priv->gpu_bo);
- if (pixmap->devPrivate.ptr == NULL)
- goto skip_inplace_map;
+ pixmap->devPrivate.ptr =
+ kgem_bo_map(&sna->kgem, priv->gpu_bo);
+ priv->mapped = pixmap->devPrivate.ptr != NULL;
+ if (!priv->mapped)
+ goto skip_inplace_map;
- priv->mapped = true;
- }
pixmap->devKind = priv->gpu_bo->pitch;
+ assert(priv->gpu_bo->proxy == NULL);
sna_damage_all(&priv->gpu_damage,
pixmap->drawable.width,
pixmap->drawable.height);
sna_damage_destroy(&priv->cpu_damage);
- priv->undamaged = false;
priv->clear = false;
priv->cpu = false;
list_del(&priv->list);
- if (priv->cpu_bo) {
- assert(!priv->shm);
- assert(!priv->cpu_bo->flush);
- sna_pixmap_free_cpu(sna, priv);
- }
+
+ assert(!priv->shm);
+ assert(priv->cpu_bo == NULL || !priv->cpu_bo->flush);
+ sna_pixmap_free_cpu(sna, priv);
assert_pixmap_damage(pixmap);
return true;
@@ -1490,6 +1556,7 @@ _sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
skip_inplace_map:
sna_damage_destroy(&priv->gpu_damage);
+ priv->clear = false;
if (priv->cpu_bo && !priv->cpu_bo->flush &&
__kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) {
DBG(("%s: discarding busy CPU bo\n", __FUNCTION__));
@@ -1497,10 +1564,12 @@ skip_inplace_map:
assert(priv->gpu_bo == NULL || priv->gpu_damage == NULL);
sna_damage_destroy(&priv->cpu_damage);
- priv->undamaged = false;
-
- sna_pixmap_free_gpu(sna, priv);
sna_pixmap_free_cpu(sna, priv);
+
+ if (!sna_pixmap_alloc_cpu(sna, pixmap, priv, false))
+ return false;
+
+ goto mark_damage;
}
}
@@ -1512,52 +1581,89 @@ skip_inplace_map:
assert(priv->gpu_bo == NULL || priv->gpu_bo->proxy == NULL);
if (operate_inplace(priv, flags) &&
- pixmap_inplace(sna, pixmap, priv) &&
- sna_pixmap_move_to_gpu(pixmap, flags)) {
+ pixmap_inplace(sna, pixmap, priv, (flags & MOVE_READ) == 0) &&
+ (priv->gpu_bo || sna_pixmap_create_mappable_gpu(pixmap))) {
kgem_bo_submit(&sna->kgem, priv->gpu_bo);
- DBG(("%s: try to operate inplace\n", __FUNCTION__));
- assert(priv->cpu == false);
+ DBG(("%s: try to operate inplace (GTT)\n", __FUNCTION__));
+ assert((flags & MOVE_READ) == 0 || priv->cpu == false);
- pixmap->devPrivate.ptr =
- kgem_bo_map(&sna->kgem, priv->gpu_bo);
- if (pixmap->devPrivate.ptr != NULL) {
- priv->mapped = true;
+ pixmap->devPrivate.ptr = kgem_bo_map(&sna->kgem, priv->gpu_bo);
+ priv->mapped = pixmap->devPrivate.ptr != NULL;
+ if (priv->mapped) {
pixmap->devKind = priv->gpu_bo->pitch;
if (flags & MOVE_WRITE) {
+ assert(priv->gpu_bo->proxy == NULL);
sna_damage_all(&priv->gpu_damage,
pixmap->drawable.width,
pixmap->drawable.height);
sna_damage_destroy(&priv->cpu_damage);
+ sna_pixmap_free_cpu(sna, priv);
list_del(&priv->list);
- priv->undamaged = false;
priv->clear = false;
}
assert_pixmap_damage(pixmap);
- DBG(("%s: operate inplace\n", __FUNCTION__));
+ DBG(("%s: operate inplace (GTT)\n", __FUNCTION__));
return true;
}
-
- priv->mapped = false;
}
if (priv->mapped) {
- assert(!priv->shm);
- pixmap->devPrivate.ptr = NULL;
+ assert(!priv->shm && priv->stride);
+ pixmap->devPrivate.ptr = PTR(priv->ptr);
+ pixmap->devKind = priv->stride;
priv->mapped = false;
}
- if (priv->clear && priv->cpu_bo && !priv->cpu_bo->flush &&
+ if (priv->gpu_damage &&
+ ((flags & MOVE_ASYNC_HINT) == 0 ||
+ !__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo)) &&
+ priv->gpu_bo->tiling == I915_TILING_NONE &&
+ sna_pixmap_move_to_gpu(pixmap, MOVE_READ)) {
+ kgem_bo_submit(&sna->kgem, priv->gpu_bo);
+
+ DBG(("%s: try to operate inplace (CPU)\n", __FUNCTION__));
+
+ assert(!priv->mapped);
+ pixmap->devPrivate.ptr =
+ kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo);
+ if (pixmap->devPrivate.ptr != NULL) {
+ priv->cpu = true;
+ priv->mapped = true;
+ pixmap->devKind = priv->gpu_bo->pitch;
+ if (flags & MOVE_WRITE) {
+ assert(priv->gpu_bo->proxy == NULL);
+ sna_damage_all(&priv->gpu_damage,
+ pixmap->drawable.width,
+ pixmap->drawable.height);
+ sna_damage_destroy(&priv->cpu_damage);
+ sna_pixmap_free_cpu(sna, priv);
+ list_del(&priv->list);
+ priv->clear = false;
+ }
+
+ kgem_bo_sync__cpu_full(&sna->kgem,
+ priv->gpu_bo, flags & MOVE_WRITE);
+ assert_pixmap_damage(pixmap);
+ DBG(("%s: operate inplace (CPU)\n", __FUNCTION__));
+ return true;
+ }
+ }
+
+ if (((flags & MOVE_READ) == 0 || priv->clear) &&
+ priv->cpu_bo && !priv->cpu_bo->flush &&
__kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) {
assert(!priv->shm);
- assert(DAMAGE_IS_ALL(priv->gpu_damage));
sna_pixmap_free_cpu(sna, priv);
}
if (pixmap->devPrivate.ptr == NULL &&
- !sna_pixmap_alloc_cpu(sna, pixmap, priv, priv->gpu_damage != NULL))
+ !sna_pixmap_alloc_cpu(sna, pixmap, priv,
+ flags & MOVE_READ ? priv->gpu_damage && !priv->clear : 0))
return false;
+ assert(pixmap->devPrivate.ptr);
+ assert(!priv->mapped);
if (priv->clear) {
DBG(("%s: applying clear [%08x]\n",
@@ -1585,7 +1691,7 @@ skip_inplace_map:
pixmap->drawable.width,
pixmap->drawable.height);
sna_pixmap_free_gpu(sna, priv);
- priv->undamaged = false;
+ assert(priv->gpu_damage == NULL);
priv->clear = false;
}
@@ -1594,6 +1700,7 @@ skip_inplace_map:
int n;
DBG(("%s: flushing GPU damage\n", __FUNCTION__));
+ assert(priv->gpu_bo);
n = sna_damage_get_boxes(priv->gpu_damage, &box);
if (n) {
@@ -1615,16 +1722,15 @@ skip_inplace_map:
__sna_damage_destroy(DAMAGE_PTR(priv->gpu_damage));
priv->gpu_damage = NULL;
- priv->undamaged = true;
}
if (flags & MOVE_WRITE || priv->create & KGEM_CAN_CREATE_LARGE) {
+mark_damage:
DBG(("%s: marking as damaged\n", __FUNCTION__));
sna_damage_all(&priv->cpu_damage,
pixmap->drawable.width,
pixmap->drawable.height);
sna_pixmap_free_gpu(sna, priv);
- priv->undamaged = false;
if (priv->flush) {
assert(!priv->shm);
@@ -1634,25 +1740,25 @@ skip_inplace_map:
done:
if (flags & MOVE_WRITE) {
+ assert(DAMAGE_IS_ALL(priv->cpu_damage));
priv->source_count = SOURCE_BIAS;
assert(priv->gpu_bo == NULL || priv->gpu_bo->proxy == NULL);
if (priv->gpu_bo && priv->gpu_bo->domain != DOMAIN_GPU) {
DBG(("%s: discarding inactive GPU bo\n", __FUNCTION__));
- assert(DAMAGE_IS_ALL(priv->cpu_damage));
sna_pixmap_free_gpu(sna, priv);
- priv->undamaged = false;
}
}
if (priv->cpu_bo) {
if ((flags & MOVE_ASYNC_HINT) == 0) {
DBG(("%s: syncing CPU bo\n", __FUNCTION__));
- kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo);
+ kgem_bo_sync__cpu_full(&sna->kgem,
+ priv->cpu_bo, flags & MOVE_WRITE);
+ assert(!priv->shm || !kgem_bo_is_busy(priv->cpu_bo));
}
if (flags & MOVE_WRITE) {
DBG(("%s: discarding GPU bo in favour of CPU bo\n", __FUNCTION__));
sna_pixmap_free_gpu(sna, priv);
- priv->undamaged = false;
}
}
priv->cpu = (flags & MOVE_ASYNC_HINT) == 0;
@@ -1717,29 +1823,30 @@ static inline bool region_inplace(struct sna *sna,
if (wedged(sna) && !priv->pinned)
return false;
- if (priv->cpu) {
- DBG(("%s: no, preferring last action of CPU\n", __FUNCTION__));
- return false;
- }
-
- if (!write_only &&
+ if ((priv->cpu || !write_only) &&
region_overlaps_damage(region, priv->cpu_damage, 0, 0)) {
DBG(("%s: no, uncovered CPU damage pending\n", __FUNCTION__));
return false;
}
- if (priv->flush) {
- DBG(("%s: yes, exported via dri, will flush\n", __FUNCTION__));
- return true;
+ if (priv->cpu) {
+ DBG(("%s: no, preferring last action of CPU\n", __FUNCTION__));
+ return false;
}
if (priv->mapped) {
DBG(("%s: yes, already mapped, continuiung\n", __FUNCTION__));
+ return !IS_CPU_MAP(priv->gpu_bo->map);
+ }
+
+ if (priv->flush) {
+ DBG(("%s: yes, exported via dri, will flush\n", __FUNCTION__));
return true;
}
if (DAMAGE_IS_ALL(priv->gpu_damage)) {
DBG(("%s: yes, already wholly damaged on the GPU\n", __FUNCTION__));
+ assert(priv->gpu_bo);
return true;
}
@@ -1785,6 +1892,8 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
return true;
}
+ assert(priv->gpu_damage == NULL || priv->gpu_bo);
+
if (sna_damage_is_all(&priv->cpu_damage,
pixmap->drawable.width,
pixmap->drawable.height)) {
@@ -1792,7 +1901,6 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
__FUNCTION__, pixmap->drawable.serialNumber));
sna_damage_destroy(&priv->gpu_damage);
- priv->undamaged = false;
if (flags & MOVE_WRITE)
sna_pixmap_free_gpu(sna, priv);
@@ -1804,6 +1912,14 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
goto out;
}
+ if (USE_INPLACE &&
+ (flags & (MOVE_READ | MOVE_ASYNC_HINT)) == 0 &&
+ (priv->flush || box_inplace(pixmap, &region->extents))) {
+ DBG(("%s: marking for inplace hint (%d, %d)\n",
+ __FUNCTION__, priv->flush, box_inplace(pixmap, &region->extents)));
+ flags |= MOVE_INPLACE_HINT;
+ }
+
if (flags & MOVE_WHOLE_HINT)
return _sna_pixmap_move_to_cpu(pixmap, flags);
@@ -1824,132 +1940,40 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
return _sna_pixmap_move_to_cpu(pixmap, flags);
}
- if (USE_INPLACE && (flags & MOVE_READ) == 0) {
- DBG(("%s: no read, checking to see if we can stream the write into the GPU bo\n",
- __FUNCTION__));
- assert(flags & MOVE_WRITE);
-
- if (priv->stride && priv->gpu_bo &&
- kgem_bo_can_map(&sna->kgem, priv->gpu_bo) &&
- region_inplace(sna, pixmap, region, priv, true)) {
- assert(priv->gpu_bo->proxy == NULL);
- if (!__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo)) {
- pixmap->devPrivate.ptr =
- kgem_bo_map(&sna->kgem, priv->gpu_bo);
- if (pixmap->devPrivate.ptr == NULL) {
- if (dx | dy)
- RegionTranslate(region, -dx, -dy);
- return false;
- }
-
- priv->mapped = true;
- pixmap->devKind = priv->gpu_bo->pitch;
-
- sna_damage_subtract(&priv->cpu_damage, region);
- if (priv->cpu_damage == NULL) {
- list_del(&priv->list);
- sna_damage_all(&priv->gpu_damage,
- pixmap->drawable.width,
- pixmap->drawable.height);
- priv->undamaged = false;
- } else
- sna_damage_add(&priv->gpu_damage,
- region);
-
- priv->clear = false;
- priv->cpu = false;
- assert_pixmap_damage(pixmap);
- if (dx | dy)
- RegionTranslate(region, -dx, -dy);
- return true;
- }
- }
-
- if (priv->cpu_bo && !priv->cpu_bo->flush) {
- if (__kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) {
- sna_damage_subtract(&priv->cpu_damage, region);
- if (!sna_pixmap_move_to_gpu(pixmap, MOVE_WRITE)) {
- if (dx | dy)
- RegionTranslate(region, -dx, -dy);
- return false;
- }
-
- assert(!priv->shm);
- sna_pixmap_free_cpu(sna, priv);
- }
- }
-
- if (priv->gpu_bo == NULL && priv->stride &&
- sna_pixmap_choose_tiling(pixmap, DEFAULT_TILING) != I915_TILING_NONE &&
- region_inplace(sna, pixmap, region, priv, true) &&
- sna_pixmap_create_mappable_gpu(pixmap)) {
- pixmap->devPrivate.ptr =
- kgem_bo_map(&sna->kgem, priv->gpu_bo);
- if (pixmap->devPrivate.ptr == NULL) {
- if (dx | dy)
- RegionTranslate(region, -dx, -dy);
- return false;
- }
-
- priv->mapped = true;
- pixmap->devKind = priv->gpu_bo->pitch;
-
- sna_damage_subtract(&priv->cpu_damage, region);
- if (priv->cpu_damage == NULL) {
- list_del(&priv->list);
- sna_damage_all(&priv->gpu_damage,
- pixmap->drawable.width,
- pixmap->drawable.height);
- priv->undamaged = false;
- } else
- sna_damage_add(&priv->gpu_damage, region);
-
- assert_pixmap_damage(pixmap);
- priv->clear = false;
- priv->cpu = false;
- if (dx | dy)
- RegionTranslate(region, -dx, -dy);
- return true;
- }
- }
-
if (operate_inplace(priv, flags) &&
- kgem_bo_can_map(&sna->kgem, priv->gpu_bo) &&
- region_inplace(sna, pixmap, region, priv, (flags & MOVE_READ) == 0)) {
+ region_inplace(sna, pixmap, region, priv, (flags & MOVE_READ) == 0) &&
+ (priv->gpu_bo || sna_pixmap_create_mappable_gpu(pixmap))) {
kgem_bo_submit(&sna->kgem, priv->gpu_bo);
DBG(("%s: try to operate inplace\n", __FUNCTION__));
- pixmap->devPrivate.ptr =
- kgem_bo_map(&sna->kgem, priv->gpu_bo);
- if (pixmap->devPrivate.ptr != NULL) {
- priv->mapped = true;
+ pixmap->devPrivate.ptr = kgem_bo_map(&sna->kgem, priv->gpu_bo);
+ priv->mapped = pixmap->devPrivate.ptr != NULL;
+ if (priv->mapped) {
pixmap->devKind = priv->gpu_bo->pitch;
- if (flags & MOVE_WRITE &&
- !DAMAGE_IS_ALL(priv->gpu_damage)) {
- sna_damage_add(&priv->gpu_damage, region);
- if (sna_damage_is_all(&priv->gpu_damage,
- pixmap->drawable.width,
- pixmap->drawable.height)) {
- DBG(("%s: replaced entire pixmap, destroying CPU shadow\n",
- __FUNCTION__));
- sna_damage_destroy(&priv->cpu_damage);
- priv->undamaged = false;
- list_del(&priv->list);
- } else
- sna_damage_subtract(&priv->cpu_damage,
- region);
+ if (flags & MOVE_WRITE) {
+ if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
+ sna_damage_add(&priv->gpu_damage, region);
+ if (sna_damage_is_all(&priv->gpu_damage,
+ pixmap->drawable.width,
+ pixmap->drawable.height)) {
+ DBG(("%s: replaced entire pixmap, destroying CPU shadow\n",
+ __FUNCTION__));
+ sna_damage_destroy(&priv->cpu_damage);
+ list_del(&priv->list);
+ } else
+ sna_damage_subtract(&priv->cpu_damage,
+ region);
+ }
+ priv->clear = false;
}
assert_pixmap_damage(pixmap);
- priv->clear = false;
priv->cpu = false;
if (dx | dy)
RegionTranslate(region, -dx, -dy);
DBG(("%s: operate inplace\n", __FUNCTION__));
return true;
}
-
- priv->mapped = false;
}
if (priv->clear && flags & MOVE_WRITE) {
@@ -1965,12 +1989,26 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
priv->mapped = false;
}
+ if ((priv->clear || (flags & MOVE_READ) == 0) &&
+ priv->cpu_bo && !priv->cpu_bo->flush &&
+ __kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) {
+ sna_damage_subtract(&priv->cpu_damage, region);
+ if (sna_pixmap_move_to_gpu(pixmap, MOVE_READ | MOVE_ASYNC_HINT)) {
+ sna_damage_all(&priv->gpu_damage,
+ pixmap->drawable.width,
+ pixmap->drawable.height);
+ sna_pixmap_free_cpu(sna, priv);
+ }
+ }
+
if (pixmap->devPrivate.ptr == NULL &&
- !sna_pixmap_alloc_cpu(sna, pixmap, priv, priv->gpu_damage != NULL)) {
+ !sna_pixmap_alloc_cpu(sna, pixmap, priv,
+ flags & MOVE_READ ? priv->gpu_damage && !priv->clear : 0)) {
if (dx | dy)
RegionTranslate(region, -dx, -dy);
return false;
}
+ assert(pixmap->devPrivate.ptr);
if (priv->gpu_bo == NULL) {
assert(priv->gpu_damage == NULL);
@@ -1979,8 +2017,8 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
assert(priv->gpu_bo->proxy == NULL);
if (priv->clear) {
- int n = REGION_NUM_RECTS(region);
- BoxPtr box = REGION_RECTS(region);
+ int n = RegionNumRects(region);
+ BoxPtr box = RegionRects(region);
DBG(("%s: pending clear, doing partial fill\n", __FUNCTION__));
if (priv->cpu_bo) {
@@ -2022,6 +2060,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
DBG(("%s: forced migration\n", __FUNCTION__));
assert(pixmap_contains_damage(pixmap, priv->gpu_damage));
+ assert(priv->gpu_bo);
ok = false;
if (use_cpu_bo_for_download(sna, priv, &priv->gpu_damage->extents)) {
@@ -2038,7 +2077,6 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
box, n);
}
sna_damage_destroy(&priv->gpu_damage);
- priv->undamaged = true;
}
if (priv->gpu_damage &&
@@ -2048,6 +2086,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
__FUNCTION__,
region->extents.x2 - region->extents.x1,
region->extents.y2 - region->extents.y1));
+ assert(priv->gpu_bo);
if (priv->cpu_damage == NULL) {
if ((flags & MOVE_WRITE) == 0 &&
@@ -2095,9 +2134,9 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
* reads.
*/
if (flags & MOVE_WRITE) {
- int n = REGION_NUM_RECTS(region), i;
- BoxPtr boxes = REGION_RECTS(region);
- BoxPtr blocks = malloc(sizeof(BoxRec) * REGION_NUM_RECTS(region));
+ int n = RegionNumRects(region), i;
+ BoxPtr boxes = RegionRects(region);
+ BoxPtr blocks = malloc(sizeof(BoxRec) * RegionNumRects(region));
if (blocks) {
for (i = 0; i < n; i++) {
blocks[i].x1 = boxes[i].x1 & ~31;
@@ -2150,12 +2189,11 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
}
sna_damage_destroy(&priv->gpu_damage);
- priv->undamaged = true;
} else if (DAMAGE_IS_ALL(priv->gpu_damage) ||
sna_damage_contains_box__no_reduce(priv->gpu_damage,
&r->extents)) {
- BoxPtr box = REGION_RECTS(r);
- int n = REGION_NUM_RECTS(r);
+ BoxPtr box = RegionRects(r);
+ int n = RegionNumRects(r);
bool ok = false;
DBG(("%s: region wholly inside damage\n",
@@ -2175,14 +2213,13 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
box, n);
sna_damage_subtract(&priv->gpu_damage, r);
- priv->undamaged = true;
} else {
RegionRec need;
pixman_region_init(&need);
if (sna_damage_intersect(priv->gpu_damage, r, &need)) {
- BoxPtr box = REGION_RECTS(&need);
- int n = REGION_NUM_RECTS(&need);
+ BoxPtr box = RegionRects(&need);
+ int n = RegionNumRects(&need);
bool ok = false;
DBG(("%s: region intersects damage\n",
@@ -2202,7 +2239,6 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
box, n);
sna_damage_subtract(&priv->gpu_damage, r);
- priv->undamaged = true;
RegionUninit(&need);
}
}
@@ -2212,7 +2248,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
}
done:
- if (flags & MOVE_WRITE) {
+ if ((flags & (MOVE_WRITE | MOVE_ASYNC_HINT)) == MOVE_WRITE) {
DBG(("%s: applying cpu damage\n", __FUNCTION__));
assert(!DAMAGE_IS_ALL(priv->cpu_damage));
assert_pixmap_contains_box(pixmap, RegionExtents(region));
@@ -2226,7 +2262,6 @@ done:
__FUNCTION__));
sna_pixmap_free_gpu(sna, priv);
}
- priv->undamaged = false;
}
if (priv->flush) {
assert(!priv->shm);
@@ -2245,8 +2280,8 @@ out:
}
if ((flags & MOVE_ASYNC_HINT) == 0 && priv->cpu_bo) {
DBG(("%s: syncing cpu bo\n", __FUNCTION__));
- kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo);
- assert(!kgem_bo_is_busy(priv->cpu_bo));
+ kgem_bo_sync__cpu_full(&sna->kgem,
+ priv->cpu_bo, flags & MOVE_WRITE);
}
priv->cpu = (flags & MOVE_ASYNC_HINT) == 0;
assert(pixmap->devPrivate.ptr);
@@ -2354,31 +2389,30 @@ static inline struct sna_pixmap *
sna_pixmap_mark_active(struct sna *sna, struct sna_pixmap *priv)
{
assert(priv->gpu_bo);
- if (USE_INACTIVE &&
- !priv->pinned && priv->gpu_bo->proxy == NULL &&
- (priv->create & KGEM_CAN_CREATE_LARGE) == 0)
- list_move(&priv->inactive, &sna->active_pixmaps);
return priv;
}
-static bool
+bool
sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int flags)
{
struct sna *sna = to_sna_from_pixmap(pixmap);
struct sna_pixmap *priv = sna_pixmap(pixmap);
RegionRec i, r;
- DBG(("%s()\n", __FUNCTION__));
+ DBG(("%s: pixmap=%ld box=(%d, %d), (%d, %d), flags=%x\n",
+ __FUNCTION__, pixmap->drawable.serialNumber,
+ box->x1, box->y1, box->x2, box->y2, flags));
+ assert(box->x2 > box->x1 && box->y2 > box->y1);
assert_pixmap_damage(pixmap);
assert_pixmap_contains_box(pixmap, box);
assert(!wedged(sna));
+ assert(priv->gpu_damage == NULL || priv->gpu_bo);
if (sna_damage_is_all(&priv->gpu_damage,
pixmap->drawable.width,
pixmap->drawable.height)) {
sna_damage_destroy(&priv->cpu_damage);
- priv->undamaged = false;
list_del(&priv->list);
goto done;
}
@@ -2390,7 +2424,6 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl
assert_pixmap_damage(pixmap);
if (priv->cpu_damage == NULL) {
- priv->undamaged = false;
list_del(&priv->list);
return sna_pixmap_move_to_gpu(pixmap, flags);
}
@@ -2398,6 +2431,8 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl
if (priv->gpu_bo == NULL) {
unsigned create, tiling;
+ assert(priv->gpu_damage == NULL);
+
create = CREATE_INACTIVE;
if (pixmap->usage_hint == SNA_CREATE_FB)
create |= CREATE_EXACT | CREATE_SCANOUT;
@@ -2423,6 +2458,11 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl
priv->mapped = false;
}
+ if (priv->shm) {
+ assert(!priv->flush);
+ sna_add_flush_pixmap(sna, priv, priv->cpu_bo);
+ }
+
region_set(&r, box);
if (MIGRATE_ALL || region_subsumes_damage(&r, priv->cpu_damage)) {
int n;
@@ -2437,15 +2477,12 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl
pixmap, priv->cpu_bo, 0, 0,
pixmap, priv->gpu_bo, 0, 0,
box, n, 0);
- if (ok && priv->shm) {
- assert(!priv->flush);
- sna_add_flush_pixmap(sna, priv, priv->cpu_bo);
- }
}
if (!ok) {
+ assert(!priv->mapped);
if (pixmap->devPrivate.ptr == NULL) {
- assert(priv->stride && priv->ptr);
- pixmap->devPrivate.ptr = priv->ptr;
+ assert(priv->ptr && priv->stride);
+ pixmap->devPrivate.ptr = PTR(priv->ptr);
pixmap->devKind = priv->stride;
}
if (n == 1 && !priv->pinned &&
@@ -2471,7 +2508,6 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl
sna_damage_destroy(&priv->cpu_damage);
list_del(&priv->list);
- priv->undamaged = true;
} else if (DAMAGE_IS_ALL(priv->cpu_damage) ||
sna_damage_contains_box__no_reduce(priv->cpu_damage, box)) {
bool ok = false;
@@ -2481,15 +2517,12 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl
pixmap, priv->cpu_bo, 0, 0,
pixmap, priv->gpu_bo, 0, 0,
box, 1, 0);
- if (ok && priv->shm) {
- assert(!priv->flush);
- sna_add_flush_pixmap(sna, priv, priv->cpu_bo);
- }
}
if (!ok) {
+ assert(!priv->mapped);
if (pixmap->devPrivate.ptr == NULL) {
- assert(priv->stride && priv->ptr);
- pixmap->devPrivate.ptr = priv->ptr;
+ assert(priv->ptr && priv->stride);
+ pixmap->devPrivate.ptr = PTR(priv->ptr);
pixmap->devKind = priv->stride;
}
ok = sna_write_boxes(sna, pixmap,
@@ -2503,12 +2536,11 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl
return false;
sna_damage_subtract(&priv->cpu_damage, &r);
- priv->undamaged = true;
} else if (sna_damage_intersect(priv->cpu_damage, &r, &i)) {
- int n = REGION_NUM_RECTS(&i);
+ int n = RegionNumRects(&i);
bool ok;
- box = REGION_RECTS(&i);
+ box = RegionRects(&i);
ok = false;
if (use_cpu_bo_for_upload(sna, priv, 0)) {
DBG(("%s: using CPU bo for upload to GPU, %d boxes\n", __FUNCTION__, n));
@@ -2516,15 +2548,12 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl
pixmap, priv->cpu_bo, 0, 0,
pixmap, priv->gpu_bo, 0, 0,
box, n, 0);
- if (ok && priv->shm) {
- assert(!priv->flush);
- sna_add_flush_pixmap(sna, priv, priv->cpu_bo);
- }
}
if (!ok) {
+ assert(!priv->mapped);
if (pixmap->devPrivate.ptr == NULL) {
- assert(priv->stride && priv->ptr);
- pixmap->devPrivate.ptr = priv->ptr;
+ assert(priv->ptr && priv->stride);
+ pixmap->devPrivate.ptr = PTR(priv->ptr);
pixmap->devKind = priv->stride;
}
ok = sna_write_boxes(sna, pixmap,
@@ -2538,26 +2567,21 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl
return false;
sna_damage_subtract(&priv->cpu_damage, &r);
- priv->undamaged = true;
RegionUninit(&i);
}
- if (priv->shm) {
- assert(!priv->flush);
- sna_add_flush_pixmap(sna, priv, priv->cpu_bo);
- }
-
done:
if (flags & MOVE_WRITE) {
priv->clear = false;
priv->cpu = false;
- if (priv->cpu_damage == NULL && box_inplace(pixmap, box)) {
+ if (priv->cpu_damage == NULL &&
+ box_inplace(pixmap, &r.extents)) {
DBG(("%s: large operation on undamaged, promoting to full GPU\n",
__FUNCTION__));
+ assert(priv->gpu_bo->proxy == NULL);
sna_damage_all(&priv->gpu_damage,
pixmap->drawable.width,
pixmap->drawable.height);
- priv->undamaged = false;
}
}
@@ -2582,6 +2606,8 @@ sna_drawable_use_bo(DrawablePtr drawable, unsigned flags, const BoxRec *box,
box->x1, box->y1, box->x2, box->y2,
flags));
+ assert(box->x2 > box->x1 && box->y2 > box->y1);
+ assert(pixmap->refcnt);
assert_pixmap_damage(pixmap);
assert_drawable_contains_box(drawable, box);
@@ -2604,7 +2630,7 @@ sna_drawable_use_bo(DrawablePtr drawable, unsigned flags, const BoxRec *box,
flags |= PREFER_GPU;
if (priv->shm)
flags &= ~PREFER_GPU;
- if (priv->cpu && (flags & FORCE_GPU) == 0)
+ if (priv->cpu && (flags & (FORCE_GPU | IGNORE_CPU)) == 0)
flags &= ~PREFER_GPU;
DBG(("%s: flush=%d, shm=%d, cpu=%d => flags=%x\n",
@@ -2619,6 +2645,7 @@ sna_drawable_use_bo(DrawablePtr drawable, unsigned flags, const BoxRec *box,
if (DAMAGE_IS_ALL(priv->gpu_damage)) {
DBG(("%s: use GPU fast path (all-damaged)\n", __FUNCTION__));
assert(priv->cpu_damage == NULL);
+ assert(priv->gpu_bo);
goto use_gpu_bo;
}
@@ -2692,7 +2719,6 @@ sna_drawable_use_bo(DrawablePtr drawable, unsigned flags, const BoxRec *box,
sna_damage_subtract(&priv->cpu_damage, &region);
if (priv->cpu_damage == NULL) {
list_del(&priv->list);
- priv->undamaged = false;
priv->cpu = false;
}
}
@@ -2721,6 +2747,7 @@ create_gpu_bo:
region.extents.x2, region.extents.y2));
if (priv->gpu_damage) {
+ assert(priv->gpu_bo);
if (!priv->cpu_damage) {
if (sna_damage_contains_box__no_reduce(priv->gpu_damage,
&region.extents)) {
@@ -2779,18 +2806,19 @@ move_to_gpu:
done:
assert(priv->gpu_bo != NULL);
+ assert(priv->gpu_bo->refcnt);
if (sna_damage_is_all(&priv->gpu_damage,
pixmap->drawable.width,
pixmap->drawable.height)) {
sna_damage_destroy(&priv->cpu_damage);
list_del(&priv->list);
- priv->undamaged = false;
*damage = NULL;
} else
*damage = &priv->gpu_damage;
DBG(("%s: using GPU bo with damage? %d\n",
__FUNCTION__, *damage != NULL));
+ assert(*damage == NULL || !DAMAGE_IS_ALL(*damage));
assert(priv->gpu_bo->proxy == NULL);
assert(priv->clear == false);
assert(priv->cpu == false);
@@ -2799,22 +2827,34 @@ done:
use_gpu_bo:
DBG(("%s: using whole GPU bo\n", __FUNCTION__));
assert(priv->gpu_bo != NULL);
+ assert(priv->gpu_bo->refcnt);
assert(priv->gpu_bo->proxy == NULL);
+ assert(priv->gpu_damage);
priv->clear = false;
priv->cpu = false;
- if (USE_INACTIVE &&
- !priv->pinned && (priv->create & KGEM_CAN_CREATE_LARGE) == 0)
- list_move(&priv->inactive,
- &to_sna_from_pixmap(pixmap)->active_pixmaps);
*damage = NULL;
return priv->gpu_bo;
use_cpu_bo:
- if (!USE_CPU_BO)
- return NULL;
+ if (!USE_CPU_BO || priv->cpu_bo == NULL) {
+cpu_fail:
+ if ((flags & FORCE_GPU) && priv->gpu_bo) {
+ get_drawable_deltas(drawable, pixmap, &dx, &dy);
+
+ region.extents = *box;
+ region.extents.x1 += dx;
+ region.extents.x2 += dx;
+ region.extents.y1 += dy;
+ region.extents.y2 += dy;
+ region.data = NULL;
+
+ goto move_to_gpu;
+ }
- if (priv->cpu_bo == NULL)
return NULL;
+ }
+
+ assert(priv->cpu_bo->refcnt);
sna = to_sna_from_pixmap(pixmap);
if ((flags & FORCE_GPU) == 0 &&
@@ -2851,12 +2891,25 @@ use_cpu_bo:
}
if (!sna->kgem.can_blt_cpu)
- return NULL;
+ goto cpu_fail;
if (!sna_drawable_move_region_to_cpu(&pixmap->drawable, &region,
- MOVE_READ | MOVE_ASYNC_HINT)) {
+ (flags & IGNORE_CPU ? MOVE_READ : 0) | MOVE_WRITE | MOVE_ASYNC_HINT)) {
DBG(("%s: failed to move-to-cpu, fallback\n", __FUNCTION__));
- return NULL;
+ goto cpu_fail;
+ }
+
+ if (priv->shm) {
+ assert(!priv->flush);
+ sna_add_flush_pixmap(sna, priv, priv->cpu_bo);
+
+ /* As we may have flushed and retired, recheck for busy bo */
+ if ((flags & FORCE_GPU) == 0 && !kgem_bo_is_busy(priv->cpu_bo))
+ return NULL;
+ }
+ if (priv->flush) {
+ assert(!priv->shm);
+ sna_add_flush_pixmap(sna, priv, priv->gpu_bo);
}
if (sna_damage_is_all(&priv->cpu_damage,
@@ -2873,21 +2926,9 @@ use_cpu_bo:
*damage = &priv->cpu_damage;
}
- if (priv->shm) {
- assert(!priv->flush);
- sna_add_flush_pixmap(sna, priv, priv->cpu_bo);
-
- /* As we may have flushed and retired,, recheck for busy bo */
- if ((flags & FORCE_GPU) == 0 && !kgem_bo_is_busy(priv->cpu_bo))
- return NULL;
- }
- if (priv->flush) {
- assert(!priv->shm);
- sna_add_flush_pixmap(sna, priv, priv->gpu_bo);
- }
-
DBG(("%s: using CPU bo with damage? %d\n",
__FUNCTION__, *damage != NULL));
+ assert(damage == NULL || !DAMAGE_IS_ALL(*damage));
assert(priv->clear == false);
return priv->cpu_bo;
}
@@ -3001,19 +3042,21 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
pixmap->drawable.height);
}
+ assert(priv->gpu_damage == NULL || priv->gpu_bo);
+
if (sna_damage_is_all(&priv->gpu_damage,
pixmap->drawable.width,
pixmap->drawable.height)) {
DBG(("%s: already all-damaged\n", __FUNCTION__));
sna_damage_destroy(&priv->cpu_damage);
list_del(&priv->list);
- priv->undamaged = false;
- assert(priv->cpu == false);
+ assert(priv->cpu == false || IS_CPU_MAP(priv->gpu_bo->map));
goto active;
}
if (flags & MOVE_WRITE && priv->gpu_bo && priv->gpu_bo->proxy) {
DBG(("%s: discarding cached upload buffer\n", __FUNCTION__));
+ assert(priv->gpu_damage == NULL);
kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
priv->gpu_bo = NULL;
}
@@ -3045,6 +3088,8 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
create = 0;
if (priv->cpu_damage && priv->cpu_bo == NULL)
create = CREATE_GTT_MAP | CREATE_INACTIVE;
+ if (flags & MOVE_INPLACE_HINT)
+ create = CREATE_GTT_MAP | CREATE_INACTIVE;
priv->gpu_bo =
kgem_create_2d(&sna->kgem,
@@ -3055,6 +3100,7 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
}
if (priv->gpu_bo == NULL) {
DBG(("%s: not creating GPU bo\n", __FUNCTION__));
+ assert(priv->gpu_damage == NULL);
assert(list_is_empty(&priv->list));
return NULL;
}
@@ -3066,6 +3112,7 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
* synchronisation that takes the most time. This is
* mitigated by avoiding fallbacks in the first place.
*/
+ assert(priv->gpu_bo->proxy == NULL);
sna_damage_all(&priv->gpu_damage,
pixmap->drawable.width,
pixmap->drawable.height);
@@ -3078,17 +3125,16 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
if (priv->gpu_bo->proxy) {
DBG(("%s: reusing cached upload\n", __FUNCTION__));
assert((flags & MOVE_WRITE) == 0);
+ assert(priv->gpu_damage == NULL);
return priv;
}
if (priv->cpu_damage == NULL)
goto done;
- if (priv->mapped) {
- assert(priv->stride);
- pixmap->devPrivate.ptr = priv->ptr;
- pixmap->devKind = priv->stride;
- priv->mapped = false;
+ if (priv->shm) {
+ assert(!priv->flush);
+ sna_add_flush_pixmap(sna, priv, priv->cpu_bo);
}
n = sna_damage_get_boxes(priv->cpu_damage, &box);
@@ -3098,9 +3144,6 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
assert(pixmap_contains_damage(pixmap, priv->cpu_damage));
DBG(("%s: uploading %d damage boxes\n", __FUNCTION__, n));
- if (!priv->cpu)
- flags |= MOVE_ASYNC_HINT;
-
ok = false;
if (use_cpu_bo_for_upload(sna, priv, flags)) {
DBG(("%s: using CPU bo for upload to GPU\n", __FUNCTION__));
@@ -3110,9 +3153,15 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
box, n, 0);
}
if (!ok) {
+ if (priv->mapped) {
+			assert(priv->ptr && priv->stride);
+ pixmap->devPrivate.ptr = PTR(priv->ptr);
+ pixmap->devKind = priv->stride;
+ priv->mapped = false;
+ }
if (pixmap->devPrivate.ptr == NULL) {
- assert(priv->stride && priv->ptr);
- pixmap->devPrivate.ptr = priv->ptr;
+ assert(priv->ptr && priv->stride);
+ pixmap->devPrivate.ptr = PTR(priv->ptr);
pixmap->devKind = priv->stride;
}
if (n == 1 && !priv->pinned &&
@@ -3137,15 +3186,14 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
__sna_damage_destroy(DAMAGE_PTR(priv->cpu_damage));
priv->cpu_damage = NULL;
- priv->undamaged = true;
-
- if (priv->shm) {
- assert(!priv->flush);
- sna_add_flush_pixmap(sna, priv, priv->cpu_bo);
- }
/* For large bo, try to keep only a single copy around */
- if (priv->create & KGEM_CAN_CREATE_LARGE) {
+ if (priv->create & KGEM_CAN_CREATE_LARGE ||
+ flags & MOVE_SOURCE_HINT) {
+ DBG(("%s: disposing of system copy for large/source\n",
+ __FUNCTION__));
+ assert(!priv->shm);
+ assert(priv->gpu_bo->proxy == NULL);
sna_damage_all(&priv->gpu_damage,
pixmap->drawable.width,
pixmap->drawable.height);
@@ -3157,14 +3205,8 @@ done:
sna_damage_reduce_all(&priv->gpu_damage,
pixmap->drawable.width,
pixmap->drawable.height);
- if (DAMAGE_IS_ALL(priv->gpu_damage)) {
- priv->undamaged = false;
- if (priv->ptr) {
- assert(priv->cpu_bo == NULL || !priv->cpu_bo->flush);
- assert(!priv->shm);
- sna_pixmap_free_cpu(sna, priv);
- }
- }
+ if (DAMAGE_IS_ALL(priv->gpu_damage))
+ sna_pixmap_free_cpu(sna, priv);
active:
if (flags & MOVE_WRITE)
@@ -3209,7 +3251,7 @@ static bool must_check sna_gc_move_to_cpu(GCPtr gc,
if (gc->clientClipType == CT_PIXMAP) {
PixmapPtr clip = gc->clientClip;
- gc->clientClip = BitmapToRegion(gc->pScreen, clip);
+ gc->clientClip = region_from_bitmap(gc->pScreen, clip);
gc->pScreen->DestroyPixmap(clip);
gc->clientClipType = gc->clientClip ? CT_REGION : CT_NONE;
changes |= GCClipMask;
@@ -3346,24 +3388,6 @@ static inline void box_add_pt(BoxPtr box, int16_t x, int16_t y)
box->y2 = y;
}
-static int16_t bound(int16_t a, uint16_t b)
-{
- int v = (int)a + (int)b;
- if (v > MAXSHORT)
- return MAXSHORT;
- return v;
-}
-
-static int16_t clamp(int16_t a, int16_t b)
-{
- int v = (int)a + (int)b;
- if (v > MAXSHORT)
- return MAXSHORT;
- if (v < MINSHORT)
- return MINSHORT;
- return v;
-}
-
static inline bool box32_to_box16(const Box32Rec *b32, BoxRec *b16)
{
b16->x1 = b32->x1;
@@ -3394,141 +3418,10 @@ static inline void box32_add_rect(Box32Rec *box, const xRectangle *r)
}
static bool
-sna_put_image_upload_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
- int x, int y, int w, int h, char *bits, int stride)
-{
- PixmapPtr pixmap = get_drawable_pixmap(drawable);
- struct sna *sna = to_sna_from_pixmap(pixmap);
- struct sna_pixmap *priv = sna_pixmap(pixmap);
- BoxPtr box;
- int nbox;
- int16_t dx, dy;
-
- box = REGION_RECTS(region);
- nbox = REGION_NUM_RECTS(region);
-
- DBG(("%s: %d x [(%d, %d), (%d, %d)...]\n",
- __FUNCTION__, nbox,
- box->x1, box->y1, box->x2, box->y2));
-
- if (gc->alu != GXcopy)
- return false;
-
- if (priv->gpu_bo == NULL &&
- !sna_pixmap_create_mappable_gpu(pixmap))
- return false;
-
- assert(priv->gpu_bo);
- assert(priv->gpu_bo->proxy == NULL);
-
- if (!priv->pinned && nbox == 1 &&
- box->x1 <= 0 && box->y1 <= 0 &&
- box->x2 >= pixmap->drawable.width &&
- box->y2 >= pixmap->drawable.height)
- return sna_replace(sna, pixmap, &priv->gpu_bo, bits, stride);
-
- get_drawable_deltas(drawable, pixmap, &dx, &dy);
- x += dx + drawable->x;
- y += dy + drawable->y;
-
- return sna_write_boxes(sna, pixmap,
- priv->gpu_bo, 0, 0,
- bits, stride, -x, -y,
- box, nbox);
-}
-
-static bool upload_inplace(struct sna *sna,
- PixmapPtr pixmap,
- struct sna_pixmap *priv,
- RegionRec *region)
-{
- if (priv->shm) {
- DBG(("%s: no, SHM Pixmap\n", __FUNCTION__));
- return false;
- }
-
- if (priv->create & KGEM_CAN_CREATE_LARGE) {
- if (priv->gpu_bo) {
- DBG(("%s: yes, large buffer and already have GPU bo\n",
- __FUNCTION__));
- return true;
- }
- if (priv->cpu_bo){
- DBG(("%s: no, large buffer and already have CPU bo\n",
- __FUNCTION__));
- return false;
- }
- }
-
- if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo) &&
- !(priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))) {
- DBG(("%s: yes, CPU bo is busy, but the GPU bo is not\n", __FUNCTION__));
- return true;
- }
-
- if (!region_inplace(sna, pixmap, region, priv, true)) {
- DBG(("%s? no, region not suitable\n", __FUNCTION__));
- return false;
- }
-
- if (sna->kgem.has_llc && !priv->flush) {
- if (priv->cpu_bo) {
- if (priv->cpu_damage &&
- kgem_bo_is_busy(priv->cpu_bo) &&
- !region_subsumes_damage(region, priv->cpu_damage)) {
- DBG(("%s? yes, CPU bo is busy\n", __FUNCTION__));
- return true;
- }
-
- DBG(("%s? no, have CPU bo\n", __FUNCTION__));
- return false;
- }
-
- if (priv->create & KGEM_CAN_CREATE_CPU) {
- DBG(("%s? no, can create CPU bo\n", __FUNCTION__));
- return false;
- }
- }
-
- if (priv->gpu_bo) {
- if (priv->gpu_bo->proxy)
- return false;
-
- if (!kgem_bo_can_map(&sna->kgem, priv->gpu_bo)) {
- DBG(("%s? no, GPU bo not mappable\n", __FUNCTION__));
- return false;
- }
-
- if (!kgem_bo_is_busy(priv->gpu_bo)) {
- DBG(("%s? yes, GPU bo is idle\n", __FUNCTION__));
- return true;
- }
-
- if (!priv->pinned &&
- region_subsumes_drawable(region, &pixmap->drawable)) {
- DBG(("%s? yes, will replace busy GPU\n", __FUNCTION__));
- return true;
- }
- }
-
- if ((priv->create & (KGEM_CAN_CREATE_GPU | KGEM_CAN_CREATE_CPU)) == KGEM_CAN_CREATE_GPU &&
- region_subsumes_drawable(region, &pixmap->drawable)) {
- DBG(("%s? yes, will fill fresh GPU bo\n", __FUNCTION__));
- return true;
- }
-
- return false;
-}
-
-static bool
sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
int x, int y, int w, int h, char *bits, int stride)
{
PixmapPtr pixmap = get_drawable_pixmap(drawable);
- struct sna *sna = to_sna_from_pixmap(pixmap);
- struct sna_pixmap *priv = sna_pixmap(pixmap);
- char *dst_bits;
- int dst_stride;
BoxRec *box;
int16_t dx, dy;
int n;
@@ -3538,170 +3431,22 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
if (gc->alu != GXcopy)
return false;
- if (!priv) {
- if (drawable->depth < 8)
- return false;
-
- goto blt;
- }
-
- /* XXX performing the upload inplace is currently about 20x slower
- * for putimage10 on gen6 -- mostly due to slow page faulting in kernel.
- * So we try again with vma caching and only for pixmaps who will be
- * immediately flushed...
- */
- if (upload_inplace(sna, pixmap, priv, region) &&
- sna_put_image_upload_blt(drawable, gc, region,
- x, y, w, h, bits, stride)) {
- if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
- DBG(("%s: marking damage\n", __FUNCTION__));
- if (region_subsumes_drawable(region, &pixmap->drawable))
- sna_damage_destroy(&priv->cpu_damage);
- else
- sna_damage_subtract(&priv->cpu_damage, region);
- if (priv->cpu_damage == NULL) {
- sna_damage_all(&priv->gpu_damage,
- pixmap->drawable.width,
- pixmap->drawable.height);
- list_del(&priv->list);
- priv->undamaged = false;
- } else
- sna_damage_add(&priv->gpu_damage, region);
- }
-
- /* And mark as having a valid GTT mapping for future uploads */
- if (priv->stride && kgem_bo_can_map(&sna->kgem, priv->gpu_bo)) {
- pixmap->devPrivate.ptr =
- kgem_bo_map__async(&sna->kgem, priv->gpu_bo);
- if (pixmap->devPrivate.ptr) {
- priv->mapped = true;
- pixmap->devKind = priv->gpu_bo->pitch;
- }
- }
-
- assert_pixmap_damage(pixmap);
- priv->clear = false;
- priv->cpu = false;
- return true;
- }
-
- if (priv->gpu_bo && priv->gpu_bo->proxy) {
- DBG(("%s: discarding cached upload buffer\n", __FUNCTION__));
- kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
- priv->gpu_bo = NULL;
- }
-
- if (priv->mapped) {
- assert(!priv->shm);
- pixmap->devPrivate.ptr = NULL;
- priv->mapped = false;
- }
-
- /* If the GPU is currently accessing the CPU pixmap, then
- * we will need to wait for that to finish before we can
- * modify the memory.
- *
- * However, we can queue some writes to the GPU bo to avoid
- * the wait. Or we can try to replace the CPU bo.
- */
- if (!priv->shm && priv->cpu_bo && __kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) {
- assert(!priv->cpu_bo->flush);
- DBG(("%s: cpu bo will stall, upload damage and discard\n",
- __FUNCTION__));
- if (priv->cpu_damage) {
- if (!region_subsumes_drawable(region, &pixmap->drawable)) {
- sna_damage_subtract(&priv->cpu_damage, region);
- if (!sna_pixmap_move_to_gpu(pixmap, MOVE_READ | MOVE_ASYNC_HINT))
- return false;
- } else {
- sna_damage_destroy(&priv->cpu_damage);
- priv->undamaged = false;
- }
- }
- assert(priv->cpu_damage == NULL);
- sna_damage_all(&priv->gpu_damage,
- pixmap->drawable.width,
- pixmap->drawable.height);
- sna_pixmap_free_cpu(sna, priv);
- assert(pixmap->devPrivate.ptr == NULL);
- }
-
- if (pixmap->devPrivate.ptr == NULL &&
- !sna_pixmap_alloc_cpu(sna, pixmap, priv, false))
- return true;
-
- if (priv->cpu_bo) {
- DBG(("%s: syncing CPU bo\n", __FUNCTION__));
- kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo);
- }
-
- if (priv->clear) {
- DBG(("%s: applying clear [%08x]\n",
- __FUNCTION__, priv->clear_color));
-
- if (priv->clear_color == 0) {
- memset(pixmap->devPrivate.ptr,
- 0, pixmap->devKind * pixmap->drawable.height);
- } else {
- pixman_fill(pixmap->devPrivate.ptr,
- pixmap->devKind/sizeof(uint32_t),
- pixmap->drawable.bitsPerPixel,
- 0, 0,
- pixmap->drawable.width,
- pixmap->drawable.height,
- priv->clear_color);
- }
-
- sna_damage_all(&priv->cpu_damage,
- pixmap->drawable.width,
- pixmap->drawable.height);
- sna_pixmap_free_gpu(sna, priv);
- priv->undamaged = false;
- }
+ if (drawable->depth < 8)
+ return false;
- if (!DAMAGE_IS_ALL(priv->cpu_damage)) {
- DBG(("%s: marking damage\n", __FUNCTION__));
- if (region_subsumes_drawable(region, &pixmap->drawable)) {
- DBG(("%s: replacing entire pixmap\n", __FUNCTION__));
- sna_damage_all(&priv->cpu_damage,
- pixmap->drawable.width,
- pixmap->drawable.height);
- sna_pixmap_free_gpu(sna, priv);
- priv->undamaged = false;
- assert(priv->gpu_damage == NULL);
- } else {
- sna_damage_subtract(&priv->gpu_damage, region);
- sna_damage_add(&priv->cpu_damage, region);
- if (priv->gpu_bo &&
- sna_damage_is_all(&priv->cpu_damage,
- pixmap->drawable.width,
- pixmap->drawable.height)) {
- DBG(("%s: replaced entire pixmap\n", __FUNCTION__));
- sna_pixmap_free_gpu(sna, priv);
- priv->undamaged = false;
- }
- }
- if (priv->flush) {
- assert(!priv->shm);
- sna_add_flush_pixmap(sna, priv, priv->gpu_bo);
- }
- }
- assert(!priv->flush || !list_is_empty(&priv->list));
- priv->cpu = true;
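+	/* Write through the CPU shadow and let sna_drawable_move_region_to_cpu()
+	 * choose the staging strategy */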
+ if (!sna_drawable_move_region_to_cpu(&pixmap->drawable,
+ region, MOVE_WRITE))
+ return false;
-blt:
get_drawable_deltas(drawable, pixmap, &dx, &dy);
x += dx + drawable->x;
y += dy + drawable->y;
DBG(("%s: upload(%d, %d, %d, %d)\n", __FUNCTION__, x, y, w, h));
- dst_stride = pixmap->devKind;
- dst_bits = pixmap->devPrivate.ptr;
-
/* Region is pre-clipped and translated into pixmap space */
- box = REGION_RECTS(region);
- n = REGION_NUM_RECTS(region);
+ box = RegionRects(region);
+ n = RegionNumRects(region);
do {
DBG(("%s: copy box (%d, %d)->(%d, %d)x(%d, %d)\n",
__FUNCTION__,
@@ -3722,9 +3467,9 @@ blt:
assert(box->x2 - x <= w);
assert(box->y2 - y <= h);
- memcpy_blt(bits, dst_bits,
+ memcpy_blt(bits, pixmap->devPrivate.ptr,
pixmap->drawable.bitsPerPixel,
- stride, dst_stride,
+ stride, pixmap->devKind,
box->x1 - x, box->y1 - y,
box->x1, box->y1,
box->x2 - box->x1, box->y2 - box->y1);
@@ -3790,11 +3535,11 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
x += dx + drawable->x;
y += dy + drawable->y;
- kgem_set_mode(&sna->kgem, KGEM_BLT);
+ kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
/* Region is pre-clipped and translated into pixmap space */
- box = REGION_RECTS(region);
- n = REGION_NUM_RECTS(region);
+ box = RegionRects(region);
+ n = RegionNumRects(region);
do {
int bx1 = (box->x1 - x) & ~7;
int bx2 = (box->x2 - x + 7) & ~7;
@@ -3810,7 +3555,9 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
if (!kgem_check_batch(&sna->kgem, 8) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc_and_exec(&sna->kgem, 2)) {
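+			/* flush the batch and retry; if the bo still cannot
+			 * be fenced, report failure so the caller falls back */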
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, bo))
+ return false;
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
@@ -3840,7 +3587,7 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
b[0] = XY_MONO_SRC_COPY | 3 << 20;
b[0] |= ((box->x1 - x) & 7) << 17;
b[1] = bo->pitch;
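+		/* kgem.gen is octal-encoded from here on: 040 == gen4 */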
- if (sna->kgem.gen >= 40 && bo->tiling) {
+ if (sna->kgem.gen >= 040 && bo->tiling) {
b[0] |= BLT_DST_TILED;
b[1] >>= 2;
}
@@ -3912,12 +3659,12 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
x += dx + drawable->x;
y += dy + drawable->y;
- kgem_set_mode(&sna->kgem, KGEM_BLT);
+ kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
skip = h * BitmapBytePad(w + left);
for (i = 1 << (gc->depth-1); i; i >>= 1, bits += skip) {
- const BoxRec *box = REGION_RECTS(region);
- int n = REGION_NUM_RECTS(region);
+ const BoxRec *box = RegionRects(region);
+ int n = RegionNumRects(region);
if ((gc->planemask & i) == 0)
continue;
@@ -3938,7 +3685,9 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
if (!kgem_check_batch(&sna->kgem, 12) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc_and_exec(&sna->kgem, 2)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, bo))
+ return false;
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
@@ -3968,7 +3717,7 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
b[0] = XY_FULL_MONO_PATTERN_MONO_SRC_BLT | 3 << 20;
b[0] |= ((box->x1 - x) & 7) << 17;
b[1] = bo->pitch;
- if (sna->kgem.gen >= 40 && bo->tiling) {
+ if (sna->kgem.gen >= 040 && bo->tiling) {
b[0] |= BLT_DST_TILED;
b[1] >>= 2;
}
@@ -4037,7 +3786,7 @@ sna_put_image(DrawablePtr drawable, GCPtr gc, int depth,
gc->pCompositeClip->extents.x2 < region.extents.x2 ||
gc->pCompositeClip->extents.y2 < region.extents.y2) {
RegionIntersect(&region, &region, gc->pCompositeClip);
- if (!RegionNotEmpty(&region))
+ if (RegionNil(&region))
return;
}
@@ -4120,8 +3869,10 @@ move_to_gpu(PixmapPtr pixmap, struct sna_pixmap *priv,
int h = box->y2 - box->y1;
int count;
- if (DAMAGE_IS_ALL(priv->gpu_damage))
+ if (DAMAGE_IS_ALL(priv->gpu_damage)) {
+ assert(priv->gpu_bo);
return true;
+ }
if (priv->gpu_bo) {
if (alu != GXcopy)
@@ -4248,15 +3999,17 @@ sna_self_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
if (dst != src)
get_drawable_deltas(dst, pixmap, &tx, &ty);
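+	/* SHM pixmaps are kept coherent on the CPU, so self-copies take the
+	 * fallback path as well */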
- if (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage))
+ if (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage) || priv->shm)
goto fallback;
if (priv->gpu_damage) {
+ assert(priv->gpu_bo);
+
if (alu == GXcopy && priv->clear)
goto out;
assert(priv->gpu_bo->proxy == NULL);
- if (!sna_pixmap_move_to_gpu(pixmap, MOVE_WRITE | MOVE_READ)) {
+ if (!sna_pixmap_move_to_gpu(pixmap, MOVE_WRITE | MOVE_READ | MOVE_ASYNC_HINT)) {
DBG(("%s: fallback - not a pure copy and failed to move dst to GPU\n",
__FUNCTION__));
goto fallback;
@@ -4334,7 +4087,7 @@ sna_pixmap_is_gpu(PixmapPtr pixmap)
}
static int
-source_prefer_gpu(struct sna_pixmap *priv)
+source_prefer_gpu(struct sna *sna, struct sna_pixmap *priv)
{
if (priv == NULL) {
DBG(("%s: source unattached, use cpu\n", __FUNCTION__));
@@ -4348,6 +4101,7 @@ source_prefer_gpu(struct sna_pixmap *priv)
if (priv->gpu_damage) {
DBG(("%s: source has gpu damage, force gpu\n", __FUNCTION__));
+ assert(priv->gpu_bo);
return PREFER_GPU | FORCE_GPU;
}
@@ -4357,13 +4111,50 @@ source_prefer_gpu(struct sna_pixmap *priv)
}
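+	/* all-CPU damage: prefer the GPU only if a CPU bo exists to blit
+	 * from and the GPU is otherwise idle */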
if (DAMAGE_IS_ALL(priv->cpu_damage))
- return 0;
+ return priv->cpu_bo && kgem_is_idle(&sna->kgem);
DBG(("%s: source has GPU bo? %d\n",
__FUNCTION__, priv->gpu_bo != NULL));
return priv->gpu_bo != NULL;
}
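+/* Choose whether to blit from the source's CPU bo: an ordinary CPU bo is
+ * always usable, but a SHM bo only pays off when a direct CPU copy would
+ * be worse (complex alu, tiled or busy destination, busy source).
+ */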
+static bool use_shm_bo(struct sna *sna,
+ struct kgem_bo *bo,
+ struct sna_pixmap *priv,
+ int alu)
+{
+ if (priv == NULL || priv->cpu_bo == NULL) {
+ DBG(("%s: no, not attached\n", __FUNCTION__));
+ return false;
+ }
+
+ if (!priv->shm) {
+ DBG(("%s: yes, ordinary CPU bo\n", __FUNCTION__));
+ return true;
+ }
+
+ if (alu != GXcopy) {
+ DBG(("%s: yes, complex alu=%d\n", __FUNCTION__, alu));
+ return true;
+ }
+ if (bo->tiling) {
+		DBG(("%s: yes, dst tiled=%d\n", __FUNCTION__, bo->tiling));
+ return true;
+ }
+
+ if (__kgem_bo_is_busy(&sna->kgem, bo)) {
+ DBG(("%s: yes, dst is busy\n", __FUNCTION__));
+ return true;
+ }
+
+ if (__kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) {
+ DBG(("%s: yes, src is busy\n", __FUNCTION__));
+ return true;
+ }
+
+ return false;
+}
+
static void
sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
RegionPtr region, int dx, int dy,
@@ -4435,7 +4226,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
if (dst_priv == NULL)
goto fallback;
- hint = source_prefer_gpu(src_priv) ?:
+ hint = source_prefer_gpu(sna, src_priv) ?:
region_inplace(sna, dst_pixmap, region,
dst_priv, alu_overwrites(alu));
if (dst_priv->cpu_damage && alu_overwrites(alu)) {
@@ -4453,6 +4244,8 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
if (region->data == NULL)
hint |= IGNORE_CPU;
}
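+	/* a copy that replaces the whole destination can likewise ignore
+	 * any existing CPU damage */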
+ if (replaces)
+ hint |= IGNORE_CPU;
bo = sna_drawable_use_bo(&dst_pixmap->drawable, hint,
&region->extents, &damage);
@@ -4493,7 +4286,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
if (src_priv &&
move_to_gpu(src_pixmap, src_priv, &region->extents, alu) &&
- sna_pixmap_move_to_gpu(src_pixmap, MOVE_READ)) {
+ sna_pixmap_move_to_gpu(src_pixmap, MOVE_READ | MOVE_ASYNC_HINT)) {
DBG(("%s: move whole src_pixmap to GPU and copy\n",
__FUNCTION__));
if (!sna->render.copy_boxes(sna, alu,
@@ -4525,7 +4318,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
area.y2 += src_dy;
if (!sna_pixmap_move_area_to_gpu(src_pixmap, &area,
- MOVE_READ))
+ MOVE_READ | MOVE_ASYNC_HINT))
goto fallback;
if (!sna->render.copy_boxes(sna, alu,
@@ -4545,11 +4338,11 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
if (bo != dst_priv->gpu_bo)
goto fallback;
- if (src_priv && src_priv->cpu_bo) {
+ if (use_shm_bo(sna, bo, src_priv, alu)) {
bool ret;
- DBG(("%s: region overlaps CPU damage, copy from CPU bo\n",
- __FUNCTION__));
+ DBG(("%s: region overlaps CPU damage, copy from CPU bo (shm? %d)\n",
+ __FUNCTION__, src_priv->shm));
assert(bo != dst_priv->cpu_bo);
@@ -4561,29 +4354,31 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
if (!ret)
goto fallback;
+ if (src_priv->shm) {
+ assert(!src_priv->flush);
+ sna_add_flush_pixmap(sna, src_priv, src_priv->cpu_bo);
+ }
+
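+	/* SHM source: COPY_LAST hints this is the final read of the bo
+	 * before it is synced back for the client */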
if (!sna->render.copy_boxes(sna, alu,
src_pixmap, src_priv->cpu_bo, src_dx, src_dy,
dst_pixmap, bo, 0, 0,
- box, n, 0)) {
+ box, n, src_priv->shm ? COPY_LAST : 0)) {
DBG(("%s: fallback - accelerated copy boxes failed\n",
__FUNCTION__));
goto fallback;
}
- if (src_priv->shm) {
- assert(!src_priv->flush);
- sna_add_flush_pixmap(sna, src_priv, src_priv->cpu_bo);
- }
-
if (damage)
sna_damage_add(damage, region);
return;
}
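+	/* For unattached sources, try wrapping the client memory with
+	 * userptr and blitting directly (worthwhile on LLC for tiled
+	 * targets, or whenever the destination is busy) */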
- if (src_priv == NULL &&
+ if (USE_USERPTR_UPLOADS &&
+ src_priv == NULL &&
sna->kgem.has_userptr &&
- __kgem_bo_is_busy(&sna->kgem, bo) &&
- box_inplace(src_pixmap, &region->extents)) {
+ box_inplace(src_pixmap, &region->extents) &&
+ ((sna->kgem.has_llc && bo->tiling && !bo->scanout) ||
+ __kgem_bo_is_busy(&sna->kgem, bo))) {
struct kgem_bo *src_bo;
bool ok = false;
@@ -4664,8 +4459,13 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
if (n == 1 &&
tmp->drawable.width == src_pixmap->drawable.width &&
- tmp->drawable.height == src_pixmap->drawable.height)
+ tmp->drawable.height == src_pixmap->drawable.height) {
+ DBG(("%s: caching upload for src bo\n",
+ __FUNCTION__));
+ assert(src_priv->gpu_damage == NULL);
+ assert(src_priv->gpu_bo == NULL);
kgem_proxy_bo_attach(src_bo, &src_priv->gpu_bo);
+ }
if (!sna->render.copy_boxes(sna, alu,
tmp, src_bo, dx, dy,
@@ -4695,8 +4495,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
if (src_pixmap->devPrivate.ptr == NULL) {
if (!src_priv->ptr) /* uninitialised!*/
return;
- assert(src_priv->stride);
- src_pixmap->devPrivate.ptr = src_priv->ptr;
+ src_pixmap->devPrivate.ptr = PTR(src_priv->ptr);
src_pixmap->devKind = src_priv->stride;
}
}
@@ -4721,15 +4520,16 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
goto fallback;
}
+ assert(dst_priv->clear == false);
dst_priv->cpu = false;
if (damage) {
+ assert(dst_priv->gpu_bo->proxy == NULL);
if (replaces) {
sna_damage_destroy(&dst_priv->cpu_damage);
sna_damage_all(&dst_priv->gpu_damage,
dst_pixmap->drawable.width,
dst_pixmap->drawable.height);
list_del(&dst_priv->list);
- dst_priv->undamaged = false;
} else
sna_damage_add(&dst_priv->gpu_damage,
region);
@@ -4752,6 +4552,7 @@ fallback:
return;
}
+ assert(dst_pixmap->devPrivate.ptr);
do {
pixman_fill(dst_pixmap->devPrivate.ptr,
dst_pixmap->devKind/sizeof(uint32_t),
@@ -4891,10 +4692,7 @@ sna_do_copy(DrawablePtr src, DrawablePtr dst, GCPtr gc,
return NULL;
}
- if (src->pScreen->SourceValidate)
- src->pScreen->SourceValidate(src, sx, sy,
- width, height,
- gc->subWindowMode);
+ SourceValidate(src, sx, sy, width, height, gc->subWindowMode);
sx += src->x;
sy += src->y;
@@ -4938,7 +4736,7 @@ sna_do_copy(DrawablePtr src, DrawablePtr dst, GCPtr gc,
* VT is inactive, make sure the region isn't empty
*/
if (((WindowPtr)src)->parent ||
- !RegionNotEmpty(&((WindowPtr)src)->borderClip)) {
+ RegionNil(&((WindowPtr)src)->borderClip)) {
DBG(("%s: include inferiors\n", __FUNCTION__));
free_clip = clip = NotClippedByChildren((WindowPtr)src);
}
@@ -4973,17 +4771,17 @@ sna_do_copy(DrawablePtr src, DrawablePtr dst, GCPtr gc,
if (free_clip)
RegionDestroy(free_clip);
}
- DBG(("%s: src extents (%d, %d), (%d, %d) x %d\n", __FUNCTION__,
+ DBG(("%s: src extents (%d, %d), (%d, %d) x %ld\n", __FUNCTION__,
region.extents.x1, region.extents.y1,
region.extents.x2, region.extents.y2,
- RegionNumRects(&region)));
+ (long)RegionNumRects(&region)));
RegionTranslate(&region, dx-sx, dy-sy);
if (gc->pCompositeClip->data)
RegionIntersect(&region, &region, gc->pCompositeClip);
- DBG(("%s: copy region (%d, %d), (%d, %d) x %d\n", __FUNCTION__,
+ DBG(("%s: copy region (%d, %d), (%d, %d) x %ld\n", __FUNCTION__,
region.extents.x1, region.extents.y1,
region.extents.x2, region.extents.y2,
- RegionNumRects(&region)));
+ (long)RegionNumRects(&region)));
if (RegionNotEmpty(&region))
copy(src, dst, gc, &region, sx-dx, sy-dy, bitPlane, closure);
@@ -5005,8 +4803,8 @@ sna_fallback_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
RegionPtr region, int dx, int dy,
Pixel bitplane, void *closure)
{
- DBG(("%s (boxes=%dx[(%d, %d), (%d, %d)...], src=+(%d, %d), alu=%d\n",
- __FUNCTION__, RegionNumRects(region),
+ DBG(("%s (boxes=%ldx[(%d, %d), (%d, %d)...], src=+(%d, %d), alu=%d\n",
+ __FUNCTION__, (long)RegionNumRects(region),
region->extents.x1, region->extents.y1,
region->extents.x2, region->extents.y2,
dx, dy, gc->alu));
@@ -5014,16 +4812,17 @@ sna_fallback_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
if (!sna_gc_move_to_cpu(gc, dst, region))
return;
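+	/* read back the source first; it is needed by both the self-copy
+	 * and the two-pixmap paths below */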
+ RegionTranslate(region, dx, dy);
+ if (!sna_drawable_move_region_to_cpu(src, region, MOVE_READ))
+ goto out_gc;
+ RegionTranslate(region, -dx, -dy);
+
if (src == dst ||
get_drawable_pixmap(src) == get_drawable_pixmap(dst)) {
+ DBG(("%s: self-copy\n", __FUNCTION__));
if (!sna_drawable_move_to_cpu(dst, MOVE_WRITE | MOVE_READ))
goto out_gc;
} else {
- RegionTranslate(region, dx, dy);
- if (!sna_drawable_move_region_to_cpu(src, region, MOVE_READ))
- goto out_gc;
- RegionTranslate(region, -dx, -dy);
-
if (!sna_drawable_move_region_to_cpu(dst, region,
drawable_gc_flags(dst, gc, false)))
goto out_gc;
@@ -5049,10 +4848,11 @@ sna_copy_area(DrawablePtr src, DrawablePtr dst, GCPtr gc,
if (gc->planemask == 0)
return NULL;
- DBG(("%s: src=(%d, %d)x(%d, %d)+(%d, %d) -> dst=(%d, %d)+(%d, %d)\n",
+ DBG(("%s: src=(%d, %d)x(%d, %d)+(%d, %d) -> dst=(%d, %d)+(%d, %d); alu=%d, pm=%lx\n",
__FUNCTION__,
src_x, src_y, width, height, src->x, src->y,
- dst_x, dst_y, dst->x, dst->y));
+ dst_x, dst_y, dst->x, dst->y,
+ gc->alu, gc->planemask));
if (FORCE_FALLBACK || !ACCEL_COPY_AREA || wedged(sna) ||
!PM_IS_SOLID(dst, gc->planemask))
@@ -5599,7 +5399,7 @@ no_damage_clipped:
region_set(&clip, extents);
region_maybe_clip(&clip, gc->pCompositeClip);
- if (!RegionNotEmpty(&clip))
+ if (RegionNil(&clip))
return true;
assert(dx + clip.extents.x1 >= 0);
@@ -5607,9 +5407,9 @@ no_damage_clipped:
assert(dx + clip.extents.x2 <= pixmap->drawable.width);
assert(dy + clip.extents.y2 <= pixmap->drawable.height);
- DBG(("%s: clip %d x [(%d, %d), (%d, %d)] x %d [(%d, %d)...]\n",
+ DBG(("%s: clip %ld x [(%d, %d), (%d, %d)] x %d [(%d, %d)...]\n",
__FUNCTION__,
- REGION_NUM_RECTS(&clip),
+ (long)RegionNumRects(&clip),
clip.extents.x1, clip.extents.y1, clip.extents.x2, clip.extents.y2,
n, pt->x, pt->y));
@@ -5700,7 +5500,7 @@ damage_clipped:
region_set(&clip, extents);
region_maybe_clip(&clip, gc->pCompositeClip);
- if (!RegionNotEmpty(&clip))
+ if (RegionNil(&clip))
return true;
assert(dx + clip.extents.x1 >= 0);
@@ -5708,9 +5508,9 @@ damage_clipped:
assert(dx + clip.extents.x2 <= pixmap->drawable.width);
assert(dy + clip.extents.y2 <= pixmap->drawable.height);
- DBG(("%s: clip %d x [(%d, %d), (%d, %d)] x %d [(%d, %d)...]\n",
+ DBG(("%s: clip %ld x [(%d, %d), (%d, %d)] x %d [(%d, %d)...]\n",
__FUNCTION__,
- REGION_NUM_RECTS(&clip),
+	     (long)RegionNumRects(&clip),
clip.extents.x1, clip.extents.y1, clip.extents.x2, clip.extents.y2,
n, pt->x, pt->y));
@@ -6021,7 +5821,7 @@ fallback:
DBG(("%s: fallback\n", __FUNCTION__));
region.data = NULL;
region_maybe_clip(&region, gc->pCompositeClip);
- if (!RegionNotEmpty(&region))
+ if (RegionNil(&region))
return;
if (!sna_gc_move_to_cpu(gc, drawable, &region))
@@ -6061,7 +5861,7 @@ sna_set_spans(DrawablePtr drawable, GCPtr gc, char *src,
fallback:
region.data = NULL;
region_maybe_clip(&region, gc->pCompositeClip);
- if (!RegionNotEmpty(&region))
+ if (RegionNil(&region))
return;
if (!sna_gc_move_to_cpu(gc, drawable, &region))
@@ -6098,10 +5898,11 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc,
BoxPtr box;
int n;
- DBG(("%s: plane=%x (%d,%d),(%d,%d)x%d\n",
- __FUNCTION__, (unsigned)bitplane, RegionNumRects(region),
+ DBG(("%s: plane=%x (%d,%d),(%d,%d)x%ld\n",
+ __FUNCTION__, (unsigned)bitplane,
region->extents.x1, region->extents.y1,
- region->extents.x2, region->extents.y2));
+ region->extents.x2, region->extents.y2,
+ (long)RegionNumRects(region)));
box = RegionRects(region);
n = RegionNumRects(region);
@@ -6112,14 +5913,14 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc,
br00 = 3 << 20;
br13 = arg->bo->pitch;
- if (sna->kgem.gen >= 40 && arg->bo->tiling) {
+ if (sna->kgem.gen >= 040 && arg->bo->tiling) {
br00 |= BLT_DST_TILED;
br13 >>= 2;
}
br13 |= blt_depth(drawable->depth) << 24;
br13 |= copy_ROP[gc->alu] << 16;
- kgem_set_mode(&sna->kgem, KGEM_BLT);
+ kgem_set_mode(&sna->kgem, KGEM_BLT, arg->bo);
do {
int bx1 = (box->x1 + sx) & ~7;
int bx2 = (box->x2 + sx + 7) & ~7;
@@ -6142,7 +5943,9 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc,
if (!kgem_check_batch(&sna->kgem, 7+src_stride) ||
!kgem_check_bo_fenced(&sna->kgem, arg->bo) ||
!kgem_check_reloc(&sna->kgem, 1)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, arg->bo))
+ return; /* XXX fallback? */
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
@@ -6184,7 +5987,9 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc,
if (!kgem_check_batch(&sna->kgem, 8) ||
!kgem_check_bo_fenced(&sna->kgem, arg->bo) ||
!kgem_check_reloc_and_exec(&sna->kgem, 2)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, arg->bo))
+ return; /* XXX fallback? */
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
@@ -6276,14 +6081,14 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc,
br00 = XY_MONO_SRC_COPY | 3 << 20;
br13 = arg->bo->pitch;
- if (sna->kgem.gen >= 40 && arg->bo->tiling) {
+ if (sna->kgem.gen >= 040 && arg->bo->tiling) {
br00 |= BLT_DST_TILED;
br13 >>= 2;
}
br13 |= blt_depth(drawable->depth) << 24;
br13 |= copy_ROP[gc->alu] << 16;
- kgem_set_mode(&sna->kgem, KGEM_BLT);
+ kgem_set_mode(&sna->kgem, KGEM_BLT, arg->bo);
do {
int bx1 = (box->x1 + sx) & ~7;
int bx2 = (box->x2 + sx + 7) & ~7;
@@ -6303,7 +6108,9 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc,
if (!kgem_check_batch(&sna->kgem, 8) ||
!kgem_check_bo_fenced(&sna->kgem, arg->bo) ||
!kgem_check_reloc_and_exec(&sna->kgem, 2)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, arg->bo))
+ return; /* XXX fallback? */
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
@@ -6504,7 +6311,7 @@ sna_copy_plane(DrawablePtr src, DrawablePtr dst, GCPtr gc,
__FUNCTION__,
region.extents.x1, region.extents.y1,
region.extents.x2, region.extents.y2));
- if (!RegionNotEmpty(&region))
+ if (RegionNil(&region))
goto empty;
RegionTranslate(&region,
@@ -6750,7 +6557,7 @@ sna_poly_point(DrawablePtr drawable, GCPtr gc,
DBG(("%s: trying solid fill [%08lx] blt paths\n",
__FUNCTION__, gc->fgPixel));
- if ((bo = sna_drawable_use_bo(drawable, 0,
+ if ((bo = sna_drawable_use_bo(drawable, PREFER_GPU,
&region.extents, &damage)) &&
sna_poly_point_blt(drawable, bo, damage,
gc, mode, n, pt, flags & 2))
@@ -6761,7 +6568,7 @@ fallback:
DBG(("%s: fallback\n", __FUNCTION__));
region.data = NULL;
region_maybe_clip(&region, gc->pCompositeClip);
- if (!RegionNotEmpty(&region))
+ if (RegionNil(&region))
return;
if (!sna_gc_move_to_cpu(gc, drawable, &region))
@@ -6816,7 +6623,7 @@ sna_poly_zero_line_blt(DrawablePtr drawable,
region_set(&clip, extents);
if (clipped) {
region_maybe_clip(&clip, gc->pCompositeClip);
- if (!RegionNotEmpty(&clip))
+ if (RegionNil(&clip))
return true;
}
@@ -6827,8 +6634,8 @@ sna_poly_zero_line_blt(DrawablePtr drawable,
clip.extents.x2, clip.extents.y2,
dx, dy, damage));
- extents = REGION_RECTS(&clip);
- last_extents = extents + REGION_NUM_RECTS(&clip);
+ extents = RegionRects(&clip);
+ last_extents = extents + RegionNumRects(&clip);
b = box;
do {
@@ -7223,6 +7030,8 @@ sna_poly_line_blt(DrawablePtr drawable,
b->y1 = p.y;
b->y2 = last.y;
}
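+		/* extend by one pixel along the line's axis so the
+		 * endpoint is drawn */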
+ b->y2 += last.x == p.x;
+ b->x2 += last.y == p.y;
DBG(("%s: blt (%d, %d), (%d, %d)\n",
__FUNCTION__,
b->x1, b->y1, b->x2, b->y2));
@@ -7241,7 +7050,7 @@ sna_poly_line_blt(DrawablePtr drawable,
region_set(&clip, extents);
region_maybe_clip(&clip, gc->pCompositeClip);
- if (!RegionNotEmpty(&clip))
+ if (RegionNil(&clip))
return true;
last.x = pt->x + drawable->x;
@@ -7280,6 +7089,8 @@ sna_poly_line_blt(DrawablePtr drawable,
b->y1 = p.y;
b->y2 = last.y;
}
+ b->y2 += last.x == p.x;
+ b->x2 += last.y == p.y;
DBG(("%s: blt (%d, %d), (%d, %d)\n",
__FUNCTION__,
b->x1, b->y1, b->x2, b->y2));
@@ -7336,6 +7147,8 @@ sna_poly_line_blt(DrawablePtr drawable,
box.y1 = p.y;
box.y2 = last.y;
}
+			box.y2 += last.x == p.x;
+			box.x2 += last.y == p.y;
DBG(("%s: blt (%d, %d), (%d, %d)\n",
__FUNCTION__,
box.x1, box.y1, box.x2, box.y2));
@@ -7683,7 +7496,7 @@ spans_fallback:
} else {
region_maybe_clip(&data.region,
gc->pCompositeClip);
- if (!RegionNotEmpty(&data.region))
+ if (RegionNil(&data.region))
return;
if (region_is_singular(&data.region))
@@ -7708,7 +7521,7 @@ spans_fallback:
} else {
region_maybe_clip(&data.region,
gc->pCompositeClip);
- if (!RegionNotEmpty(&data.region))
+ if (RegionNil(&data.region))
return;
if (region_is_singular(&data.region))
@@ -7785,7 +7598,7 @@ spans_fallback:
fallback:
DBG(("%s: fallback\n", __FUNCTION__));
region_maybe_clip(&data.region, gc->pCompositeClip);
- if (!RegionNotEmpty(&data.region))
+ if (RegionNil(&data.region))
return;
if (!sna_gc_move_to_cpu(gc, drawable, &data.region))
@@ -7915,7 +7728,7 @@ sna_poly_segment_blt(DrawablePtr drawable,
region_set(&clip, extents);
region_maybe_clip(&clip, gc->pCompositeClip);
- if (!RegionNotEmpty(&clip))
+ if (RegionNil(&clip))
goto done;
if (clip.data) {
@@ -8022,7 +7835,7 @@ sna_poly_zero_segment_blt(DrawablePtr drawable,
region_set(&clip, extents);
if (clipped) {
region_maybe_clip(&clip, gc->pCompositeClip);
- if (!RegionNotEmpty(&clip))
+ if (RegionNil(&clip))
return true;
}
DBG(("%s: [clipped] extents=(%d, %d), (%d, %d), delta=(%d, %d)\n",
@@ -8034,8 +7847,8 @@ sna_poly_zero_segment_blt(DrawablePtr drawable,
jump = _jump[(damage != NULL) | !!(dx|dy) << 1];
b = box;
- extents = REGION_RECTS(&clip);
- last_extents = extents + REGION_NUM_RECTS(&clip);
+ extents = RegionRects(&clip);
+ last_extents = extents + RegionNumRects(&clip);
do {
int n = _n;
const xSegment *s = _s;
@@ -8590,7 +8403,7 @@ spans_fallback:
} else {
region_maybe_clip(&data.region,
gc->pCompositeClip);
- if (!RegionNotEmpty(&data.region))
+ if (RegionNil(&data.region))
return;
if (region_is_singular(&data.region))
@@ -8629,7 +8442,7 @@ spans_fallback:
fallback:
DBG(("%s: fallback\n", __FUNCTION__));
region_maybe_clip(&data.region, gc->pCompositeClip);
- if (!RegionNotEmpty(&data.region))
+ if (RegionNil(&data.region))
return;
if (!sna_gc_move_to_cpu(gc, drawable, &data.region))
@@ -8657,6 +8470,7 @@ sna_poly_rectangle_extents(DrawablePtr drawable, GCPtr gc,
Box32Rec box;
int extra = gc->lineWidth >> 1;
bool clipped;
+ bool zero = false;
if (n == 0)
return 0;
@@ -8665,9 +8479,13 @@ sna_poly_rectangle_extents(DrawablePtr drawable, GCPtr gc,
box.y1 = r->y;
box.x2 = box.x1 + r->width;
box.y2 = box.y1 + r->height;
+ zero |= (r->width | r->height) == 0;
- while (--n)
- box32_add_rect(&box, ++r);
+ while (--n) {
+ r++;
+ zero |= (r->width | r->height) == 0;
+ box32_add_rect(&box, r);
+ }
box.x2++;
box.y2++;
@@ -8677,13 +8495,15 @@ sna_poly_rectangle_extents(DrawablePtr drawable, GCPtr gc,
box.x2 += extra;
box.y1 -= extra;
box.y2 += extra;
- }
+ zero = !zero;
+ } else
+ zero = true;
clipped = box32_trim_and_translate(&box, drawable, gc);
if (!box32_to_box16(&box, out))
return 0;
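+	/* bit 0: non-empty, bit 1: clipped, bit 2: outline expressible
+	 * as solid blts */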
- return 1 | clipped << 1;
+ return 1 | clipped << 1 | zero << 2;
}
static bool
@@ -8722,7 +8542,7 @@ zero:
xRectangle rr = *r++;
if ((rr.width | rr.height) == 0)
- continue;
+ continue; /* XXX -> PolyLine */
DBG(("%s - zero : r[%d] = (%d, %d) x (%d, %d)\n", __FUNCTION__,
n, rr.x, rr.y, rr.width, rr.height));
@@ -8777,7 +8597,7 @@ zero_clipped:
region_set(&clip, extents);
region_maybe_clip(&clip, gc->pCompositeClip);
- if (!RegionNotEmpty(&clip))
+ if (RegionNil(&clip))
goto done;
if (clip.data) {
@@ -8791,7 +8611,7 @@ zero_clipped:
n, rr.x, rr.y, rr.width, rr.height));
if ((rr.width | rr.height) == 0)
- continue;
+ continue; /* XXX -> PolyLine */
rr.x += drawable->x;
rr.y += drawable->y;
@@ -8855,7 +8675,7 @@ zero_clipped:
n, rr.x, rr.y, rr.width, rr.height));
if ((rr.width | rr.height) == 0)
- continue;
+ continue; /* XXX -> PolyLine */
rr.x += drawable->x;
rr.y += drawable->y;
@@ -8923,7 +8743,7 @@ wide_clipped:
__FUNCTION__,
clip.extents.x1, clip.extents.y1,
clip.extents.x2, clip.extents.y2));
- if (!RegionNotEmpty(&clip))
+ if (RegionNil(&clip))
goto done;
if (clip.data) {
@@ -8935,7 +8755,7 @@ wide_clipped:
int count;
if ((rr.width | rr.height) == 0)
- continue;
+ continue; /* XXX -> PolyLine */
rr.x += drawable->x;
rr.y += drawable->y;
@@ -9100,7 +8920,7 @@ wide:
xRectangle rr = *r++;
if ((rr.width | rr.height) == 0)
- continue;
+ continue; /* XXX -> PolyLine */
rr.x += dx;
rr.y += dy;
@@ -9198,8 +9018,9 @@ sna_poly_rectangle(DrawablePtr drawable, GCPtr gc, int n, xRectangle *r)
goto fallback;
}
- DBG(("%s: line=%d [%d], join=%d [%d], mask=%lu [%d]\n",
+	DBG(("%s: fill=%d [%d], line=%d [%d], join=%d [%d], mask=%lu [%d]\n",
__FUNCTION__,
+ gc->fillStyle, gc->fillStyle == FillSolid,
gc->lineStyle, gc->lineStyle == LineSolid,
gc->joinStyle, gc->joinStyle == JoinMiter,
gc->planemask, PM_IS_SOLID(drawable, gc->planemask)));
@@ -9207,7 +9028,7 @@ sna_poly_rectangle(DrawablePtr drawable, GCPtr gc, int n, xRectangle *r)
if (!PM_IS_SOLID(drawable, gc->planemask))
goto fallback;
- if (gc->lineStyle == LineSolid && gc->joinStyle == JoinMiter) {
+ if (flags & 4 && gc->fillStyle == FillSolid && gc->lineStyle == LineSolid && gc->joinStyle == JoinMiter) {
DBG(("%s: trying blt solid fill [%08lx] paths\n",
__FUNCTION__, gc->fgPixel));
if ((bo = sna_drawable_use_bo(drawable, PREFER_GPU,
@@ -9231,7 +9052,7 @@ fallback:
region.data = NULL;
region_maybe_clip(&region, gc->pCompositeClip);
- if (!RegionNotEmpty(&region))
+ if (RegionNil(&region))
return;
if (!sna_gc_move_to_cpu(gc, drawable, &region))
@@ -9370,7 +9191,7 @@ sna_poly_arc(DrawablePtr drawable, GCPtr gc, int n, xArc *arc)
} else {
region_maybe_clip(&data.region,
gc->pCompositeClip);
- if (!RegionNotEmpty(&data.region))
+ if (RegionNil(&data.region))
return;
if (region_is_singular(&data.region)) {
@@ -9394,7 +9215,7 @@ sna_poly_arc(DrawablePtr drawable, GCPtr gc, int n, xArc *arc)
} else {
region_maybe_clip(&data.region,
gc->pCompositeClip);
- if (!RegionNotEmpty(&data.region))
+ if (RegionNil(&data.region))
return;
sna_gc_ops__tmp.FillSpans = sna_fill_spans__gpu;
@@ -9430,7 +9251,7 @@ sna_poly_arc(DrawablePtr drawable, GCPtr gc, int n, xArc *arc)
fallback:
DBG(("%s -- fallback\n", __FUNCTION__));
region_maybe_clip(&data.region, gc->pCompositeClip);
- if (!RegionNotEmpty(&data.region))
+ if (RegionNil(&data.region))
return;
if (!sna_gc_move_to_cpu(gc, drawable, &data.region))
@@ -9502,12 +9323,12 @@ sna_poly_fill_rect_blt(DrawablePtr drawable,
r.y2 - r.y1 == pixmap->drawable.height) {
struct sna_pixmap *priv = sna_pixmap(pixmap);
if (bo == priv->gpu_bo) {
+ assert(priv->gpu_bo->proxy == NULL);
sna_damage_all(&priv->gpu_damage,
pixmap->drawable.width,
pixmap->drawable.height);
sna_damage_destroy(&priv->cpu_damage);
list_del(&priv->list);
- priv->undamaged = false;
priv->clear = true;
priv->clear_color = gc->alu == GXcopy ? pixel : 0;
@@ -9573,7 +9394,7 @@ sna_poly_fill_rect_blt(DrawablePtr drawable,
region_set(&clip, extents);
region_maybe_clip(&clip, gc->pCompositeClip);
- if (!RegionNotEmpty(&clip))
+ if (RegionNil(&clip))
goto done;
if (clip.data == NULL) {
@@ -9744,7 +9565,7 @@ sna_poly_fill_polygon(DrawablePtr draw, GCPtr gc,
} else {
region_maybe_clip(&data.region,
gc->pCompositeClip);
- if (!RegionNotEmpty(&data.region))
+ if (RegionNil(&data.region))
return;
if (region_is_singular(&data.region))
@@ -9781,7 +9602,7 @@ fallback:
data.region.extents.x1, data.region.extents.y1,
data.region.extents.x2, data.region.extents.y2));
region_maybe_clip(&data.region, gc->pCompositeClip);
- if (!RegionNotEmpty(&data.region)) {
+ if (RegionNil(&data.region)) {
DBG(("%s: nothing to do, all clipped\n", __FUNCTION__));
return;
}
@@ -9831,7 +9652,8 @@ sna_pixmap_get_source_bo(PixmapPtr pixmap)
return upload;
}
- if (priv->gpu_damage && !sna_pixmap_move_to_gpu(pixmap, MOVE_READ))
+ if (priv->gpu_damage &&
+ !sna_pixmap_move_to_gpu(pixmap, MOVE_READ | MOVE_ASYNC_HINT))
return NULL;
if (priv->cpu_damage && priv->cpu_bo)
@@ -9873,17 +9695,19 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable,
DBG(("%s x %d [(%d, %d)x(%d, %d)...], clipped=%x\n",
__FUNCTION__, n, r->x, r->y, r->width, r->height, clipped));
- kgem_set_mode(&sna->kgem, KGEM_BLT);
+ kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
if (!kgem_check_batch(&sna->kgem, 8+2*3) ||
!kgem_check_reloc(&sna->kgem, 2) ||
!kgem_check_bo_fenced(&sna->kgem, bo)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, bo))
+ return false;
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
br00 = XY_SCANLINE_BLT;
br13 = bo->pitch;
- if (sna->kgem.gen >= 40 && bo->tiling) {
+ if (sna->kgem.gen >= 040 && bo->tiling) {
br00 |= BLT_DST_TILED;
br13 >>= 2;
}
@@ -9988,7 +9812,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable,
region_set(&clip, extents);
region_maybe_clip(&clip, gc->pCompositeClip);
- if (!RegionNotEmpty(&clip))
+ if (RegionNil(&clip))
goto done;
b = sna->kgem.batch + sna->kgem.nbatch;
@@ -10315,7 +10139,7 @@ sna_poly_fill_rect_tiled_blt(DrawablePtr drawable,
region_set(&clip, extents);
region_maybe_clip(&clip, gc->pCompositeClip);
- if (!RegionNotEmpty(&clip))
+ if (RegionNil(&clip))
goto done;
if (clip.data == NULL) {
@@ -10393,8 +10217,8 @@ sna_poly_fill_rect_tiled_blt(DrawablePtr drawable,
region.data = NULL;
RegionIntersect(&region, &region, &clip);
- nbox = REGION_NUM_RECTS(&region);
- box = REGION_RECTS(&region);
+ nbox = RegionNumRects(&region);
+ box = RegionRects(&region);
while (nbox--) {
int height = box->y2 - box->y1;
int dst_y = box->y1;
@@ -10487,7 +10311,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable,
DBG(("%s: pat offset (%d, %d)\n", __FUNCTION__ ,px, py));
br00 = XY_SCANLINE_BLT | px << 12 | py << 8 | 3 << 20;
br13 = bo->pitch;
- if (sna->kgem.gen >= 40 && bo->tiling) {
+ if (sna->kgem.gen >= 040 && bo->tiling) {
br00 |= BLT_DST_TILED;
br13 >>= 2;
}
@@ -10507,11 +10331,13 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable,
} while (--j);
}
- kgem_set_mode(&sna->kgem, KGEM_BLT);
+ kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
if (!kgem_check_batch(&sna->kgem, 9 + 2*3) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc(&sna->kgem, 1)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, bo))
+ return false;
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
@@ -10592,7 +10418,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable,
region_set(&clip, extents);
region_maybe_clip(&clip, gc->pCompositeClip);
- if (!RegionNotEmpty(&clip))
+ if (RegionNil(&clip))
return true;
b = sna->kgem.batch + sna->kgem.nbatch;
@@ -10783,11 +10609,11 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
origin->x, origin->y));
get_drawable_deltas(drawable, pixmap, &dx, &dy);
- kgem_set_mode(&sna->kgem, KGEM_BLT);
+ kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
br00 = 3 << 20;
br13 = bo->pitch;
- if (sna->kgem.gen >= 40 && bo->tiling) {
+ if (sna->kgem.gen >= 040 && bo->tiling) {
br00 |= BLT_DST_TILED;
br13 >>= 2;
}
@@ -10821,7 +10647,9 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
if (!kgem_check_batch(&sna->kgem, 7+src_stride) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc(&sna->kgem, 1)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, bo))
+ return false;
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
@@ -10863,7 +10691,9 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
if (!kgem_check_batch(&sna->kgem, 8) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc_and_exec(&sna->kgem, 2)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, bo))
+ return false;
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
@@ -10920,7 +10750,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
region_set(&clip, extents);
region_maybe_clip(&clip, gc->pCompositeClip);
- if (!RegionNotEmpty(&clip))
+ if (RegionNil(&clip))
return true;
pat.x = origin->x + drawable->x;
@@ -10963,7 +10793,9 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
if (!kgem_check_batch(&sna->kgem, 7+src_stride) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc(&sna->kgem, 1)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, bo))
+ return false;
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
@@ -11002,7 +10834,9 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
if (!kgem_check_batch(&sna->kgem, 8) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc_and_exec(&sna->kgem, 2)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, bo))
+ return false;
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
@@ -11103,7 +10937,9 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
if (!kgem_check_batch(&sna->kgem, 7+src_stride) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc(&sna->kgem, 1)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, bo))
+ return false;
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
@@ -11142,7 +10978,9 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
if (!kgem_check_batch(&sna->kgem, 8) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc_and_exec(&sna->kgem, 2)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, bo))
+ return false;
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
@@ -11252,7 +11090,9 @@ sna_poly_fill_rect_stippled_n_box__imm(struct sna *sna,
if (!kgem_check_batch(&sna->kgem, 7+len) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc(&sna->kgem, 1)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, bo))
+ return; /* XXX fallback? */
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
@@ -11356,7 +11196,9 @@ sna_poly_fill_rect_stippled_n_box(struct sna *sna,
if (!kgem_check_batch(&sna->kgem, 7+len) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc(&sna->kgem, 2)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, bo))
+ return; /* XXX fallback? */
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
@@ -11479,11 +11321,11 @@ sna_poly_fill_rect_stippled_n_blt__imm(DrawablePtr drawable,
clipped, gc->alu, gc->fillStyle == FillOpaqueStippled));
get_drawable_deltas(drawable, pixmap, &dx, &dy);
- kgem_set_mode(&sna->kgem, KGEM_BLT);
+ kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
br00 = XY_MONO_SRC_COPY_IMM | 3 << 20;
br13 = bo->pitch;
- if (sna->kgem.gen >= 40 && bo->tiling) {
+ if (sna->kgem.gen >= 040 && bo->tiling) {
br00 |= BLT_DST_TILED;
br13 >>= 2;
}
@@ -11517,7 +11359,7 @@ sna_poly_fill_rect_stippled_n_blt__imm(DrawablePtr drawable,
region_set(&clip, extents);
region_maybe_clip(&clip, gc->pCompositeClip);
- if (!RegionNotEmpty(&clip)) {
+ if (RegionNil(&clip)) {
DBG(("%s: all clipped\n", __FUNCTION__));
return true;
}
@@ -11624,11 +11466,11 @@ sna_poly_fill_rect_stippled_n_blt(DrawablePtr drawable,
extents, clipped);
get_drawable_deltas(drawable, pixmap, &dx, &dy);
- kgem_set_mode(&sna->kgem, KGEM_BLT);
+ kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
br00 = XY_MONO_SRC_COPY | 3 << 20;
br13 = bo->pitch;
- if (sna->kgem.gen >= 40 && bo->tiling) {
+ if (sna->kgem.gen >= 040 && bo->tiling) {
br00 |= BLT_DST_TILED;
br13 >>= 2;
}
@@ -11662,7 +11504,7 @@ sna_poly_fill_rect_stippled_n_blt(DrawablePtr drawable,
region_set(&clip, extents);
region_maybe_clip(&clip, gc->pCompositeClip);
- if (!RegionNotEmpty(&clip)) {
+ if (RegionNil(&clip)) {
DBG(("%s: all clipped\n", __FUNCTION__));
return true;
}
@@ -11943,10 +11785,10 @@ sna_poly_fill_rect(DrawablePtr draw, GCPtr gc, int n, xRectangle *rect)
box_inplace(pixmap, &region.extents))) {
DBG(("%s: promoting to full GPU\n", __FUNCTION__));
if (priv->gpu_bo) {
+ assert(priv->gpu_bo->proxy == NULL);
sna_damage_all(&priv->gpu_damage,
pixmap->drawable.width,
pixmap->drawable.height);
- priv->undamaged = false;
}
}
if (priv->cpu_damage == NULL) {
@@ -12000,7 +11842,7 @@ fallback:
region.extents.x2, region.extents.y2));
region.data = NULL;
region_maybe_clip(&region, gc->pCompositeClip);
- if (!RegionNotEmpty(&region)) {
+ if (RegionNil(&region)) {
DBG(("%s: nothing to do, all clipped\n", __FUNCTION__));
return;
}
@@ -12093,7 +11935,7 @@ sna_poly_fill_arc(DrawablePtr draw, GCPtr gc, int n, xArc *arc)
} else {
region_maybe_clip(&data.region,
gc->pCompositeClip);
- if (!RegionNotEmpty(&data.region))
+ if (RegionNil(&data.region))
return;
if (region_is_singular(&data.region))
@@ -12130,7 +11972,7 @@ fallback:
data.region.extents.x1, data.region.extents.y1,
data.region.extents.x2, data.region.extents.y2));
region_maybe_clip(&data.region, gc->pCompositeClip);
- if (!RegionNotEmpty(&data.region)) {
+ if (RegionNil(&data.region)) {
DBG(("%s: nothing to do, all clipped\n", __FUNCTION__));
return;
}
@@ -12256,19 +12098,21 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc,
_y += drawable->y + dy;
RegionTranslate(clip, dx, dy);
- extents = REGION_RECTS(clip);
- last_extents = extents + REGION_NUM_RECTS(clip);
+ extents = RegionRects(clip);
+ last_extents = extents + RegionNumRects(clip);
if (!transparent) /* emulate miImageGlyphBlt */
sna_blt_fill_boxes(sna, GXcopy,
bo, drawable->bitsPerPixel,
- bg, extents, REGION_NUM_RECTS(clip));
+ bg, extents, RegionNumRects(clip));
- kgem_set_mode(&sna->kgem, KGEM_BLT);
+ kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
if (!kgem_check_batch(&sna->kgem, 16) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc(&sna->kgem, 1)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, bo))
+ return false;
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
@@ -12280,7 +12124,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc,
b = sna->kgem.batch + sna->kgem.nbatch;
b[0] = XY_SETUP_BLT | 3 << 20;
b[1] = bo->pitch;
- if (sna->kgem.gen >= 40 && bo->tiling) {
+ if (sna->kgem.gen >= 040 && bo->tiling) {
b[0] |= BLT_DST_TILED;
b[1] >>= 2;
}
@@ -12298,7 +12142,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc,
sna->kgem.nbatch += 8;
br00 = XY_TEXT_IMMEDIATE_BLT;
- if (bo->tiling && sna->kgem.gen >= 40)
+ if (bo->tiling && sna->kgem.gen >= 040)
br00 |= BLT_DST_TILED;
do {
@@ -12343,7 +12187,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc,
b = sna->kgem.batch + sna->kgem.nbatch;
b[0] = XY_SETUP_BLT | 3 << 20;
b[1] = bo->pitch;
- if (sna->kgem.gen >= 40 && bo->tiling) {
+ if (sna->kgem.gen >= 040 && bo->tiling) {
b[0] |= BLT_DST_TILED;
b[1] >>= 2;
}
@@ -12585,7 +12429,7 @@ sna_poly_text8(DrawablePtr drawable, GCPtr gc,
region.data = NULL;
region_maybe_clip(&region, gc->pCompositeClip);
- if (!RegionNotEmpty(&region))
+ if (RegionNil(&region))
return x + extents.overallRight;
if (FORCE_FALLBACK)
@@ -12659,7 +12503,7 @@ sna_poly_text16(DrawablePtr drawable, GCPtr gc,
region.data = NULL;
region_maybe_clip(&region, gc->pCompositeClip);
- if (!RegionNotEmpty(&region))
+ if (RegionNil(&region))
return x + extents.overallRight;
if (FORCE_FALLBACK)
@@ -12740,7 +12584,7 @@ sna_image_text8(DrawablePtr drawable, GCPtr gc,
region.data = NULL;
region_maybe_clip(&region, gc->pCompositeClip);
- if (!RegionNotEmpty(&region))
+ if (RegionNil(&region))
return;
DBG(("%s: clipped extents (%d, %d), (%d, %d)\n",
@@ -12822,7 +12666,7 @@ sna_image_text16(DrawablePtr drawable, GCPtr gc,
region.data = NULL;
region_maybe_clip(&region, gc->pCompositeClip);
- if (!RegionNotEmpty(&region))
+ if (RegionNil(&region))
return;
DBG(("%s: clipped extents (%d, %d), (%d, %d)\n",
@@ -12901,19 +12745,21 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc,
_y += drawable->y + dy;
RegionTranslate(clip, dx, dy);
- extents = REGION_RECTS(clip);
- last_extents = extents + REGION_NUM_RECTS(clip);
+ extents = RegionRects(clip);
+ last_extents = extents + RegionNumRects(clip);
if (!transparent) /* emulate miImageGlyphBlt */
sna_blt_fill_boxes(sna, GXcopy,
bo, drawable->bitsPerPixel,
- bg, extents, REGION_NUM_RECTS(clip));
+ bg, extents, RegionNumRects(clip));
- kgem_set_mode(&sna->kgem, KGEM_BLT);
+ kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
if (!kgem_check_batch(&sna->kgem, 16) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc(&sna->kgem, 1)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, bo))
+ return false;
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
@@ -12924,7 +12770,7 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc,
b = sna->kgem.batch + sna->kgem.nbatch;
b[0] = XY_SETUP_BLT | 1 << 20;
b[1] = bo->pitch;
- if (sna->kgem.gen >= 40 && bo->tiling) {
+ if (sna->kgem.gen >= 040 && bo->tiling) {
b[0] |= BLT_DST_TILED;
b[1] >>= 2;
}
@@ -13005,7 +12851,7 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc,
b = sna->kgem.batch + sna->kgem.nbatch;
b[0] = XY_SETUP_BLT | 1 << 20;
b[1] = bo->pitch;
- if (sna->kgem.gen >= 40 && bo->tiling) {
+ if (sna->kgem.gen >= 040 && bo->tiling) {
b[0] |= BLT_DST_TILED;
b[1] >>= 2;
}
@@ -13028,7 +12874,7 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc,
sna->kgem.nbatch += 3 + len;
b[0] = XY_TEXT_IMMEDIATE_BLT | (1 + len);
- if (bo->tiling && sna->kgem.gen >= 40)
+ if (bo->tiling && sna->kgem.gen >= 040)
b[0] |= BLT_DST_TILED;
b[1] = (uint16_t)y1 << 16 | (uint16_t)x1;
b[2] = (uint16_t)(y1+h) << 16 | (uint16_t)(x1+w);
@@ -13122,7 +12968,7 @@ sna_image_glyph(DrawablePtr drawable, GCPtr gc,
region.data = NULL;
region_maybe_clip(&region, gc->pCompositeClip);
- if (!RegionNotEmpty(&region))
+ if (RegionNil(&region))
return;
if (FORCE_FALLBACK)
@@ -13200,7 +13046,7 @@ sna_poly_glyph(DrawablePtr drawable, GCPtr gc,
region.data = NULL;
region_maybe_clip(&region, gc->pCompositeClip);
- if (!RegionNotEmpty(&region))
+ if (RegionNil(&region))
return;
if (FORCE_FALLBACK)
@@ -13289,11 +13135,11 @@ sna_push_pixels_solid_blt(GCPtr gc,
region->extents.x1, region->extents.y1,
region->extents.x2, region->extents.y2));
- kgem_set_mode(&sna->kgem, KGEM_BLT);
+ kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
/* Region is pre-clipped and translated into pixmap space */
- box = REGION_RECTS(region);
- n = REGION_NUM_RECTS(region);
+ box = RegionRects(region);
+ n = RegionNumRects(region);
do {
int bx1 = (box->x1 - region->extents.x1) & ~7;
int bx2 = (box->x2 - region->extents.x1 + 7) & ~7;
@@ -13309,7 +13155,9 @@ sna_push_pixels_solid_blt(GCPtr gc,
if (!kgem_check_batch(&sna->kgem, 8) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc_and_exec(&sna->kgem, 2)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, bo))
+ return false;
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
@@ -13340,7 +13188,7 @@ sna_push_pixels_solid_blt(GCPtr gc,
b[0] = XY_MONO_SRC_COPY | 3 << 20;
b[0] |= ((box->x1 - region->extents.x1) & 7) << 17;
b[1] = bo->pitch;
- if (sna->kgem.gen >= 40 && bo->tiling) {
+ if (sna->kgem.gen >= 040 && bo->tiling) {
b[0] |= BLT_DST_TILED;
b[1] >>= 2;
}
@@ -13399,7 +13247,7 @@ sna_push_pixels(GCPtr gc, PixmapPtr bitmap, DrawablePtr drawable,
region.data = NULL;
region_maybe_clip(&region, gc->pCompositeClip);
- if (!RegionNotEmpty(&region))
+ if (RegionNil(&region))
return;
switch (gc->fillStyle) {
@@ -13537,6 +13385,10 @@ static int sna_create_gc(GCPtr gc)
gc->miTranslate = 1;
gc->fExpose = 1;
+ gc->freeCompClip = 0;
+ gc->pCompositeClip = 0;
+ gc->pRotatedPixmap = 0;
+
fb_gc(gc)->bpp = bits_per_pixel(gc->depth);
gc->funcs = (GCFuncs *)&sna_gc_funcs;
@@ -13544,6 +13396,82 @@ static int sna_create_gc(GCPtr gc)
return true;
}
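+/* Fast GetImage: replicate a clear colour directly, or, when the pixmap
+ * is wholly on a busy GPU, wrap the caller's buffer with userptr and
+ * blit into it rather than stalling for a full readback.
+ */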
+static bool
+sna_get_image_blt(DrawablePtr drawable,
+ RegionPtr region,
+ char *dst)
+{
+ PixmapPtr pixmap = get_drawable_pixmap(drawable);
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ struct sna *sna = to_sna_from_pixmap(pixmap);
+ struct kgem_bo *dst_bo;
+ bool ok = false;
+ int pitch;
+
+ if (!USE_USERPTR_DOWNLOADS)
+ return false;
+
+ if (priv == NULL)
+ return false;
+
+ if (priv->clear) {
+ int w = region->extents.x2 - region->extents.x1;
+ int h = region->extents.y2 - region->extents.y1;
+
+ pitch = PixmapBytePad(w, pixmap->drawable.depth);
+ if (priv->clear_color == 0 ||
+ pixmap->drawable.bitsPerPixel == 8) {
+ memset(dst, priv->clear_color, pitch * h);
+ } else {
+ pixman_fill((uint32_t *)dst,
+ pitch/sizeof(uint32_t),
+ pixmap->drawable.bitsPerPixel,
+ 0, 0,
+ w, h,
+ priv->clear_color);
+ }
+
+ return true;
+ }
+
+ if (!sna->kgem.has_userptr)
+ return false;
+
+ if (!DAMAGE_IS_ALL(priv->gpu_damage) ||
+ !__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo))
+ return false;
+
+ DBG(("%s: download through a temporary map\n", __FUNCTION__));
+
+ pitch = PixmapBytePad(region->extents.x2 - region->extents.x1,
+ drawable->depth);
+ dst_bo = kgem_create_map(&sna->kgem, dst,
+ pitch * (region->extents.y2 - region->extents.y1),
+ false);
+ if (dst_bo) {
+ int16_t dx, dy;
+
+ dst_bo->flush = true;
+ dst_bo->pitch = pitch;
+ dst_bo->reusable = false;
+
+ get_drawable_deltas(drawable, pixmap, &dx, &dy);
+
+ ok = sna->render.copy_boxes(sna, GXcopy,
+ pixmap, priv->gpu_bo, dx, dy,
+ pixmap, dst_bo,
+ -region->extents.x1,
+ -region->extents.y1,
+ &region->extents, 1,
+ COPY_LAST);
+
+ kgem_bo_sync__cpu(&sna->kgem, dst_bo);
+ kgem_bo_destroy(&sna->kgem, dst_bo);
+ }
+
+ return ok;
+}
+
static void
sna_get_image(DrawablePtr drawable,
int x, int y, int w, int h,
@@ -13552,6 +13480,7 @@ sna_get_image(DrawablePtr drawable,
{
RegionRec region;
unsigned int flags;
+ bool can_blt;
if (!fbDrawableEnabled(drawable))
return;
@@ -13564,6 +13493,13 @@ sna_get_image(DrawablePtr drawable,
region.extents.y2 = region.extents.y1 + h;
region.data = NULL;
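+	/* check blt-compatible requests up front so the direct download
+	 * can run before any move-to-cpu */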
+ can_blt = format == ZPixmap &&
+ drawable->bitsPerPixel >= 8 &&
+ PM_IS_SOLID(drawable, mask);
+
+ if (can_blt && sna_get_image_blt(drawable, &region, dst))
+ return;
+
flags = MOVE_READ;
if ((w | h) == 1)
flags |= MOVE_INPLACE_HINT;
@@ -13572,9 +13508,7 @@ sna_get_image(DrawablePtr drawable,
if (!sna_drawable_move_region_to_cpu(drawable, &region, flags))
return;
- if (format == ZPixmap &&
- drawable->bitsPerPixel >= 8 &&
- PM_IS_SOLID(drawable, mask)) {
+ if (can_blt) {
PixmapPtr pixmap = get_drawable_pixmap(drawable);
int16_t dx, dy;
@@ -13629,7 +13563,7 @@ sna_copy_window(WindowPtr win, DDXPointRec origin, RegionPtr src)
RegionNull(&dst);
RegionIntersect(&dst, &win->borderClip, src);
- if (!RegionNotEmpty(&dst))
+ if (RegionNil(&dst))
return;
#ifdef COMPOSITE
@@ -13697,8 +13631,10 @@ sna_accel_flush_callback(CallbackListPtr *list,
list_del(&priv->list);
if (priv->shm) {
- DBG(("%s: syncing SHM pixmap=%ld\n", __FUNCTION__,
- priv->pixmap->drawable.serialNumber));
+ DBG(("%s: syncing SHM pixmap=%ld (refcnt=%d)\n",
+ __FUNCTION__,
+ priv->pixmap->drawable.serialNumber,
+ priv->pixmap->refcnt));
ret = sna_pixmap_move_to_cpu(priv->pixmap,
MOVE_READ | MOVE_WRITE);
assert(!ret || priv->gpu_bo == NULL);
@@ -13707,8 +13643,9 @@ sna_accel_flush_callback(CallbackListPtr *list,
} else {
DBG(("%s: flushing DRI pixmap=%ld\n", __FUNCTION__,
priv->pixmap->drawable.serialNumber));
- ret = sna_pixmap_move_to_gpu(priv->pixmap,
- MOVE_READ | __MOVE_FORCE);
+ if (sna_pixmap_move_to_gpu(priv->pixmap,
+ MOVE_READ | __MOVE_FORCE))
+ kgem_bo_unclean(&sna->kgem, priv->gpu_bo);
}
(void)ret;
}
@@ -13810,6 +13747,15 @@ static bool stop_flush(struct sna *sna, struct sna_pixmap *scanout)
return scanout->cpu_damage || scanout->gpu_bo->needs_flush;
}
+static void timer_enable(struct sna *sna, int whom, int interval)
+{
+ if (!sna->timer_active)
+ UpdateCurrentTimeIf();
+ sna->timer_active |= 1 << whom;
+ sna->timer_expire[whom] = TIME + interval;
+ DBG(("%s (time=%ld), starting timer %d\n", __FUNCTION__, (long)TIME, whom));
+}
+
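timer_enable() factors out the timer-arming boilerplate deleted in the hunks below; UpdateCurrentTimeIf() runs only on the transition from no-timers-active, so TIME is refreshed at most once per arming. Typical call, as in the rewritten sna_accel_do_flush():

if (!start_flush(sna, priv)) {
	if (priv)
		kgem_bo_flush(&sna->kgem, priv->gpu_bo);	/* nothing to defer */
} else
	timer_enable(sna, FLUSH_TIMER, interval / 2);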
static bool sna_accel_do_flush(struct sna *sna)
{
struct sna_pixmap *priv;
@@ -13822,9 +13768,6 @@ static bool sna_accel_do_flush(struct sna *sna)
return false;
}
- if (sna->flags & SNA_NO_DELAYED_FLUSH)
- return true;
-
interval = sna->vblank_interval ?: 20;
if (sna->timer_active & (1<<(FLUSH_TIMER))) {
int32_t delta = sna->timer_expire[FLUSH_TIMER] - TIME;
@@ -13835,26 +13778,18 @@ static bool sna_accel_do_flush(struct sna *sna)
sna->timer_expire[FLUSH_TIMER] = TIME + interval;
return true;
}
- } else {
- if (!start_flush(sna, priv)) {
- DBG(("%s -- no pending write to scanout\n", __FUNCTION__));
- if (priv)
- kgem_bo_flush(&sna->kgem, priv->gpu_bo);
- } else {
- sna->timer_active |= 1 << FLUSH_TIMER;
- sna->timer_expire[FLUSH_TIMER] = TIME + interval / 2;
- DBG(("%s (time=%ld), starting\n", __FUNCTION__, (long)TIME));
- }
- }
+ } else if (!start_flush(sna, priv)) {
+ DBG(("%s -- no pending write to scanout\n", __FUNCTION__));
+ if (priv)
+ kgem_bo_flush(&sna->kgem, priv->gpu_bo);
+ } else
+ timer_enable(sna, FLUSH_TIMER, interval/2);
return false;
}
static bool sna_accel_do_throttle(struct sna *sna)
{
- if (sna->flags & SNA_NO_THROTTLE)
- return false;
-
if (sna->timer_active & (1<<(THROTTLE_TIMER))) {
int32_t delta = sna->timer_expire[THROTTLE_TIMER] - TIME;
if (delta <= 3) {
@@ -13862,15 +13797,10 @@ static bool sna_accel_do_throttle(struct sna *sna)
sna->timer_expire[THROTTLE_TIMER] = TIME + 20;
return true;
}
- } else {
- if (!sna->kgem.need_retire) {
- DBG(("%s -- no pending activity\n", __FUNCTION__));
- } else {
- DBG(("%s (time=%ld), starting\n", __FUNCTION__, (long)TIME));
- sna->timer_active |= 1 << THROTTLE_TIMER;
- sna->timer_expire[THROTTLE_TIMER] = TIME + 20;
- }
- }
+ } else if (!sna->kgem.need_retire) {
+ DBG(("%s -- no pending activity\n", __FUNCTION__));
+ } else
+ timer_enable(sna, THROTTLE_TIMER, 20);
return false;
}
@@ -13885,65 +13815,12 @@ static bool sna_accel_do_expire(struct sna *sna)
TIME + MAX_INACTIVE_TIME * 1000;
return true;
}
- } else {
- if (sna->kgem.need_expire) {
- sna->timer_active |= 1 << EXPIRE_TIMER;
- sna->timer_expire[EXPIRE_TIMER] =
- TIME + MAX_INACTIVE_TIME * 1000;
- DBG(("%s (time=%ld), starting\n", __FUNCTION__, (long)TIME));
- }
- }
+ } else if (sna->kgem.need_expire)
+ timer_enable(sna, EXPIRE_TIMER, MAX_INACTIVE_TIME * 1000);
return false;
}
-static bool sna_accel_do_inactive(struct sna *sna)
-{
- if (!USE_INACTIVE)
- return false;
-
- if (sna->timer_active & (1<<(INACTIVE_TIMER))) {
- int32_t delta = sna->timer_expire[INACTIVE_TIMER] - TIME;
- if (delta <= 3) {
- sna->timer_expire[INACTIVE_TIMER] =
- TIME + 120 * 1000;
- DBG(("%s (time=%ld), triggered\n", __FUNCTION__, (long)TIME));
- return true;
- }
- } else {
- if (!list_is_empty(&sna->active_pixmaps)) {
- sna->timer_active |= 1 << INACTIVE_TIMER;
- sna->timer_expire[INACTIVE_TIMER] =
- TIME + 120 * 1000;
- DBG(("%s (time=%ld), starting\n", __FUNCTION__, (long)TIME));
- }
- }
-
- return false;
-}
-
-static int32_t sna_timeout(struct sna *sna)
-{
- int32_t now = TIME, next = 0;
- int i;
-
- DBG(("%s: now=%d, active=%08x\n",
- __FUNCTION__, (int)now, sna->timer_active));
- for (i = 0; i < NUM_TIMERS; i++) {
- if (sna->timer_active & (1 << i)) {
- int32_t delta = sna->timer_expire[i] - now;
- DBG(("%s: timer[%d] expires in %d [%d]\n",
- __FUNCTION__, i, delta, sna->timer_expire[i]));
- if (next == 0 || delta < next)
- next = delta;
- }
- }
-
- DBG(("%s: active=%08x, next=+%d\n",
- __FUNCTION__, sna->timer_active, next));
- return next;
-}
-
static void sna_accel_post_damage(struct sna *sna)
{
#if HAS_PIXMAP_SHARING
@@ -13958,7 +13835,7 @@ static void sna_accel_post_damage(struct sna *sna)
int n;
damage = DamageRegion(dirty->damage);
- if (!RegionNotEmpty(damage))
+ if (RegionNil(damage))
continue;
src = dirty->src;
@@ -13979,9 +13856,19 @@ static void sna_accel_post_damage(struct sna *sna)
region.extents.x2, region.extents.y2));
RegionIntersect(&region, &region, damage);
+ if (RegionNil(&region))
+ goto skip;
+
+ RegionTranslate(&region, -dirty->x, -dirty->y);
+ DamageRegionAppend(&dirty->slave_dst->drawable, &region);
+
+ DBG(("%s: slave: ((%d, %d), (%d, %d))x%d\n", __FUNCTION__,
+ region.extents.x1, region.extents.y1,
+ region.extents.x2, region.extents.y2,
+		     RegionNumRects(&region)));
- box = REGION_RECTS(&region);
- n = REGION_NUM_RECTS(&region);
+ box = RegionRects(&region);
+ n = RegionNumRects(&region);
if (wedged(sna)) {
fallback:
if (!sna_pixmap_move_to_cpu(src, MOVE_READ))
@@ -13994,53 +13881,52 @@ fallback:
do {
DBG(("%s: copy box (%d, %d)->(%d, %d)x(%d, %d)\n",
__FUNCTION__,
+ box->x1 + dirty->x, box->y1 + dirty->y,
box->x1, box->y1,
- box->x1 - dirty->x, box->y1 - dirty->y,
box->x2 - box->x1, box->y2 - box->y1));
assert(box->x2 > box->x1);
assert(box->y2 > box->y1);
+ assert(box->x1 + dirty->x >= 0);
+ assert(box->y1 + dirty->y >= 0);
+ assert(box->x2 + dirty->x <= src->drawable.width);
+ assert(box->y2 + dirty->y <= src->drawable.height);
+
assert(box->x1 >= 0);
assert(box->y1 >= 0);
assert(box->x2 <= src->drawable.width);
assert(box->y2 <= src->drawable.height);
- assert(box->x1 - dirty->x >= 0);
- assert(box->y1 - dirty->y >= 0);
- assert(box->x2 - dirty->x <= src->drawable.width);
- assert(box->y2 - dirty->y <= src->drawable.height);
-
memcpy_blt(src->devPrivate.ptr,
dst->devPrivate.ptr,
src->drawable.bitsPerPixel,
src->devKind, dst->devKind,
- box->x1, box->y1,
- box->x1 - dirty->x,
- box->y1 - dirty->y,
+ box->x1 + dirty->x,
+ box->y1 + dirty->y,
+ box->x1,
+ box->y1,
box->x2 - box->x1,
box->y2 - box->y1);
box++;
} while (--n);
} else {
- if (!sna_pixmap_move_to_gpu(src, MOVE_READ | __MOVE_FORCE))
+ if (!sna_pixmap_move_to_gpu(src, MOVE_READ | MOVE_ASYNC_HINT | __MOVE_FORCE))
goto fallback;
- if (!sna_pixmap_move_to_gpu(dst, MOVE_READ | MOVE_WRITE | __MOVE_FORCE))
+ if (!sna_pixmap_move_to_gpu(dst, MOVE_READ | MOVE_WRITE | MOVE_ASYNC_HINT | __MOVE_FORCE))
goto fallback;
if (!sna->render.copy_boxes(sna, GXcopy,
- src, sna_pixmap_get_bo(src), 0, 0,
- dst, sna_pixmap_get_bo(dst), -dirty->x, -dirty->y,
+ src, sna_pixmap_get_bo(src), dirty->x, dirty->y,
+					    dst, sna_pixmap_get_bo(dst), 0, 0,
box, n, COPY_LAST))
goto fallback;
flush = true;
}
- RegionTranslate(&region, -dirty->x, -dirty->y);
- DamageRegionAppend(&dirty->slave_dst->drawable, &region);
-
+ DamageRegionProcessPending(&dirty->slave_dst->drawable);
skip:
RegionUninit(&region);
DamageEmpty(dirty->damage);
@@ -14099,105 +13985,6 @@ static void sna_accel_expire(struct sna *sna)
sna_accel_disarm_timer(sna, EXPIRE_TIMER);
}
-static void sna_accel_inactive(struct sna *sna)
-{
- struct sna_pixmap *priv;
- struct list preserve;
-
- DBG(("%s (time=%ld)\n", __FUNCTION__, (long)TIME));
-
-#if HAS_FULL_DEBUG
- {
- unsigned count, bytes;
-
- count = bytes = 0;
- list_for_each_entry(priv, &sna->inactive_clock[1], inactive)
- if (!priv->pinned)
- count++, bytes += kgem_bo_size(priv->gpu_bo);
-
- DBG(("%s: trimming %d inactive GPU buffers, %d bytes\n",
- __FUNCTION__, count, bytes));
-
- count = bytes = 0;
- list_for_each_entry(priv, &sna->active_pixmaps, inactive) {
- if (priv->ptr &&
- sna_damage_is_all(&priv->gpu_damage,
- priv->pixmap->drawable.width,
- priv->pixmap->drawable.height)) {
- count++, bytes += priv->pixmap->devKind * priv->pixmap->drawable.height;
- }
- }
-
- DBG(("%s: trimming %d inactive CPU buffers, %d bytes\n",
- __FUNCTION__, count, bytes));
- }
-#endif
-
- /* clear out the oldest inactive pixmaps */
- list_init(&preserve);
- while (!list_is_empty(&sna->inactive_clock[1])) {
- priv = list_first_entry(&sna->inactive_clock[1],
- struct sna_pixmap,
- inactive);
- assert((priv->create & KGEM_CAN_CREATE_LARGE) == 0);
- assert(priv->gpu_bo);
- assert(!priv->gpu_bo->proxy);
-
- /* XXX Rather than discarding the GPU buffer here, we
- * could mark it purgeable and allow the shrinker to
- * reap its storage only under memory pressure.
- */
- list_del(&priv->inactive);
- if (priv->pinned)
- continue;
-
- if (priv->ptr &&
- sna_damage_is_all(&priv->gpu_damage,
- priv->pixmap->drawable.width,
- priv->pixmap->drawable.height)) {
- DBG(("%s: discarding inactive CPU shadow\n",
- __FUNCTION__));
- sna_damage_destroy(&priv->cpu_damage);
- list_del(&priv->list);
-
- assert(priv->cpu_bo == NULL || !priv->cpu_bo->flush);
- assert(!priv->shm);
- sna_pixmap_free_cpu(sna, priv);
- priv->undamaged = false;
- priv->cpu = false;
-
- list_add(&priv->inactive, &preserve);
- } else {
- DBG(("%s: discarding inactive GPU bo handle=%d\n",
- __FUNCTION__, priv->gpu_bo->handle));
- if (!sna_pixmap_move_to_cpu(priv->pixmap,
- MOVE_READ | MOVE_WRITE | MOVE_ASYNC_HINT))
- list_add(&priv->inactive, &preserve);
- }
- }
-
- /* Age the current inactive pixmaps */
- sna->inactive_clock[1].next = sna->inactive_clock[0].next;
- sna->inactive_clock[0].next->prev = &sna->inactive_clock[1];
- sna->inactive_clock[0].prev->next = &sna->inactive_clock[1];
- sna->inactive_clock[1].prev = sna->inactive_clock[0].prev;
-
- sna->inactive_clock[0].next = sna->active_pixmaps.next;
- sna->active_pixmaps.next->prev = &sna->inactive_clock[0];
- sna->active_pixmaps.prev->next = &sna->inactive_clock[0];
- sna->inactive_clock[0].prev = sna->active_pixmaps.prev;
-
- sna->active_pixmaps.next = preserve.next;
- preserve.next->prev = &sna->active_pixmaps;
- preserve.prev->next = &sna->active_pixmaps;
- sna->active_pixmaps.prev = preserve.prev;
-
- if (list_is_empty(&sna->inactive_clock[1]) &&
- list_is_empty(&sna->inactive_clock[0]) &&
- list_is_empty(&sna->active_pixmaps))
- sna_accel_disarm_timer(sna, INACTIVE_TIMER);
-}
-
#ifdef DEBUG_MEMORY
static bool sna_accel_do_debug_memory(struct sna *sna)
{
@@ -14236,7 +14023,7 @@ sna_get_window_pixmap(WindowPtr window)
static void
sna_set_window_pixmap(WindowPtr window, PixmapPtr pixmap)
{
- *(PixmapPtr *)dixGetPrivateAddr(&window->devPrivates, &sna_window_key) = pixmap;
+ *(PixmapPtr *)__get_private(window, sna_window_key) = pixmap;
}
static Bool
@@ -14306,11 +14093,15 @@ static bool sna_picture_init(ScreenPtr screen)
{
PictureScreenPtr ps;
+ DBG(("%s\n", __FUNCTION__));
+
if (!miPictureInit(screen, NULL, 0))
return false;
ps = GetPictureScreen(screen);
assert(ps != NULL);
+ assert(ps->CreatePicture != NULL);
+ assert(ps->DestroyPicture != NULL);
ps->Composite = sna_composite;
ps->CompositeRects = sna_composite_rectangles;
@@ -14320,25 +14111,38 @@ static bool sna_picture_init(ScreenPtr screen)
ps->UnrealizeGlyph = sna_glyph_unrealize;
ps->AddTraps = sna_add_traps;
ps->Trapezoids = sna_composite_trapezoids;
+#if HAS_PIXMAN_TRIANGLES
ps->Triangles = sna_composite_triangles;
#if PICTURE_SCREEN_VERSION >= 2
ps->TriStrip = sna_composite_tristrip;
ps->TriFan = sna_composite_trifan;
#endif
+#endif
return true;
}
+static bool sna_option_accel_blt(struct sna *sna)
+{
+ const char *s;
+
+ s = xf86GetOptValString(sna->Options, OPTION_ACCEL_METHOD);
+ if (s == NULL)
+ return false;
+
+ return strcasecmp(s, "blt") == 0;
+}
+
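sna_option_accel_blt() gives users a switch to bypass the 3D render backends and use only the BLT ring. Assuming OPTION_ACCEL_METHOD is the driver's usual AccelMethod option, the matching configuration would be:

Section "Device"
	Identifier "Intel Graphics"
	Driver     "intel"
	Option     "AccelMethod" "blt"
EndSection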
bool sna_accel_init(ScreenPtr screen, struct sna *sna)
{
const char *backend;
+ DBG(("%s\n", __FUNCTION__));
+
sna_font_key = AllocateFontPrivateIndex();
list_init(&sna->flush_pixmaps);
list_init(&sna->active_pixmaps);
- list_init(&sna->inactive_clock[0]);
- list_init(&sna->inactive_clock[1]);
AddGeneralSocket(sna->kgem.fd);
@@ -14404,33 +14208,30 @@ bool sna_accel_init(ScreenPtr screen, struct sna *sna)
return false;
backend = "no";
- sna->have_render = false;
no_render_init(sna);
-#if !DEBUG_NO_RENDER
- if (sna->info->gen >= 80) {
- } else if (sna->info->gen >= 70) {
- if ((sna->have_render = gen7_render_init(sna)))
+ if (sna_option_accel_blt(sna) || sna->info->gen >= 0100) {
+ } else if (sna->info->gen >= 070) {
+ if (gen7_render_init(sna))
backend = "IvyBridge";
- } else if (sna->info->gen >= 60) {
- if ((sna->have_render = gen6_render_init(sna)))
+ } else if (sna->info->gen >= 060) {
+ if (gen6_render_init(sna))
backend = "SandyBridge";
- } else if (sna->info->gen >= 50) {
- if ((sna->have_render = gen5_render_init(sna)))
+ } else if (sna->info->gen >= 050) {
+ if (gen5_render_init(sna))
backend = "Ironlake";
- } else if (sna->info->gen >= 40) {
- if ((sna->have_render = gen4_render_init(sna)))
- backend = "Broadwater";
- } else if (sna->info->gen >= 30) {
- if ((sna->have_render = gen3_render_init(sna)))
+ } else if (sna->info->gen >= 040) {
+ if (gen4_render_init(sna))
+ backend = "Broadwater/Crestline";
+ } else if (sna->info->gen >= 030) {
+ if (gen3_render_init(sna))
backend = "gen3";
- } else if (sna->info->gen >= 20) {
- if ((sna->have_render = gen2_render_init(sna)))
+ } else if (sna->info->gen >= 020) {
+ if (gen2_render_init(sna))
backend = "gen2";
}
-#endif
- DBG(("%s(backend=%s, have_render=%d)\n",
- __FUNCTION__, backend, sna->have_render));
+ DBG(("%s(backend=%s, prefer_gpu=%x)\n",
+ __FUNCTION__, backend, sna->render.prefer_gpu));
kgem_reset(&sna->kgem);
@@ -14443,6 +14244,8 @@ bool sna_accel_init(ScreenPtr screen, struct sna *sna)
void sna_accel_create(struct sna *sna)
{
+ DBG(("%s\n", __FUNCTION__));
+
if (!sna_glyphs_create(sna))
goto fail;
@@ -14457,7 +14260,6 @@ void sna_accel_create(struct sna *sna)
fail:
xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR,
"Failed to allocate caches, disabling RENDER acceleration\n");
- sna->have_render = false;
no_render_init(sna);
}
@@ -14481,6 +14283,8 @@ void sna_accel_watch_flush(struct sna *sna, int enable)
void sna_accel_close(struct sna *sna)
{
+ DBG(("%s\n", __FUNCTION__));
+
sna_composite_close(sna);
sna_gradients_close(sna);
sna_glyphs_close(sna);
@@ -14500,24 +14304,25 @@ void sna_accel_close(struct sna *sna)
void sna_accel_block_handler(struct sna *sna, struct timeval **tv)
{
- UpdateCurrentTimeIf();
+ if (sna->timer_active)
+ UpdateCurrentTimeIf();
- if (sna->kgem.nbatch && kgem_is_idle(&sna->kgem)) {
+ if (sna->kgem.nbatch &&
+ (sna->kgem.scanout_busy ||
+ kgem_ring_is_idle(&sna->kgem, sna->kgem.ring))) {
DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
_kgem_submit(&sna->kgem);
}
if (sna_accel_do_flush(sna))
sna_accel_flush(sna);
- assert(sna->flags & SNA_NO_DELAYED_FLUSH ||
- sna_accel_scanout(sna) == NULL ||
+ assert(sna_accel_scanout(sna) == NULL ||
sna_accel_scanout(sna)->gpu_bo->exec == NULL ||
sna->timer_active & (1<<(FLUSH_TIMER)));
if (sna_accel_do_throttle(sna))
sna_accel_throttle(sna);
- assert(sna->flags & SNA_NO_THROTTLE ||
- !sna->kgem.need_retire ||
+ assert(!sna->kgem.need_retire ||
sna->timer_active & (1<<(THROTTLE_TIMER)));
if (sna_accel_do_expire(sna))
@@ -14525,9 +14330,6 @@ void sna_accel_block_handler(struct sna *sna, struct timeval **tv)
assert(!sna->kgem.need_expire ||
sna->timer_active & (1<<(EXPIRE_TIMER)));
- if (sna_accel_do_inactive(sna))
- sna_accel_inactive(sna);
-
if (sna_accel_do_debug_memory(sna))
sna_accel_debug_memory(sna);
@@ -14537,24 +14339,28 @@ void sna_accel_block_handler(struct sna *sna, struct timeval **tv)
sna->watch_flush = 0;
}
- if (sna->timer_active) {
+ if (sna->timer_active & 1) {
int32_t timeout;
DBG(("%s: evaluating timers, active=%x\n",
__FUNCTION__, sna->timer_active));
- timeout = sna_timeout(sna);
- if (timeout) {
- if (*tv == NULL) {
- *tv = &sna->timer_tv;
- goto set_tv;
- }
- if ((*tv)->tv_sec * 1000 + (*tv)->tv_usec / 1000 > timeout) {
+
+ timeout = sna->timer_expire[0] - TIME;
+ DBG(("%s: flush timer expires in %d [%d]\n",
+ __FUNCTION__, timeout, sna->timer_expire[0]));
+
+ if (*tv == NULL) {
+ *tv = &sna->timer_tv;
+ goto set_tv;
+ }
+ if ((*tv)->tv_sec * 1000 + (*tv)->tv_usec / 1000 > timeout) {
set_tv:
- (*tv)->tv_sec = timeout / 1000;
- (*tv)->tv_usec = timeout % 1000 * 1000;
- }
+ (*tv)->tv_sec = timeout / 1000;
+ (*tv)->tv_usec = timeout % 1000 * 1000;
}
}
+
+ sna->kgem.scanout_busy = false;
}
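With the throttle/expire timers no longer driving the select() timeout, only the flush timer (bit 0) matters, which is why the generic sna_timeout() scan was deleted above. A sketch of the reduced computation, assuming FLUSH_TIMER == 0 as the `timer_active & 1` test implies:

int32_t timeout = sna->timer_expire[FLUSH_TIMER] - TIME;
bool set = (*tv == NULL);
if (set)
	*tv = &sna->timer_tv;	/* no one else is waiting */
if (set || (*tv)->tv_sec * 1000 + (*tv)->tv_usec / 1000 > timeout) {
	(*tv)->tv_sec = timeout / 1000;
	(*tv)->tv_usec = timeout % 1000 * 1000;
}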
void sna_accel_wakeup_handler(struct sna *sna)
@@ -14563,14 +14369,22 @@ void sna_accel_wakeup_handler(struct sna *sna)
if (sna->kgem.need_retire)
kgem_retire(&sna->kgem);
- if (!sna->mode.shadow_active && !sna->kgem.need_retire) {
+ if (sna->kgem.nbatch && !sna->kgem.need_retire) {
DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
- kgem_submit(&sna->kgem);
+ _kgem_submit(&sna->kgem);
}
if (sna->kgem.need_purge)
kgem_purge_cache(&sna->kgem);
+
+ if (FAULT_INJECTION && (rand() % FAULT_INJECTION) == 0) {
+ ErrorF("%s hardware acceleration\n",
+ sna->kgem.wedged ? "Re-enabling" : "Disabling");
+ kgem_submit(&sna->kgem);
+ sna->kgem.wedged = !sna->kgem.wedged;
+ }
}
void sna_accel_free(struct sna *sna)
{
+ DBG(("%s\n", __FUNCTION__));
}
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 7410eb116..edfcb9ea4 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -119,7 +119,7 @@ static bool sna_blt_fill_init(struct sna *sna,
blt->br13 = bo->pitch;
blt->cmd = XY_SCANLINE_BLT;
- if (kgem->gen >= 40 && bo->tiling) {
+ if (kgem->gen >= 040 && bo->tiling) {
blt->cmd |= BLT_DST_TILED;
blt->br13 >>= 2;
}
@@ -145,11 +145,12 @@ static bool sna_blt_fill_init(struct sna *sna,
blt->pixel = pixel;
blt->bpp = bpp;
- kgem_set_mode(kgem, KGEM_BLT);
+ kgem_set_mode(kgem, KGEM_BLT, bo);
if (!kgem_check_batch(kgem, 12) ||
!kgem_check_bo_fenced(kgem, bo)) {
- _kgem_submit(kgem);
- assert(kgem_check_bo_fenced(kgem, bo));
+ kgem_submit(kgem);
+ if (!kgem_check_bo_fenced(kgem, bo))
+ return false;
_kgem_set_mode(kgem, KGEM_BLT);
}
@@ -267,14 +268,14 @@ static bool sna_blt_copy_init(struct sna *sna,
blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
blt->pitch[0] = src->pitch;
- if (kgem->gen >= 40 && src->tiling) {
+ if (kgem->gen >= 040 && src->tiling) {
blt->cmd |= BLT_SRC_TILED;
blt->pitch[0] >>= 2;
}
assert(blt->pitch[0] <= MAXSHORT);
blt->pitch[1] = dst->pitch;
- if (kgem->gen >= 40 && dst->tiling) {
+ if (kgem->gen >= 040 && dst->tiling) {
blt->cmd |= BLT_DST_TILED;
blt->pitch[1] >>= 2;
}
@@ -289,9 +290,9 @@ static bool sna_blt_copy_init(struct sna *sna,
case 8: break;
}
- kgem_set_mode(kgem, KGEM_BLT);
+ kgem_set_mode(kgem, KGEM_BLT, dst);
if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
- _kgem_submit(kgem);
+ kgem_submit(kgem);
if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
return false;
_kgem_set_mode(kgem, KGEM_BLT);
@@ -317,14 +318,14 @@ static bool sna_blt_alpha_fixup_init(struct sna *sna,
blt->cmd = XY_FULL_MONO_PATTERN_BLT;
blt->pitch[0] = src->pitch;
- if (kgem->gen >= 40 && src->tiling) {
+ if (kgem->gen >= 040 && src->tiling) {
blt->cmd |= BLT_SRC_TILED;
blt->pitch[0] >>= 2;
}
assert(blt->pitch[0] <= MAXSHORT);
blt->pitch[1] = dst->pitch;
- if (kgem->gen >= 40 && dst->tiling) {
+ if (kgem->gen >= 040 && dst->tiling) {
blt->cmd |= BLT_DST_TILED;
blt->pitch[1] >>= 2;
}
@@ -341,9 +342,9 @@ static bool sna_blt_alpha_fixup_init(struct sna *sna,
}
blt->pixel = alpha;
- kgem_set_mode(kgem, KGEM_BLT);
+ kgem_set_mode(kgem, KGEM_BLT, dst);
if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
- _kgem_submit(kgem);
+ kgem_submit(kgem);
if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
return false;
_kgem_set_mode(kgem, KGEM_BLT);
@@ -433,7 +434,7 @@ static void sna_blt_copy_one(struct sna *sna,
kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (blt->cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB))) &&
kgem->batch[kgem->nbatch-4] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
kgem->batch[kgem->nbatch-3] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width)) &&
- kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->handle) {
+ kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->target_handle) {
DBG(("%s: replacing last fill\n", __FUNCTION__));
if (kgem_check_batch(kgem, 8-6)) {
b = kgem->batch + kgem->nbatch - 6;
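The handle -> target_handle comparisons in this file account for execbuffer relocations that may now be stored handle-LUT relative. A comment-only sketch of the assumed relationship (see kgem.c for the authoritative version):

/* with I915_EXEC_HANDLE_LUT the kernel resolves reloc targets through a
 * per-batch table, so kgem stores that table index in bo->target_handle;
 * without the LUT, bo->target_handle == bo->handle. Either way it is the
 * value kgem_add_reloc() wrote, hence the comparison above. */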
@@ -933,6 +934,76 @@ static void blt_composite_fill_boxes_no_offset(struct sna *sna,
_sna_blt_fill_boxes(sna, &op->u.blt, box, n);
}
+static void blt_composite_fill_boxes_no_offset__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ struct kgem *kgem = &sna->kgem;
+ const struct sna_blt_state *blt = &op->u.blt;
+ uint32_t cmd = blt->cmd;
+
+ DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));
+
+ sna_vertex_lock(&sna->render);
+ if (!kgem_check_batch(kgem, 3)) {
+ sna_vertex_wait__locked(&sna->render);
+ sna_blt_fill_begin(sna, blt);
+ }
+
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ kgem->nbatch += 3 * nbox_this_time;
+ assert(kgem->nbatch < kgem->surface);
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ while (nbox_this_time >= 8) {
+ b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
+ b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
+ b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
+ b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
+ b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++;
+ b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++;
+ b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++;
+ b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++;
+ b += 24;
+ nbox_this_time -= 8;
+ }
+ if (nbox_this_time & 4) {
+ b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
+ b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
+ b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
+ b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
+ b += 12;
+ }
+ if (nbox_this_time & 2) {
+ b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
+ b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
+ b += 6;
+ }
+ if (nbox_this_time & 1) {
+ b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
+ }
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ if (!nbox)
+ break;
+
+ sna_vertex_wait__locked(&sna->render);
+ sna_blt_fill_begin(sna, blt);
+ } while (1);
+ sna_vertex_unlock(&sna->render);
+}
+
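The new __thread emitters follow a reserve-then-fill scheme so several threads can pack boxes into one batch: batch space is claimed while the render mutex is held, then the claimed words are written without the lock. The locking skeleton, extracted as a sketch:

sna_vertex_lock(&sna->render);
b = kgem->batch + kgem->nbatch;		/* start of this thread's claim */
kgem->nbatch += 3 * nbox;		/* reserve while serialized */
sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);

/* ... fill b[0 .. 3*nbox-1] concurrently with other threads ... */

sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
sna_vertex_unlock(&sna->render);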
fastcall static void blt_composite_fill_box(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box)
@@ -956,6 +1027,92 @@ static void blt_composite_fill_boxes(struct sna *sna,
} while (--n);
}
+static inline uint64_t add4(const BoxRec *b, int16_t x, int16_t y)
+{
+ union {
+ uint64_t v;
+ int16_t i[4];
+ } vi;
+ vi.v = *(uint64_t *)b;
+ vi.i[0] += x;
+ vi.i[1] += y;
+ vi.i[2] += x;
+ vi.i[3] += y;
+ return vi.v;
+}
+
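add4() translates both corners of a BoxRec in one shot by treating it as four packed int16 lanes {x1, y1, x2, y2}, so the unrolled loops below cost one 64-bit store per box. For example:

BoxRec box = { 10, 20, 30, 40 };	/* x1, y1, x2, y2 */
uint64_t v = add4(&box, 5, 7);		/* lanes now {15, 27, 35, 47} */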
+static void blt_composite_fill_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ struct kgem *kgem = &sna->kgem;
+ const struct sna_blt_state *blt = &op->u.blt;
+ uint32_t cmd = blt->cmd;
+ int16_t dx = op->dst.x;
+ int16_t dy = op->dst.y;
+
+ DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));
+
+ sna_vertex_lock(&sna->render);
+ if (!kgem_check_batch(kgem, 3)) {
+ sna_vertex_wait__locked(&sna->render);
+ sna_blt_fill_begin(sna, blt);
+ }
+
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ kgem->nbatch += 3 * nbox_this_time;
+ assert(kgem->nbatch < kgem->surface);
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ while (nbox_this_time >= 8) {
+ b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
+ b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
+ b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
+ b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
+ b[12] = cmd; *(uint64_t *)(b+13) = add4(box++, dx, dy);
+ b[15] = cmd; *(uint64_t *)(b+16) = add4(box++, dx, dy);
+ b[18] = cmd; *(uint64_t *)(b+19) = add4(box++, dx, dy);
+ b[21] = cmd; *(uint64_t *)(b+22) = add4(box++, dx, dy);
+ b += 24;
+ nbox_this_time -= 8;
+ }
+ if (nbox_this_time & 4) {
+ b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
+ b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
+ b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
+ b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
+ b += 12;
+ }
+ if (nbox_this_time & 2) {
+ b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
+ b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
+ b += 6;
+ }
+ if (nbox_this_time & 1) {
+ b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
+ }
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ if (!nbox)
+ break;
+
+ sna_vertex_wait__locked(&sna->render);
+ sna_blt_fill_begin(sna, blt);
+ } while (1);
+ sna_vertex_unlock(&sna->render);
+}
+
fastcall
static void blt_composite_nop(struct sna *sna,
const struct sna_composite_op *op,
@@ -980,8 +1137,10 @@ begin_blt(struct sna *sna,
struct sna_composite_op *op)
{
if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo)) {
- _kgem_submit(&sna->kgem);
- assert(kgem_check_bo_fenced(&sna->kgem, op->dst.bo));
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo))
+ return false;
+
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
@@ -1011,6 +1170,7 @@ prepare_blt_clear(struct sna *sna,
op->blt = blt_composite_fill__cpu;
op->box = blt_composite_fill_box__cpu;
op->boxes = blt_composite_fill_boxes__cpu;
+ op->thread_boxes = blt_composite_fill_boxes__cpu;
op->done = nop_done;
op->u.blt.pixel = 0;
return true;
@@ -1020,9 +1180,11 @@ prepare_blt_clear(struct sna *sna,
if (op->dst.x|op->dst.y) {
op->box = blt_composite_fill_box;
op->boxes = blt_composite_fill_boxes;
+ op->thread_boxes = blt_composite_fill_boxes__thread;
} else {
op->box = blt_composite_fill_box_no_offset;
op->boxes = blt_composite_fill_boxes_no_offset;
+ op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
}
op->done = nop_done;
@@ -1047,6 +1209,7 @@ prepare_blt_fill(struct sna *sna,
op->blt = blt_composite_fill__cpu;
op->box = blt_composite_fill_box__cpu;
op->boxes = blt_composite_fill_boxes__cpu;
+ op->thread_boxes = blt_composite_fill_boxes__cpu;
op->done = nop_done;
return true;
}
@@ -1055,9 +1218,11 @@ prepare_blt_fill(struct sna *sna,
if (op->dst.x|op->dst.y) {
op->box = blt_composite_fill_box;
op->boxes = blt_composite_fill_boxes;
+ op->thread_boxes = blt_composite_fill_boxes__thread;
} else {
op->box = blt_composite_fill_box_no_offset;
op->boxes = blt_composite_fill_boxes_no_offset;
+ op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
}
op->done = nop_done;
@@ -1148,6 +1313,141 @@ static void blt_composite_copy_boxes(struct sna *sna,
} while(--nbox);
}
+static inline uint32_t add2(uint32_t v, int16_t x, int16_t y)
+{
+ x += v & 0xffff;
+ y += v >> 16;
+	return (uint32_t)y << 16 | (uint16_t)x;
+}
+
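add2() is the single-point analogue (y in the high halfword, x in the low), used below to derive the packed source corner from the packed destination corner:

uint32_t p = 20 << 16 | 10;			/* (x=10, y=20) */
assert(add2(p, 5, 7) == (27u << 16 | 15));	/* (x=15, y=27) */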
+static void blt_composite_copy_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ struct kgem *kgem = &sna->kgem;
+ int dst_dx = op->dst.x;
+ int dst_dy = op->dst.y;
+ int src_dx = op->src.offset[0];
+ int src_dy = op->src.offset[1];
+ uint32_t cmd = op->u.blt.cmd;
+ uint32_t br13 = op->u.blt.br13;
+ struct kgem_bo *src_bo = op->u.blt.bo[0];
+ struct kgem_bo *dst_bo = op->u.blt.bo[1];
+ int src_pitch = op->u.blt.pitch[0];
+
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+
+ if ((dst_dx | dst_dy) == 0) {
+ uint64_t hdr = (uint64_t)br13 << 32 | cmd;
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+
+ DBG((" %s: box=(%d, %d)x(%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1, box->y2 - box->y1));
+
+ assert(box->x1 + src_dx >= 0);
+ assert(box->y1 + src_dy >= 0);
+ assert(box->x1 + src_dx <= INT16_MAX);
+ assert(box->y1 + src_dy <= INT16_MAX);
+
+ assert(box->x1 >= 0);
+ assert(box->y1 >= 0);
+
+ *(uint64_t *)&b[0] = hdr;
+ *(uint64_t *)&b[2] = *(const uint64_t *)box;
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = add2(b[2], src_dx, src_dy);
+ b[6] = src_pitch;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 8;
+ assert(kgem->nbatch < kgem->surface);
+ box++;
+ } while (--nbox_this_time);
+
+ if (!nbox)
+ break;
+
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ } while (1);
+ } else {
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+
+ DBG((" %s: box=(%d, %d)x(%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1, box->y2 - box->y1));
+
+ assert(box->x1 + src_dx >= 0);
+ assert(box->y1 + src_dy >= 0);
+
+ assert(box->x1 + dst_dx >= 0);
+ assert(box->y1 + dst_dy >= 0);
+
+ b[0] = cmd;
+ b[1] = br13;
+ b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
+ b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
+ b[6] = src_pitch;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 8;
+ assert(kgem->nbatch < kgem->surface);
+ box++;
+ } while (--nbox_this_time);
+
+ if (!nbox)
+ break;
+
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ } while (1);
+ }
+ sna_vertex_unlock(&sna->render);
+}
+
fastcall static void
blt_composite_copy_with_alpha(struct sna *sna,
const struct sna_composite_op *op,
@@ -1245,7 +1545,7 @@ prepare_blt_copy(struct sna *sna,
}
if (!kgem_check_many_bo_fenced(&sna->kgem, op->dst.bo, bo, NULL)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
if (!kgem_check_many_bo_fenced(&sna->kgem,
op->dst.bo, bo, NULL)) {
DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
@@ -1256,7 +1556,7 @@ prepare_blt_copy(struct sna *sna,
DBG(("%s\n", __FUNCTION__));
- if (sna->kgem.gen >= 60)
+ if (sna->kgem.gen >= 060 && op->dst.bo == bo)
op->done = gen6_blt_copy_done;
else
op->done = nop_done;
@@ -1274,6 +1574,7 @@ prepare_blt_copy(struct sna *sna,
op->blt = blt_composite_copy;
op->box = blt_composite_copy_box;
op->boxes = blt_composite_copy_boxes;
+ op->thread_boxes = blt_composite_copy_boxes__thread;
if (!sna_blt_copy_init(sna, &op->u.blt, bo, op->dst.bo,
src->drawable.bitsPerPixel,
@@ -1715,8 +2016,14 @@ sna_blt_composite(struct sna *sna,
was_clear = sna_drawable_is_clear(dst->pDrawable);
tmp->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
- dst_box.x1 = dst_x; dst_box.x2 = dst_x + width;
- dst_box.y1 = dst_y; dst_box.y2 = dst_y + height;
+ if (width | height) {
+ dst_box.x1 = dst_x;
+ dst_box.x2 = bound(dst_x, width);
+ dst_box.y1 = dst_y;
+ dst_box.y2 = bound(dst_y, height);
+ } else
+ sna_render_picture_extents(dst, &dst_box);
+
bo = sna_drawable_use_bo(dst->pDrawable, PREFER_GPU, &dst_box, &tmp->damage);
if (bo && !kgem_bo_can_blt(&sna->kgem, bo)) {
DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n",
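The new width|height guard above clamps the destination box with bound(), which saturates at MAXSHORT rather than letting a 16-bit coordinate wrap. The helper itself (removed from sna_composite.c later in this diff, presumably now shared from a header) reads:

static int16_t bound(int16_t a, uint16_t b)
{
	int v = (int)a + (int)b;
	if (v > MAXSHORT)
		return MAXSHORT;	/* e.g. bound(30000, 40000) == 32767 */
	return v;
}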
@@ -1834,10 +2141,21 @@ clear:
if (x < 0 || y < 0 ||
x + width > src->pDrawable->width ||
y + height > src->pDrawable->height) {
- DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d\n",
+ DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d, repeat=%d\n",
__FUNCTION__,
- x, y, x+width, y+width, src->pDrawable->width, src->pDrawable->height));
- return false;
+		     x, y, x+width, y+height, src->pDrawable->width, src->pDrawable->height, src->repeatType));
+ if (src->repeat && src->repeatType == RepeatNormal) {
+ x = x % src->pDrawable->width;
+ y = y % src->pDrawable->height;
+ if (x < 0)
+ x += src->pDrawable->width;
+ if (y < 0)
+ y += src->pDrawable->height;
+ if (x + width > src->pDrawable->width ||
+ y + height > src->pDrawable->height)
+ return false;
+ } else
+ return false;
}
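For RepeatNormal sources the origin is folded back into the tile before giving up: C's % truncates toward zero, so a negative remainder needs one corrective addition. Worked example with a 100-pixel-wide source:

int x = -130 % 100;	/* -30: truncated division in C */
if (x < 0)
	x += 100;	/* 70: the equivalent in-tile origin */
/* still only usable if the copy fits one tile: 70 + width <= 100 */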
src_pixmap = get_drawable_pixmap(src->pDrawable);
@@ -1899,6 +2217,7 @@ put:
if (tmp->dst.bo == priv->cpu_bo) {
DBG(("%s: forcing the stall to overwrite a busy CPU bo\n", __FUNCTION__));
tmp->dst.bo = NULL;
+ tmp->damage = NULL;
}
}
}
@@ -1924,7 +2243,7 @@ static void convert_done(struct sna *sna, const struct sna_composite_op *op)
{
struct kgem *kgem = &sna->kgem;
- if (kgem->gen >= 60 && kgem_check_batch(kgem, 3)) {
+ if (kgem->gen >= 060 && op->src.bo == op->dst.bo && kgem_check_batch(kgem, 3)) {
uint32_t *b = kgem->batch + kgem->nbatch;
b[0] = XY_SETUP_CLIP;
b[1] = b[2] = 0;
@@ -1943,6 +2262,7 @@ sna_blt_composite__convert(struct sna *sna,
struct sna_composite_op *tmp)
{
uint32_t alpha_fixup;
+ int sx, sy;
uint8_t op;
#if DEBUG_NO_BLT || NO_BLT_COMPOSITE
@@ -1993,19 +2313,36 @@ sna_blt_composite__convert(struct sna *sna,
return false;
}
- x += tmp->src.offset[0];
- y += tmp->src.offset[1];
+ sx = tmp->src.offset[0];
+ sy = tmp->src.offset[1];
+
+ x += sx;
+ y += sy;
if (x < 0 || y < 0 ||
x + width > tmp->src.width ||
y + height > tmp->src.height) {
DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d\n",
__FUNCTION__,
 x, y, x+width, y+height, tmp->src.width, tmp->src.height));
- return false;
+ if (tmp->src.repeat == RepeatNormal) {
+ int xx = x % tmp->src.width;
+ int yy = y % tmp->src.height;
+ if (xx < 0)
+ xx += tmp->src.width;
+ if (yy < 0)
+ yy += tmp->src.height;
+ if (xx + width > tmp->src.width ||
+ yy + height > tmp->src.height)
+ return false;
+
+ sx += xx - x;
+ sy += yy - y;
+ } else
+ return false;
}
if (!kgem_check_many_bo_fenced(&sna->kgem, tmp->dst.bo, tmp->src.bo, NULL)) {
- _kgem_submit(&sna->kgem);
+ kgem_submit(&sna->kgem);
if (!kgem_check_many_bo_fenced(&sna->kgem,
tmp->dst.bo, tmp->src.bo, NULL)) {
DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
@@ -2014,13 +2351,14 @@ sna_blt_composite__convert(struct sna *sna,
_kgem_set_mode(&sna->kgem, KGEM_BLT);
}
- tmp->u.blt.src_pixmap = NULL;
- tmp->u.blt.sx = tmp->src.offset[0];
- tmp->u.blt.sy = tmp->src.offset[1];
DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
__FUNCTION__,
tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy, alpha_fixup));
+ tmp->u.blt.src_pixmap = NULL;
+ tmp->u.blt.sx = sx;
+ tmp->u.blt.sy = sy;
+
if (alpha_fixup) {
tmp->blt = blt_composite_copy_with_alpha;
tmp->box = blt_composite_copy_box_with_alpha;
@@ -2035,6 +2373,7 @@ sna_blt_composite__convert(struct sna *sna,
tmp->blt = blt_composite_copy;
tmp->box = blt_composite_copy_box;
tmp->boxes = blt_composite_copy_boxes;
+ tmp->thread_boxes = blt_composite_copy_boxes__thread;
if (!sna_blt_copy_init(sna, &tmp->u.blt,
tmp->src.bo, tmp->dst.bo,
@@ -2148,7 +2487,7 @@ bool sna_blt_copy(struct sna *sna, uint8_t alu,
return false;
op->blt = sna_blt_copy_op_blt;
- if (sna->kgem.gen >= 60)
+ if (sna->kgem.gen >= 060 && src == dst)
op->done = gen6_blt_copy_op_done;
else
op->done = sna_blt_copy_op_done;
@@ -2174,7 +2513,7 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
cmd = XY_COLOR_BLT;
br13 = bo->pitch;
- if (kgem->gen >= 40 && bo->tiling) {
+ if (kgem->gen >= 040 && bo->tiling) {
cmd |= BLT_DST_TILED;
br13 >>= 2;
}
@@ -2194,7 +2533,7 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
if (overwrites && kgem->nbatch >= 6 &&
kgem->batch[kgem->nbatch-6] == cmd &&
*(uint64_t *)&kgem->batch[kgem->nbatch-4] == *(const uint64_t *)box &&
- kgem->reloc[kgem->nreloc-1].target_handle == bo->handle) {
+ kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
DBG(("%s: replacing last fill\n", __FUNCTION__));
kgem->batch[kgem->nbatch-5] = br13;
kgem->batch[kgem->nbatch-1] = color;
@@ -2203,7 +2542,7 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
if (overwrites && kgem->nbatch >= 8 &&
(kgem->batch[kgem->nbatch-8] & 0xffc0000f) == XY_SRC_COPY_BLT_CMD &&
*(uint64_t *)&kgem->batch[kgem->nbatch-6] == *(const uint64_t *)box &&
- kgem->reloc[kgem->nreloc-2].target_handle == bo->handle) {
+ kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
DBG(("%s: replacing last copy\n", __FUNCTION__));
kgem->batch[kgem->nbatch-8] = cmd;
kgem->batch[kgem->nbatch-7] = br13;
@@ -2226,11 +2565,11 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
return false;
}
- kgem_set_mode(kgem, KGEM_BLT);
+ kgem_set_mode(kgem, KGEM_BLT, bo);
if (!kgem_check_batch(kgem, 6) ||
!kgem_check_reloc(kgem, 1) ||
!kgem_check_bo_fenced(kgem, bo)) {
- _kgem_submit(kgem);
+ kgem_submit(kgem);
assert(kgem_check_bo_fenced(&sna->kgem, bo));
_kgem_set_mode(kgem, KGEM_BLT);
}
@@ -2288,7 +2627,7 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
br13 = bo->pitch;
cmd = XY_SCANLINE_BLT;
- if (kgem->gen >= 40 && bo->tiling) {
+ if (kgem->gen >= 040 && bo->tiling) {
cmd |= 1 << 11;
br13 >>= 2;
}
@@ -2302,11 +2641,12 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
case 8: break;
}
- kgem_set_mode(kgem, KGEM_BLT);
+ kgem_set_mode(kgem, KGEM_BLT, bo);
if (!kgem_check_batch(kgem, 12) ||
!kgem_check_bo_fenced(kgem, bo)) {
- _kgem_submit(kgem);
- assert(kgem_check_bo_fenced(&sna->kgem, bo));
+ kgem_submit(kgem);
+ if (!kgem_check_bo_fenced(&sna->kgem, bo))
+ return false;
_kgem_set_mode(kgem, KGEM_BLT);
}
@@ -2405,13 +2745,6 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
return true;
}
-static inline uint32_t add2(uint32_t v, int16_t x, int16_t y)
-{
- x += v & 0xffff;
- y += v >> 16;
- return (uint16_t)y << 16 | x;
-}
-
bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
@@ -2442,14 +2775,14 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
src_pitch = src_bo->pitch;
- if (kgem->gen >= 40 && src_bo->tiling) {
+ if (kgem->gen >= 040 && src_bo->tiling) {
cmd |= BLT_SRC_TILED;
src_pitch >>= 2;
}
assert(src_pitch <= MAXSHORT);
br13 = dst_bo->pitch;
- if (kgem->gen >= 40 && dst_bo->tiling) {
+ if (kgem->gen >= 040 && dst_bo->tiling) {
cmd |= BLT_DST_TILED;
br13 >>= 2;
}
@@ -2466,7 +2799,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
/* Compare first box against a previous fill */
if (kgem->nbatch >= 6 &&
(alu == GXcopy || alu == GXclear || alu == GXset) &&
- kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->handle &&
+ kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle &&
kgem->batch[kgem->nbatch-6] == ((cmd & ~XY_SRC_COPY_BLT_CMD) | XY_COLOR_BLT) &&
kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
@@ -2475,11 +2808,11 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
kgem->nreloc--;
}
- kgem_set_mode(kgem, KGEM_BLT);
+ kgem_set_mode(kgem, KGEM_BLT, dst_bo);
if (!kgem_check_batch(kgem, 8) ||
!kgem_check_reloc(kgem, 2) ||
!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
- _kgem_submit(kgem);
+ kgem_submit(kgem);
if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL))
return sna_tiling_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
@@ -2595,7 +2928,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
} while (1);
}
- if (kgem->gen >= 60 && kgem_check_batch(kgem, 3)) {
+ if (kgem->gen >= 060 && kgem_check_batch(kgem, 3)) {
uint32_t *b = kgem->batch + kgem->nbatch;
b[0] = XY_SETUP_CLIP;
b[1] = b[2] = 0;
diff --git a/src/sna/sna_composite.c b/src/sna/sna_composite.c
index 60d39cdea..a4b85fe58 100644
--- a/src/sna/sna_composite.c
+++ b/src/sna/sna_composite.c
@@ -399,8 +399,8 @@ static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char
static void apply_damage(struct sna_composite_op *op, RegionPtr region)
{
- DBG(("%s: damage=%p, region=%d [(%d, %d), (%d, %d) + (%d, %d)]\n",
- __FUNCTION__, op->damage, REGION_NUM_RECTS(region),
+ DBG(("%s: damage=%p, region=%ld [(%d, %d), (%d, %d) + (%d, %d)]\n",
+	     __FUNCTION__, op->damage, (long)RegionNumRects(region),
region->extents.x1, region->extents.y1,
region->extents.x2, region->extents.y2,
op->dst.x, op->dst.y));
@@ -438,6 +438,92 @@ static inline bool use_cpu(PixmapPtr pixmap, struct sna_pixmap *priv,
return (priv->create & KGEM_CAN_CREATE_GPU) == 0;
}
+static void validate_source(PicturePtr picture)
+{
+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,10,99,901,0)
+ miCompositeSourceValidate(picture);
+#else
+ miCompositeSourceValidate(picture,
+ 0, 0,
+ picture->pDrawable ? picture->pDrawable->width : 0,
+ picture->pDrawable ? picture->pDrawable->height : 0);
+#endif
+}
+
+void
+sna_composite_fb(CARD8 op,
+ PicturePtr src,
+ PicturePtr mask,
+ PicturePtr dst,
+ RegionPtr region,
+ INT16 src_x, INT16 src_y,
+ INT16 mask_x, INT16 mask_y,
+ INT16 dst_x, INT16 dst_y,
+ CARD16 width, CARD16 height)
+{
+ pixman_image_t *src_image, *mask_image, *dest_image;
+ int src_xoff, src_yoff;
+ int msk_xoff, msk_yoff;
+ int dst_xoff, dst_yoff;
+ unsigned flags;
+
+ DBG(("%s: fallback -- move dst to cpu\n", __FUNCTION__));
+ if (op <= PictOpSrc && !dst->alphaMap)
+ flags = MOVE_WRITE | MOVE_INPLACE_HINT;
+ else
+ flags = MOVE_WRITE | MOVE_READ;
+ if (!sna_drawable_move_region_to_cpu(dst->pDrawable, region, flags))
+ return;
+ if (dst->alphaMap &&
+ !sna_drawable_move_to_cpu(dst->alphaMap->pDrawable, flags))
+ return;
+
+ if (src->pDrawable) {
+ DBG(("%s: fallback -- move src to cpu\n", __FUNCTION__));
+ if (!sna_drawable_move_to_cpu(src->pDrawable,
+ MOVE_READ))
+ return;
+
+ if (src->alphaMap &&
+ !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
+ MOVE_READ))
+ return;
+ }
+
+ if (mask && mask->pDrawable) {
+ DBG(("%s: fallback -- move mask to cpu\n", __FUNCTION__));
+ if (!sna_drawable_move_to_cpu(mask->pDrawable,
+ MOVE_READ))
+ return;
+
+ if (mask->alphaMap &&
+ !sna_drawable_move_to_cpu(mask->alphaMap->pDrawable,
+ MOVE_READ))
+ return;
+ }
+
+ DBG(("%s: fallback -- fbComposite\n", __FUNCTION__));
+
+ validate_source(src);
+ if (mask)
+ validate_source(mask);
+
+ src_image = image_from_pict(src, FALSE, &src_xoff, &src_yoff);
+ mask_image = image_from_pict(mask, FALSE, &msk_xoff, &msk_yoff);
+ dest_image = image_from_pict(dst, TRUE, &dst_xoff, &dst_yoff);
+
+ if (src_image && dest_image && !(mask && !mask_image))
+ sna_image_composite(op, src_image, mask_image, dest_image,
+ src_x + src_xoff, src_y + src_yoff,
+ mask_x + msk_xoff, mask_y + msk_yoff,
+ dst_x + dst_xoff, dst_y + dst_yoff,
+ width, height);
+
+ free_pixman_pict(src, src_image);
+ free_pixman_pict(mask, mask_image);
+ free_pixman_pict(dst, dest_image);
+}
+
void
sna_composite(CARD8 op,
PicturePtr src,
@@ -510,7 +596,7 @@ sna_composite(CARD8 op,
}
if (use_cpu(pixmap, priv, op, width, height) &&
- !picture_is_gpu(src) && !picture_is_gpu(mask)) {
+ !picture_is_gpu(sna, src) && !picture_is_gpu(sna, mask)) {
DBG(("%s: fallback, dst pixmap=%ld is too small (or completely damaged)\n",
__FUNCTION__, pixmap->drawable.serialNumber));
goto fallback;
@@ -561,8 +647,8 @@ sna_composite(CARD8 op,
tmp.box(sna, &tmp, &region.extents);
else
tmp.boxes(sna, &tmp,
- REGION_BOXPTR(&region),
- REGION_NUM_RECTS(&region));
+ RegionBoxptr(&region),
+ RegionNumRects(&region));
apply_damage(&tmp, &region);
tmp.done(sna, &tmp);
@@ -610,23 +696,15 @@ fallback:
}
DBG(("%s: fallback -- fbComposite\n", __FUNCTION__));
- fbComposite(op, src, mask, dst,
- src_x, src_y,
- mask_x, mask_y,
- dst_x, dst_y,
- width, height);
+ sna_composite_fb(op, src, mask, dst, &region,
+ src_x, src_y,
+ mask_x, mask_y,
+ dst_x, dst_y,
+ width, height);
out:
REGION_UNINIT(NULL, &region);
}
-static int16_t bound(int16_t a, uint16_t b)
-{
- int v = (int)a + (int)b;
- if (v > MAXSHORT)
- return MAXSHORT;
- return v;
-}
-
static bool
_pixman_region_init_clipped_rectangles(pixman_region16_t *region,
unsigned int num_rects,
@@ -778,11 +856,11 @@ sna_composite_rectangles(CARD8 op,
return;
}
- DBG(("%s: drawable extents (%d, %d),(%d, %d) x %d\n",
+ DBG(("%s: drawable extents (%d, %d),(%d, %d) x %ld\n",
__FUNCTION__,
RegionExtents(&region)->x1, RegionExtents(&region)->y1,
RegionExtents(&region)->x2, RegionExtents(&region)->y2,
- RegionNumRects(&region)));
+ (long)RegionNumRects(&region)));
if (dst->pCompositeClip->data &&
(!pixman_region_intersect(&region, &region, dst->pCompositeClip) ||
@@ -793,11 +871,11 @@ sna_composite_rectangles(CARD8 op,
return;
}
- DBG(("%s: clipped extents (%d, %d),(%d, %d) x %d\n",
+ DBG(("%s: clipped extents (%d, %d),(%d, %d) x %ld\n",
__FUNCTION__,
RegionExtents(&region)->x1, RegionExtents(&region)->y1,
RegionExtents(&region)->x2, RegionExtents(&region)->y2,
- RegionNumRects(&region)));
+ (long)RegionNumRects(&region)));
pixmap = get_drawable_pixmap(dst->pDrawable);
get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y);
@@ -827,6 +905,10 @@ sna_composite_rectangles(CARD8 op,
goto fallback;
}
+	/* XXX xserver-1.8: CompositeRects is not tracked by Damage, so we
+	 * must append the damaged regions ourselves.
+	 */
+ DamageRegionAppend(&pixmap->drawable, &region);
boxes = pixman_region_rectangles(&region, &num_boxes);
/* If we going to be overwriting any CPU damage with a subsequent
@@ -849,10 +931,10 @@ sna_composite_rectangles(CARD8 op,
box_inplace(pixmap, &region.extents)) {
DBG(("%s: promoting to full GPU\n", __FUNCTION__));
if (priv->gpu_bo && priv->cpu_damage == NULL) {
+ assert(priv->gpu_bo->proxy == NULL);
sna_damage_all(&priv->gpu_damage,
pixmap->drawable.width,
pixmap->drawable.height);
- priv->undamaged = false;
}
}
if (priv->cpu_damage == NULL) {
@@ -890,7 +972,6 @@ sna_composite_rectangles(CARD8 op,
pixmap->drawable.height);
sna_damage_destroy(damage == &priv->gpu_damage ?
&priv->cpu_damage : &priv->gpu_damage);
- priv->undamaged = false;
}
if (op <= PictOpSrc && bo == priv->gpu_bo) {
@@ -927,9 +1008,11 @@ fallback:
!sna_drawable_move_to_cpu(dst->alphaMap->pDrawable, error))
goto done;
+ assert(pixmap->devPrivate.ptr);
+
if (op <= PictOpSrc) {
- int nbox = REGION_NUM_RECTS(&region);
- BoxPtr box = REGION_RECTS(&region);
+ int nbox = RegionNumRects(&region);
+ BoxPtr box = RegionRects(&region);
uint32_t pixel;
if (op == PictOpClear)
@@ -984,10 +1067,6 @@ fallback_composite:
}
done:
- /* XXX xserver-1.8: CompositeRects is not tracked by Damage, so we must
- * manually append the damaged regions ourselves.
- */
- DamageRegionAppend(&pixmap->drawable, &region);
DamageRegionProcessPending(&pixmap->drawable);
pixman_region_fini(&region);
diff --git a/src/sna/sna_damage.c b/src/sna/sna_damage.c
index a870cbf5c..ab693af3a 100644
--- a/src/sna/sna_damage.c
+++ b/src/sna/sna_damage.c
@@ -507,6 +507,7 @@ static void __sna_damage_reduce(struct sna_damage *damage)
} else {
pixman_region16_t tmp;
+ assert(n == nboxes);
pixman_region_init_rects(&tmp, boxes, nboxes);
pixman_region_subtract(region, region, &tmp);
pixman_region_fini(&tmp);
@@ -1033,7 +1034,7 @@ static struct sna_damage *__sna_damage_subtract(struct sna_damage *damage,
if (damage == NULL)
return NULL;
- if (!RegionNotEmpty(&damage->region)) {
+ if (RegionNil(&damage->region)) {
no_damage:
__sna_damage_destroy(damage);
return NULL;
@@ -1126,7 +1127,7 @@ inline static struct sna_damage *__sna_damage_subtract_box(struct sna_damage *da
if (damage == NULL)
return NULL;
- if (!RegionNotEmpty(&damage->region)) {
+ if (RegionNil(&damage->region)) {
__sna_damage_destroy(damage);
return NULL;
}
@@ -1198,7 +1199,7 @@ static struct sna_damage *__sna_damage_subtract_boxes(struct sna_damage *damage,
if (damage == NULL)
return NULL;
- if (!RegionNotEmpty(&damage->region)) {
+ if (RegionNil(&damage->region)) {
__sna_damage_destroy(damage);
return NULL;
}
diff --git a/src/sna/sna_damage.h b/src/sna/sna_damage.h
index 5e800b7dc..03a54a3d0 100644
--- a/src/sna/sna_damage.h
+++ b/src/sna/sna_damage.h
@@ -2,7 +2,6 @@
#define SNA_DAMAGE_H
#include <regionstr.h>
-#include <list.h>
#include "compiler.h"
@@ -259,20 +258,22 @@ static inline void sna_damage_reduce_all(struct sna_damage **_damage,
if (damage == NULL || DAMAGE_IS_ALL(damage))
return;
- if (damage->mode == DAMAGE_ADD &&
- damage->extents.x1 <= 0 &&
- damage->extents.y1 <= 0 &&
- damage->extents.x2 >= width &&
- damage->extents.y2 >= height) {
- if (damage->dirty) {
- damage = *_damage = _sna_damage_reduce(damage);
- if (damage == NULL)
- return;
+ if (damage->mode == DAMAGE_ADD) {
+ if (damage->extents.x1 <= 0 &&
+ damage->extents.y1 <= 0 &&
+ damage->extents.x2 >= width &&
+ damage->extents.y2 >= height) {
+ if (damage->dirty) {
+ damage = *_damage = _sna_damage_reduce(damage);
+ if (damage == NULL)
+ return;
+ }
+
+ if (damage->region.data == NULL)
+ *_damage = _sna_damage_all(damage, width, height);
}
-
- if (damage->region.data == NULL)
- *_damage = _sna_damage_all(damage, width, height);
- }
+ } else
+ *_damage = _sna_damage_reduce(damage);
}
void __sna_damage_destroy(struct sna_damage *damage);
diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index ed3237558..558d70626 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -113,7 +113,7 @@ static inline uint32_t fb_id(struct kgem_bo *bo)
return bo->delta;
}
-int sna_crtc_id(xf86CrtcPtr crtc)
+uint32_t sna_crtc_id(xf86CrtcPtr crtc)
{
return to_sna_crtc(crtc)->id;
}
@@ -123,7 +123,7 @@ int sna_crtc_to_pipe(xf86CrtcPtr crtc)
return to_sna_crtc(crtc)->pipe;
}
-int sna_crtc_to_plane(xf86CrtcPtr crtc)
+uint32_t sna_crtc_to_plane(xf86CrtcPtr crtc)
{
return to_sna_crtc(crtc)->plane;
}
@@ -164,6 +164,7 @@ static unsigned get_fb(struct sna *sna, struct kgem_bo *bo,
scrn->depth, scrn->bitsPerPixel, bo->pitch, errno);
return 0;
}
+ assert(arg.fb_id != 0);
bo->scanout = true;
return bo->delta = arg.fb_id;
@@ -197,13 +198,15 @@ sna_output_backlight_set(xf86OutputPtr output, int level)
char path[1024], val[BACKLIGHT_VALUE_LEN];
int fd, len, ret;
- DBG(("%s: level=%d\n", __FUNCTION__, level));
+ DBG(("%s: level=%d, max=%d\n", __FUNCTION__,
+ level, sna_output->backlight_max));
- if (level > sna_output->backlight_max)
- level = sna_output->backlight_max;
- if (!sna_output->backlight_iface || level < 0)
+ if (!sna_output->backlight_iface)
return;
+ if ((unsigned)level > sna_output->backlight_max)
+ level = sna_output->backlight_max;
+
len = snprintf(val, BACKLIGHT_VALUE_LEN, "%d\n", level);
sprintf(path, "%s/%s/brightness",
BACKLIGHT_CLASS, sna_output->backlight_iface);
@@ -886,6 +889,9 @@ void sna_copy_fbcon(struct sna *sna)
DBG(("%s\n", __FUNCTION__));
+ priv = sna_pixmap(sna->front);
+ assert(priv && priv->gpu_bo);
+
/* Scan the connectors for a framebuffer and assume that is the fbcon */
VG_CLEAR(fbcon);
fbcon.fb_id = 0;
@@ -912,6 +918,11 @@ void sna_copy_fbcon(struct sna *sna)
return;
}
+ if (fbcon.fb_id == priv->gpu_bo->delta) {
+ DBG(("%s: fb already installed as scanout\n", __FUNCTION__));
+ return;
+ }
+
/* Wrap the fbcon in a pixmap so that we select the right formats
* in the render copy in case we need to preserve the fbcon
* across a depth change upon starting X.
@@ -933,9 +944,6 @@ void sna_copy_fbcon(struct sna *sna)
DBG(("%s: fbcon handle=%d\n", __FUNCTION__, bo->handle));
- priv = sna_pixmap(sna->front);
- assert(priv && priv->gpu_bo);
-
sx = dx = 0;
if (box.x2 < (uint16_t)fbcon.width)
sx = (fbcon.width - box.x2) / 2;
@@ -957,7 +965,9 @@ void sna_copy_fbcon(struct sna *sna)
kgem_bo_destroy(&sna->kgem, bo);
+#if ABI_VIDEODRV_VERSION >= SET_ABI_VERSION(10, 0)
sna->scrn->pScreen->canDoBGNoneRoot = ok;
+#endif
cleanup_scratch:
FreeScratchPixmapHeader(scratch);
@@ -1223,6 +1233,9 @@ sna_crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode,
struct drm_mode_modeinfo saved_kmode;
bool saved_transform;
+ if (mode->HDisplay == 0 || mode->VDisplay == 0)
+ return FALSE;
+
xf86DrvMsg(crtc->scrn->scrnIndex, X_INFO,
"switch to mode %dx%d on crtc %d (pipe %d)\n",
mode->HDisplay, mode->VDisplay,
@@ -1251,6 +1264,8 @@ retry: /* Attach per-crtc pixmap or direct */
if (bo == NULL)
return FALSE;
+ kgem_bo_submit(&sna->kgem, bo);
+
sna_crtc->bo = bo;
mode_to_kmode(&sna_crtc->kmode, mode);
if (!sna_crtc_apply(crtc)) {
@@ -1307,9 +1322,12 @@ sna_crtc_dpms(xf86CrtcPtr crtc, int mode)
void sna_mode_adjust_frame(struct sna *sna, int x, int y)
{
xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn);
- xf86OutputPtr output = config->output[config->compat_output];
- xf86CrtcPtr crtc = output->crtc;
+ xf86CrtcPtr crtc;
+ if ((unsigned)config->compat_output >= config->num_output)
+ return;
+
+ crtc = config->output[config->compat_output]->crtc;
if (crtc && crtc->enabled) {
int saved_x = crtc->x;
int saved_y = crtc->y;
@@ -1456,9 +1474,10 @@ static const xf86CrtcFuncsRec sna_crtc_funcs = {
#endif
};
-static uint32_t
+static int
sna_crtc_find_plane(struct sna *sna, int pipe)
{
+#ifdef DRM_IOCTL_MODE_GETPLANERESOURCES
struct drm_mode_get_plane_res r;
uint32_t *planes, id = 0;
int i;
@@ -1494,7 +1513,11 @@ sna_crtc_find_plane(struct sna *sna, int pipe)
}
free(planes);
+ assert(id);
return id;
+#else
+ return 0;
+#endif
}
static void
@@ -1691,41 +1714,40 @@ static DisplayModePtr
sna_output_panel_edid(xf86OutputPtr output, DisplayModePtr modes)
{
xf86MonPtr mon = output->MonInfo;
+ DisplayModePtr i, m, preferred = NULL;
+ int max_x = 0, max_y = 0;
+ float max_vrefresh = 0.0;
- if (!mon || !GTF_SUPPORTED(mon->features.msc)) {
- DisplayModePtr i, m, p = NULL;
- int max_x = 0, max_y = 0;
- float max_vrefresh = 0.0;
-
- for (m = modes; m; m = m->next) {
- if (m->type & M_T_PREFERRED)
- p = m;
- max_x = max(max_x, m->HDisplay);
- max_y = max(max_y, m->VDisplay);
- max_vrefresh = max(max_vrefresh, xf86ModeVRefresh(m));
- }
+ if (mon && GTF_SUPPORTED(mon->features.msc))
+ return modes;
- max_vrefresh = max(max_vrefresh, 60.0);
- max_vrefresh *= (1 + SYNC_TOLERANCE);
-
- m = xf86GetDefaultModes();
- xf86ValidateModesSize(output->scrn, m, max_x, max_y, 0);
+ for (m = modes; m; m = m->next) {
+ if (m->type & M_T_PREFERRED)
+ preferred = m;
+ max_x = max(max_x, m->HDisplay);
+ max_y = max(max_y, m->VDisplay);
+ max_vrefresh = max(max_vrefresh, xf86ModeVRefresh(m));
+ }
- for (i = m; i; i = i->next) {
- if (xf86ModeVRefresh(i) > max_vrefresh)
- i->status = MODE_VSYNC;
- if (p && i->HDisplay >= p->HDisplay &&
- i->VDisplay >= p->VDisplay &&
- xf86ModeVRefresh(i) >= xf86ModeVRefresh(p))
- i->status = MODE_VSYNC;
- }
+ max_vrefresh = max(max_vrefresh, 60.0);
+ max_vrefresh *= (1 + SYNC_TOLERANCE);
- xf86PruneInvalidModes(output->scrn, &m, FALSE);
+ m = xf86GetDefaultModes();
+ xf86ValidateModesSize(output->scrn, m, max_x, max_y, 0);
- modes = xf86ModesAdd(modes, m);
+ for (i = m; i; i = i->next) {
+ if (xf86ModeVRefresh(i) > max_vrefresh)
+ i->status = MODE_VSYNC;
+ if (preferred &&
+ i->HDisplay >= preferred->HDisplay &&
+ i->VDisplay >= preferred->VDisplay &&
+ xf86ModeVRefresh(i) >= xf86ModeVRefresh(preferred))
+ i->status = MODE_PANEL;
}
- return modes;
+ xf86PruneInvalidModes(output->scrn, &m, FALSE);
+
+ return xf86ModesAdd(modes, m);
}
static DisplayModePtr
@@ -1759,6 +1781,7 @@ sna_output_get_modes(xf86OutputPtr output)
*/
sna_output->has_panel_limits = false;
if (is_panel(koutput->connector_type)) {
+ sna_output->panel_hdisplay = sna_output->panel_vdisplay = 0;
for (i = 0; i < koutput->count_modes; i++) {
drmModeModeInfo *mode_ptr;
@@ -1768,7 +1791,6 @@ sna_output_get_modes(xf86OutputPtr output)
if (mode_ptr->vdisplay > sna_output->panel_vdisplay)
sna_output->panel_vdisplay = mode_ptr->vdisplay;
}
-
sna_output->has_panel_limits =
sna_output->panel_hdisplay &&
sna_output->panel_vdisplay;
@@ -2459,13 +2481,13 @@ sna_crtc_resize(ScrnInfoPtr scrn, int width, int height)
sna_crtc_disable(crtc);
}
- if (screen->root) {
+ if (root(screen)) {
struct sna_visit_set_pixmap_window visit;
visit.old = old_front;
visit.new = sna->front;
- TraverseTree(screen->root, sna_visit_set_window_pixmap, &visit);
- assert(screen->GetWindowPixmap(screen->root) == sna->front);
+ TraverseTree(root(screen), sna_visit_set_window_pixmap, &visit);
+ assert(screen->GetWindowPixmap(root(screen)) == sna->front);
}
screen->SetScreenPixmap(sna->front);
assert(screen->GetScreenPixmap(screen) == sna->front);
@@ -2522,6 +2544,12 @@ static int do_page_flip(struct sna *sna, struct kgem_bo *bo,
DBG(("%s: flip [fb=%d] on crtc %d [%d] failed - %d\n",
__FUNCTION__, arg.fb_id, i, crtc->id, errno));
disable:
+ if (count == 0)
+ return 0;
+
+ xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR,
+ "%s: page flipping failed, disabling CRTC:%d (pipe=%d)\n",
+ __FUNCTION__, crtc->id, crtc->pipe);
sna_crtc_disable(config->crtc[i]);
continue;
}
@@ -2614,6 +2642,31 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna)
return true;
}
+static Bool sna_mode_has_pending_events(struct sna *sna)
+{
+ struct pollfd pfd;
+ pfd.fd = sna->kgem.fd;
+ pfd.events = POLLIN;
+ return poll(&pfd, 1, 0) == 1;
+}
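+/*
+ * Editor's note (illustrative, not part of the patch): with a zero
+ * timeout, poll() is a pure readiness test -- it returns 1 immediately
+ * iff a drm event is already queued on the fd and never blocks, e.g.
+ *
+ *	struct pollfd pfd = { .fd = drm_fd, .events = POLLIN };
+ *	if (poll(&pfd, 1, 0) == 1)
+ *		drain_events(drm_fd);	<- hypothetical helper
+ *
+ * which is what lets sna_mode_close() drain events safely even on
+ * kernels that ignore O_NONBLOCK on the drm fd.
+ */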
+
+void
+sna_mode_close(struct sna *sna)
+{
+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(sna->scrn);
+ int i;
+
+ /* In order to workaround a kernel bug in not honouring O_NONBLOCK,
+ * check that the fd is readable before attempting to read the next
+ * event from drm.
+ */
+ if (sna_mode_has_pending_events(sna))
+ sna_mode_wakeup(sna);
+
+ for (i = 0; i < xf86_config->num_crtc; i++)
+ sna_crtc_disable_shadow(sna, to_sna_crtc(xf86_config->crtc[i]));
+}
+
void
sna_mode_fini(struct sna *sna)
{
@@ -2689,6 +2742,11 @@ sna_covering_crtc(ScrnInfoPtr scrn,
__FUNCTION__, c,
crtc->bounds.x1, crtc->bounds.y1,
crtc->bounds.x2, crtc->bounds.y2));
+ if (*(const uint64_t *)box == *(uint64_t *)&crtc->bounds) {
+ DBG(("%s: box exactly matches crtc [%d]\n",
+ __FUNCTION__, c));
+ return crtc;
+ }
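+		/* Editor's note: a BoxRec is four INT16s (8 bytes), so the
+		 * single uint64_t comparison above tests all four
+		 * coordinates for an exact match at once.
+		 */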
if (!sna_box_intersect(&cover_box, &crtc->bounds, box))
continue;
@@ -2717,41 +2775,123 @@ sna_covering_crtc(ScrnInfoPtr scrn,
return best_crtc;
}
-/* Gen6 wait for scan line support */
#define MI_LOAD_REGISTER_IMM (0x22<<23)
-/* gen6: Scan lines register */
-#define GEN6_PIPEA_SLC (0x70004)
-#define GEN6_PIPEB_SLC (0x71004)
-
-static void sna_emit_wait_for_scanline_gen6(struct sna *sna,
+static bool sna_emit_wait_for_scanline_gen7(struct sna *sna,
+ xf86CrtcPtr crtc,
int pipe, int y1, int y2,
bool full_height)
{
- uint32_t event;
uint32_t *b;
+ uint32_t event;
- assert (y2 > 0);
+ if (!sna->kgem.has_secure_batches)
+ return false;
- /* We just wait until the trace passes the roi */
- if (pipe == 0) {
- pipe = GEN6_PIPEA_SLC;
- event = MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW;
- } else {
- pipe = GEN6_PIPEB_SLC;
- event = MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW;
+ assert(y1 >= 0);
+ assert(y2 > y1);
+ assert(sna->kgem.mode);
+
+ /* Always program one less than the desired value */
+ if (--y1 < 0)
+ y1 = crtc->bounds.y2;
+ y2--;
+
+ switch (pipe) {
+ default:
+ assert(0);
+ case 0:
+ event = 1 << (full_height ? 3 : 0);
+ break;
+ case 1:
+ event = 1 << (full_height ? 11 : 8);
+ break;
+ case 2:
+ event = 1 << (full_height ? 21 : 14);
+ break;
+ }
+
+ b = kgem_get_batch(&sna->kgem);
+
+ /* Both the LRI and WAIT_FOR_EVENT must be in the same cacheline */
+	if (((sna->kgem.nbatch + 6) >> 4) != ((sna->kgem.nbatch + 10) >> 4)) {
+ int dw = sna->kgem.nbatch + 6;
+ dw = ALIGN(dw, 16) - dw;
+ while (dw--)
+ *b++ = MI_NOOP;
}
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
- b = kgem_get_batch(&sna->kgem, 4);
b[0] = MI_LOAD_REGISTER_IMM | 1;
- b[1] = pipe;
- b[2] = y2 - 1;
- b[3] = MI_WAIT_FOR_EVENT | event;
- kgem_advance_batch(&sna->kgem, 4);
+ b[1] = 0x44050; /* DERRMR */
+ b[2] = ~event;
+ b[3] = MI_LOAD_REGISTER_IMM | 1;
+ b[4] = 0xa188; /* FORCEWAKE_MT */
+ b[5] = 2 << 16 | 2;
+ b[6] = MI_LOAD_REGISTER_IMM | 1;
+ b[7] = 0x70068 + 0x1000 * pipe;
+ b[8] = (1 << 31) | (1 << 30) | (y1 << 16) | y2;
+ b[9] = MI_WAIT_FOR_EVENT | event;
+ b[10] = MI_LOAD_REGISTER_IMM | 1;
+ b[11] = 0xa188; /* FORCEWAKE_MT */
+ b[12] = 2 << 16;
+ b[13] = MI_LOAD_REGISTER_IMM | 1;
+ b[14] = 0x44050; /* DERRMR */
+ b[15] = ~0;
+
+ sna->kgem.nbatch = b - sna->kgem.batch + 16;
+
+ sna->kgem.batch_flags |= I915_EXEC_SECURE;
+ return true;
+}
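+/*
+ * Worked example for the cacheline fixup above (editor's note): the
+ * batch is u32s, so one 64-byte cacheline holds 16 dwords.  With
+ * nbatch == 8, dwords +6 and +10 of the sequence would sit at offsets
+ * 14 and 18 (14 >> 4 != 18 >> 4), straddling the boundary at 16;
+ * ALIGN(14, 16) - 14 == 2 MI_NOOPs are emitted first so that the LRI
+ * and the WAIT_FOR_EVENT both land inside dwords 16..31.
+ */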
+
+static bool sna_emit_wait_for_scanline_gen6(struct sna *sna,
+ xf86CrtcPtr crtc,
+ int pipe, int y1, int y2,
+ bool full_height)
+{
+ uint32_t *b;
+ uint32_t event;
+
+ if (!sna->kgem.has_secure_batches)
+ return false;
+
+ assert(y1 >= 0);
+ assert(y2 > y1);
+ assert(sna->kgem.mode == KGEM_RENDER);
+
+ /* Always program one less than the desired value */
+ if (--y1 < 0)
+ y1 = crtc->bounds.y2;
+ y2--;
+
+	/* The scanline window has 3-bit granularity, i.e. is specified in steps of 8 lines */
+ y1 &= ~7;
+ y2 &= ~7;
+ if (y2 == y1)
+ return false;
+
+ event = 1 << (3*full_height + pipe*8);
+
+ b = kgem_get_batch(&sna->kgem);
+ sna->kgem.nbatch += 10;
+
+ b[0] = MI_LOAD_REGISTER_IMM | 1;
+ b[1] = 0x44050; /* DERRMR */
+ b[2] = ~event;
+ b[3] = MI_LOAD_REGISTER_IMM | 1;
+ b[4] = 0x4f100; /* magic */
+ b[5] = (1 << 31) | (1 << 30) | pipe << 29 | (y1 << 16) | y2;
+ b[6] = MI_WAIT_FOR_EVENT | event;
+ b[7] = MI_LOAD_REGISTER_IMM | 1;
+ b[8] = 0x44050; /* DERRMR */
+ b[9] = ~0;
+
+ sna->kgem.batch_flags |= I915_EXEC_SECURE;
+ return true;
}
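+/*
+ * Editor's sanity check of the event encoding above: for the scanline
+ * window, 1 << (3*full_height + pipe*8) gives bit 0 (pipe A) or bit 8
+ * (pipe B), and bits 3/11 for the full-height (vblank) case -- the
+ * same bits the gen7 variant selects explicitly in its switch.
+ */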
-static void sna_emit_wait_for_scanline_gen4(struct sna *sna,
+static bool sna_emit_wait_for_scanline_gen4(struct sna *sna,
+ xf86CrtcPtr crtc,
int pipe, int y1, int y2,
bool full_height)
{
@@ -2770,17 +2910,20 @@ static void sna_emit_wait_for_scanline_gen4(struct sna *sna,
event = MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW;
}
- kgem_set_mode(&sna->kgem, KGEM_BLT);
- b = kgem_get_batch(&sna->kgem, 5);
+ b = kgem_get_batch(&sna->kgem);
+ sna->kgem.nbatch += 5;
+
/* The documentation says that the LOAD_SCAN_LINES command
* always comes in pairs. Don't ask me why. */
b[2] = b[0] = MI_LOAD_SCAN_LINES_INCL | pipe << 20;
b[3] = b[1] = (y1 << 16) | (y2-1);
b[4] = MI_WAIT_FOR_EVENT | event;
- kgem_advance_batch(&sna->kgem, 5);
+
+ return true;
}
-static void sna_emit_wait_for_scanline_gen2(struct sna *sna,
+static bool sna_emit_wait_for_scanline_gen2(struct sna *sna,
+ xf86CrtcPtr crtc,
int pipe, int y1, int y2,
bool full_height)
{
@@ -2794,17 +2937,16 @@ static void sna_emit_wait_for_scanline_gen2(struct sna *sna,
if (full_height)
y2 -= 2;
- kgem_set_mode(&sna->kgem, KGEM_BLT);
- b = kgem_get_batch(&sna->kgem, 5);
+ b = kgem_get_batch(&sna->kgem);
+ sna->kgem.nbatch += 5;
+
/* The documentation says that the LOAD_SCAN_LINES command
* always comes in pairs. Don't ask me why. */
b[2] = b[0] = MI_LOAD_SCAN_LINES_INCL | pipe << 20;
b[3] = b[1] = (y1 << 16) | (y2-1);
- if (pipe == 0)
- b[4] = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW;
- else
- b[4] = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW;
- kgem_advance_batch(&sna->kgem, 5);
+ b[4] = MI_WAIT_FOR_EVENT | 1 << (1 + 4*pipe);
+
+ return true;
}
bool
@@ -2815,15 +2957,12 @@ sna_wait_for_scanline(struct sna *sna,
{
bool full_height;
int y1, y2, pipe;
+ bool ret;
assert(crtc);
assert(to_sna_crtc(crtc)->bo != NULL);
assert(pixmap == sna->front);
- /* XXX WAIT_EVENT is still causing hangs on SNB */
- if (sna->kgem.gen >= 60)
- return false;
-
/*
* Make sure we don't wait for a scanline that will
* never occur
@@ -2850,14 +2989,20 @@ sna_wait_for_scanline(struct sna *sna,
DBG(("%s: pipe=%d, y1=%d, y2=%d, full_height?=%d\n",
__FUNCTION__, pipe, y1, y2, full_height));
- if (sna->kgem.gen >= 60)
- sna_emit_wait_for_scanline_gen6(sna, pipe, y1, y2, full_height);
- else if (sna->kgem.gen >= 40)
- sna_emit_wait_for_scanline_gen4(sna, pipe, y1, y2, full_height);
+ if (sna->kgem.gen >= 0100)
+ ret = false;
+ else if (sna->kgem.gen == 071)
+		ret = sna_emit_wait_for_scanline_gen6(sna, crtc, pipe, y1, y2, full_height);
+ else if (sna->kgem.gen >= 070)
+ ret = sna_emit_wait_for_scanline_gen7(sna, crtc, pipe, y1, y2, full_height);
+ else if (sna->kgem.gen >= 060)
+		ret = sna_emit_wait_for_scanline_gen6(sna, crtc, pipe, y1, y2, full_height);
+ else if (sna->kgem.gen >= 040)
+ ret = sna_emit_wait_for_scanline_gen4(sna, crtc, pipe, y1, y2, full_height);
else
- sna_emit_wait_for_scanline_gen2(sna, pipe, y1, y2, full_height);
+ ret = sna_emit_wait_for_scanline_gen2(sna, crtc, pipe, y1, y2, full_height);
- return true;
+ return ret;
}
void sna_mode_update(struct sna *sna)
@@ -3028,7 +3173,7 @@ sna_crtc_redisplay__composite(xf86CrtcPtr crtc, RegionPtr region)
0, 0,
0, 0,
0, 0,
- 0, 0,
+ crtc->mode.HDisplay, crtc->mode.VDisplay,
memset(&tmp, 0, sizeof(tmp)))) {
DBG(("%s: unsupported operation!\n", __FUNCTION__));
sna_crtc_redisplay__fallback(crtc, region);
@@ -3118,10 +3263,11 @@ void sna_mode_redisplay(struct sna *sna)
assert(sna->mode.shadow_active);
region = DamageRegion(sna->mode.shadow_damage);
- if (!RegionNotEmpty(region))
+ if (RegionNil(region))
return;
- if (!sna_pixmap_move_to_gpu(sna->front, MOVE_READ)) {
+ if (!can_render(sna) ||
+ !sna_pixmap_move_to_gpu(sna->front, MOVE_READ)) {
if (!sna_pixmap_move_to_cpu(sna->front, MOVE_READ))
return;
@@ -3164,7 +3310,7 @@ void sna_mode_redisplay(struct sna *sna)
RegionIntersect(&damage, &damage, region);
if (RegionNotEmpty(&damage)) {
sna_crtc_redisplay(crtc, &damage);
- __kgem_flush(&sna->kgem, sna_crtc->bo);
+ kgem_bo_flush(&sna->kgem, sna_crtc->bo);
}
RegionUninit(&damage);
}
@@ -3184,6 +3330,7 @@ void sna_mode_redisplay(struct sna *sna)
for (i = 0; i < config->num_crtc; i++) {
struct sna_crtc *crtc = config->crtc[i]->driver_private;
+ struct drm_mode_crtc_page_flip arg;
DBG(("%s: crtc %d [%d, pipe=%d] active? %d\n",
__FUNCTION__, i, crtc->id, crtc->pipe, crtc->bo != NULL));
@@ -3191,41 +3338,36 @@ void sna_mode_redisplay(struct sna *sna)
continue;
assert(config->crtc[i]->enabled);
-
- if (crtc->dpms_mode == DPMSModeOn) {
- struct drm_mode_crtc_page_flip arg;
- arg.crtc_id = crtc->id;
- arg.fb_id = get_fb(sna, new,
- sna->scrn->virtualX,
- sna->scrn->virtualY);
- if (arg.fb_id == 0)
- goto disable;
-
- /* Only the reference crtc will finally deliver its page flip
- * completion event. All other crtc's events will be discarded.
- */
- arg.user_data = 0;
- arg.flags = DRM_MODE_PAGE_FLIP_EVENT;
- arg.reserved = 0;
-
- if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_PAGE_FLIP, &arg)) {
- DBG(("%s: flip [fb=%d] on crtc %d [%d, pipe=%d] failed - %d\n",
- __FUNCTION__, arg.fb_id, i, crtc->id, crtc->pipe, errno));
+ assert(crtc->dpms_mode == DPMSModeOn);
+
+ arg.crtc_id = crtc->id;
+ arg.fb_id = get_fb(sna, new,
+ sna->scrn->virtualX,
+ sna->scrn->virtualY);
+ if (arg.fb_id == 0)
+ goto disable;
+
+ arg.user_data = 0;
+ arg.flags = DRM_MODE_PAGE_FLIP_EVENT;
+ arg.reserved = 0;
+
+ if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_PAGE_FLIP, &arg)) {
+ DBG(("%s: flip [fb=%d] on crtc %d [%d, pipe=%d] failed - %d\n",
+ __FUNCTION__, arg.fb_id, i, crtc->id, crtc->pipe, errno));
disable:
- sna_crtc_disable(config->crtc[i]);
- continue;
- }
- sna->mode.shadow_flip++;
+ xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR,
+ "%s: page flipping failed, disabling CRTC:%d (pipe=%d)\n",
+ __FUNCTION__, crtc->id, crtc->pipe);
+ sna_crtc_disable(config->crtc[i]);
+ continue;
}
+ sna->mode.shadow_flip++;
kgem_bo_destroy(&sna->kgem, old);
crtc->bo = kgem_bo_reference(new);
}
if (sna->mode.shadow) {
- /* XXX only works if the kernel stalls fwrites to the current
- * scanout whilst the flip is pending
- */
while (sna->mode.shadow_flip)
sna_mode_wakeup(sna);
(void)sna->render.copy_boxes(sna, GXcopy,
@@ -3237,8 +3379,9 @@ disable:
kgem_submit(&sna->kgem);
sna_pixmap(sna->front)->gpu_bo = old;
- sna->mode.shadow = new;
+ sna_dri_pixmap_update_bo(sna, sna->front);
+ sna->mode.shadow = new;
new->flush = old->flush;
}
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index 15ac46a3e..f04f1afeb 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -41,6 +41,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "sna.h"
#include "sna_reg.h"
+#include "intel_options.h"
#include <xf86drm.h>
#include <i915_drm.h>
@@ -51,18 +52,17 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#endif
#if DRI2INFOREC_VERSION < 10
+#undef USE_ASYNC_SWAP
#define USE_ASYNC_SWAP 0
#endif
#define COLOR_PREFER_TILING_Y 0
-#define FLIP_OFF_DELAY 5
enum frame_event_type {
DRI2_SWAP,
DRI2_SWAP_WAIT,
DRI2_SWAP_THROTTLE,
DRI2_XCHG_THROTTLE,
- DRI2_ASYNC_FLIP,
DRI2_FLIP,
DRI2_FLIP_THROTTLE,
DRI2_WAITMSC,
@@ -91,15 +91,17 @@ struct sna_dri_frame_event {
struct dri_bo {
struct kgem_bo *bo;
uint32_t name;
- } old_front, next_front, cache;
+ } scanout[2], cache;
- int off_delay;
+ int mode;
};
struct sna_dri_private {
- int refcnt;
PixmapPtr pixmap;
struct kgem_bo *bo;
+ bool scanout;
+ uint32_t size;
+ int refcnt;
};
static inline struct sna_dri_frame_event *
@@ -144,7 +146,8 @@ static uint32_t color_tiling(struct sna *sna, DrawablePtr draw)
static uint32_t other_tiling(struct sna *sna, DrawablePtr draw)
{
/* XXX Can mix color X / depth Y? */
- return kgem_choose_tiling(&sna->kgem, -I915_TILING_Y,
+ return kgem_choose_tiling(&sna->kgem,
+				  sna->kgem.gen >= 040 ? -I915_TILING_Y : -I915_TILING_X,
draw->width,
draw->height,
draw->bitsPerPixel);
@@ -173,6 +176,7 @@ static struct kgem_bo *sna_pixmap_set_dri(struct sna *sna,
}
assert(priv->cpu_damage == NULL);
+ assert(priv->gpu_bo->proxy == NULL);
if (priv->flush++)
return priv->gpu_bo;
@@ -198,13 +202,38 @@ static struct kgem_bo *sna_pixmap_set_dri(struct sna *sna,
static inline void *sna_pixmap_get_buffer(PixmapPtr pixmap)
{
assert(pixmap->refcnt);
- return ((void **)dixGetPrivateAddr(&pixmap->devPrivates, &sna_pixmap_key))[2];
+ return ((void **)__get_private(pixmap, sna_pixmap_key))[2];
}
static inline void sna_pixmap_set_buffer(PixmapPtr pixmap, void *ptr)
{
assert(pixmap->refcnt);
- ((void **)dixGetPrivateAddr(&pixmap->devPrivates, &sna_pixmap_key))[2] = ptr;
+ ((void **)__get_private(pixmap, sna_pixmap_key))[2] = ptr;
+}
+
+void
+sna_dri_pixmap_update_bo(struct sna *sna, PixmapPtr pixmap)
+{
+ DRI2Buffer2Ptr buffer;
+ struct sna_dri_private *private;
+ struct kgem_bo *bo;
+
+ buffer = sna_pixmap_get_buffer(pixmap);
+ if (buffer == NULL)
+ return;
+
+ private = get_private(buffer);
+ assert(private->pixmap == pixmap);
+
+ bo = sna_pixmap(pixmap)->gpu_bo;
+ if (private->bo == bo)
+ return;
+
+ kgem_bo_destroy(&sna->kgem, private->bo);
+ buffer->name = kgem_bo_flink(&sna->kgem, bo);
+ private->bo = ref(bo);
+
+ /* XXX DRI2InvalidateDrawable(&pixmap->drawable); */
}
static DRI2Buffer2Ptr
@@ -217,12 +246,15 @@ sna_dri_create_buffer(DrawablePtr draw,
struct sna_dri_private *private;
PixmapPtr pixmap;
struct kgem_bo *bo;
+ unsigned flags = CREATE_EXACT;
+ uint32_t size;
int bpp;
DBG(("%s(attachment=%d, format=%d, drawable=%dx%d)\n",
__FUNCTION__, attachment, format, draw->width, draw->height));
pixmap = NULL;
+ size = (uint32_t)draw->height << 16 | draw->width;
switch (attachment) {
case DRI2BufferFrontLeft:
pixmap = get_drawable_pixmap(draw);
@@ -236,7 +268,10 @@ sna_dri_create_buffer(DrawablePtr draw,
assert(private->pixmap == pixmap);
assert(sna_pixmap(pixmap)->gpu_bo == private->bo);
+ assert(sna_pixmap(pixmap)->pinned & PIN_DRI);
assert(kgem_bo_flink(&sna->kgem, private->bo) == buffer->name);
+ assert(8*private->bo->pitch >= pixmap->drawable.width * pixmap->drawable.bitsPerPixel);
+ assert(private->bo->pitch * pixmap->drawable.height <= kgem_bo_size(private->bo));
private->refcnt++;
return buffer;
@@ -252,6 +287,9 @@ sna_dri_create_buffer(DrawablePtr draw,
__FUNCTION__,
pixmap->drawable.width, pixmap->drawable.height,
pixmap, pixmap->refcnt));
+ if (pixmap == sna->front)
+ flags |= CREATE_SCANOUT;
+ size = (uint32_t)pixmap->drawable.height << 16 | pixmap->drawable.width;
break;
case DRI2BufferBackLeft:
@@ -260,12 +298,15 @@ sna_dri_create_buffer(DrawablePtr draw,
case DRI2BufferFakeFrontLeft:
case DRI2BufferFakeFrontRight:
bpp = draw->bitsPerPixel;
+ if (draw->width == sna->front->drawable.width &&
+ draw->height == sna->front->drawable.height)
+ flags |= CREATE_SCANOUT;
bo = kgem_create_2d(&sna->kgem,
draw->width,
draw->height,
draw->bitsPerPixel,
color_tiling(sna, draw),
- CREATE_SCANOUT | CREATE_EXACT);
+ flags);
break;
case DRI2BufferStencil:
@@ -296,7 +337,7 @@ sna_dri_create_buffer(DrawablePtr draw,
bo = kgem_create_2d(&sna->kgem,
ALIGN(draw->width, 64),
ALIGN((draw->height + 1) / 2, 64),
- bpp, I915_TILING_NONE, CREATE_EXACT);
+ bpp, I915_TILING_NONE, flags);
break;
case DRI2BufferDepth:
@@ -307,7 +348,7 @@ sna_dri_create_buffer(DrawablePtr draw,
bo = kgem_create_2d(&sna->kgem,
draw->width, draw->height, bpp,
other_tiling(sna, draw),
- CREATE_EXACT);
+ flags);
break;
default:
@@ -331,6 +372,8 @@ sna_dri_create_buffer(DrawablePtr draw,
private->refcnt = 1;
private->bo = bo;
private->pixmap = pixmap;
+ private->scanout = !!(flags & CREATE_SCANOUT);
+ private->size = size;
if (buffer->name == 0)
goto err;
@@ -415,13 +458,13 @@ damage_all:
pixmap->drawable.width,
pixmap->drawable.height);
sna_damage_destroy(&priv->cpu_damage);
- priv->undamaged = false;
} else {
sna_damage_subtract(&priv->cpu_damage, region);
if (priv->cpu_damage == NULL)
goto damage_all;
sna_damage_add(&priv->gpu_damage, region);
}
+ priv->cpu = false;
}
static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo)
@@ -430,6 +473,10 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo)
struct sna_pixmap *priv = sna_pixmap(pixmap);
RegionRec region;
+ assert(pixmap->drawable.width * pixmap->drawable.bitsPerPixel <= 8*bo->pitch);
+ assert(pixmap->drawable.height * bo->pitch <= kgem_bo_size(bo));
+ assert(bo->proxy == NULL);
+
/* Post damage on the new front buffer so that listeners, such
	 * as DisplayLink, know to take a copy and shove it over the USB,
* also for software cursors and the like.
@@ -446,12 +493,17 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo)
sna_damage_destroy(&priv->cpu_damage);
list_del(&priv->list);
priv->cpu = false;
- priv->undamaged = false;
assert(bo->refcnt);
if (priv->gpu_bo != bo) {
kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
priv->gpu_bo = ref(bo);
+ if (priv->mapped) {
+ assert(!priv->shm && priv->stride);
+ pixmap->devPrivate.ptr = PTR(priv->ptr);
+ pixmap->devKind = priv->stride;
+ priv->mapped = false;
+ }
}
if (bo->domain != DOMAIN_GPU)
bo->domain = DOMAIN_NONE;
@@ -459,17 +511,20 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo)
DamageRegionProcessPending(&pixmap->drawable);
}
-static void sna_dri_select_mode(struct sna *sna, struct kgem_bo *src, bool sync)
+static void sna_dri_select_mode(struct sna *sna, struct kgem_bo *dst, struct kgem_bo *src, bool sync)
{
struct drm_i915_gem_busy busy;
int mode;
- if (sna->kgem.gen < 60)
+ if (sna->kgem.gen < 060)
return;
if (sync) {
- DBG(("%s: sync, force RENDER ring\n", __FUNCTION__));
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ DBG(("%s: sync, force %s ring\n", __FUNCTION__,
+ sna->kgem.gen >= 070 ? "BLT" : "RENDER"));
+ kgem_set_mode(&sna->kgem,
+ sna->kgem.gen >= 070 ? KGEM_BLT : KGEM_RENDER,
+ dst);
return;
}
@@ -478,21 +533,21 @@ static void sna_dri_select_mode(struct sna *sna, struct kgem_bo *src, bool sync)
return;
}
- if (sna->kgem.has_semaphores) {
- DBG(("%s: have sempahores, prefering RENDER\n", __FUNCTION__));
- kgem_set_mode(&sna->kgem, KGEM_RENDER);
- return;
- }
-
VG_CLEAR(busy);
- busy.handle = src->handle;
+ busy.handle = dst->handle;
if (drmIoctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
return;
- DBG(("%s: src busy?=%x\n", __FUNCTION__, busy.busy));
+ DBG(("%s: dst busy?=%x\n", __FUNCTION__, busy.busy));
if (busy.busy == 0) {
- DBG(("%s: src is idle, using defaults\n", __FUNCTION__));
- return;
+ busy.handle = src->handle;
+ if (drmIoctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
+ return;
+ DBG(("%s: src busy?=%x\n", __FUNCTION__, busy.busy));
+ if (busy.busy == 0) {
+ DBG(("%s: src/dst is idle, using defaults\n", __FUNCTION__));
+ return;
+ }
}
/* Sandybridge introduced a separate ring which it uses to
@@ -513,6 +568,7 @@ static void sna_dri_select_mode(struct sna *sna, struct kgem_bo *src, bool sync)
mode = KGEM_RENDER;
if (busy.busy & (1 << 17))
mode = KGEM_BLT;
+ kgem_bo_mark_busy(dst, mode);
_kgem_set_mode(&sna->kgem, mode);
}
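+/*
+ * Editor's note: the upper half of busy.busy reports which ring the bo
+ * is active on; the code above reads bit 17 as "busy on the BLT ring".
+ * That bit layout is an inference from this test -- it is a kernel ABI
+ * detail not otherwise stated in the patch.
+ */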
@@ -525,6 +581,9 @@ sna_dri_copy_fallback(struct sna *sna, int bpp,
void *dst = kgem_bo_map__gtt(&sna->kgem, dst_bo);
void *src = kgem_bo_map__gtt(&sna->kgem, src_bo);
+ if (dst == NULL || src == NULL)
+ return;
+
DBG(("%s: src(%d, %d), dst(%d, %d) x %d\n",
__FUNCTION__, sx, sy, dx, dy, n));
@@ -573,7 +632,7 @@ sna_dri_copy_to_front(struct sna *sna, DrawablePtr draw, RegionPtr region,
if (sync)
sync = sna_pixmap_is_scanout(sna, pixmap);
- sna_dri_select_mode(sna, src_bo, sync);
+ sna_dri_select_mode(sna, dst_bo, src_bo, sync);
} else
sync = false;
@@ -628,10 +687,15 @@ sna_dri_copy_to_front(struct sna *sna, DrawablePtr draw, RegionPtr region,
dst_bo, 0, 0,
boxes, n);
} else {
+ unsigned flags;
+
+ flags = COPY_LAST;
+ if (flush)
+ flags |= COPY_SYNC;
sna->render.copy_boxes(sna, GXcopy,
(PixmapPtr)draw, src_bo, -draw->x-dx, -draw->y-dy,
pixmap, dst_bo, 0, 0,
- boxes, n, COPY_LAST);
+ boxes, n, flags);
DBG(("%s: flushing? %d\n", __FUNCTION__, flush));
if (flush) { /* STAT! */
@@ -717,7 +781,7 @@ sna_dri_copy_from_front(struct sna *sna, DrawablePtr draw, RegionPtr region,
dst_bo, -draw->x, -draw->y,
boxes, n);
} else {
- sna_dri_select_mode(sna, src_bo, false);
+ sna_dri_select_mode(sna, dst_bo, src_bo, false);
sna->render.copy_boxes(sna, GXcopy,
pixmap, src_bo, dx, dy,
(PixmapPtr)draw, dst_bo, -draw->x, -draw->y,
@@ -766,7 +830,7 @@ sna_dri_copy(struct sna *sna, DrawablePtr draw, RegionPtr region,
dst_bo, 0, 0,
boxes, n);
} else {
- sna_dri_select_mode(sna, src_bo, false);
+ sna_dri_select_mode(sna, dst_bo, src_bo, false);
sna->render.copy_boxes(sna, GXcopy,
(PixmapPtr)draw, src_bo, 0, 0,
(PixmapPtr)draw, dst_bo, 0, 0,
@@ -777,6 +841,42 @@ sna_dri_copy(struct sna *sna, DrawablePtr draw, RegionPtr region,
pixman_region_fini(&clip);
}
+static bool
+can_blit(struct sna * sna,
+ DrawablePtr draw,
+ DRI2BufferPtr front,
+ DRI2BufferPtr back)
+{
+ RegionPtr clip;
+ int w, h;
+ uint32_t s;
+
+ if (draw->type == DRAWABLE_PIXMAP)
+ return true;
+
+ clip = &((WindowPtr)draw)->clipList;
+ w = clip->extents.x2 - draw->x;
+ h = clip->extents.y2 - draw->y;
+ if ((w|h) < 0)
+ return false;
+
+ s = get_private(front)->size;
+ if ((s>>16) < h || (s&0xffff) < w) {
+ DBG(("%s: reject front size (%dx%d) < (%dx%d)\n", __func__,
+ s&0xffff, s>>16, w, h));
+ return false;
+ }
+
+ s = get_private(back)->size;
+ if ((s>>16) < h || (s&0xffff) < w) {
+		DBG(("%s: reject back size (%dx%d) < (%dx%d)\n", __func__,
+ s&0xffff, s>>16, w, h));
+ return false;
+ }
+
+ return true;
+}
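+/*
+ * Editor's illustration of the size packing checked above: the drawable
+ * size is stashed at buffer-creation time as height in the high 16 bits
+ * and width in the low 16 bits, so a 1920x1080 buffer is stored as
+ * (1080 << 16) | 1920 == 0x04380780 and unpacked with s >> 16 and
+ * s & 0xffff.
+ */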
+
static void
sna_dri_copy_region(DrawablePtr draw,
RegionPtr region,
@@ -789,6 +889,9 @@ sna_dri_copy_region(DrawablePtr draw,
void (*copy)(struct sna *, DrawablePtr, RegionPtr,
struct kgem_bo *, struct kgem_bo *, bool) = sna_dri_copy;
+ if (!can_blit(sna, draw, dst_buffer, src_buffer))
+ return;
+
if (dst_buffer->attachment == DRI2BufferFrontLeft) {
dst = sna_pixmap_get_bo(pixmap);
copy = (void *)sna_dri_copy_to_front;
@@ -860,7 +963,7 @@ sna_dri_get_pipe(DrawablePtr pDraw)
static struct sna_dri_frame_event *
sna_dri_window_get_chain(WindowPtr win)
{
- return ((void **)dixGetPrivateAddr(&win->devPrivates, &sna_window_key))[1];
+ return ((void **)__get_private(win, sna_window_key))[1];
}
static void
@@ -869,7 +972,7 @@ sna_dri_window_set_chain(WindowPtr win,
{
DBG(("%s: head now %p\n", __FUNCTION__, chain));
assert(win->drawable.type == DRAWABLE_WINDOW);
- ((void **)dixGetPrivateAddr(&win->devPrivates, &sna_window_key))[1] = chain;
+ ((void **)__get_private(win, sna_window_key))[1] = chain;
}
static void
@@ -947,8 +1050,10 @@ sna_dri_frame_event_info_free(struct sna *sna,
_sna_dri_destroy_buffer(sna, info->front);
_sna_dri_destroy_buffer(sna, info->back);
- if (info->old_front.bo)
- kgem_bo_destroy(&sna->kgem, info->old_front.bo);
+ assert(info->scanout[1].bo == NULL);
+
+ if (info->scanout[0].bo)
+ kgem_bo_destroy(&sna->kgem, info->scanout[0].bo);
if (info->cache.bo)
kgem_bo_destroy(&sna->kgem, info->cache.bo);
@@ -959,25 +1064,39 @@ sna_dri_frame_event_info_free(struct sna *sna,
free(info);
}
-static void
+static bool
sna_dri_page_flip(struct sna *sna, struct sna_dri_frame_event *info)
{
struct kgem_bo *bo = get_private(info->back)->bo;
+ struct dri_bo tmp;
DBG(("%s()\n", __FUNCTION__));
assert(sna_pixmap_get_buffer(sna->front) == info->front);
assert(get_drawable_pixmap(info->draw)->drawable.height * bo->pitch <= kgem_bo_size(bo));
+ assert(info->scanout[0].bo);
info->count = sna_page_flip(sna, bo, info, info->pipe);
+ if (!info->count)
+ return false;
+
+ info->scanout[1] = info->scanout[0];
+ info->scanout[0].bo = ref(bo);
+ info->scanout[0].name = info->back->name;
- info->old_front.name = info->front->name;
- info->old_front.bo = get_private(info->front)->bo;
+ tmp.bo = get_private(info->front)->bo;
+ tmp.name = info->front->name;
set_bo(sna->front, bo);
info->front->name = info->back->name;
get_private(info->front)->bo = bo;
+
+ info->back->name = tmp.name;
+ get_private(info->back)->bo = tmp.bo;
+
+ sna->dri.flip_pending = info;
+ return true;
}
static bool
@@ -1031,12 +1150,25 @@ can_flip(struct sna * sna,
if (sna_pixmap_get_buffer(pixmap) != front) {
DBG(("%s: no, DRI2 drawable is no longer attached (old name=%d, new name=%d) to pixmap=%ld\n",
__FUNCTION__, front->name,
- ((DRI2BufferPtr)sna_pixmap_get_buffer(pixmap))->name,
+ sna_pixmap_get_buffer(pixmap) ? ((DRI2BufferPtr)sna_pixmap_get_buffer(pixmap))->name : 0,
pixmap->drawable.serialNumber));
return false;
}
+ if (!get_private(front)->scanout) {
+		DBG(("%s: no, DRI2 drawable not attached at time of creation\n",
+ __FUNCTION__));
+ return false;
+ }
assert(get_private(front)->pixmap == sna->front);
+ assert(sna_pixmap(sna->front)->gpu_bo == get_private(front)->bo);
+
+ if (!get_private(back)->scanout) {
+		DBG(("%s: no, DRI2 drawable was too small at time of creation\n",
+ __FUNCTION__));
+ return false;
+ }
+ assert(get_private(back)->size == get_private(front)->size);
DBG(("%s: window size: %dx%d, clip=(%d, %d), (%d, %d) x %d\n",
__FUNCTION__,
@@ -1094,6 +1226,12 @@ can_exchange(struct sna * sna,
WindowPtr win = (WindowPtr)draw;
PixmapPtr pixmap;
+ /* XXX There is an inherent race between the DRI2 client and the DRI2
+ * compositor which is only masked if we force a blit and serialise
+ * the operations through the kernel command queue. Hopeless.
+ */
+ return false;
+
if (front->format != back->format) {
DBG(("%s: no, format mismatch, front = %d, back = %d\n",
__FUNCTION__, front->format, back->format));
@@ -1127,6 +1265,20 @@ can_exchange(struct sna * sna,
return false;
}
+ if (!get_private(front)->scanout) {
+		DBG(("%s: no, DRI2 drawable not attached at time of creation\n",
+ __FUNCTION__));
+ return false;
+ }
+ assert(get_private(front)->pixmap == sna->front);
+
+ if (!get_private(back)->scanout) {
+		DBG(("%s: no, DRI2 drawable was too small at time of creation\n",
+ __FUNCTION__));
+ return false;
+ }
+ assert(get_private(back)->size == get_private(front)->size);
+
return true;
}
@@ -1167,10 +1319,12 @@ sna_dri_exchange_buffers(DrawablePtr draw,
pixmap->drawable.width,
pixmap->drawable.height));
- DBG(("%s: back_bo pitch=%d, size=%d\n",
- __FUNCTION__, back_bo->pitch, kgem_bo_size(back_bo)));
- DBG(("%s: front_bo pitch=%d, size=%d\n",
- __FUNCTION__, front_bo->pitch, kgem_bo_size(front_bo)));
+ DBG(("%s: back_bo pitch=%d, size=%d, ref=%d\n",
+ __FUNCTION__, back_bo->pitch, kgem_bo_size(back_bo), back_bo->refcnt));
+ DBG(("%s: front_bo pitch=%d, size=%d, ref=%d\n",
+ __FUNCTION__, front_bo->pitch, kgem_bo_size(front_bo), front_bo->refcnt));
+ assert(front_bo->refcnt);
+ assert(back_bo->refcnt);
assert(sna_pixmap_get_buffer(pixmap) == front);
assert(pixmap->drawable.height * back_bo->pitch <= kgem_bo_size(back_bo));
@@ -1188,7 +1342,7 @@ sna_dri_exchange_buffers(DrawablePtr draw,
static void chain_swap(struct sna *sna,
DrawablePtr draw,
- struct drm_event_vblank *event,
+ int frame, unsigned int tv_sec, unsigned int tv_usec,
struct sna_dri_frame_event *chain)
{
drmVBlank vbl;
@@ -1209,7 +1363,7 @@ static void chain_swap(struct sna *sna,
DBG(("%s: performing chained exchange\n", __FUNCTION__));
sna_dri_exchange_buffers(draw, chain->front, chain->back);
type = DRI2_EXCHANGE_COMPLETE;
- } else {
+ } else if (can_blit(sna, draw, chain->front, chain->back)) {
DBG(("%s: emitting chained vsync'ed blit\n", __FUNCTION__));
chain->bo = sna_dri_copy_to_front(sna, draw, NULL,
@@ -1218,10 +1372,16 @@ static void chain_swap(struct sna *sna,
true);
type = DRI2_BLIT_COMPLETE;
+ } else {
+ DRI2SwapComplete(chain->client, draw,
+ 0, 0, 0, DRI2_BLIT_COMPLETE,
+ chain->client ? chain->event_complete : NULL, chain->event_data);
+ sna_dri_frame_event_info_free(sna, draw, chain);
+ return;
}
DRI2SwapComplete(chain->client, draw,
- event->sequence, event->tv_sec, event->tv_usec,
+ frame, tv_sec, tv_usec,
type, chain->client ? chain->event_complete : NULL, chain->event_data);
VG_CLEAR(vbl);
@@ -1273,19 +1433,17 @@ void sna_dri_vblank_handler(struct sna *sna, struct drm_event_vblank *event)
switch (info->type) {
case DRI2_FLIP:
/* If we can still flip... */
- if (can_flip(sna, draw, info->front, info->back)) {
- sna_dri_page_flip(sna, info);
- info->back->name = info->old_front.name;
- get_private(info->back)->bo = info->old_front.bo;
- info->old_front.bo = NULL;
+ if (can_flip(sna, draw, info->front, info->back) &&
+ sna_dri_page_flip(sna, info))
return;
- }
+
/* else fall through to blit */
case DRI2_SWAP:
- info->bo = sna_dri_copy_to_front(sna, draw, NULL,
- get_private(info->front)->bo,
- get_private(info->back)->bo,
- true);
+ if (can_blit(sna, draw, info->front, info->back))
+ info->bo = sna_dri_copy_to_front(sna, draw, NULL,
+ get_private(info->front)->bo,
+ get_private(info->back)->bo,
+ true);
info->type = DRI2_SWAP_WAIT;
/* fall through to SwapComplete */
case DRI2_SWAP_WAIT:
@@ -1325,7 +1483,9 @@ void sna_dri_vblank_handler(struct sna *sna, struct drm_event_vblank *event)
if (info->chain) {
sna_dri_remove_frame_event((WindowPtr)draw, info);
- chain_swap(sna, draw, event, info->chain);
+ chain_swap(sna, draw,
+ event->sequence, event->tv_sec, event->tv_usec,
+ info->chain);
draw = NULL;
}
@@ -1334,22 +1494,91 @@ done:
}
static void
+sna_dri_flip_get_back(struct sna *sna, struct sna_dri_frame_event *info)
+{
+ struct kgem_bo *bo;
+ uint32_t name;
+
+ DBG(("%s: scanout=(%d, %d), back=%d, cache=%d\n",
+ __FUNCTION__,
+ info->scanout[0].bo ? info->scanout[0].bo->handle : 0,
+ info->scanout[1].bo ? info->scanout[1].bo->handle : 0,
+ get_private(info->back)->bo->handle,
+ info->cache.bo ? info->cache.bo->handle : 0));
+
+ bo = get_private(info->back)->bo;
+ if (!(bo == info->scanout[0].bo || bo == info->scanout[1].bo))
+ return;
+
+ bo = info->cache.bo;
+ name = info->cache.name;
+ if (bo == NULL ||
+ bo == info->scanout[0].bo ||
+ bo == info->scanout[1].bo) {
+ if (bo) {
+ DBG(("%s: discarding old backbuffer\n", __FUNCTION__));
+ kgem_bo_destroy(&sna->kgem, bo);
+ }
+ DBG(("%s: allocating new backbuffer\n", __FUNCTION__));
+ bo = kgem_create_2d(&sna->kgem,
+ info->draw->width,
+ info->draw->height,
+ info->draw->bitsPerPixel,
+ get_private(info->front)->bo->tiling,
+ CREATE_SCANOUT | CREATE_EXACT);
+ name = kgem_bo_flink(&sna->kgem, bo);
+ }
+
+ info->cache.bo = get_private(info->back)->bo;
+ info->cache.name = info->back->name;
+
+ get_private(info->back)->bo = bo;
+ info->back->name = name;
+
+ assert(get_private(info->back)->bo != info->scanout[0].bo);
+ assert(get_private(info->back)->bo != info->scanout[1].bo);
+}
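+/*
+ * Editor's summary of the rotation above: if the client's back buffer
+ * is still one of the two live scanout buffers, it is swapped out --
+ * the old back moves into the cache slot and the cached (or a freshly
+ * allocated, scanout-capable) bo becomes the new back, so the client
+ * never renders into a buffer that is still being scanned out.
+ */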
+
+static bool
sna_dri_flip_continue(struct sna *sna, struct sna_dri_frame_event *info)
{
- struct dri_bo tmp;
+ DBG(("%s(mode=%d)\n", __FUNCTION__, info->mode));
- DBG(("%s()\n", __FUNCTION__));
+	if (info->mode > 1) {
+ if (get_private(info->front)->bo != sna_pixmap(sna->front)->gpu_bo)
+ return false;
- assert(sna_pixmap_get_buffer(get_drawable_pixmap(info->draw)) == info->front);
+ info->count = sna_page_flip(sna,
+ get_private(info->front)->bo,
+ info, info->pipe);
+ if (!info->count)
+ return false;
- tmp = info->old_front;
+ info->scanout[1] = info->scanout[0];
+ info->scanout[0].bo = ref(get_private(info->front)->bo);
+ info->scanout[0].name = info->front->name;
+ sna->dri.flip_pending = info;
+ } else {
+ if (!info->draw)
+ return false;
- sna_dri_page_flip(sna, info);
+ assert(sna_pixmap_get_buffer(get_drawable_pixmap(info->draw)) == info->front);
+ if (!can_flip(sna, info->draw, info->front, info->back))
+ return false;
- get_private(info->back)->bo = tmp.bo;
- info->back->name = tmp.name;
+ if (!sna_dri_page_flip(sna, info))
+ return false;
+
+ sna_dri_flip_get_back(sna, info);
+ DRI2SwapComplete(info->client, info->draw,
+ 0, 0, 0,
+ DRI2_FLIP_COMPLETE,
+ info->client ? info->event_complete : NULL,
+ info->event_data);
+ }
- info->next_front.name = 0;
+ info->mode = 0;
+ return true;
}
static void chain_flip(struct sna *sna)
@@ -1367,29 +1596,17 @@ static void chain_flip(struct sna *sna)
}
if (chain->type == DRI2_FLIP &&
- can_flip(sna, chain->draw, chain->front, chain->back)) {
+ can_flip(sna, chain->draw, chain->front, chain->back) &&
+ sna_dri_page_flip(sna, chain)) {
DBG(("%s: performing chained flip\n", __FUNCTION__));
- sna_dri_page_flip(sna, chain);
-
- chain->back->name = chain->old_front.name;
- get_private(chain->back)->bo = chain->old_front.bo;
- chain->old_front.bo = NULL;
-
- if (chain->count == 0) {
- DRI2SwapComplete(chain->client, chain->draw, 0, 0, 0,
- DRI2_EXCHANGE_COMPLETE,
- chain->event_complete,
- chain->event_data);
- sna_dri_frame_event_info_free(sna, chain->draw, chain);
- } else
- sna->dri.flip_pending = chain;
} else {
- DBG(("%s: emitting chained vsync'ed blit\n", __FUNCTION__));
-
- chain->bo = sna_dri_copy_to_front(sna, chain->draw, NULL,
- get_private(chain->front)->bo,
- get_private(chain->back)->bo,
- true);
+ if (can_blit(sna, chain->draw, chain->front, chain->back)) {
+ DBG(("%s: emitting chained vsync'ed blit\n", __FUNCTION__));
+ chain->bo = sna_dri_copy_to_front(sna, chain->draw, NULL,
+ get_private(chain->front)->bo,
+ get_private(chain->back)->bo,
+ true);
+ }
DRI2SwapComplete(chain->client, chain->draw, 0, 0, 0,
DRI2_BLIT_COMPLETE, chain->client ? chain->event_complete : NULL, chain->event_data);
sna_dri_frame_event_info_free(sna, chain->draw, chain);
@@ -1406,6 +1623,14 @@ static void sna_dri_flip_event(struct sna *sna,
flip->fe_tv_usec,
flip->type));
+ if (flip->cache.bo == NULL) {
+ flip->cache = flip->scanout[1];
+ flip->scanout[1].bo = NULL;
+ }
+ if (flip->scanout[1].bo) {
+ kgem_bo_destroy(&sna->kgem, flip->scanout[1].bo);
+ flip->scanout[1].bo = NULL;
+ }
if (sna->dri.flip_pending == flip)
sna->dri.flip_pending = NULL;
@@ -1433,44 +1658,31 @@ static void sna_dri_flip_event(struct sna *sna,
if (sna->dri.flip_pending) {
sna_dri_frame_event_info_free(sna, flip->draw, flip);
chain_flip(sna);
- } else if (!flip->next_front.name) {
- /* Keep the pageflipping running for a couple of frames
- * so we keep the uncached scanouts alive.
- */
- DBG(("%s: flip chain complete, off-delay=%d\n",
- __FUNCTION__, flip->off_delay));
- if (flip->off_delay-- && flip->draw &&
- can_flip(sna, flip->draw, flip->front, flip->front) &&
- (flip->count = sna_page_flip(sna,
- get_private(flip->front)->bo,
- flip, flip->pipe))) {
- assert(flip == sna_dri_window_get_chain((WindowPtr)flip->draw));
- sna->dri.flip_pending = flip;
- } else {
- DBG(("%s: flip chain complete, off\n", __FUNCTION__));
- sna_dri_frame_event_info_free(sna, flip->draw, flip);
+ } else if (!flip->mode) {
+ DBG(("%s: flip chain complete\n", __FUNCTION__));
+
+ if (flip->chain) {
+ sna_dri_remove_frame_event((WindowPtr)flip->draw,
+ flip);
+ chain_swap(sna, flip->draw,
+ flip->fe_frame,
+ flip->fe_tv_sec,
+ flip->fe_tv_usec,
+ flip->chain);
+ flip->draw = NULL;
}
- } else if (flip->draw &&
- can_flip(sna, flip->draw, flip->front, flip->back)) {
- sna_dri_flip_continue(sna, flip);
- DRI2SwapComplete(flip->client, flip->draw,
- 0, 0, 0,
- DRI2_FLIP_COMPLETE,
- flip->client ? flip->event_complete : NULL,
- flip->event_data);
- if (flip->count)
- sna->dri.flip_pending = flip;
- else
- sna_dri_frame_event_info_free(sna, flip->draw, flip);
- flip->off_delay = FLIP_OFF_DELAY;
- } else {
+
+ sna_dri_frame_event_info_free(sna, flip->draw, flip);
+ } else if (!sna_dri_flip_continue(sna, flip)) {
DBG(("%s: no longer able to flip\n", __FUNCTION__));
if (flip->draw) {
- flip->bo = sna_dri_copy_to_front(sna, flip->draw, NULL,
- get_private(flip->front)->bo,
- get_private(flip->back)->bo,
- false);
+ if (can_blit(sna, flip->draw, flip->front, flip->back)) {
+ flip->bo = sna_dri_copy_to_front(sna, flip->draw, NULL,
+ get_private(flip->front)->bo,
+ get_private(flip->back)->bo,
+ false);
+ }
DRI2SwapComplete(flip->client, flip->draw,
0, 0, 0,
DRI2_BLIT_COMPLETE,
@@ -1482,59 +1694,6 @@ static void sna_dri_flip_event(struct sna *sna,
}
break;
-#if USE_ASYNC_SWAP
- case DRI2_ASYNC_FLIP:
- DBG(("%s: async swap flip completed on pipe %d, pending? %d, new? %d\n",
- __FUNCTION__, flip->pipe,
- sna->dri.flip_pending != NULL,
- flip->front->name != flip->next_front.name));
-
- if (sna->dri.flip_pending) {
- chain_flip(sna);
- goto finish_async_flip;
- } else if (flip->front->name != flip->next_front.name) {
- DBG(("%s: async flip continuing\n", __FUNCTION__));
-
- flip->cache = flip->old_front;
- flip->old_front = flip->next_front;
-
- flip->count = sna_page_flip(sna,
- get_private(flip->front)->bo,
- flip, flip->pipe);
- if (flip->count == 0)
- goto finish_async_flip;
-
- flip->next_front.bo = get_private(flip->front)->bo;
- flip->next_front.name = flip->front->name;
- flip->off_delay = FLIP_OFF_DELAY;
-
- sna->dri.flip_pending = flip;
- } else if (flip->draw &&
- can_flip(sna, flip->draw, flip->front, flip->back) &&
- flip->off_delay--) {
- assert(flip == sna_dri_window_get_chain((WindowPtr)flip->draw));
- DBG(("%s: queuing no-flip [delay=%d]\n",
- __FUNCTION__, flip->off_delay));
- /* Just queue a no-op flip to trigger another event */
- flip->count = sna_page_flip(sna,
- get_private(flip->front)->bo,
- flip, flip->pipe);
- if (flip->count == 0)
- goto finish_async_flip;
-
- assert(flip->next_front.bo == get_private(flip->front)->bo);
- assert(flip->next_front.name == flip->front->name);
-
- sna->dri.flip_pending = flip;
- } else {
-finish_async_flip:
- DBG(("%s: async flip completed (drawable gone? %d)\n",
- __FUNCTION__, flip->draw == NULL));
- sna_dri_frame_event_info_free(sna, flip->draw, flip);
- }
- break;
-#endif
-
default:
xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING,
"%s: unknown vblank event received\n", __func__);
@@ -1564,213 +1723,22 @@ sna_dri_page_flip_handler(struct sna *sna,
sna_dri_flip_event(sna, info);
}
-static bool
-sna_dri_schedule_flip(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
- DRI2BufferPtr back, CARD64 *target_msc, CARD64 divisor,
- CARD64 remainder, DRI2SwapEventPtr func, void *data)
-{
- struct sna *sna = to_sna_from_drawable(draw);
- struct sna_dri_frame_event *info;
- drmVBlank vbl;
- int pipe;
- CARD64 current_msc;
-
- DBG(("%s(target_msc=%llu, divisor=%llu, remainder=%llu)\n",
- __FUNCTION__,
- (long long)*target_msc,
- (long long)divisor,
- (long long)remainder));
-
- VG_CLEAR(vbl);
-
- pipe = sna_dri_get_pipe(draw);
- if (pipe == -1) {
- /* XXX WARN_ON(sna->dri.flip_pending) ? */
- if (sna->dri.flip_pending == NULL) {
- sna_dri_exchange_buffers(draw, front, back);
- DRI2SwapComplete(client, draw, 0, 0, 0,
- DRI2_EXCHANGE_COMPLETE, func, data);
- return true;
- } else
- return false;
- }
-
- /* Truncate to match kernel interfaces; means occasional overflow
- * misses, but that's generally not a big deal */
- divisor &= 0xffffffff;
- if (divisor == 0) {
- DBG(("%s: performing immediate swap on pipe %d, pending? %d\n",
- __FUNCTION__, pipe, sna->dri.flip_pending != NULL));
-
- info = sna->dri.flip_pending;
- if (info && info->draw == draw && info->type == DRI2_FLIP_THROTTLE) {
- DBG(("%s: chaining flip\n", __FUNCTION__));
- info->next_front.name = 1;
- return true;
- }
-
- info = calloc(1, sizeof(struct sna_dri_frame_event));
- if (info == NULL)
- return false;
-
- info->type = DRI2_FLIP_THROTTLE;
-
- info->draw = draw;
- info->client = client;
- info->event_complete = func;
- info->event_data = data;
- info->front = front;
- info->back = back;
- info->pipe = pipe;
-
- sna_dri_add_frame_event(draw, info);
- sna_dri_reference_buffer(front);
- sna_dri_reference_buffer(back);
-
- if (sna->dri.flip_pending) {
- /* We need to first wait (one vblank) for the
- * async flips to complete before this client
- * can take over.
- */
- DBG(("%s: queueing flip after pending completion\n",
- __FUNCTION__));
- info->type = DRI2_FLIP;
- sna->dri.flip_pending = info;
- return true;
- }
-
- sna_dri_page_flip(sna, info);
-
- if (info->count == 0) {
- info->back->name = info->old_front.name;
- get_private(info->back)->bo = info->old_front.bo;
- info->old_front.bo = NULL;
-
- DRI2SwapComplete(info->client, draw, 0, 0, 0,
- DRI2_EXCHANGE_COMPLETE,
- info->event_complete,
- info->event_data);
- sna_dri_frame_event_info_free(sna, draw, info);
- } else if (info->type != DRI2_FLIP) {
- get_private(info->back)->bo =
- kgem_create_2d(&sna->kgem,
- draw->width,
- draw->height,
- draw->bitsPerPixel,
- get_private(info->front)->bo->tiling,
- CREATE_SCANOUT | CREATE_EXACT);
- info->back->name = kgem_bo_flink(&sna->kgem,
- get_private(info->back)->bo);
- info->off_delay = FLIP_OFF_DELAY;
- sna->dri.flip_pending = info;
-
- DRI2SwapComplete(info->client, draw, 0, 0, 0,
- DRI2_EXCHANGE_COMPLETE,
- info->event_complete,
- info->event_data);
- } else {
- info->back->name = info->old_front.name;
- get_private(info->back)->bo = info->old_front.bo;
- info->old_front.bo = NULL;
- }
- } else {
- info = calloc(1, sizeof(struct sna_dri_frame_event));
- if (info == NULL)
- return false;
-
- info->draw = draw;
- info->client = client;
- info->event_complete = func;
- info->event_data = data;
- info->front = front;
- info->back = back;
- info->pipe = pipe;
- info->type = DRI2_FLIP;
-
- sna_dri_add_frame_event(draw, info);
- sna_dri_reference_buffer(front);
- sna_dri_reference_buffer(back);
-
- /* Get current count */
- vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe);
- vbl.request.sequence = 0;
- if (sna_wait_vblank(sna, &vbl)) {
- sna_dri_frame_event_info_free(sna, draw, info);
- return false;
- }
-
- current_msc = vbl.reply.sequence;
- *target_msc &= 0xffffffff;
- remainder &= 0xffffffff;
-
- vbl.request.type =
- DRM_VBLANK_ABSOLUTE |
- DRM_VBLANK_EVENT |
- pipe_select(pipe);
-
- /*
- * If divisor is zero, or current_msc is smaller than target_msc
- * we just need to make sure target_msc passes before initiating
- * the swap.
- */
- if (current_msc < *target_msc) {
- DBG(("%s: waiting for swap: current=%d, target=%d, divisor=%d\n",
- __FUNCTION__,
- (int)current_msc,
- (int)*target_msc,
- (int)divisor));
- vbl.request.sequence = *target_msc;
- } else {
- DBG(("%s: missed target, queueing event for next: current=%d, target=%d, divisor=%d\n",
- __FUNCTION__,
- (int)current_msc,
- (int)*target_msc,
- (int)divisor));
-
- vbl.request.sequence = current_msc - current_msc % divisor + remainder;
-
- /*
- * If the calculated deadline vbl.request.sequence is
- * smaller than or equal to current_msc, it means
- * we've passed the last point when effective onset
- * frame seq could satisfy *seq % divisor == remainder,
- * so we need to wait for the next time this will
- * happen.
- *
- * This comparison takes the 1 frame swap delay
- * in pageflipping mode into account.
- */
- if (vbl.request.sequence <= current_msc)
- vbl.request.sequence += divisor;
-
- /* Adjust returned value for 1 frame pageflip offset */
- *target_msc = vbl.reply.sequence + 1;
- }
-
- /* Account for 1 frame extra pageflip delay */
- vbl.request.sequence -= 1;
- vbl.request.signal = (unsigned long)info;
- if (sna_wait_vblank(sna, &vbl)) {
- sna_dri_frame_event_info_free(sna, draw, info);
- return false;
- }
- }
-
- return true;
-}
-
static void
sna_dri_immediate_xchg(struct sna *sna,
DrawablePtr draw,
- struct sna_dri_frame_event *info)
+ struct sna_dri_frame_event *info,
+ bool sync)
{
drmVBlank vbl;
- DBG(("%s: emitting immediate exchange, throttling client\n",
- __FUNCTION__));
+ if (sna->flags & SNA_NO_WAIT)
+ sync = false;
+
+ DBG(("%s: emitting immediate exchange, throttling client, synced? %d\n",
+ __FUNCTION__, sync));
VG_CLEAR(vbl);
- if ((sna->flags & SNA_NO_WAIT) == 0) {
+ if (sync) {
info->type = DRI2_XCHG_THROTTLE;
if (sna_dri_window_get_chain((WindowPtr)draw) == info) {
DBG(("%s: no pending xchg, starting chain\n",
@@ -1804,16 +1772,20 @@ sna_dri_immediate_xchg(struct sna *sna,
static void
sna_dri_immediate_blit(struct sna *sna,
DrawablePtr draw,
- struct sna_dri_frame_event *info)
+ struct sna_dri_frame_event *info,
+ bool sync)
{
- drmVBlank vbl;
+ if (sna->flags & SNA_NO_WAIT)
+ sync = false;
- DBG(("%s: emitting immediate blit, throttling client\n", __FUNCTION__));
- VG_CLEAR(vbl);
+ DBG(("%s: emitting immediate blit, throttling client, synced? %d\n",
+ __FUNCTION__, sync));
- if ((sna->flags & SNA_NO_WAIT) == 0) {
+ if (sync) {
info->type = DRI2_SWAP_THROTTLE;
if (sna_dri_window_get_chain((WindowPtr)draw) == info) {
+ drmVBlank vbl;
+
DBG(("%s: no pending blit, starting chain\n",
__FUNCTION__));
@@ -1826,6 +1798,7 @@ sna_dri_immediate_blit(struct sna *sna,
info->event_complete,
info->event_data);
+ VG_CLEAR(vbl);
vbl.request.type =
DRM_VBLANK_RELATIVE |
DRM_VBLANK_NEXTONMISS |
@@ -1849,6 +1822,204 @@ sna_dri_immediate_blit(struct sna *sna,
}
}
+static CARD64
+get_current_msc_for_target(struct sna *sna, CARD64 target_msc, int pipe)
+{
+ CARD64 ret = -1;
+
+ if (target_msc && (sna->flags & SNA_NO_WAIT) == 0) {
+ drmVBlank vbl;
+
+ VG_CLEAR(vbl);
+ vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe);
+ vbl.request.sequence = 0;
+ if (sna_wait_vblank(sna, &vbl) == 0)
+ ret = vbl.reply.sequence;
+ }
+
+ return ret;
+}
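+/*
+ * Editor's note: the (CARD64)-1 returned when no wait is needed is the
+ * maximum unsigned value, so the caller's test
+ * "current_msc >= *target_msc - 1" always holds and such swaps take
+ * the immediate path.
+ */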
+
+static bool
+sna_dri_schedule_flip(ClientPtr client, DrawablePtr draw,
+ DRI2BufferPtr front, DRI2BufferPtr back, int pipe,
+ CARD64 *target_msc, CARD64 divisor, CARD64 remainder,
+ DRI2SwapEventPtr func, void *data)
+{
+ struct sna *sna = to_sna_from_drawable(draw);
+ struct sna_dri_frame_event *info;
+ drmVBlank vbl;
+ CARD64 current_msc;
+
+ current_msc = get_current_msc_for_target(sna, *target_msc, pipe);
+
+ DBG(("%s: target_msc=%u, current_msc=%u, divisor=%u\n", __FUNCTION__,
+ (uint32_t)*target_msc, (uint32_t)current_msc, (uint32_t)divisor));
+
+ if (divisor == 0 && current_msc >= *target_msc - 1) {
+ info = sna->dri.flip_pending;
+
+ DBG(("%s: performing immediate swap on pipe %d, pending? %d, mode: %d\n",
+ __FUNCTION__, pipe, info != NULL, info ? info->mode : 0));
+
+ if (info &&
+ info->draw == draw) {
+ assert(info->type == DRI2_FLIP_THROTTLE);
+ assert(info->front == front);
+ if (info->back != back) {
+ _sna_dri_destroy_buffer(sna, info->back);
+ info->back = back;
+ sna_dri_reference_buffer(back);
+ }
+ if (current_msc >= *target_msc) {
+ DBG(("%s: executing xchg of pending flip\n",
+ __FUNCTION__));
+ sna_dri_exchange_buffers(draw, front, back);
+ info->mode = 2;
+ goto new_back;
+ } else {
+ DBG(("%s: chaining flip\n", __FUNCTION__));
+ info->mode = 1;
+ current_msc++;
+ goto out;
+ }
+ }
+
+ info = calloc(1, sizeof(struct sna_dri_frame_event));
+ if (info == NULL)
+ return false;
+
+	info->type = sna->flags & SNA_TRIPLE_BUFFER ? DRI2_FLIP_THROTTLE : DRI2_FLIP;
+
+ info->draw = draw;
+ info->client = client;
+ info->event_complete = func;
+ info->event_data = data;
+ info->front = front;
+ info->back = back;
+ info->pipe = pipe;
+
+ info->scanout[0].bo = ref(get_private(front)->bo);
+ info->scanout[0].name = info->front->name;
+
+ sna_dri_add_frame_event(draw, info);
+ sna_dri_reference_buffer(front);
+ sna_dri_reference_buffer(back);
+
+ if (sna->dri.flip_pending) {
+ /* We need to first wait (one vblank) for the
+ * async flips to complete before this client
+ * can take over.
+ */
+ DBG(("%s: queueing flip after pending completion\n",
+ __FUNCTION__));
+ info->type = DRI2_FLIP;
+ sna->dri.flip_pending = info;
+ *target_msc = current_msc + 1;
+ return true;
+ }
+
+ if (!sna_dri_page_flip(sna, info)) {
+ sna_dri_frame_event_info_free(sna, draw, info);
+ return false;
+ }
+
+ if (info->type != DRI2_FLIP) {
+ current_msc++;
+new_back:
+ sna_dri_flip_get_back(sna, info);
+ DRI2SwapComplete(client, draw, 0, 0, 0,
+ DRI2_EXCHANGE_COMPLETE,
+ func, data);
+ }
+out:
+ *target_msc = current_msc;
+ return true;
+ }
+
+ info = calloc(1, sizeof(struct sna_dri_frame_event));
+ if (info == NULL)
+ return false;
+
+ info->draw = draw;
+ info->client = client;
+ info->event_complete = func;
+ info->event_data = data;
+ info->front = front;
+ info->back = back;
+ info->pipe = pipe;
+ info->type = DRI2_FLIP;
+
+ info->scanout[0].bo = ref(get_private(front)->bo);
+ info->scanout[0].name = info->front->name;
+
+ sna_dri_add_frame_event(draw, info);
+ sna_dri_reference_buffer(front);
+ sna_dri_reference_buffer(back);
+
+ *target_msc &= 0xffffffff;
+ remainder &= 0xffffffff;
+
+ VG_CLEAR(vbl);
+
+ vbl.request.type =
+ DRM_VBLANK_ABSOLUTE |
+ DRM_VBLANK_EVENT |
+ pipe_select(pipe);
+
+ /*
+ * If divisor is zero, or current_msc is smaller than target_msc
+ * we just need to make sure target_msc passes before initiating
+ * the swap.
+ */
+ if (current_msc <= *target_msc - 1) {
+ DBG(("%s: waiting for swap: current=%d, target=%d, divisor=%d\n",
+ __FUNCTION__,
+ (int)current_msc,
+ (int)*target_msc,
+ (int)divisor));
+ vbl.request.sequence = *target_msc;
+ } else {
+ DBG(("%s: missed target, queueing event for next: current=%d, target=%d, divisor=%d\n",
+ __FUNCTION__,
+ (int)current_msc,
+ (int)*target_msc,
+ (int)divisor));
+
+ if (divisor == 0)
+ divisor = 1;
+
+ vbl.request.sequence = current_msc - current_msc % divisor + remainder;
+
+ /*
+ * If the calculated deadline vbl.request.sequence is
+ * smaller than or equal to current_msc, it means
+ * we've passed the last point when effective onset
+ * frame seq could satisfy *seq % divisor == remainder,
+ * so we need to wait for the next time this will
+ * happen.
+ *
+ * This comparison takes the 1 frame swap delay
+ * in pageflipping mode into account.
+ */
+ if (vbl.request.sequence <= current_msc)
+ vbl.request.sequence += divisor;
+
+ /* Adjust returned value for 1 frame pageflip offset */
+ *target_msc = vbl.reply.sequence;
+ }
+
+ /* Account for 1 frame extra pageflip delay */
+ vbl.request.sequence -= 1;
+ vbl.request.signal = (unsigned long)info;
+ if (sna_wait_vblank(sna, &vbl)) {
+ sna_dri_frame_event_info_free(sna, draw, info);
+ return false;
+ }
+
+ return true;
+}
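+/*
+ * Worked example for the divisor/remainder logic above (editor's note):
+ * with current_msc == 103, divisor == 16 and remainder == 4, the
+ * deadline is 103 - 103 % 16 + 4 == 100; since 100 <= 103 the next
+ * satisfying frame is 100 + 16 == 116, and one frame is subtracted
+ * (115) to account for the pageflip completing on the following vblank.
+ */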
+
/*
* ScheduleSwap is responsible for requesting a DRM vblank event for the
* appropriate frame.
@@ -1889,37 +2060,26 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
(long long)divisor,
(long long)remainder));
- if (can_flip(sna, draw, front, back)) {
- DBG(("%s: try flip\n", __FUNCTION__));
- if (sna_dri_schedule_flip(client, draw, front, back,
- target_msc, divisor, remainder,
- func, data))
- return TRUE;
- }
+ /* Truncate to match kernel interfaces; means occasional overflow
+ * misses, but that's generally not a big deal */
+ *target_msc &= 0xffffffff;
+ divisor &= 0xffffffff;
+ remainder &= 0xffffffff;
/* Drawable not displayed... just complete the swap */
pipe = sna_dri_get_pipe(draw);
if (pipe == -1) {
- if (can_exchange(sna, draw, front, back)) {
- DBG(("%s: unattached, exchange pixmaps\n", __FUNCTION__));
- sna_dri_exchange_buffers(draw, front, back);
-
- DRI2SwapComplete(client, draw, 0, 0, 0,
- DRI2_EXCHANGE_COMPLETE, func, data);
- return TRUE;
- }
-
DBG(("%s: off-screen, immediate update\n", __FUNCTION__));
goto blit_fallback;
}
- VG_CLEAR(vbl);
+ if (can_flip(sna, draw, front, back) &&
+ sna_dri_schedule_flip(client, draw, front, back, pipe,
+ target_msc, divisor, remainder,
+ func, data))
+ return TRUE;
- /* Truncate to match kernel interfaces; means occasional overflow
- * misses, but that's generally not a big deal */
- *target_msc &= 0xffffffff;
- divisor &= 0xffffffff;
- remainder &= 0xffffffff;
+ VG_CLEAR(vbl);
info = calloc(1, sizeof(struct sna_dri_frame_event));
if (!info)
@@ -1938,21 +2098,25 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
sna_dri_reference_buffer(back);
info->type = swap_type;
- if (divisor == 0) {
- if (can_exchange(sna, draw, front, back))
- sna_dri_immediate_xchg(sna, draw, info);
- else
- sna_dri_immediate_blit(sna, draw, info);
- return TRUE;
- }
- /* Get current count */
- vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe);
- vbl.request.sequence = 0;
- if (sna_wait_vblank(sna, &vbl))
- goto blit_fallback;
+ current_msc = get_current_msc_for_target(sna, *target_msc, pipe);
+ DBG(("%s: target_msc=%u, current_msc=%u, divisor=%u\n", __FUNCTION__,
+ (uint32_t)*target_msc, (uint32_t)current_msc, (uint32_t)divisor));
- current_msc = vbl.reply.sequence;
+ if (divisor == 0 && current_msc >= *target_msc - 1) {
+ bool sync = current_msc < *target_msc;
+ if (can_exchange(sna, draw, front, back)) {
+ sna_dri_immediate_xchg(sna, draw, info, sync);
+ } else if (can_blit(sna, draw, front, back)) {
+ sna_dri_immediate_blit(sna, draw, info, sync);
+ } else {
+ DRI2SwapComplete(client, draw, 0, 0, 0,
+ DRI2_BLIT_COMPLETE, func, data);
+ sna_dri_frame_event_info_free(sna, draw, info);
+ }
+ *target_msc = current_msc + sync;
+ return TRUE;
+ }
/*
* If divisor is zero, or current_msc is smaller than target_msc
@@ -1991,6 +2155,9 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
(int)*target_msc,
(int)divisor));
+ if (divisor == 0)
+ divisor = 1;
+
vbl.request.type =
DRM_VBLANK_ABSOLUTE |
DRM_VBLANK_EVENT |
@@ -2007,27 +2174,27 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
*/
if (vbl.request.sequence < current_msc)
vbl.request.sequence += divisor;
- vbl.request.sequence -= 1;
+ *target_msc = vbl.reply.sequence;
+ vbl.request.sequence -= 1;
vbl.request.signal = (unsigned long)info;
if (sna_wait_vblank(sna, &vbl))
goto blit_fallback;
- *target_msc = vbl.reply.sequence;
return TRUE;
blit_fallback:
+ pipe = DRI2_BLIT_COMPLETE;
if (can_exchange(sna, draw, front, back)) {
DBG(("%s -- xchg\n", __FUNCTION__));
sna_dri_exchange_buffers(draw, front, back);
pipe = DRI2_EXCHANGE_COMPLETE;
- } else {
+ } else if (can_blit(sna, draw, front, back)) {
DBG(("%s -- blit\n", __FUNCTION__));
sna_dri_copy_to_front(sna, draw, NULL,
get_private(front)->bo,
get_private(back)->bo,
false);
- pipe = DRI2_BLIT_COMPLETE;
}
if (info)
sna_dri_frame_event_info_free(sna, draw, info);
@@ -2043,100 +2210,31 @@ sna_dri_async_swap(ClientPtr client, DrawablePtr draw,
DRI2SwapEventPtr func, void *data)
{
struct sna *sna = to_sna_from_drawable(draw);
- struct sna_dri_frame_event *info;
- struct kgem_bo *bo;
- int name;
+ CARD64 target_msc = 0;
+ int pipe;
DBG(("%s()\n", __FUNCTION__));
- if (!can_flip(sna, draw, front, back)) {
-blit:
+ if (!can_flip(sna, draw, front, back) ||
+ (pipe = sna_dri_get_pipe(draw)) < 0 ||
+ !sna_dri_schedule_flip(client, draw, front, back, pipe,
+ &target_msc, 0, 0, func, data)) {
+ pipe = DRI2_BLIT_COMPLETE;
if (can_exchange(sna, draw, front, back)) {
DBG(("%s: unable to flip, so xchg\n", __FUNCTION__));
sna_dri_exchange_buffers(draw, front, back);
- name = DRI2_EXCHANGE_COMPLETE;
- } else {
+ pipe = DRI2_EXCHANGE_COMPLETE;
+ } else if (can_blit(sna, draw, front, back)) {
DBG(("%s: unable to flip, so blit\n", __FUNCTION__));
sna_dri_copy_to_front(sna, draw, NULL,
get_private(front)->bo,
get_private(back)->bo,
false);
- name = DRI2_BLIT_COMPLETE;
- }
-
- DRI2SwapComplete(client, draw, 0, 0, 0, name, func, data);
- return name == DRI2_EXCHANGE_COMPLETE;
- }
-
- bo = NULL;
- name = 0;
-
- info = sna->dri.flip_pending;
- if (info == NULL) {
- int pipe = sna_dri_get_pipe(draw);
- if (pipe == -1)
- goto blit;
-
- DBG(("%s: no pending flip, so updating scanout\n",
- __FUNCTION__));
-
- info = calloc(1, sizeof(struct sna_dri_frame_event));
- if (!info)
- goto blit;
-
- info->client = client;
- info->draw = draw;
- info->type = DRI2_ASYNC_FLIP;
- info->pipe = pipe;
- info->front = front;
- info->back = back;
-
- sna_dri_add_frame_event(draw, info);
- sna_dri_reference_buffer(front);
- sna_dri_reference_buffer(back);
-
- sna_dri_page_flip(sna, info);
-
- info->next_front.name = info->front->name;
- info->next_front.bo = get_private(info->front)->bo;
- info->off_delay = FLIP_OFF_DELAY;
- } else if (info->type != DRI2_ASYNC_FLIP) {
- /* A normal vsync'ed client is finishing, wait for it
- * to unpin the old framebuffer before taking over.
- */
- goto blit;
- } else {
- DBG(("%s: pending flip, chaining next\n", __FUNCTION__));
- if (info->next_front.name == info->front->name) {
- name = info->cache.name;
- bo = info->cache.bo;
- } else {
- name = info->front->name;
- bo = get_private(info->front)->bo;
}
- info->front->name = info->back->name;
- get_private(info->front)->bo = get_private(info->back)->bo;
- }
- if (bo == NULL) {
- DBG(("%s: creating new back buffer\n", __FUNCTION__));
- bo = kgem_create_2d(&sna->kgem,
- draw->width,
- draw->height,
- draw->bitsPerPixel,
- get_private(info->front)->bo->tiling,
- CREATE_SCANOUT | CREATE_EXACT);
- name = kgem_bo_flink(&sna->kgem, bo);
+ DRI2SwapComplete(client, draw, 0, 0, 0, pipe, func, data);
+ return pipe == DRI2_EXCHANGE_COMPLETE;
}
- assert(bo->refcnt);
- get_private(info->back)->bo = bo;
- info->back->name = name;
-
- set_bo(sna->front, get_private(info->front)->bo);
- sna->dri.flip_pending = info;
-
- DRI2SwapComplete(client, draw, 0, 0, 0,
- DRI2_EXCHANGE_COMPLETE, func, data);
return TRUE;
}
#endif
@@ -2291,6 +2389,17 @@ out_complete:
}
#endif
+static const char *dri_driver_name(struct sna *sna)
+{
+ const char *s = xf86GetOptValString(sna->Options, OPTION_DRI);
+ Bool dummy;
+
+ if (s == NULL || xf86getBoolValue(&dummy, s))
+ return sna->kgem.gen < 040 ? "i915" : "i965";
+
+ return s;
+}
+
bool sna_dri_open(struct sna *sna, ScreenPtr screen)
{
DRI2InfoRec info;
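
dri_driver_name() above allows Option "DRI" to carry either a boolean (keep the gen-based default client driver) or an explicit driver name. A sketch of the selection, assuming the octal gen encoding used throughout this release (hypothetical helper, not the xf86 API):

    /* A boolean-looking value keeps the default; any other string,
     * e.g. Option "DRI" "i965", is taken as the driver name. */
    static const char *pick_dri_name(const char *opt, int opt_is_bool, int gen)
    {
            const char *def = gen < 040 ? "i915" : "i965"; /* 040 == gen4 */
            return (opt == NULL || opt_is_bool) ? def : opt;
    }
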
@@ -2318,8 +2427,7 @@ bool sna_dri_open(struct sna *sna, ScreenPtr screen)
sna->deviceName = drmGetDeviceNameFromFd(sna->kgem.fd);
memset(&info, '\0', sizeof(info));
info.fd = sna->kgem.fd;
- info.driverName =
- (sna->kgem.gen && sna->kgem.gen < 40) ? "i915" : "i965";
+ info.driverName = dri_driver_name(sna);
info.deviceName = sna->deviceName;
DBG(("%s: loading dri driver '%s' [gen=%d] for device '%s'\n",
diff --git a/src/sna/sna_driver.c b/src/sna/sna_driver.c
index 1b7e817f5..ffeaead58 100644
--- a/src/sna/sna_driver.c
+++ b/src/sna/sna_driver.c
@@ -62,7 +62,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include <sys/ioctl.h>
#include <sys/fcntl.h>
-#include <sys/poll.h>
#include "i915_drm.h"
#ifdef HAVE_VALGRIND
@@ -79,11 +78,6 @@ DevPrivateKeyRec sna_gc_key;
DevPrivateKeyRec sna_window_key;
DevPrivateKeyRec sna_glyph_key;
-static Bool sna_enter_vt(VT_FUNC_ARGS_DECL);
-
-/* temporary */
-extern void xf86SetCursor(ScreenPtr screen, CursorPtr pCurs, int x, int y);
-
static void
sna_load_palette(ScrnInfoPtr scrn, int numColors, int *indices,
LOCO * colors, VisualPtr pVisual)
@@ -150,6 +144,79 @@ sna_load_palette(ScrnInfoPtr scrn, int numColors, int *indices,
}
}
+static void
+sna_set_fallback_mode(ScrnInfoPtr scrn)
+{
+ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn);
+ xf86OutputPtr output = NULL;
+ xf86CrtcPtr crtc = NULL;
+ int n;
+
+ if ((unsigned)config->compat_output < config->num_output) {
+ output = config->output[config->compat_output];
+ crtc = output->crtc;
+ }
+
+ for (n = 0; n < config->num_output; n++)
+ config->output[n]->crtc = NULL;
+ for (n = 0; n < config->num_crtc; n++)
+ config->crtc[n]->enabled = FALSE;
+
+ if (output && crtc) {
+ DisplayModePtr mode;
+
+ output->crtc = crtc;
+
+ mode = xf86OutputFindClosestMode(output, scrn->currentMode);
+ if (mode &&
+ xf86CrtcSetModeTransform(crtc, mode, RR_Rotate_0, NULL, 0, 0)) {
+ crtc->desiredMode = *mode;
+ crtc->desiredMode.prev = crtc->desiredMode.next = NULL;
+ crtc->desiredMode.name = NULL;
+ crtc->desiredMode.PrivSize = 0;
+ crtc->desiredMode.PrivFlags = 0;
+ crtc->desiredMode.Private = NULL;
+ crtc->desiredRotation = RR_Rotate_0;
+ crtc->desiredTransformPresent = FALSE;
+ crtc->desiredX = 0;
+ crtc->desiredY = 0;
+ crtc->enabled = TRUE;
+ }
+ }
+
+ xf86DisableUnusedFunctions(scrn);
+#ifdef RANDR_12_INTERFACE
+ if (root(scrn->pScreen))
+ xf86RandR12TellChanged(scrn->pScreen);
+#endif
+}
+
+static Bool sna_become_master(struct sna *sna)
+{
+ ScrnInfoPtr scrn = sna->scrn;
+
+ DBG(("%s\n", __FUNCTION__));
+
+ if (drmSetMaster(sna->kgem.fd)) {
+ sleep(2); /* XXX wait for the current master to decease */
+ if (drmSetMaster(sna->kgem.fd)) {
+ xf86DrvMsg(scrn->scrnIndex, X_ERROR,
+ "drmSetMaster failed: %s\n",
+ strerror(errno));
+ return FALSE;
+ }
+ }
+
+ if (!xf86SetDesiredModes(scrn)) {
+ xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+ "failed to restore desired modes on VT switch\n");
+ sna_set_fallback_mode(scrn);
+ }
+
+ sna_mode_disable_unused(sna);
+ return TRUE;
+}
+
/**
* Adjust the screen pixmap for the current location of the front buffer.
* This is done at EnterVT when buffers are bound as long as the resources
@@ -158,7 +225,6 @@ sna_load_palette(ScrnInfoPtr scrn, int numColors, int *indices,
*/
static Bool sna_create_screen_resources(ScreenPtr screen)
{
- ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
struct sna *sna = to_sna_from_screen(screen);
DBG(("%s(%dx%d@%d)\n", __FUNCTION__,
@@ -197,7 +263,7 @@ static Bool sna_create_screen_resources(ScreenPtr screen)
sna_copy_fbcon(sna);
- if (!sna_enter_vt(VT_FUNC_ARGS(0))) {
+ if (!sna_become_master(sna)) {
xf86DrvMsg(screen->myNum, X_ERROR,
"[intel] Failed to become DRM master\n");
goto cleanup_front;
@@ -363,6 +429,12 @@ static void sna_setup_capabilities(ScrnInfoPtr scrn, int fd)
#endif
}
+static Bool sna_option_cast_to_bool(struct sna *sna, int id, Bool val)
+{
+ xf86getBoolValue(&val, xf86GetOptValString(sna->Options, id));
+ return val;
+}
+
/**
  * This is called before ScreenInit to do any required probing of screen
* configuration.
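
sna_option_cast_to_bool() above depends on xf86getBoolValue() leaving the passed-in default untouched when the string is not a recognised boolean, so Option "AccelMethod" "false" (or "off") disables acceleration while a non-boolean value such as "sna" keeps the default. A standalone sketch of that behaviour (hypothetical parser standing in for the xf86 helper):

    #include <stdbool.h>
    #include <stddef.h>
    #include <strings.h>

    static bool cast_to_bool(const char *s, bool def)
    {
            if (s == NULL)
                    return def;
            if (!strcasecmp(s, "true") || !strcasecmp(s, "on") || !strcasecmp(s, "1"))
                    return true;
            if (!strcasecmp(s, "false") || !strcasecmp(s, "off") || !strcasecmp(s, "0"))
                    return false;
            return def; /* e.g. "sna": not a boolean, keep the default */
    }
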
@@ -468,7 +540,8 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int flags)
intel_detect_chipset(scrn, sna->pEnt, sna->PciInfo);
kgem_init(&sna->kgem, fd, sna->PciInfo, sna->info->gen);
- if (xf86ReturnOptValBool(sna->Options, OPTION_ACCEL_DISABLE, FALSE)) {
+ if (xf86ReturnOptValBool(sna->Options, OPTION_ACCEL_DISABLE, FALSE) ||
+ !sna_option_cast_to_bool(sna, OPTION_ACCEL_METHOD, TRUE)) {
xf86DrvMsg(sna->scrn->scrnIndex, X_CONFIG,
"Disabling hardware acceleration.\n");
sna->kgem.wedged = true;
@@ -493,12 +566,10 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int flags)
sna->tiling &= ~SNA_TILING_FB;
sna->flags = 0;
- if (!xf86ReturnOptValBool(sna->Options, OPTION_THROTTLE, TRUE))
- sna->flags |= SNA_NO_THROTTLE;
- if (!xf86ReturnOptValBool(sna->Options, OPTION_DELAYED_FLUSH, TRUE))
- sna->flags |= SNA_NO_DELAYED_FLUSH;
if (!xf86ReturnOptValBool(sna->Options, OPTION_SWAPBUFFERS_WAIT, TRUE))
sna->flags |= SNA_NO_WAIT;
+ if (xf86ReturnOptValBool(sna->Options, OPTION_TRIPLE_BUFFER, TRUE))
+ sna->flags |= SNA_TRIPLE_BUFFER;
if (has_pageflipping(sna)) {
if (xf86ReturnOptValBool(sna->Options, OPTION_TEAR_FREE, FALSE))
sna->flags |= SNA_TEAR_FREE;
@@ -511,12 +582,6 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int flags)
sna->tiling & SNA_TILING_FB ? "tiled" : "linear");
xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Pixmaps %s\n",
sna->tiling & SNA_TILING_2D ? "tiled" : "linear");
- xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "3D buffers %s\n",
- sna->tiling & SNA_TILING_3D ? "tiled" : "linear");
- xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Throttling %sabled\n",
- sna->flags & SNA_NO_THROTTLE ? "dis" : "en");
- xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Delayed flush %sabled\n",
- sna->flags & SNA_NO_DELAYED_FLUSH ? "dis" : "en");
xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "\"Tear free\" %sabled\n",
sna->flags & SNA_TEAR_FREE ? "en" : "dis");
xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Forcing per-crtc-pixmaps? %s\n",
@@ -543,7 +608,7 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int flags)
xf86SetDpi(scrn, 0, 0);
sna->dri_available = false;
- if (xf86ReturnOptValBool(sna->Options, OPTION_DRI, TRUE))
+ if (sna_option_cast_to_bool(sna, OPTION_DRI, TRUE))
sna->dri_available = !!xf86LoadSubModule(scrn, "dri2");
return TRUE;
@@ -552,9 +617,11 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int flags)
static void
sna_block_handler(BLOCKHANDLER_ARGS_DECL)
{
- SCREEN_PTR(arg);
- ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
- struct sna *sna = to_sna(scrn);
+#ifndef XF86_SCRN_INTERFACE
+ struct sna *sna = to_sna(xf86Screens[arg]);
+#else
+ struct sna *sna = to_sna_from_screen(arg);
+#endif
struct timeval **tv = timeout;
DBG(("%s (tv=%ld.%06ld)\n", __FUNCTION__,
@@ -569,9 +636,11 @@ sna_block_handler(BLOCKHANDLER_ARGS_DECL)
static void
sna_wakeup_handler(WAKEUPHANDLER_ARGS_DECL)
{
- SCREEN_PTR(arg);
- ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
- struct sna *sna = to_sna(scrn);
+#ifndef XF86_SCRN_INTERFACE
+ struct sna *sna = to_sna(xf86Screens[arg]);
+#else
+ struct sna *sna = to_sna_from_screen(arg);
+#endif
DBG(("%s\n", __FUNCTION__));
@@ -639,11 +708,14 @@ sna_uevent_init(ScrnInfoPtr scrn)
DBG(("%s\n", __FUNCTION__));
- if (!xf86GetOptValBool(sna->Options, OPTION_HOTPLUG, &hotplug)) {
- from = X_DEFAULT;
- hotplug = TRUE;
- }
+ /* RandR will be disabled if Xinerama is active, and so generating
+ * RR hotplug events is then verboten.
+ */
+ if (!dixPrivateKeyRegistered(rrPrivKey))
+ return;
+ if (!xf86GetOptValBool(sna->Options, OPTION_HOTPLUG, &hotplug))
+ from = X_DEFAULT, hotplug = TRUE;
xf86DrvMsg(scrn->scrnIndex, from, "hotplug detection: \"%s\"\n",
hotplug ? "enabled" : "disabled");
if (!hotplug)
@@ -654,16 +726,14 @@ sna_uevent_init(ScrnInfoPtr scrn)
return;
mon = udev_monitor_new_from_netlink(u, "udev");
-
if (!mon) {
udev_unref(u);
return;
}
if (udev_monitor_filter_add_match_subsystem_devtype(mon,
- "drm",
- "drm_minor") < 0 ||
- udev_monitor_enable_receiving(mon) < 0)
+ "drm", "drm_minor") < 0 ||
+ udev_monitor_enable_receiving(mon) < 0)
{
udev_monitor_unref(mon);
udev_unref(u);
@@ -681,23 +751,29 @@ sna_uevent_init(ScrnInfoPtr scrn)
}
sna->uevent_monitor = mon;
+
+ DBG(("%s: installed uvent handler\n", __FUNCTION__));
}
static void
sna_uevent_fini(ScrnInfoPtr scrn)
{
struct sna *sna = to_sna(scrn);
+ struct udev *u;
- if (sna->uevent_handler) {
- struct udev *u = udev_monitor_get_udev(sna->uevent_monitor);
+ if (sna->uevent_handler == NULL)
+ return;
- xf86RemoveGeneralHandler(sna->uevent_handler);
+ xf86RemoveGeneralHandler(sna->uevent_handler);
- udev_monitor_unref(sna->uevent_monitor);
- udev_unref(u);
- sna->uevent_handler = NULL;
- sna->uevent_monitor = NULL;
- }
+ u = udev_monitor_get_udev(sna->uevent_monitor);
+ udev_monitor_unref(sna->uevent_monitor);
+ udev_unref(u);
+
+ sna->uevent_handler = NULL;
+ sna->uevent_monitor = NULL;
+
+ DBG(("%s: removed uvent handler\n", __FUNCTION__));
}
#else
static void sna_uevent_fini(ScrnInfoPtr scrn) { }
@@ -717,18 +793,6 @@ static void sna_leave_vt(VT_FUNC_ARGS_DECL)
"drmDropMaster failed: %s\n", strerror(errno));
}
-/* In order to workaround a kernel bug in not honouring O_NONBLOCK,
- * check that the fd is readable before attempting to read the next
- * event from drm.
- */
-static Bool sna_mode_has_pending_events(struct sna *sna)
-{
- struct pollfd pfd;
- pfd.fd = sna->kgem.fd;
- pfd.events = POLLIN;
- return poll(&pfd, 1, 0) == 1;
-}
-
static Bool sna_early_close_screen(CLOSE_SCREEN_ARGS_DECL)
{
ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
@@ -739,9 +803,7 @@ static Bool sna_early_close_screen(CLOSE_SCREEN_ARGS_DECL)
xf86_hide_cursors(scrn);
sna_uevent_fini(scrn);
- /* drain the event queues */
- if (sna_mode_has_pending_events(sna))
- sna_mode_wakeup(sna);
+ sna_mode_close(sna);
if (sna->dri_open) {
sna_dri_close(sna, screen);
@@ -793,6 +855,7 @@ static void sna_mode_set(ScrnInfoPtr scrn)
static Bool
sna_register_all_privates(void)
{
+#if HAS_DIXREGISTERPRIVATEKEY
if (!dixRegisterPrivateKey(&sna_pixmap_key, PRIVATE_PIXMAP,
3*sizeof(void *)))
return FALSE;
@@ -808,6 +871,19 @@ sna_register_all_privates(void)
if (!dixRegisterPrivateKey(&sna_window_key, PRIVATE_WINDOW,
2*sizeof(void *)))
return FALSE;
+#else
+ if (!dixRequestPrivate(&sna_pixmap_key, 3*sizeof(void *)))
+ return FALSE;
+
+ if (!dixRequestPrivate(&sna_gc_key, sizeof(FbGCPrivate)))
+ return FALSE;
+
+ if (!dixRequestPrivate(&sna_glyph_key, sizeof(struct sna_glyph)))
+ return FALSE;
+
+ if (!dixRequestPrivate(&sna_window_key, 2*sizeof(void *)))
+ return FALSE;
+#endif
return TRUE;
}
@@ -815,7 +891,7 @@ sna_register_all_privates(void)
static size_t
agp_aperture_size(struct pci_device *dev, int gen)
{
- return dev->regions[gen < 30 ? 0 : 2].size;
+ return dev->regions[gen < 030 ? 0 : 2].size;
}
static Bool
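
The octal literal above ('30' becomes '030') follows this release's generation encoding: the high octal digit is the major gen and the low digit the minor, so 030 is gen3, 040 gen4 and 075 gen7.5, and gen >> 3 recovers the major generation (as in the gen >> 3 test in sna_glyphs.c further down). A compilable illustration:

    #include <assert.h>

    int main(void)
    {
            assert(030 == (3 << 3));        /* gen3.0 */
            assert(040 >> 3 == 4);          /* major generation */
            assert(030 < 040 && 040 < 075); /* ordering still works */
            return 0;
    }
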
@@ -983,24 +1059,9 @@ static void sna_free_screen(FREE_SCREEN_ARGS_DECL)
static Bool sna_enter_vt(VT_FUNC_ARGS_DECL)
{
SCRN_INFO_PTR(arg);
- struct sna *sna = to_sna(scrn);
DBG(("%s\n", __FUNCTION__));
-
- if (drmSetMaster(sna->kgem.fd)) {
- xf86DrvMsg(scrn->scrnIndex, X_ERROR,
- "drmSetMaster failed: %s\n",
- strerror(errno));
- return FALSE;
- }
-
- if (!xf86SetDesiredModes(scrn))
- xf86DrvMsg(scrn->scrnIndex, X_WARNING,
- "failed to restore desired modes on VT switch\n");
-
- sna_mode_disable_unused(sna);
-
- return TRUE;
+ return sna_become_master(to_sna(scrn));
}
static Bool sna_switch_mode(SWITCH_MODE_ARGS_DECL)
@@ -1094,6 +1155,10 @@ Bool sna_init_scrn(ScrnInfoPtr scrn, int entity_num)
xf86DrvMsg(scrn->scrnIndex, X_INFO,
"SNA compiled with assertions enabled\n");
#endif
+#if DEBUG_SYNC
+ xf86DrvMsg(scrn->scrnIndex, X_INFO,
+ "SNA compiled with synchronous rendering\n");
+#endif
#if DEBUG_MEMORY
xf86DrvMsg(scrn->scrnIndex, X_INFO,
"SNA compiled with memory allocation reporting enabled\n");
@@ -1117,11 +1182,15 @@ Bool sna_init_scrn(ScrnInfoPtr scrn, int entity_num)
scrn->ValidMode = sna_valid_mode;
scrn->PMEvent = sna_pm_event;
+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,9,99,901,0)
scrn->ModeSet = sna_mode_set;
+#endif
xf86SetEntitySharable(entity_num);
xf86SetEntityInstanceForScreen(scrn, entity_num,
xf86GetNumEntityInstances(entity_num)-1);
+ sna_threads_init();
+
return TRUE;
}
diff --git a/src/sna/sna_glyphs.c b/src/sna/sna_glyphs.c
index 9a6ad4b52..5fed8b419 100644
--- a/src/sna/sna_glyphs.c
+++ b/src/sna/sna_glyphs.c
@@ -84,6 +84,8 @@
#define N_STACK_GLYPHS 512
+#define glyph_valid(g) *((uint32_t *)&(g)->info.width)
+
#if HAS_DEBUG_FULL
static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function)
{
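
The glyph_valid() macro above replaces the pair of 16-bit tests `width && height` with one aligned 32-bit load. This assumes width and height are adjacent 16-bit fields (as in xGlyphInfo) and that degenerate glyphs always have both dimensions zeroed together, which the NULL-picture paths below enforce. A self-contained sketch:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    struct info { int16_t width, height; };

    /* One 32-bit read tests both dimensions at once; memcpy is used
     * here instead of the driver's pointer cast to stay strict-aliasing
     * clean. */
    static uint32_t glyph_valid(const struct info *i)
    {
            uint32_t v;
            memcpy(&v, &i->width, sizeof(v));
            return v;
    }

    int main(void)
    {
            struct info empty = { 0, 0 }, glyph = { 8, 16 };
            assert(!glyph_valid(&empty));
            assert(glyph_valid(&glyph));
            return 0;
    }
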
@@ -108,7 +110,7 @@ extern DevPrivateKeyRec sna_glyph_key;
static inline struct sna_glyph *sna_glyph(GlyphPtr glyph)
{
- return dixGetPrivateAddr(&glyph->devPrivates, &sna_glyph_key);
+ return __get_private(glyph, sna_glyph_key);
}
#define NeedsComponent(f) (PICT_FORMAT_A(f) != 0 && PICT_FORMAT_RGB(f) != 0)
@@ -191,11 +193,17 @@ bool sna_glyphs_create(struct sna *sna)
if (sna->render.white_image == NULL)
goto bail;
- if (!can_render(sna))
+ if (!can_render(sna)) {
+ DBG(("%s: no render acceleration, no render glyph caches\n",
+ __FUNCTION__));
return true;
+ }
- if (xf86IsEntityShared(sna->scrn->entityList[0]))
+ if (xf86IsEntityShared(sna->scrn->entityList[0])) {
+ DBG(("%s: shared GlyphPictures, no render glyph caches\n",
+ __FUNCTION__));
return true;
+ }
for (i = 0; i < ARRAY_SIZE(formats); i++) {
struct sna_glyph_cache *cache = &sna->render.glyph[i];
@@ -215,9 +223,12 @@ bool sna_glyphs_create(struct sna *sna)
CACHE_PICTURE_SIZE,
CACHE_PICTURE_SIZE,
depth,
- SNA_CREATE_SCRATCH);
- if (!pixmap)
+ SNA_CREATE_GLYPHS);
+ if (!pixmap) {
+ DBG(("%s: failed to allocate pixmap for Glyph cache\n",
+ __FUNCTION__));
goto bail;
+ }
priv = sna_pixmap(pixmap);
if (priv != NULL) {
@@ -235,6 +246,7 @@ bool sna_glyphs_create(struct sna *sna)
goto bail;
ValidatePicture(picture);
+ assert(picture->pDrawable == &pixmap->drawable);
cache->count = cache->evict = 0;
cache->picture = picture;
@@ -297,7 +309,7 @@ glyph_extents(int nlist,
while (n--) {
GlyphPtr glyph = *glyphs++;
- if (glyph->info.width && glyph->info.height) {
+ if (glyph_valid(glyph)) {
int v;
v = x - glyph->info.x;
@@ -350,14 +362,20 @@ glyph_cache(ScreenPtr screen,
struct sna_render *render,
GlyphPtr glyph)
{
- PicturePtr glyph_picture = GetGlyphPicture(glyph, screen);
- struct sna_glyph_cache *cache = &render->glyph[PICT_FORMAT_RGB(glyph_picture->format) != 0];
+ PicturePtr glyph_picture;
+ struct sna_glyph_cache *cache;
struct sna_glyph *priv;
int size, mask, pos, s;
if (NO_GLYPH_CACHE)
return false;
+ glyph_picture = GetGlyphPicture(glyph, screen);
+ if (unlikely(glyph_picture == NULL)) {
+ glyph->info.width = glyph->info.height = 0;
+ return false;
+ }
+
if (glyph->info.width > GLYPH_MAX_SIZE ||
glyph->info.height > GLYPH_MAX_SIZE) {
PixmapPtr pixmap = (PixmapPtr)glyph_picture->pDrawable;
@@ -373,6 +391,7 @@ glyph_cache(ScreenPtr screen,
if (glyph->info.width <= size && glyph->info.height <= size)
break;
+ cache = &render->glyph[PICT_FORMAT_RGB(glyph_picture->format) != 0];
s = glyph_size_to_count(size);
mask = glyph_count_to_mask(s);
pos = (cache->count + s - 1) & mask;
@@ -528,7 +547,7 @@ glyphs_to_dst(struct sna *sna,
struct sna_glyph priv;
int i;
- if (glyph->info.width == 0 || glyph->info.height == 0)
+ if (!glyph_valid(glyph))
goto next_glyph;
priv = *sna_glyph(glyph);
@@ -540,6 +559,10 @@ glyphs_to_dst(struct sna *sna,
if (!glyph_cache(screen, &sna->render, glyph)) {
/* no cache for this glyph */
priv.atlas = GetGlyphPicture(glyph, screen);
+ if (unlikely(priv.atlas == NULL)) {
+ glyph->info.width = glyph->info.height = 0;
+ goto next_glyph;
+ }
priv.coordinate.x = priv.coordinate.y = 0;
} else
priv = *sna_glyph(glyph);
@@ -671,7 +694,7 @@ glyphs_slow(struct sna *sna,
BoxPtr rects;
int nrect;
- if (glyph->info.width == 0 || glyph->info.height == 0)
+ if (!glyph_valid(glyph))
goto next_glyph;
priv = *sna_glyph(glyph);
@@ -679,6 +702,10 @@ glyphs_slow(struct sna *sna,
if (!glyph_cache(screen, &sna->render, glyph)) {
/* no cache for this glyph */
priv.atlas = GetGlyphPicture(glyph, screen);
+ if (unlikely(priv.atlas == NULL)) {
+ glyph->info.width = glyph->info.height = 0;
+ goto next_glyph;
+ }
priv.coordinate.x = priv.coordinate.y = 0;
} else
priv = *sna_glyph(glyph);
@@ -780,7 +807,7 @@ __sna_glyph_get_image(GlyphPtr g, ScreenPtr s)
int dx, dy;
p = GetGlyphPicture(g, s);
- if (p == NULL)
+ if (unlikely(p == NULL))
return NULL;
image = image_from_pict(p, FALSE, &dx, &dy);
@@ -917,7 +944,7 @@ glyphs_via_mask(struct sna *sna,
GlyphPtr g = *glyphs++;
const void *ptr;
- if (g->info.width == 0 || g->info.height == 0)
+ if (!glyph_valid(g))
goto next_pglyph;
ptr = pixman_glyph_cache_lookup(cache, g, NULL);
@@ -968,7 +995,7 @@ next_pglyph:
pixman_image_t *glyph_image;
int16_t xi, yi;
- if (g->info.width == 0 || g->info.height == 0)
+ if (!glyph_valid(g))
goto next_image;
/* If the mask has been cropped, it is likely
@@ -984,6 +1011,8 @@ next_pglyph:
glyph_image =
sna_glyph_get_image(g, dst->pDrawable->pScreen);
+ if (glyph_image == NULL)
+ goto next_image;
DBG(("%s: glyph to mask (%d, %d)x(%d, %d)\n",
__FUNCTION__,
@@ -1058,7 +1087,7 @@ next_image:
PicturePtr this_atlas;
struct sna_composite_rectangles r;
- if (glyph->info.width == 0 || glyph->info.height == 0)
+ if (!glyph_valid(glyph))
goto next_glyph;
priv = sna_glyph(glyph);
@@ -1076,6 +1105,10 @@ next_image:
} else {
/* no cache for this glyph */
this_atlas = GetGlyphPicture(glyph, screen);
+ if (unlikely(this_atlas == NULL)) {
+ glyph->info.width = glyph->info.height = 0;
+ goto next_glyph;
+ }
r.src.x = r.src.y = 0;
}
}
@@ -1090,7 +1123,8 @@ next_image:
__FUNCTION__,
(int)this_atlas->format,
(int)(format->depth << 24 | format->format)));
- if (this_atlas->format == (format->depth << 24 | format->format)) {
+ if (this_atlas->format == (format->depth << 24 | format->format) &&
+ (sna->kgem.gen >> 3) != 4) { /* XXX cache corruption? how? */
ok = sna->render.composite(sna, PictOpAdd,
this_atlas, NULL, mask,
0, 0, 0, 0, 0, 0,
@@ -1194,7 +1228,7 @@ glyphs_format(int nlist, GlyphListPtr list, GlyphPtr * glyphs)
while (n--) {
GlyphPtr glyph = *glyphs++;
- if (glyph->info.width == 0 || glyph->info.height == 0) {
+ if (!glyph_valid(glyph)) {
x += glyph->info.xOff;
y += glyph->info.yOff;
continue;
@@ -1333,7 +1367,7 @@ glyphs_fallback(CARD8 op,
__FUNCTION__,
RegionExtents(&region)->x1, RegionExtents(&region)->y1,
RegionExtents(&region)->x2, RegionExtents(&region)->y2));
- if (!RegionNotEmpty(&region))
+ if (RegionNil(&region))
return;
if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
@@ -1391,7 +1425,7 @@ glyphs_fallback(CARD8 op,
GlyphPtr g = *glyphs++;
const void *ptr;
- if (g->info.width == 0 || g->info.height == 0)
+ if (!glyph_valid(g))
goto next;
ptr = pixman_glyph_cache_lookup(cache, g, NULL);
@@ -1517,7 +1551,7 @@ out:
GlyphPtr g = *glyphs++;
pixman_image_t *glyph_image;
- if (g->info.width == 0 || g->info.height == 0)
+ if (!glyph_valid(g))
goto next_glyph;
glyph_image = sna_glyph_get_image(g, screen);
@@ -1654,7 +1688,7 @@ sna_glyphs(CARD8 op,
}
if ((too_small(priv) || DAMAGE_IS_ALL(priv->cpu_damage)) &&
- !picture_is_gpu(src)) {
+ !picture_is_gpu(sna, src)) {
DBG(("%s: fallback -- too small (%dx%d)\n",
__FUNCTION__, dst->pDrawable->width, dst->pDrawable->height));
goto fallback;
@@ -1810,7 +1844,7 @@ glyphs_via_image(struct sna *sna,
GlyphPtr g = *glyphs++;
const void *ptr;
- if (g->info.width == 0 || g->info.height == 0)
+ if (!glyph_valid(g))
goto next_pglyph;
ptr = pixman_glyph_cache_lookup(cache, g, NULL);
@@ -1861,7 +1895,7 @@ next_pglyph:
pixman_image_t *glyph_image;
int16_t xi, yi;
- if (g->info.width == 0 || g->info.height == 0)
+ if (!glyph_valid(g))
goto next_image;
/* If the mask has been cropped, it is likely
@@ -1877,6 +1911,8 @@ next_pglyph:
glyph_image =
sna_glyph_get_image(g, dst->pDrawable->pScreen);
+ if (glyph_image == NULL)
+ goto next_image;
DBG(("%s: glyph to mask (%d, %d)x(%d, %d)\n",
__FUNCTION__,
@@ -1976,7 +2012,7 @@ sna_glyphs__shared(CARD8 op,
}
if ((too_small(priv) || DAMAGE_IS_ALL(priv->cpu_damage)) &&
- !picture_is_gpu(src)) {
+ !picture_is_gpu(sna, src)) {
DBG(("%s: fallback -- too small (%dx%d)\n",
__FUNCTION__, dst->pDrawable->width, dst->pDrawable->height));
goto fallback;
diff --git a/src/sna/sna_gradient.c b/src/sna/sna_gradient.c
index 5f06fbc8d..db09e72db 100644
--- a/src/sna/sna_gradient.c
+++ b/src/sna/sna_gradient.c
@@ -219,11 +219,11 @@ sna_render_flush_solid(struct sna *sna)
DBG(("sna_render_flush_solid(size=%d)\n", cache->size));
assert(cache->dirty);
assert(cache->size);
+ assert(cache->size <= 1024);
kgem_bo_write(&sna->kgem, cache->cache_bo,
cache->color, cache->size*sizeof(uint32_t));
cache->dirty = 0;
- cache->last = 0;
}
static void
@@ -250,21 +250,24 @@ sna_render_finish_solid(struct sna *sna, bool force)
cache->bo[i] = NULL;
}
- old = cache->cache_bo;
-
DBG(("sna_render_finish_solid reset\n"));
-
+ old = cache->cache_bo;
cache->cache_bo = kgem_create_linear(&sna->kgem, sizeof(cache->color), 0);
if (cache->cache_bo == NULL) {
cache->cache_bo = old;
old = NULL;
}
- cache->bo[0] = kgem_create_proxy(&sna->kgem, cache->cache_bo,
- 0, sizeof(uint32_t));
- cache->bo[0]->pitch = 4;
if (force)
- cache->size = 1;
+ cache->size = 0;
+ if (cache->last < cache->size) {
+ cache->bo[cache->last] = kgem_create_proxy(&sna->kgem, cache->cache_bo,
+ cache->last*sizeof(uint32_t), sizeof(uint32_t));
+ if (cache->bo[cache->last])
+ cache->bo[cache->last]->pitch = 4;
+ else
+ cache->last = 1024;
+ }
if (old)
kgem_bo_destroy(&sna->kgem, old);
@@ -283,7 +286,38 @@ sna_render_get_solid(struct sna *sna, uint32_t color)
if (color == 0xffffffff) {
DBG(("%s(white)\n", __FUNCTION__));
- return kgem_bo_reference(cache->bo[0]);
+ return kgem_bo_reference(sna->render.alpha_cache.bo[255+7]);
+ }
+
+ if ((color >> 24) == 0xff) {
+ int v = 0;
+
+ if (((color >> 16) & 0xff) == 0)
+ v |= 0;
+ else if (((color >> 16) & 0xff) == 0xff)
+ v |= 1 << 2;
+ else
+ v = -1;
+
+ if (((color >> 8) & 0xff) == 0)
+ v |= 0;
+ else if (((color >> 8) & 0xff) == 0xff)
+ v |= 1 << 1;
+ else
+ v = -1;
+
+ if (((color >> 0) & 0xff) == 0)
+ v |= 0;
+ else if (((color >> 0) & 0xff) == 0xff)
+ v |= 1 << 0;
+ else
+ v = -1;
+
+ if (v >= 0) {
+ DBG(("%s(primary (%d,%d,%d): %d)\n",
+ __FUNCTION__, v & 4, v & 2, v & 1, v));
+ return kgem_bo_reference(sna->render.alpha_cache.bo[255+v]);
+ }
}
if (cache->color[cache->last] == color) {
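
The block above short-circuits fully saturated opaque colours to pre-baked slots in the alpha cache: each channel equal to 0xff sets one bit (R=4, G=2, B=1), and any in-between value falls through to the ordinary solid cache, so opaque red 0xffff0000 yields v == 4 and reuses alpha_cache.bo[255+4]. A compact sketch of the same classification:

    #include <stdint.h>

    /* Returns the 3-bit primary index, or -1 when the colour is not an
     * opaque combination of saturated channels. */
    static int primary_index(uint32_t argb)
    {
            int v = 0, shift;

            if ((argb >> 24) != 0xff)
                    return -1;
            for (shift = 16; shift >= 0; shift -= 8) {
                    uint8_t c = argb >> shift;
                    if (c == 0xff)
                            v |= 1 << (shift / 8);
                    else if (c != 0)
                            return -1;
            }
            return v;
    }
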
@@ -292,7 +326,7 @@ sna_render_get_solid(struct sna *sna, uint32_t color)
return kgem_bo_reference(cache->bo[cache->last]);
}
- for (i = 1; i < cache->size; i++) {
+ for (i = 0; i < cache->size; i++) {
if (cache->color[i] == color) {
if (cache->bo[i] == NULL) {
DBG(("sna_render_get_solid(%d) = %x (recreate)\n",
@@ -306,7 +340,7 @@ sna_render_get_solid(struct sna *sna, uint32_t color)
}
}
- sna_render_finish_solid(sna, i == ARRAY_SIZE(cache->color));
+ sna_render_finish_solid(sna, i == 1024);
i = cache->size++;
cache->color[i] = color;
@@ -326,7 +360,7 @@ done:
static bool sna_alpha_cache_init(struct sna *sna)
{
struct sna_alpha_cache *cache = &sna->render.alpha_cache;
- uint32_t color[256];
+ uint32_t color[256 + 7];
int i;
DBG(("%s\n", __FUNCTION__));
@@ -346,6 +380,28 @@ static bool sna_alpha_cache_init(struct sna *sna)
cache->bo[i]->pitch = 4;
}
+
+ /* primary */
+ for (i = 1; i < 8; i++) {
+ int j = 255+i;
+
+ color[j] = 0xff << 24;
+ if (i & 1)
+ color[j] |= 0xff << 0;
+ if (i & 2)
+ color[j] |= 0xff << 8;
+ if (i & 4)
+ color[j] |= 0xff << 16;
+ cache->bo[j] = kgem_create_proxy(&sna->kgem,
+ cache->cache_bo,
+ sizeof(uint32_t)*j,
+ sizeof(uint32_t));
+ if (cache->bo[j] == NULL)
+ return false;
+
+ cache->bo[j]->pitch = 4;
+ }
+
return kgem_bo_write(&sna->kgem, cache->cache_bo, color, sizeof(color));
}
@@ -356,24 +412,14 @@ static bool sna_solid_cache_init(struct sna *sna)
DBG(("%s\n", __FUNCTION__));
cache->cache_bo =
- kgem_create_linear(&sna->kgem, sizeof(cache->color), 0);
+ kgem_create_linear(&sna->kgem, 4096, 0);
if (!cache->cache_bo)
return false;
- /*
- * Initialise [0] with white since it is very common and filling the
- * zeroth slot simplifies some of the checks.
- */
- cache->color[0] = 0xffffffff;
- cache->bo[0] = kgem_create_proxy(&sna->kgem, cache->cache_bo,
- 0, sizeof(uint32_t));
- if (cache->bo[0] == NULL)
- return false;
-
- cache->bo[0]->pitch = 4;
- cache->dirty = 1;
- cache->size = 1;
- cache->last = 0;
+ cache->last = 1024;
+ cache->color[cache->last] = 0;
+ cache->dirty = 0;
+ cache->size = 0;
return true;
}
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index 69d920c7d..f89cd89ec 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -117,12 +117,18 @@ static void read_boxes_inplace(struct kgem *kgem,
static bool download_inplace(struct kgem *kgem, struct kgem_bo *bo)
{
+ if (unlikely(kgem->wedged))
+ return true;
+
if (!kgem_bo_can_map(kgem, bo))
return false;
if (FORCE_INPLACE)
return FORCE_INPLACE > 0;
+ if (kgem->can_blt_cpu && kgem->max_cpu_size)
+ return false;
+
return !__kgem_bo_is_busy(kgem, bo) || bo->tiling == I915_TILING_NONE;
}
@@ -364,7 +370,7 @@ fallback:
cmd = XY_SRC_COPY_BLT_CMD;
src_pitch = src_bo->pitch;
- if (kgem->gen >= 40 && src_bo->tiling) {
+ if (kgem->gen >= 040 && src_bo->tiling) {
cmd |= BLT_SRC_TILED;
src_pitch >>= 2;
}
@@ -378,11 +384,13 @@ fallback:
case 1: break;
}
- kgem_set_mode(kgem, KGEM_BLT);
- if (!kgem_check_reloc_and_exec(kgem, 2) ||
- !kgem_check_batch(kgem, 8) ||
+ kgem_set_mode(kgem, KGEM_BLT, dst_bo);
+ if (!kgem_check_batch(kgem, 8) ||
+ !kgem_check_reloc_and_exec(kgem, 2) ||
!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
- _kgem_submit(kgem);
+ kgem_submit(kgem);
+ if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL))
+ goto fallback;
_kgem_set_mode(kgem, KGEM_BLT);
}
@@ -483,7 +491,16 @@ fallback:
static bool upload_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo)
{
- if (kgem->gen < 50) /* bit17 swizzling :( */
+#ifndef __x86_64__
+	/* Between a register-starved compiler emitting atrocious code
+ * and the extra overhead in the kernel for managing the tight
+ * 32-bit address space, unless we have a 64-bit system,
+ * using memcpy_to_tiled_x() is extremely slow.
+ */
+ return false;
+#endif
+
+ if (kgem->gen < 050) /* bit17 swizzling :( */
return false;
if (bo->tiling != I915_TILING_X)
@@ -579,19 +596,13 @@ static bool write_boxes_inplace(struct kgem *kgem,
return true;
}
-static bool upload_inplace(struct kgem *kgem,
- struct kgem_bo *bo,
- const BoxRec *box,
- int n, int bpp)
+static bool __upload_inplace(struct kgem *kgem,
+ struct kgem_bo *bo,
+ const BoxRec *box,
+ int n, int bpp)
{
unsigned int bytes;
- if (kgem->wedged)
- return true;
-
- if (!kgem_bo_can_map(kgem, bo) && !upload_inplace__tiled(kgem, bo))
- return false;
-
if (FORCE_INPLACE)
return FORCE_INPLACE > 0;
@@ -610,6 +621,20 @@ static bool upload_inplace(struct kgem *kgem,
return bytes * bpp >> 12;
}
+static bool upload_inplace(struct kgem *kgem,
+ struct kgem_bo *bo,
+ const BoxRec *box,
+ int n, int bpp)
+{
+ if (unlikely(kgem->wedged))
+ return true;
+
+ if (!kgem_bo_can_map(kgem, bo) && !upload_inplace__tiled(kgem, bo))
+ return false;
+
+	return __upload_inplace(kgem, bo, box, n, bpp);
+}
+
bool sna_write_boxes(struct sna *sna, PixmapPtr dst,
struct kgem_bo * const dst_bo, int16_t const dst_dx, int16_t const dst_dy,
const void * const src, int const stride, int16_t const src_dx, int16_t const src_dy,
@@ -672,14 +697,18 @@ fallback:
sna->render.max_3d_size, sna->render.max_3d_size));
if (must_tile(sna, tmp.drawable.width, tmp.drawable.height)) {
BoxRec tile, stack[64], *clipped, *c;
- int step;
+ int cpp, step;
tile:
- step = MIN(sna->render.max_3d_size - 4096 / dst->drawable.bitsPerPixel,
- 8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel);
- while (step * step * 4 > sna->kgem.max_upload_tile_size)
+ cpp = dst->drawable.bitsPerPixel / 8;
+ step = MIN(sna->render.max_3d_size,
+ (MAXSHORT&~63) / cpp);
+ while (step * step * cpp > sna->kgem.max_upload_tile_size)
step /= 2;
+ if (step * cpp > 4096)
+ step = 4096 / cpp;
+
DBG(("%s: tiling upload, using %dx%d tiles\n",
__FUNCTION__, step, step));
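
A worked example of the tile-step computation above, under assumed limits (hypothetical values: max_3d_size 8192, max_upload_tile_size 1 MiB, a 32bpp destination):

    #include <stdio.h>

    #define MAXSHORT 32767

    int main(void)
    {
            int max_3d_size = 8192, max_upload_tile_size = 1 << 20;
            int cpp = 32 / 8, step;

            step = (MAXSHORT & ~63) / cpp;       /* 8176 */
            if (step > max_3d_size)
                    step = max_3d_size;
            while (step * step * cpp > max_upload_tile_size)
                    step /= 2;                   /* 8176 -> ... -> 511 */
            if (step * cpp > 4096)
                    step = 4096 / cpp;

            printf("%dx%d tiles\n", step, step); /* 511x511 */
            return 0;
    }
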
@@ -803,7 +832,7 @@ tile:
cmd = XY_SRC_COPY_BLT_CMD;
br13 = dst_bo->pitch;
- if (kgem->gen >= 40 && dst_bo->tiling) {
+ if (kgem->gen >= 040 && dst_bo->tiling) {
cmd |= BLT_DST_TILED;
br13 >>= 2;
}
@@ -816,11 +845,13 @@ tile:
case 8: break;
}
- kgem_set_mode(kgem, KGEM_BLT);
+ kgem_set_mode(kgem, KGEM_BLT, dst_bo);
if (!kgem_check_batch(kgem, 8) ||
!kgem_check_reloc_and_exec(kgem, 2) ||
!kgem_check_bo_fenced(kgem, dst_bo)) {
- _kgem_submit(kgem);
+ kgem_submit(kgem);
+ if (!kgem_check_bo_fenced(kgem, dst_bo))
+ goto fallback;
_kgem_set_mode(kgem, KGEM_BLT);
}
@@ -960,6 +991,20 @@ write_boxes_inplace__xor(struct kgem *kgem,
} while (--n);
}
+static bool upload_inplace__xor(struct kgem *kgem,
+ struct kgem_bo *bo,
+ const BoxRec *box,
+ int n, int bpp)
+{
+ if (unlikely(kgem->wedged))
+ return true;
+
+ if (!kgem_bo_can_map(kgem, bo))
+ return false;
+
+ return __upload_inplace(kgem, bo, box, n, bpp);
+}
+
void sna_write_boxes__xor(struct sna *sna, PixmapPtr dst,
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const void *src, int stride, int16_t src_dx, int16_t src_dy,
@@ -976,7 +1021,7 @@ void sna_write_boxes__xor(struct sna *sna, PixmapPtr dst,
DBG(("%s x %d\n", __FUNCTION__, nbox));
- if (upload_inplace(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel)) {
+ if (upload_inplace__xor(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel)) {
fallback:
write_boxes_inplace__xor(kgem,
src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
@@ -1158,7 +1203,7 @@ tile:
cmd = XY_SRC_COPY_BLT_CMD;
br13 = dst_bo->pitch;
- if (kgem->gen >= 40 && dst_bo->tiling) {
+ if (kgem->gen >= 040 && dst_bo->tiling) {
cmd |= BLT_DST_TILED;
br13 >>= 2;
}
@@ -1171,11 +1216,13 @@ tile:
case 8: break;
}
- kgem_set_mode(kgem, KGEM_BLT);
- if (!kgem_check_reloc_and_exec(kgem, 2) ||
- !kgem_check_batch(kgem, 8) ||
+ kgem_set_mode(kgem, KGEM_BLT, dst_bo);
+ if (!kgem_check_batch(kgem, 8) ||
+ !kgem_check_reloc_and_exec(kgem, 2) ||
!kgem_check_bo_fenced(kgem, dst_bo)) {
- _kgem_submit(kgem);
+ kgem_submit(kgem);
+ if (!kgem_check_bo_fenced(kgem, dst_bo))
+ goto fallback;
_kgem_set_mode(kgem, KGEM_BLT);
}
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 58449228d..69ac21c3b 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -87,8 +87,8 @@ no_render_composite(struct sna *sna,
if (mask)
return false;
- if (!is_gpu(dst->pDrawable) &&
- (src->pDrawable == NULL || !is_gpu(src->pDrawable)))
+ if (!is_gpu(sna, dst->pDrawable, PREFER_GPU_BLT) &&
+ (src->pDrawable == NULL || !is_gpu(sna, src->pDrawable, PREFER_GPU_BLT)))
return false;
return sna_blt_composite(sna,
@@ -246,15 +246,14 @@ static void
no_render_context_switch(struct kgem *kgem,
int new_mode)
{
- if (!kgem->mode)
+ if (!kgem->nbatch)
return;
- if (kgem_is_idle(kgem)) {
+ if (kgem_ring_is_idle(kgem, kgem->ring)) {
DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
_kgem_submit(kgem);
}
- (void)kgem;
(void)new_mode;
}
@@ -280,7 +279,9 @@ void no_render_init(struct sna *sna)
{
struct sna_render *render = &sna->render;
- memset (render,0, sizeof (*render));
+ memset (render, 0, sizeof (*render));
+
+ render->prefer_gpu = PREFER_GPU_BLT;
render->vertices = render->vertex_data;
render->vertex_size = ARRAY_SIZE(render->vertex_data);
@@ -305,6 +306,8 @@ void no_render_init(struct sna *sna)
sna->kgem.expire = no_render_expire;
if (sna->kgem.has_blt)
sna->kgem.ring = KGEM_BLT;
+
+ sna_vertex_init(sna);
}
static struct kgem_bo *
@@ -321,6 +324,14 @@ use_cpu_bo(struct sna *sna, PixmapPtr pixmap, const BoxRec *box, bool blt)
return NULL;
}
+ if (priv->shm) {
+ DBG(("%s: shm CPU bo, avoiding promotion to GPU\n",
+ __FUNCTION__));
+ assert(!priv->flush);
+ sna_add_flush_pixmap(sna, priv, priv->cpu_bo);
+ return priv->cpu_bo;
+ }
+
if (priv->cpu_bo->snoop && priv->source_count > SOURCE_BIAS) {
DBG(("%s: promoting snooped CPU bo due to reuse\n",
__FUNCTION__));
@@ -339,6 +350,11 @@ use_cpu_bo(struct sna *sna, PixmapPtr pixmap, const BoxRec *box, bool blt)
__FUNCTION__));
break;
default:
+		if (kgem_bo_is_busy(priv->gpu_bo)) {
+ DBG(("%s: box is partially damaged on the CPU, and the GPU is busy\n",
+ __FUNCTION__));
+ return NULL;
+ }
if (sna_damage_contains_box(priv->gpu_damage,
box) != PIXMAN_REGION_OUT) {
DBG(("%s: box is damaged on the GPU\n",
@@ -384,11 +400,6 @@ use_cpu_bo(struct sna *sna, PixmapPtr pixmap, const BoxRec *box, bool blt)
}
}
- if (priv->shm) {
- assert(!priv->flush);
- sna_add_flush_pixmap(sna, priv, priv->cpu_bo);
- }
-
DBG(("%s for box=(%d, %d), (%d, %d)\n",
__FUNCTION__, box->x1, box->y1, box->x2, box->y2));
++priv->source_count;
@@ -423,12 +434,21 @@ move_to_gpu(PixmapPtr pixmap, const BoxRec *box, bool blt)
return priv->gpu_bo;
}
+ if (priv->cpu_damage == NULL) {
+ DBG(("%s: not migrating uninitialised pixmap\n",
+ __FUNCTION__));
+ return NULL;
+ }
+
if (pixmap->usage_hint) {
DBG(("%s: not migrating pixmap due to usage_hint=%d\n",
__FUNCTION__, pixmap->usage_hint));
return NULL;
}
+ if (priv->shm)
+ blt = true;
+
if (DBG_FORCE_UPLOAD < 0) {
if (!sna_pixmap_force_to_gpu(pixmap,
blt ? MOVE_READ : MOVE_SOURCE_HINT | MOVE_READ))
@@ -439,7 +459,9 @@ move_to_gpu(PixmapPtr pixmap, const BoxRec *box, bool blt)
w = box->x2 - box->x1;
h = box->y2 - box->y1;
- if (w == pixmap->drawable.width && h == pixmap->drawable.height) {
+ if (priv->cpu_bo && !priv->cpu_bo->flush) {
+ migrate = true;
+ } else if (w == pixmap->drawable.width && h == pixmap->drawable.height) {
migrate = priv->source_count++ > SOURCE_BIAS;
DBG(("%s: migrating whole pixmap (%dx%d) for source (%d,%d),(%d,%d), count %d? %d\n",
@@ -464,9 +486,15 @@ move_to_gpu(PixmapPtr pixmap, const BoxRec *box, bool blt)
migrate = count*w*h > pixmap->drawable.width * pixmap->drawable.height;
}
- if (migrate && !sna_pixmap_force_to_gpu(pixmap,
- blt ? MOVE_READ : MOVE_SOURCE_HINT | MOVE_READ))
- return NULL;
+ if (migrate) {
+ if (blt) {
+ if (!sna_pixmap_move_area_to_gpu(pixmap, box, MOVE_READ))
+ return NULL;
+ } else {
+ if (!sna_pixmap_force_to_gpu(pixmap, MOVE_SOURCE_HINT | MOVE_READ))
+ return NULL;
+ }
+ }
return priv->gpu_bo;
}
@@ -474,7 +502,7 @@ move_to_gpu(PixmapPtr pixmap, const BoxRec *box, bool blt)
static struct kgem_bo *upload(struct sna *sna,
struct sna_composite_channel *channel,
PixmapPtr pixmap,
- BoxPtr box)
+ const BoxRec *box)
{
struct sna_pixmap *priv;
struct kgem_bo *bo;
@@ -488,6 +516,9 @@ static struct kgem_bo *upload(struct sna *sna,
priv = sna_pixmap(pixmap);
if (priv) {
+ if (priv->cpu_damage == NULL)
+ return NULL;
+
/* As we know this box is on the CPU just fixup the shadow */
if (priv->mapped) {
pixmap->devPrivate.ptr = NULL;
@@ -497,7 +528,7 @@ static struct kgem_bo *upload(struct sna *sna,
if (priv->ptr == NULL) /* uninitialised */
return NULL;
assert(priv->stride);
- pixmap->devPrivate.ptr = priv->ptr;
+ pixmap->devPrivate.ptr = PTR(priv->ptr);
pixmap->devKind = priv->stride;
}
}
@@ -515,8 +546,11 @@ static struct kgem_bo *upload(struct sna *sna,
if (priv &&
pixmap->usage_hint == 0 &&
channel->width == pixmap->drawable.width &&
- channel->height == pixmap->drawable.height)
+ channel->height == pixmap->drawable.height) {
+ assert(priv->gpu_damage == NULL);
+ assert(priv->gpu_bo == NULL);
kgem_proxy_bo_attach(bo, &priv->gpu_bo);
+ }
}
return bo;
@@ -575,6 +609,10 @@ sna_render_pixmap_bo(struct sna *sna,
!priv->cpu_bo->snoop && priv->cpu_bo->pitch < 4096) {
DBG(("%s: CPU all damaged\n", __FUNCTION__));
channel->bo = priv->cpu_bo;
+ if (priv->shm) {
+ assert(!priv->flush);
+ sna_add_flush_pixmap(sna, priv, priv->cpu_bo);
+ }
goto done;
}
}
@@ -587,8 +625,8 @@ sna_render_pixmap_bo(struct sna *sna,
} else {
box.x1 = x;
box.y1 = y;
- box.x2 = x + w;
- box.y2 = y + h;
+ box.x2 = bound(x, w);
+ box.y2 = bound(y, h);
if (channel->repeat == RepeatNone || channel->repeat == RepeatPad) {
if (box.x1 < 0)
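
Several extent computations in this file now use bound() instead of a raw `x + w`. A sketch of the assumed helper, clamping the sum so that a large width cannot overflow the 16-bit box coordinate (an assumption from usage; the real definition lives elsewhere in the driver):

    #include <limits.h>
    #include <stdint.h>

    static int16_t bound(int16_t start, uint16_t length)
    {
            int v = (int)start + (int)length;
            return v > SHRT_MAX ? SHRT_MAX : (int16_t)v;
    }
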
@@ -661,8 +699,8 @@ static int sna_render_picture_downsample(struct sna *sna,
box.x1 = x;
box.y1 = y;
- box.x2 = x + w;
- box.y2 = y + h;
+ box.x2 = bound(x, w);
+ box.y2 = bound(y, h);
if (channel->transform) {
pixman_vector_t v;
@@ -843,8 +881,8 @@ sna_render_pixmap_partial(struct sna *sna,
box.x1 = x;
box.y1 = y;
- box.x2 = x + w;
- box.y2 = y + h;
+ box.x2 = bound(x, w);
+ box.y2 = bound(y, h);
DBG(("%s: unaligned box (%d, %d), (%d, %d)\n",
__FUNCTION__, box.x1, box.y1, box.x2, box.y2));
@@ -861,6 +899,9 @@ sna_render_pixmap_partial(struct sna *sna,
DBG(("%s: tile size for tiling %d: %dx%d, size=%d\n",
__FUNCTION__, bo->tiling, tile_width, tile_height, tile_size));
+ if (sna->kgem.gen < 033)
+ tile_width = bo->pitch;
+
/* Ensure we align to an even tile row */
box.y1 = box.y1 & ~(2*tile_height - 1);
box.y2 = ALIGN(box.y2, 2*tile_height);
@@ -934,8 +975,8 @@ sna_render_picture_partial(struct sna *sna,
box.x1 = x;
box.y1 = y;
- box.x2 = x + w;
- box.y2 = y + h;
+ box.x2 = bound(x, w);
+ box.y2 = bound(y, h);
if (channel->transform)
pixman_transform_bounds(channel->transform, &box);
@@ -1077,8 +1118,8 @@ sna_render_picture_extract(struct sna *sna,
ox = box.x1 = x;
oy = box.y1 = y;
- box.x2 = x + w;
- box.y2 = y + h;
+ box.x2 = bound(x, w);
+ box.y2 = bound(y, h);
if (channel->transform) {
pixman_vector_t v;
@@ -1147,8 +1188,11 @@ sna_render_picture_extract(struct sna *sna,
box.x2 - box.x1 == pixmap->drawable.width &&
box.y2 - box.y1 == pixmap->drawable.height) {
struct sna_pixmap *priv = sna_pixmap(pixmap);
- if (priv)
+ if (priv) {
+ assert(priv->gpu_damage == NULL);
+ assert(priv->gpu_bo == NULL);
kgem_proxy_bo_attach(bo, &priv->gpu_bo);
+ }
}
}
}
@@ -1334,6 +1378,7 @@ sna_render_picture_flatten(struct sna *sna,
assert(w <= sna->render.max_3d_size && h <= sna->render.max_3d_size);
/* XXX shortcut a8? */
+ DBG(("%s: %dx%d\n", __FUNCTION__, w, h));
pixmap = screen->CreatePixmap(screen, w, h, 32, SNA_CREATE_SCRATCH);
if (pixmap == NullPixmap)
@@ -1346,6 +1391,8 @@ sna_render_picture_flatten(struct sna *sna,
if (tmp == NULL)
return 0;
+ ValidatePicture(tmp);
+
old_format = picture->format;
picture->format = PICT_FORMAT(PICT_FORMAT_BPP(picture->format),
PICT_FORMAT_TYPE(picture->format),
@@ -1445,11 +1492,11 @@ sna_render_picture_approximate_gradient(struct sna *sna,
pixman_transform_multiply(&t, picture->transform, &t);
pixman_image_set_transform(src, &t);
- pixman_image_composite(PictOpSrc, src, NULL, dst,
- x + dx, y + dy,
- 0, 0,
- 0, 0,
- w2, h2);
+ sna_image_composite(PictOpSrc, src, NULL, dst,
+ x+dx, y+dy,
+ 0, 0,
+ 0, 0,
+ w2, h2);
free_pixman_pict(picture, src);
pixman_image_unref(dst);
@@ -1498,7 +1545,8 @@ sna_render_picture_fixup(struct sna *sna,
if (picture->alphaMap) {
DBG(("%s: alphamap\n", __FUNCTION__));
- if (is_gpu(picture->pDrawable) || is_gpu(picture->alphaMap->pDrawable)) {
+ if (is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER) ||
+ is_gpu(sna, picture->alphaMap->pDrawable, PREFER_GPU_RENDER)) {
return sna_render_picture_flatten(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
@@ -1508,7 +1556,7 @@ sna_render_picture_fixup(struct sna *sna,
if (picture->filter == PictFilterConvolution) {
DBG(("%s: convolution\n", __FUNCTION__));
- if (is_gpu(picture->pDrawable)) {
+ if (is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER)) {
return sna_render_picture_convolve(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
@@ -1541,8 +1589,10 @@ do_fixup:
}
 	/* Composite in the original format to preserve idiosyncrasies */
- if (picture->format == channel->pict_format)
- dst = pixman_image_create_bits(picture->format,
+ if (!kgem_buffer_is_inplace(channel->bo) &&
+ (picture->pDrawable == NULL ||
+ picture->format == channel->pict_format))
+ dst = pixman_image_create_bits(channel->pict_format,
w, h, ptr, channel->bo->pitch);
else
dst = pixman_image_create_bits(picture->format, w, h, NULL, 0);
@@ -1560,15 +1610,15 @@ do_fixup:
DBG(("%s: compositing tmp=(%d+%d, %d+%d)x(%d, %d)\n",
__FUNCTION__, x, dx, y, dy, w, h));
- pixman_image_composite(PictOpSrc, src, NULL, dst,
- x + dx, y + dy,
- 0, 0,
- 0, 0,
- w, h);
+ sna_image_composite(PictOpSrc, src, NULL, dst,
+ x + dx, y + dy,
+ 0, 0,
+ 0, 0,
+ w, h);
free_pixman_pict(picture, src);
/* Then convert to card format */
- if (picture->format != channel->pict_format) {
+ if (pixman_image_get_data(dst) != ptr) {
DBG(("%s: performing post-conversion %08x->%08x (%d, %d)\n",
__FUNCTION__,
picture->format, channel->pict_format,
@@ -1614,11 +1664,10 @@ sna_render_picture_convert(struct sna *sna,
PixmapPtr pixmap,
int16_t x, int16_t y,
int16_t w, int16_t h,
- int16_t dst_x, int16_t dst_y)
+ int16_t dst_x, int16_t dst_y,
+ bool fixup_alpha)
{
- pixman_image_t *src, *dst;
BoxRec box;
- void *ptr;
#if NO_CONVERT
return -1;
@@ -1627,8 +1676,8 @@ sna_render_picture_convert(struct sna *sna,
if (w != 0 && h != 0) {
box.x1 = x;
box.y1 = y;
- box.x2 = x + w;
- box.y2 = y + h;
+ box.x2 = bound(x, w);
+ box.y2 = bound(y, h);
if (channel->transform) {
DBG(("%s: has transform, converting whole surface\n",
@@ -1668,52 +1717,113 @@ sna_render_picture_convert(struct sna *sna,
return 0;
}
- if (!sna_pixmap_move_to_cpu(pixmap, MOVE_READ))
- return 0;
+ if (fixup_alpha && is_gpu(sna, &pixmap->drawable, PREFER_GPU_RENDER)) {
+ ScreenPtr screen = pixmap->drawable.pScreen;
+ PixmapPtr tmp;
+ PicturePtr src, dst;
+ int error;
- src = pixman_image_create_bits(picture->format,
- pixmap->drawable.width,
- pixmap->drawable.height,
- pixmap->devPrivate.ptr,
- pixmap->devKind);
- if (!src)
- return 0;
+ assert(PICT_FORMAT_BPP(picture->format) == pixmap->drawable.bitsPerPixel);
+ channel->pict_format = PICT_FORMAT(PICT_FORMAT_BPP(picture->format),
+ PICT_FORMAT_TYPE(picture->format),
+ PICT_FORMAT_BPP(picture->format) - PIXMAN_FORMAT_DEPTH(picture->format),
+ PICT_FORMAT_R(picture->format),
+ PICT_FORMAT_G(picture->format),
+ PICT_FORMAT_B(picture->format));
- if (PICT_FORMAT_RGB(picture->format) == 0) {
- channel->pict_format = PIXMAN_a8;
- DBG(("%s: converting to a8 from %08x\n",
- __FUNCTION__, picture->format));
+ DBG(("%s: converting to %08x from %08x using composite alpha-fixup\n",
+ __FUNCTION__, (unsigned)picture->format));
+
+ tmp = screen->CreatePixmap(screen, w, h, pixmap->drawable.bitsPerPixel, 0);
+ if (tmp == NULL)
+ return 0;
+
+ dst = CreatePicture(0, &tmp->drawable,
+ PictureMatchFormat(screen,
+ pixmap->drawable.bitsPerPixel,
+ channel->pict_format),
+ 0, NULL, serverClient, &error);
+ if (dst == NULL) {
+ screen->DestroyPixmap(tmp);
+ return 0;
+ }
+
+ src = CreatePicture(0, &pixmap->drawable,
+ PictureMatchFormat(screen,
+ pixmap->drawable.depth,
+ picture->format),
+ 0, NULL, serverClient, &error);
+ if (src == NULL) {
+ FreePicture(dst, 0);
+ screen->DestroyPixmap(tmp);
+ return 0;
+ }
+
+ ValidatePicture(src);
+ ValidatePicture(dst);
+
+ sna_composite(PictOpSrc, src, NULL, dst,
+ box.x1, box.y1,
+ 0, 0,
+ 0, 0,
+ w, h);
+ FreePicture(dst, 0);
+ FreePicture(src, 0);
+
+ channel->bo = sna_pixmap_get_bo(tmp);
+ kgem_bo_reference(channel->bo);
+ screen->DestroyPixmap(tmp);
} else {
- channel->pict_format = PIXMAN_a8r8g8b8;
- DBG(("%s: converting to a8r8g8b8 from %08x\n",
- __FUNCTION__, picture->format));
- }
+ pixman_image_t *src, *dst;
+ void *ptr;
- channel->bo = kgem_create_buffer_2d(&sna->kgem,
- w, h, PIXMAN_FORMAT_BPP(channel->pict_format),
- KGEM_BUFFER_WRITE_INPLACE,
- &ptr);
- if (!channel->bo) {
- pixman_image_unref(src);
- return 0;
- }
+ if (!sna_pixmap_move_to_cpu(pixmap, MOVE_READ))
+ return 0;
- dst = pixman_image_create_bits(channel->pict_format,
- w, h, ptr, channel->bo->pitch);
- if (!dst) {
- kgem_bo_destroy(&sna->kgem, channel->bo);
+ src = pixman_image_create_bits(picture->format,
+ pixmap->drawable.width,
+ pixmap->drawable.height,
+ pixmap->devPrivate.ptr,
+ pixmap->devKind);
+ if (!src)
+ return 0;
+
+ if (PICT_FORMAT_RGB(picture->format) == 0) {
+ channel->pict_format = PIXMAN_a8;
+ DBG(("%s: converting to a8 from %08x\n",
+ __FUNCTION__, picture->format));
+ } else {
+ channel->pict_format = PIXMAN_a8r8g8b8;
+ DBG(("%s: converting to a8r8g8b8 from %08x\n",
+ __FUNCTION__, picture->format));
+ }
+
+ channel->bo = kgem_create_buffer_2d(&sna->kgem,
+ w, h, PIXMAN_FORMAT_BPP(channel->pict_format),
+ KGEM_BUFFER_WRITE_INPLACE,
+ &ptr);
+ if (!channel->bo) {
+ pixman_image_unref(src);
+ return 0;
+ }
+
+ dst = pixman_image_create_bits(channel->pict_format,
+ w, h, ptr, channel->bo->pitch);
+ if (!dst) {
+ kgem_bo_destroy(&sna->kgem, channel->bo);
+ pixman_image_unref(src);
+ return 0;
+ }
+
+ pixman_image_composite(PictOpSrc, src, NULL, dst,
+ box.x1, box.y1,
+ 0, 0,
+ 0, 0,
+ w, h);
+ pixman_image_unref(dst);
pixman_image_unref(src);
- return 0;
}
- pixman_image_composite(PictOpSrc, src, NULL, dst,
- box.x1, box.y1,
- 0, 0,
- 0, 0,
- w, h);
- pixman_image_unref(dst);
- pixman_image_unref(src);
-
channel->width = w;
channel->height = h;
@@ -1722,11 +1832,10 @@ sna_render_picture_convert(struct sna *sna,
channel->offset[0] = x - dst_x - box.x1;
channel->offset[1] = y - dst_y - box.y1;
- DBG(("%s: offset=(%d, %d), size=(%d, %d) ptr[0]=%08x\n",
+ DBG(("%s: offset=(%d, %d), size=(%d, %d)\n",
__FUNCTION__,
channel->offset[0], channel->offset[1],
- channel->width, channel->height,
- *(uint32_t*)ptr));
+ channel->width, channel->height));
return 1;
}
@@ -1764,9 +1873,9 @@ sna_render_composite_redirect(struct sna *sna,
__FUNCTION__, op->dst.bo->pitch, sna->render.max_3d_pitch));
box.x1 = x;
- box.x2 = x + width;
+ box.x2 = bound(x, width);
box.y1 = y;
- box.y2 = y + height;
+ box.y2 = bound(y, height);
/* Ensure we align to an even tile row */
if (op->dst.bo->tiling) {
@@ -1783,7 +1892,7 @@ sna_render_composite_redirect(struct sna *sna,
offset = box.x1 * op->dst.pixmap->drawable.bitsPerPixel / 8 / tile_width * tile_size;
} else {
- if (sna->kgem.gen < 40) {
+ if (sna->kgem.gen < 040) {
box.y1 = box.y1 & ~3;
box.y2 = ALIGN(box.y2, 4);
@@ -1860,8 +1969,8 @@ sna_render_composite_redirect(struct sna *sna,
t->box.x1 = x + op->dst.x;
t->box.y1 = y + op->dst.y;
- t->box.x2 = t->box.x1 + width;
- t->box.y2 = t->box.y1 + height;
+ t->box.x2 = bound(t->box.x1, width);
+ t->box.y2 = bound(t->box.y1, height);
DBG(("%s: original box (%d, %d), (%d, %d)\n",
__FUNCTION__, t->box.x1, t->box.y1, t->box.x2, t->box.y2));
@@ -1911,11 +2020,13 @@ sna_render_composite_redirect_done(struct sna *sna,
assert(ok);
}
if (t->damage) {
- DBG(("%s: combining damage, offset=(%d, %d)\n",
- __FUNCTION__, t->box.x1, t->box.y1));
- sna_damage_combine(t->real_damage, t->damage,
+ DBG(("%s: combining damage (all? %d), offset=(%d, %d)\n",
+ __FUNCTION__, (int)DAMAGE_IS_ALL(t->damage),
+ t->box.x1, t->box.y1));
+ sna_damage_combine(t->real_damage,
+ DAMAGE_PTR(t->damage),
t->box.x1, t->box.y1);
- __sna_damage_destroy(t->damage);
+ __sna_damage_destroy(DAMAGE_PTR(t->damage));
}
kgem_bo_destroy(&sna->kgem, op->dst.bo);
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 03a700571..01176c6aa 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -5,6 +5,11 @@
#include <picturestr.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <pthread.h>
+#include "atomic.h"
+
#define GRADIENT_CACHE_SIZE 16
#define GXinvalid 0xff
@@ -30,6 +35,8 @@ struct sna_composite_op {
const BoxRec *box);
void (*boxes)(struct sna *sna, const struct sna_composite_op *op,
const BoxRec *box, int nbox);
+ void (*thread_boxes)(struct sna *sna, const struct sna_composite_op *op,
+ const BoxRec *box, int nbox);
void (*done)(struct sna *sna, const struct sna_composite_op *op);
struct sna_damage **damage;
@@ -66,10 +73,10 @@ struct sna_composite_op {
union {
struct {
+ float dx, dy, offset;
+ } linear;
+ struct {
uint32_t pixel;
- float linear_dx;
- float linear_dy;
- float linear_offset;
} gen2;
struct gen3_shader_channel {
int type;
@@ -88,6 +95,9 @@ struct sna_composite_op {
fastcall void (*prim_emit)(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r);
+ fastcall void (*emit_boxes)(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v);
struct sna_composite_redirect {
struct kgem_bo *real_bo;
@@ -122,8 +132,8 @@ struct sna_composite_op {
} gen4;
struct {
- int wm_kernel;
- int ve_id;
+ int16_t wm_kernel;
+ int16_t ve_id;
} gen5;
struct {
@@ -138,6 +148,11 @@ struct sna_composite_op {
void *priv;
};
+struct sna_opacity_box {
+ BoxRec box;
+ float alpha;
+} __packed__;
+
struct sna_composite_spans_op {
struct sna_composite_op base;
@@ -149,6 +164,12 @@ struct sna_composite_spans_op {
const struct sna_composite_spans_op *op,
const BoxRec *box, int nbox,
float opacity);
+
+ fastcall void (*thread_boxes)(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox);
+
fastcall void (*done)(struct sna *sna,
const struct sna_composite_spans_op *op);
@@ -156,6 +177,9 @@ struct sna_composite_spans_op {
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity);
+ fastcall void (*emit_boxes)(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box, int nbox,
+ float *v);
};
struct sna_fill_op {
@@ -184,9 +208,18 @@ struct sna_copy_op {
};
struct sna_render {
+ pthread_mutex_t lock;
+ pthread_cond_t wait;
+ int active;
+
int max_3d_size;
int max_3d_pitch;
+ unsigned prefer_gpu;
+#define PREFER_GPU_BLT 0x1
+#define PREFER_GPU_RENDER 0x2
+#define PREFER_GPU_SPANS 0x4
+
bool (*composite)(struct sna *sna, uint8_t op,
PicturePtr dst, PicturePtr src, PicturePtr mask,
int16_t src_x, int16_t src_y,
@@ -214,6 +247,7 @@ struct sna_render {
RegionPtr dstRegion,
short src_w, short src_h,
short drw_w, short drw_h,
+ short dx, short dy,
PixmapPtr pixmap);
bool (*fill_boxes)(struct sna *sna,
@@ -237,6 +271,7 @@ struct sna_render {
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const BoxRec *box, int n, unsigned flags);
#define COPY_LAST 0x1
+#define COPY_SYNC 0x2
bool (*copy)(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo,
@@ -249,13 +284,13 @@ struct sna_render {
struct sna_alpha_cache {
struct kgem_bo *cache_bo;
- struct kgem_bo *bo[256];
+ struct kgem_bo *bo[256+7];
} alpha_cache;
struct sna_solid_cache {
struct kgem_bo *cache_bo;
- uint32_t color[1024];
struct kgem_bo *bo[1024];
+ uint32_t color[1025];
int last;
int size;
int dirty;
@@ -282,6 +317,8 @@ struct sna_render {
pixman_glyph_cache_t *glyph_cache;
#endif
+ uint16_t vb_id;
+ uint16_t vertex_offset;
uint16_t vertex_start;
uint16_t vertex_index;
uint16_t vertex_used;
@@ -302,7 +339,6 @@ struct gen2_render_state {
uint32_t ls1, ls2, vft;
uint32_t diffuse;
uint32_t specular;
- uint16_t vertex_offset;
};
struct gen3_render_state {
@@ -318,7 +354,6 @@ struct gen3_render_state {
uint32_t last_diffuse;
uint32_t last_specular;
- uint16_t vertex_offset;
uint16_t last_vertex_offset;
uint16_t floats_per_vertex;
uint16_t last_floats_per_vertex;
@@ -332,16 +367,14 @@ struct gen4_render_state {
struct kgem_bo *general_bo;
uint32_t vs;
- uint32_t sf[2];
+ uint32_t sf;
uint32_t wm;
uint32_t cc;
int ve_id;
uint32_t drawrect_offset;
uint32_t drawrect_limit;
- uint32_t vb_id;
uint32_t last_pipelined_pointers;
- uint16_t vertex_offset;
uint16_t last_primitive;
int16_t floats_per_vertex;
uint16_t surface_table;
@@ -361,8 +394,6 @@ struct gen5_render_state {
int ve_id;
uint32_t drawrect_offset;
uint32_t drawrect_limit;
- uint32_t vb_id;
- uint16_t vertex_offset;
uint16_t last_primitive;
int16_t floats_per_vertex;
uint16_t surface_table;
@@ -402,7 +433,6 @@ struct gen6_render_state {
uint32_t wm_state;
uint32_t wm_kernel[GEN6_KERNEL_COUNT][3];
- uint32_t cc_vp;
uint32_t cc_blend;
uint32_t drawrect_offset;
@@ -412,9 +442,7 @@ struct gen6_render_state {
uint32_t kernel;
uint16_t num_sf_outputs;
- uint16_t vb_id;
uint16_t ve_id;
- uint16_t vertex_offset;
uint16_t last_primitive;
int16_t floats_per_vertex;
uint16_t surface_table;
@@ -454,7 +482,6 @@ struct gen7_render_state {
uint32_t wm_state;
uint32_t wm_kernel[GEN7_WM_KERNEL_COUNT][3];
- uint32_t cc_vp;
uint32_t cc_blend;
uint32_t drawrect_offset;
@@ -464,9 +491,7 @@ struct gen7_render_state {
uint32_t kernel;
uint16_t num_sf_outputs;
- uint16_t vb_id;
uint16_t ve_id;
- uint16_t vertex_offset;
uint16_t last_primitive;
int16_t floats_per_vertex;
uint16_t surface_table;
@@ -690,7 +715,8 @@ sna_render_picture_convert(struct sna *sna,
PixmapPtr pixmap,
int16_t x, int16_t y,
int16_t w, int16_t h,
- int16_t dst_x, int16_t dst_y);
+ int16_t dst_x, int16_t dst_y,
+ bool fixup_alpha);
inline static void sna_render_composite_redirect_init(struct sna_composite_op *op)
{
@@ -717,4 +743,36 @@ sna_render_copy_boxes__overlap(struct sna *sna, uint8_t alu,
bool
sna_composite_mask_is_opaque(PicturePtr mask);
+void sna_vertex_init(struct sna *sna);
+
+static inline void sna_vertex_lock(struct sna_render *r)
+{
+ pthread_mutex_lock(&r->lock);
+}
+
+static inline void sna_vertex_acquire__locked(struct sna_render *r)
+{
+ r->active++;
+}
+
+static inline void sna_vertex_unlock(struct sna_render *r)
+{
+ pthread_mutex_unlock(&r->lock);
+}
+
+static inline void sna_vertex_release__locked(struct sna_render *r)
+{
+ assert(r->active > 0);
+ if (--r->active == 0)
+ pthread_cond_signal(&r->wait);
+}
+
+static inline bool sna_vertex_wait__locked(struct sna_render *r)
+{
+ bool was_active = r->active;
+ while (r->active)
+ pthread_cond_wait(&r->wait, &r->lock);
+ return was_active;
+}
+
#endif /* SNA_RENDER_H */
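
The new inline helpers above form a small counting handshake: render threads raise `active` under the lock while they emit vertices, and a flusher blocks until the count drains back to zero. A self-contained sketch of the same pattern (simplified; the driver keeps this state inside struct sna_render):

    #include <pthread.h>
    #include <stdbool.h>

    struct render_sync {
            pthread_mutex_t lock;
            pthread_cond_t wait;
            int active;
    };
    #define RENDER_SYNC_INIT \
            { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0 }

    static void worker_begin(struct render_sync *r)
    {
            pthread_mutex_lock(&r->lock);
            r->active++;
            pthread_mutex_unlock(&r->lock);
    }

    static void worker_end(struct render_sync *r)
    {
            pthread_mutex_lock(&r->lock);
            if (--r->active == 0)
                    pthread_cond_signal(&r->wait);
            pthread_mutex_unlock(&r->lock);
    }

    /* Returns true if anyone was still rendering when called. */
    static bool flush_wait(struct render_sync *r)
    {
            bool was_active;
            pthread_mutex_lock(&r->lock);
            was_active = r->active > 0;
            while (r->active)
                    pthread_cond_wait(&r->wait, &r->lock);
            pthread_mutex_unlock(&r->lock);
            return was_active;
    }
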
diff --git a/src/sna/sna_render_inline.h b/src/sna/sna_render_inline.h
index a796903fb..7d9f2cacf 100644
--- a/src/sna/sna_render_inline.h
+++ b/src/sna/sna_render_inline.h
@@ -17,6 +17,17 @@ static inline bool need_redirect(struct sna *sna, PixmapPtr dst)
dst->drawable.height > sna->render.max_3d_size);
}
+static inline float pack_2s(int16_t x, int16_t y)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } u;
+ u.p.x = x;
+ u.p.y = y;
+ return u.f;
+}
+
static inline int vertex_space(struct sna *sna)
{
return sna->render.vertex_size - sna->render.vertex_used;
@@ -28,21 +39,7 @@ static inline void vertex_emit(struct sna *sna, float v)
}
static inline void vertex_emit_2s(struct sna *sna, int16_t x, int16_t y)
{
- int16_t *v = (int16_t *)&sna->render.vertices[sna->render.vertex_used++];
- assert(sna->render.vertex_used <= sna->render.vertex_size);
- v[0] = x;
- v[1] = y;
-}
-
-static inline float pack_2s(int16_t x, int16_t y)
-{
- union {
- struct sna_coordinate p;
- float f;
- } u;
- u.p.x = x;
- u.p.y = y;
- return u.f;
+ vertex_emit(sna, pack_2s(x, y));
}
static inline int batch_space(struct sna *sna)
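
vertex_emit_2s() is now expressed through pack_2s() above, which stores two 16-bit coordinates into the bit pattern of one float so the vertex stream can carry short coordinates inside a float array; the value is never used arithmetically as a float. A sketch assuming the little-endian x-low/y-high layout of struct sna_coordinate, using memcpy in place of the union:

    #include <stdint.h>
    #include <string.h>

    static float pack_2s(int16_t x, int16_t y)
    {
            uint32_t bits = (uint16_t)x | ((uint32_t)(uint16_t)y << 16);
            float f;
            memcpy(&f, &bits, sizeof(f));
            return f;
    }
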
@@ -70,17 +67,18 @@ static inline void batch_emit_float(struct sna *sna, float f)
}
static inline bool
-is_gpu(DrawablePtr drawable)
+is_gpu(struct sna *sna, DrawablePtr drawable, unsigned prefer)
{
struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable);
- if (priv == NULL || priv->clear)
+ if (priv == NULL || priv->clear || priv->cpu)
return false;
if (priv->cpu_damage == NULL)
return true;
- if (priv->gpu_damage && !priv->gpu_bo->proxy)
+ if (priv->gpu_damage && !priv->gpu_bo->proxy &&
+ (sna->render.prefer_gpu & prefer))
return true;
if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
@@ -111,11 +109,20 @@ unattached(DrawablePtr drawable)
}
static inline bool
-picture_is_gpu(PicturePtr picture)
+picture_is_gpu(struct sna *sna, PicturePtr picture)
{
if (!picture || !picture->pDrawable)
return false;
- return is_gpu(picture->pDrawable);
+ return is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER);
+}
+
+static inline bool
+picture_is_cpu(struct sna *sna, PicturePtr picture)
+{
+ if (!picture->pDrawable)
+ return false;
+
+ return !is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER);
}
static inline bool sna_blt_compare_depth(DrawablePtr src, DrawablePtr dst)
@@ -146,8 +153,8 @@ sna_render_picture_extents(PicturePtr p, BoxRec *box)
{
box->x1 = p->pDrawable->x;
box->y1 = p->pDrawable->y;
- box->x2 = p->pDrawable->x + p->pDrawable->width;
- box->y2 = p->pDrawable->y + p->pDrawable->height;
+ box->x2 = bound(box->x1, p->pDrawable->width);
+ box->y2 = bound(box->y1, p->pDrawable->height);
if (box->x1 < p->pCompositeClip->extents.x1)
box->x1 = p->pCompositeClip->extents.x1;
@@ -158,6 +165,8 @@ sna_render_picture_extents(PicturePtr p, BoxRec *box)
box->x2 = p->pCompositeClip->extents.x2;
if (box->y2 > p->pCompositeClip->extents.y2)
box->y2 = p->pCompositeClip->extents.y2;
+
+ assert(box->x2 > box->x1 && box->y2 > box->y1);
}
static inline void
@@ -218,4 +227,44 @@ color_convert(uint32_t pixel,
return pixel;
}
+inline static bool dst_use_gpu(PixmapPtr pixmap)
+{
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ if (priv == NULL)
+ return false;
+
+ if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
+ return true;
+
+ if (priv->clear)
+ return false;
+
+ if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
+ return true;
+
+ return priv->gpu_damage && (!priv->cpu || !priv->cpu_damage);
+}
+
+inline static bool dst_use_cpu(PixmapPtr pixmap)
+{
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ if (priv == NULL || priv->shm)
+ return true;
+
+ return priv->cpu_damage && priv->cpu;
+}
+
+inline static bool dst_is_cpu(PixmapPtr pixmap)
+{
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ return priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage);
+}
+
+inline static bool
+untransformed(PicturePtr p)
+{
+ return !p->transform || pixman_transform_is_int_translate(p->transform);
+}
+
+
#endif /* SNA_RENDER_INLINE_H */
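
[Note: taken together, this header now carries the placement policy in one place: is_gpu() consults the new per-generation preference mask (sna->render.prefer_gpu tested against bits such as PREFER_GPU_RENDER and PREFER_GPU_SPANS) on top of the damage state, while dst_use_gpu()/dst_use_cpu() report where the destination's valid copy currently lives. A hedged sketch of how a caller might combine them; choose_gpu_spans() is invented for illustration:

    static bool choose_gpu_spans(struct sna *sna, PicturePtr src, PicturePtr dst)
    {
        PixmapPtr pixmap = get_drawable_pixmap(dst->pDrawable);

        if (dst_use_cpu(pixmap))   /* destination is being worked on the CPU */
            return false;

        /* Take the GPU span path only if this generation prefers it,
         * or the source already resides on the GPU anyway. */
        return is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS) ||
               picture_is_gpu(sna, src);
    }
]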
diff --git a/src/sna/sna_threads.c b/src/sna/sna_threads.c
new file mode 100644
index 000000000..f77ddbfe8
--- /dev/null
+++ b/src/sna/sna_threads.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "sna.h"
+
+#include <unistd.h>
+#include <pthread.h>
+#include <signal.h>
+
+static int max_threads = -1;
+
+static struct thread {
+ pthread_t thread;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ void (*func)(void *arg);
+ void *arg;
+} *threads;
+
+static void *__run__(void *arg)
+{
+ struct thread *t = arg;
+ sigset_t signals;
+
+ /* Disable all signals in the slave threads as X uses them for IO */
+ sigfillset(&signals);
+ pthread_sigmask(SIG_BLOCK, &signals, NULL);
+
+ pthread_mutex_lock(&t->mutex);
+ while (1) {
+ while (t->func == NULL)
+ pthread_cond_wait(&t->cond, &t->mutex);
+ pthread_mutex_unlock(&t->mutex);
+
+ assert(t->func);
+ t->func(t->arg);
+
+ pthread_mutex_lock(&t->mutex);
+ t->func = NULL;
+ pthread_cond_signal(&t->cond);
+ }
+ pthread_mutex_unlock(&t->mutex);
+
+ return NULL;
+}
+
+#if defined(__GNUC__)
+#define popcount(x) __builtin_popcount(x)
+#else
+static int popcount(unsigned int x)
+{
+ int count = 0;
+
+ while (x) {
+ count += x&1;
+ x >>= 1;
+ }
+
+ return count;
+}
+#endif
+
+static int
+num_cores(void)
+{
+ FILE *file = fopen("/proc/cpuinfo", "r");
+ int count = 0;
+ if (file) {
+ size_t len = 0;
+ char *line = NULL;
+ uint32_t processors = 0, cores = 0;
+ while (getline(&line, &len, file) != -1) {
+ int id;
+ if (sscanf(line, "physical id : %d", &id) == 1) {
+ if (id >= 32)
+ return 0;
+ processors |= 1 << id;
+ } else if (sscanf(line, "core id : %d", &id) == 1) {
+ if (id >= 32)
+ return 0;
+ cores |= 1 << id;
+ }
+ }
+ free(line);
+ fclose(file);
+
+ DBG(("%s: processors=0x%08x, cores=0x%08x\n",
+ __FUNCTION__, processors, cores));
+
+ count = popcount(processors) * popcount(cores);
+ }
+ return count;
+}
+
+void sna_threads_init(void)
+{
+ int n;
+
+ if (max_threads != -1)
+ return;
+
+ max_threads = num_cores();
+ if (max_threads == 0)
+ max_threads = sysconf(_SC_NPROCESSORS_ONLN) / 2;
+ if (max_threads <= 1)
+ goto bail;
+
+ DBG(("%s: creating a thread pool of %d threads\n",
+ __func__, max_threads));
+
+ threads = malloc (sizeof(threads[0])*max_threads);
+ if (threads == NULL)
+ goto bail;
+
+ for (n = 0; n < max_threads; n++) {
+ pthread_mutex_init(&threads[n].mutex, NULL);
+ pthread_cond_init(&threads[n].cond, NULL);
+
+ threads[n].func = NULL;
+ if (pthread_create(&threads[n].thread, NULL,
+ __run__, &threads[n]))
+ goto bail;
+ }
+
+ return;
+
+bail:
+ max_threads = 0;
+}
+
+void sna_threads_run(void (*func)(void *arg), void *arg)
+{
+ int n;
+
+ assert(max_threads > 0);
+
+ for (n = 0; n < max_threads; n++) {
+ if (threads[n].func)
+ continue;
+
+ pthread_mutex_lock(&threads[n].mutex);
+ if (threads[n].func) {
+ pthread_mutex_unlock(&threads[n].mutex);
+ continue;
+ }
+
+ goto execute;
+ }
+
+ n = rand() % max_threads;
+ pthread_mutex_lock(&threads[n].mutex);
+ while (threads[n].func)
+ pthread_cond_wait(&threads[n].cond, &threads[n].mutex);
+
+execute:
+ threads[n].func = func;
+ threads[n].arg = arg;
+ pthread_cond_signal(&threads[n].cond);
+ pthread_mutex_unlock(&threads[n].mutex);
+}
+
+void sna_threads_wait(void)
+{
+ int n;
+
+ assert(max_threads > 0);
+
+ for (n = 0; n < max_threads; n++) {
+ if (threads[n].func == NULL)
+ continue;
+
+ pthread_mutex_lock(&threads[n].mutex);
+ while (threads[n].func)
+ pthread_cond_wait(&threads[n].cond, &threads[n].mutex);
+ pthread_mutex_unlock(&threads[n].mutex);
+ }
+}
+
+int sna_use_threads(int width, int height, int threshold)
+{
+ int num_threads;
+
+ if (max_threads <= 0)
+ return 1;
+
+ num_threads = height / (128/width + 1) / threshold-1;
+ if (num_threads <= 0)
+ return 1;
+
+ if (num_threads > max_threads)
+ num_threads = max_threads;
+ return num_threads;
+}
+
+struct thread_composite {
+ pixman_image_t *src, *mask, *dst;
+ pixman_op_t op;
+ int16_t src_x, src_y;
+ int16_t mask_x, mask_y;
+ int16_t dst_x, dst_y;
+ uint16_t width, height;
+};
+
+static void thread_composite(void *arg)
+{
+ struct thread_composite *t = arg;
+ pixman_image_composite(t->op, t->src, t->mask, t->dst,
+ t->src_x, t->src_y,
+ t->mask_x, t->mask_y,
+ t->dst_x, t->dst_y,
+ t->width, t->height);
+}
+
+void sna_image_composite(pixman_op_t op,
+ pixman_image_t *src,
+ pixman_image_t *mask,
+ pixman_image_t *dst,
+ int16_t src_x,
+ int16_t src_y,
+ int16_t mask_x,
+ int16_t mask_y,
+ int16_t dst_x,
+ int16_t dst_y,
+ uint16_t width,
+ uint16_t height)
+{
+ int num_threads;
+
+ num_threads = sna_use_threads(width, height, 16);
+ if (num_threads <= 1) {
+ pixman_image_composite(op, src, mask, dst,
+ src_x, src_y,
+ mask_x, mask_y,
+ dst_x, dst_y,
+ width, height);
+ } else {
+ struct thread_composite data[num_threads];
+ int y, dy, n;
+
+ DBG(("%s: using %d threads for compositing %dx%d\n",
+ __FUNCTION__, num_threads, width, height));
+
+ y = dst_y;
+ dy = (height + num_threads - 1) / num_threads;
+
+ data[0].op = op;
+ data[0].src = src;
+ data[0].mask = mask;
+ data[0].dst = dst;
+ data[0].src_x = src_x;
+ data[0].src_y = src_y;
+ data[0].mask_x = mask_x;
+ data[0].mask_y = mask_y;
+ data[0].dst_x = dst_x;
+ data[0].dst_y = y;
+ data[0].width = width;
+ data[0].height = dy;
+
+ for (n = 1; n < num_threads; n++) {
+ data[n] = data[0];
+ data[n].src_y += y - dst_y;
+ data[n].mask_y += y - dst_y;
+ data[n].dst_y = y;
+ y += dy;
+
+ sna_threads_run(thread_composite, &data[n]);
+ }
+
+ if (y + dy > dst_y + height)
+ dy = dst_y + height - y;
+
+ data[0].src_y += y - dst_y;
+ data[0].mask_y += y - dst_y;
+ data[0].dst_y = y;
+ data[0].height = dy;
+
+ thread_composite(&data[0]);
+
+ sna_threads_wait();
+ }
+}
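
[Note: the new file is a deliberately small pool: threads are spawned once at init and park on a per-slot condvar; sna_threads_run() hands a (func, arg) pair to an idle slot, or waits on a random busy one, and sna_threads_wait() blocks until every slot has drained. Every consumer added in this patch uses it the way sna_image_composite() does: carve the work into horizontal bands, push bands 1..n-1 to the pool, run the final band on the calling thread, then wait. A reduced sketch of that pattern; fill_band() is a stand-in worker, and sna_use_threads() keeps bands tall enough that the last one never underflows in practice:

    struct band { uint8_t *ptr; int stride, width, height; };

    static void fill_band(void *arg)   /* hypothetical worker */
    {
        struct band *b = arg;
        memset(b->ptr, 0xff, (size_t)b->stride * b->height);
    }

    static void fill_image(uint8_t *ptr, int stride, int width, int height)
    {
        int num = sna_use_threads(width, height, 16);

        if (num <= 1) {
            struct band whole = { ptr, stride, width, height };
            fill_band(&whole);
        } else {
            struct band bands[num];   /* VLA, as in sna_image_composite() */
            int n, y = 0, dy = (height + num - 1) / num;

            for (n = 0; n < num; n++) {
                bands[n].ptr = ptr + (size_t)y * stride;
                bands[n].stride = stride;
                bands[n].width = width;
                bands[n].height = n == num - 1 ? height - y : dy;
                if (n < num - 1)   /* the caller keeps the last band */
                    sna_threads_run(fill_band, &bands[n]);
                y += dy;
            }
            fill_band(&bands[num - 1]);
            sna_threads_wait();
        }
    }
]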
diff --git a/src/sna/sna_tiling.c b/src/sna/sna_tiling.c
index 9e70833c6..5bebf0020 100644
--- a/src/sna/sna_tiling.c
+++ b/src/sna/sna_tiling.c
@@ -624,7 +624,7 @@ sna_tiling_fill_boxes(struct sna *sna,
RegionNull(&this);
RegionIntersect(&this, &region, &tile);
- if (!RegionNotEmpty(&this))
+ if (RegionNil(&this))
continue;
tmp.drawable.width = this.extents.x2 - this.extents.x1;
@@ -737,7 +737,7 @@ bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu,
RegionNull(&this);
RegionIntersect(&this, &region, &tile);
- if (!RegionNotEmpty(&this))
+ if (RegionNil(&this))
continue;
w = this.extents.x2 - this.extents.x1;
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 482abd369..c547fb5aa 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -49,6 +49,7 @@
#define NO_ALIGNED_BOXES 0
#define NO_UNALIGNED_BOXES 0
#define NO_SCAN_CONVERTER 0
+#define NO_GPU_THREADS 0
/* TODO: Emit unantialiased and MSAA triangles. */
@@ -68,6 +69,9 @@
#define FAST_SAMPLES_Y (1<<FAST_SAMPLES_shift)
#define FAST_SAMPLES_mask ((1<<FAST_SAMPLES_shift)-1)
+#define region_count(r) ((r)->data ? (r)->data->numRects : 1)
+#define region_boxes(r) ((r)->data ? (BoxPtr)((r)->data + 1) : &(r)->extents)
+
typedef void (*span_func_t)(struct sna *sna,
struct sna_composite_spans_op *op,
pixman_region16_t *clip,
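
[Note: region_count()/region_boxes() are open-coded equivalents of REGION_NUM_RECTS()/REGION_RECTS(): a pixman region is either just its extents (data == NULL) or a RegionData header followed immediately by the box array. A small sketch of the iteration idiom the threaded span code uses with them:

    static void for_each_clip_box(const RegionRec *clip,
                                  void (*fn)(void *closure, const BoxRec *box),
                                  void *closure)
    {
        int n = region_count(clip);
        const BoxRec *box = region_boxes(clip);

        while (n--)
            fn(closure, box++);
    }
]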
@@ -328,10 +332,10 @@ floored_divrem(int a, int b)
/* Compute the floored division (x*a)/b. Assumes / and % perform symmetric
* division. */
static struct quorem
-floored_muldivrem(int x, int a, int b)
+floored_muldivrem(int32_t x, int32_t a, int32_t b)
{
struct quorem qr;
- long long xa = (long long)x*a;
+ int64_t xa = (int64_t)x*a;
qr.quo = xa/b;
qr.rem = xa%b;
if (qr.rem && (xa>=0) != (b>=0)) {
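
[Note: the widened floored_muldivrem() is worth a worked example, since the trailing fix-up is easy to misread: C's / truncates toward zero, so whenever the remainder is non-zero and x*a and b have opposite signs, the quotient must be stepped down by one (and the remainder pushed back up by b) to obtain floor semantics. An illustrative out-parameter restatement with the arithmetic traced in comments:

    /* floor((x*a)/b) from truncating division.  For x=-3, a=5, b=4:
     * xa = -15, -15/4 = -3 (truncated), -15%4 = -3; signs differ and
     * rem != 0, so quo -> -4 and rem -> 1, giving xa == quo*b + rem
     * with 0 <= rem < b, i.e. floor(-3.75) == -4. */
    static void floored_muldivrem2(int32_t x, int32_t a, int32_t b,
                                   int32_t *quo, int32_t *rem)
    {
        int64_t xa = (int64_t)x * a;

        *quo = xa / b;
        *rem = xa % b;
        if (*rem && (xa >= 0) != (b >= 0)) {
            *quo -= 1;
            *rem += b;
        }
    }
]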
@@ -471,7 +475,7 @@ cell_list_reset(struct cell_list *cells)
pool_reset(cells->cell_pool.base);
}
-static struct cell *
+inline static struct cell *
cell_list_alloc(struct cell_list *cells,
struct cell *tail,
int x)
@@ -532,6 +536,9 @@ cell_list_add_subspan(struct cell_list *cells,
int ix1, fx1;
int ix2, fx2;
+ if (x1 == x2)
+ return;
+
FAST_SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1);
FAST_SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2);
@@ -671,6 +678,8 @@ polygon_add_edge(struct polygon *polygon,
ybot = bottom <= ymax ? bottom : ymax;
e->ytop = ytop;
e->height_left = ybot - ytop;
+ if (e->height_left <= 0)
+ return;
if (dx == 0) {
e->x.quo = x1;
@@ -733,6 +742,8 @@ polygon_add_line(struct polygon *polygon,
e->ytop = top;
e->height_left = bot - top;
+ if (e->height_left <= 0)
+ return;
if (dx == 0) {
e->x.quo = p1->x;
@@ -797,6 +808,9 @@ merge_sorted_edges(struct edge *head_a, struct edge *head_b)
struct edge *head, **next, *prev;
int32_t x;
+ if (head_b == NULL)
+ return head_a;
+
prev = head_a->prev;
next = &head;
if (head_a->x.quo <= head_b->x.quo) {
@@ -869,11 +883,39 @@ sort_edges(struct edge *list,
return remaining;
}
+static struct edge *filter(struct edge *edges)
+{
+ struct edge *e;
+
+ e = edges;
+ do {
+ struct edge *n = e->next;
+ if (e->dir == -n->dir &&
+ e->height_left == n->height_left &&
+ *(uint64_t *)&e->x == *(uint64_t *)&n->x &&
+ *(uint64_t *)&e->dxdy == *(uint64_t *)&n->dxdy) {
+ if (e->prev)
+ e->prev->next = n->next;
+ else
+ edges = n->next;
+ if (n->next)
+ n->next->prev = e->prev;
+ else
+ break;
+
+ e = n->next;
+ } else
+ e = e->next;
+ } while (e->next);
+
+ return edges;
+}
+
static struct edge *
merge_unsorted_edges (struct edge *head, struct edge *unsorted)
{
sort_edges (unsorted, UINT_MAX, &unsorted);
- return merge_sorted_edges (head, unsorted);
+ return merge_sorted_edges (head, filter(unsorted));
}
/* Test if the edges on the active list can be safely advanced by a
@@ -881,18 +923,18 @@ merge_unsorted_edges (struct edge *head, struct edge *unsorted)
inline static bool
can_full_step(struct active_list *active)
{
- const struct edge *e;
-
/* Recomputes the minimum height of all edges on the active
* list if we have been dropping edges. */
if (active->min_height <= 0) {
+ const struct edge *e;
int min_height = INT_MAX;
int is_vertical = 1;
for (e = active->head.next; &active->tail != e; e = e->next) {
if (e->height_left < min_height)
min_height = e->height_left;
- is_vertical &= e->dy == 0;
+ if (is_vertical)
+ is_vertical = e->dy == 0;
}
active->is_vertical = is_vertical;
@@ -929,7 +971,8 @@ fill_buckets(struct active_list *active,
*b = edge;
if (edge->height_left < min_height)
min_height = edge->height_left;
- is_vertical &= edge->dy == 0;
+ if (is_vertical)
+ is_vertical = edge->dy == 0;
edge = next;
}
@@ -1836,7 +1879,7 @@ static void
mono_add_line(struct mono *mono,
int dst_x, int dst_y,
xFixed top, xFixed bottom,
- xPointFixed *p1, xPointFixed *p2,
+ const xPointFixed *p1, const xPointFixed *p2,
int dir)
{
struct mono_polygon *polygon = &mono->polygon;
@@ -1853,7 +1896,7 @@ mono_add_line(struct mono *mono,
dir));
if (top > bottom) {
- xPointFixed *t;
+ const xPointFixed *t;
y = top;
top = bottom;
@@ -1917,6 +1960,9 @@ mono_merge_sorted_edges(struct mono_edge *head_a, struct mono_edge *head_b)
struct mono_edge *head, **next, *prev;
int32_t x;
+ if (head_b == NULL)
+ return head_a;
+
prev = head_a->prev;
next = &head;
if (head_a->x.quo <= head_b->x.quo) {
@@ -1990,11 +2036,39 @@ mono_sort_edges(struct mono_edge *list,
return remaining;
}
+static struct mono_edge *mono_filter(struct mono_edge *edges)
+{
+ struct mono_edge *e;
+
+ e = edges;
+ do {
+ struct mono_edge *n = e->next;
+ if (e->dir == -n->dir &&
+ e->height_left == n->height_left &&
+ *(uint64_t *)&e->x == *(uint64_t *)&n->x &&
+ *(uint64_t *)&e->dxdy == *(uint64_t *)&n->dxdy) {
+ if (e->prev)
+ e->prev->next = n->next;
+ else
+ edges = n->next;
+ if (n->next)
+ n->next->prev = e->prev;
+ else
+ break;
+
+ e = n->next;
+ } else
+ e = e->next;
+ } while (e->next);
+
+ return edges;
+}
+
static struct mono_edge *
mono_merge_unsorted_edges(struct mono_edge *head, struct mono_edge *unsorted)
{
mono_sort_edges(unsorted, UINT_MAX, &unsorted);
- return mono_merge_sorted_edges(head, unsorted);
+ return mono_merge_sorted_edges(head, mono_filter(unsorted));
}
#if 0
@@ -2079,6 +2153,60 @@ mono_span__fast(struct mono *c, int x1, int x2, BoxPtr box)
c->op.box(c->sna, &c->op, box);
}
+struct mono_span_thread_boxes {
+ const struct sna_composite_op *op;
+#define MONO_SPAN_MAX_BOXES (8192/sizeof(BoxRec))
+ BoxRec boxes[MONO_SPAN_MAX_BOXES];
+ int num_boxes;
+};
+
+inline static void
+thread_mono_span_add_boxes(struct mono *c, const BoxRec *box, int count)
+{
+ struct mono_span_thread_boxes *b = c->op.priv;
+
+ assert(count > 0 && count <= MONO_SPAN_MAX_BOXES);
+ if (b->num_boxes + count > MONO_SPAN_MAX_BOXES) {
+ b->op->thread_boxes(c->sna, b->op, b->boxes, b->num_boxes);
+ b->num_boxes = 0;
+ }
+
+ memcpy(b->boxes + b->num_boxes, box, count*sizeof(BoxRec));
+ b->num_boxes += count;
+ assert(b->num_boxes <= MONO_SPAN_MAX_BOXES);
+}
+
+fastcall static void
+thread_mono_span_clipped(struct mono *c, int x1, int x2, BoxPtr box)
+{
+ pixman_region16_t region;
+
+ __DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2));
+
+ box->x1 = x1;
+ box->x2 = x2;
+
+ assert(c->clip.data);
+
+ pixman_region_init_rects(&region, box, 1);
+ RegionIntersect(&region, &region, &c->clip);
+ if (REGION_NUM_RECTS(&region))
+ thread_mono_span_add_boxes(c,
+ REGION_RECTS(&region),
+ REGION_NUM_RECTS(&region));
+ pixman_region_fini(&region);
+}
+
+fastcall static void
+thread_mono_span(struct mono *c, int x1, int x2, BoxPtr box)
+{
+ __DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2));
+
+ box->x1 = x1;
+ box->x2 = x2;
+ thread_mono_span_add_boxes(c, box, 1);
+}
+
inline static void
mono_row(struct mono *c, int16_t y, int16_t h)
{
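
[Note: struct mono_span_thread_boxes above is the worker-side batching buffer: spans accumulate into a fixed 8KiB on-stack array (MONO_SPAN_MAX_BOXES boxes) and are flushed through op->thread_boxes(), the only point of contact with shared GPU state, only when the next addition would overflow. The same flush-on-overflow shape, reduced to its essentials; flush() stands in for op->thread_boxes():

    #define MAX_BOXES (8192/sizeof(BoxRec))

    struct batch {
        BoxRec boxes[MAX_BOXES];
        int num;
    };

    static void batch_add(struct batch *b, const BoxRec *box, int count,
                          void (*flush)(const BoxRec *, int))
    {
        assert(count > 0 && count <= MAX_BOXES);
        if (b->num + count > MAX_BOXES) {   /* would overflow: drain first */
            flush(b->boxes, b->num);
            b->num = 0;
        }
        memcpy(b->boxes + b->num, box, count * sizeof(BoxRec));
        b->num += count;
    }
]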
@@ -2196,10 +2324,7 @@ mono_render(struct mono *mono)
struct mono_polygon *polygon = &mono->polygon;
int i, j, h = mono->clip.extents.y2 - mono->clip.extents.y1;
- if (mono->clip.data == NULL && mono->op.damage == NULL)
- mono->span = mono_span__fast;
- else
- mono->span = mono_span;
+ assert(mono->span);
for (i = 0; i < h; i = j) {
j = i + 1;
@@ -2332,7 +2457,8 @@ is_mono(PicturePtr dst, PictFormatPtr mask)
}
static bool
-trapezoids_inplace_fallback(CARD8 op,
+trapezoids_inplace_fallback(struct sna *sna,
+ CARD8 op,
PicturePtr src, PicturePtr dst, PictFormatPtr mask,
int ntrap, xTrapezoid *traps)
{
@@ -2372,7 +2498,7 @@ trapezoids_inplace_fallback(CARD8 op,
return false;
}
- if (is_gpu(dst->pDrawable)) {
+ if (is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
DBG(("%s: not performing inplace as dst is already on the GPU\n",
__FUNCTION__));
return false;
@@ -2398,8 +2524,66 @@ trapezoids_inplace_fallback(CARD8 op,
return true;
}
+struct rasterize_traps_thread {
+ xTrapezoid *traps;
+ char *ptr;
+ int stride;
+ BoxRec bounds;
+ pixman_format_code_t format;
+ int ntrap;
+};
+
+static void rasterize_traps_thread(void *arg)
+{
+ struct rasterize_traps_thread *thread = arg;
+ pixman_image_t *image;
+ int width, height, n;
+
+ width = thread->bounds.x2 - thread->bounds.x1;
+ height = thread->bounds.y2 - thread->bounds.y1;
+
+ memset(thread->ptr, 0, thread->stride*height);
+ if (PIXMAN_FORMAT_DEPTH(thread->format) < 8)
+ image = pixman_image_create_bits(thread->format,
+ width, height,
+ NULL, 0);
+ else
+ image = pixman_image_create_bits(thread->format,
+ width, height,
+ (uint32_t *)thread->ptr,
+ thread->stride);
+ if (image == NULL)
+ return;
+
+ for (n = 0; n < thread->ntrap; n++)
+ pixman_rasterize_trapezoid(image,
+ (pixman_trapezoid_t *)&thread->traps[n],
+ -thread->bounds.x1, -thread->bounds.y1);
+
+ if (PIXMAN_FORMAT_DEPTH(thread->format) < 8) {
+ pixman_image_t *a8;
+
+ a8 = pixman_image_create_bits(PIXMAN_a8,
+ width, height,
+ (uint32_t *)thread->ptr,
+ thread->stride);
+ if (a8) {
+ pixman_image_composite(PIXMAN_OP_SRC,
+ image, NULL, a8,
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ width, height);
+ pixman_image_unref(a8);
+ }
+ }
+
+ pixman_image_unref(image);
+}
+
static void
-trapezoids_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
+trapezoids_fallback(struct sna *sna,
+ CARD8 op, PicturePtr src, PicturePtr dst,
PictFormatPtr maskFormat, INT16 xSrc, INT16 ySrc,
int ntrap, xTrapezoid * traps)
{
@@ -2441,6 +2625,8 @@ trapezoids_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
height = bounds.y2 - bounds.y1;
bounds.x1 -= dst->pDrawable->x;
bounds.y1 -= dst->pDrawable->y;
+ bounds.x2 -= dst->pDrawable->x;
+ bounds.y2 -= dst->pDrawable->y;
depth = maskFormat->depth;
if (depth == 1) {
format = PIXMAN_a1;
@@ -2452,51 +2638,90 @@ trapezoids_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
DBG(("%s: mask (%dx%d) depth=%d, format=%08x\n",
__FUNCTION__, width, height, depth, format));
- if (is_gpu(dst->pDrawable) || picture_is_gpu(src)) {
+ if (is_gpu(sna, dst->pDrawable, PREFER_GPU_RENDER) ||
+ picture_is_gpu(sna, src)) {
+ int num_threads;
+
scratch = sna_pixmap_create_upload(screen,
width, height, 8,
KGEM_BUFFER_WRITE);
if (!scratch)
return;
- if (depth < 8) {
- image = pixman_image_create_bits(format, width, height,
- NULL, 0);
- } else {
- memset(scratch->devPrivate.ptr, 0, scratch->devKind*height);
- image = pixman_image_create_bits(format, width, height,
- scratch->devPrivate.ptr,
- scratch->devKind);
- }
- if (image) {
- for (; ntrap; ntrap--, traps++)
- pixman_rasterize_trapezoid(image,
- (pixman_trapezoid_t *)traps,
- -bounds.x1, -bounds.y1);
+ num_threads = sna_use_threads(width, height, 4);
+ if (num_threads == 1) {
if (depth < 8) {
- pixman_image_t *a8;
-
- a8 = pixman_image_create_bits(PIXMAN_a8, width, height,
- scratch->devPrivate.ptr,
- scratch->devKind);
- if (a8) {
- pixman_image_composite(PIXMAN_OP_SRC,
- image, NULL, a8,
- 0, 0,
- 0, 0,
- 0, 0,
- width, height);
- format = PIXMAN_a8;
- depth = 8;
- pixman_image_unref (a8);
+ image = pixman_image_create_bits(format, width, height,
+ NULL, 0);
+ } else {
+ memset(scratch->devPrivate.ptr, 0, scratch->devKind*height);
+
+ image = pixman_image_create_bits(format, width, height,
+ scratch->devPrivate.ptr,
+ scratch->devKind);
+ }
+ if (image) {
+ for (; ntrap; ntrap--, traps++)
+ pixman_rasterize_trapezoid(image,
+ (pixman_trapezoid_t *)traps,
+ -bounds.x1, -bounds.y1);
+ if (depth < 8) {
+ pixman_image_t *a8;
+
+ a8 = pixman_image_create_bits(PIXMAN_a8, width, height,
+ scratch->devPrivate.ptr,
+ scratch->devKind);
+ if (a8) {
+ pixman_image_composite(PIXMAN_OP_SRC,
+ image, NULL, a8,
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ width, height);
+ format = PIXMAN_a8;
+ depth = 8;
+ pixman_image_unref(a8);
+ }
}
+
+ pixman_image_unref(image);
+ }
+ if (format != PIXMAN_a8) {
+ sna_pixmap_destroy(scratch);
+ return;
+ }
+ } else {
+ struct rasterize_traps_thread threads[num_threads];
+ int y, dy, n;
+
+ threads[0].ptr = scratch->devPrivate.ptr;
+ threads[0].stride = scratch->devKind;
+ threads[0].traps = traps;
+ threads[0].ntrap = ntrap;
+ threads[0].bounds = bounds;
+ threads[0].format = format;
+
+ y = bounds.y1;
+ dy = (height + num_threads - 1) / num_threads;
+
+ for (n = 1; n < num_threads; n++) {
+ threads[n] = threads[0];
+ threads[n].ptr += (y - bounds.y1) * threads[n].stride;
+ threads[n].bounds.y1 = y;
+ threads[n].bounds.y2 = y += dy;
+
+ sna_threads_run(rasterize_traps_thread, &threads[n]);
}
- pixman_image_unref(image);
- }
- if (format != PIXMAN_a8) {
- sna_pixmap_destroy(scratch);
- return;
+ threads[0].ptr += (y - bounds.y1) * threads[0].stride;
+ threads[0].bounds.y1 = y;
+ threads[0].bounds.y2 = bounds.y2;
+ rasterize_traps_thread(&threads[0]);
+
+ sna_threads_wait();
+
+ format = PIXMAN_a8;
+ depth = 8;
}
} else {
scratch = sna_pixmap_create_unattached(screen,
@@ -2538,7 +2763,7 @@ trapezoids_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
maskFormat = PictureMatchFormat(screen, 8, PICT_a8);
for (; ntrap; ntrap--, traps++)
- trapezoids_fallback(op,
+ trapezoids_fallback(sna, op,
src, dst, maskFormat,
xSrc, ySrc, 1, traps);
}
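
[Note: one subtlety from the top of trapezoids_fallback(): bounds.x2/bounds.y2 are now rebased into drawable space alongside x1/y1, where previously only the origin corner was translated. The threaded rasterizer slices its bands on bounds.y1..bounds.y2, so a y2 left in screen coordinates would push the last band past the scratch buffer. Traced with invented numbers:

    /* pDrawable->x/y = (100, 10); trap extents (110, 20)-(140, 50) on screen.
     * Old code:  bounds = (10, 10)-(140, 50)   x2/y2 still screen-relative
     * New code:  bounds = (10, 10)-( 40, 40)   consistent drawable space
     * With only y1 rebased, slicing toward bounds.y2 == 50 would run
     * 10 rows past the end of the 30-row scratch pixmap. */
    bounds.x1 -= dst->pDrawable->x;
    bounds.y1 -= dst->pDrawable->y;
    bounds.x2 -= dst->pDrawable->x;
    bounds.y2 -= dst->pDrawable->y;
]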
@@ -3073,13 +3298,13 @@ lerp32_unaligned_box_row(PixmapPtr scratch, uint32_t color,
uint8_t covered)
{
int16_t x1 = pixman_fixed_to_int(trap->left.p1.x) + dx;
- int16_t fx1 = grid_coverage(SAMPLES_X, trap->left.p1.x);
+ uint16_t fx1 = grid_coverage(SAMPLES_X, trap->left.p1.x);
int16_t x2 = pixman_fixed_to_int(trap->right.p2.x) + dx;
- int16_t fx2 = grid_coverage(SAMPLES_X, trap->right.p2.x);
+ uint16_t fx2 = grid_coverage(SAMPLES_X, trap->right.p2.x);
if (x1 < extents->x1)
x1 = extents->x1, fx1 = 0;
- if (x2 > extents->x2)
+ if (x2 >= extents->x2)
x2 = extents->x2, fx2 = 0;
DBG(("%s: x=(%d.%d, %d.%d), y=%dx%d, covered=%d\n", __FUNCTION__,
@@ -3171,13 +3396,13 @@ pixsolid_unaligned_box_row(struct pixman_inplace *pi,
uint8_t covered)
{
int16_t x1 = pixman_fixed_to_int(trap->left.p1.x);
- int16_t fx1 = grid_coverage(SAMPLES_X, trap->left.p1.x);
+ uint16_t fx1 = grid_coverage(SAMPLES_X, trap->left.p1.x);
int16_t x2 = pixman_fixed_to_int(trap->right.p1.x);
- int16_t fx2 = grid_coverage(SAMPLES_X, trap->right.p1.x);
+ uint16_t fx2 = grid_coverage(SAMPLES_X, trap->right.p1.x);
if (x1 < extents->x1)
x1 = extents->x1, fx1 = 0;
- if (x2 > extents->x2)
+ if (x2 >= extents->x2)
x2 = extents->x2, fx2 = 0;
if (x1 < x2) {
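
[Note: the recurring int16_t to uint16_t change for the fx*/fy* locals is a correctness fix rather than churn: pixman_fixed_frac() yields the low 16 bits of a 16.16 value, i.e. 0..0xffff, so any fraction of one half or more turns negative when stored in an int16_t and corrupts the coverage weights. Likewise, x2 >= extents->x2 now also zeroes the partial column when a span lands exactly on the clip edge, since that pixel sits outside the exclusive bound. The overflow in miniature:

    pixman_fixed_t v = pixman_double_to_fixed(0.75);   /* 0x0000c000 */
    int16_t  bad  = pixman_fixed_frac(v);   /* -16384: sign bit set */
    uint16_t good = pixman_fixed_frac(v);   /*  49152, as intended  */
]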
@@ -3198,7 +3423,8 @@ pixsolid_unaligned_box_row(struct pixman_inplace *pi,
}
static bool
-composite_unaligned_boxes_inplace__solid(CARD8 op, uint32_t color,
+composite_unaligned_boxes_inplace__solid(struct sna *sna,
+ CARD8 op, uint32_t color,
PicturePtr dst, int n, xTrapezoid *t,
bool force_fallback)
{
@@ -3206,9 +3432,9 @@ composite_unaligned_boxes_inplace__solid(CARD8 op, uint32_t color,
int16_t dx, dy;
DBG(("%s: force=%d, is_gpu=%d, op=%d, color=%x\n", __FUNCTION__,
- force_fallback, is_gpu(dst->pDrawable), op, color));
+ force_fallback, is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS), op, color));
- if (!force_fallback && is_gpu(dst->pDrawable)) {
+ if (!force_fallback && is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
DBG(("%s: fallback -- can not perform operation in place, destination busy\n",
__FUNCTION__));
@@ -3276,9 +3502,9 @@ composite_unaligned_boxes_inplace__solid(CARD8 op, uint32_t color,
extents = REGION_RECTS(&clip);
while (count--) {
int16_t y1 = dy + pixman_fixed_to_int(t->top);
- int16_t fy1 = pixman_fixed_frac(t->top);
+ uint16_t fy1 = pixman_fixed_frac(t->top);
int16_t y2 = dy + pixman_fixed_to_int(t->bottom);
- int16_t fy2 = pixman_fixed_frac(t->bottom);
+ uint16_t fy2 = pixman_fixed_frac(t->bottom);
DBG(("%s: t=(%d, %d), (%d, %d), extents (%d, %d), (%d, %d)\n",
__FUNCTION__,
@@ -3291,7 +3517,7 @@ composite_unaligned_boxes_inplace__solid(CARD8 op, uint32_t color,
if (y1 < extents->y1)
y1 = extents->y1, fy1 = 0;
- if (y2 > extents->y2)
+ if (y2 >= extents->y2)
y2 = extents->y2, fy2 = 0;
if (y1 < y2) {
@@ -3363,13 +3589,13 @@ pixman:
extents = REGION_RECTS(&clip);
while (count--) {
int16_t y1 = pixman_fixed_to_int(t->top);
- int16_t fy1 = pixman_fixed_frac(t->top);
+ uint16_t fy1 = pixman_fixed_frac(t->top);
int16_t y2 = pixman_fixed_to_int(t->bottom);
- int16_t fy2 = pixman_fixed_frac(t->bottom);
+ uint16_t fy2 = pixman_fixed_frac(t->bottom);
if (y1 < extents->y1)
y1 = extents->y1, fy1 = 0;
- if (y2 > extents->y2)
+ if (y2 >= extents->y2)
y2 = extents->y2, fy2 = 0;
if (y1 < y2) {
if (fy1) {
@@ -3424,18 +3650,18 @@ pixmask_opacity(struct pixman_inplace *pi,
static void
pixmask_unaligned_box_row(struct pixman_inplace *pi,
const BoxRec *extents,
- xTrapezoid *trap,
+ const xTrapezoid *trap,
int16_t y, int16_t h,
uint8_t covered)
{
int16_t x1 = pixman_fixed_to_int(trap->left.p1.x);
- int16_t fx1 = grid_coverage(SAMPLES_X, trap->left.p1.x);
+ uint16_t fx1 = grid_coverage(SAMPLES_X, trap->left.p1.x);
int16_t x2 = pixman_fixed_to_int(trap->right.p1.x);
- int16_t fx2 = grid_coverage(SAMPLES_X, trap->right.p1.x);
+ uint16_t fx2 = grid_coverage(SAMPLES_X, trap->right.p1.x);
if (x1 < extents->x1)
x1 = extents->x1, fx1 = 0;
- if (x2 > extents->x2)
+ if (x2 >= extents->x2)
x2 = extents->x2, fx2 = 0;
if (x1 < x2) {
@@ -3455,13 +3681,82 @@ pixmask_unaligned_box_row(struct pixman_inplace *pi,
}
}
+struct rectilinear_inplace_thread {
+ pixman_image_t *dst, *src;
+ const RegionRec *clip;
+ const xTrapezoid *trap;
+ int dx, dy, sx, sy;
+ int y1, y2;
+ CARD8 op;
+};
+
+static void rectilinear_inplace_thread(void *arg)
+{
+ struct rectilinear_inplace_thread *thread = arg;
+ const xTrapezoid *t = thread->trap;
+ struct pixman_inplace pi;
+ const BoxRec *extents;
+ int count;
+
+ pi.image = thread->dst;
+ pi.dx = thread->dx;
+ pi.dy = thread->dy;
+
+ pi.source = thread->src;
+ pi.sx = thread->sx;
+ pi.sy = thread->sy;
+
+ pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, &pi.color, 4);
+ pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL);
+ pi.bits = pixman_image_get_data(pi.mask);
+ pi.op = thread->op;
+
+ count = region_count(thread->clip);
+ extents = region_boxes(thread->clip);
+ while (count--) {
+ int16_t y1 = pixman_fixed_to_int(t->top);
+ uint16_t fy1 = pixman_fixed_frac(t->top);
+ int16_t y2 = pixman_fixed_to_int(t->bottom);
+ uint16_t fy2 = pixman_fixed_frac(t->bottom);
+
+ if (y1 < MAX(thread->y1, extents->y1))
+ y1 = MAX(thread->y1, extents->y1), fy1 = 0;
+ if (y2 > MIN(thread->y2, extents->y2))
+ y2 = MIN(thread->y2, extents->y2), fy2 = 0;
+ if (y1 < y2) {
+ if (fy1) {
+ pixmask_unaligned_box_row(&pi, extents, t, y1, 1,
+ SAMPLES_Y - grid_coverage(SAMPLES_Y, fy1));
+ y1++;
+ }
+
+ if (y2 > y1)
+ pixmask_unaligned_box_row(&pi, extents, t, y1, y2 - y1,
+ SAMPLES_Y);
+
+ if (fy2)
+ pixmask_unaligned_box_row(&pi, extents, t, y2, 1,
+ grid_coverage(SAMPLES_Y, fy2));
+ } else if (y1 == y2 && fy2 > fy1) {
+ pixmask_unaligned_box_row(&pi, extents, t, y1, 1,
+ grid_coverage(SAMPLES_Y, fy2) - grid_coverage(SAMPLES_Y, fy1));
+ }
+ extents++;
+ }
+
+ pixman_image_unref(pi.mask);
+}
+
static bool
-composite_unaligned_boxes_inplace(CARD8 op,
+composite_unaligned_boxes_inplace(struct sna *sna,
+ CARD8 op,
PicturePtr src, int16_t src_x, int16_t src_y,
PicturePtr dst, int n, xTrapezoid *t,
bool force_fallback)
{
- if (!force_fallback) {
+ if (!force_fallback &&
+ (is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS) ||
+ picture_is_gpu(sna, src))) {
DBG(("%s: fallback -- not forcing\n", __FUNCTION__));
return false;
}
@@ -3471,10 +3766,10 @@ composite_unaligned_boxes_inplace(CARD8 op,
src_x -= pixman_fixed_to_int(t[0].left.p1.x);
src_y -= pixman_fixed_to_int(t[0].left.p1.y);
do {
- struct pixman_inplace pi;
RegionRec clip;
BoxPtr extents;
int count;
+ int num_threads;
clip.extents.x1 = pixman_fixed_to_int(t->left.p1.x);
clip.extents.x2 = pixman_fixed_to_int(t->right.p1.x + pixman_fixed_1_minus_e);
@@ -3513,59 +3808,100 @@ composite_unaligned_boxes_inplace(CARD8 op,
}
}
- pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
- pi.source = image_from_pict(src, false, &pi.sx, &pi.sy);
- pi.sx += src_x;
- pi.sy += src_y;
- pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, NULL, 0);
- pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL);
- pi.bits = pixman_image_get_data(pi.mask);
- pi.op = op;
+ num_threads = sna_use_threads(clip.extents.x2 - clip.extents.x1,
+ clip.extents.y2 - clip.extents.y1,
+ 32);
+ if (num_threads == 1) {
+ struct pixman_inplace pi;
- count = REGION_NUM_RECTS(&clip);
- extents = REGION_RECTS(&clip);
- while (count--) {
- int16_t y1 = pixman_fixed_to_int(t->top);
- int16_t fy1 = pixman_fixed_frac(t->top);
- int16_t y2 = pixman_fixed_to_int(t->bottom);
- int16_t fy2 = pixman_fixed_frac(t->bottom);
+ pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
+ pi.source = image_from_pict(src, false, &pi.sx, &pi.sy);
+ pi.sx += src_x;
+ pi.sy += src_y;
+ pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, &pi.color, 4);
+ pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL);
+ pi.bits = pixman_image_get_data(pi.mask);
+ pi.op = op;
- if (y1 < extents->y1)
- y1 = extents->y1, fy1 = 0;
- if (y2 > extents->y2)
- y2 = extents->y2, fy2 = 0;
- if (y1 < y2) {
- if (fy1) {
+ count = REGION_NUM_RECTS(&clip);
+ extents = REGION_RECTS(&clip);
+ while (count--) {
+ int16_t y1 = pixman_fixed_to_int(t->top);
+ uint16_t fy1 = pixman_fixed_frac(t->top);
+ int16_t y2 = pixman_fixed_to_int(t->bottom);
+ uint16_t fy2 = pixman_fixed_frac(t->bottom);
+
+ if (y1 < extents->y1)
+ y1 = extents->y1, fy1 = 0;
+ if (y2 > extents->y2)
+ y2 = extents->y2, fy2 = 0;
+ if (y1 < y2) {
+ if (fy1) {
+ pixmask_unaligned_box_row(&pi, extents, t, y1, 1,
+ SAMPLES_Y - grid_coverage(SAMPLES_Y, fy1));
+ y1++;
+ }
+
+ if (y2 > y1)
+ pixmask_unaligned_box_row(&pi, extents, t, y1, y2 - y1,
+ SAMPLES_Y);
+
+ if (fy2)
+ pixmask_unaligned_box_row(&pi, extents, t, y2, 1,
+ grid_coverage(SAMPLES_Y, fy2));
+ } else if (y1 == y2 && fy2 > fy1) {
pixmask_unaligned_box_row(&pi, extents, t, y1, 1,
- SAMPLES_Y - grid_coverage(SAMPLES_Y, fy1));
- y1++;
+ grid_coverage(SAMPLES_Y, fy2) - grid_coverage(SAMPLES_Y, fy1));
}
+ extents++;
+ }
- if (y2 > y1)
- pixmask_unaligned_box_row(&pi, extents, t, y1, y2 - y1,
- SAMPLES_Y);
+ pixman_image_unref(pi.image);
+ pixman_image_unref(pi.source);
+ pixman_image_unref(pi.mask);
+ } else {
+ struct rectilinear_inplace_thread thread[num_threads];
+ int i, y, dy;
- if (fy2)
- pixmask_unaligned_box_row(&pi, extents, t, y2, 1,
- grid_coverage(SAMPLES_Y, fy2));
- } else if (y1 == y2 && fy2 > fy1) {
- pixmask_unaligned_box_row(&pi, extents, t, y1, 1,
- grid_coverage(SAMPLES_Y, fy2) - grid_coverage(SAMPLES_Y, fy1));
+
+ thread[0].trap = t;
+ thread[0].dst = image_from_pict(dst, false, &thread[0].dx, &thread[0].dy);
+ thread[0].src = image_from_pict(src, false, &thread[0].sx, &thread[0].sy);
+ thread[0].sx += src_x;
+ thread[0].sy += src_y;
+
+ thread[0].clip = &clip;
+ thread[0].op = op;
+
+ y = clip.extents.y1;
+ dy = (clip.extents.y2 - clip.extents.y1 + num_threads - 1) / num_threads;
+
+ for (i = 1; i < num_threads; i++) {
+ thread[i] = thread[0];
+ thread[i].y1 = y;
+ thread[i].y2 = y += dy;
+ sna_threads_run(rectilinear_inplace_thread, &thread[i]);
}
- extents++;
+
+ thread[0].y1 = y;
+ thread[0].y2 = clip.extents.y2;
+ rectilinear_inplace_thread(&thread[0]);
+
+ sna_threads_wait();
+
+ pixman_image_unref(thread[0].dst);
+ pixman_image_unref(thread[0].src);
}
RegionUninit(&clip);
- pixman_image_unref(pi.image);
- pixman_image_unref(pi.source);
- pixman_image_unref(pi.mask);
} while (--n && t++);
return true;
}
static bool
-composite_unaligned_boxes_fallback(CARD8 op,
+composite_unaligned_boxes_fallback(struct sna *sna,
+ CARD8 op,
PicturePtr src,
PicturePtr dst,
INT16 src_x, INT16 src_y,
@@ -3579,12 +3915,12 @@ composite_unaligned_boxes_fallback(CARD8 op,
int n;
if (sna_picture_is_solid(src, &color) &&
- composite_unaligned_boxes_inplace__solid(op, color, dst,
+ composite_unaligned_boxes_inplace__solid(sna, op, color, dst,
ntrap, traps,
force_fallback))
return true;
- if (composite_unaligned_boxes_inplace(op, src, src_x, src_y,
+ if (composite_unaligned_boxes_inplace(sna, op, src, src_x, src_y,
dst, ntrap, traps,
force_fallback))
return true;
@@ -3708,7 +4044,7 @@ composite_unaligned_boxes(struct sna *sna,
!sna->render.check_composite_spans(sna, op, src, dst, 0, 0,
COMPOSITE_SPANS_RECTILINEAR)) {
fallback:
- return composite_unaligned_boxes_fallback(op, src, dst,
+ return composite_unaligned_boxes_fallback(sna, op, src, dst,
src_x, src_y,
ntrap, traps,
force_fallback);
@@ -3860,14 +4196,13 @@ static span_func_t
choose_span(struct sna_composite_spans_op *tmp,
PicturePtr dst,
PictFormatPtr maskFormat,
- uint8_t op,
RegionPtr clip)
{
span_func_t span;
if (is_mono(dst, maskFormat)) {
/* XXX An imprecise approximation */
- if (maskFormat && !operator_is_bounded(op)) {
+ if (maskFormat && !operator_is_bounded(tmp->base.op)) {
span = tor_blt_span_mono_unbounded;
if (REGION_NUM_RECTS(clip) > 1)
span = tor_blt_span_mono_unbounded_clipped;
@@ -3888,8 +4223,77 @@ choose_span(struct sna_composite_spans_op *tmp,
return span;
}
+struct mono_span_thread {
+ struct sna *sna;
+ const xTrapezoid *traps;
+ const struct sna_composite_op *op;
+ RegionPtr clip;
+ int ntrap;
+ BoxRec extents;
+ int dx, dy;
+};
+
+static void
+mono_span_thread(void *arg)
+{
+ struct mono_span_thread *thread = arg;
+ struct mono mono;
+ struct mono_span_thread_boxes boxes;
+ const xTrapezoid *t;
+ int n;
+
+ mono.sna = thread->sna;
+
+ mono.clip.extents = thread->extents;
+ mono.clip.data = NULL;
+ if (thread->clip->data) {
+ RegionIntersect(&mono.clip, &mono.clip, thread->clip);
+ if (RegionNil(&mono.clip))
+ return;
+ }
+
+ boxes.op = thread->op;
+ boxes.num_boxes = 0;
+ mono.op.priv = &boxes;
+
+ if (!mono_init(&mono, 2*thread->ntrap)) {
+ RegionUninit(&mono.clip);
+ return;
+ }
+
+ for (n = thread->ntrap, t = thread->traps; n--; t++) {
+ if (!xTrapezoidValid(t))
+ continue;
+
+ if (pixman_fixed_to_int(t->top) + thread->dy >= thread->extents.y2 ||
+ pixman_fixed_to_int(t->bottom) + thread->dy <= thread->extents.y1)
+ continue;
+
+ mono_add_line(&mono, thread->dx, thread->dy,
+ t->top, t->bottom,
+ &t->left.p1, &t->left.p2, 1);
+ mono_add_line(&mono, thread->dx, thread->dy,
+ t->top, t->bottom,
+ &t->right.p1, &t->right.p2, -1);
+ }
+
+ if (mono.clip.data == NULL)
+ mono.span = thread_mono_span;
+ else
+ mono.span = thread_mono_span_clipped;
+
+ mono_render(&mono);
+ mono_fini(&mono);
+
+ if (boxes.num_boxes)
+ thread->op->thread_boxes(thread->sna, thread->op,
+ boxes.boxes, boxes.num_boxes);
+ RegionUninit(&mono.clip);
+}
+
static bool
-mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
+mono_trapezoids_span_converter(struct sna *sna,
+ CARD8 op, PicturePtr src, PicturePtr dst,
INT16 src_x, INT16 src_y,
int ntrap, xTrapezoid *traps)
{
@@ -3897,8 +4301,8 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
BoxRec extents;
int16_t dst_x, dst_y;
int16_t dx, dy;
- bool was_clear;
- int n;
+ bool unbounded;
+ int num_threads, n;
if (NO_SCAN_CONVERTER)
return false;
@@ -3937,11 +4341,69 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
src_x + mono.clip.extents.x1 - dst_x - dx,
src_y + mono.clip.extents.y1 - dst_y - dy));
- mono.sna = to_sna_from_drawable(dst->pDrawable);
- if (!mono_init(&mono, 2*ntrap))
+ unbounded = (!sna_drawable_is_clear(dst->pDrawable) &&
+ !operator_is_bounded(op));
+
+ mono.sna = sna;
+ if (!mono.sna->render.composite(mono.sna, op, src, NULL, dst,
+ src_x + mono.clip.extents.x1 - dst_x - dx,
+ src_y + mono.clip.extents.y1 - dst_y - dy,
+ 0, 0,
+ mono.clip.extents.x1, mono.clip.extents.y1,
+ mono.clip.extents.x2 - mono.clip.extents.x1,
+ mono.clip.extents.y2 - mono.clip.extents.y1,
+ memset(&mono.op, 0, sizeof(mono.op))))
return false;
- was_clear = sna_drawable_is_clear(dst->pDrawable);
+ num_threads = 1;
+ if (!NO_GPU_THREADS &&
+ mono.op.thread_boxes &&
+ mono.op.damage == NULL &&
+ !unbounded)
+ num_threads = sna_use_threads(mono.clip.extents.x2 - mono.clip.extents.x1,
+ mono.clip.extents.y2 - mono.clip.extents.y1,
+ 16);
+ if (num_threads > 1) {
+ struct mono_span_thread threads[num_threads];
+ int y, h;
+
+ DBG(("%s: using %d threads for mono span compositing %dx%d\n",
+ __FUNCTION__, num_threads,
+ mono.clip.extents.x2 - mono.clip.extents.x1,
+ mono.clip.extents.y2 - mono.clip.extents.y1));
+
+ threads[0].sna = mono.sna;
+ threads[0].op = &mono.op;
+ threads[0].traps = traps;
+ threads[0].ntrap = ntrap;
+ threads[0].extents = mono.clip.extents;
+ threads[0].clip = &mono.clip;
+ threads[0].dx = dx;
+ threads[0].dy = dy;
+
+ y = extents.y1;
+ h = extents.y2 - extents.y1;
+ h = (h + num_threads - 1) / num_threads;
+
+ for (n = 1; n < num_threads; n++) {
+ threads[n] = threads[0];
+ threads[n].extents.y1 = y;
+ threads[n].extents.y2 = y += h;
+
+ sna_threads_run(mono_span_thread, &threads[n]);
+ }
+
+ threads[0].extents.y1 = y;
+ threads[0].extents.y2 = extents.y2;
+ mono_span_thread(&threads[0]);
+
+ sna_threads_wait();
+ mono.op.done(mono.sna, &mono.op);
+ return true;
+ }
+
+ if (!mono_init(&mono, 2*ntrap))
+ return false;
for (n = 0; n < ntrap; n++) {
if (!xTrapezoidValid(&traps[n]))
@@ -3959,23 +4421,16 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
&traps[n].right.p1, &traps[n].right.p2, -1);
}
- memset(&mono.op, 0, sizeof(mono.op));
- if (!mono.sna->render.composite(mono.sna, op, src, NULL, dst,
- src_x + mono.clip.extents.x1 - dst_x - dx,
- src_y + mono.clip.extents.y1 - dst_y - dy,
- 0, 0,
- mono.clip.extents.x1, mono.clip.extents.y1,
- mono.clip.extents.x2 - mono.clip.extents.x1,
- mono.clip.extents.y2 - mono.clip.extents.y1,
- &mono.op)) {
- mono_fini(&mono);
- return false;
- }
+ if (mono.clip.data == NULL && mono.op.damage == NULL)
+ mono.span = mono_span__fast;
+ else
+ mono.span = mono_span;
+
mono_render(&mono);
mono.op.done(mono.sna, &mono.op);
mono_fini(&mono);
- if (!was_clear && !operator_is_bounded(op)) {
+ if (unbounded) {
xPointFixed p1, p2;
if (!mono_init(&mono, 2+2*ntrap))
@@ -4027,26 +4482,171 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
return true;
}
+struct span_thread {
+ struct sna *sna;
+ const struct sna_composite_spans_op *op;
+ const xTrapezoid *traps;
+ RegionPtr clip;
+ span_func_t span;
+ BoxRec extents;
+ int dx, dy, draw_y;
+ int ntrap;
+ bool unbounded;
+};
+
+#define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box))
+struct span_thread_boxes {
+ const struct sna_composite_spans_op *op;
+ struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
+ int num_boxes;
+};
+
+static void span_thread_add_boxes(struct sna *sna, void *data,
+ const BoxRec *box, int count, float alpha)
+{
+ struct span_thread_boxes *b = data;
+
+ __DBG(("%s: adding %d boxes with alpha=%f\n",
+ __FUNCTION__, count, alpha));
+
+ assert(count > 0 && count <= SPAN_THREAD_MAX_BOXES);
+ if (b->num_boxes + count > SPAN_THREAD_MAX_BOXES) {
+ DBG(("%s: flushing %d boxes, adding %d\n", __FUNCTION__, b->num_boxes, count));
+ assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES);
+ b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes);
+ b->num_boxes = 0;
+ }
+
+ do {
+ b->boxes[b->num_boxes].box = *box++;
+ b->boxes[b->num_boxes].alpha = alpha;
+ b->num_boxes++;
+ } while (--count);
+ assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES);
+}
+
+static void
+span_thread_box(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ const BoxRec *box,
+ int coverage)
+{
+ __DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
+ span_thread_add_boxes(sna, op, box, 1, AREA_TO_ALPHA(coverage));
+}
+
+static void
+span_thread_clipped_box(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ const BoxRec *box,
+ int coverage)
+{
+ pixman_region16_t region;
+
+ __DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2,
+ AREA_TO_ALPHA(coverage)));
+
+ pixman_region_init_rects(&region, box, 1);
+ RegionIntersect(&region, &region, clip);
+ if (REGION_NUM_RECTS(&region)) {
+ span_thread_add_boxes(sna, op,
+ REGION_RECTS(&region),
+ REGION_NUM_RECTS(&region),
+ AREA_TO_ALPHA(coverage));
+ }
+ pixman_region_fini(&region);
+}
+
+static span_func_t
+thread_choose_span(struct sna_composite_spans_op *tmp,
+ PicturePtr dst,
+ PictFormatPtr maskFormat,
+ RegionPtr clip)
+{
+ span_func_t span;
+
+ if (tmp->base.damage)
+ return NULL;
+
+ if (is_mono(dst, maskFormat)) {
+ return NULL;
+ } else {
+ if (REGION_NUM_RECTS(clip) > 1)
+ span = span_thread_clipped_box;
+ else
+ span = span_thread_box;
+ }
+
+ return span;
+}
+
+static void
+span_thread(void *arg)
+{
+ struct span_thread *thread = arg;
+ struct span_thread_boxes boxes;
+ struct tor tor;
+ const xTrapezoid *t;
+ int n, y1, y2;
+
+ if (tor_init(&tor, &thread->extents, 2*thread->ntrap))
+ return;
+
+ boxes.op = thread->op;
+ boxes.num_boxes = 0;
+
+ y1 = thread->extents.y1 - thread->draw_y;
+ y2 = thread->extents.y2 - thread->draw_y;
+ for (n = thread->ntrap, t = thread->traps; n--; t++) {
+ xTrapezoid tt;
+
+ if (pixman_fixed_to_int(t->top) >= y2 ||
+ pixman_fixed_to_int(t->bottom) < y1)
+ continue;
+
+ if (!project_trapezoid_onto_grid(t, thread->dx, thread->dy, &tt))
+ continue;
+
+ tor_add_edge(&tor, &tt, &tt.left, 1);
+ tor_add_edge(&tor, &tt, &tt.right, -1);
+ }
+
+ tor_render(thread->sna, &tor,
+ (struct sna_composite_spans_op *)&boxes, thread->clip,
+ thread->span, thread->unbounded);
+
+ tor_fini(&tor);
+
+ if (boxes.num_boxes) {
+ DBG(("%s: flushing %d boxes\n", __FUNCTION__, boxes.num_boxes));
+ assert(boxes.num_boxes <= SPAN_THREAD_MAX_BOXES);
+ thread->op->thread_boxes(thread->sna, thread->op,
+ boxes.boxes, boxes.num_boxes);
+ }
+}
+
static bool
-trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
+trapezoid_span_converter(struct sna *sna,
+ CARD8 op, PicturePtr src, PicturePtr dst,
PictFormatPtr maskFormat, unsigned int flags,
INT16 src_x, INT16 src_y,
int ntrap, xTrapezoid *traps)
{
- struct sna *sna;
struct sna_composite_spans_op tmp;
- struct tor tor;
BoxRec extents;
pixman_region16_t clip;
int16_t dst_x, dst_y;
bool was_clear;
int dx, dy, n;
+ int num_threads;
if (NO_SCAN_CONVERTER)
return false;
if (is_mono(dst, maskFormat))
- return mono_trapezoids_span_converter(op, src, dst,
+ return mono_trapezoids_span_converter(sna, op, src, dst,
src_x, src_y,
ntrap, traps);
@@ -4057,7 +4657,6 @@ trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
return false;
}
- sna = to_sna_from_drawable(dst->pDrawable);
if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, flags)) {
DBG(("%s: fallback -- composite spans not supported\n",
__FUNCTION__));
@@ -4144,29 +4743,78 @@ trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
dx *= FAST_SAMPLES_X;
dy *= FAST_SAMPLES_Y;
- if (tor_init(&tor, &extents, 2*ntrap))
- goto skip;
- for (n = 0; n < ntrap; n++) {
- xTrapezoid t;
+ num_threads = 1;
+ if (!NO_GPU_THREADS && tmp.thread_boxes &&
+ thread_choose_span(&tmp, dst, maskFormat, &clip))
+ num_threads = sna_use_threads(extents.x2-extents.x1,
+ extents.y2-extents.y1,
+ 16);
+ if (num_threads == 1) {
+ struct tor tor;
- if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t))
- continue;
+ if (tor_init(&tor, &extents, 2*ntrap))
+ goto skip;
- if (pixman_fixed_to_int(traps[n].top) + dst->pDrawable->y >= extents.y2 ||
- pixman_fixed_to_int(traps[n].bottom) + dst->pDrawable->y < extents.y1)
- continue;
+ for (n = 0; n < ntrap; n++) {
+ xTrapezoid t;
- tor_add_edge(&tor, &t, &t.left, 1);
- tor_add_edge(&tor, &t, &t.right, -1);
- }
+ if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t))
+ continue;
- tor_render(sna, &tor, &tmp, &clip,
- choose_span(&tmp, dst, maskFormat, op, &clip),
- !was_clear && maskFormat && !operator_is_bounded(op));
+ if (pixman_fixed_to_int(traps[n].top) + dst->pDrawable->y >= extents.y2 ||
+ pixman_fixed_to_int(traps[n].bottom) + dst->pDrawable->y < extents.y1)
+ continue;
+
+ tor_add_edge(&tor, &t, &t.left, 1);
+ tor_add_edge(&tor, &t, &t.right, -1);
+ }
+
+ tor_render(sna, &tor, &tmp, &clip,
+ choose_span(&tmp, dst, maskFormat, &clip),
+ !was_clear && maskFormat && !operator_is_bounded(op));
skip:
- tor_fini(&tor);
+ tor_fini(&tor);
+ } else {
+ struct span_thread threads[num_threads];
+ int y, h;
+
+ DBG(("%s: using %d threads for span compositing %dx%d\n",
+ __FUNCTION__, num_threads,
+ extents.x2 - extents.x1,
+ extents.y2 - extents.y1));
+
+ threads[0].sna = sna;
+ threads[0].op = &tmp;
+ threads[0].traps = traps;
+ threads[0].ntrap = ntrap;
+ threads[0].extents = extents;
+ threads[0].clip = &clip;
+ threads[0].dx = dx;
+ threads[0].dy = dy;
+ threads[0].draw_y = dst->pDrawable->y;
+ threads[0].unbounded = !was_clear && maskFormat && !operator_is_bounded(op);
+ threads[0].span = thread_choose_span(&tmp, dst, maskFormat, &clip);
+
+ y = extents.y1;
+ h = extents.y2 - extents.y1;
+ h = (h + num_threads - 1) / num_threads;
+
+ for (n = 1; n < num_threads; n++) {
+ threads[n] = threads[0];
+ threads[n].extents.y1 = y;
+ threads[n].extents.y2 = y += h;
+
+ sna_threads_run(span_thread, &threads[n]);
+ }
+
+ threads[0].extents.y1 = y;
+ threads[0].extents.y2 = extents.y2;
+ span_thread(&threads[0]);
+
+ sna_threads_wait();
+ }
tmp.done(sna, &tmp);
REGION_UNINIT(NULL, &clip);
@@ -4351,7 +4999,8 @@ struct inplace {
static force_inline uint8_t coverage_opacity(int coverage, uint8_t opacity)
{
coverage = coverage * 256 / FAST_SAMPLES_XY;
- return mul_8_8(coverage - (coverage >> 8), opacity);
+ coverage -= coverage >> 8;
+ return opacity == 255 ? coverage : mul_8_8(coverage, opacity);
}
static void
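
[Note: the rewritten coverage_opacity() is a small arithmetic refinement: the sample count (0..FAST_SAMPLES_XY) is first scaled into 0..256, then folded onto 0..255 with coverage -= coverage >> 8; 256 is the only value in range with bit 8 set, so full coverage becomes exactly 255 while the division stays a power-of-two shift, and the mul_8_8() multiply is skipped entirely for the common fully-opaque case. Traced with FAST_SAMPLES_XY taken as 16 for illustration:

    /* c in [0, 16] -> alpha in [0, 255] */
    static uint8_t scale_coverage(int c)
    {
        c = c * 256 / 16;   /* 0, 16, 32, ..., 240, 256 */
        c -= c >> 8;        /* only 256 has bit 8 set, so 256 -> 255 */
        return c;
    }
    /* scale_coverage(16) == 255, scale_coverage(8) == 128, scale_coverage(0) == 0 */
]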
@@ -4673,7 +5322,8 @@ mono_inplace_composite_boxes(struct sna *sna,
}
static bool
-trapezoid_spans_maybe_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
+trapezoid_spans_maybe_inplace(struct sna *sna,
+ CARD8 op, PicturePtr src, PicturePtr dst,
PictFormatPtr maskFormat)
{
struct sna_pixmap *priv;
@@ -4706,7 +5356,7 @@ trapezoid_spans_maybe_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
case PICT_x8r8g8b8:
case PICT_a8r8g8b8:
- if (picture_is_gpu(src))
+ if (picture_is_gpu(sna, src))
return false;
switch (op) {
@@ -4753,7 +5403,8 @@ out:
}
static bool
-trapezoid_span_mono_inplace(CARD8 op,
+trapezoid_span_mono_inplace(struct sna *sna,
+ CARD8 op,
PicturePtr src,
PicturePtr dst,
INT16 src_x, INT16 src_y,
@@ -4799,7 +5450,7 @@ trapezoid_span_mono_inplace(CARD8 op,
MOVE_WRITE | MOVE_READ))
return true;
- mono.sna = to_sna_from_drawable(dst->pDrawable);
+ mono.sna = sna;
if (!mono_init(&mono, 2*ntrap))
return false;
@@ -4855,6 +5506,20 @@ unbounded_pass:
op = 0;
} else {
+ if (src->pDrawable) {
+ if (!sna_drawable_move_to_cpu(src->pDrawable,
+ MOVE_READ)) {
+ mono_fini(&mono);
+ return false;
+ }
+ if (src->alphaMap &&
+ !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
+ MOVE_READ)) {
+ mono_fini(&mono);
+ return false;
+ }
+ }
+
inplace.composite.dst = image_from_pict(dst, false,
&inplace.composite.dx,
&inplace.composite.dy);
@@ -4871,6 +5536,11 @@ unbounded_pass:
mono.op.box = mono_inplace_composite_box;
mono.op.boxes = mono_inplace_composite_boxes;
}
+
+ if (mono.clip.data == NULL && mono.op.damage == NULL)
+ mono.span = mono_span__fast;
+ else
+ mono.span = mono_span;
mono_render(&mono);
mono_fini(&mono);
@@ -4922,6 +5592,45 @@ unbounded_pass:
}
static void
+pixmask_span_solid(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ const BoxRec *box,
+ int coverage)
+{
+ struct pixman_inplace *pi = (struct pixman_inplace *)op;
+ if (coverage != FAST_SAMPLES_XY) {
+ coverage = coverage * 256 / FAST_SAMPLES_XY;
+ coverage -= coverage >> 8;
+ *pi->bits = mul_4x8_8(pi->color, coverage);
+ } else
+ *pi->bits = pi->color;
+ pixman_image_composite(pi->op, pi->source, NULL, pi->image,
+ box->x1, box->y1,
+ 0, 0,
+ pi->dx + box->x1, pi->dy + box->y1,
+ box->x2 - box->x1, box->y2 - box->y1);
+}
+static void
+pixmask_span_solid__clipped(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ const BoxRec *box,
+ int coverage)
+{
+ pixman_region16_t region;
+ int n;
+
+ pixman_region_init_rects(&region, box, 1);
+ RegionIntersect(&region, &region, clip);
+ n = REGION_NUM_RECTS(&region);
+ box = REGION_RECTS(&region);
+ while (n--)
+ pixmask_span_solid(sna, op, NULL, box++, coverage);
+ pixman_region_fini(&region);
+}
+
+static void
pixmask_span(struct sna *sna,
struct sna_composite_spans_op *op,
pixman_region16_t *clip,
@@ -4961,6 +5670,113 @@ pixmask_span__clipped(struct sna *sna,
pixman_region_fini(&region);
}
+struct inplace_x8r8g8b8_thread {
+ xTrapezoid *traps;
+ PicturePtr dst, src;
+ BoxRec extents;
+ int dx, dy;
+ int ntrap;
+ bool lerp, is_solid;
+ uint32_t color;
+ int16_t src_x, src_y;
+ uint8_t op;
+};
+
+static void inplace_x8r8g8b8_thread(void *arg)
+{
+ struct inplace_x8r8g8b8_thread *thread = arg;
+ struct tor tor;
+ span_func_t span;
+ RegionPtr clip;
+ int y1, y2, n;
+
+ if (tor_init(&tor, &thread->extents, 2*thread->ntrap))
+ return;
+
+ y1 = thread->extents.y1 - thread->dst->pDrawable->y;
+ y2 = thread->extents.y2 - thread->dst->pDrawable->y;
+ for (n = 0; n < thread->ntrap; n++) {
+ xTrapezoid t;
+
+ if (!project_trapezoid_onto_grid(&thread->traps[n], thread->dx, thread->dy, &t))
+ continue;
+
+ if (pixman_fixed_to_int(thread->traps[n].top) >= y2 ||
+ pixman_fixed_to_int(thread->traps[n].bottom) < y1)
+ continue;
+
+ tor_add_edge(&tor, &t, &t.left, 1);
+ tor_add_edge(&tor, &t, &t.right, -1);
+ }
+
+ clip = thread->dst->pCompositeClip;
+ if (thread->lerp) {
+ struct inplace inplace;
+ int16_t dst_x, dst_y;
+ PixmapPtr pixmap;
+
+ pixmap = get_drawable_pixmap(thread->dst->pDrawable);
+ get_drawable_deltas(thread->dst->pDrawable, pixmap, &dst_x, &dst_y);
+
+ inplace.ptr = pixmap->devPrivate.ptr;
+ inplace.ptr += dst_y * pixmap->devKind + dst_x * 4;
+ inplace.stride = pixmap->devKind;
+ inplace.color = thread->color;
+
+ if (clip->data)
+ span = tor_blt_lerp32_clipped;
+ else
+ span = tor_blt_lerp32;
+
+ tor_render(NULL, &tor, (void*)&inplace, clip, span, false);
+ } else if (thread->is_solid) {
+ struct pixman_inplace pi;
+
+ pi.image = image_from_pict(thread->dst, false, &pi.dx, &pi.dy);
+ pi.op = thread->op;
+ pi.color = thread->color;
+
+ pi.bits = (uint32_t *)&pi.sx;
+ pi.source = pixman_image_create_bits(PIXMAN_a8r8g8b8,
+ 1, 1, pi.bits, 0);
+ pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);
+
+ if (clip->data)
+ span = pixmask_span_solid__clipped;
+ else
+ span = pixmask_span_solid;
+
+ tor_render(NULL, &tor, (void*)&pi, clip, span, false);
+
+ pixman_image_unref(pi.source);
+ pixman_image_unref(pi.image);
+ } else {
+ struct pixman_inplace pi;
+
+ pi.image = image_from_pict(thread->dst, false, &pi.dx, &pi.dy);
+ pi.source = image_from_pict(thread->src, false, &pi.sx, &pi.sy);
+ pi.sx += thread->src_x - pixman_fixed_to_int(thread->traps[0].left.p1.x);
+ pi.sy += thread->src_y - pixman_fixed_to_int(thread->traps[0].left.p1.y);
+ pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, NULL, 0);
+ pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL);
+ pi.bits = pixman_image_get_data(pi.mask);
+ pi.op = thread->op;
+
+ if (clip->data)
+ span = pixmask_span__clipped;
+ else
+ span = pixmask_span;
+
+ tor_render(NULL, &tor, (void*)&pi, clip, span, false);
+
+ pixman_image_unref(pi.mask);
+ pixman_image_unref(pi.source);
+ pixman_image_unref(pi.image);
+ }
+
+ tor_fini(&tor);
+}
+
static bool
trapezoid_span_inplace__x8r8g8b8(CARD8 op,
PicturePtr dst,
@@ -4968,17 +5784,15 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
PictFormatPtr maskFormat,
int ntrap, xTrapezoid *traps)
{
- struct tor tor;
- span_func_t span;
uint32_t color;
- bool lerp;
+ bool lerp, is_solid;
RegionRec region;
- int16_t dst_x, dst_y;
int dx, dy;
- int n;
+ int num_threads, n;
lerp = false;
- if (sna_picture_is_solid(src, &color)) {
+ is_solid = sna_picture_is_solid(src, &color);
+ if (is_solid) {
if (op == PictOpOver && (color >> 24) == 0xff)
op = PictOpSrc;
if (op == PictOpOver && sna_drawable_is_clear(dst->pDrawable))
@@ -5037,43 +5851,66 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
region.extents.x1, region.extents.y1,
region.extents.x2, region.extents.y2));
- if (tor_init(&tor, &region.extents, 2*ntrap))
+ region.data = NULL;
+ if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
+ MOVE_WRITE | MOVE_READ))
return true;
+ if (!is_solid && src->pDrawable) {
+ if (!sna_drawable_move_to_cpu(src->pDrawable,
+ MOVE_READ))
+ return true;
+
+ if (src->alphaMap &&
+ !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
+ MOVE_READ))
+ return true;
+ }
+
dx = dst->pDrawable->x * FAST_SAMPLES_X;
dy = dst->pDrawable->y * FAST_SAMPLES_Y;
- for (n = 0; n < ntrap; n++) {
- xTrapezoid t;
+ num_threads = sna_use_threads(4*(region.extents.x2 - region.extents.x1),
+ region.extents.y2 - region.extents.y1,
+ 8);
- if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t))
- continue;
+ DBG(("%s: %dx%d, format=%x, op=%d, lerp?=%d, num_threads=%d\n",
+ __FUNCTION__,
+ region.extents.x2 - region.extents.x1,
+ region.extents.y2 - region.extents.y1,
+ dst->format, op, lerp, num_threads));
- if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y ||
- pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y)
- continue;
+ if (num_threads == 1) {
+ struct tor tor;
+ span_func_t span;
- tor_add_edge(&tor, &t, &t.left, 1);
- tor_add_edge(&tor, &t, &t.right, -1);
- }
+ if (tor_init(&tor, &region.extents, 2*ntrap))
+ return true;
- DBG(("%s: move-to-cpu\n", __FUNCTION__));
- region.data = NULL;
- if (sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
- MOVE_WRITE | MOVE_READ)) {
- PixmapPtr pixmap;
+ for (n = 0; n < ntrap; n++) {
+ xTrapezoid t;
- pixmap = get_drawable_pixmap(dst->pDrawable);
- get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y);
+ if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t))
+ continue;
- DBG(("%s: format=%x, op=%d, color=%x\n",
- __FUNCTION__, dst->format, op, color));
+ if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y ||
+ pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y)
+ continue;
+
+ tor_add_edge(&tor, &t, &t.left, 1);
+ tor_add_edge(&tor, &t, &t.right, -1);
+ }
if (lerp) {
struct inplace inplace;
+ PixmapPtr pixmap;
+ int16_t dst_x, dst_y;
+
+ pixmap = get_drawable_pixmap(dst->pDrawable);
+ get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y);
inplace.ptr = pixmap->devPrivate.ptr;
- inplace.ptr += dst_y * pixmap->devKind + dst_x;
+ inplace.ptr += dst_y * pixmap->devKind + dst_x * 4;
inplace.stride = pixmap->devKind;
inplace.color = color;
@@ -5087,7 +5924,29 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
tor_render(NULL, &tor, (void*)&inplace,
dst->pCompositeClip, span, false);
- tor_fini(&tor);
+ } else if (is_solid) {
+ struct pixman_inplace pi;
+
+ pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
+ pi.op = op;
+ pi.color = color;
+
+ pi.bits = (uint32_t *)&pi.sx;
+ pi.source = pixman_image_create_bits(PIXMAN_a8r8g8b8,
+ 1, 1, pi.bits, 0);
+ pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);
+
+ if (dst->pCompositeClip->data)
+ span = pixmask_span_solid__clipped;
+ else
+ span = pixmask_span_solid;
+
+ tor_render(NULL, &tor, (void*)&pi,
+ dst->pCompositeClip, span,
+ false);
+
+ pixman_image_unref(pi.source);
+ pixman_image_unref(pi.image);
} else {
struct pixman_inplace pi;
@@ -5108,24 +5967,106 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
tor_render(NULL, &tor, (void*)&pi,
dst->pCompositeClip, span,
false);
- tor_fini(&tor);
pixman_image_unref(pi.mask);
pixman_image_unref(pi.source);
pixman_image_unref(pi.image);
}
+
+ tor_fini(&tor);
+ } else {
+ struct inplace_x8r8g8b8_thread threads[num_threads];
+ int y, h;
+
+ DBG(("%s: using %d threads for inplace compositing %dx%d\n",
+ __FUNCTION__, num_threads,
+ region.extents.x2 - region.extents.x1,
+ region.extents.y2 - region.extents.y1));
+
+ threads[0].traps = traps;
+ threads[0].ntrap = ntrap;
+ threads[0].extents = region.extents;
+ threads[0].lerp = lerp;
+ threads[0].is_solid = is_solid;
+ threads[0].color = color;
+ threads[0].dx = dx;
+ threads[0].dy = dy;
+ threads[0].dst = dst;
+ threads[0].src = src;
+ threads[0].op = op;
+ threads[0].src_x = src_x;
+ threads[0].src_y = src_y;
+
+ y = region.extents.y1;
+ h = region.extents.y2 - region.extents.y1;
+ h = (h + num_threads - 1) / num_threads;
+
+ for (n = 1; n < num_threads; n++) {
+ threads[n] = threads[0];
+ threads[n].extents.y1 = y;
+ threads[n].extents.y2 = y += h;
+
+ sna_threads_run(inplace_x8r8g8b8_thread, &threads[n]);
+ }
+
+ threads[0].extents.y1 = y;
+ threads[0].extents.y2 = region.extents.y2;
+ inplace_x8r8g8b8_thread(&threads[0]);
+
+ sna_threads_wait();
}
return true;
}
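
The hunk above parallelises the in-place composite by slicing the damage region into horizontal bands: the band height is the ceiling of the total height over the thread count, workers 1..n-1 each take one full band via sna_threads_run(), and thread 0 keeps the (possibly shorter) tail band, runs it on the calling thread, then blocks in sna_threads_wait(). A minimal sketch of just that split arithmetic, with the patch's thread helpers omitted (the band struct and driver loop below are illustrative, not from the patch):

#include <stdio.h>

struct band { int y1, y2; };

/* Split [y1, y2) into n bands the same way the patch does:
 * h = ceil(total / n); workers get full bands, the caller
 * keeps whatever remains at the tail. */
static void split_bands(int y1, int y2, int n, struct band *out)
{
	int h = (y2 - y1 + n - 1) / n;	/* ceiling division */
	int i, y = y1;

	for (i = 1; i < n; i++) {
		out[i].y1 = y;
		out[i].y2 = y += h;
	}
	out[0].y1 = y;		/* caller's band: the remainder */
	out[0].y2 = y2;
}

int main(void)
{
	struct band b[4];
	int i;

	split_bands(0, 103, 4, b);
	for (i = 0; i < 4; i++)
		printf("band %d: [%d, %d)\n", i, b[i].y1, b[i].y2);
	return 0;
}

For 103 rows over 4 threads this yields bands of 26 rows for the workers and 25 for the caller, matching the y += h walk in the patch.
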
+struct inplace_thread {
+ xTrapezoid *traps;
+ RegionPtr clip;
+ span_func_t span;
+ struct inplace inplace;
+ BoxRec extents;
+ int dx, dy;
+ int draw_x, draw_y;
+ bool unbounded;
+ int ntrap;
+};
+
+static void inplace_thread(void *arg)
+{
+ struct inplace_thread *thread = arg;
+ struct tor tor;
+ int n;
+
+ if (tor_init(&tor, &thread->extents, 2*thread->ntrap))
+ return;
+
+ for (n = 0; n < thread->ntrap; n++) {
+ xTrapezoid t;
+
+ if (!project_trapezoid_onto_grid(&thread->traps[n], thread->dx, thread->dy, &t))
+ continue;
+
+ if (pixman_fixed_to_int(thread->traps[n].top) >= thread->extents.y2 - thread->draw_y ||
+ pixman_fixed_to_int(thread->traps[n].bottom) < thread->extents.y1 - thread->draw_y)
+ continue;
+
+ tor_add_edge(&tor, &t, &t.left, 1);
+ tor_add_edge(&tor, &t, &t.right, -1);
+ }
+
+ tor_render(NULL, &tor, (void*)&thread->inplace,
+ thread->clip, thread->span, thread->unbounded);
+
+ tor_fini(&tor);
+}
+
static bool
-trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
+trapezoid_span_inplace(struct sna *sna,
+ CARD8 op, PicturePtr src, PicturePtr dst,
PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
int ntrap, xTrapezoid *traps,
bool fallback)
{
- struct tor tor;
struct inplace inplace;
span_func_t span;
PixmapPtr pixmap;
@@ -5135,7 +6076,7 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
bool unbounded;
int16_t dst_x, dst_y;
int dx, dy;
- int n;
+ int num_threads, n;
if (NO_SCAN_CONVERTER)
return false;
@@ -5151,7 +6092,7 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
return false;
}
- if (!fallback && is_gpu(dst->pDrawable)) {
+ if (!fallback && is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
DBG(("%s: fallback -- can not perform operation in place, destination busy\n",
__FUNCTION__));
@@ -5159,7 +6100,7 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
}
if (is_mono(dst, maskFormat))
- return trapezoid_span_mono_inplace(op, src, dst,
+ return trapezoid_span_mono_inplace(sna, op, src, dst,
src_x, src_y, ntrap, traps);
if (dst->format == PICT_a8r8g8b8 || dst->format == PICT_x8r8g8b8)
@@ -5234,7 +6175,7 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
__FUNCTION__));
do {
/* XXX unwind errors? */
- if (!trapezoid_span_inplace(op, src, dst, NULL,
+ if (!trapezoid_span_inplace(sna, op, src, dst, NULL,
src_x, src_y, 1, traps++,
fallback))
return false;
@@ -5266,26 +6207,6 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
region.extents.x1, region.extents.y1,
region.extents.x2, region.extents.y2));
- if (tor_init(&tor, &region.extents, 2*ntrap))
- return true;
-
- dx = dst->pDrawable->x * FAST_SAMPLES_X;
- dy = dst->pDrawable->y * FAST_SAMPLES_Y;
-
- for (n = 0; n < ntrap; n++) {
- xTrapezoid t;
-
- if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t))
- continue;
-
- if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y ||
- pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y)
- continue;
-
- tor_add_edge(&tor, &t, &t.left, 1);
- tor_add_edge(&tor, &t, &t.right, -1);
- }
-
if (op == PictOpSrc) {
if (dst->pCompositeClip->data)
span = tor_blt_src_clipped;
@@ -5310,6 +6231,9 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
op == PictOpSrc ? MOVE_WRITE | MOVE_INPLACE_HINT : MOVE_WRITE | MOVE_READ))
return true;
+ dx = dst->pDrawable->x * FAST_SAMPLES_X;
+ dy = dst->pDrawable->y * FAST_SAMPLES_Y;
+
get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y);
inplace.ptr = pixmap->devPrivate.ptr;
@@ -5317,10 +6241,72 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
inplace.stride = pixmap->devKind;
inplace.opacity = color >> 24;
- tor_render(NULL, &tor, (void*)&inplace,
- dst->pCompositeClip, span, unbounded);
+ num_threads = sna_use_threads(region.extents.x2 - region.extents.x1,
+ region.extents.y2 - region.extents.y1,
+ 8);
+ if (num_threads == 1) {
+ struct tor tor;
- tor_fini(&tor);
+ if (tor_init(&tor, &region.extents, 2*ntrap))
+ return true;
+
+ for (n = 0; n < ntrap; n++) {
+ xTrapezoid t;
+
+ if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t))
+ continue;
+
+ if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y ||
+ pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y)
+ continue;
+
+ tor_add_edge(&tor, &t, &t.left, 1);
+ tor_add_edge(&tor, &t, &t.right, -1);
+ }
+
+ tor_render(NULL, &tor, (void*)&inplace,
+ dst->pCompositeClip, span, unbounded);
+
+ tor_fini(&tor);
+ } else {
+ struct inplace_thread threads[num_threads];
+ int y, h;
+
+ DBG(("%s: using %d threads for inplace compositing %dx%d\n",
+ __FUNCTION__, num_threads,
+ region.extents.x2 - region.extents.x1,
+ region.extents.y2 - region.extents.y1));
+
+ threads[0].traps = traps;
+ threads[0].ntrap = ntrap;
+ threads[0].inplace = inplace;
+ threads[0].extents = region.extents;
+ threads[0].clip = dst->pCompositeClip;
+ threads[0].span = span;
+ threads[0].unbounded = unbounded;
+ threads[0].dx = dx;
+ threads[0].dy = dy;
+ threads[0].draw_x = dst->pDrawable->x;
+ threads[0].draw_y = dst->pDrawable->y;
+
+ y = region.extents.y1;
+ h = region.extents.y2 - region.extents.y1;
+ h = (h + num_threads - 1) / num_threads;
+
+ for (n = 1; n < num_threads; n++) {
+ threads[n] = threads[0];
+ threads[n].extents.y1 = y;
+ threads[n].extents.y2 = y += h;
+
+ sna_threads_run(inplace_thread, &threads[n]);
+ }
+
+ threads[0].extents.y1 = y;
+ threads[0].extents.y2 = region.extents.y2;
+ inplace_thread(&threads[0]);
+
+ sna_threads_wait();
+ }
return true;
}
@@ -5441,32 +6427,14 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
region.extents.y2 = region.extents.y1 + extents.y2;
region.data = NULL;
- DBG(("%s: move-to-cpu\n", __FUNCTION__));
- if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
- MOVE_READ | MOVE_WRITE))
- goto done;
- if (dst->alphaMap &&
- !sna_drawable_move_to_cpu(dst->alphaMap->pDrawable,
- MOVE_READ | MOVE_WRITE))
- goto done;
- if (src->pDrawable) {
- if (!sna_drawable_move_to_cpu(src->pDrawable,
- MOVE_READ))
- goto done;
- if (src->alphaMap &&
- !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
- MOVE_READ))
- goto done;
- }
-
DBG(("%s: fbComposite()\n", __FUNCTION__));
- fbComposite(op, src, mask, dst,
- src_x + dst_x - pixman_fixed_to_int(traps[0].left.p1.x),
- src_y + dst_y - pixman_fixed_to_int(traps[0].left.p1.y),
- 0, 0,
- dst_x, dst_y,
- extents.x2, extents.y2);
-done:
+ sna_composite_fb(op, src, mask, dst, &region,
+ src_x + dst_x - pixman_fixed_to_int(traps[0].left.p1.x),
+ src_y + dst_y - pixman_fixed_to_int(traps[0].left.p1.y),
+ 0, 0,
+ dst_x, dst_y,
+ extents.x2, extents.y2);
+
FreePicture(mask, 0);
}
sna_pixmap_destroy(scratch);
@@ -5518,11 +6486,14 @@ sna_composite_trapezoids(CARD8 op,
force_fallback = FORCE_FALLBACK > 0;
if ((too_small(priv) || DAMAGE_IS_ALL(priv->cpu_damage)) &&
- !picture_is_gpu(src)) {
- DBG(("%s: force fallbacks -- dst is too small, %dx%d\n",
+ !picture_is_gpu(sna, src) && untransformed(src)) {
+ DBG(("%s: force fallbacks --too small, %dx%d? %d, all-cpu? %d, src-is-cpu? %d\n",
__FUNCTION__,
dst->pDrawable->width,
- dst->pDrawable->height));
+ dst->pDrawable->height,
+ too_small(priv),
+ (int)DAMAGE_IS_ALL(priv->cpu_damage),
+ !picture_is_gpu(sna, src)));
force_fallback = true;
}
if (FORCE_FALLBACK < 0)
@@ -5589,24 +6560,24 @@ sna_composite_trapezoids(CARD8 op,
goto fallback;
if (is_mono(dst, maskFormat) &&
- mono_trapezoids_span_converter(op, src, dst,
+ mono_trapezoids_span_converter(sna, op, src, dst,
xSrc, ySrc,
ntrap, traps))
return;
- if (trapezoid_spans_maybe_inplace(op, src, dst, maskFormat)) {
+ if (trapezoid_spans_maybe_inplace(sna, op, src, dst, maskFormat)) {
flags |= COMPOSITE_SPANS_INPLACE_HINT;
- if (trapezoid_span_inplace(op, src, dst, maskFormat,
+ if (trapezoid_span_inplace(sna, op, src, dst, maskFormat,
xSrc, ySrc, ntrap, traps,
false))
return;
}
- if (trapezoid_span_converter(op, src, dst, maskFormat, flags,
+ if (trapezoid_span_converter(sna, op, src, dst, maskFormat, flags,
xSrc, ySrc, ntrap, traps))
return;
- if (trapezoid_span_inplace(op, src, dst, maskFormat,
+ if (trapezoid_span_inplace(sna, op, src, dst, maskFormat,
xSrc, ySrc, ntrap, traps,
false))
return;
@@ -5616,7 +6587,7 @@ sna_composite_trapezoids(CARD8 op,
return;
fallback:
- if (trapezoid_span_inplace(op, src, dst, maskFormat,
+ if (trapezoid_span_inplace(sna, op, src, dst, maskFormat,
xSrc, ySrc, ntrap, traps,
true))
return;
@@ -5625,12 +6596,13 @@ fallback:
xSrc, ySrc, ntrap, traps))
return;
- if (trapezoids_inplace_fallback(op, src, dst, maskFormat, ntrap, traps))
+ if (trapezoids_inplace_fallback(sna, op, src, dst, maskFormat,
+ ntrap, traps))
return;
DBG(("%s: fallback mask=%08x, ntrap=%d\n", __FUNCTION__,
maskFormat ? (unsigned)maskFormat->format : 0, ntrap));
- trapezoids_fallback(op, src, dst, maskFormat,
+ trapezoids_fallback(sna, op, src, dst, maskFormat,
xSrc, ySrc,
ntrap, traps);
}
@@ -5652,7 +6624,8 @@ project_trap_onto_grid(const xTrap *in,
}
static bool
-mono_trap_span_converter(PicturePtr dst,
+mono_trap_span_converter(struct sna *sna,
+ PicturePtr dst,
INT16 x, INT16 y,
int ntrap, xTrap *traps)
{
@@ -5677,7 +6650,7 @@ mono_trap_span_converter(PicturePtr dst,
mono.clip.extents.x2, mono.clip.extents.y2,
x, y));
- mono.sna = to_sna_from_drawable(dst->pDrawable);
+ mono.sna = sna;
if (!mono_init(&mono, 2*ntrap))
return false;
@@ -5722,11 +6695,11 @@ mono_trap_span_converter(PicturePtr dst,
}
static bool
-trap_span_converter(PicturePtr dst,
+trap_span_converter(struct sna *sna,
+ PicturePtr dst,
INT16 src_x, INT16 src_y,
int ntrap, xTrap *trap)
{
- struct sna *sna;
struct sna_composite_spans_op tmp;
struct tor tor;
BoxRec extents;
@@ -5740,9 +6713,8 @@ trap_span_converter(PicturePtr dst,
return false;
if (dst->polyEdge == PolyEdgeSharp)
- return mono_trap_span_converter(dst, src_x, src_y, ntrap, trap);
+ return mono_trap_span_converter(sna, dst, src_x, src_y, ntrap, trap);
- sna = to_sna_from_drawable(dst->pDrawable);
if (!sna->render.check_composite_spans(sna, PictOpAdd, sna->render.white_picture, dst,
dst->pCompositeClip->extents.x2 - dst->pCompositeClip->extents.x1,
dst->pCompositeClip->extents.y2 - dst->pCompositeClip->extents.y1,
@@ -5806,7 +6778,7 @@ trap_span_converter(PicturePtr dst,
}
tor_render(sna, &tor, &tmp, clip,
- choose_span(&tmp, dst, NULL, PictOpAdd, clip), false);
+ choose_span(&tmp, dst, NULL, clip), false);
skip:
tor_fini(&tor);
@@ -5827,7 +6799,6 @@ static void mark_damaged(PixmapPtr pixmap, struct sna_pixmap *priv,
pixmap->drawable.width,
pixmap->drawable.height);
list_del(&priv->list);
- priv->undamaged = false;
} else {
sna_damage_add_box(&priv->gpu_damage, box);
sna_damage_subtract_box(&priv->cpu_damage, box);
@@ -5835,11 +6806,11 @@ static void mark_damaged(PixmapPtr pixmap, struct sna_pixmap *priv,
}
static bool
-trap_mask_converter(PicturePtr picture,
+trap_mask_converter(struct sna *sna,
+ PicturePtr picture,
INT16 x, INT16 y,
int ntrap, xTrap *trap)
{
- struct sna *sna;
struct tor tor;
ScreenPtr screen = picture->pDrawable->pScreen;
PixmapPtr scratch, pixmap;
@@ -6033,13 +7004,18 @@ trap_upload(PicturePtr picture,
void
sna_add_traps(PicturePtr picture, INT16 x, INT16 y, int n, xTrap *t)
{
+ struct sna *sna;
+
DBG(("%s (%d, %d) x %d\n", __FUNCTION__, x, y, n));
- if (is_gpu(picture->pDrawable)) {
- if (trap_span_converter(picture, x, y, n, t))
+ sna = to_sna_from_drawable(picture->pDrawable);
+ if (is_gpu(sna, picture->pDrawable, PREFER_GPU_SPANS)) {
+ if (trap_span_converter(sna, picture, x, y, n, t))
return;
+ }
- if (trap_mask_converter(picture, x, y, n, t))
+ if (is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER)) {
+ if (trap_mask_converter(sna, picture, x, y, n, t))
return;
if (trap_upload(picture, x, y, n, t))
@@ -6070,6 +7046,7 @@ project_point_onto_grid(const xPointFixed *in,
out->y = dy + pixman_fixed_to_grid(in->y);
}
+#if HAS_PIXMAN_TRIANGLES
static inline bool
xTriangleValid(const xTriangle *t)
{
@@ -6104,7 +7081,8 @@ project_triangle_onto_grid(const xTriangle *in,
}
static bool
-mono_triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
+mono_triangles_span_converter(struct sna *sna,
+ CARD8 op, PicturePtr src, PicturePtr dst,
INT16 src_x, INT16 src_y,
int count, xTriangle *tri)
{
@@ -6115,7 +7093,7 @@ mono_triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
bool was_clear;
int n;
- mono.sna = to_sna_from_drawable(dst->pDrawable);
+ mono.sna = sna;
dst_x = pixman_fixed_to_int(tri[0].p1.x);
dst_y = pixman_fixed_to_int(tri[0].p1.y);
@@ -6177,6 +7155,10 @@ mono_triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
mono.clip.extents.x2 - mono.clip.extents.x1,
mono.clip.extents.y2 - mono.clip.extents.y1,
&mono.op)) {
+ if (mono.clip.data == NULL && mono.op.damage == NULL)
+ mono.span = mono_span__fast;
+ else
+ mono.span = mono_span;
mono_render(&mono);
mono.op.done(mono.sna, &mono.op);
}
@@ -6220,6 +7202,10 @@ mono_triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
mono.clip.extents.x2 - mono.clip.extents.x1,
mono.clip.extents.y2 - mono.clip.extents.y1,
&mono.op)) {
+ if (mono.clip.data == NULL && mono.op.damage == NULL)
+ mono.span = mono_span__fast;
+ else
+ mono.span = mono_span;
mono_render(&mono);
mono.op.done(mono.sna, &mono.op);
}
@@ -6232,11 +7218,11 @@ mono_triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
}
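
Both mono triangle paths above now select mono_span__fast when there is neither a complex clip (clip.data == NULL) nor damage tracking (op.damage == NULL), and fall back to the general emitter otherwise. A sketch of that function-pointer dispatch in isolation (the span signature and struct layout here are illustrative):

#include <stdio.h>
#include <stddef.h>

struct op { void *damage; };
struct ctx {
	struct { void *data; } clip;
	struct op op;
	void (*span)(const char *what);
};

static void span_fast(const char *what)    { printf("fast: %s\n", what); }
static void span_general(const char *what) { printf("general: %s\n", what); }

static void choose_span(struct ctx *c)
{
	/* Skip per-span clipping/damage bookkeeping when neither applies. */
	if (c->clip.data == NULL && c->op.damage == NULL)
		c->span = span_fast;
	else
		c->span = span_general;
}

int main(void)
{
	struct ctx c = { { NULL }, { NULL }, NULL };

	choose_span(&c);
	c.span("unclipped, undamaged");

	c.op.damage = &c;
	choose_span(&c);
	c.span("damage tracked");
	return 0;
}
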
static bool
-triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
+triangles_span_converter(struct sna *sna,
+ CARD8 op, PicturePtr src, PicturePtr dst,
PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
int count, xTriangle *tri)
{
- struct sna *sna;
struct sna_composite_spans_op tmp;
struct tor tor;
BoxRec extents;
@@ -6249,7 +7235,7 @@ triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
return false;
if (is_mono(dst, maskFormat))
- return mono_triangles_span_converter(op, src, dst,
+ return mono_triangles_span_converter(sna, op, src, dst,
src_x, src_y,
count, tri);
@@ -6260,7 +7246,6 @@ triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
return false;
}
- sna = to_sna_from_drawable(dst->pDrawable);
if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, 0)) {
DBG(("%s: fallback -- composite spans not supported\n",
__FUNCTION__));
@@ -6352,7 +7337,7 @@ triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
}
tor_render(sna, &tor, &tmp, &clip,
- choose_span(&tmp, dst, maskFormat, op, &clip),
+ choose_span(&tmp, dst, maskFormat, &clip),
!was_clear && maskFormat && !operator_is_bounded(op));
skip:
@@ -6585,7 +7570,9 @@ sna_composite_triangles(CARD8 op,
INT16 xSrc, INT16 ySrc,
int n, xTriangle *tri)
{
- if (triangles_span_converter(op, src, dst, maskFormat,
+ struct sna *sna = to_sna_from_drawable(dst->pDrawable);
+
+ if (triangles_span_converter(sna, op, src, dst, maskFormat,
xSrc, ySrc,
n, tri))
return;
@@ -6599,11 +7586,11 @@ sna_composite_triangles(CARD8 op,
}
static bool
-tristrip_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
+tristrip_span_converter(struct sna *sna,
+ CARD8 op, PicturePtr src, PicturePtr dst,
PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
int count, xPointFixed *points)
{
- struct sna *sna;
struct sna_composite_spans_op tmp;
struct tor tor;
BoxRec extents;
@@ -6624,7 +7611,6 @@ tristrip_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
return false;
}
- sna = to_sna_from_drawable(dst->pDrawable);
if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, 0)) {
DBG(("%s: fallback -- composite spans not supported\n",
__FUNCTION__));
@@ -6726,7 +7712,7 @@ tristrip_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
assert(tor.polygon->num_edges <= 2*count);
tor_render(sna, &tor, &tmp, &clip,
- choose_span(&tmp, dst, maskFormat, op, &clip),
+ choose_span(&tmp, dst, maskFormat, &clip),
!was_clear && maskFormat && !operator_is_bounded(op));
skip:
@@ -6865,7 +7851,9 @@ sna_composite_tristrip(CARD8 op,
INT16 xSrc, INT16 ySrc,
int n, xPointFixed *points)
{
- if (tristrip_span_converter(op, src, dst, maskFormat, xSrc, ySrc, n, points))
+ struct sna *sna = to_sna_from_drawable(dst->pDrawable);
+
+ if (tristrip_span_converter(sna, op, src, dst, maskFormat, xSrc, ySrc, n, points))
return;
tristrip_fallback(op, src, dst, maskFormat, xSrc, ySrc, n, points);
@@ -7001,3 +7989,4 @@ sna_composite_trifan(CARD8 op,
{
trifan_fallback(op, src, dst, maskFormat, xSrc, ySrc, n, points);
}
+#endif
diff --git a/src/sna/sna_vertex.c b/src/sna/sna_vertex.c
new file mode 100644
index 000000000..6755d9aad
--- /dev/null
+++ b/src/sna/sna_vertex.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "sna.h"
+
+#include <unistd.h>
+
+void sna_vertex_init(struct sna *sna)
+{
+ pthread_mutex_init(&sna->render.lock, NULL);
+ pthread_cond_init(&sna->render.wait, NULL);
+ sna->render.active = 0;
+}
diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
index 7bf20e96b..07fa829fa 100644
--- a/src/sna/sna_video.c
+++ b/src/sna/sna_video.c
@@ -100,9 +100,16 @@ sna_video_buffer(struct sna *sna,
if (video->buf && __kgem_bo_size(video->buf) < frame->size)
sna_video_free_buffers(sna, video);
- if (video->buf == NULL)
- video->buf = kgem_create_linear(&sna->kgem, frame->size,
- CREATE_GTT_MAP);
+ if (video->buf == NULL) {
+ if (video->tiled) {
+ video->buf = kgem_create_2d(&sna->kgem,
+ frame->width, frame->height, 32,
+ I915_TILING_X, CREATE_EXACT);
+ } else {
+ video->buf = kgem_create_linear(&sna->kgem, frame->size,
+ CREATE_GTT_MAP);
+ }
+ }
return video->buf;
}
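
sna_video_buffer() keeps one cached bo per port, discarding it when a larger frame arrives and recreating it on demand, now either X-tiled 2D for tiled consumers or linear GTT-mapped otherwise. The cache-or-recreate shape in isolation (the allocator below is a stand-in for the kgem calls):

#include <stdlib.h>

struct buf { size_t size; /* ... */ };

static struct buf *alloc_buf(size_t size)
{
	struct buf *b = malloc(sizeof(*b));
	if (b)
		b->size = size;
	return b;
}

/* Reuse the cached buffer when it is big enough; otherwise drop it
 * and allocate afresh for the new frame size. */
static struct buf *get_frame_buffer(struct buf **cache, size_t need)
{
	if (*cache && (*cache)->size < need) {
		free(*cache);
		*cache = NULL;
	}
	if (*cache == NULL)
		*cache = alloc_buf(need);
	return *cache;
}

int main(void)
{
	struct buf *cache = NULL;

	get_frame_buffer(&cache, 4096);
	get_frame_buffer(&cache, 8192);	/* too small: freed, reallocated */
	get_frame_buffer(&cache, 1024);	/* big enough: reused as-is */
	free(cache);
	return 0;
}
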
@@ -166,14 +173,20 @@ sna_video_clip_helper(ScrnInfoPtr scrn,
if (crtc_region != reg)
RegionUninit(crtc_region);
- frame->top = y1 >> 16;
- frame->left = (x1 >> 16) & ~1;
- frame->npixels = ALIGN(((x2 + 0xffff) >> 16), 2) - frame->left;
+ frame->src.x1 = x1 >> 16;
+ frame->src.y1 = y1 >> 16;
+ frame->src.x2 = (x2 + 0xffff) >> 16;
+ frame->src.y2 = (y2 + 0xffff) >> 16;
+
+ frame->image.x1 = frame->src.x1 & ~1;
+ frame->image.x2 = ALIGN(frame->src.x2, 2);
if (is_planar_fourcc(frame->id)) {
- frame->top &= ~1;
- frame->nlines = ALIGN(((y2 + 0xffff) >> 16), 2) - frame->top;
- } else
- frame->nlines = ((y2 + 0xffff) >> 16) - frame->top;
+ frame->image.y1 = frame->src.y1 & ~1;
+ frame->image.y2 = ALIGN(frame->src.y2, 2);
+ } else {
+ frame->image.y1 = frame->src.y1;
+ frame->image.y2 = frame->src.y2;
+ }
return ret;
}
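
The clip helper converts the 16.16 fixed-point source rectangle into integer boxes: floor for the top-left (x >> 16), ceiling for the bottom-right ((x + 0xffff) >> 16), then widens image.x/y to even coordinates for planar formats whose chroma is subsampled 2x2. The rounding in isolation, with made-up sample values:

#include <assert.h>
#include <stdint.h>

#define ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))

static int fixed_floor(int32_t v) { return v >> 16; }
static int fixed_ceil(int32_t v)  { return (v + 0xffff) >> 16; }

int main(void)
{
	int32_t x1 = 10 << 16 | 0x4000;	/* 10.25 */
	int32_t x2 = 17 << 16 | 0x0001;	/* just past 17.0 */

	assert(fixed_floor(x1) == 10);
	assert(fixed_ceil(x2) == 18);

	/* Planar chroma is subsampled 2x2, so the copied image box
	 * is widened to even bounds: x1 & ~1 and ALIGN(x2, 2). */
	assert((fixed_floor(x1) & ~1) == 10);
	assert(ALIGN(fixed_ceil(x2), 2) == 18);
	return 0;
}
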
@@ -186,51 +199,38 @@ sna_video_frame_init(struct sna *sna,
{
int align;
+ DBG(("%s: id=%d [planar? %d], width=%d, height=%d, align=%d\n",
+ __FUNCTION__, id, is_planar_fourcc(id), width, height, video->alignment));
+ assert(width && height);
+
frame->bo = NULL;
frame->id = id;
frame->width = width;
frame->height = height;
- /* Only needs to be DWORD-aligned for textured on i915, but overlay has
- * stricter requirements.
- */
- if (video->textured) {
- align = 4;
- } else {
- if (sna->kgem.gen >= 40)
- /* Actually the alignment is 64 bytes, too. But the
- * stride must be at least 512 bytes. Take the easy fix
- * and align on 512 bytes unconditionally. */
- align = 512;
- else if (sna->kgem.gen < 21)
- /* Harsh, errata on these chipsets limit the stride
- * to be a multiple of 256 bytes.
- */
- align = 256;
- else
- align = 64;
- }
-
+ align = video->alignment;
#if SNA_XVMC
/* for i915 xvmc, hw requires 1kb aligned surfaces */
- if (id == FOURCC_XVMC && sna->kgem.gen < 40)
+ if (id == FOURCC_XVMC && sna->kgem.gen < 040 && align < 1024)
align = 1024;
#endif
-
- /* Determine the desired destination pitch (representing the chroma's pitch,
- * in the planar case.
+ /* Determine the desired destination pitch (representing the
+ * chroma's pitch in the planar case).
*/
if (is_planar_fourcc(id)) {
+ assert((width & 1) == 0);
+ assert((height & 1) == 0);
if (video->rotation & (RR_Rotate_90 | RR_Rotate_270)) {
frame->pitch[0] = ALIGN((height / 2), align);
frame->pitch[1] = ALIGN(height, align);
- frame->size = 3U * frame->pitch[0] * width;
+ frame->size = width;
} else {
frame->pitch[0] = ALIGN((width / 2), align);
frame->pitch[1] = ALIGN(width, align);
- frame->size = 3U * frame->pitch[0] * height;
+ frame->size = height;
}
+ frame->size *= frame->pitch[0] + frame->pitch[1];
} else {
if (video->rotation & (RR_Rotate_90 | RR_Rotate_270)) {
frame->pitch[0] = ALIGN((height << 1), align);
@@ -251,51 +251,68 @@ sna_video_frame_init(struct sna *sna,
frame->VBufOffset =
frame->UBufOffset + (int)frame->pitch[0] * height / 2;
}
+
+ assert(frame->size);
}
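
For planar FOURCCs the frame is now laid out as a full-pitch luma plane followed by two half-pitch chroma planes, so the total is height * (pitch[0] + pitch[1]), with U and V each occupying pitch[0] * height / 2. Worked numbers for an unrotated 320x240 YV12 frame at 64-byte alignment; the pitch and size formulas are the patch's, while the U offset following directly after the luma plane is an assumption based on the surrounding code:

#include <stdio.h>

#define ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))

int main(void)
{
	int width = 320, height = 240, align = 64;
	int pitch0 = ALIGN(width / 2, align);	/* chroma: 192 */
	int pitch1 = ALIGN(width, align);	/* luma:   320 */
	int size = height * (pitch0 + pitch1);	/* 240 * 512 = 122880 */
	int UBufOffset = pitch1 * height;		  /* 76800 */
	int VBufOffset = UBufOffset + pitch0 * height / 2; /* 99840 */

	printf("pitches=%d/%d size=%d U@%d V@%d\n",
	       pitch1, pitch0, size, UBufOffset, VBufOffset);
	return 0;
}
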
-static void sna_memcpy_plane(uint8_t *dst, const uint8_t *src,
- int height, int width,
- int dstPitch, int srcPitch,
- Rotation rotation)
+static void sna_memcpy_plane(struct sna_video *video,
+ uint8_t *dst, const uint8_t *src,
+ const struct sna_video_frame *frame, int sub)
{
+ int dstPitch = frame->pitch[!sub], srcPitch;
const uint8_t *s;
int i, j = 0;
+ int x, y, w, h;
+
+ x = frame->image.x1;
+ y = frame->image.y1;
+ w = frame->image.x2 - frame->image.x1;
+ h = frame->image.y2 - frame->image.y1;
+ if (sub) {
+ x >>= 1; w >>= 1;
+ y >>= 1; h >>= 1;
+ srcPitch = ALIGN((frame->width >> 1), 4);
+ } else
+ srcPitch = ALIGN(frame->width, 4);
+
+ src += y * srcPitch + x;
+ if (!video->textured)
+ x = y = 0;
- switch (rotation) {
+ switch (video->rotation) {
case RR_Rotate_0:
- /* optimise for the case of no clipping */
- if (srcPitch == dstPitch && srcPitch == width)
- memcpy(dst, src, srcPitch * height);
- else while (height--) {
- memcpy(dst, src, width);
+ dst += y * dstPitch + x;
+ if (srcPitch == dstPitch && srcPitch == w)
+ memcpy(dst, src, srcPitch * h);
+ else while (h--) {
+ memcpy(dst, src, w);
src += srcPitch;
dst += dstPitch;
}
break;
case RR_Rotate_90:
- for (i = 0; i < height; i++) {
+ for (i = 0; i < h; i++) {
s = src;
- for (j = 0; j < width; j++) {
- dst[(i) + ((width - j - 1) * dstPitch)] = *s++;
- }
+ for (j = 0; j < w; j++)
+ dst[i + ((x + w - j - 1) * dstPitch)] = *s++;
src += srcPitch;
}
break;
case RR_Rotate_180:
- for (i = 0; i < height; i++) {
+ for (i = 0; i < h; i++) {
s = src;
- for (j = 0; j < width; j++) {
- dst[(width - j - 1) +
- ((height - i - 1) * dstPitch)] = *s++;
+ for (j = 0; j < w; j++) {
+ dst[(x + w - j - 1) +
+ ((h - i - 1) * dstPitch)] = *s++;
}
src += srcPitch;
}
break;
case RR_Rotate_270:
- for (i = 0; i < height; i++) {
+ for (i = 0; i < h; i++) {
s = src;
- for (j = 0; j < width; j++) {
- dst[(height - i - 1) + (j * dstPitch)] = *s++;
+ for (j = 0; j < w; j++) {
+ dst[(h - i - 1) + ((x + j) * dstPitch)] = *s++;
}
src += srcPitch;
}
@@ -309,36 +326,22 @@ sna_copy_planar_data(struct sna_video *video,
const uint8_t *src, uint8_t *dst)
{
uint8_t *d;
- int w = frame->npixels;
- int h = frame->nlines;
- int pitch;
- pitch = ALIGN(frame->width, 4);
- sna_memcpy_plane(dst, src + frame->top * pitch + frame->left,
- h, w, frame->pitch[1], pitch, video->rotation);
-
- src += frame->height * pitch; /* move over Luma plane */
-
- /* align to beginning of chroma planes */
- pitch = ALIGN((frame->width >> 1), 0x4);
- src += (frame->top >> 1) * pitch + (frame->left >> 1);
- w >>= 1;
- h >>= 1;
+ sna_memcpy_plane(video, dst, src, frame, 0);
+ src += frame->height * ALIGN(frame->width, 4);
if (frame->id == FOURCC_I420)
d = dst + frame->UBufOffset;
else
d = dst + frame->VBufOffset;
-
- sna_memcpy_plane(d, src, h, w, frame->pitch[0], pitch, video->rotation);
- src += (frame->height >> 1) * pitch; /* move over Chroma plane */
+ sna_memcpy_plane(video, d, src, frame, 1);
+ src += (frame->height >> 1) * ALIGN(frame->width >> 1, 4);
if (frame->id == FOURCC_I420)
d = dst + frame->VBufOffset;
else
d = dst + frame->UBufOffset;
-
- sna_memcpy_plane(d, src, h, w, frame->pitch[0], pitch, video->rotation);
+ sna_memcpy_plane(video, d, src, frame, 1);
}
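
sna_copy_planar_data() copies Y first, then whichever chroma plane comes first in the source: I420 stores U before V, YV12 stores V before U, so the first half-size plane lands at UBufOffset for I420 and at VBufOffset for YV12, and the second at the other offset. The selection in isolation (the fourcc macros are the standard little-endian encoding, not from the patch):

#include <assert.h>
#include <stdint.h>

#define FOURCC(a,b,c,d) ((uint32_t)(a) | ((uint32_t)(b) << 8) | \
			 ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24))
#define FOURCC_I420 FOURCC('I','4','2','0')
#define FOURCC_YV12 FOURCC('Y','V','1','2')

struct frame { uint32_t id; uint32_t UBufOffset, VBufOffset; };

/* Offset of the n-th chroma plane (n = 0 or 1) in the destination:
 * I420 carries U first, YV12 carries V first. */
static uint32_t chroma_offset(const struct frame *f, int n)
{
	if ((f->id == FOURCC_I420) ^ n)
		return f->UBufOffset;
	return f->VBufOffset;
}

int main(void)
{
	struct frame f = { FOURCC_YV12, 100, 200 };

	assert(chroma_offset(&f, 0) == f.VBufOffset);
	assert(chroma_offset(&f, 1) == f.UBufOffset);
	return 0;
}
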
static void
@@ -349,11 +352,22 @@ sna_copy_packed_data(struct sna_video *video,
{
int pitch = frame->width << 1;
const uint8_t *src, *s;
- int w = frame->npixels;
- int h = frame->nlines;
+ int x, y, w, h;
int i, j;
- src = buf + (frame->top * pitch) + (frame->left << 1);
+ if (video->textured) {
+ /* XXX support copying cropped extents */
+ x = y = 0;
+ w = frame->width;
+ h = frame->height;
+ } else {
+ x = frame->image.x1;
+ y = frame->image.y1;
+ w = frame->image.x2 - frame->image.x1;
+ h = frame->image.y2 - frame->image.y1;
+ }
+
+ src = buf + (y * pitch) + (x << 1);
switch (video->rotation) {
case RR_Rotate_0:
@@ -376,7 +390,7 @@ sna_copy_packed_data(struct sna_video *video,
src += pitch;
}
h >>= 1;
- src = buf + (frame->top * pitch) + (frame->left << 1);
+ src = buf + (y * pitch) + (x << 1);
for (i = 0; i < h; i += 2) {
for (j = 0; j < w; j += 2) {
/* Copy U */
@@ -412,7 +426,7 @@ sna_copy_packed_data(struct sna_video *video,
src += pitch;
}
h >>= 1;
- src = buf + (frame->top * pitch) + (frame->left << 1);
+ src = buf + (y * pitch) + (x << 1);
for (i = 0; i < h; i += 2) {
for (j = 0; j < w; j += 2) {
/* Copy U */
@@ -435,27 +449,28 @@ sna_video_copy_data(struct sna *sna,
{
uint8_t *dst;
- DBG(("%s: handle=%d, size=%dx%d, rotation=%d\n",
+ DBG(("%s: handle=%d, size=%dx%d [%d], rotation=%d, is-texture=%d\n",
__FUNCTION__, frame->bo ? frame->bo->handle : 0,
- frame->width, frame->height, video->rotation));
- DBG(("%s: top=%d, left=%d\n", __FUNCTION__, frame->top, frame->left));
+ frame->width, frame->height, frame->size,
+ video->rotation, video->textured));
+ DBG(("%s: image=(%d, %d), (%d, %d), source=(%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ frame->image.x1, frame->image.y1, frame->image.x2, frame->image.y2,
+ frame->src.x1, frame->src.y1, frame->src.x2, frame->src.y2));
+ assert(frame->width && frame->height);
+ assert(frame->size);
/* In the common case, we can simply do the upload in a single pwrite */
- if (video->rotation == RR_Rotate_0) {
+ if (video->rotation == RR_Rotate_0 && !video->tiled) {
if (is_planar_fourcc(frame->id)) {
- uint16_t pitch[2] = {
- ALIGN((frame->width >> 1), 0x4),
- ALIGN(frame->width, 0x4),
- };
- if (pitch[0] == frame->pitch[0] &&
- pitch[1] == frame->pitch[1] &&
- frame->top == 0 && frame->left == 0) {
- uint32_t len =
- (uint32_t)pitch[1]*frame->height +
- (uint32_t)pitch[0]*frame->height;
+ int w = frame->image.x2 - frame->image.x1;
+ int h = frame->image.y2 - frame->image.y1;
+ if (ALIGN(h, 2) == frame->height &&
+ ALIGN(w >> 1, 4) == frame->pitch[0] &&
+ ALIGN(w, 4) == frame->pitch[1]) {
if (frame->bo) {
kgem_bo_write(&sna->kgem, frame->bo,
- buf, len);
+ buf, frame->size);
} else {
frame->bo = kgem_create_buffer(&sna->kgem, frame->size,
KGEM_BUFFER_WRITE | KGEM_BUFFER_WRITE_INPLACE,
@@ -463,7 +478,7 @@ sna_video_copy_data(struct sna *sna,
if (frame->bo == NULL)
return false;
- memcpy(dst, buf, len);
+ memcpy(dst, buf, frame->size);
}
if (frame->id != FOURCC_I420) {
uint32_t tmp;
@@ -477,8 +492,8 @@ sna_video_copy_data(struct sna *sna,
if (frame->width*2 == frame->pitch[0]) {
if (frame->bo) {
kgem_bo_write(&sna->kgem, frame->bo,
- buf + (2U*frame->top * frame->width) + (frame->left << 1),
- 2U*frame->nlines*frame->width);
+ buf + (2U*frame->image.y1 * frame->width) + (frame->image.x1 << 1),
+ 2U*(frame->image.y2-frame->image.y1)*frame->width);
} else {
frame->bo = kgem_create_buffer(&sna->kgem, frame->size,
KGEM_BUFFER_WRITE | KGEM_BUFFER_WRITE_INPLACE,
@@ -487,8 +502,8 @@ sna_video_copy_data(struct sna *sna,
return false;
memcpy(dst,
- buf + (frame->top * frame->width*2) + (frame->left << 1),
- 2U*frame->nlines*frame->width);
+ buf + (frame->image.y1 * frame->width*2) + (frame->image.x1 << 1),
+ 2U*(frame->image.y2-frame->image.y1)*frame->width);
}
return true;
}
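
The rewritten fast path uploads planar data with a single write only when the destination pitches equal the natural 4-byte-aligned pitches of the cropped image and the image height matches the even-aligned frame height, i.e. when source and destination layouts coincide byte for byte. The shape of that check (field names follow the patch; the helper itself is illustrative):

#include <stdbool.h>

#define ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))

struct box { int x1, y1, x2, y2; };

/* True when the cropped planar image can be pushed to the bo with a
 * single contiguous write, i.e. no per-line repacking is needed. */
static bool planar_fits_single_write(const struct box *image,
				     int frame_height,
				     int pitch_chroma, int pitch_luma)
{
	int w = image->x2 - image->x1;
	int h = image->y2 - image->y1;

	return ALIGN(h, 2) == frame_height &&
	       ALIGN(w >> 1, 4) == pitch_chroma &&
	       ALIGN(w, 4) == pitch_luma;
}

int main(void)
{
	struct box img = { 0, 0, 320, 240 };

	return !planar_fits_single_write(&img, 240, 160, 320);
}
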
diff --git a/src/sna/sna_video.h b/src/sna/sna_video.h
index 3ce72c009..c0c023cf9 100644
--- a/src/sna/sna_video.h
+++ b/src/sna/sna_video.h
@@ -57,6 +57,8 @@ struct sna_video {
struct kgem_bo *old_buf[2];
struct kgem_bo *buf;
+ int alignment;
+ bool tiled;
bool textured;
Rotation rotation;
int plane;
@@ -75,8 +77,8 @@ struct sna_video_frame {
uint16_t pitch[2];
/* extents */
- uint16_t top, left;
- uint16_t npixels, nlines;
+ BoxRec image;
+ BoxRec src;
};
void sna_video_init(struct sna *sna, ScreenPtr screen);
diff --git a/src/sna/sna_video_hwmc.c b/src/sna/sna_video_hwmc.c
index b0e8d25d2..b3e065d95 100644
--- a/src/sna/sna_video_hwmc.c
+++ b/src/sna/sna_video_hwmc.c
@@ -36,63 +36,72 @@
#include <X11/extensions/XvMC.h>
#include <fourcc.h>
-static int create_subpicture(ScrnInfoPtr scrn, XvMCSubpicturePtr subpicture,
- int *num_priv, CARD32 ** priv)
+extern DevPrivateKey XF86XvScreenKey;
+
+static int create_subpicture(XvMCSubpicturePtr sub, int *size, CARD32 **priv)
{
return Success;
}
-static void destroy_subpicture(ScrnInfoPtr scrn, XvMCSubpicturePtr subpicture)
+static void destroy_subpicture(XvMCSubpicturePtr sub)
{
}
-static int create_surface(ScrnInfoPtr scrn, XvMCSurfacePtr surface,
- int *num_priv, CARD32 ** priv)
+static int create_surface(XvMCSurfacePtr surface, int *size, CARD32 **priv)
{
return Success;
}
-static void destroy_surface(ScrnInfoPtr scrn, XvMCSurfacePtr surface)
+static void destroy_surface(XvMCSurfacePtr surface)
{
}
-static int create_context(ScrnInfoPtr scrn, XvMCContextPtr pContext,
- int *num_priv, CARD32 **priv)
+static int create_context(XvPortPtr port, XvMCContextPtr ctx,
+ int *size, CARD32 **out)
{
- struct sna *sna = to_sna(scrn);
- struct sna_xvmc_hw_context *contextRec;
+ struct sna *sna = to_sna_from_screen(ctx->pScreen);
+ struct intel_xvmc_hw_context {
+ unsigned int type;
+ union {
+ struct {
+ unsigned int use_phys_addr : 1;
+ } i915;
+ struct {
+ unsigned int is_g4x:1;
+ unsigned int is_965_q:1;
+ unsigned int is_igdng:1;
+ } i965;
+ };
+ } *priv;
- *priv = calloc(1, sizeof(struct sna_xvmc_hw_context));
- contextRec = (struct sna_xvmc_hw_context *) *priv;
- if (!contextRec) {
- *num_priv = 0;
- return BadAlloc;
- }
+ ctx->port_priv = port->devPriv.ptr;
- *num_priv = sizeof(struct sna_xvmc_hw_context) >> 2;
+ priv = calloc(1, sizeof(*priv));
+ if (priv == NULL)
+ return BadAlloc;
- if (sna->kgem.gen >= 40) {
- if (sna->kgem.gen >= 45)
- contextRec->type = XVMC_I965_MPEG2_VLD;
+ if (sna->kgem.gen >= 040) {
+ if (sna->kgem.gen >= 045)
+ priv->type = XVMC_I965_MPEG2_VLD;
else
- contextRec->type = XVMC_I965_MPEG2_MC;
- contextRec->i965.is_g4x = sna->kgem.gen == 45;
- contextRec->i965.is_965_q = IS_965_Q(sna);
- contextRec->i965.is_igdng = sna->kgem.gen == 50;
- } else {
- contextRec->type = XVMC_I915_MPEG2_MC;
- contextRec->i915.use_phys_addr = 0;
- }
+ priv->type = XVMC_I965_MPEG2_MC;
+ priv->i965.is_g4x = sna->kgem.gen == 045;
+ priv->i965.is_965_q = IS_965_Q(sna);
+ priv->i965.is_igdng = sna->kgem.gen == 050;
+ } else
+ priv->type = XVMC_I915_MPEG2_MC;
+ *size = sizeof(*priv) >> 2;
+ *out = (CARD32 *)priv;
return Success;
}
-static void destroy_context(ScrnInfoPtr scrn, XvMCContextPtr context)
+static void destroy_context(XvMCContextPtr ctx)
{
}
/* i915 hwmc support */
-static XF86MCSurfaceInfoRec i915_YV12_mpg2_surface = {
+static XvMCSurfaceInfoRec i915_YV12_mpg2_surface = {
FOURCC_YV12,
XVMC_CHROMA_FORMAT_420,
0,
@@ -107,7 +116,7 @@ static XF86MCSurfaceInfoRec i915_YV12_mpg2_surface = {
NULL,
};
-static XF86MCSurfaceInfoRec i915_YV12_mpg1_surface = {
+static XvMCSurfaceInfoRec i915_YV12_mpg1_surface = {
FOURCC_YV12,
XVMC_CHROMA_FORMAT_420,
0,
@@ -121,9 +130,9 @@ static XF86MCSurfaceInfoRec i915_YV12_mpg1_surface = {
NULL,
};
-static XF86MCSurfaceInfoPtr surface_info_i915[2] = {
- (XF86MCSurfaceInfoPtr) & i915_YV12_mpg2_surface,
- (XF86MCSurfaceInfoPtr) & i915_YV12_mpg1_surface
+static XvMCSurfaceInfoPtr surface_info_i915[2] = {
+ &i915_YV12_mpg2_surface,
+ &i915_YV12_mpg1_surface
};
/* i965 and later hwmc support */
@@ -131,7 +140,7 @@ static XF86MCSurfaceInfoPtr surface_info_i915[2] = {
#define XVMC_VLD 0x00020000
#endif
-static XF86MCSurfaceInfoRec yv12_mpeg2_vld_surface = {
+static XvMCSurfaceInfoRec yv12_mpeg2_vld_surface = {
FOURCC_YV12,
XVMC_CHROMA_FORMAT_420,
0,
@@ -144,7 +153,7 @@ static XF86MCSurfaceInfoRec yv12_mpeg2_vld_surface = {
NULL
};
-static XF86MCSurfaceInfoRec yv12_mpeg2_i965_surface = {
+static XvMCSurfaceInfoRec yv12_mpeg2_i965_surface = {
FOURCC_YV12,
XVMC_CHROMA_FORMAT_420,
0,
@@ -159,7 +168,7 @@ static XF86MCSurfaceInfoRec yv12_mpeg2_i965_surface = {
NULL
};
-static XF86MCSurfaceInfoRec yv12_mpeg1_i965_surface = {
+static XvMCSurfaceInfoRec yv12_mpeg1_i965_surface = {
FOURCC_YV12,
XVMC_CHROMA_FORMAT_420,
0,
@@ -176,12 +185,12 @@ static XF86MCSurfaceInfoRec yv12_mpeg1_i965_surface = {
NULL
};
-static XF86MCSurfaceInfoPtr surface_info_i965[] = {
+static XvMCSurfaceInfoPtr surface_info_i965[] = {
&yv12_mpeg2_i965_surface,
&yv12_mpeg1_i965_surface
};
-static XF86MCSurfaceInfoPtr surface_info_vld[] = {
+static XvMCSurfaceInfoPtr surface_info_vld[] = {
&yv12_mpeg2_vld_surface,
&yv12_mpeg2_i965_surface,
};
@@ -191,63 +200,76 @@ Bool sna_video_xvmc_setup(struct sna *sna,
ScreenPtr screen,
XF86VideoAdaptorPtr target)
{
- XF86MCAdaptorRec *pAdapt;
+ XvMCAdaptorRec *adaptors;
+ XvScreenPtr xv;
const char *name;
- char buf[64];
+ char bus[64];
+ int i;
+
+ if (!xf86LoaderCheckSymbol("XvMCScreenInit"))
+ return FALSE;
/* Needs KMS support. */
- if (sna->kgem.gen < 31)
+ if (sna->kgem.gen < 031)
return FALSE;
/* Not implemented */
- if (sna->kgem.gen >= 60)
+ if (sna->kgem.gen >= 060)
return FALSE;
- pAdapt = calloc(1, sizeof(XF86MCAdaptorRec));
- if (!pAdapt)
+ adaptors = calloc(1, sizeof(XvMCAdaptorRec));
+ if (adaptors == NULL)
return FALSE;
- pAdapt->name = target->name;
- pAdapt->num_subpictures = 0;
- pAdapt->subpictures = NULL;
- pAdapt->CreateContext = create_context;
- pAdapt->DestroyContext = destroy_context;
- pAdapt->CreateSurface = create_surface;
- pAdapt->DestroySurface = destroy_surface;
- pAdapt->CreateSubpicture = create_subpicture;
- pAdapt->DestroySubpicture = destroy_subpicture;
-
- if (sna->kgem.gen >= 45) {
+ xv = dixLookupPrivate(&screen->devPrivates, XF86XvScreenKey);
+ for (i = 0; i< xv->nAdaptors;i++) {
+ if (strcmp(xv->pAdaptors[i].name, target->name) == 0) {
+ adaptors->xv_adaptor = &xv->pAdaptors[i];
+ break;
+ }
+ }
+ assert(adaptors->xv_adaptor);
+
+ adaptors->num_subpictures = 0;
+ adaptors->subpictures = NULL;
+ adaptors->CreateContext = create_context;
+ adaptors->DestroyContext = destroy_context;
+ adaptors->CreateSurface = create_surface;
+ adaptors->DestroySurface = destroy_surface;
+ adaptors->CreateSubpicture = create_subpicture;
+ adaptors->DestroySubpicture = destroy_subpicture;
+
+ if (sna->kgem.gen >= 045) {
name = "xvmc_vld",
- pAdapt->num_surfaces = ARRAY_SIZE(surface_info_vld);
- pAdapt->surfaces = surface_info_vld;
- } else if (sna->kgem.gen >= 40) {
+ adaptors->num_surfaces = ARRAY_SIZE(surface_info_vld);
+ adaptors->surfaces = surface_info_vld;
+ } else if (sna->kgem.gen >= 040) {
name = "i965_xvmc",
- pAdapt->num_surfaces = ARRAY_SIZE(surface_info_i965);
- pAdapt->surfaces = surface_info_i965;
+ adaptors->num_surfaces = ARRAY_SIZE(surface_info_i965);
+ adaptors->surfaces = surface_info_i965;
} else {
name = "i915_xvmc",
- pAdapt->num_surfaces = ARRAY_SIZE(surface_info_i915);
- pAdapt->surfaces = surface_info_i915;
+ adaptors->num_surfaces = ARRAY_SIZE(surface_info_i915);
+ adaptors->surfaces = surface_info_i915;
}
- if (xf86XvMCScreenInit(screen, 1, &pAdapt)) {
- xf86DrvMsg(sna->scrn->scrnIndex, X_INFO,
- "[XvMC] %s driver initialized.\n",
- name);
- } else {
+ if (XvMCScreenInit(screen, 1, adaptors) != Success) {
xf86DrvMsg(sna->scrn->scrnIndex, X_INFO,
"[XvMC] Failed to initialize XvMC.\n");
+ free(adaptors);
return FALSE;
}
- sprintf(buf, "pci:%04x:%02x:%02x.%d",
+ sprintf(bus, "pci:%04x:%02x:%02x.%d",
sna->PciInfo->domain,
sna->PciInfo->bus, sna->PciInfo->dev, sna->PciInfo->func);
- xf86XvMCRegisterDRInfo(screen, SNA_XVMC_LIBNAME,
- buf,
+ xf86XvMCRegisterDRInfo(screen, SNA_XVMC_LIBNAME, bus,
SNA_XVMC_MAJOR, SNA_XVMC_MINOR,
SNA_XVMC_PATCHLEVEL);
+
+ xf86DrvMsg(sna->scrn->scrnIndex, X_INFO,
+ "[XvMC] %s driver initialized.\n",
+ name);
return TRUE;
}
diff --git a/src/sna/sna_video_hwmc.h b/src/sna/sna_video_hwmc.h
index 2494d44bd..44de456e9 100644
--- a/src/sna/sna_video_hwmc.h
+++ b/src/sna/sna_video_hwmc.h
@@ -32,38 +32,12 @@
#define SNA_XVMC_MINOR 1
#define SNA_XVMC_PATCHLEVEL 0
-/*
- * Commands that client submits through XvPutImage:
- */
-
-#define SNA_XVMC_COMMAND_DISPLAY 0x00
-#define SNA_XVMC_COMMAND_UNDISPLAY 0x01
-
/* hw xvmc support type */
#define XVMC_I915_MPEG2_MC 0x01
#define XVMC_I965_MPEG2_MC 0x02
#define XVMC_I945_MPEG2_VLD 0x04
#define XVMC_I965_MPEG2_VLD 0x08
-struct sna_xvmc_hw_context {
- unsigned int type;
- union {
- struct {
- unsigned int use_phys_addr : 1;
- } i915;
- struct {
- unsigned int is_g4x:1;
- unsigned int is_965_q:1;
- unsigned int is_igdng:1;
- } i965;
- };
-};
-
-/* Intel private XvMC command to DDX driver */
-struct sna_xvmc_command {
- uint32_t handle;
-};
-
#ifdef _SNA_XVMC_SERVER_
#include <xf86xvmc.h>
Bool sna_video_xvmc_setup(struct sna *sna,
diff --git a/src/sna/sna_video_overlay.c b/src/sna/sna_video_overlay.c
index b73e9ddf8..3655b8763 100644
--- a/src/sna/sna_video_overlay.c
+++ b/src/sna/sna_video_overlay.c
@@ -41,7 +41,7 @@
#define MAKE_ATOM(a) MakeAtom(a, sizeof(a) - 1, TRUE)
-#define HAS_GAMMA(sna) ((sna)->kgem.gen >= 30)
+#define HAS_GAMMA(sna) ((sna)->kgem.gen >= 030)
static Atom xvBrightness, xvContrast, xvSaturation, xvColorKey, xvPipe;
static Atom xvGamma0, xvGamma1, xvGamma2, xvGamma3, xvGamma4, xvGamma5;
@@ -296,7 +296,7 @@ sna_video_overlay_query_best_size(ScrnInfoPtr scrn,
drw_h = vid_h >> 1;
}
- if (sna->kgem.gen < 21) {
+ if (sna->kgem.gen < 021) {
max_w = IMAGE_MAX_WIDTH_LEGACY;
max_h = IMAGE_MAX_HEIGHT_LEGACY;
} else {
@@ -532,6 +532,7 @@ sna_video_overlay_put_image(ScrnInfoPtr scrn,
return BadAlloc;
}
+ frame.bo->domain = DOMAIN_NONE;
sna_video_buffer_fini(sna, video);
/* update cliplist */
@@ -554,7 +555,7 @@ sna_video_overlay_query_video_attributes(ScrnInfoPtr scrn,
DBG(("%s: w is %d, h is %d\n", __FUNCTION__, *w, *h));
- if (sna->kgem.gen < 21) {
+ if (sna->kgem.gen < 021) {
if (*w > IMAGE_MAX_WIDTH_LEGACY)
*w = IMAGE_MAX_WIDTH_LEGACY;
if (*h > IMAGE_MAX_HEIGHT_LEGACY)
@@ -664,7 +665,7 @@ XF86VideoAdaptorPtr sna_video_overlay_setup(struct sna *sna,
adaptor->nEncodings = 1;
adaptor->pEncodings = xnfalloc(sizeof(DummyEncoding));
memcpy(adaptor->pEncodings, DummyEncoding, sizeof(DummyEncoding));
- if (sna->kgem.gen < 21) {
+ if (sna->kgem.gen < 021) {
adaptor->pEncodings->width = IMAGE_MAX_WIDTH_LEGACY;
adaptor->pEncodings->height = IMAGE_MAX_HEIGHT_LEGACY;
}
@@ -701,6 +702,18 @@ XF86VideoAdaptorPtr sna_video_overlay_setup(struct sna *sna,
adaptor->PutImage = sna_video_overlay_put_image;
adaptor->QueryImageAttributes = sna_video_overlay_query_video_attributes;
+ if (sna->kgem.gen >= 040)
+ /* Actually the alignment is 64 bytes, too. But the
+ * stride must be at least 512 bytes. Take the easy fix
+ * and align on 512 bytes unconditionally. */
+ video->alignment = 512;
+ else if (sna->kgem.gen < 021)
+ /* Harsh, errata on these chipsets limit the stride
+ * to be a multiple of 256 bytes.
+ */
+ video->alignment = 256;
+ else
+ video->alignment = 64;
video->textured = false;
video->color_key = sna_video_overlay_color_key(sna);
video->brightness = -19; /* (255/219) * -16 */
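
The default brightness of -19 comes from rescaling the video-range black offset: BT.601 narrow range spans 219 luma levels (16..235), so mapping a -16 video-level bias onto the full 0..255 range gives -16 * 255/219, roughly -18.6, which rounds to -19. The arithmetic:

#include <stdio.h>

int main(void)
{
	/* BT.601 narrow range: 219 levels of luma (16..235) mapped
	 * onto the full 0..255 range; a -16 video-level bias becomes: */
	double bias = -16.0 * 255.0 / 219.0;	/* ~ -18.63 */

	printf("brightness default = %.2f -> %d\n", bias, (int)(bias - 0.5));
	return 0;
}
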
diff --git a/src/sna/sna_video_sprite.c b/src/sna/sna_video_sprite.c
index a912590fe..7737460b5 100644
--- a/src/sna/sna_video_sprite.c
+++ b/src/sna/sna_video_sprite.c
@@ -37,8 +37,11 @@
#include <xf86xv.h>
#include <X11/extensions/Xv.h>
#include <fourcc.h>
-#include <drm_fourcc.h>
#include <i915_drm.h>
+#include <errno.h>
+
+#ifdef DRM_IOCTL_MODE_GETPLANERESOURCES
+#include <drm_fourcc.h>
#define IMAGE_MAX_WIDTH 2048
#define IMAGE_MAX_HEIGHT 2048
@@ -60,13 +63,14 @@ static XF86AttributeRec attribs[] = {
static void sna_video_sprite_off(struct sna *sna, struct sna_video *video)
{
+ struct drm_mode_set_plane s;
+
if (video->plane == 0)
return;
- if (drmModeSetPlane(sna->kgem.fd,
- video->plane, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0))
+ memset(&s, 0, sizeof(s));
+ s.plane_id = video->plane;
+ if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETPLANE, &s))
xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR,
"failed to disable plane\n");
@@ -114,8 +118,15 @@ static void sna_video_sprite_best_size(ScrnInfoPtr scrn, Bool motion,
unsigned int *p_w, unsigned int *p_h,
pointer data)
{
- *p_w = vid_w;
- *p_h = vid_h;
+ struct sna *sna = to_sna(scrn);
+
+ if (sna->kgem.gen == 075) {
+ *p_w = vid_w;
+ *p_h = vid_h;
+ } else {
+ *p_w = drw_w;
+ *p_h = drw_h;
+ }
}
static void
@@ -174,7 +185,10 @@ sna_video_sprite_show(struct sna *sna,
xf86CrtcPtr crtc,
BoxPtr dstBox)
{
- int plane = sna_crtc_to_plane(crtc);
+ struct drm_mode_set_plane s;
+
+ VG_CLEAR(s);
+ s.plane_id = sna_crtc_to_plane(crtc);
update_dst_box_to_crtc_coords(sna, crtc, dstBox);
if (crtc->rotation & (RR_Rotate_90 | RR_Rotate_270)) {
@@ -184,13 +198,13 @@ sna_video_sprite_show(struct sna *sna,
}
#if defined(DRM_I915_SET_SPRITE_DESTKEY)
- if (video->color_key_changed || video->plane != plane) {
+ if (video->color_key_changed || video->plane != s.plane_id) {
struct drm_intel_set_sprite_destkey set;
DBG(("%s: updating color key: %x\n",
__FUNCTION__, video->color_key));
- set.plane_id = plane;
+ set.plane_id = s.plane_id;
set.value = video->color_key;
if (drmIoctl(sna->kgem.fd,
@@ -221,8 +235,9 @@ sna_video_sprite_show(struct sna *sna,
pitches[0] = frame->pitch[0];
offsets[0] = 0;
- DBG(("%s: creating new fb for handle=%d\n",
- __FUNCTION__, frame->bo->handle));
+ DBG(("%s: creating new fb for handle=%d, width=%d, height=%d, stride=%d\n",
+ __FUNCTION__, frame->bo->handle,
+ frame->width, frame->height, frame->pitch[0]));
if (drmModeAddFB2(sna->kgem.fd,
frame->width, frame->height, pixel_format,
@@ -236,21 +251,33 @@ sna_video_sprite_show(struct sna *sna,
frame->bo->scanout = true;
}
- DBG(("%s: updating plane=%d, handle=%d [fb %d], dst=(%d,%d)x(%d,%d)\n",
- __FUNCTION__, plane, frame->bo->handle, frame->bo->delta,
- dstBox->x1, dstBox->y1,
- dstBox->x2 - dstBox->x1, dstBox->y2 - dstBox->y1));
assert(frame->bo->scanout);
assert(frame->bo->delta);
- if (drmModeSetPlane(sna->kgem.fd,
- plane, sna_crtc_id(crtc), frame->bo->delta, 0,
- dstBox->x1, dstBox->y1,
- dstBox->x2 - dstBox->x1, dstBox->y2 - dstBox->y1,
- 0, 0, frame->width << 16, frame->height << 16))
+ s.crtc_id = sna_crtc_id(crtc);
+ s.fb_id = frame->bo->delta;
+ s.flags = 0;
+ s.crtc_x = dstBox->x1;
+ s.crtc_y = dstBox->y1;
+ s.crtc_w = dstBox->x2 - dstBox->x1;
+ s.crtc_h = dstBox->y2 - dstBox->y1;
+ s.src_x = 0;
+ s.src_y = 0;
+ s.src_w = (frame->image.x2 - frame->image.x1) << 16;
+ s.src_h = (frame->image.y2 - frame->image.y1) << 16;
+
+ DBG(("%s: updating crtc=%d, plane=%d, handle=%d [fb %d], dst=(%d,%d)x(%d,%d), src=(%d,%d)x(%d,%d)\n",
+ __FUNCTION__, s.crtc_id, s.plane_id, frame->bo->handle, s.fb_id,
+ s.crtc_x, s.crtc_y, s.crtc_w, s.crtc_h,
+ s.src_x >> 16, s.src_y >> 16, s.src_w >> 16, s.src_h >> 16));
+
+ if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETPLANE, &s)) {
+ DBG(("SET_PLANE failed: ret=%d\n", errno));
return false;
+ }
- video->plane = plane;
+ frame->bo->domain = DOMAIN_NONE;
+ video->plane = s.plane_id;
return true;
}
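
The sprite path now issues DRM_IOCTL_MODE_SETPLANE directly; note the asymmetry in struct drm_mode_set_plane: the crtc_* fields take integer pixels while src_* are 16.16 fixed point, hence the << 16 on the source extents above. A hedged sketch of the call with error handling trimmed (header paths depend on the libdrm include directory; fb/crtc/plane ids are assumed already known, and a raw ioctl stands in for drmIoctl, which additionally restarts on EINTR):

#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <drm.h>	/* pulls in drm_mode.h via libdrm's include path */

/* Show a w x h source image full-size at (x, y) on the given CRTC.
 * Destination coordinates are whole pixels; source is 16.16 fixed. */
static int show_sprite(int fd, uint32_t plane_id, uint32_t crtc_id,
		       uint32_t fb_id, int x, int y, int w, int h)
{
	struct drm_mode_set_plane s;

	memset(&s, 0, sizeof(s));
	s.plane_id = plane_id;
	s.crtc_id = crtc_id;
	s.fb_id = fb_id;
	s.crtc_x = x;
	s.crtc_y = y;
	s.crtc_w = w;
	s.crtc_h = h;
	s.src_x = 0;
	s.src_y = 0;
	s.src_w = (uint32_t)w << 16;	/* 16.16 fixed-point source size */
	s.src_h = (uint32_t)h << 16;

	return ioctl(fd, DRM_IOCTL_MODE_SETPLANE, &s) ? -errno : 0;
}
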
@@ -278,7 +305,7 @@ static int sna_video_sprite_put_image(ScrnInfoPtr scrn,
clip))
return Success;
- if (!crtc || !sna_crtc_to_plane(crtc)) {
+ if (!crtc || sna_crtc_to_plane(crtc) == 0) {
/* If the video isn't visible on any CRTC, turn it off */
sna_video_sprite_off(sna, video);
return Success;
@@ -370,6 +397,7 @@ XF86VideoAdaptorPtr sna_video_sprite_setup(struct sna *sna,
memset(&r, 0, sizeof(struct drm_mode_get_plane_res));
if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPLANERESOURCES, &r))
return NULL;
+ DBG(("%s: %d sprite planes\n", __FUNCTION__, r.count_planes));
if (r.count_planes == 0)
return NULL;
@@ -411,7 +439,7 @@ XF86VideoAdaptorPtr sna_video_sprite_setup(struct sna *sna,
adaptor->PutImage = sna_video_sprite_put_image;
adaptor->QueryImageAttributes = sna_video_sprite_query_attrs;
- video->textured = false;
+ video->alignment = 64;
video->color_key = sna_video_sprite_color_key(sna);
video->color_key_changed = true;
video->brightness = -19; /* (255/219) * -16 */
@@ -433,3 +461,9 @@ XF86VideoAdaptorPtr sna_video_sprite_setup(struct sna *sna,
return adaptor;
}
+#else
+XF86VideoAdaptorPtr sna_video_sprite_setup(struct sna *sna, ScreenPtr screen)
+{
+ return NULL;
+}
+#endif
diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c
index 27fc09f47..e5cae859e 100644
--- a/src/sna/sna_video_textured.c
+++ b/src/sna/sna_video_textured.c
@@ -254,7 +254,7 @@ sna_video_textured_put_image(ScrnInfoPtr scrn,
DBG(("%s: using passthough, name=%d\n",
__FUNCTION__, *(uint32_t *)buf));
- if (sna->kgem.gen < 31) {
+ if (sna->kgem.gen < 031) {
/* XXX: i915 is not supported and needs some
* serious care. grep for KMS in i915_hwmc.c */
return BadAlloc;
@@ -267,6 +267,10 @@ sna_video_textured_put_image(ScrnInfoPtr scrn,
}
assert(kgem_bo_size(frame.bo) >= frame.size);
+ frame.image.x1 = 0;
+ frame.image.y1 = 0;
+ frame.image.x2 = frame.width;
+ frame.image.y2 = frame.height;
} else {
if (!sna_video_copy_data(sna, video, &frame, buf)) {
DBG(("%s: failed to copy frame\n", __FUNCTION__));
@@ -276,15 +280,17 @@ sna_video_textured_put_image(ScrnInfoPtr scrn,
}
if (crtc && video->SyncToVblank != 0 &&
- sna_pixmap_is_scanout(sna, pixmap))
+ sna_pixmap_is_scanout(sna, pixmap)) {
+ kgem_set_mode(&sna->kgem, KGEM_RENDER, sna_pixmap(pixmap)->gpu_bo);
flush = sna_wait_for_scanline(sna, pixmap, crtc,
&clip->extents);
+ }
ret = Success;
if (!sna->render.video(sna, video, &frame, clip,
- src_w, src_h,
- drw_w, drw_h,
- pixmap)) {
+ src_w, src_h, drw_w, drw_h,
+ drw_x - src_x, drw_y - src_y,
+ pixmap)) {
DBG(("%s: failed to render video\n", __FUNCTION__));
ret = BadAlloc;
} else
@@ -355,7 +361,7 @@ sna_video_textured_query(ScrnInfoPtr scrn,
#ifdef SNA_XVMC
case FOURCC_XVMC:
*h = (*h + 1) & ~1;
- size = sizeof(struct sna_xvmc_command);
+ size = sizeof(uint32_t);
if (pitches)
pitches[0] = size;
break;
@@ -447,6 +453,7 @@ XF86VideoAdaptorPtr sna_video_textured_setup(struct sna *sna,
struct sna_video *v = &video[i];
v->textured = true;
+ v->alignment = 4;
v->rotation = RR_Rotate_0;
v->SyncToVblank = 1;
diff --git a/src/xvmc/Makefile.am b/src/xvmc/Makefile.am
index d3ed4499a..51c98b0c8 100644
--- a/src/xvmc/Makefile.am
+++ b/src/xvmc/Makefile.am
@@ -17,7 +17,8 @@ libIntelXvMC_la_SOURCES = intel_xvmc.c \
intel_batchbuffer.h
AM_CFLAGS = @XORG_CFLAGS@ @DRM_CFLAGS@ @DRI_CFLAGS@ \
- @XVMCLIB_CFLAGS@ -I$(top_srcdir)/src -DTRUE=1 -DFALSE=0
+ @XVMCLIB_CFLAGS@ @XCB_CFLAGS@ \
+ -I$(top_srcdir)/src -DTRUE=1 -DFALSE=0
libIntelXvMC_la_LDFLAGS = -version-number 1:0:0
-libIntelXvMC_la_LIBADD = @DRI_LIBS@ @DRM_LIBS@ @XVMCLIB_LIBS@ -lpthread -ldrm_intel
+libIntelXvMC_la_LIBADD = @DRI_LIBS@ @DRM_LIBS@ @XVMCLIB_LIBS@ @XCB_LIBS@ @DRMINTEL_LIBS@ -lpthread
diff --git a/src/xvmc/Makefile.in b/src/xvmc/Makefile.in
index 028ea3729..36f565528 100644
--- a/src/xvmc/Makefile.in
+++ b/src/xvmc/Makefile.in
@@ -245,7 +245,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MAN_SUBSTS = @MAN_SUBSTS@
@@ -284,6 +283,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@
VERSION = @VERSION@
X11_CFLAGS = @X11_CFLAGS@
X11_LIBS = @X11_LIBS@
+XCB_CFLAGS = @XCB_CFLAGS@
+XCB_LIBS = @XCB_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_LIBS = @XORG_LIBS@
XORG_MAN_PAGE = @XORG_MAN_PAGE@
@@ -357,15 +358,16 @@ libIntelXvMC_la_SOURCES = intel_xvmc.c \
intel_batchbuffer.h
AM_CFLAGS = @XORG_CFLAGS@ @DRM_CFLAGS@ @DRI_CFLAGS@ \
- @XVMCLIB_CFLAGS@ -I$(top_srcdir)/src -DTRUE=1 -DFALSE=0
+ @XVMCLIB_CFLAGS@ @XCB_CFLAGS@ \
+ -I$(top_srcdir)/src -DTRUE=1 -DFALSE=0
libIntelXvMC_la_LDFLAGS = -version-number 1:0:0
-libIntelXvMC_la_LIBADD = @DRI_LIBS@ @DRM_LIBS@ @XVMCLIB_LIBS@ -lpthread -ldrm_intel
+libIntelXvMC_la_LIBADD = @DRI_LIBS@ @DRM_LIBS@ @XVMCLIB_LIBS@ @XCB_LIBS@ @DRMINTEL_LIBS@ -lpthread
all: all-recursive
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -390,9 +392,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
install-libLTLIBRARIES: $(lib_LTLIBRARIES)
diff --git a/src/xvmc/shader/Makefile.in b/src/xvmc/shader/Makefile.in
index 04fe1979c..1910c289b 100644
--- a/src/xvmc/shader/Makefile.in
+++ b/src/xvmc/shader/Makefile.in
@@ -183,7 +183,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MAN_SUBSTS = @MAN_SUBSTS@
@@ -222,6 +221,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@
VERSION = @VERSION@
X11_CFLAGS = @X11_CFLAGS@
X11_LIBS = @X11_LIBS@
+XCB_CFLAGS = @XCB_CFLAGS@
+XCB_LIBS = @XCB_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_LIBS = @XORG_LIBS@
XORG_MAN_PAGE = @XORG_MAN_PAGE@
@@ -284,7 +285,7 @@ SUBDIRS = mc vld
all: all-recursive
.SUFFIXES:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -309,9 +310,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
diff --git a/src/xvmc/shader/mc/Makefile.am b/src/xvmc/shader/mc/Makefile.am
index c1bff77dd..8d6576917 100644
--- a/src/xvmc/shader/mc/Makefile.am
+++ b/src/xvmc/shader/mc/Makefile.am
@@ -109,9 +109,9 @@ if HAVE_GEN4ASM
SUFFIXES = .g4a .g4b
.g4a.g4b:
- m4 -I$(srcdir) $(srcdir)/$*.g4a > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m
+ $(AM_V_GEN)m4 -I$(srcdir) $(srcdir)/$*.g4a > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m
-$(INTEL_G4B): $(INTEL_G4I)
+$(INTEL_G4B): $(INTEL_GEN4ASM) $(INTEL_G4I)
BUILT_SOURCES= $(INTEL_G4B)
diff --git a/src/xvmc/shader/mc/Makefile.in b/src/xvmc/shader/mc/Makefile.in
index 165ebc807..1a196be3e 100644
--- a/src/xvmc/shader/mc/Makefile.in
+++ b/src/xvmc/shader/mc/Makefile.in
@@ -143,7 +143,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MAN_SUBSTS = @MAN_SUBSTS@
@@ -182,6 +181,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@
VERSION = @VERSION@
X11_CFLAGS = @X11_CFLAGS@
X11_LIBS = @X11_LIBS@
+XCB_CFLAGS = @XCB_CFLAGS@
+XCB_LIBS = @XCB_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_LIBS = @XORG_LIBS@
XORG_MAN_PAGE = @XORG_MAN_PAGE@
@@ -353,7 +354,7 @@ all: $(BUILT_SOURCES)
.SUFFIXES:
.SUFFIXES: .g4a .g4b
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -378,9 +379,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
@@ -545,9 +546,9 @@ uninstall-am:
uninstall uninstall-am
@HAVE_GEN4ASM_TRUE@.g4a.g4b:
-@HAVE_GEN4ASM_TRUE@ m4 -I$(srcdir) $(srcdir)/$*.g4a > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m
+@HAVE_GEN4ASM_TRUE@ $(AM_V_GEN)m4 -I$(srcdir) $(srcdir)/$*.g4a > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m
-@HAVE_GEN4ASM_TRUE@$(INTEL_G4B): $(INTEL_G4I)
+@HAVE_GEN4ASM_TRUE@$(INTEL_G4B): $(INTEL_GEN4ASM) $(INTEL_G4I)
@HAVE_GEN4ASM_TRUE@clean-local:
@HAVE_GEN4ASM_TRUE@ -rm -f $(INTEL_G4B)
diff --git a/src/xvmc/shader/vld/Makefile.am b/src/xvmc/shader/vld/Makefile.am
index 9280f1513..8f1047e02 100644
--- a/src/xvmc/shader/vld/Makefile.am
+++ b/src/xvmc/shader/vld/Makefile.am
@@ -62,9 +62,9 @@ if HAVE_GEN4ASM
SUFFIXES = .g4a .g4b
.g4a.g4b:
- m4 $*.g4a > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m
+ $(AM_V_GEN)m4 $*.g4a > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m
-$(INTEL_G4B): $(INTEL_G4I)
+$(INTEL_G4B): $(INTEL_GEN4ASM) $(INTEL_G4I)
BUILT_SOURCES= $(INTEL_G4B)
diff --git a/src/xvmc/shader/vld/Makefile.in b/src/xvmc/shader/vld/Makefile.in
index 5cf44c18f..595948ebc 100644
--- a/src/xvmc/shader/vld/Makefile.in
+++ b/src/xvmc/shader/vld/Makefile.in
@@ -143,7 +143,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MAN_SUBSTS = @MAN_SUBSTS@
@@ -182,6 +181,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@
VERSION = @VERSION@
X11_CFLAGS = @X11_CFLAGS@
X11_LIBS = @X11_LIBS@
+XCB_CFLAGS = @XCB_CFLAGS@
+XCB_LIBS = @XCB_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_LIBS = @XORG_LIBS@
XORG_MAN_PAGE = @XORG_MAN_PAGE@
@@ -306,7 +307,7 @@ all: $(BUILT_SOURCES)
.SUFFIXES:
.SUFFIXES: .g4a .g4b
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -331,9 +332,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
@@ -498,9 +499,9 @@ uninstall-am:
uninstall uninstall-am
@HAVE_GEN4ASM_TRUE@.g4a.g4b:
-@HAVE_GEN4ASM_TRUE@ m4 $*.g4a > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m
+@HAVE_GEN4ASM_TRUE@ $(AM_V_GEN)m4 $*.g4a > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m
-@HAVE_GEN4ASM_TRUE@$(INTEL_G4B): $(INTEL_G4I)
+@HAVE_GEN4ASM_TRUE@$(INTEL_G4B): $(INTEL_GEN4ASM) $(INTEL_G4I)
@HAVE_GEN4ASM_TRUE@clean-local:
@HAVE_GEN4ASM_TRUE@ -rm -f $(INTEL_G4B)
diff --git a/test/Makefile.am b/test/Makefile.am
index 96c87f824..0f9bd7d09 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -21,6 +21,8 @@ stress_TESTS = \
check_PROGRAMS = $(stress_TESTS)
+noinst_PROGRAMS = lowlevel-blt-bench
+
AM_CFLAGS = @CWARNFLAGS@ @X11_CFLAGS@ @DRM_CFLAGS@
LDADD = libtest.la @X11_LIBS@ -lXfixes @DRM_LIBS@ -lrt
@@ -35,4 +37,11 @@ libtest_la_SOURCES = \
dri2.h \
$(NULL)
-EXTRA_DIST = README
+vsync.avi: mkvsync.sh
+ ./mkvsync.sh $@
+
+clean-vsync-avi:
+ rm -rf vsync.avi .build.tmp
+
+EXTRA_DIST = README mkvsync.sh
+clean-local: clean-vsync-avi
diff --git a/test/Makefile.in b/test/Makefile.in
index 315802172..b462d6f4b 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -15,6 +15,7 @@
@SET_MAKE@
+
VPATH = @srcdir@
am__make_dryrun = \
{ \
@@ -52,6 +53,7 @@ POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
check_PROGRAMS = $(am__EXEEXT_1)
+noinst_PROGRAMS = lowlevel-blt-bench$(EXEEXT)
subdir = test
DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
@@ -79,6 +81,7 @@ am__EXEEXT_1 = basic-fillrect$(EXEEXT) basic-rectangle$(EXEEXT) \
render-composite-solid$(EXEEXT) render-copyarea$(EXEEXT) \
render-copyarea-size$(EXEEXT) render-copy-alphaless$(EXEEXT) \
mixed-stress$(EXEEXT) dri2-swap$(EXEEXT)
+PROGRAMS = $(noinst_PROGRAMS)
basic_copyarea_SOURCES = basic-copyarea.c
basic_copyarea_OBJECTS = basic-copyarea.$(OBJEXT)
basic_copyarea_LDADD = $(LDADD)
@@ -115,6 +118,10 @@ dri2_swap_SOURCES = dri2-swap.c
dri2_swap_OBJECTS = dri2-swap.$(OBJEXT)
dri2_swap_LDADD = $(LDADD)
dri2_swap_DEPENDENCIES = libtest.la
+lowlevel_blt_bench_SOURCES = lowlevel-blt-bench.c
+lowlevel_blt_bench_OBJECTS = lowlevel-blt-bench.$(OBJEXT)
+lowlevel_blt_bench_LDADD = $(LDADD)
+lowlevel_blt_bench_DEPENDENCIES = libtest.la
mixed_stress_SOURCES = mixed-stress.c
mixed_stress_OBJECTS = mixed-stress.$(OBJEXT)
mixed_stress_LDADD = $(LDADD)
@@ -180,14 +187,14 @@ am__v_GEN_0 = @echo " GEN " $@;
SOURCES = $(libtest_la_SOURCES) basic-copyarea.c basic-copyarea-size.c \
basic-fillrect.c basic-lines.c basic-putimage.c \
basic-rectangle.c basic-stress.c basic-string.c dri2-swap.c \
- mixed-stress.c render-composite-solid.c \
+ lowlevel-blt-bench.c mixed-stress.c render-composite-solid.c \
render-copy-alphaless.c render-copyarea.c \
render-copyarea-size.c render-fill.c render-fill-copy.c \
render-trapezoid.c render-trapezoid-image.c
DIST_SOURCES = $(libtest_la_SOURCES) basic-copyarea.c \
basic-copyarea-size.c basic-fillrect.c basic-lines.c \
basic-putimage.c basic-rectangle.c basic-stress.c \
- basic-string.c dri2-swap.c mixed-stress.c \
+ basic-string.c dri2-swap.c lowlevel-blt-bench.c mixed-stress.c \
render-composite-solid.c render-copy-alphaless.c \
render-copyarea.c render-copyarea-size.c render-fill.c \
render-fill-copy.c render-trapezoid.c render-trapezoid-image.c
@@ -268,7 +275,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MAN_SUBSTS = @MAN_SUBSTS@
@@ -307,6 +313,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@
VERSION = @VERSION@
X11_CFLAGS = @X11_CFLAGS@
X11_LIBS = @X11_LIBS@
+XCB_CFLAGS = @XCB_CFLAGS@
+XCB_LIBS = @XCB_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_LIBS = @XORG_LIBS@
XORG_MAN_PAGE = @XORG_MAN_PAGE@
@@ -399,12 +407,12 @@ libtest_la_SOURCES = \
dri2.h \
$(NULL)
-EXTRA_DIST = README
+EXTRA_DIST = README mkvsync.sh
all: all-am
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -429,9 +437,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
@@ -454,6 +462,15 @@ clean-checkPROGRAMS:
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
+
+clean-noinstPROGRAMS:
+ @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \
+ echo " rm -f" $$list; \
+ rm -f $$list || exit $$?; \
+ test -n "$(EXEEXT)" || exit 0; \
+ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f" $$list; \
+ rm -f $$list
basic-copyarea$(EXEEXT): $(basic_copyarea_OBJECTS) $(basic_copyarea_DEPENDENCIES) $(EXTRA_basic_copyarea_DEPENDENCIES)
@rm -f basic-copyarea$(EXEEXT)
$(AM_V_CCLD)$(LINK) $(basic_copyarea_OBJECTS) $(basic_copyarea_LDADD) $(LIBS)
@@ -481,6 +498,9 @@ basic-string$(EXEEXT): $(basic_string_OBJECTS) $(basic_string_DEPENDENCIES) $(EX
dri2-swap$(EXEEXT): $(dri2_swap_OBJECTS) $(dri2_swap_DEPENDENCIES) $(EXTRA_dri2_swap_DEPENDENCIES)
@rm -f dri2-swap$(EXEEXT)
$(AM_V_CCLD)$(LINK) $(dri2_swap_OBJECTS) $(dri2_swap_LDADD) $(LIBS)
+lowlevel-blt-bench$(EXEEXT): $(lowlevel_blt_bench_OBJECTS) $(lowlevel_blt_bench_DEPENDENCIES) $(EXTRA_lowlevel_blt_bench_DEPENDENCIES)
+ @rm -f lowlevel-blt-bench$(EXEEXT)
+ $(AM_V_CCLD)$(LINK) $(lowlevel_blt_bench_OBJECTS) $(lowlevel_blt_bench_LDADD) $(LIBS)
mixed-stress$(EXEEXT): $(mixed_stress_OBJECTS) $(mixed_stress_DEPENDENCIES) $(EXTRA_mixed_stress_DEPENDENCIES)
@rm -f mixed-stress$(EXEEXT)
$(AM_V_CCLD)$(LINK) $(mixed_stress_OBJECTS) $(mixed_stress_LDADD) $(LIBS)
@@ -525,6 +545,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/basic-string.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dri2-swap.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dri2.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lowlevel-blt-bench.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mixed-stress.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/render-composite-solid.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/render-copy-alphaless.Po@am__quote@
@@ -651,7 +672,7 @@ distdir: $(DISTFILES)
check-am: all-am
$(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
check: check-am
-all-am: Makefile $(LTLIBRARIES)
+all-am: Makefile $(LTLIBRARIES) $(PROGRAMS)
installdirs:
install: install-am
install-exec: install-exec-am
@@ -685,8 +706,8 @@ maintainer-clean-generic:
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
-clean-am: clean-checkPROGRAMS clean-generic clean-libtool \
- clean-noinstLTLIBRARIES mostlyclean-am
+clean-am: clean-checkPROGRAMS clean-generic clean-libtool clean-local \
+ clean-noinstLTLIBRARIES clean-noinstPROGRAMS mostlyclean-am
distclean: distclean-am
-rm -rf ./$(DEPDIR)
@@ -757,19 +778,26 @@ uninstall-am:
.MAKE: check-am install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean \
- clean-checkPROGRAMS clean-generic clean-libtool \
- clean-noinstLTLIBRARIES ctags distclean distclean-compile \
- distclean-generic distclean-libtool distclean-tags distdir dvi \
- dvi-am html html-am info info-am install install-am \
- install-data install-data-am install-dvi install-dvi-am \
- install-exec install-exec-am install-html install-html-am \
- install-info install-info-am install-man install-pdf \
- install-pdf-am install-ps install-ps-am install-strip \
- installcheck installcheck-am installdirs maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
- tags uninstall uninstall-am
-
+ clean-checkPROGRAMS clean-generic clean-libtool clean-local \
+ clean-noinstLTLIBRARIES clean-noinstPROGRAMS ctags distclean \
+ distclean-compile distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am tags uninstall uninstall-am
+
+
+vsync.avi: mkvsync.sh
+ ./mkvsync.sh $@
+
+clean-vsync-avi:
+ rm -rf vsync.avi .build.tmp
+clean-local: clean-vsync-avi
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
diff --git a/test/lowlevel-blt-bench.c b/test/lowlevel-blt-bench.c
new file mode 100644
index 000000000..0cea0a81a
--- /dev/null
+++ b/test/lowlevel-blt-bench.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright © 2009 Nokia Corporation
+ * Copyright © 2010 Movial Creative Technologies Oy
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include <X11/X.h>
+#include <X11/Xutil.h> /* for XDestroyImage */
+#include <pixman.h> /* for pixman blt functions */
+
+#include "test.h"
+
+static const struct format {
+ const char *name;
+ pixman_format_code_t pixman_format;
+} formats[] = {
+ { "a8r8g8b8", PIXMAN_a8r8g8b8 },
+ { "x8r8g8b8", PIXMAN_x8r8g8b8 },
+ { "a8", PIXMAN_a8 },
+ { "a4", PIXMAN_a4 },
+ { "a1", PIXMAN_a1 },
+};
+
+static const struct op {
+ const char *name;
+} ops[] = {
+ [PictOpClear] = { "Clear" },
+ [PictOpSrc] = { "Src" },
+ [PictOpDst] = { "Dst" },
+ [PictOpOver] = { "Over" },
+ [PictOpOverReverse] = { "OverReverse" },
+ [PictOpIn] = { "In" },
+ [PictOpInReverse] = { "InReverse" },
+ [PictOpOut] = { "Out" },
+ [PictOpOutReverse] = { "OutReverse" },
+ [PictOpAtop] = { "Atop" },
+ [PictOpAtopReverse] = { "AtopReverse" },
+ [PictOpXor] = { "Xor" },
+ [PictOpAdd] = { "Add" },
+ [PictOpSaturate] = { "Saturate" },
+};
+
+static double _bench(struct test_display *t, enum target target_type,
+ int op, int src_format,
+ int loops)
+{
+ XRenderColor render_color = { 0x8000, 0x8000, 0x8000, 0x8000 };
+ struct test_target target;
+ Pixmap pixmap;
+ Picture picture;
+ struct timespec tv;
+ double elapsed;
+
+ test_target_create_render(t, target_type, &target);
+ XRenderFillRectangle(t->dpy, PictOpClear, target.picture, &render_color,
+ 0, 0, target.width, target.height);
+
+ pixmap = XCreatePixmap(t->dpy, t->root,
+ target.width, target.height,
+ PIXMAN_FORMAT_DEPTH(formats[src_format].pixman_format));
+
+ picture = XRenderCreatePicture(t->dpy, pixmap,
+ XRenderFindStandardFormat(t->dpy, src_format),
+ 0, NULL);
+ XRenderFillRectangle(t->dpy, PictOpSrc, picture, &render_color,
+ 0, 0, target.width, target.height);
+
+ test_timer_start(t, &tv);
+ while (loops--)
+ XRenderComposite(t->dpy, op,
+ picture, 0, target.picture,
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ target.width, target.height);
+ elapsed = test_timer_stop(t, &tv);
+
+ XRenderFreePicture(t->dpy, picture);
+ XFreePixmap(t->dpy, pixmap);
+ test_target_destroy_render(t, &target);
+
+ return elapsed;
+}
+
+static void bench(struct test *t, enum target target, int op, int sf)
+{
+ double real, ref;
+
+ ref = _bench(&t->ref, target, op, sf, 1000);
+ real = _bench(&t->real, target, op, sf, 1000);
+
+ fprintf (stdout, "Testing %s with %s: ref=%f, real=%f\n",
+ formats[sf].name, ops[op].name, ref, real);
+}
+
+int main(int argc, char **argv)
+{
+ struct test test;
+ int op, sf;
+
+ test_init(&test, argc, argv);
+
+ for (op = 0; op < sizeof(ops)/sizeof(ops[0]); op++) {
+ for (sf = 0; sf < sizeof(formats)/sizeof(formats[0]); sf++)
+ bench(&test, ROOT, op, sf);
+ fprintf (stdout, "\n");
+ }
+
+ return 0;
+}
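
[Note: the new lowlevel-blt-bench test above reports raw elapsed seconds for each (op, format) pair on both the real and reference displays. A minimal standalone sketch of how such a time could be converted into a throughput figure; the helper throughput_mpix is hypothetical and not part of the patch, and the 640x480/1000-iteration numbers merely mirror the loop count used in bench() above.]

/* Hypothetical helper, not part of the patch: converts the elapsed
 * seconds returned by _bench() into megapixels per second. Assumes
 * 'width' and 'height' match the test target and 'loops' matches the
 * iteration count passed to _bench(). */
#include <stdio.h>

static double throughput_mpix(int width, int height, int loops, double elapsed)
{
	double pixels = (double)width * height * loops; /* total pixels composited */
	return pixels / elapsed / 1e6;
}

int main(void)
{
	/* e.g. a 640x480 target composited 1000 times in 0.85 seconds */
	printf("%.1f MPix/s\n", throughput_mpix(640, 480, 1000, 0.85));
	return 0;
}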
diff --git a/test/mkvsync.sh b/test/mkvsync.sh
new file mode 100755
index 000000000..dd96ad8df
--- /dev/null
+++ b/test/mkvsync.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+OUT="$1"
+[ -n "$OUT" ] || OUT="vsync.avi"
+
+TMP=".build.tmp"
+
+rm -rf ${TMP}
+mkdir ${TMP}
+convert -size 640x480 -depth 24 canvas:black png24:${TMP}/black.png
+convert -size 640x480 -depth 24 canvas:white png24:${TMP}/white.png
+
+mkdir ${TMP}/anim
+
+for ((a=0; $a < 1000; a=$a+2)); do
+ ln -s ../black.png ${TMP}/anim/$a.png
+done
+
+for ((a=1; $a < 1000; a=$a+2)); do
+ ln -s ../white.png ${TMP}/anim/$a.png
+done
+
+mencoder "mf://${TMP}/anim/*.png" -v -vf-clr -mf fps=60 -o "${OUT}" -ovc lavc
+exitcode=$?
+rm -rf ${TMP}
+
+exit ${exitcode}
diff --git a/test/test.h b/test/test.h
index 1e3995bbf..7ef4dca4c 100644
--- a/test/test.h
+++ b/test/test.h
@@ -2,6 +2,8 @@
#define TEST_H
#include <stdint.h>
+#include <time.h>
+
#include <X11/Xlib.h>
#include <X11/extensions/XShm.h>
#include <X11/extensions/Xrender.h>
@@ -107,6 +109,9 @@ static inline uint32_t color(uint8_t red, uint8_t green, uint8_t blue, uint8_t a
return alpha << 24 | ra >> 8 << 16 | ga >> 8 << 8 | ba >> 8;
}
+void test_timer_start(struct test_display *t, struct timespec *tv);
+double test_timer_stop(struct test_display *t, struct timespec *tv);
+
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
diff --git a/test/test_display.c b/test/test_display.c
index ad3e40bc7..b5e7e06ed 100644
--- a/test/test_display.c
+++ b/test/test_display.c
@@ -148,3 +148,20 @@ void test_init(struct test *test, int argc, char **argv)
memset(test, 0, sizeof(*test));
test_get_displays(argc, argv, &test->real, &test->ref);
}
+
+void test_timer_start(struct test_display *t, struct timespec *tv)
+{
+ clock_gettime(CLOCK_MONOTONIC, tv);
+}
+
+double test_timer_stop(struct test_display *t, struct timespec *tv)
+{
+ XImage *image;
+ struct timespec now;
+
+ image = XGetImage(t->dpy, t->root, 0, 0, 1, 1, AllPlanes, ZPixmap);
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ XDestroyImage(image);
+
+ return (now.tv_sec - tv->tv_sec) + 1e-9*(now.tv_nsec - tv->tv_nsec);
+}
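
[Note: test_timer_stop() above deliberately fetches a 1x1 XGetImage before reading the clock. Xlib requests are asynchronous, and the synchronous image fetch forces the X server to drain its queue before replying, so the stop timestamp covers all rendering actually performed. A self-contained sketch of the same fencing idea, under the assumption of a running X server; link with -lX11.]

/* Sketch of the round-trip fence used by test_timer_stop(): a tiny
 * synchronous request (1x1 XGetImage) blocks until the server has
 * finished all previously queued rendering. */
#include <stdio.h>
#include <time.h>
#include <X11/Xlib.h>
#include <X11/Xutil.h>	/* for XDestroyImage */

int main(void)
{
	Display *dpy = XOpenDisplay(NULL);
	struct timespec t0, t1;
	XImage *image;

	if (!dpy)
		return 1;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	/* ... issue asynchronous rendering requests here ... */
	image = XGetImage(dpy, DefaultRootWindow(dpy), 0, 0, 1, 1,
			  AllPlanes, ZPixmap);
	clock_gettime(CLOCK_MONOTONIC, &t1);
	if (image)
		XDestroyImage(image);

	printf("%.9f s\n", (t1.tv_sec - t0.tv_sec) +
	       1e-9 * (t1.tv_nsec - t0.tv_nsec));
	XCloseDisplay(dpy);
	return 0;
}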
diff --git a/uxa/Makefile.in b/uxa/Makefile.in
index 417eb9cfc..6a01effe9 100644
--- a/uxa/Makefile.in
+++ b/uxa/Makefile.in
@@ -175,7 +175,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MAN_SUBSTS = @MAN_SUBSTS@
@@ -214,6 +213,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@
VERSION = @VERSION@
X11_CFLAGS = @X11_CFLAGS@
X11_LIBS = @X11_LIBS@
+XCB_CFLAGS = @XCB_CFLAGS@
+XCB_LIBS = @XCB_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_LIBS = @XORG_LIBS@
XORG_MAN_PAGE = @XORG_MAN_PAGE@
@@ -293,7 +294,7 @@ all: all-am
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -318,9 +319,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
diff --git a/uxa/uxa-accel.c b/uxa/uxa-accel.c
index 76425fe9f..27215ddf6 100644
--- a/uxa/uxa-accel.c
+++ b/uxa/uxa-accel.c
@@ -1027,7 +1027,7 @@ uxa_push_pixels(GCPtr pGC, PixmapPtr pBitmap,
ok = glamor_push_pixels_nf(pGC, pBitmap, pDrawable, w, h, x, y);
uxa_finish_access(&pBitmap->drawable, UXA_GLAMOR_ACCESS_RO);
}
- uxa_prepare_access(pDrawable, UXA_GLAMOR_ACCESS_RW);
+ uxa_finish_access(pDrawable, UXA_GLAMOR_ACCESS_RW);
}
if (!ok)
goto fallback;
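
[Note: the uxa-accel.c hunk above is a one-word bug fix. The glamor fallback path closed its uxa_prepare_access(pDrawable, UXA_GLAMOR_ACCESS_RW) bracket with a second prepare instead of a finish, leaving the drawable's access state unbalanced. A tiny illustrative sketch of the invariant being restored; all names here are hypothetical stand-ins, not the UXA API.]

/* Illustrative sketch only: every prepare_access() must be paired
 * with exactly one finish_access(). */
#include <assert.h>
#include <stdio.h>

static int depth; /* nesting depth of outstanding access grants */

static void prepare_access(void) { depth++; }
static void finish_access(void)  { assert(depth > 0); depth--; }

int main(void)
{
	prepare_access();
	/* ... CPU access to the drawable's backing memory ... */
	finish_access(); /* the bug called prepare_access() here again */

	assert(depth == 0); /* balanced: safe to hand back to the GPU */
	printf("access bracket balanced\n");
	return 0;
}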
diff --git a/uxa/uxa-render.c b/uxa/uxa-render.c
index 4463dc2f0..d783ea26c 100644
--- a/uxa/uxa-render.c
+++ b/uxa/uxa-render.c
@@ -962,7 +962,7 @@ uxa_try_driver_composite(CARD8 op,
RegionRec region;
BoxPtr pbox;
int nbox;
- int xDst_copy, yDst_copy;
+ int xDst_copy = 0, yDst_copy = 0;
int src_off_x, src_off_y, mask_off_x, mask_off_y, dst_off_x, dst_off_y;
PixmapPtr pSrcPix, pMaskPix = NULL, pDstPix;
PicturePtr localSrc, localMask = NULL;