author     Anas Nashif <anas.nashif@intel.com>    2013-02-11 07:30:29 -0800
committer  Anas Nashif <anas.nashif@intel.com>    2013-02-11 07:30:29 -0800
commit     c4f30fa8253338176ec71f157200b8e2824c0f15
tree       6b7485eb6f028539ce3dcc40770ee35889eda025
parent     1501461b978a770b6fc8883901d6c3d177661667
Imported Upstream version 2.21.2 (upstream/2.21.2, upstream)
111 files changed, 16961 insertions, 8303 deletions
@@ -1,3 +1,4810 @@
+commit a241949c05f44792f51a5bd1e246a44693cb5b06
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Feb 10 14:20:59 2013 +0000
+
+    2.21.2 release
+
+commit 0d75b19979b1ac14353765e2bb84c6a466129109
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Feb 10 15:47:53 2013 +0000
+
+    sna: Restore glyphs with xorg-1.12
+
+    That simple and innocuous build fix for xorg-1.13 bizarrely causes
+    missing glyphs with earlier Xorgs.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9fd0d8873a5a5c4f77904cab0b9909ca941b5dae
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Feb 10 14:29:29 2013 +0000
+
+    NEWS: fix bug url
+
+    The dangers of cutting and pasting from git log.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 617fadf3acf7bf75fb203c1e85fd0ddb98b3dbb9
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Feb 10 14:20:59 2013 +0000
+
+    2.21.1 release
+
+commit 3169a4e53cf39cc3d5c18ac6add909aa3a58de7e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Feb 10 11:57:14 2013 +0000
+
+    sna: Reorder some includes so that compat-api.h comes after the headers it wraps
+
+    Fixes the build in cases where compat-api.h was defining macros to
+    subvert the real functions found in the xorg includes.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 71fbad64c5cfe6832a03815bece4c89d15253e1a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Feb 10 10:54:17 2013 +0000
+
+    configure: Fix typo in checking for libdrm_intel
+
+    The package name is libdrm_intel, not libdrm_intel-1; an obvious
+    cut'n'paste error from testing for pixman-1.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3cbdfb54d1fcfed7745111e861e19b7bbac243cc
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sat Feb 9 19:15:20 2013 +0000
+
+    sna: Backport to squeeze - Xorg-1.6, pixman-0.16, libdrm-2.4.21
+
+    The principal change is to switch to the old Privates API and undo
+    the Region renames.
+
+    The downside is that this ignores the critical bugfixes made to the
+    xserver since xorg-1.6 - but I assume that whoever wants to run the
+    latest hardware on the old xservers is also backporting those
+    stability fixes...
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 42a6b25817985e22e7d462be87fbd97973d96a29
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sat Feb 9 15:30:58 2013 +0000
+
+    sna: Fix alignment of the base of partial buffers for pre-G33 chipsets
+
+    The older chipsets have much more restrictive alignment rules for the
+    base address of tiled but unfenced objects.
+
+    Bugzilla: https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1120108
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 80044e54634d0836694d5aa6f98ce22fe38d367f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sat Feb 9 09:57:26 2013 +0000
+
+    sna: Promote to GPU if only partially damaged on the CPU but busy on the GPU
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d18cb72a94fad0ee99ab361c21d643c927d29c35
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Feb 8 22:31:19 2013 +0000
+
+    sna: Randomly perturb 'wedged' to hunt for faults
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ce9f0448367ea6a90490a28150bfdc0a76500129
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Feb 8 16:01:54 2013 +0000
+
+    sna/gen6: Use GT2 settings for both GT2 and GT2+
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ae5399aaf9ef57d33e8fd957e8a96964897c09b3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Feb 8 11:31:21 2013 +0000
+
+    sna: Force the fallback path for unaccelerated randr damage
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c69b4389abc324533a9a311c17a667bf8a1e1673
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Feb 7 22:54:37 2013 +0000
+
+    sna/gen4: Split the have_render flag into separate prefer_gpu hints
+
+    The idea is to implement more fine-grained checks as we may want
+    different heuristics for desktops with GT1s than for mobile GT2s, etc.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit bec99de812ce6a1bbc2c8e4cfd05f4f74c560ea6
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Feb 8 00:53:10 2013 +0000
+
+    sna: Remove the bogus assertions on buffer domains
+
+    Just a few lines earlier we already have the correct assertion that the
+    buffer was not in the GPU domain, so had these two been correct, they
+    would have still been redundant.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8d1d3c6e6102ff20fbff74ec6b3b2e94ee757015
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Feb 7 14:47:07 2013 +0000
+
+    sna: Fixup an invalid assertion
+
+    We may choose to operate inplace on a buffer last used by the CPU if we
+    are discarding all the existing damage.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c405dba367bdca51221bd2464213199783dc18fe
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Feb 7 13:41:42 2013 +0000
+
+    sna: Also assert that the GPU is not wedged before continuing a batch
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit feeff6fcefccdca5335fea55c2fdbf8a4004c175
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Feb 7 13:33:58 2013 +0000
+
+    sna: Force GTT readback if the GPU is wedged
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8a272971d5971a56f57dde00dceb082d0b142c8c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Feb 6 17:59:10 2013 +0000
+
+    sna: Allow inplace uploads to utilise GTT on LLC machines
+
+    Rather than arbitrarily disable the fallback paths for LLC, allow it to
+    utilise any available GTT buffers for inplace uploads. The best
+    explanation so far is that with the streaming we are trashing the LLC.
+    On other machines, the difference is in the noise.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit bc8a2c30c4f6bb9ce751b6717a3a2feaea0d6d4b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Feb 7 10:42:58 2013 +0000
+
+    sna: Only try the SRC fixup into the buffer if it is CPU mapped
+
+    On one particular machine, this operation is behaving as if it is
+    reading back UC memory during the explicit write-only composite.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 889ed28f52bccdbc54692ea075f95f9635a8d58a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Feb 7 10:42:21 2013 +0000
+
+    sna: Correctly align used buffers to the following page boundary
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 974b6a97d78dadf09be8a2c4f61020f15d80d558
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Feb 6 17:02:27 2013 +0000
+
+    sna: Fallback to non-LLC paths after an allocation failure for an LLC buffer
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5c8084ef04cb0a7da064fb1e13c8ef7dae528b1b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Feb 6 16:39:31 2013 +0000
+
+    intel: Be careful not to match UMS against future generations
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit be241fb25ed0a8d41a642ea811253207f88d0962
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Feb 6 16:38:12 2013 +0000
+
+    sna: Free the handle after pwrite buffer allocation failure
+
+    Having just allocated the handle, we need to free it if we then fail to
+    allocate memory for the buffer.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4b3b25f0be33d3af3ccecfb3193fc2d365445fdf
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Feb 6 16:37:21 2013 +0000
+
+    sna: Flush our caches if we fail to mmap an object
+
+    The likely cause for a mmap failure is that we hold too many objects
+    open or have exhausted our address space. In both cases, we need to trim
+    our caches before continuing.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit daba1ae3e7f0532cc53d9a5178778dbaec203052
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Feb 6 16:17:36 2013 +0000
+
+    sna: Correctly handle failure to CPU map a new allocation
+
+    If we fail to CPU map, we want to fall back to just using pwrite with
+    normal memory.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0adb0b5e1ebcf3ddfeddae99d96912ec4c090832
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Feb 6 16:02:30 2013 +0000
+
+    sna: Handle mapped buffer allocation failure for LLC
+
+    The presumption was that if we had LLC we would have allocated the
+    buffer by that point - however, it was remotely possible to have fallen
+    through and so we need to handle those cases.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f4cff22afae598f41adf36cd149223d1f7dd6b6e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Feb 6 15:15:36 2013 +0000
+
+    sna: Relax the buffer size assertion to only be larger than required
+
+    Not all paths request alloc pages, a few just request sufficient pages
+    for the original size. So we can only assert that condition is
+    satisfied.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
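The "Free the handle after pwrite buffer allocation failure" fix above is a classic GEM error-path pattern: once the kernel has handed out a handle, every later failure must release it. A minimal sketch, assuming the standard DRM ioctls (the two ioctls and structs are real; the surrounding kgem-style names are illustrative, not the driver's code):

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>

    static uint32_t gem_create(int fd, uint64_t size)
    {
        struct drm_i915_gem_create create;

        memset(&create, 0, sizeof(create));
        create.size = size; /* the kernel rounds this up to whole pages */
        if (ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create))
            return 0;
        return create.handle;
    }

    static void gem_close(int fd, uint32_t handle)
    {
        struct drm_gem_close close;

        memset(&close, 0, sizeof(close));
        close.handle = handle;
        (void)ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
    }

    struct upload_buffer {
        uint32_t handle;
        void *mem;
        uint64_t size;
    };

    static struct upload_buffer *upload_create(int fd, uint64_t size)
    {
        struct upload_buffer *buf;
        uint32_t handle = gem_create(fd, size);

        if (handle == 0)
            return NULL;

        buf = malloc(sizeof(*buf) + size);
        if (buf == NULL) {
            gem_close(fd, handle); /* the fix: do not leak the handle */
            return NULL;
        }
        buf->handle = handle;
        buf->mem = buf + 1;
        buf->size = size;
        /* per the "Relax the buffer size assertion" commit: the bo may
         * be larger than requested, so only assert >=, never ==. */
        return buf;
    }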
+
+commit 8bc593c732a2f1ccd1bdabc071c709a44222db61
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Feb 6 15:11:00 2013 +0000
+
+    sna: Make sure we always replace io buffers before inserting into the cache
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5f72158919098dd5684d1c56d1ba643cc3be2c7d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Feb 6 15:10:23 2013 +0000
+
+    configure: XvMC support is optional, so make failure to find xcb non-fatal
+
+commit cd6d8f9b9df02934ebfff76cb40410c8ce3887dd
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Feb 6 10:37:50 2013 +0000
+
+    xvmc: Add the complementary XCB_CFLAGS
+
+    After splitting the xvmc dependencies into xcb and non-xcb, we then also
+    have to add the xcb CFLAGS to build libIntelXVmc.la
+
+    Reported-by: Julien Cristau <jcristau@debian.org>
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b96ee47ad97943c3dccd40d9570e29002dc3d85f
+Author: Paul Menzel <paulepanter@users.sourceforge.net>
+Date:   Sun Feb 3 13:33:08 2013 +0100
+
+    configure.ac: Split out XCB libraries from `XVMCLIB` into `XCB`
+
+    Building the package under Debian Sid/unstable, `dh_shlibdeps` informs
+    that `libI810XvMC.so.1.0.0` does not need to be linked against
+    `libX11-xcb.so.1`, `libxcb-dri2.so.0`, `libxcb-util.so.0` or
+    `libxcb.so.1` [1].
+
+    $ debuild -b -us -uc
+    […]
+    make[1]: Entering directory `/src/xserver-xorg-video-intel'
+    dh_shlibdeps -- --warnings=6
+    dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libX11-xcb.so.1 (it uses none of the library's symbols)
+    dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libxcb-dri2.so.0 (it uses none of the library's symbols)
+    dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libxcb-util.so.0 (it uses none of the library's symbols)
+    dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libxcb.so.1 (it uses none of the library's symbols)
+    make[1]: Leaving directory `/src/xserver-xorg-video-intel'
+    […]
+
+    Moving `x11-xcb`, `xcb-dri2` and `xcb-aux` from `XVMCLIBS` into `XCB`
+    and adding `XCB_LIBS` only to the `LIBADD` variables of `libIntelXvMC`
+    makes the warnings go away and the libraries are still built without any
+    issues.
+
+    make[1]: Entering directory `/src/xserver-xorg-video-intel'
+    dh_shlibdeps -- --warnings=6
+    make[1]: Leaving directory `/src/xserver-xorg-video-intel'
+    dh_installdeb -O--builddirectory=build/
+    dh_xsf_substvars -O--builddirectory=build/
+    dh_gencontrol -O--builddirectory=build/
+    dpkg-gencontrol: warning: Depends field of package xserver-xorg-video-intel-dbg: unknown substitution variable ${shlibs:Depends}
+    dh_md5sums -O--builddirectory=build/
+    dh_builddeb -O--builddirectory=build/
+    dpkg-deb: building package 'xserver-xorg-video-intel' in '../xserver-xorg-video-intel_2.19.0-6.1_i386.deb'.
+    dpkg-deb: building package 'xserver-xorg-video-intel-dbg' in '../xserver-xorg-video-intel-dbg_2.19.0-6.1_i386.deb'.
+    dpkg-genchanges -b >../xserver-xorg-video-intel_2.19.0-6.1_i386.changes
+    dpkg-genchanges: binary-only upload - no source code included
+    dpkg-source --after-build xserver-xorg-video-intel
+    dpkg-buildpackage: binary package(s) to upload (no source included)
+    Now running lintian...
+    W: xserver-xorg-video-intel: hardening-no-relro usr/lib/libI810XvMC.so.1.0.0
+    W: xserver-xorg-video-intel: hardening-no-fortify-functions usr/lib/libI810XvMC.so.1.0.0
+    W: xserver-xorg-video-intel: hardening-no-relro usr/lib/libIntelXvMC.so.1.0.0
+    W: xserver-xorg-video-intel: hardening-no-fortify-functions usr/lib/libIntelXvMC.so.1.0.0
+    W: xserver-xorg-video-intel: hardening-no-relro usr/lib/xorg/modules/drivers/intel_drv.so
+    W: xserver-xorg-video-intel: hardening-no-fortify-functions usr/lib/xorg/modules/drivers/intel_drv.so
+    N: 1 tag overridden (1 warning)
+    Finished running lintian.
+
+    The modules were originally added with the following commit present
+    since tag 2.10.0.
+
+        commit 3e8f2eae3a586aa29be4858698e666e0ec778cea
+        Author: Eric Anholt <eric@anholt.net>
+        Date:   Thu Oct 15 13:48:56 2009 -0700
+
+            XVMC: Use XCB DRI2 instead of cargo-culting our own copy of Xlib stuff. (v2)
+
+    [1] https://buildd.debian.org/status/fetch.php?pkg=xserver-xorg-video-intel&arch=i386&ver=2%3A2.19.0-6&stamp=1347825458
+
+    Signed-off-by: Paul Menzel <paulepanter@users.sourceforge.net>
+
+commit 93770c709aa7d3719b7c717040b16c8f82d5c207
+Author: Paul Menzel <paulepanter@users.sourceforge.net>
+Date:   Tue Jan 22 10:47:22 2013 +0100
+
+    NEWS: Fix a typo: a*n* inadvertent
+
+commit a8cfddd280b5220f23565b21c91f3f7dd10bbe91
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Feb 5 22:06:03 2013 +0000
+
+    sna: Tidy buffer allocation size assertions
+
+    Rather than perilously update a local variable with the allocated size,
+    just use the size of the bo in the assertion that it is large enough to
+    satisfy the allocation request.
+
+    Reported-by: Jiri Slaby <jirislaby@gmail.com>
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 82dc91e8c24a1fbbf03dcf89a3955319b3399ea0
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Feb 5 21:50:43 2013 +0000
+
+    test: Add a very basic blt benchmark
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9c80a0337ec12b6baab5aab380503e672e925677
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Feb 5 14:56:10 2013 +0000
+
+    sna: ValleyView uses the same scanline registers as SandyBridge
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 4c45e3fe456d211afc6ba69878b413a72ef5d0bf
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Feb 5 14:45:39 2013 +0000
+
+    intel: add more ValleyView PCI IDs
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c6101d9d71a86a579ff9771d456b234a38bd80b7
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Feb 5 11:02:30 2013 +0000
+
+    man: Fix a typo s/debuging/debugging/
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f33c90f7ada238683433d05492434120d06ea1fc
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Feb 1 19:34:56 2013 +0000
+
+    NEWS: Trivial typo s/utilile/utilise/
+
+commit 6346c844525c2b3a82c16fe10485b901a2b5ddbc
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Feb 5 10:17:45 2013 +0000
+
+    sna/gen4: Remove old single-thread SF w/a
+
+    The alternative of disabling GPU spans seems to be far more effective.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1565917f10d9fb3c7e2e7e273173c38c364b9861
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Feb 5 10:11:14 2013 +0000
+
+    sna/gen4: Disable non-rectilinear GPU span compositing
+
+    This seems to be the primary victim of the render corruption, so disable
+    until the root cause is fixed.
+
+    References: https://bugs.freedesktop.org/show_bug.cgi?id=55500
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 37bc822190f36be7b021167ba4d306bbcd97957b
+Author: Damien Lespiau <damien.lespiau@intel.com>
+Date:   Fri Jan 18 14:13:08 2013 +0000
+
+    build: Make generation of gen code depend on intel-gen4asm
+
+    This way, when a new intel-gen4asm is available (because one just hacked
+    on it and has installed a new version for instance) the shaders will be
+    recompiled. This helps catch regressions, by testing that the latest
+    changes in the assembler haven't broken too many things.
+
+    Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 18f8d2291fbb53ac993b926c247ca981e1e5207b
+Author: Damien Lespiau <damien.lespiau@intel.com>
+Date:   Fri Jan 18 14:13:07 2013 +0000
+
+    build: Use $(AM_V_GEN) to silence the assembly of gen programs
+
+    Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit eea535b7e2a35ec4dfa50550b674d4212676d2ee
+Author: Damien Lespiau <damien.lespiau@intel.com>
+Date:   Fri Jan 18 14:13:06 2013 +0000
+
+    build: Make autoreconf honour ACLOCAL_FLAGS
+
+    When running autoreconf, it's possible to give flags to the underlying
+    aclocal by declaring an ACLOCAL_AMFLAGS variable in the top level
+    Makefile.am.
+
+    Putting ${ACLOCAL_FLAGS} there allows the user to set an environment
+    variable up before running autogen.sh and pull in the right directories
+    to look for m4 macros, say an up-to-date version of the xorg-util macros.
+
+    Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9640640ab02d5de630e903116c1b104752f8b604
+Author: Paul Menzel <paulepanter@users.sourceforge.net>
+Date:   Sat Feb 2 11:44:54 2013 +0100
+
+    configure.ac: Do not include `xext` and `xfixes` in `XVMCLIB`
+
+    Building the package under Debian Sid/unstable, `dh_shlibdeps` informs
+    that `libIntelXvMC.so.1.0.0` does not need to be linked against
+    `libXext.so.6` or `libXfixes.so.3` [1].
+
+    $ debuild -b -us -uc
+    […]
+    make[1]: Entering directory `/build/buildd-xserver-xorg-video-intel_2.19.0-6-i386-9thLfo/xserver-xorg-video-intel-2.19.0'
+    dh_shlibdeps -- --warnings=6
+    dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libIntelXvMC.so.1.0.0 should not be linked against libXext.so.6 (it uses none of the library's symbols)
+    dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libIntelXvMC.so.1.0.0 should not be linked against libXfixes.so.3 (it uses none of the library's symbols)
+    dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libXext.so.6 (it uses none of the library's symbols)
+    dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libXfixes.so.3 (it uses none of the library's symbols)
+    dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libX11-xcb.so.1 (it uses none of the library's symbols)
+    dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libxcb-dri2.so.0 (it uses none of the library's symbols)
+    dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libxcb-util.so.0 (it uses none of the library's symbols)
+    dpkg-shlibdeps: warning: debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 should not be linked against libxcb.so.1 (it uses none of the library's symbols)
+    dpkg-shlibdeps: warning: package could avoid a useless dependency if debian/xserver-xorg-video-intel/usr/lib/libIntelXvMC.so.1.0.0 debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 were not linked against libXext.so.6 (they use none of the library's symbols)
+    dpkg-shlibdeps: warning: package could avoid a useless dependency if debian/xserver-xorg-video-intel/usr/lib/libIntelXvMC.so.1.0.0 debian/xserver-xorg-video-intel/usr/lib/libI810XvMC.so.1.0.0 were not linked against libXfixes.so.3 (they use none of the library's symbols)
+    make[1]: Leaving directory `/build/buildd-xserver-xorg-video-intel_2.19.0-6-i386-9thLfo/xserver-xorg-video-intel-2.19.0'
+    dh_installdeb -a -O--builddirectory=build/
+    […]
+
+    Not populating `XVMCLIB` with `xext` and `xfixes` makes the warning go
+    away and the libraries are still built without any issues.
+
+    [1] https://buildd.debian.org/status/fetch.php?pkg=xserver-xorg-video-intel&arch=i386&ver=2%3A2.19.0-6&stamp=1347825458
+
+    Signed-off-by: Paul Menzel <paulepanter@users.sourceforge.net>
+
+commit 9807bba950078d86a25b91064ecfebaa0ee459e3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Feb 1 18:25:48 2013 +0000
+
+    sna: Drop bogus refcnt assertion during kgem_bo_retire()
+
+    As we may call it from kgem_bo_sync() during preparation of the upload
+    buffer, which in turn may operate on an object straight out of the snoop
+    cache and hence not yet referenced (or in some cases, ever).
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a5561f13498066922b54af04cc71549322ce0e3b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Feb 1 18:05:35 2013 +0000
+
+    sna: Do not add the INPLACE hint if we have the ASYNC hint set
+
+    If the caller is preparing to use the GPU to render into the CPU bo,
+    it will request an ASYNC migration. In those cases, we do not want to
+    substitute it with an INPLACE operation.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
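The INPLACE/ASYNC interaction described above (see also "Assert that we never try to mix INPLACE / ASYNC hints for move-to-cpu" further down) reduces to a simple flag rule. A sketch with made-up flag values; only the relationship between the two hints is taken from the commit messages:

    #include <assert.h>

    #define MOVE_READ         0x1
    #define MOVE_WRITE        0x2
    #define MOVE_INPLACE_HINT 0x4  /* illustrative values, not sna.h's */
    #define MOVE_ASYNC_HINT   0x8

    static unsigned move_flags(unsigned flags, int inplace_ok)
    {
        /* callers must never mix the two hints themselves */
        assert((flags & (MOVE_INPLACE_HINT | MOVE_ASYNC_HINT)) !=
               (MOVE_INPLACE_HINT | MOVE_ASYNC_HINT));

        /* "Do not add the INPLACE hint if we have the ASYNC hint set":
         * an ASYNC migration means the GPU will write into the CPU bo,
         * so serialising for an inplace write would defeat it. */
        if (inplace_ok && (flags & MOVE_ASYNC_HINT) == 0)
            flags |= MOVE_INPLACE_HINT;

        return flags;
    }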
+
+commit d3ff1cb9d7f788002337b1e6c4c81c58112b85b1
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Feb 1 13:46:33 2013 +0000
+
+    2.21.0 release
+
+commit 008f8230a7c47f1249eb51e53b3abf158f2a42bf
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Feb 1 01:54:52 2013 +0000
+
+    sna: Assert that if we have GPU damage we have a GPU bo
+
+    Scatter the asserts around the migration points to catch where this
+    invariant may be untrue.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit cf0576f87102b1535268691e7e29661b0f9ee73b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Feb 1 00:19:21 2013 +0000
+
+    sna/video: Correct computation of planar frame size
+
+    The total frame size is less than 3 times the subsampled chroma planes
+    due to the additional alignment bytes.
+
+    Bugzilla: https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1104180
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 268285d9a64fc47fe81fe5bfbfbd1890dad53e1e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 31 21:57:41 2013 +0000
+
+    sna/gen3+: Flush vertex threads before touching global state
+
+    We need to be careful to synchronize with the sharing threads not just
+    when finishing the current vbo, but also before we emit the batch
+    state, so that no other thread will try and do the same.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1239e012ae6d4f00ce73f32d7244905a601170ea
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 31 19:18:17 2013 +0000
+
+    sna: Make sure the needs_flush is always accompanied by a tracking request
+
+    References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9712f49fddc8be939f77c25fcb907873af44619f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 31 18:08:05 2013 +0000
+
+    sna: Remove stale assertion
+
+    Now the reset is meant to re-establish 'rq' if the bo was busy.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit cd7df0004cf6e423d2ae6c0cf83a84e0031161b4
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 31 17:32:57 2013 +0000
+
+    sna: Pass width/height to composite for rotated displays
+
+    This is essential to handle displays that are too large to be rendered
+    normally via the 3D pipeline and so that the bounds of the fixup region
+    are known.
+
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=60124
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 38376b56cfe0dfc603bce48e37432622ef9a0135
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 31 17:29:10 2013 +0000
+
+    sna: Remember to move scanouts to the scanout cache after retiring
+
+    Reported-by: Jiri Slaby <jirislaby@gmail.com>
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0a08de1f02577aef0da289108270c1b35e5d9703
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 31 16:39:47 2013 +0000
+
+    sna: After removing the bo from a batch, check whether it is still busy
+
+    If we transfer a bo to the current batch, then subsequently discard it,
+    we lose the information about its current active state. Try to recover
+    this information, by querying the kernel and adding it to the flushing
+    list if necessary.
+
+    Reported-by: Jiri Slaby <jirislaby@gmail.com>
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
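Querying the kernel for a bo's active state, as the commit above describes, goes through the GEM busy ioctl. A sketch; the ioctl and struct are real, the surrounding names are illustrative rather than the driver's own:

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>

    static int gem_bo_is_busy(int fd, uint32_t handle)
    {
        struct drm_i915_gem_busy busy;

        memset(&busy, 0, sizeof(busy));
        busy.handle = handle;
        if (ioctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
            return 0; /* on error, assume idle */
        return busy.busy != 0;
    }

    /* usage: if (gem_bo_is_busy(fd, bo->handle))
     *            add_to_flushing_list(bo);   -- hypothetical helper */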
+
+commit fff0686342f8ec3b3f3510340e073defdf2fb73f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 31 12:40:21 2013 +0000
+
+    sna/traps: Thread the fallback rectilinear compositor
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 839542d219bd919c99398d514c1d194d18b78eff
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 31 12:08:52 2013 +0000
+
+    sna/traps: Allow inplace compositing for non-GPU buffers and rectilinear traps
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit e329e04b10c88afb40f2fd8fdad5b24b9f7dfc15
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 31 11:33:37 2013 +0000
+
+    sna/traps: Translate the extents for the rasterization threads
+
+    The single-threaded code used the pre-computed width/height and only
+    required the origin from the bounds. However, the threads need to
+    allocate memory for themselves based on the computed bounds, and so it
+    helps if those bounds are then correct (rather than only the top-left
+    being in local space with the bottom-right in global coordinates).
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 033f75e5bd94e226e719f87ed4e0091845384679
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 31 01:38:01 2013 +0000
+
+    sna: Stage retirement through the flushing list
+
+    If the kernel replies that a bo is still busy, stage its retirement
+    through the flushing list to be certain that we never stall on a
+    subsequent write.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5f5711e62cc4c8ca15782376c4047174299e2db0
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 31 01:21:08 2013 +0000
+
+    sna: Disable dangerous assertions that depend upon external state
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 42529336fd92d39a5a5eceb07f2838d4be50fa8e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 31 00:58:51 2013 +0000
+
+    sna: Prevent falling back to swrast if source is on the GPU
+
+    Currently if the dst is wholly contained within the CPU, then we try to
+    continue to operate on the GPU. However, if we have FORCE_GPU set, it
+    means that one of the sources for the operation resides on the GPU, and
+    that would require a readback in order to perform the operation on the
+    CPU. Hence, if we try to use a CPU bo and fail, convert back to using
+    the GPU bo if forced.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c2d06c407e1c2cbbf3f7f6c4989710a799cd43d0
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 30 21:17:42 2013 +0000
+
+    sna: Improve DBG output for damaged slave outputs
+
+    After computing the intersection of the damage with the slave, give the
+    region extents.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8867aa6a46c33fd2abf3b3f0b1d6115bad6c8017
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 30 21:15:55 2013 +0000
+
+    sna/dri: Handle change of BackBuffer across a pending flip
+
+    If we encounter a delayed flip with a different back buffer than the
+    current, simply update the info rather than bug out.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a31fd03bd4c87c48dc3ca15e3082e29348224b8c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 30 17:26:28 2013 +0000
+
+    sna: Add a bunch of assertions to make sure we do not misplace scanouts
+
+    As scanouts are uncached, they need to be treated carefully and
+    decontaminated before being placed in the general cache. So double check
+    that no bo in those caches is still marked as a scanout.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 6f1b862282ddb4545987fb9f0a45b528b7b7b5ee
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 30 15:44:53 2013 +0000
+
+    sna: Pass the correct WRITE hint when migrating for rendering into the CPU bo
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5011ed2e729d46fe3cff5454e15a0fd16441f7e1
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 30 15:44:22 2013 +0000
+
+    sna: Only discard the clear hint when writing inplace to the GPU pixmap
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 6312f58014c0bb4afa56855be1e9becc3e3cc3d7
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 30 15:43:05 2013 +0000
+
+    sna: Don't force a migration from CPU rendering for a DRI2 flushed pixmap
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 60a3b370aea0cf9ffb4947a73984c877b4695d4e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 30 15:41:51 2013 +0000
+
+    sna: Retire the bo after a set-domain(CPU,0)
+
+    Having relaxed the earlier assertion because the kernel is wrong, we can
+    now retire for READ-READ optimisations.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 78ad5a742f40c2311bfe90997aebedeb998464e5
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 30 15:40:06 2013 +0000
+
+    sna: Relax assertion that the kernel considers the bo idle when we call retire
+
+    All the callers have explicitly changed the domain upon the bo before
+    calling kgem_bo_retire(), so we still get the occasional sporadic
+    failure as kgem_busy() reports true. Kill the assertion for now.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 83bcd310d279758542e366348f808d7ca0f6d0bb
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 30 13:18:21 2013 +0000
+
+    sna: Prefer to use snooped buffers for readbacks
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 496f3ff04453524639a52a3b9dfcb8e198e5e597
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 30 12:21:33 2013 +0000
+
+    uxa: Harden against failures to submit batchbuffers
+
+    If we fail to submit a batchbuffer, the driver is broken and likely to
+    continue to fail to render. Give up, and fallback to swrast so that the
+    session remains usable.
+
+    References: https://bugs.freedesktop.org/show_bug.cgi?id=59771
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 04d48fee713e7bbc9cdf4f09855f6663a4bdc59f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 30 11:46:20 2013 +0000
+
+    sna: Fix errors found from asserts in a66c5f9ed51e
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit bc8b191ef6f5030d17a3b6497d1fd7556756c1ff
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 30 09:04:10 2013 +0000
+
+    sna: Return early if the Drawable box exactly matches one CRTC
+
+    If we are trying to find the best coverage, then by definition if the
+    drawable is an exact match for one CRTC, we can stop looking.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
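The early exit in "Return early if the Drawable box exactly matches one CRTC" can be sketched with simplified stand-ins for the xf86Crtc types (everything below is illustrative, not the driver's code):

    typedef struct { int x1, y1, x2, y2; } Box;
    typedef struct { Box bounds; /* ... */ } Crtc;

    static long long coverage(const Box *a, const Box *b)
    {
        /* area of the intersection of a and b, 0 if disjoint */
        long long w = (a->x2 < b->x2 ? a->x2 : b->x2) -
                      (a->x1 > b->x1 ? a->x1 : b->x1);
        long long h = (a->y2 < b->y2 ? a->y2 : b->y2) -
                      (a->y1 > b->y1 ? a->y1 : b->y1);
        return (w > 0 && h > 0) ? w * h : 0;
    }

    static Crtc *pick_crtc(Crtc *crtc, int num, const Box *box)
    {
        Crtc *best = 0;
        long long best_cover = 0;

        for (int i = 0; i < num; i++) {
            if (crtc[i].bounds.x1 == box->x1 && crtc[i].bounds.y1 == box->y1 &&
                crtc[i].bounds.x2 == box->x2 && crtc[i].bounds.y2 == box->y2)
                return &crtc[i]; /* exact match: no better coverage exists */

            long long c = coverage(&crtc[i].bounds, box);
            if (c > best_cover) {
                best_cover = c;
                best = &crtc[i];
            }
        }
        return best;
    }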
+
+commit de28027ffc649920268ae6fdd64146f08310e8a4
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 30 08:42:48 2013 +0000
+
+    sna/dri: Make sure we discard the existing mappings when swapping GPU bo
+
+    If the GPU bo is currently mapped to the Pixmap, we need to be sure to
+    invalidate that mapping if we swap the GPU bo (for SwapBuffers). If we
+    forget, we leave a dangling pointer to chase.
+
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=60042
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit cf9b9ac3186299ab2418c55e73e19c81e5f615a4
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 30 08:40:53 2013 +0000
+
+    sna: Only discard the mapping prior to the actual read when uploading
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a66c5f9ed51e1dcfc2ab03339795b73617629196
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 30 08:22:00 2013 +0000
+
+    sna: Before replacing the devPrivate.ptr assert it is not already mapped
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3fdd28419adee7145d3925cff2704143a324e9d3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Jan 29 22:26:15 2013 +0000
+
+    sna: Only migrate the sample box if using the BLT engine for a composite
+
+    Modify the presumption that if we are using a core operation on a shadow
+    pixmap, then we are likely to continue migrating that pixmap back and
+    forth.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0c3b0f11d718d915e502582e9fadd5c0577640db
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Jan 29 22:24:30 2013 +0000
+
+    sna: Verify that we always add the SHM CPU bo to the flush list when using
+
+    As we need to synchronize that bo before the next reply, we need to keep
+    track of it whenever it is active on the GPU.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f743cd5734ca502aa8bdb0e1327fe84d6ce82755
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Jan 29 18:04:40 2013 +0000
+
+    sna: Avoid promoting SHM CPU bo to GPU to maintain coherence with SHM clients
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9383c5efe9ace34970abddc5e3c84c32505b537f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Jan 29 17:24:24 2013 +0000
+
+    sna/gen3+: Fix a DBG for composite_boxes()
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b02a1ea5573b6f0b58a037dd4788c04c296f7ff3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Jan 29 09:28:33 2013 +0000
+
+    sna: Add GT1/GT2 thread counts for Haswell
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1dc2d9ede5c7f330ebadf85d987559c8a6cb1c6b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Mon Jan 28 23:14:57 2013 +0000
+
+    sna: Add some more paranoia that we correctly map before fallbacks
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 63c71bcd96202e6da44d6776d119a82f0c06d386
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Jan 27 23:17:13 2013 +0000
+
+    sna: Fix typo in vertex count for threaded source span emitter
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b0d26ca9312695d05c29503a3f892e7f2c5816dd
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Jan 27 21:07:03 2013 +0000
+
+    sna: Replace the forced vertex finish with just a wait
+
+    When completing a batch mid-operation, we need to wait upon the other
+    threads to complete their writes so that memory is coherent before
+    submitting the work to the GPU. This was achieved by forcing the finish,
+    but all that is needed from it is the wait, which makes the handling of
+    threads much more explicit and removes the unnecessary vbo refresh.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b0c3170c1092d01b4937f352a3962854785ee549
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Jan 27 19:09:38 2013 +0000
+
+    sna: Add the pixmap to the flushing list when creating for inplace CPU writes
+
+    Reported-by: Jiri Slaby <jirislaby@gmail.com>
+    References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 73f574945f2cac14f9bafa6395e2c4dbb16fcf5d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Jan 27 16:02:52 2013 +0000
+
+    sna: Disable all signals in the render threads
+
+    X uses them (SIGIO especially) for input handling, and gets rightfully
+    confused if it finds itself in a different thread.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
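Blocking signals in the render threads, per the commit above, is a standard pthread_sigmask pattern; a minimal sketch (the thread function name is illustrative):

    #include <pthread.h>
    #include <signal.h>

    static void *render_thread(void *arg)
    {
        sigset_t all;

        /* Block every signal in this worker: the X server expects SIGIO
         * and friends to be delivered to the main thread only. */
        sigfillset(&all);
        pthread_sigmask(SIG_BLOCK, &all, NULL);

        /* ... rasterisation work ... */
        return arg;
    }

Blocking inside the thread (rather than around pthread_create in the parent) keeps the main thread's mask untouched, at the cost of a tiny window before the mask takes effect.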
+
+commit 9a7bf70365980809d0f02190f2f620a957ff1ba8
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sat Jan 26 23:03:33 2013 +0000
+
+    sna: Enable threaded rasterisation for non-antialiased geometry
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8178cff5718e69e14d3953a7f754d7585a06838f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sat Jan 26 14:41:04 2013 +0000
+
+    sna: Begin sketching out a threaded rasteriser for spans
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8ffb3f50b3b4601401da76e2848e059ab63231f4
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Jan 25 10:45:39 2013 +0000
+
+    sna: Spawn threads to rasterize trapezoids through pixman
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0ec2f3a8bac96acc55c8fdb432b97d026abaafb4
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 24 23:10:39 2013 +0000
+
+    sna: Spawn threads to composite trapezoids inplace
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 427b7311fe1b66d54518bae45e9fa149bda8a6e8
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 24 22:25:46 2013 +0000
+
+    sna: Perform the last threaded composite operation directly
+
+    The point of the refactor was to execute the last stage of the composite
+    in the master thread, so do so.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 326dcd75f2202b1af29e986f5efb6b1e133217cb
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 24 20:58:53 2013 +0000
+
+    sna: Parse cpuinfo to determine the actual number of physical cores/caches
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit f597b647180c1e7bf83693060f244926191b7462
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 24 18:45:35 2013 +0000
+
+    sna: Tidy construction of data for threaded composite
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1643c97f8f7b49738b649b5f7d1e574d689d167e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 24 18:24:02 2013 +0000
+
+    sna: Use threads for simple mask generation
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d60128c55e8f5f69476d42c20f2fd62ccc0f411e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 24 15:41:29 2013 +0000
+
+    sna/dri: Compensate clipExtents for drawable offset
+
+    The clipExtents is in screen coordinates whereas we just want to confirm
+    that the maximum pixel to be copied lies with the DRI2 buffer, which is
+    relative to the drawable.
+
+    Reported-by: Matthieu Baerts <matttbe@gmail.com>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=59806
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
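The "Parse cpuinfo" commit above sizes the thread pool from the physical core count rather than the logical CPU count. A simplified sketch that only counts distinct "core id" values in /proc/cpuinfo (not the driver's actual parser, which also inspects caches):

    #include <stdio.h>

    static int count_physical_cores(void)
    {
        char line[256];
        int seen[256] = { 0 };
        int cores = 0;
        FILE *file = fopen("/proc/cpuinfo", "r");

        if (file == NULL)
            return 1;

        while (fgets(line, sizeof(line), file)) {
            int id;
            /* each physical core reports one "core id" per SMT sibling;
             * count each id once */
            if (sscanf(line, "core id : %d", &id) == 1 &&
                id >= 0 && id < 256 && !seen[id]++)
                cores++;
        }
        fclose(file);
        return cores ? cores : 1;
    }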
+
+commit 264b3b72500c5af74d124a214347d45c9cb90a1d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 24 15:06:12 2013 +0000
+
+    sna: Refactor to use a common fbComposite fallback
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8ecfbea9d1f83b2de62bee0f58299e7a90c741d1
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 24 14:46:03 2013 +0000
+
+    sna: Experiment with a threaded renderer for fallback compositing
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 778dba90cfc4e801a975bd661c56a565ce60524b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 23 21:32:29 2013 +0000
+
+    sna/dri: Don't contribute missed frames to the target_msc
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 50b41cb485ffb38e6bf705a3a62840bb78af669b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 23 21:16:49 2013 +0000
+
+    sna/dri: Only reject DRI2 buffers that are too small for the requested blit
+
+    The goal is to reject stale DRI2 buffers that are smaller than the
+    target due to not-yet-handled ConfigureNotify, but not to reject
+    blitting from Windows that are larger than the frontbuffer.
+
+    Fixes a regression from the overzealous
+
+        commit b27ecf3059bc066ef59f2a71c1d8d8f0ffec7191
+        Author: Chris Wilson <chris@chris-wilson.co.uk>
+        Date:   Mon Nov 12 14:06:06 2012 +0000
+
+            sna/dri: Prevent scheduling a swap on stale buffers
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 98b312e579385e6e4adf6bf0abe20f8ca84592af
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 23 20:51:35 2013 +0000
+
+    sna/dri: Stop feeding I915_TILING_Y to mesa i915c
+
+    Only i915g handles Y-tiling, and we can't differentiate between the two
+    types of clients.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 31796400915a06fc789088b7dcfcecd6ea91e195
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 23 19:37:23 2013 +0000
+
+    sna: Clean up WAIT_FOR_EVENT on gen2/3
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ea8148b24d48db4f46205817db8a55dd6ea1a4b3
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 23 17:47:12 2013 +0000
+
+    sna/dri: Prefer to use the BLT ring for vsync'ed copies on IVB+
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3c3a87a2d4261cbd66602812637328a04787f510
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 23 17:35:50 2013 +0000
+
+    sna/gen6: Correct the event definition for secondary pipes for MI_WAIT_FOR_EVENT
+
+    It helps to wait upon the event we program and enable.
+
+    References: https://bugzilla.kernel.org/show_bug.cgi
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 88753c5a8c6c9acf086d81828260adf330eebb1a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 23 17:35:50 2013 +0000
+
+    sna/gen7: Correct the event definition for secondary pipes for MI_WAIT_FOR_EVENT
+
+    It helps to wait upon the event we program and enable.
+
+    References: https://bugzilla.kernel.org/show_bug.cgi
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2d92d8ec562cb1e6b9dca28074adca670734233c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Jan 22 09:24:04 2013 +0000
+
+    sna: Extend rectangular PolyLines to cover corner pixels on ccw paths
+
+    Reported-by: Joe Peterson <joe@skyrush.com>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=55484
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c8817e24a9d97110a961c3803290e38ff5cbfc9a
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Jan 22 09:06:50 2013 +0000
+
+    sna/gen7: Fix inversion of bool return code from CA pass
+
+    As we inverted the predicate, we no longer restored the original
+    operation after performing a CA pass - glyphs would randomly become
+    white.
+
+    Reported-by: Jiri Slaby <jirislaby@gmail.com>
+    References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 10f549332e315cfe2cc86aadab94a95ae6757c34
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Mon Jan 21 16:48:34 2013 +0000
+
+    sna: Free a non-reusable bo if it expires on the flushing list
+
+    Still not sure just how the bo ends up there, but as there seems to be
+    the occasional malingerer, just free it.
+
+    Reported-by: Jiri Slaby <jirislaby@gmail.com>
+    References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d7f0df27edb20b052ad39beb26a0b1924f432618
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Mon Jan 21 16:34:09 2013 +0000
+
+    sna: Use the maximum backlight value if we fail to read the current value
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 46a3a68e60a1d0a598ec8ece81088a4e6491de55
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Mon Jan 21 16:29:30 2013 +0000
+
+    sna: Assert that if marked as a scanout it is indeed bound.
+
+    On further review, the invariant must have been violated earlier, so
+    make the assert earlier.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0507d55dd1bc8fedae524a410a9e7b53f1dad920
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Mon Jan 21 16:24:49 2013 +0000
+
+    sna: Only add bound scanouts to the scanout list
+
+    If we never used the bo as an actual scanout it will never have been
+    moved to the uncached domain and so we can return it back to the system
+    cache.
+
+    Reported-by: Jiri Slaby <jirislaby@gmail.com>
+    References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5a0bc67ba57cf698e100df617474669ed5d036d6
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Mon Jan 21 11:41:38 2013 +0000
+
+    sna: New execbuffer flags for lut-handle and fast-relocs are upstream
+
+    Now the flags are upstream, we can rely on runtime tests as the
+    interface is now frozen.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 208ca91a31182e8ddad36e6a735c725362cbd071
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Jan 20 18:02:41 2013 +0000
+
+    sna/gen7: Place the vsync commands in the same cacheline
+
+    Do as told; both the LRI and WAIT_FOR_EVENT need to be in the same
+    cacheline for an unspecified reason.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
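Keeping the LRI and MI_WAIT_FOR_EVENT pair inside one 64-byte cacheline, as "Place the vsync commands in the same cacheline" requires, means padding the batch with no-ops first. A sketch; the opcode encodings are abbreviated and illustrative, not copied from the hardware docs:

    #include <stdint.h>

    #define MI_NOOP              0
    #define MI_LOAD_REGISTER_IMM (0x22 << 23) /* illustrative encoding */
    #define MI_WAIT_FOR_EVENT    (0x03 << 23) /* illustrative encoding */

    static uint32_t *emit_vsync_wait(uint32_t *b, uint32_t reg,
                                     uint32_t value, uint32_t event)
    {
        /* Pad until the 4 dwords below cannot straddle a cacheline. */
        while (((uintptr_t)b & 63) > 64 - 4 * sizeof(uint32_t))
            *b++ = MI_NOOP;

        *b++ = MI_LOAD_REGISTER_IMM | 1; /* one reg/value pair */
        *b++ = reg;
        *b++ = value;
        *b++ = MI_WAIT_FOR_EVENT | event;
        return b;
    }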
+
+commit 9a3e3abfe9b624af2354c5a69778aee3024fe46c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Jan 20 17:48:31 2013 +0000
+
+    sna/gen7: Offset start/end scanlines by one
+
+    The hardware needs to be programmed with the line before the desired
+    scanline, wrapping around as required.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit e6a64f872bfd026aa1ba1bd44b1298918c819849
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Jan 20 16:59:58 2013 +0000
+
+    sna/gen3+: Remove bogus assertion that the vbo is included before finish
+
+    If we are carrying over a nearly full vbo from one batch to the next, we
+    may indeed finish it prior to writing any new primitives and so the
+    assert is truly bogus.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 5de919336fc1ba1c4116e18ba0560cdb7b0589f0
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Jan 20 16:36:17 2013 +0000
+
+    sna/gen6: Tweak programming scanline values
+
+    The documentation says that both start/end scanline need to be the line
+    before the desired value, and so to program the first scanline we need
+    to set it to the last scanline. The docs also say that the lower 3 bits
+    are ignored, so tweak the values programmed accordingly with an extra
+    check that the window is not reduced to 0.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2f9ac4e8a17e9d60bbb55c46929c37e92181d804
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Jan 20 15:53:32 2013 +0000
+
+    sna/gen3+: And restore non-CA compositing state after the CA pass
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 650c9d5ce80afc1d4c8d9f77f6679f085fa4dc9d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Jan 20 14:58:42 2013 +0000
+
+    sna/gen3+: Reset vertex relocation state after discarding the batch
+
+    Fixes a regression from
+
+        commit a6ecb6d31d8c543f38fca0be6b0ec82e59dcd8d2
+        Author: Chris Wilson <chris@chris-wilson.co.uk>
+        Date:   Wed Jan 16 09:14:40 2013 +0000
+
+            sna: Discard the batch if we are discarding the only buffer in it
+
+    as we may keep a stale relocation for the vertex buffer alive if we
+    attempt to clear the bo using the render engine before discarding it.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 492952e0d6362a046a666956afdf8f9bc0f2b7e7
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Jan 20 14:55:06 2013 +0000
+
+    sna/gen3+: Handle flushing vbo for CA glyphs
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b52c921204df6b2486717fcef05b4a1993aa1071
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Jan 20 14:02:07 2013 +0000
+
+    sna: Adapt error detection and handling for invalid batchbuffers
+
+    Allow the DDX to continue even if the kernel rejects our batchbuffers by
+    disabling hw acceleration - just extends the existing hang detection to
+    also handle the driver producing garbage.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 8215a278f20d34819536edbda05a108a860fefb9
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Jan 20 12:36:07 2013 +0000
+
+    sna/gen3: Always close the vertices for a batch, even if the vbo is empty
+
+    In the case where we emit a no-op, we may not attempt to finish binding
+    the vbo as it is considered empty. This leaves a stray relocation for
+    the next batch, and also causes it to believe that it has a vbo bound
+    already.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
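The scanline-window computation described in the gen6/gen7 commits above might look like this sketch, derived only from the commit messages (program the line before the desired one, wrap at the top of the frame, and keep the window non-empty given that the low 3 bits are ignored):

    static void scanline_window(int y1, int y2, int vtotal,
                                int *start, int *end)
    {
        /* Program the line before the desired scanline, wrapping from
         * the first line of the frame back to the last. */
        *start = (y1 == 0 ? vtotal : y1) - 1;
        *end = (y2 == 0 ? vtotal : y2) - 1;

        /* The hardware ignores the low 3 bits of each value; round the
         * window outwards so it cannot collapse to zero lines. */
        *start &= ~7;
        *end |= 7;
    }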
+
+commit a88a9b9a59fa2d5fd427fa6e1f74fb9844379264
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Jan 20 12:06:09 2013 +0000
+
+    2.20.19 release
+
+commit 7822bbacbece6fcb2e12863cd6c7a53ab614c37c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Jan 20 11:43:49 2013 +0000
+
+    test: Add script to generate source file for testing vsync
+
+    Courtesy of an original script by Mark Schreiber,
+    https://bugs.freedesktop.org/show_bug.cgi?id=59606
+
+commit 9329d8755981989ccbe66df6085fbab7c809a2c6
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Jan 20 10:14:21 2013 +0000
+
+    sna: Make DEBUG_SYNC a configure option
+
+    As it is advisable to combine the synchronous rendering debug option
+    with other debugging options, it is more convenient to make it into a
+    configure option: --enable-debug=sync
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c9263f192e2f85dd961bc1c4e9ca8180db874517
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Jan 20 01:39:12 2013 +0000
+
+    sna: Apply DEBUG_SYNC prior to emitting error report
+
+    This is handy for the case where the batch triggers a GPU hang rather
+    than being rejected by the kernel.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 42ab789cce8423d99864776c6d5ba759c4129b54
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Jan 18 13:56:53 2013 +0000
+
+    sna: Clear the non-intersecting damage after skipping the slave update
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 828a3a80aa3f0692e7be2831d58bccf02e2c481d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Jan 18 13:16:23 2013 +0000
+
+    uxa: Clip dirty region to slave pixmap before appending damage
+
+    Fixes regression from
+
+        commit c789d06cf8a0debc67058d7be1483f5b542e2baa
+        Author: Dave Airlie <airlied@redhat.com>
+        Date:   Mon Jan 7 13:57:21 2013 +1000
+
+            intel: fixup damage posting to be done correctly around slave pixmap
+
+    which causes the entire slave scanout to be read back from uncached
+    memory every time a pixel is modified.
+
+    Reported-by: Stephen Liang <inteldriver@angrywalls.com>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=59539
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit e17eaf540b614cdcb8f7349dd01852c3afc5ab05
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Jan 18 13:09:36 2013 +0000
+
+    sna: Replace double negative '!RegionNotEmpty' with the equivalent RegionNil
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 2de43a0164ba5364ffd7cb48f0bccc9873e87332
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Jan 18 12:01:54 2013 +0000
+
+    sna: Skip an empty slave update
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 38de17f80d780bf219fc3c4018ad9cc8808ba50f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Jan 18 10:16:42 2013 +0000
+
+    sna: Remove bogus assertion invalidated by 'read-read' sync
+
+    If we perform a read-read synchronisation, the kernel may still believe
+    that the bo is busy as it remains on the active lists being read by the
+    GPU.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 9f68ac60ae37cc72503ec40691d1ae43a476f8e7
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 17 20:00:34 2013 +0000
+
+    sna/dri: Explicitly flag sync copies for the backends
+
+    As gen6/7 need to prevent ring switching and perform a rendercopy if we
+    need to perform a vsync'ed copy.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1ee00c408d8142cfaf4202393c2364c9ae73cb6e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 17 13:09:47 2013 +0000
+
+    sna/trapezoids: Fix horizontal offset for inplace operation
+
+    Remember that for an inplace operation we are not dealing with an a8
+    mask, but rather an x8r8g8b8 surface and so need to step accordingly.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0d749f93ea52161e59da1adca1a22e96ba293551
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 17 12:28:18 2013 +0000
+
+    sna: Drop the MOVE_WHOLE_HINT for PutImage
+
+    It is not as clearly beneficial as for GetImage, as for example toolkits
+    may only push the shadows around a window.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit dc643ef753bcfb69685f1eb10828d0c8f830c30e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 17 12:27:55 2013 +0000
+
+    sna: Apply read-only synchronization hints for move-to-cpu
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 18035a21e147788bea03ab2175ca03ae951701ce
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 17 11:52:10 2013 +0000
+
+    sna: Remove the confusion of the pixmap->undamaged
+
+    This was to track a pixmap that had been used for migration (i.e. had in
+    the past been used for mixed rendering). It is no longer used so remove
+    it.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 46141d277f326ae78f7b0e927a500e0eb1987f1b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 17 10:16:24 2013 +0000
+
+    sna: Consider fill style for XPolyRectangle
+
+    The rectangle outline is not always solid...
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d5c8d38afaba04281157bafe212e93f010ae00f5
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jan 17 10:10:54 2013 +0000
+
+    sna: Refactor to remove a goto from sna_put_zpixmap_blt()
+
+    The complexity of the function has been moved to move-to-cpu so we can
+    take further advantage of the simplified logic in put_zpixmap to clean
+    up the code by removing an unwanted goto.
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
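A read-only synchronisation, as used by "Apply read-only synchronization hints for move-to-cpu" together with the earlier "Retire the bo after a set-domain(CPU,0)", passes an empty write domain so the GPU may keep reading the bo while the CPU reads it too. A sketch using the real set-domain ioctl (the wrapper name is illustrative):

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>

    static void bo_sync_for_cpu(int fd, uint32_t handle, int will_write)
    {
        struct drm_i915_gem_set_domain arg;

        memset(&arg, 0, sizeof(arg));
        arg.handle = handle;
        arg.read_domains = I915_GEM_DOMAIN_CPU;
        /* a read-read sync: write_domain of 0 lets GPU reads continue */
        arg.write_domain = will_write ? I915_GEM_DOMAIN_CPU : 0;
        (void)ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
    }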
+
+commit 9552438caa4d295c99a9b8821cf2644739861c6a
+Author: Colin Walters <walters@verbum.org>
+Date:   Wed Jan 4 17:37:06 2012 -0500
+
+    autogen.sh: Implement GNOME Build API
+
+    http://people.gnome.org/~walters/docs/build-api.txt
+
+    Signed-off-by: Adam Jackson <ajax@redhat.com>
+
+commit 87d773249af18ae8722aacb7306b0eee51a90dbc
+Author: Adam Jackson <ajax@redhat.com>
+Date:   Wed Jan 16 13:18:23 2013 -0500
+
+    configure: Drop AM_MAINTAINER_MODE
+
+    Signed-off-by: Adam Jackson <ajax@redhat.com>
+
+commit dbf1cfec9cd4e9efe7650f2940c92b4e51214288
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 16 12:20:48 2013 +0000
+
+    2.20.18 release
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 47caffc50b5cdd288ad868fa9a697f0d4e2d28dc
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 16 10:49:24 2013 +0000
+
+    sna: Restrict upload buffers to reduce sampler TLB misses
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit ab36300a22222086b94857f356612106ffbeb480
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 16 09:17:59 2013 +0000
+
+    sna: Correct DBG to refer to the actual tiling mode forced
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a6ecb6d31d8c543f38fca0be6b0ec82e59dcd8d2
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 16 09:14:40 2013 +0000
+
+    sna: Discard the batch if we are discarding the only buffer in it
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 26db2438e34feb8f28444bf7418869b4ecd870da
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 16 09:00:21 2013 +0000
+
+    sna: Fix computation of large object sizes to prevent overflow
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 54c1d97d5ab325874e1c7b2639e58111d7a6b93f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 16 09:00:04 2013 +0000
+
+    sna: Add DBG for when we add the inplace hint
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 588c5aa6bca441d7c9305fe2fcf268e89b6b617d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Jan 15 22:21:56 2013 +0000
+
+    sna: Revert use of a separate CAN_CREATE_SMALL flag
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit af85ffdec7047efa452d6bab3a0ee3889dd4f046
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Jan 15 20:37:11 2013 +0000
+
+    sna: Avoid serialising on a move-to-cpu for an async operation
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d70be85dc723168a481c1955444afd951c4817bf
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Jan 15 20:16:45 2013 +0000
+
+    sna: Assert that we never try to mix INPLACE / ASYNC hints for move-to-cpu
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 1287c3a24c277cb42930d8af2943b9f7b016f31d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Jan 15 18:59:15 2013 +0000
+
+    sna: Specialise sna_get_image_blt for clears to avoid sync readback
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit da4972eec57e662b98a7abced6338ceb8a533a48
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Jan 15 18:34:07 2013 +0000
+
+    sna/trapezoids: Avoid the multiply for an opaque source
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 7f968c8c991cff751459939bdb42e14255f529b7
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Jan 15 18:41:00 2013 +0000
+
+    sna: Add DBG to use_shm_bo()
+
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
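An overflow-safe size computation in the spirit of "Fix computation of large object sizes to prevent overflow" does the arithmetic in 64 bits and rejects anything that cannot be represented; a sketch (helper name and layout assumptions are illustrative):

    #include <stdint.h>

    #define PAGE_SIZE 4096

    static int bytes_for_pixmap(uint32_t width, uint32_t height,
                                uint32_t bpp, uint64_t *size)
    {
        /* widen before multiplying so width*bpp cannot wrap in 32 bits;
         * pitch alignment is omitted for brevity */
        uint64_t pitch = (uint64_t)width * bpp / 8;
        uint64_t bytes = pitch * height;

        if (bytes == 0 || bytes > UINT32_MAX - (PAGE_SIZE - 1))
            return 0; /* too large: would overflow a 32-bit object size */

        *size = (bytes + PAGE_SIZE - 1) & ~(uint64_t)(PAGE_SIZE - 1);
        return 1;
    }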
use_shm_bo() + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit af63fab5047a43716c5df875ddc50f7c877f8a83 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Jan 15 18:21:11 2013 +0000 + + sna: Hint that a copy from a SHM bo will likely be the last in a batch + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 1be436409222c00ff66c6d747487b77f1037b27a +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Jan 15 18:20:29 2013 +0000 + + sna: Pass the async hint for the upload into the GPU + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 2113f7f440dd2f10e80f0bb3bd5cd155f7e19098 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Jan 15 09:33:03 2013 +0000 + + sna: Free the SHM pixmaps after b266ae6f6f + + Since b266ae6f6f protected the static allocations from being reaped in + the normal course of events, we need to penetrate those defenses in + order to finally free the SHM mappings. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 441c481630a5cf09a7eb26d5db80b1e60cb2b10f +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Jan 15 01:26:19 2013 +0000 + + sna: Mark uploads with async hints when appropriate + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 6abd442279fd32d1ce9b33a72eabbeb922316151 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Jan 15 00:15:23 2013 +0000 + + sna: Avoid allocating an active CPU bo unnecessarily + + If we will not write back the GPU damage to the bo as we intend to + overwrite it for the next operation, we can forgo allocating the active + CPU bo and skip the synchronisation overhead. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit f235c74cd661970c76e152777e9a2c314a368a56 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Jan 14 15:49:42 2013 +0000 + + sna: Tweak consideration of last-cpu placement for inplace regions + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 70c5e41b519e44e620948d683d3b1111494d2f48 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Jan 14 15:03:59 2013 +0000 + + sna: Limit temporary userptr uploads to large busy targets or LLC machines + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit cf860da1c78244036c59edf934b312cc1367e8aa +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Jan 14 12:50:54 2013 +0000 + + sna: Apply PutImage optimisations to move-to-cpu + + We can replace the custom heuristics for PutImage by applying them to + the common path, where hopefully they are equally valid.
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit e4ad4477815abe31b1a2323673da86a6def2f246 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Jan 14 13:12:46 2013 +0000 + + sna: Use userptr to accelerate GetImage + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 3cc04a8e24f02248b6382c9bc354ea15c42b17b6 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Jan 13 17:34:03 2013 +0000 + + sna: Initialize src_bo to detect allocation failure + + sna_accel.c: In function 'sna_put_image': + sna_accel.c:3730:18: warning: 'src_bo' may be used uninitialized in this + function [-Wmaybe-uninitialized] + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 3f04b0b98d7f861ff58b82c99d33b7eacfcda5f7 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Jan 13 17:31:15 2013 +0000 + + sna: Check size against aperture before attempting to perform the GTT mapping + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 7a7db06c62228acc6d1c03e800c7afa84e886f5a +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Jan 13 13:45:18 2013 +0000 + + sna: Add a compile flag for measuring impact of userptr uploads + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit bcc212dc7a939505a678f97f6700eee99204249f +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Jan 13 13:36:09 2013 +0000 + + sna: Use the pixmap size (not drawable) to determine replacement + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 32f43f618d9b11ea44b3e01a95ac3f239a731ad2 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Jan 13 13:23:24 2013 +0000 + + sna: Allow large image uploads to utilize temporary mappings + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit bf2b2e2f91208412c8b74a95859def501514be43 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Jan 13 12:24:44 2013 +0000 + + sna: Allow creation of a CPU map for pixmaps if needed + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit b266ae6f6f8fb4c494ece532ae4621055e66beb2 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Jan 13 11:30:07 2013 +0000 + + sna: Relax limitation on not mapping GPU bo with shadow pointers + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit a2d82161436e489f23637d793c737bc6950a62b8 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Jan 13 10:17:33 2013 +0000 + + sna: Correct a few assertions after enabling read-only mappings + + As these do not flush the active state if we have read-read mappings, we + need to be careful with our asserts concerning the busy flag. 
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit ab01fd696e1137ddfb9a85ae68c15c05900f0e8e +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Jan 12 09:17:03 2013 +0000 + + sna: Experiment with a CPU mapping for certain fallbacks + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 03d392cd1d87e17129c42e4d822d3d1749edb02e +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Jan 12 08:51:52 2013 +0000 + + sna: Tweak max object sizes to take account of aperture restrictions + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit d111c464bfbae57bb7141872810c88b88f30c087 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Jan 12 08:15:13 2013 +0000 + + sna: After a size check, double check the batch before flushing + + As we may fail the size check with an empty batch and a pair of large + bo, we need to check before submitting that batch in order to not run + afoul of our internal sanity checks. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit ec77a07b41f1062b941774f3782b51d21e7824dd +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Jan 11 11:40:57 2013 +0000 + + sna/dri: Prefer to preserve the ring of the destination bo + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 42f1026e11527cb62b4522b44e71a4e72582a876 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Jan 11 11:40:16 2013 +0000 + + sna: Reorder struct kgem_bo to move related data into the same cacheline + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit aead71051ed757e7565d395c858bf8ab8f0b0ff6 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Jan 11 01:30:43 2013 +0000 + + sna: Disable memcpy_to_tiled_x() uploads on 32-bit systems + + It's far too slow due to the register-starved instruction set producing + atrocious code and the extra overhead in the kernel for managing memory + mappings. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 220970b1a484e283e2bbb44f79df613ce1ee1146 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Jan 10 19:43:05 2013 +0000 + + sna: Also prefer to use the GPU for uploads into a tiled bo + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 672e59851c427c63f43cde7dfd1688a72100e3b3 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Jan 10 19:35:29 2013 +0000 + + sna: Prefer userptr if copying to a tiled bo + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 441ef916ae6569c88b3d6abaf7fea4d69be49d76 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Jan 10 19:14:21 2013 +0000 + + intel: Throttle harder + + Filling the rings is a very unpleasant user experience, so cap the + number of batches we allow to be inflight at any one time. + + Interestingly, as also found with SNA, throttling can improve + performance by reducing RSS. However, typically throughput is improved + (at the expense of latency) by oversubscribing work to the GPU and a + 10-20% slowdown is commonplace for cairo-traces. Notably, x11perf is + less affected and in particular application level benchmarks show no + change. + + Note that this exposes another bug in libdrm-intel 2.4.40 on gen2/3.
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit a37d56f338c5fae832d5eeea1283b6dbde827678 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Jan 10 16:28:24 2013 +0000 + + sna: Use some surplus bits to back our temporary pixman_image_t + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 09ea1f4402b3bd0e411b90eb5575b3ff066d7356 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Jan 10 16:26:24 2013 +0000 + + sna: Prefer to use the GPU for copies from SHM onto tiled destinations + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit c63147a3c33fd26f5c04a8648881659b4a90df06 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Jan 10 15:15:15 2013 +0000 + + sna: Allow CPU bo to copy to GPU bo if the device is idle. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 2933e7595838c28081810d4959ca1e005a0419e1 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Jan 10 13:07:19 2013 +0000 + + sna: Ignore the last pixmap cpu setting if overwriting all damage + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 934ea64f7ff080b00d00c50ba94f63247d7bb130 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Jan 10 13:06:06 2013 +0000 + + sna: With a GPU bo and a shm source, do not fall all the way back + + The normal source upload into GPU bo knows a few more tricks that we may + want to apply first before copying into the shadow of the GPU bo. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 8a8edfe4076ee08558c76eddbb68426e4563888c +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Jan 10 03:31:37 2013 +0000 + + sna: Make sure all outputs are disabled if no CompatOutput is defined + + If we have to fall back and the configuration is wonky, make sure that + all known outputs are disabled as we take over the console. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 5449e16c0c2b6ca5af4acf42703164b9d2b2d822 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Jan 10 02:54:41 2013 +0000 + + sna: Open-code xf86CompatOutput() to avoid invalid pointers + + config->compat_output needs to be sanitized during device initialization + or we may dereference an invalid xf86OutputPtr. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 8881a14200580db731ca6902b289b08989aaa61e +Author: Mickaël THOMAS <mickael9@gmail.com> +Date: Mon Jan 7 20:47:51 2013 +0100 + + Set initial value for backlight_active_level + + If the "Backlight" option is set, backlight_active_level is not set, which + results in a default value of 0, causing a black screen upon starting Xorg. + +commit b8c9598294eaa16e0d1578ad98896f6ec5ba37cf +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Jan 7 13:57:21 2013 +1000 + + sna: fixup damage posting to be done correctly around slave pixmap + + Copied from commit c789d06cf8a0debc67058d7be1483f5b542e2baa + Author: Dave Airlie <airlied@redhat.com> + Date: Mon Jan 7 13:57:21 2013 +1000 + + This fixes the damage posting to happen in the correct ordering; not + sure if this fixes anything, but it should make things more consistent. + +commit c789d06cf8a0debc67058d7be1483f5b542e2baa +Author: Dave Airlie <airlied@redhat.com> +Date: Mon Jan 7 13:57:21 2013 +1000 + + intel: fixup damage posting to be done correctly around slave pixmap + + This fixes the damage posting to happen in the correct ordering; not + sure if this fixes anything, but it should make things more consistent.
+ + Signed-off-by: Dave Airlie <airlied@redhat.com> + +commit 5891c89ff2be277d1a833d4bc092b65184c1f3d6 +Author: Dave Airlie <airlied@redhat.com> +Date: Mon Jan 7 13:54:47 2013 +1000 + + intel: drop pointless error printf in the slave pixmap sync code. + + This is left over and spams the logs; get rid of it. + + Signed-off-by: Dave Airlie <airlied@redhat.com> + +commit 27550e81482229007fa9e0e9769fdd20f3616b23 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Jan 6 17:29:19 2013 +0000 + + sna/dri: Transfer the DRI2 reference to the new TearFree pixmap + + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=58814 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 1a5e4fb725da2eb25cf7f476290c02e9880a4efc +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Jan 6 17:08:56 2013 +0000 + + sna: Only disable upon a failed pageflip after at least one pipe flips + + If we have yet to update a pipe for a pageflip, then the state remains + consistent and we can fall back to a blit without disabling any pipes. If + we fail after flipping a pipe, then unless we disable an output the + state becomes inconsistent (the pipes disagree on what the attached fb + is). + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit dd66ba8e5666a1ce7da0ddc226d074f591e1fa22 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Jan 6 16:13:56 2013 +0000 + + sna: Try to create userptr with the unsync'ed flag set first + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 9051f43fa3c8d011921ac6ff75b763280f26d98f +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Jan 6 15:20:14 2013 +0000 + + sna/gen4+: Handle solids passed to the general texcoord emitter + + The general texcoord emitter does handle solids (for the case of a + transformed mask) and so we need to be careful to set up the + VERTEX_ELEMENTS accordingly. + + Fixes a regression from + commit 2559cfcc4cbc1d0d84b048565cad3bfee61df8da + Author: Chris Wilson <chris@chris-wilson.co.uk> + Date: Wed Jan 2 10:22:14 2013 +0000 + + sna/gen4+: Specialise linear vertex emission + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 4af910e8be92e0ca241ce1e93e322c712dcbe340 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Jan 6 13:43:55 2013 +0000 + + sna/gen4+: Trim the redundant float from the fill vertices + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 3244e4b23397f54ca76876dd76ebea9a0abd357e +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Jan 6 13:24:23 2013 +0000 + + Revert "sna/gen4+: Backport tight vertex packing for simple renderblits" + + This reverts commit 8ff76fad1fadc5e309f9a12c30f883460a432049 and + commit 48e4dc4bd4b2980f0f804f572d0e3fc1bb4bc21e. + + I forgot gen4 and gen5 do not have the 'non-normalized' bit in their + sampler states.
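The userptr fallback in commit dd66ba8e above amounts to attempting the cheap, unsynchronized variant first and retrying without the flag if the kernel refuses. A minimal sketch in C, assuming hypothetical placeholder names (local_gem_userptr, LOCAL_USERPTR_UNSYNCHRONIZED and LOCAL_IOCTL_GEM_USERPTR stand in for the driver's private definitions and are not the real i915 ABI):

    /* Sketch: prefer the unsynchronized userptr variant, then retry
     * without the flag if the kernel refuses it. All names here are
     * illustrative placeholders. */
    #include <stdint.h>
    #include <string.h>
    #include <errno.h>
    #include <sys/ioctl.h>

    struct local_gem_userptr {
            uint64_t user_ptr;   /* page-aligned pointer to user memory */
            uint64_t user_size;  /* size in bytes, page-aligned */
            uint32_t flags;
            uint32_t handle;     /* returned bo handle */
    };
    #define LOCAL_USERPTR_UNSYNCHRONIZED 0x80000000
    #define LOCAL_IOCTL_GEM_USERPTR 0 /* placeholder request code */

    static int create_userptr(int fd, void *ptr, uint64_t size, uint32_t *handle)
    {
            struct local_gem_userptr arg;

            memset(&arg, 0, sizeof(arg));
            arg.user_ptr = (uintptr_t)ptr;
            arg.user_size = size;

            /* Try the cheap path first: no serialisation against the GPU. */
            arg.flags = LOCAL_USERPTR_UNSYNCHRONIZED;
            if (ioctl(fd, LOCAL_IOCTL_GEM_USERPTR, &arg) == 0) {
                    *handle = arg.handle;
                    return 0;
            }

            /* The kernel (or our privileges) said no; retry synchronized. */
            arg.flags = 0;
            if (ioctl(fd, LOCAL_IOCTL_GEM_USERPTR, &arg))
                    return -errno;

            *handle = arg.handle;
            return 0;
    }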
+ +commit d3be77f87916e38af717bafaf2000becd5180d76 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Jan 5 18:07:50 2013 +0000 + + sna/trapezoids: filter out cancelling edges upon insertion to edge-list + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 2b4a2f52c47a24c297312d51f9a8299c9a54a697 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Jan 5 17:21:34 2013 +0000 + + sna/trapezoids: filter out zero-length runs + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 59a7b8b32c694735942fd7e42c1382d91004b0b1 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Jan 4 18:22:14 2013 +0000 + + sna: Clear up the caches after handling a request allocation failure + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 3c31a9fc210221ba8e7922bec80c15ec39cab7bc +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Jan 4 18:11:12 2013 +0000 + + sna: Embed the pre-allocation of the static request into the device + + So that in the case where we are driving multiple independent screens + each having their own device, we do not share the global reserved + request in the event of an allocation failure. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit b5b3cfb0ad1cc5e66c99035f526946bf41011e13 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Jan 3 23:33:44 2013 +0000 + + sna: Flush the batch prior to referencing work from another ring + + In the case where the kernel is inserting semaphores to serialise work + between rings, we want to only delay the surface that is coming from the + other ring and not interfere with work already queued. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit ea2da97773d858001f98adc880f24b9671c51b2f +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Jan 3 16:47:14 2013 +0000 + + sna: Convert allocation request from bytes to num_pages when shrinking + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 2bd6e4dcd43bb0d836f12232050e73ce1510bb0f +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Jan 3 16:38:33 2013 +0000 + + sna: Add a pair of asserts to validate fls()/cache_bucket() + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit f9d2730974a869f15eac599ca865b50a9a9658d9 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Jan 3 15:20:45 2013 +0000 + + sna: Also recognise __i386__ for fls asm + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 69dde74a003ba0168ceca1558a4cb69097421b92 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Jan 3 15:20:23 2013 +0000 + + sna: Fix off-by-one in C version of fls + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit fc702cdf534a4694a64408428e8933497a7fc06e +Author: Matt Turner <mattst88@gmail.com> +Date: Wed Jan 2 16:07:54 2013 +0000 + + sna: Rewrite __fls without dependence upon x86 assembly + + The asm() prevents SNA from compiling on ia64. + + Fixes https://bugs.gentoo.org/show_bug.cgi?id=448570 + +commit bc67bdcec832f4302951f2789456666dee2f496c +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Jan 2 13:47:51 2013 +0000 + + sna/gen6+: Fine tune placement of DRI copies + + Avoid offsetting the overhead of the render copy only to be penalised by + the overhead of the semaphore. So compromise.
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 2559cfcc4cbc1d0d84b048565cad3bfee61df8da +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Jan 2 10:22:14 2013 +0000 + + sna/gen4+: Specialise linear vertex emission + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 0996ed85fd8bd79f41f28908733b85566f9e2b69 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Jan 1 22:53:26 2013 +0000 + + sna/gen2+: Precompute the affine transformation scale factors + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit d36cae801f1dcb06d4f93f2f27cc9b9de73e89c9 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Jan 1 21:03:06 2013 +0000 + + sna/gen4+: Tidy special handling of 2s2s vertex elements + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 8582c6f0bbe1bf01324b46933ff2f50c65f2a82d +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Jan 1 20:53:12 2013 +0000 + + sna/gen6+: Remove vestigial CC viewport state + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 24264af2912f9abae5aff2a6fb5a50383d9e33be +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Jan 1 20:39:23 2013 +0000 + + sna: Fast path inplace addition of solid trapezoids + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit e9a9f9b02978cb2d73c38163827eb7141ebed16c +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Jan 1 16:40:28 2013 +0000 + + sna: Micro-optimise glyph_valid() + + Note that this requires fixing up the glyph->info if the xserver didn't + create a GlyphPicture. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 372c14aae8f4fd2c5865b9d23cd825dcbc33765f +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Jan 1 15:49:12 2013 +0000 + + sna: Remove some obsolete Options + + Throttling and delayed-flush are now redundant. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 65924da91da4bb617df1bb0a7c3e9d4aa475b6b1 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Jan 1 11:40:15 2013 +0000 + + sna: Tidy compat interfaces + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 0a35d9287397031c95ebd9dc53b68e33e7dcf092 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Jan 1 11:12:02 2013 +0000 + + sna/gen2: Always try to use the BLT pipeline first + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit c1457fbd8a169ee19c8e625ea4e779180eb4b070 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Jan 1 10:49:27 2013 +0000 + + sna/gen2: Tidy a pair of vertex emitters + + Switch to the new inline scaled transforms. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 48a5797c0f227204d0723de0ef34b046964c571e +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Dec 31 17:30:40 2012 +0000 + + sna/gen4: Tweak single-thread SF w/a for solids + + Allow multiple threads for the rare case of compositing with a solid + color. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit e4f6ba6b47c41645a40e314f14047ba0b5f93a01 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Dec 31 14:06:36 2012 +0000 + + sna/gen6+: Hint that we prefer to use the BLT with uncached scanouts + + Once again balancing the trade-off of faster smaller copies with the BLT + versus the faster larger copies on the RENDER ring.
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 6e87e7ddfe0c21e0fb6b3c2cb940a40aa7d4e061 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Dec 31 14:03:16 2012 +0000 + + sna/dri: Use the default choice of backend for copying the region + + Notably, if everything is idle, using the BLT is a win as we can emit + them so much faster than a rendercopy, and as the target is uncached we + do not benefit as much from the rendercache. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit a7988bf77f5a106a48b6e39b6eaf60ef2f8bec11 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Dec 30 14:50:49 2012 +0000 + + sna/dri: Fix triple buffering to not penalise missed frames + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 736b89504a32239a0c7dfb5961c1b8292dd744bd +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Dec 30 10:32:18 2012 +0000 + + uxa: Align surface allocations to even tile rows + + Align surface sizes to an even number of tile rows to cater for sampler + prefetch. If we read beyond the last page we may catch the PTE in a + state of flux and trigger a GPU hang. Also detected by enabling invalid + PTE access checking. + + References: https://bugs.freedesktop.org/show_bug.cgi?id=56916 + References: https://bugs.freedesktop.org/show_bug.cgi?id=55984 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 43336c632beb5d599ec0fc614434b88ef7a26422 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Dec 29 16:47:53 2012 +0000 + + sna: Seed the solid color cache with an invalid value to prevent false hits + + After flushing, we *do* need to make sure we cannot hit a false lookup + via the last cache. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit f6050382095c3bc4f78bc4ff9e9c6086e58d6b28 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Dec 29 16:41:03 2012 +0000 + + sna/dri: Gracefully handle failures from pageflip + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 1c2ece369177ea6c3fd2f254b2554ceadf5590de +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Dec 29 15:53:23 2012 +0000 + + sna/gen4+: Try using the BLT before doing a tiled copy + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 09ca8feb3455c979e799ddf26daae8f2de2813e1 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Dec 29 15:42:02 2012 +0000 + + sna: Move the primary color cache into the alpha cache + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 8c56c9b1da9e078bd5b7ff4ebc5d8b23f593d500 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Dec 29 14:14:41 2012 +0000 + + sna: Allow a flush to occur before batching a flush-bo + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 2f53fb389c001f68134f514e30e25e91de41fb9d +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 28 22:58:02 2012 +0000 + + sna: DBG compile fixes + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit dba83dacd2ccbb2ac23b205ce2a872a889fa30bd +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 28 19:23:36 2012 +0000 + + sna/gen3: Use inline transform+scale function + + So as to avoid reading back from the vbo (which may be wc mapped). 
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit f0fca544b0602bc4ed2f68e8d260e0a3745b4bad +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 28 18:52:44 2012 +0000 + + sna/gen4+: Check for a spare exec slot for an outstanding vbo + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit c6e850b626f4bb44876c683d596ea38f8f6c30ae +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 28 17:14:52 2012 +0000 + + sna/gen4+: Trim an extraneous coordinate from solid composite emission + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 3fdc9923447538ed65bf9ffa189d7290ce804730 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 28 17:14:52 2012 +0000 + + sna/gen4+: Trim an extraneous coordinate from solid span emission + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit fdd6d222bc92b3e385f5d62f5e03dfd86f290e45 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 28 17:08:00 2012 +0000 + + sna/gen4+: Tidy emit_spans_affine() + + gcc produced abysmal code for the inlined emission, so hand unroll it + for sanity. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 5d222d4d21e6e3af5316728e0da49a014e9fea21 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 28 17:08:00 2012 +0000 + + sna/gen4+: Tidy emit_spans_solid() + + gcc produced abysmal code for the inlined emission, so hand unroll it + for sanity. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 4528f68eff33a5c2f9c1d884e9b3f7228053e0f4 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 28 16:45:50 2012 +0000 + + sna: Only allocate a busy CPU bo for a GPU readback + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 99fdd1a1c6aa52688c2c821a90f86700b7ee34b2 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 28 16:33:59 2012 +0000 + + sna: Mark kgem_bo_retire() as static + + The exported function is not used, so mark it static and strengthen the + assertions. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 548d284b8cf8cc2b311efe3287e0ae956738189a +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 28 14:49:38 2012 +0000 + + sna: Skip copying fbcon if we are already on the scanout + + If we are already the scanout, then there is little point copying to + ourselves... Should be paranoia. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 583efd4ba067a0a4319e43ebc18dd81ed9c8db0a +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 27 17:59:59 2012 +0000 + + sna: Sanity check config->compat_output + + In a headless setup this may be left initialised to -1. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 7725df8aa1b3eab97618311e3f24769a318bd804 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 27 14:01:59 2012 +0000 + + sna/gen2,3: Remove gen-specific vertex_offset + + Remove the duplication of vertex_offset in favour of the common + vertex_offset. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 46af1ff126f3fb1f9470b0cbb19c7c2b09d5b92a +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 27 00:40:08 2012 +0000 + + sna/gen6+: Tidy up ring preferences + + Remove a few duplicated tests. 
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit dd5b653aa2c5fe2e062533db35c83a40c1952ea6 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 27 09:54:35 2012 +0000 + + sna: Do not try to set a 0x0 mode + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 861c2362dd38d7d43fe7ffb181cb197199a1c570 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Dec 26 14:12:42 2012 +0000 + + sna/gen6+: Tweak to only consider active ring on destination + + Otherwise we decide to use BLT when hitting the render/sampler cache + is preferable for a source bo. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit f9b6aa3aaf784f9149e091a646673ddf341cd7ca +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Dec 26 13:05:52 2012 +0000 + + sna: Explicitly track self-relocation entries + + Avoid having to walk the full relocation array for the few entries that + need to be updated for the batch buffer offset. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 90b1b220ee7a3c543301956b01c54a4a04632db4 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Dec 26 12:51:58 2012 +0000 + + 2.20.17 release + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 52fd223fc970118cbdcb31f9574414debc905e9c +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 21 21:36:30 2012 +0000 + + sna/video: Initialise alignment for video ports > 0 + + We repeatedly set the alignment value on the first port, rather than + once for each. + + Reported-by: Jiri Slaby <jirislaby@gmail.com> + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=47597 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 3793ccf7804cfc870b46c623dfeefbe0c381c1d4 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 21 14:48:07 2012 +0000 + + sna: Remove assertions that the pixmap is wholly defined when uploading + + As the user may only write to a portion of a pixmap (thus only creating + a small amount of damage) and then attempt to use the whole as a source, + we run the risk of triggering an assertion that the whole was defined. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 07dde33a4f51941b4f612823ea6ea7ca01a6efbc +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 21 14:35:32 2012 +0000 + + sna: Remove a pair of stale assertions + + For gen2-5, it does not matter what mode the batch is in when we + insert the scanline wait. With the more aggressive batch flushing, and + relaxed assignment of mode for those generations, we are likely to see + that the batch is idle when we go to insert the waits.
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit bdd0cca4e1192df0038621925c4e6243ba419a81 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 21 14:20:23 2012 +0000 + + sna: Refactor test for a rotation matrix + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 347c5a7b33729f1bedd408d2ef24756d51b66f1d +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 21 10:40:47 2012 +0000 + + sna/dri: Refactor get_current_msc between blit/flip paths + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 8a67d3f808fcc7c8c51553b1703e8312f28b87a1 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 21 10:21:06 2012 +0000 + + sna/dri: Set the correct current_msc for the no readback path + + If we are asked to render immediately, then in order to pass the tests + when comparing it to the target, we need to set the current_msc to the + ultimate future value, -1. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 48e4dc4bd4b2980f0f804f572d0e3fc1bb4bc21e +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 20 21:54:25 2012 +0000 + + sna/gen4: Backport tight vertex packing of renderblits + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 08d2b073692836aa22f65f8ba30db5d14550c03e +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 20 21:30:32 2012 +0000 + + sna/gen4: Backport more recent state tracking tweaks + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 8ff76fad1fadc5e309f9a12c30f883460a432049 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 20 20:57:40 2012 +0000 + + sna/gen5: Backport tight vertex packing for simple renderblits + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 9144c951915a1e0c1899a72161f9f0f1ab9b9ac4 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 21 09:44:52 2012 +0000 + + sna/dri: Avoid querying the current-msc with swapbuffers wait disabled + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 84c327e17f68c4a56fcb76be1f45ab6d35291b5d +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 20 19:44:46 2012 +0000 + + sna/video: Assert that the frame is initialised + + References: https://bugs.freedesktop.org/show_bug.cgi?id=47597 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 4d750219925cb3199ebc6751cdbd2862dfb4cdfe +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 20 19:34:41 2012 +0000 + + uxa/dri: Correct the destination of the blit after a chained flip is broken + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit ca5c028c2b4d9bf02002acd484054fe427ea8d09 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 20 19:31:44 2012 +0000 + + glamor: Release the drawable after passing to glamor_push_pixels + + An unlikely path, but a double prepare instead of a prepare/finish. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit de2de36049e2958a60f63fadffe8f54de8da1e56 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 20 19:29:31 2012 +0000 + + sna: Check the correct variable for a failed allocation + + Having already checked 'dst' and just allocated 'src', that is what we + should be checking.
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit deb908fda74541fba649349db279715b05d0554e +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 20 19:22:32 2012 +0000 + + intel: ODEV_ATTRIB_PATH is no longer printed, so kill the temporary variable + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 0f84ecfc3cd7dfe7f43ff99a6498d2ceccd90225 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 20 12:00:00 2012 +0000 + + sna/gen4+: Amalgamate all the gen4-7 vertex buffer emission + + Having reduced all the vb code for these generations to the same set of + routines, we can refactor them into a single set of functions. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 1f4ede0ef8f8a8d07e11781ad05617ecdfcd3faf +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Dec 19 20:39:10 2012 +0000 + + sna: Do not throttle before move-to-cpu + + The idea is that when creating a surface to perform inplace + rasterisation, we won't be using the GPU for a while and so give it time + to naturally throttle. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 5deba2832dc42072d9abaeaa7934bc0e1b28b3ed +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Dec 19 20:03:33 2012 +0000 + + sna: Ignore throttling during vertex close + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit f91a24fdba517c8e9df5a074db2c789fbf066bb3 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 20 09:46:32 2012 +0000 + + sna/video: Remove XvMCScreenInitProc + + The symbol disappears without warning in xorg-1.14 + + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=58552 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit ee99511846a0f10abeeba8d25d8fb5bf59621b02 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Dec 19 18:02:50 2012 +0000 + + sna/gen4+: Tweak preference of GPU placement for spans + + If the CPU bo is busy, make sure we do not stall for an inplace + operation. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit bfd96b092db5e4e0fc2446752deafd1156cf37b3 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Dec 18 20:54:33 2012 +0000 + + sna/video: Fix presentation of cropped sprites + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 2df1b1abf0728f28d2803a096f945779cbe7c70b +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Dec 18 16:07:26 2012 +0000 + + sna/video: Fix up copying cropped textured video packed data + + Simply ignore the cropping and copy the whole plane rather than + complicate the computation of the packed destination pixels. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 8d523fa824dcb1987557164d048711c1745de378 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Dec 18 16:07:26 2012 +0000 + + sna/video: Fix up destination offset for copying cropped textured video planes + + Oh fun. Textured video expects the source content to be relative to the + origin, whereas overlay video expects the source at the origin. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 7bb4573fcc2cf1b8b6bff5d885a2fa81200d2fd7 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Dec 18 15:48:21 2012 +0000 + + sna/video: Fix up the image size for copying + + Yikes, setting image.x2 == image.x1 meant no data was copied whilst the + video was clipped.
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 551b400377ddc5eb1e89b8b5827a42e810c8d23d +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Dec 18 15:14:00 2012 +0000 + + sna/video: Amalgamate the computation of source vs dest offsets + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit d96a226cc59c641c10153ae3a086a5138c852423 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Dec 18 14:26:18 2012 +0000 + + sna/video: Fix adjustment of drawable vs source origin wrt to clip + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 79cb6304e983514dd754065e65e2381a903f9bd6 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Dec 18 13:49:59 2012 +0000 + + sna/xvmc: Clean up to avoid crash'n'burn + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 0d26082303f3f4006ce4974d402c560613081b23 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Dec 18 10:54:28 2012 +0000 + + sna: Prefer the GPU once again for PolyPoint + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 0e0a2d300633122d6d0f6f82ff110f513b4e64d7 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Dec 18 10:27:04 2012 +0000 + + sna/gen7: Mark the ring switch before checking bo + + As we may do a batch submission due to the change of mode. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit f522fbe7c98ffad86126c3666b2d9f7e616480b8 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Dec 17 23:04:25 2012 +0000 + + sna: Refine check for an unset context switch + + So it appears that we end up performing a context switch on an empty + batch, but one that already has a mode. This is caught later, too late, + by assertions. However, we can change the guards slightly to prevent those + assertions without altering the code too greatly. And I can then think + how to detect where we are setting a mode on the batch but doing no + work - which is likely masking a bigger bug. + + Reported-by: Jiri Slaby <jirislaby@gmail.com> + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=47597 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 6c50cf4809816dbbd93d54f589a79b0dab996180 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Dec 17 22:27:14 2012 +0000 + + sna: Untangle the confusion of caching large LLC bo + + We only use a single cache for very large buffers, so we need to be + careful that we set the tiling on them. Moreover, we need to take extra + care when allocating large CPU bo from that cache to be sure that they + are untiled and the flags are true. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit e474abea7cf761e78e777db07b41ec99c0b6f59f +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Dec 17 15:38:04 2012 +0000 + + sna: Promote pinned-batches to run-time detection + + Now that the feature has been committed upstream, we can rely on the + runtime detection.
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 4d7e3a9123cf41d2dd97c0a8a0d461c189064822 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Dec 17 12:34:05 2012 +0000 + + uxa: Fix copy'n'paste of false not FALSE + + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=58406 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 7a4d1136bd09bfd4d2657c0b1b64d553eeb6ed4f +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Dec 17 09:41:47 2012 +0000 + + sna/video: Pass along the video source offset + + Fortunately nobody had yet noticed that all videos were assumed to play + with a matching src/dst origin. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit dfe9d18f9f97a77ceeb410307010424c789c8bd1 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Dec 17 01:06:57 2012 +0000 + + sna: Limit the default upload buffer size to half the cpu cache + + This seems to help with small slow caches. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 5b0572503eab235bc7eff20d369241330c41e630 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Dec 16 23:04:55 2012 +0000 + + sna: Enable support for opting out of the kernel CS workaround + + Keeping a set of pinned batches in userspace is considerably faster as + we can avoid the blit overhead. However, combining the two approaches + yields even greater performance, as fast as without either w/a, and yet + stable. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 805f78addf3ffb36c736df680806cf722b18fea9 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Dec 16 22:04:54 2012 +0000 + + sna: Try to reuse pinned batches by inspecting the kernel busy status + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit f1aec676810c4a4c180b342d9a83254e08dd55da +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Dec 16 17:37:32 2012 +0000 + + sna: Precompute the base set of batch-flags + + This is to make it easier to extend in future.
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit c7ac12003bd0c7d85fa47d43ee2734b222d84a61 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Dec 16 15:28:24 2012 +0000 + + sna: Only flush at the low fence wm if idle + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 4580bbeac0051417cb03f272112b0cfe697e31b3 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Dec 16 15:00:21 2012 +0000 + + intel: Support debugging through AccelMethod + + Ease debugging by allowing all acceleration or render acceleration to be + disabled through AccelMethod: + + Option "AccelMethod" "off" -> disable all acceleration + Option "AccelMethod" "blt" -> disable render acceleration (only use BLT) + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 58770b7d6401d2d81f7fee1c8c0e788d44149712 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Dec 16 14:59:03 2012 +0000 + + man: Describe Option "AccelMethod" + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 83609af3681fad58af88387077bf7ce0c001a1da +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Dec 16 10:53:26 2012 +0000 + + sna: Tweak the idle SHM CopyArea path to also replace a busy GPU bo + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 6490585f65bde487da7bc41fa5cb1c5a028d0bf4 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Dec 15 23:26:30 2012 +0000 + + sna: Do not force use of the GPU for a copy from a SHM pixmap + + As we will undoubtedly flush and sync upon the SHM request very shortly + afterwards, we only want to use the GPU for the SHM upload iff it is + currently busy. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 3a08f091875f2f0f49697ba9852077094b3a704b +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Dec 15 22:53:44 2012 +0000 + + sna/gen6+: Tweak prefer-blt-bo + + Split the decision between where it is imperative to use the BLT to + avoid TLB misses and the second case where it is merely preferential to + switch. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit ac9ef1fc606e87b48baa47be22bf828dcfe6659f +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Dec 15 20:49:56 2012 +0000 + + sna/gen6+: Keep the bo on its current ring + + Track the most recent ring each bo is executed on, and prefer to keep it + on that ring for the next operation. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 15ccb7148d15d776a661c1b8c5b9b2360fcae4ad +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Dec 15 20:07:56 2012 +0000 + + sna/gen6+: Apply the is_scanout to the key not value in the binding cache + + Oops, we never managed to reuse the cached location of the target + surface as we entered it into the cache with the wrong key. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit fde25b08922d97ca0d4a69c654bf690edbd53b3d +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Dec 15 18:59:53 2012 +0000 + + sna/trapezoids: Add another inline hint + + cell_list_alloc() is only called from one place, and the compiler should + already be inlining it - but does not appear to be. Hint harder.
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 2a21c8b351052be9c32c5669264fb05a8510c957 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Dec 15 17:56:27 2012 +0000 + + sna: Include shm hint in render placement + + The goal is to reduce the preference of rendering to a SHM pixmap - only + if it is already active will we consider continuing to use it on the + GPU. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit a467102a9539c7f4fa8d0700ecdcaba49d77b3f7 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Dec 15 10:00:48 2012 +0000 + + 2.20.16 release + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit b0f8c823b6cafdfdd064c09d58174f946e290541 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Dec 15 09:28:04 2012 +0000 + + sna/dri: Fall back to a blit after a failed flip + + ...rather than force the exchange. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 2c71a8e08abce74b269687d3a6c1edd7f9d643d3 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Dec 15 09:27:07 2012 +0000 + + sna/dri: Honour TripleBuffer Option + + In case anyone ever wants to disable the default. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 6593ad3fecb3d044ee5ca161176d8ecaa0b4126a +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 14 23:48:00 2012 +0000 + + sna/dri: Store and check size of front/back bo attached to a DRI2 drawable + + So that we can prevent feeding back a stale bo when the DRI2 client + tries to swap an old buffer. + + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=57212 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 9df9585cb00958b42461b3139bb7aec32090a869 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 14 15:37:56 2012 +0000 + + sna: Reduce fence watermarks + + Further restrict the amount of fenced bo we try to fit into the batch to + make it easier for the kernel to accommodate the request. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 0d3ba44e448c152a570cc469d289ab057fa7be5c +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 14 12:47:46 2012 +0000 + + sna/gen2+: Experiment with not forcing migration to GPU after CPU rasterisation + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit d867fd01cb0060342102a79600daf43e3dc44a07 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 14 13:10:54 2012 +0000 + + sna/gen3: Don't combine primitives if beginning a ca 2-pass + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 3ca503dac2ea6c036e7ebe878b41923541daf225 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 14 12:49:14 2012 +0000 + + sna/gen3: Remove stray setting of vertex_start + + It is always done at the beginning of vertex emission. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 7f76f100e8033497620ee46548df45afff41064a +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 13 23:12:42 2012 +0000 + + sna/gen2: Reorder reuse_source() to avoid NULL dereference for solids + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 044a54c23384756a5dc1895473abf34f7abb3d83 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 13 23:05:30 2012 +0000 + + sna/gen2: Initialise channel->is_affine for solid + + In case we hit a path where we avoid reusing the source for the mask and + leave is_affine unset for a solid mask.
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 29afd0dc8e893cc4110ee0d70546775dae86ddb3 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 13 22:53:18 2012 +0000 + + sna/gen2: Assertions + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 4d2abe1e3daac74747535d88dff34b024b87bbe9 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 13 22:09:37 2012 +0000 + + sna/gen3: Remove incorrect optimisation of an opaque source for CA + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit d428dbf7ad7c246acb1c301b73b9df4a1309de03 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 13 14:53:50 2012 +0000 + + sna/gen2: Program solid mask using the DIFFUSE component + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 9e7311516da81ab45484b291ec668503c5ded0bb +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 13 14:40:25 2012 +0000 + + sna/gen2: Align surface sizes to an even tile + + Makes this 855gm much happier. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit e646047a563598948206167765eaaf4192cfd77f +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 13 14:23:54 2012 +0000 + + sna: Fix up BLT overwrite detection to use target_handle + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 4f96439e39a4bf4b127af9ccfdc09d061caff9bd +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 13 13:15:52 2012 +0000 + + sna: Fix typo for 830/845 gen + + Must remember, it's octal not decimal. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit f631a56bcb3ff1ce1942b828325a157cef1e0880 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 13 00:55:45 2012 +0000 + + sna: Only flush the batch after an actual relocation + + As we may write preparatory instructions into the batch before checking + for a flush. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 74bbf20e6e652cba55d6d0bc17066f4112f8548c +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Dec 12 21:56:22 2012 +0000 + + sna: Improve the initialisation failure path for pinned batches + + Simplify the later checks by always populating the lists with a single, + albeit unpinned, bo in the case we fail to create pinned batches. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 52c8c9218c8f28fb049b02214d833912a803f911 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Dec 12 21:19:02 2012 +0000 + + sna: Fix the error path in kgem_init_pinned_batches() to use the right iter + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit c7f7dd61fd07dbf938fc6ba711de07986d35ce1f +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Dec 12 19:43:19 2012 +0000 + + sna: Pin some batches to avoid CS incoherence on 830/845 + + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=26345 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit b154d0dc404a152e1283a013a78be06b8d734867 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Dec 12 18:34:54 2012 +0000 + + sna/gen2: STIPPLE requires an argument + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 9001263b32efde1361555432914d9ac3ee780511 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Dec 12 12:03:40 2012 +0000 + + sna/gen3+: Use nearest for unscaled videos + + If the output is unscaled, then we do not require pixel interpolation + (and planar formats are exactly subsampled).
+ References: https://bugs.freedesktop.org/show_bug.cgi?id=58185 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 2dbe7d91a7f15a3a9ddad696c5088ca98898fca2 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Dec 12 09:50:34 2012 +0000 + + sna/gen4: Use the single-threaded SF w/a for spans as well + + Fixes the flickering seen in the fishtank demo, for example. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 2502218a9340044bb660be68f059971119077e29 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Dec 11 17:47:09 2012 +0000 + + sna/dri: Fix handling of current_msc > target_msc + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 39d6c65f437d7877c1647b7ecf45e76daabc76a6 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Dec 11 15:30:54 2012 +0000 + + sna/dri: Query current msc before use + + Might be worth caching the last-known-value so we can skip the query for + an old swap request. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 162e9a2bfc693db186aa481551cf76b3dc5ee55c +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Dec 10 23:11:48 2012 +0000 + + sna/dri: Disable name exchanges for SwapBuffers + + The DRI2 protocol is inherently racy. Fortuitously, this can be swept + under the carpet by forcing the serialisation between the DRI2 clients + by using a blit for the SwapBuffers. + + References: https://bugs.freedesktop.org/show_bug.cgi?id=58005 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 0c2287c735f990a98b39d00f28168d7a5df25aba +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Dec 10 20:09:22 2012 +0000 + + sna/dri: Only special case 'divisor && msc-passed' for immediate flipping + + As Jesse pointed out, it is legal for the client to request that the + flip be some frame in the future even with no divisor. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 2ab29a1688cd313768d928e87e145570f35b4a70 +Author: Jesse Barnes <jbarnes@virtuousgeek.org> +Date: Mon Dec 10 14:55:32 2012 -0800 + + dri2: don't schedule a flip prematurely at ScheduleSwap time + + If divisor is 0 but the current MSC is behind the target, we shouldn't + schedule a flip (which will occur at the next vblank) or we'll end up + displaying it early and returning the wrong timestamp. + + Preserve the optimization though by allowing us to schedule a flip if + both the divisor is 0 and the current MSC is equal to or ahead of the + target; this avoids a round trip through the kernel. + + Reported-by: Mario Kleiner <mario.kleiner@tuebingen.mpg.de> + Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org> + +commit 986e13a56a8544d5b32dbcaacbc0ee9cf5d47e27 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Dec 10 17:29:08 2012 +0000 + + sna: Try installing a fallback config on VT enter in case full desiredMode fails + + This can happen naturally for a 3-pipe config on Ivybridge or if the + outputs are rearranged whilst we slept. Instead of failing to change the + display on the VT, install at least a fb on the CompatOutput so that + hopefully the DE can take over, or give some control to the user. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 8c3b82f207bc8cf697646d3324cb4103da3b7856 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Dec 10 16:20:52 2012 +0000 + + sna: Avoid reusing the same 'busy' bit for two different meanings.
+ + Oops, I thought the 'busy' bit was now unused and apparently forgot it is + used to control the periodic flushing... + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit d1b479a3404e6b52a23e0443c36d0682cbaf3c2f +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Dec 10 11:16:04 2012 +0000 + + sna: Compromise and only flush a split batch if writing to scanout + + A compromise between not flushing quickly enough and flushing too often, + hopefully. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 3e9120d73c6f0c0e06b617da91cc2edce4434bc3 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Dec 10 11:05:16 2012 +0000 + + sna: Immediately flush a split batch + + If we submit a batch early (for example if the GPU is idle), then submit + whatever else the client drew immediately upon completion of its + blockhandler. This is required to prevent flashing due to visible delay + between the clear at the start of the cycle and then the overdraw later. + + References: https://bugs.freedesktop.org/show_bug.cgi?id=51718 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit aa8c5d8201006397bb32ed6bc28618f9aa77a68a +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 6 22:08:08 2012 +0000 + + sna/sprite: Add a DBG to report whether the kernel supports sprites + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 25c0d440dee45c03f5e45b8e0e45071c0c32f507 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Dec 9 12:11:53 2012 +0000 + + sna: Move source to CPU prior to referencing for inplace trapezoids + + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56825 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 3e82fcc8d243b7ffe1a3d3249a5cdb5fd068093f +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Dec 9 11:19:13 2012 +0000 + + sna/gen4+: Refine test for preferring GPU spans + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit c8f622726a4463b419d032b379576cfb3bc492df +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Dec 9 09:26:03 2012 +0000 + + sna: Replace remaining kgem_is_idle() with kgem_ring_is_idle() + + Further experimentation... + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 4e4e10935d2815fb62aeaedbfffe10aad115ec0b +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Dec 8 22:39:32 2012 +0000 + + sna: Flush upon change of target if GPU is idle + + The aim is to improve GPU concurrency by keeping it busy. The possible + complication is that we incur more overhead due to small batches. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit cef11795f627a393d4254845b0a19eefbf6c782c +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 7 17:40:37 2012 +0000 + + sna: Convert the ring from BLT/3D to the internal index for kgem_ring_is_idle() + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 52405b2aed492dc7f76fbf082122842f621e7c06 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 7 17:24:42 2012 +0000 + + sna: Only inspect the target ring for busyness + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 4b7bbb2a23b03bac63f864c33f47fab88dedbf67 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 7 16:43:32 2012 +0000 + + sna: Only flush before adding fresh surfaces to the batch + + Previously, before every operation we would look to see if the GPU was + idle and we were running under a DRI compositor.
If the GPU was idle, we + would flush the batch in the hope that we reduce the cost of the context + switch and copy from the compositor (by completing the work earlier). + However, we would complete the work far too early and as a result + would need to flush the batch before every single operation, resulting in + extra overhead and reduced performance. For example, the gtkperf + circles benchmark under gnome-shell/compiz would be 2x slower on + Ivybridge. + + Reported-by: Michael Larabel <michael@phoronix.com> + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 65a8c23ca1bc8e2ebd087027a30358704d4bf11c +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Dec 7 14:56:18 2012 +0000 + + sna: Only flush at the low aperture watermark if idle + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 4bfc5e90f54be1b0997dec9e81796d67b376a01f +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Nov 8 23:42:10 2012 +0000 + + sna: Mark proxies as dirty on first relocation + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 1d2fa5731b7ecfe34a8af809e45bcd3b0b70c890 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 6 22:15:09 2012 +0000 + + Remove the default log message + + Breaks compilation with xorg-1.13 + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 7f4d4afa629bd18be89d7270e6178a865cf9586e +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 6 17:08:15 2012 +0000 + + Fix compilation of UMS probe following 13f47008ec + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit d733f7d1f1dc343ac34c4a27ac99d71fc0572bc2 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 6 16:55:00 2012 +0000 + + sna/gen4+: Add common glyph-to-dst emitters + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 13f47008ec411609968c40b8ec34dd495f14c50b +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Dec 6 14:05:33 2012 +0000 + + Refactor the common probe methods for scrn construction + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 0040eb84c9187476a75202ebb251dd74354e4fc7 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Dec 5 09:25:17 2012 +0000 + + sna: Don't disable CPU bo if supported on unknown hw + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 14069f48645867a735ebdccb1e27a62364643c38 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Dec 5 09:24:02 2012 +0000 + + sna: Assume that future hardware only gets more flexible + + E.g. that BLT can always write to cacheable memory, inflexible fences + are a thing of the past, etc.
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit b82bfcb54a6af0d1ee17806ef94d9da504cad606 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Dec 4 17:26:47 2012 +0000 + + sna/gen6+: Cache the scanout targets separately to avoid overriding PTE caching + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 626dd1324dd2c5b14ca4aff598b5eb1e45550e69 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Dec 4 12:52:50 2012 +0000 + + sna/gen4: Workaround render corruption with multiple SF threads + + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=57410 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit dbdb8fabfbade3f19fd8af3524468b5e6668bb66 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Dec 4 12:37:23 2012 +0000 + + sna/gen4: Special case solids through the general vertex emitter + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit a17354d5ce6aeeab3f6e42aba63fce06ad18c526 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Dec 4 12:01:26 2012 +0000 + + sna/gen4: Remove unused CC viewport + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit b9afb9cb614d2ad44330eed03b3f577a35184a88 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Dec 4 11:14:58 2012 +0000 + + sna/gen4: Avoid emitting URB_FENCE across a cache-line + + Old erratum. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 08c30c48bdd0db500498bd9617c15f37bacd8de9 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Dec 3 13:23:33 2012 +0000 + + sna: Tidy addition of fake GTF modes for panels + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 6f675eeaeade4728af566891b2afbe5b44fbdc2e +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Dec 3 10:47:35 2012 +0000 + + 2.20.15 release + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 37eb7343be1aeeb90a860096756603a577df1a77 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Dec 1 09:40:11 2012 +0000 + + sna/gen5: Inspired by gen4, reorder the flushing + + This may not be totally safe, but it is a nicer explanation for random + single character corruption. + + References: https://bugs.freedesktop.org/show_bug.cgi?id=51422 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit a8a99428c14c8aed2082853cc60d0f98a1fa2d86 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Dec 1 09:44:49 2012 +0000 + + sna/dri: Unknown generations are given the max value and presume i965_dri.so + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 382bb7bf77fca412fdefd7c304f395d1fe9483b5 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 30 16:24:30 2012 +0000 + + sna/gen6+: Only apply the BLT w/a for self-copies + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 1aee8acacfe5869a072d9f20f3b8290b16683260 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 30 12:17:25 2012 +0000 + + sna: Unify gen4 acceleration again + + After disabling render-to-Y, 965g seems just as happy with the new code + paths as g4x.
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 5d6dd9c5a7eeb1f879525430ad89ab74d427e469 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 30 12:12:49 2012 +0000 + + Convert generation counter to octal + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 131600020638ef15166361214cd5e1a0c08c2ea6 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 30 11:43:56 2012 +0000 + + sna: Prevent gen4 from rendering to I915_TILING_Y + + It always seems to end up in a hang... + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit ee4d1873ff504c2150b51d13864164b02b85dd53 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 30 09:52:49 2012 +0000 + + sna/g4x: And remove one more flush point + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 1b6c1a30723b1d13e9bd3df0b59a8d75639c89be +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 30 09:27:57 2012 +0000 + + sna: Increase tiling alignment to an even tile + + Seems to help g4x. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 6aeb6fdf75fa322d8f5ffe393337d8195d7a6a62 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 30 09:03:53 2012 +0000 + + sna/g4x: Remove the flush before the pipelined pointer changes + + This one appears unneeded. Hopefully. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 8be2c6695509809c0ab0c5c014e11dc733f73006 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Nov 29 19:56:15 2012 +0000 + + sna/g4x: Emit the flush prior to changing pipelined state + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 2d5d55702bb8eced32d5b8cb3c0cd125fd99d6dc +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Nov 28 15:46:29 2012 +0000 + + sna/gen6+: Override PTE cacheability bits for reads + + This is primarily useful for enabling the render cache for reads from + scanouts. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 3ed87945c9e83fefcbda053b616856658bf2ac8e +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Nov 28 11:05:40 2012 +0000 + + sna/gen5+: Disable max-vbo address + + As we do not use this feature, disable it and save the relocation. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit b2c9e9da639a134577485f83e0f66f54e2371b98 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Nov 27 11:32:11 2012 +0000 + + sna/blt: Avoid clobbering common state before converting to a BLT + + In case we need to continue on with the render operation, we need to + preserve the existing state. + + Reported-by: Jiri Slaby <jirislaby@gmail.com> + References: https://bugs.freedesktop.org/show_bug.cgi?id=57601 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 1e06d19a00f5a5a05369deeb3c5ae15b282c0f92 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Nov 26 15:30:09 2012 +0000 + + sna: Disable shadow tracking upon regen + + References: https://bugs.freedesktop.org/show_bug.cgi?id=56608 + References: https://bugs.freedesktop.org/show_bug.cgi?id=52255 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit d21ed3a6aba5ae227cc5ecd164f3c18bc48c69af +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Nov 26 10:34:28 2012 +0000 + + sna: Use a single execobject flag to mark read/write domains + + Slight modification to the proposed API to only pass the simplified + domain tracking now performed by the kernel. 
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 7a904ce839933d57176e013cdad147533c33ca2f +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Nov 26 08:48:31 2012 +0000 + + 2.20.14 release + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 1367e3f9ef5f606c8927cbde441a2ea41fa6d025 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Nov 25 00:24:45 2012 +0000 + + sna: Exploit the alpha-fixup of the BLT for texture format conversions + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 6d6399f97cf7cb91dcf89e9a5cd1243f761e4314 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Nov 25 00:05:44 2012 +0000 + + sna: Transform a simple repeat pattern into range of the BLT + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 39f1e228b74e98d3d87157cf093fc56ca31e6b13 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Nov 24 20:16:29 2012 +0000 + + sna: Make GPU idle more consistent during wakeup + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 40b6121746c55153de444ccb753df80706ff3a69 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 23 18:21:41 2012 +0000 + + sna/g4x: Refine the w/a for the broken sf shader + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit e0b906b09697b5fe7a5be2fdc52abd9b1c73f96d +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 23 18:01:25 2012 +0000 + + sna/g4x: Use the render pipeline more often for composite operations + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 54d8968da5ae39bfbcad511322926931bce2bda3 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 23 17:47:49 2012 +0000 + + sna/gen4: Revert changes to 965g[m] + + The changes tested on g45/gm45 prove to be highly unstable on 965gm, + suggesting a radical difference in the nature of the bugs between the + two generations. In theory, g4x has additional features that could be + exploited over and above gen4 which may prove interesting in the future. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit d560296561f68c3ac841345c0f4ce2c8e7381156 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 23 16:35:06 2012 +0000 + + sna/gen4: Don't force a flush for the dirty target if we do not read back + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 4023b2044757a9a67d564be0c8adf4885973a6e3 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 23 15:42:18 2012 +0000 + + sna/gen4: Force composite(WHITE, glyph) for building the glyphstring mask + + For reasons that are not apparent, if we don't composite with + source/mask for the glyph strings, there appears to be some cache + corruption. About as bizarre as the rest of gen4 idiosyncrasies.
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit f74b62755c6e41097c23cc506984859e556a3415 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 23 14:59:42 2012 +0000 + + sna/gen4: Set composite op before testing for a BLT compatible op + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 4c922eb52cadb867a0a15929e5a214c84a5992f3 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 23 14:19:59 2012 +0000 + + sna/gen4: Pass the mask channel explicitly rather than through a dummy mask + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 2e68efa8ec66b4c89e9816bfa15067b398da5e3e +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 23 14:04:51 2012 +0000 + + sna/gen4: Reduce the flush before performing the CA pass + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 43aff6708fe97aa2fae0e30f98fc7cd9d7311b75 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 23 13:37:44 2012 +0000 + + sna/gen4: Update render fill routines to use the dummy mask + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 352910712266202ef017066891ec383fd037fc4a +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 23 13:29:01 2012 +0000 + + sna/gen4: Move the flush from inside the spans to emit_state() + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 217f3e835b99002669999f818afa0d5c3a1cc852 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 23 12:32:14 2012 +0000 + + sna/gen4: Backport the more efficient composite box emitters + + Now that we aren't flushing after every single rectangle, we can strive + for a little efficiency. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit d3145e3f8146e7d864d32aec49c44c04d619e56a +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 23 12:28:21 2012 +0000 + + sna/gen4: Use a dummy white mask to avoid the flush w/a when compositing + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 65d530b59b957cc5f303ae819baad8075a555ac0 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Nov 22 08:18:38 2012 +0000 + + Revert "uxa: Refactor early-exit paths of uxa_try_driver_composite()." + + This reverts commit fa5c573455cf63090dbb6d167d4e5f1cb23daf72 as it + causes a SIGSEGV. + + Reported-by: lu hua <huax.lu@intel.com> + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=57389 + +commit d3a49f36395d737698616fe8ba9da7b74cd2d89a +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Nov 21 18:41:00 2012 +0000 + + sna/gen3+: Clear the render.vbo when replacing it for vertex upload + + As we may trigger a flush and a retire when searching for a vertex + buffer for the new vertices, we need to be careful to decouple the + destroyed vbo in order to avoid a use-after-free when inspecting the + state. 
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 1c57a52a1f46e8401429955d8c96fd5095c9012a +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Nov 21 18:17:31 2012 +0000 + + sna: Assert that we do not replace a GPU bo with a proxy + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 8da12a00ee653510e1f1a6fecb28dbb36faa8400 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Nov 21 17:28:46 2012 +0000 + + sna: Skip uploading uninitialised pixmap data + + References: https://bugs.freedesktop.org/show_bug.cgi?id=47597 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 0696ea4bd601ed823dbded03eaef6f316df2a5e8 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Nov 21 17:25:33 2012 +0000 + + sna: Add the missing assertion for !proxy + + References: https://bugs.freedesktop.org/show_bug.cgi?id=47597 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit ee72375ecd4f6d6e756bc361fa512b6675309540 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Nov 21 17:08:53 2012 +0000 + + sna: Do not migrate uninitialised pixmaps + + Reported-by: Jiri Slaby <jirislaby@gmail.com> + References: https://bugs.freedesktop.org/show_bug.cgi?id=47597 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 44dad490140d85a4c0dcb916030c36a838670c01 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Nov 21 16:22:35 2012 +0000 + + sna: Do not dispose of a shadow pixmap + + Fixes regression from 2249e9edc37811c07e2807d6b4def05585b44c22 + + Reported-by: Jiri Slaby <jirislaby@gmail.com> + References: https://bugs.freedesktop.org/show_bug.cgi?id=47597 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 9c627a05247690891062a2c0c1c8f7bbc0273104 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Nov 21 16:03:02 2012 +0000 + + sna: Remove the kgem_bo_is_mappable refcnt assertion from freed paths + + A few callers of kgem_bo_is_mappable operate on freed bo, and so need to + avoid the assert(bo->refcnt). + + References: https://bugs.freedesktop.org/show_bug.cgi?id=47597 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit c0c48c7a5aca4d24936efbeaefc7674ada2ef87f +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Nov 21 14:46:45 2012 +0000 + + sna: Add a few refcnt assertions + + References: https://bugs.freedesktop.org/show_bug.cgi?id=47597 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 0884777c33d20dbc329b98ad0db5ffb0df93ac8c +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Nov 21 14:31:44 2012 +0000 + + sna: Fix bogus assertion from 03fb9ded43 + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 2249e9edc37811c07e2807d6b4def05585b44c22 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Nov 21 13:05:36 2012 +0000 + + sna: Dispose of local copy for render sources + + If we transfer the pixmap to the GPU to use as a render source, presume + that we will not need to then touch the local copy (at least for a + while) and so return that memory to the system.
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 736bb0f7058bf05ef48cdfe6a30d880de817aff9 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Nov 21 12:16:46 2012 +0000 + + sna: Tighten a couple of assertions for damage with use_bo + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 03fb9ded43f9bf8b73f99d5b3a8dc592fe22b523 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Nov 20 21:13:07 2012 +0000 + + sna: Assert that we do not create a proxy with existent GPU damage + + References: http://bugs.freedesktop.org/show_bug.cgi?id=47597 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 9ab1d1f94e502e5fde87e7c171f3502f8a55f22b +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Nov 20 18:42:58 2012 +0000 + + sna/dri: Queue a vblank-continuation after flip-completion + + If a vblank request was delayed due to a pending flip, we need to make + sure that we then queue it after that flip or else progress ceases. + + Reported-by: Jiri Slaby <jirislaby@gmail.com> + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56423 + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=57156 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 7a7a76b359f73a4c4bcda0d88004f4dd5e94a186 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Nov 20 16:05:32 2012 +0000 + + sna/dri: Avoid a NULL dereference inside a DBG + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit fa5c573455cf63090dbb6d167d4e5f1cb23daf72 +Author: Eric Anholt <eric@anholt.net> +Date: Sat Nov 17 13:11:13 2012 -0800 + + uxa: Refactor early-exit paths of uxa_try_driver_composite(). + + Saves 200b of code at -O2. + +commit edefb64d2b1c95b0b678cb222273ab64b2e6db2a +Author: Eric Anholt <eric@anholt.net> +Date: Sat Nov 17 13:11:12 2012 -0800 + + uxa: Work around uninitialized-value warning. + + The compiler isn't noticing that localDst only diverges from pDst when + the _copy variables have also been set. + +commit 18b2e2a82724407196001ca853bd83150c66c5bd +Author: Eric Anholt <eric@anholt.net> +Date: Sat Nov 17 13:11:10 2012 -0800 + + uxa: Add printf attribute to intel_debug_fallback(). + + Shuts up a bunch of warnings with xorg's shared warning flags, and + should give us more informative warnings in our code. + +commit 2d1e267e662505ca0dd318765464a24739dc5bd8 +Author: Eric Anholt <eric@anholt.net> +Date: Sat Nov 17 13:11:09 2012 -0800 + + uxa/dri: Factor out the repeated swap fallback code. 
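To illustrate the printf-attribute change above: a self-contained sketch of the GCC format annotation, using a generic debug_fallback() stand-in rather than intel_debug_fallback()'s actual prototype:

    #include <stdarg.h>
    #include <stdio.h>

    /* Ask GCC to type-check the variadic arguments against the format
     * string; the two numbers are the 1-based positions of the format
     * parameter and of the first argument to be checked. */
    #if defined(__GNUC__)
    #define ATTR_PRINTF(fmt, args) __attribute__((format(__printf__, fmt, args)))
    #else
    #define ATTR_PRINTF(fmt, args)
    #endif

    static void debug_fallback(const char *format, ...) ATTR_PRINTF(1, 2);

    static void
    debug_fallback(const char *format, ...)
    {
        va_list ap;

        va_start(ap, format);
        vfprintf(stderr, format, ap);
        va_end(ap);
    }

With the attribute in place, a mismatched call such as debug_fallback("%d", "oops") is diagnosed at compile time instead of producing garbage at runtime.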
+ +commit cd2f373da7a14e004c999f9f0efaf88c785d3d3f +Author: Eric Anholt <eric@anholt.net> +Date: Sat Nov 17 13:11:08 2012 -0800 + + configure.ac: Fix bad syntax for test calls + +commit b8c01d9bd7ce5656706ebebd16e5a8c5ca0ba487 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Nov 19 15:20:10 2012 +0000 + + sna/dri: Add an assert that the cached DRI2 buffer is pinned for DRI + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 84b1a02fa9fde02366e0384044526982e70d0e8d +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Nov 19 13:41:28 2012 +0000 + + sna/dri: Avoid setting off-delay after a failed flip (use-after-free) + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit b4dd1103a55406bcd65f137c668701074a5c41b6 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Nov 18 12:21:49 2012 +0000 + + sna/gen6+: Tweak prefer-blt to offset latency when in rc6 + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 0cb8544dc16d4c1e9ae7f1ee74ee26c7501a9ed7 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Nov 18 12:13:46 2012 +0000 + + Remove useless indirection of intel_chipsets + + Once upon a time this was used to hide a compiler warning about a + pointer mismatch; now the compiler still warns about the cast, making + the indirection moot. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 8509e474f57533fc6afcf213165f4c8633631fb5 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 16 23:02:44 2012 +0000 + + sna/dri: Clear the last-used-cpu flag when performing CopyRegion + + Keeps the internal bookkeeping intact after the small bypass. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 866ed4a26cbbb29ef3845b0aa56383c4d951c65a +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Nov 15 15:13:14 2012 +0000 + + sna/dri: Add a couple more asserts to track injection of a rogue bo + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit d2897cb0136ffec83365c7530ed544b562cac478 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Nov 13 12:43:44 2012 +0000 + + sna/gen4,5: Fix the opacity shader to use the right vertex attribute + + Reported-by: Edward Sheldrake <ejsheldrake@gmail.com> + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=57054 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 440eaa049756e5266f3bd80e2751f1fd0d5f9890 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Nov 13 12:42:58 2012 +0000 + + sna/gen4: Tidy emission of opacity vertex attribute + + Just make it more consistent between the various emitters + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit b6d2bb961517623d46aa6944307cb998ee125459 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Nov 13 10:05:56 2012 +0000 + + sna/gen4: Do not prefer inplace non-rectilinear spans + + As gen4 requires the per-rectangle vertex flush, emitting spans on the + GPU is inefficient and so we prefer to composite the mask instead. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit ae293609c7400cd3c753ed3762772264c4741df5 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Nov 13 10:21:29 2012 +0000 + + sna/gen4: Always initialise redirect + + Do not assume the caller cleared the composite-op structure for us.
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 2954f15e2bcb590a90c2cb6077c0843ee25a4413 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Nov 13 09:46:19 2012 +0000 + + sna: Specialise the decision for inplace xor uploads + + Fixes a regression from + + commit 0be1d964713ca407f029278a8256d02d925dc9da + Author: Chris Wilson <chris@chris-wilson.co.uk> + Date: Tue Sep 11 21:48:24 2012 +0100 + + sna: Use inplace X tiling for LLC uploads + + which introduced the ability to swizzle into CPU maps, but also + convinced the xorg path to do the same - which for large images blows up. + + Reported-by: Michael Laß <bevan@bi-co.net> + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=57031 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 66eb0adffa63ef8ece7621ba90dc96af91549612 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Nov 12 14:45:56 2012 +0000 + + sna/dri: Apply the can_blit() check for CopyRegion + + CopyRegion() also needs to check for stale bo in case the pixmap + dimensions have changed size and so may cause out-of-bounds read/writes. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit b27ecf3059bc066ef59f2a71c1d8d8f0ffec7191 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Nov 12 14:06:06 2012 +0000 + + sna/dri: Prevent scheduling a swap on stale buffers + + If the screen has been reconfigured and the DRI client tries to swap the + old buffers (having not processed the invalidate event and retrieved the + current names), quietly drop the request. If we try to obey the request, + we will end up attaching a back buffer that is the wrong size to the + scanout... + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 34aa1e3b2702a55799a5655a3ba10bce4cc2065a +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Nov 12 11:22:53 2012 +0000 + + sna: Compile against ancient libdrm + + We need to trim the sprite video overlays if the prototypes are not + known. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 8f1afde57dca27e6542b0b8e7c87750f3d6367bf +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Nov 11 16:16:20 2012 +0000 + + 2.20.13 release + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit b16219a19f48b52dda91f26fcbbbbeda056589ab +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Nov 11 11:05:35 2012 +0000 + + sna: Filter out the full-damage marker when undoing redirection + + ==25902== Invalid read of size 4 + ==25902== at 0x4980E13: _list_del (intel_list.h:218) + ==25902== by 0x4980EAB: list_del (intel_list.h:240) + ==25902== by 0x4981F4B: free_list (sna_damage.c:403) + ==25902== by 0x4985131: __sna_damage_destroy (sna_damage.c:1467) + ==25902== by 0x49A5276: sna_render_composite_redirect_done (sna_render.c:1921) + ==25902== by 0x49C68FC: gen2_render_composite_done (gen2_render.c:1136) + ==25902== by 0x497F90F: sna_composite (sna_composite.c:567) + ==25902== by 0x4994725: glyphs_via_mask (sna_glyphs.c:1139) + ==25902== by 0x4995FB7: sna_glyphs (sna_glyphs.c:1688) + ==25902== by 0x8150EB4: ??? (in /usr/bin/Xorg) + ==25902== by 0x813CA38: CompositeGlyphs (in /usr/bin/Xorg) + ==25902== by 0x8146DE1: ???
(in /usr/bin/Xorg) + ==25902== Address 0x7c079ac2 is not stack'd, malloc'd or (recently) free'd + + Reported-by: bonbons67@internet.lu + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56785 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 69acbb77e8aad3370d5e8d9a9e067c54872d7082 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Nov 11 10:49:59 2012 +0000 + + sna: Fix printing of uninitialised value in DBG + + ==25902== Use of uninitialised value of size 4 + ==25902== at 0x423098E: _itoa_word (_itoa.c:196) + ==25902== by 0x4233F7F: vfprintf (vfprintf.c:1602) + ==25902== by 0x42FAFAD: __vsnprintf_chk (vsnprintf_chk.c:65) + ==25902== by 0x81DBE8E: Xvscnprintf (in /usr/bin/Xorg) + ==25902== by 0x81DC8FB: LogVMessageVerb (in /usr/bin/Xorg) + ==25902== by 0x81DCA62: LogVWrite (in /usr/bin/Xorg) + ==25902== by 0x81DCA9B: VErrorF (in /usr/bin/Xorg) + ==25902== by 0x81DC333: ErrorF (in /usr/bin/Xorg) + ==25902== by 0x49B2FA8: trapezoid_span_inplace__x8r8g8b8 (sna_trapezoids.c:5069) + ==25902== by 0x49B3407: trapezoid_span_inplace (sna_trapezoids.c:5166) + ==25902== by 0x49B4C96: sna_composite_trapezoids (sna_trapezoids.c:5619) + + Reported-by: bonbons67@internet.lu + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56785 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 66e4c8ff40ab8cf722efa4293bb17b0d8f2dfa88 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Nov 11 09:40:09 2012 +0000 + + sna: Flush pending rendering before enabling an output + + This is to prevent falling into the trap of the rendering being delayed + until the next client renders some new content. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 94dd0b9ee9f55e7c09b8c0ee18939fa69ce66da2 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Nov 10 16:52:09 2012 +0000 + + sna/gen2: Fix use of uninitialised redirection + + ==29553== Invalid read of size 4 + ==29553== at 0x4980E1B: _list_del (intel_list.h:218) + ==29553== by 0x4980EB3: list_del (intel_list.h:240) + ==29553== by 0x4981F53: free_list (sna_damage.c:403) + ==29553== by 0x4985139: __sna_damage_destroy (sna_damage.c:1467) + ==29553== by 0x49A527E: sna_render_composite_redirect_done (sna_render.c:1921) + ==29553== by 0x49C6904: gen2_render_composite_done (gen2_render.c:1136) + ==29553== by 0x497F917: sna_composite (sna_composite.c:567) + ==29553== by 0x8150C41: ??? (in /usr/bin/Xorg) + ==29553== by 0x8142F13: CompositePicture (in /usr/bin/Xorg) + ==29553== by 0x8145F58: ??? (in /usr/bin/Xorg) + ==29553== by 0x81436F2: ??? (in /usr/bin/Xorg) + ==29553== by 0x807965C: ???
(in /usr/bin/Xorg) + ==29553== Address 0x9407e188 is not stack'd, malloc'd or (recently) free'd + + Reported-by: bonbons67@internet.lu + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56785 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 0f1c30818c9d782b066147448bbcc9ac95ac834f +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Nov 10 16:52:09 2012 +0000 + + sna: Fix use of uninitialised value in DBG + + ==29553== Use of uninitialised value of size 4 + ==29553== at 0x4230964: _itoa_word (_itoa.c:195) + ==29553== by 0x4233F7F: vfprintf (vfprintf.c:1602) + ==29553== by 0x42FAFAD: __vsnprintf_chk (vsnprintf_chk.c:65) + ==29553== by 0x81DBE8E: Xvscnprintf (in /usr/bin/Xorg) + ==29553== by 0x81DC8FB: LogVMessageVerb (in /usr/bin/Xorg) + ==29553== by 0x81DCA62: LogVWrite (in /usr/bin/Xorg) + ==29553== by 0x81DCA9B: VErrorF (in /usr/bin/Xorg) + ==29553== by 0x81DC333: ErrorF (in /usr/bin/Xorg) + ==29553== by 0x49434F0: kgem_create_buffer (kgem.c:4887) + ==29553== by 0x4943B09: kgem_create_buffer_2d (kgem.c:4969) + ==29553== by 0x4943E19: kgem_upload_source_image (kgem.c:5021) + ==29553== by 0x49A0567: upload (sna_render.c:505) + ==29553== + + Reported-by: bonbons67@internet.lu + References: https://bugs.freedesktop.org/show_bug.cgi?id=56785 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit cc2b13c9c05e57dc5004d93b56f332ea95f0a4ef +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Nov 10 11:50:15 2012 +0000 + + sna: Specify read/write domains for no-relocation fastpath + + On review (read triggering BUGs), we do need to supply the domain tracking + of the buffers that are being replaced from the relocation path. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 0c4a2bcc3d63ecc02e3a940e38e9a416b51ad0c8 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Nov 10 12:34:52 2012 +0000 + + sna: Allow snooped buffers to be retained (and reused) between batches + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit f5d79b202dd448e61ab6ffce26fe9cbf9051d770 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Nov 10 10:30:04 2012 +0000 + + sna/gen2: Add a modicum of fallback DBG + + References: https://bugs.freedesktop.org/show_bug.cgi?id=56785 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 27327633138dce159ca2e91fe5eac1565bd45e1c +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 9 17:08:01 2012 +0000 + + sna/gen4: Only 965gm suffers the !snoop restriction + + So fixup the bogus assertion for g4x + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 8d3b5ea135fd8f16da2cbfb98041e32c7001a38f +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 9 15:31:03 2012 +0000 + + xvmc: Use DRMINTEL_LIBS instead of hardcoding -ldrm_intel + + Reported-by: Maarten Lankhorst <maarten.lankhorst@canonical.com> + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit f040b97b01495aa43f7771ebb8ca5c0d44038bc1 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Nov 8 23:42:10 2012 +0000 + + sna: Mark no-reloc write buffers + + If we bypass the relocation processing, we also then bypass the + pending-write analysis, so we need to supply those to the kernel + ourselves (to maintain gpu-cpu coherency). + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> +
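The pair of no-relocation commits above describe the interface that was later accepted upstream; a hedged sketch of how an execbuffer object might be filled in under it (the helper is illustrative, not the driver's code, but the flag names are the upstream i915_drm.h ones):

    #include <stdbool.h>
    #include <stdint.h>
    #include <string.h>
    #include <drm/i915_drm.h>

    /* With I915_EXEC_NO_RELOC the kernel trusts the presumed offset of
     * each object, so userspace must still declare which buffers are
     * written, via EXEC_OBJECT_WRITE, to keep the gpu-cpu domain
     * tracking coherent. */
    static void
    exec_object_init(struct drm_i915_gem_exec_object2 *obj,
                     uint32_t handle, uint64_t presumed_offset, bool write)
    {
        memset(obj, 0, sizeof(*obj));
        obj->handle = handle;
        obj->offset = presumed_offset; /* reused if still accurate */
        if (write)
            obj->flags |= EXEC_OBJECT_WRITE;
    }

The batch is then submitted with execbuf.flags |= I915_EXEC_NO_RELOC, and relocation processing is skipped whenever every presumed offset still matches.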
+commit 85ba7e96268dbb8da4bb34078333695a451c6570 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Nov 8 15:56:13 2012 +0000 + + sna: Experiment with using reloc.handle as an index into the execbuffer + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 93d8dddbb92431d6e2c48a17b71cac9f7047902e +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Nov 8 09:41:21 2012 +0000 + + sna: Set the known offset for the batch as well + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 120fa0ef8d04f5e82e5f7a0636033d3d96efa1e8 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Nov 7 17:41:20 2012 +0000 + + sna: Support a fast no relocation changed path + + x11perf -copywinwin10 on gm45 with c2d L9400: + before: 553,000 op/s + after: 565,000 op/s + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit b7d2fcf47a9569d0944097a8be60ca3be72b42f6 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Nov 8 08:55:25 2012 +0000 + + Remove reliance on hard-coded DRI name + + This provides for using the existing DDX with future DRI drivers which + may break from the traditional names - but only with the help of the + user/packager. This scheme needs to be replaced with a robust mechanism + for driver loading if AIGLX and co are to be kept. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit cefce9c81585b73db7620e08fcf60c89a8204873 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Nov 6 17:26:34 2012 +0000 + + sna: Abandon kernels that do not support execbuffer2 + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit b2245838c15b54d72557de8facb7cc15d59624ae +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Nov 6 16:32:32 2012 +0000 + + sna/gen4: opacity spans require the per-rectangle flush w/a + + Note that this is worsened, but not caused, by: + + commit e1a63de8991a6586b83c06bcb3369208871cf43d + Author: Chris Wilson <chris@chris-wilson.co.uk> + Date: Fri Nov 2 09:10:32 2012 +0000 + + sna/gen4+: Prefer GPU spans if the destination is active + + References: https://bugs.freedesktop.org/show_bug.cgi?id=55500 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit a0540ebff083974688c863e08203e3d71a297340 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Nov 6 16:00:42 2012 +0000 + + sna/gen4: Remove a couple of old, now redundant, w/a flushes + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit aaaa6c356456a4bab595c6e6485893c538064e37 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Nov 6 14:34:51 2012 +0000 + + sna/gen4: Flush after pipelined pointer updates (inverted logic!) + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 28bda6707d979bca29dbea04e932819de204d920 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Nov 5 22:30:29 2012 +0000 + + sna: Prevent use-after-free during partial move-to-gpu + + As we reuse the input parameter 'box' to hold the array of boxes that + need to be migrated, we need to be careful that we do not later confuse + it with the original input parameter.
Otherwise, + + ==1315== Invalid read of size 2 + ==1315== at 0x928B091: box_inplace (sna.h:506) + ==1315== by 0x9292278: sna_pixmap_move_area_to_gpu (sna_accel.c:2554) + ==1315== by 0x9292C14: sna_drawable_use_bo (sna_accel.c:2774) + ==1315== by 0x9356C01: gen7_composite_set_target (gen7_render.c:2448) + ==1315== by 0x9357AA2: gen7_render_composite (gen7_render.c:2800) + ==1315== by 0x92DB12E: glyphs_to_dst (sna_glyphs.c:552) + ==1315== by 0x92DEA8D: sna_glyphs (sna_glyphs.c:1664) + ==1315== by 0x4F920E: damageGlyphs (in /tmp/Xorg) + ==1315== by 0x4F2FF6: ProcRenderCompositeGlyphs (in /tmp/Xorg) + ==1315== by 0x437260: Dispatch (in /tmp/Xorg) + ==1315== by 0x426466: main (in /tmp/Xorg) + ==1315== Address 0xd637054 is 20 bytes inside a block of size 208,464 free'd + ==1315== at 0x4C2A2FC: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) + ==1315== by 0x92CCFCD: __sna_damage_destroy (sna_damage.c:1469) + ==1315== by 0x928AD74: sna_damage_destroy (sna_damage.h:284) + ==1315== by 0x9291CB2: sna_pixmap_move_area_to_gpu (sna_accel.c:2470) + ==1315== by 0x9292C14: sna_drawable_use_bo (sna_accel.c:2774) + ==1315== by 0x9356C01: gen7_composite_set_target (gen7_render.c:2448) + ==1315== by 0x9357AA2: gen7_render_composite (gen7_render.c:2800) + ==1315== by 0x92DB12E: glyphs_to_dst (sna_glyphs.c:552) + ==1315== by 0x92DEA8D: sna_glyphs (sna_glyphs.c:1664) + ==1315== by 0x4F920E: damageGlyphs (in /tmp/Xorg) + ==1315== by 0x4F2FF6: ProcRenderCompositeGlyphs (in /tmp/Xorg) + ==1315== by 0x437260: Dispatch (in /tmp/Xorg) + + Reported-by: Matti Ruohonen <kiesus@gmail.com> + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56591 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit e62b0cbf958d1ad95e4522973253a1ae5c1a4da9 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 2 15:54:20 2012 +0000 + + sna: Add a small delay during startup if another master is still active + + There exists a race with plymouthd that can cause the drm device to + reject us as the rightful master, and so cause X to fail to load. Try + waiting for a couple of seconds for whatever it was to close before + giving in. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit e1a63de8991a6586b83c06bcb3369208871cf43d +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 2 09:10:32 2012 +0000 + + sna/gen4+: Prefer GPU spans if the destination is active + + Trying to avoid using the inplace scanline rasteriser for large shapes. 
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 72bcd8f85c98502b13a67d9c606371afe513584c +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Nov 1 09:30:18 2012 +0000 + + sna: Try to reduce ping-pong migration for intermixed render/legacy code paths + + References: https://bugs.freedesktop.org/show_bug.cgi?id=56591 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 4e363906a5ef15e1eb0a387cfb6b3445ac185b9d +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Oct 31 22:58:59 2012 +0000 + + sna: Set a valid box when checking for GPU bo for BLT composite ops + + Reported-by: Jiri Slaby <jirislaby@gmail.com> + References: https://bugs.freedesktop.org/show_bug.cgi?id=47597 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit b924831e445615b82a53b10e1849720e933eddfe +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Oct 31 22:55:56 2012 +0000 + + sna: Preserve mode if flushing before a scanline wait + + Reported-by: Jiri Slaby <jirislaby@gmail.com> + References: https://bugs.freedesktop.org/show_bug.cgi?id=47597 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 678f9586807071bef813bb69d451f14d2fcbcc04 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Oct 31 11:26:18 2012 +0000 + + sna: assert that the source is not the GTT mapping when uploading + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 783b8048a6d1a9fd0a73ebf7768ae17dc0b21900 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Oct 31 11:16:09 2012 +0000 + + sna: Prefer to use the GPU for uploads if continuing on the GPU + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 74c912880c302889f38fe5898c8038a0ba20e5db +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Oct 31 10:57:54 2012 +0000 + + sna: Fix a typo in a DBG + + Reported-by: Matti Ruohonen <kiesus@gmail.com> + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit f48a821aa73cb40a51baafc6cd2b063f1f91d864 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Oct 31 10:53:47 2012 +0000 + + sna: Add a few DBG tracepoints to screen init/fini + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit bf81d552c4be039fbcf3272387828b1a8b3fbdb8 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Oct 31 08:50:44 2012 +0000 + + sna: Clamp the drawable box to prevent int16 overflow + + And assert that the box is valid when migrating. + + References: https://bugs.freedesktop.org/show_bug.cgi?id=56591 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 31eb704b2ad7c861ec4e61fb9de0e9592fc6d269 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Oct 26 13:57:30 2012 +0100 + + sna: Ensure that the trap is clipped if it ends within the boundary pixel + + Reported-and-tested-by: Jiri Slaby <jirislaby@gmail.com> + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56395 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit ef431b2d35c1bf4d77bbcc73688951d22f6aa135 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Oct 25 10:15:39 2012 +0100 + + uxa: Drain the DRM event queue before server regeneration + + Adam Jackson notes that what appeared to be my paranoid ramblings in SNA + actually served a purpose - it prevents a server crash following + server regen if an indirect client happened to be running at the time + (e.g. LIBGL_INDIRECT_ALWAYS=1 glxgears). 
+ + Reported-by: Adam Jackson <ajax@redhat.com> + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit efb8ff16491ecfb4d9c0c6a718684310d949d8d3 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Oct 24 22:56:20 2012 +0100 + + sna: Add missing ValidatePicture() for flattening alphamaps + + Reported-by: Armands Liepins <armandsl@gmail.com> + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56367 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 1a489142c8e6a4828348cc9afbd0f430d3b1e2d8 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Oct 23 23:43:50 2012 +0100 + + sna: Disable RandR hotplug events if Xinerama is enabled + + Since RandR itself is disabled if Xinerama is enabled, for example with + ZaphodHeads, calling RRGetInfo() upon a hotplug event generates an + assertion. + + Reported-by: Stephen Liang <inteldriver@angrywalls.com> + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=55260 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit d87c2756db1af6e4af15864ab0f44d1454079236 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Oct 23 15:50:56 2012 +0100 + + sna: Beware 16-bit overflow when computing sample areas + + Reported-by: Ognian Tenchev <drJeckyll@Jeckyll.net> + References: https://bugs.freedesktop.org/show_bug.cgi?id=56324 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit c7f48684cdebc24128a5fa5678614af3deb14b3b +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Oct 23 15:17:56 2012 +0100 + + sna: Only disallow hw sprite scaling on Haswell + + Earlier chips (Ironlake, Sandybridge and Ivybridge) have integrated + sprite scalers. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 5c3ea9cf6900855502fcd56214a1b9e180265ff5 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Oct 22 22:35:17 2012 +0100 + + sna: Update DRI buffer if attached to the framebuffer for TearFree flips + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 4dfc83359d006a4e410e3280003b49683309afc3 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Oct 22 14:56:01 2012 +0100 + + sna: Tidy udev install/remove and add a couple of lines of DBG + + References: https://bugs.freedesktop.org/show_bug.cgi?id=55260 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 4d9687d49c9869b2e88d408e5f451c9a1f8f3389 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Oct 22 13:41:54 2012 +0100 + + sna: Refactor the common code to enable a timer + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit fb729788872ccb429ddde8a9a4281b1933243096 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Oct 21 14:36:48 2012 +0100 + + sna: Only query the system time if we are processing timers + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit c0d6a75f02eb97e5c80a4345ae5c68e9a81d49b6 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Oct 21 14:32:14 2012 +0100 + + sna: Use the FLUSH_TIMER as the only wakeup timer source + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 7bc829c39a203c17053eb728412f698a429ad9fe +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Oct 21 14:24:01 2012 +0100 + + sna: Remove the unused inactive eviction + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 9fa6e4aa2daee99ff5f6efc11232de22100bac80 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Oct 21 12:48:06 2012 +0100 + + intel: Sanity check that the platform probe points to an i915.ko
GEM device + + References: https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1069031 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit f6eed98fcfea54d50a282ac71ee654645551ae11 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Oct 21 10:46:14 2012 +0100 + + sna: Mark the to-be-damaged region first, then Process afterwards + + Damage is processed in two phases, with the actual Damage being appended + before the operation is performed so that a copy can be made before + modification (e.g. software cursors). + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 60e4e009f90543bfd57f6a4e51ebc5b32b4af33b +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Oct 20 17:59:45 2012 +0100 + + sna: Move the source region to the CPU for a self-copy fallback CopyArea + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 7ff8b1ba543508f3b209f2ade7655c3aa34d546d +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Oct 20 16:23:26 2012 +0100 + + 2.20.12 release + + How embarrassing! My fault for rushing :( + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 695b2ce2d32bde191080129b55f9bf8a9d48ee77 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Oct 20 16:19:21 2012 +0100 + + uxa: Fixup drm_intel_bo_disable_reuse() typo + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 2083e253b3d1ecc218ab1e523e4026ddd4561112 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Oct 20 16:07:11 2012 +0100 + + 2.20.11 release + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit fc0ba65f5efe217f2ab5e920255d2133d7c9e5e8 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Oct 20 09:29:10 2012 +0100 + + uxa: Disable bo reuse after binding to a scanout + + On gen6+, bo are expected to be LLC by default. However, as using the bo + for the scanout causes it to be moved into the uncached domain, this + assumption is then false and we should release the bo back to the system + rather than spread the uncached buffers around. The most common + allocator of scanouts is for pageflipping, which are already non-reusable + due to the DRI2 export, so there should actually be little impact. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit f4c32af48b0c92a48131090886a6a6b6c45dbe34 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Oct 19 16:29:19 2012 +0100 + + sna: Clear the damage along with the BO when forcing the stall for inplace BLT + + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56180 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 299232bdb69c8c2b6231905e0f45e9cfe74fe09a +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Oct 19 15:02:00 2012 +0100 + + sna: Reorder final checks for using the BO and setting the damage pointer + + When we return NULL from sna_drawable_use_bo(), the expectation is that + the damage pointer is also NULL. However, one SHM path leaked.
+ + References: https://bugs.freedesktop.org/show_bug.cgi?id=56180 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 257abfdabe39629fb458ed65fab11283f7518dc4 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Oct 17 23:34:22 2012 +0100 + + sna/gen4: Presume we need a flush upon state change similar to gen5+ + + References: https://bugs.freedesktop.org/show_bug.cgi?id=55627 + References: https://bugs.freedesktop.org/show_bug.cgi?id=55500 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 8238c672984e31ae655353d6412e3395a9cdfbe6 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Oct 17 22:16:29 2012 +0100 + + sna: secure batches accepted upstream, so simply use runtime detection + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 540666a0a81c7daedbd47830d0932df5e57ec903 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Oct 17 16:59:05 2012 +0100 + + sna/overlay: Move bo out of GTT domain after binding to overlay plane + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 891bae4aa91e85542dcbe38f6ee92141e3efc801 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Oct 17 11:29:10 2012 +0100 + + sna: Use the secure batches to program scanline waits on gen6+ + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 41be80a8cae1eb0e294392e5033511bfdf2895c5 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Oct 17 11:25:52 2012 +0100 + + sna: Enable support for SECURE batch buffers + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit ba6c82cd9d8089354b90632ca8edbb35cc09b9c4 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Oct 17 13:54:51 2012 +0100 + + sna/dri: Defensively check for GTT mmap failure during fallback + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 7927f9a351ead1a5593bc91e465706bdd889bb8d +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Oct 16 17:56:30 2012 +0100 + + sna/gen7: Filter BLEND flags for CA glyphs + + Fixes regression from commit c51aaa731e5cffc892e59730194ad7c98789b02b + Author: Chris Wilson <chris@chris-wilson.co.uk> + Date: Thu Oct 11 11:36:00 2012 +0100 + + sna/gen7: Replace bogus state tracking assertion + + The assumption that we only used the encoded flags for determining + the composite state is false for the magic CA pass. + + Reported-by: Oleksij Rempel <bug-track@fisher-privat.net> + Reported-by: Eyal Lotem <eyal.lotem@gmail.com> + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56037 + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 2ad4aa195571fe214ccffa55e123507f1be66243 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Oct 16 11:59:28 2012 +0100 + + sna: Drop fake tiled CPU mapping + + The only path where this is correct already handles it as the special + case that it is, everywhere else it is just nonsense. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + commit b42d81b63f5b6a571faffaadd42c74adce40128a Author: Chris Wilson <chris@chris-wilson.co.uk> Date: Sun Oct 14 09:15:38 2012 +0100 diff --git a/Makefile.am b/Makefile.am index b3d37b273..5001674ee 100644 --- a/Makefile.am +++ b/Makefile.am @@ -18,6 +18,7 @@ # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS} SUBDIRS = man diff --git a/Makefile.in b/Makefile.in index 6f2e8f426..edf7e301a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -225,7 +225,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MAN_SUBSTS = @MAN_SUBSTS@ @@ -264,6 +263,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@ VERSION = @VERSION@ X11_CFLAGS = @X11_CFLAGS@ X11_LIBS = @X11_LIBS@ +XCB_CFLAGS = @XCB_CFLAGS@ +XCB_LIBS = @XCB_LIBS@ XORG_CFLAGS = @XORG_CFLAGS@ XORG_LIBS = @XORG_LIBS@ XORG_MAN_PAGE = @XORG_MAN_PAGE@ @@ -322,6 +323,7 @@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ +ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS} SUBDIRS = man $(am__append_1) src $(am__append_2) MAINTAINERCLEANFILES = ChangeLog INSTALL all: config.h @@ -330,7 +332,7 @@ all: config.h .SUFFIXES: am--refresh: Makefile @: -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -357,9 +359,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) $(SHELL) ./config.status --recheck -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) +$(top_srcdir)/configure: $(am__configure_deps) $(am__cd) $(srcdir) && $(AUTOCONF) -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) $(am__aclocal_m4_deps): @@ -370,7 +372,7 @@ config.h: stamp-h1 stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status @rm -f stamp-h1 cd $(top_builddir) && $(SHELL) ./config.status config.h -$(srcdir)/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) +$(srcdir)/config.h.in: $(am__configure_deps) ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) rm -f stamp-h1 touch $@ @@ -1,3 +1,293 @@ +Release 2.21.2 (2013-02-10) +=========================== +Pass the brown paper bags, I need half a dozen or so. That seemingly +innocuous build fix to xorg-1.13 happened to have the little side-effect +of breaking glyph rendering on xorg-1.12 and older on 64-bit machines. + + +Release 2.21.1 (2013-02-10) +=========================== +A fix for a potential GPU hang on 945gm (GMA3100) and earlier chipsets, +along with backporting SNA to the packages found in stable distributions +like Debian 6.0 (Squeeze). + + * Cleanup compilation warnings from deblint, thanks to Paul Menzel + + * Minor build improvements by Damien Lespiau. + + * Disable generating span geometry for non-rectilinear spans on gen4 + in order to work around and prevent one class of render corruption. + + * Prevent cache thrashing and severe performance degradation on LLC + machines for streaming texture updates. However, note the effect was + only observed on just one particular laptop. + + * Fix alignment of subsurface proxies for old chipsets. + https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1120108 + + * Repair build against Xorg-1.6 and contemporary packages. + + +Release 2.21.0 (2013-02-01) +=========================== +A few new features: + + * Enable render acceleration for Haswell GT1/GT2.
+ + * Enable multi-threaded rasterisation of trapezoids and fallback composition + + * Utilise a new kernel interface (v3.9) for processing relocations + +along with a few older features from the 2.20.x series: + + * PRIME support for hotplug GPUs and hybrid systems + + * Support for IvyBridge GT1 machines, aka HD2500 graphics. + + * Stable 830gm/845g support, at last! + +As usual we have a large number of bug fixes since the last release: + + * Prevent a stray relocation being left after a buffer is removed from + a batch, leading to GPU hangs. + + * Make the driver more robust against its own failures to submit batches + by falling back to software rendering. + + * Fix emission of scanline waits for secondary pipes on gen6/7. Otherwise + you may encounter GPU hangs in MI_WAIT_FOR_EVENT. + + * Fix a missing corner pixel when drawing rectangles with PolyLines + https://bugs.freedesktop.org/show_bug.cgi?id=55484 + + * Don't try to use Y-tiling colour buffers with mesa/i915c as mesa + doesn't support them and will fall back to software rendering + + * Ensure that any cached mmaps are invalidated for a SwapBuffers + https://bugs.freedesktop.org/show_bug.cgi?id=60042 + + * Correctly handle the composition of rotated displays too large for the + 3D pipeline + https://bugs.freedesktop.org/show_bug.cgi?id=60124 + + * Fix the computation of the planar video frame size + https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1104180 + + +Release 2.20.19 (2013-01-20) +============================ +A quick release as the last broke USB DisplayLink slave outputs badly. The +performance of those displays was unusable due to an inadvertent change that +caused us to flush the entire scanout over the USB for every drawing +operation. + + * Implement the GNOME Build API. A couple of minor changes to make + integrators' and distributors' lives a little easier, or at least more + consistent. + + * Correctly offset inplace trapezoids for subwindows, such as the GTK+ + close button after it has a background image uploaded. + + * Explicitly prevent ring-switching for synchronized rendering to + scanouts (for vsync). + + * Clip dirty region to slave pixmaps (otherwise UDL is nigh unusable) + https://bugs.freedesktop.org/show_bug.cgi?id=59539 + + +Release 2.20.18 (2013-01-16) +============================ +A bunch of miscellaneous fixes for assertion failures and various +performance regressions when mixing new methods for offloads, along with +a couple of improvements for rendering with gen4. + + * Remove use of packed unnormalized texture coordinates on gen4/5 as + these GPUs do not support unnormalized coordinates in the sampler. + + * Remove dependency upon x86 asm for cross-building to unsupported + architectures. + https://bugs.gentoo.org/show_bug.cgi?id=448570 + + * Apply damage around PRIME updates in the correct order. + + * Correctly read the initial backlight level when the user + overrides UXA's choice of backlight controller. + + * Throttle UXA and prevent it queuing work much faster than the GPU can + complete it. This manifested itself in impossible performance figures and + the entire display freezing for several seconds whilst the GPU caught + up. One side effect of the missing throttle was that it also caused the + DDX to consume more memory than was required, as it could not recycle + buffers quickly enough, so in some cases throttling produces a marked + improvement in performance. Also note on gen2/3 this requires a new + libdrm [2.4.41] in order to prevent a bug causing the DDX to fall back + to swrast.
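One plausible shape for the throttle described in the last bullet above, sketched against the generic i915 throttle ioctl (an assumption for illustration; the driver's actual mechanism may differ):

    #include <xf86drm.h>      /* drmIoctl() */
    #include <drm/i915_drm.h> /* DRM_IOCTL_I915_GEM_THROTTLE */

    /* Block until the GPU has caught up to within the kernel's
     * throttle window (roughly 20ms of outstanding work), preventing
     * the CPU from queuing batches arbitrarily far ahead. */
    static int
    gpu_throttle(int drm_fd)
    {
        return drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL);
    }

Invoked once per flush, a throttle like this bounds both the display latency and the number of retired-but-unrecycled buffers.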
+
+Release 2.20.17 (2012-12-26)
+============================
+A minor update to prepare for co-operating with the kernel over managing
+stability on 830gm/845g. On this pair of chipsets, the kernel will perform
+an extra copy of the batchbuffer into reserved memory, which prevents them
+from randomly dying. However, that extra copy does have a noticeable
+impact upon throughput, so we also have a mechanism for userspace to
+opt out of the kernel workaround and take responsibility for ensuring its
+batches are coherent.
+
+ * Build fixes against xorg-1.14
+ https://bugs.freedesktop.org/show_bug.cgi?id=58552
+ https://bugs.freedesktop.org/show_bug.cgi?id=58406
+
+ * Fix the origin of cropped (textured) video windows (Xv and XvMC)
+ https://bugs.freedesktop.org/show_bug.cgi?id=23033
+
+ * Fix potential corruption when using images larger than ~1GiB
+
+
+Release 2.20.16 (2012-12-15)
+============================
+Rejoice! We have found a trick to make 830gm/845g stable at long last.
+Ever since the switch to GEM and dynamic video memory, those early
+second generation chipsets have been plagued by instability. The lack of
+flushing cachelines from the CPU to GMCH was eventually solved by using
+an undocumented bit, but 830/845 were still hanging under memory pressure.
+These deaths were all due to garbage finding its way into the command
+streamer, and they go away if we take a leaf out of the original driver
+and never reuse those pages for anything else. So for the first time
+ever, I have been able to complete running the test suite on an 845g,
+even whilst thrashing the page and buffer caches!
+
+ * Run the SF stage as single-threaded on gen4 to work around a few issues
+ https://bugs.freedesktop.org/show_bug.cgi?id=57410
+
+ * Keep the scanout SURFACE_STATE separate to avoid overriding its
+ memory access control on gen6/7 (i.e. writes to the scanout need to
+ be kept out of the render cache)
+
+ * Tune batch flushing after an operation to an exported surface under a
+ compositor.
+
+ * Make sure the source is on the CPU for inplace composition of trapezoids
+ using the CPU
+ https://bugs.freedesktop.org/show_bug.cgi?id=56825
+
+ * Immediately flush in the block handler after a split batch to reduce
+ latency between the two halves of an operation.
+ https://bugs.freedesktop.org/show_bug.cgi?id=51718
+
+ * Install a fallback config if we fail to install the desired config
+ at VT switch (i.e. booting, after resume with 3 incompatible pipes on
+ Ivybridge)
+
+ * Pin batches to avoid CS incoherence on 830/845
+ https://bugs.freedesktop.org/show_bug.cgi?id=26345
+
+
+Release 2.20.15 (2012-12-03)
+============================
+And lo, enabling more of the common acceleration paths for gen4 revealed
+another lurking bug - something is wrong with how we prepare Y-tiling
+surfaces for rendering. For the time being, we can surreptitiously disable
+them for gen4 and avoid hitting GPU hangs.
+
+ * Avoid clobbering the render state after failing to convert the
+ operation to use the blitter.
+ https://bugs.freedesktop.org/show_bug.cgi?id=57601
+
+ * Disable shadow tracking upon server regeneration, and so fix a crash
+ if you restart the server whilst a RandR transform (e.g. rotation) is
+ in effect.
+ https://bugs.freedesktop.org/show_bug.cgi?id=52255
+ https://bugs.freedesktop.org/show_bug.cgi?id=56608
+
+
+Release 2.20.14 (2012-11-26)
+============================
+The highlight of this release is gen4, from 965g to gm45.
Quite an old
+bug surfaced in the shader assembly, sparking a chance to review a few
+design choices within that backend and experiment with fresh ways to
+work around the remaining issues.
+
+ * Avoid using inplace XOR'ed uploads for very large buffers
+ https://bugs.freedesktop.org/show_bug.cgi?id=57031
+
+ * Fix the gen4/5 opacity shader
+ https://bugs.freedesktop.org/show_bug.cgi?id=57054
+
+ * Queue a pending vblank request after flip completion
+ https://bugs.freedesktop.org/show_bug.cgi?id=56423
+
+ * Avoid migrating an uninitialised pixmap for use as a render source
+ https://bugs.freedesktop.org/show_bug.cgi?id=47597
+
+ * Improve handling of texture fallbacks for 830/845.
+ https://bugs.freedesktop.org/show_bug.cgi?id=57392
+
+
+Release 2.20.13 (2012-11-11)
+============================
+Nothing but bug fixes. Many thanks to everyone who took the time to
+report their issues, and for their help in improving the driver.
+
+ * Sanity check that the platform probe points to our expected i915 device
+ https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1069031
+
+ * Prevent 16-bit overflow when computing the sample area of sources to
+ upload for render operations
+ https://bugs.freedesktop.org/show_bug.cgi?id=56324
+
+ * Clamp the drawable box for migration to prevent 16-bit overflow
+ (the required clamping is sketched after these notes)
+ https://bugs.freedesktop.org/show_bug.cgi?id=56591
+
+ * Disable RandR hotplug events if Xinerama is enabled and thereby prevent
+ a crash upon hotplug
+ https://bugs.freedesktop.org/show_bug.cgi?id=55260
+
+ * Call ValidatePicture before attempting to flatten the alphamaps
+ https://bugs.freedesktop.org/show_bug.cgi?id=56367
+
+ * Clip the trapezoid correctly if it ends on the boundary pixel
+ https://bugs.freedesktop.org/show_bug.cgi?id=56395
+
+ * Make sure the pipeline choice is propagated to the scanline wait
+ across a batch flush
+ https://bugs.freedesktop.org/show_bug.cgi?id=47597
+
+ * Set the valid drawable box when choosing placement of BLT composite ops
+ https://bugs.freedesktop.org/show_bug.cgi?id=47597
+
+ * Prevent use-after-free when promoting a partial-GPU bo to a full-GPU bo
+ https://bugs.freedesktop.org/show_bug.cgi?id=56591
+
+ * gen4 opacity spans require the per-rectangle workaround
+ https://bugs.freedesktop.org/show_bug.cgi?id=55500
+
+ * Prevent use of invalid damage pointers when redirecting rendering
+ https://bugs.freedesktop.org/show_bug.cgi?id=56785
+
+
+Release 2.20.12 (2012-10-20)
+============================
+More bug reports, more bug fixes! Perhaps the headline feature is
+that with secure batches, coming to a 3.8 kernel near you, we may
+finally have the ability to perform updates to the scanout synchronized
+to the refresh rate on later SandyBridge and IvyBridge chipsets. It comes
+at quite a power cost as we need to keep the GPU out of its power saving
+modes, but it should allow legacy vsync to function at last. In turn,
+this should finally address a longstanding issue with tearing on
+SandyBridge+.
+
+ * Fix component-alpha rendering on IvyBridge, for example subpixel
+ antialiased glyphs.
+ https://bugs.freedesktop.org/show_bug.cgi?id=56037
+
+ * Flush before some "pipelined" state changes on gen4. The evidence is
+ that the same flushes required on gen5+ are also required for gen4.
+ https://bugs.freedesktop.org/show_bug.cgi?id=55627
+
+ * Prevent a potential crash when forcing a stall on a busy CPU bo
+ https://bugs.freedesktop.org/show_bug.cgi?id=56180
+
+[Release 2.20.11 contained a typo causing UXA to fail immediately.]
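[On the pair of 16-bit overflow fixes in 2.20.13: X11 boxes and regions store signed 16-bit coordinates, so extent arithmetic has to be done in a wider type and clamped on the way back in. A minimal sketch of that clamping follows; BoxRec is redeclared locally so the example is self-contained, the real definition lives in the xserver headers.]

#include <stdint.h>

typedef struct { int16_t x1, y1, x2, y2; } BoxRec; /* illustrative stand-in */

static int16_t clamp16(int32_t v)
{
	/* Saturate to the signed 16-bit range used by X11 coordinates. */
	if (v < INT16_MIN)
		return INT16_MIN;
	if (v > INT16_MAX)
		return INT16_MAX;
	return (int16_t)v;
}

static void box_set_clamped(BoxRec *box, int32_t x, int32_t y,
			    int32_t w, int32_t h)
{
	/* Compute at 32 bits: x + w can exceed 32767 even when every
	 * operand fits in an int16_t. */
	box->x1 = clamp16(x);
	box->y1 = clamp16(y);
	box->x2 = clamp16(x + w);
	box->y2 = clamp16(y + h);
}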
+ Release 2.20.10 (2012-10-14) ============================ The last couple of weeks have been fairly retrospective, a dive into diff --git a/aclocal.m4 b/aclocal.m4 index fa97284c0..77dfdcecf 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -1338,7 +1338,14 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*) LD="${LD-ld} -m elf_i386_fbsd" ;; x86_64-*linux*) - LD="${LD-ld} -m elf_i386" + case `/usr/bin/file conftest.o` in + *x86-64*) + LD="${LD-ld} -m elf32_x86_64" + ;; + *) + LD="${LD-ld} -m elf_i386" + ;; + esac ;; ppc64-*linux*|powerpc64-*linux*) LD="${LD-ld} -m elf32ppclinux" @@ -1702,7 +1709,8 @@ AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl ;; *) lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` - if test -n "$lt_cv_sys_max_cmd_len"; then + if test -n "$lt_cv_sys_max_cmd_len" && \ + test undefined != "$lt_cv_sys_max_cmd_len"; then lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` else @@ -11251,46 +11259,6 @@ fi rmdir .tst 2>/dev/null AC_SUBST([am__leading_dot])]) -# Add --enable-maintainer-mode option to configure. -*- Autoconf -*- -# From Jim Meyering - -# Copyright (C) 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, -# 2011 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# serial 5 - -# AM_MAINTAINER_MODE([DEFAULT-MODE]) -# ---------------------------------- -# Control maintainer-specific portions of Makefiles. -# Default is to disable them, unless `enable' is passed literally. -# For symmetry, `disable' may be passed as well. Anyway, the user -# can override the default with the --enable/--disable switch. -AC_DEFUN([AM_MAINTAINER_MODE], -[m4_case(m4_default([$1], [disable]), - [enable], [m4_define([am_maintainer_other], [disable])], - [disable], [m4_define([am_maintainer_other], [enable])], - [m4_define([am_maintainer_other], [enable]) - m4_warn([syntax], [unexpected argument to AM@&t@_MAINTAINER_MODE: $1])]) -AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles]) - dnl maintainer-mode's default is 'disable' unless 'enable' is passed - AC_ARG_ENABLE([maintainer-mode], -[ --][am_maintainer_other][-maintainer-mode am_maintainer_other make rules and dependencies not useful - (and sometimes confusing) to the casual installer], - [USE_MAINTAINER_MODE=$enableval], - [USE_MAINTAINER_MODE=]m4_if(am_maintainer_other, [enable], [no], [yes])) - AC_MSG_RESULT([$USE_MAINTAINER_MODE]) - AM_CONDITIONAL([MAINTAINER_MODE], [test $USE_MAINTAINER_MODE = yes]) - MAINT=$MAINTAINER_MODE_TRUE - AC_SUBST([MAINT])dnl -] -) - -AU_DEFUN([jm_MAINTAINER_MODE], [AM_MAINTAINER_MODE]) - # Check to see how 'make' treats includes. -*- Autoconf -*- # Copyright (C) 2001, 2002, 2003, 2005, 2009 Free Software Foundation, Inc. diff --git a/config.h.in b/config.h.in index b02108a49..546254232 100644 --- a/config.h.in +++ b/config.h.in @@ -11,6 +11,9 @@ /* Enable pixmap debugging */ #undef DEBUG_PIXMAP +/* Enable synchronous rendering for debugging */ +#undef DEBUG_SYNC + /* Default acceleration method */ #undef DEFAULT_ACCEL_METHOD @@ -23,6 +26,12 @@ /* Enable pixman glyph cache */ #undef HAS_PIXMAN_GLYPHS +/* Enable pixman triangle rasterisation */ +#undef HAS_PIXMAN_TRIANGLES + +/* Enable if your compiler supports the Intel __sync_* atomic primitives */ +#undef HAVE_ATOMIC_PRIMITIVES + /* Define to 1 if you have the <dgaproc.h> header file. 
*/ #undef HAVE_DGAPROC_H @@ -47,6 +56,9 @@ /* Define to 1 if you have the <inttypes.h> header file. */ #undef HAVE_INTTYPES_H +/* Enable if you have libatomic-ops-dev installed */ +#undef HAVE_LIB_ATOMIC_OPS + /* Define to 1 if you have the <memory.h> header file. */ #undef HAVE_MEMORY_H @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for xf86-video-intel 2.20.10. +# Generated by GNU Autoconf 2.69 for xf86-video-intel 2.21.2. # # Report bugs to <https://bugs.freedesktop.org/enter_bug.cgi?product=xorg>. # @@ -591,8 +591,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='xf86-video-intel' PACKAGE_TARNAME='xf86-video-intel' -PACKAGE_VERSION='2.20.10' -PACKAGE_STRING='xf86-video-intel 2.20.10' +PACKAGE_VERSION='2.21.2' +PACKAGE_STRING='xf86-video-intel 2.21.2' PACKAGE_BUGREPORT='https://bugs.freedesktop.org/enter_bug.cgi?product=xorg' PACKAGE_URL='' @@ -651,6 +651,8 @@ KMS_ONLY_FALSE KMS_ONLY_TRUE XVMC_FALSE XVMC_TRUE +XCB_LIBS +XCB_CFLAGS XVMCLIB_LIBS XVMCLIB_CFLAGS DRI2_FALSE @@ -681,10 +683,10 @@ LIBGLAMOR_LIBS LIBGLAMOR_CFLAGS GLAMOR_FALSE GLAMOR_TRUE -DRMINTEL_LIBS -DRMINTEL_CFLAGS UXA_FALSE UXA_TRUE +DRMINTEL_LIBS +DRMINTEL_CFLAGS SNA_FALSE SNA_TRUE HAVE_X11_FALSE @@ -773,9 +775,6 @@ CPPFLAGS LDFLAGS CFLAGS CC -MAINT -MAINTAINER_MODE_FALSE -MAINTAINER_MODE_TRUE am__untar am__tar AMTAR @@ -840,7 +839,6 @@ SHELL' ac_subst_files='' ac_user_opts=' enable_option_checking -enable_maintainer_mode enable_dependency_tracking enable_selective_werror enable_strict_compilation @@ -907,6 +905,8 @@ PCIACCESS_CFLAGS PCIACCESS_LIBS XVMCLIB_CFLAGS XVMCLIB_LIBS +XCB_CFLAGS +XCB_LIBS VALGRIND_CFLAGS VALGRIND_LIBS' @@ -1449,7 +1449,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures xf86-video-intel 2.20.10 to adapt to many kinds of systems. +\`configure' configures xf86-video-intel 2.21.2 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1520,7 +1520,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of xf86-video-intel 2.20.10:";; + short | recursive ) echo "Configuration of xf86-video-intel 2.21.2:";; esac cat <<\_ACEOF @@ -1528,8 +1528,6 @@ Optional Features: --disable-option-checking ignore unrecognized --enable/--with options --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] - --enable-maintainer-mode enable make rules and dependencies not useful - (and sometimes confusing) to the casual installer --disable-dependency-tracking speeds up one-time build --enable-dependency-tracking do not reject slow dependency extractors --disable-selective-werror @@ -1636,6 +1634,8 @@ Some influential environment variables: C compiler flags for XVMCLIB, overriding pkg-config XVMCLIB_LIBS linker flags for XVMCLIB, overriding pkg-config + XCB_CFLAGS C compiler flags for XCB, overriding pkg-config + XCB_LIBS linker flags for XCB, overriding pkg-config VALGRIND_CFLAGS C compiler flags for VALGRIND, overriding pkg-config VALGRIND_LIBS @@ -1707,7 +1707,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -xf86-video-intel configure 2.20.10 +xf86-video-intel configure 2.21.2 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. 
@@ -2122,7 +2122,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by xf86-video-intel $as_me 2.20.10, which was +It was created by xf86-video-intel $as_me 2.21.2, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2942,7 +2942,7 @@ fi # Define the identity of the package. PACKAGE='xf86-video-intel' - VERSION='2.20.10' + VERSION='2.21.2' cat >>confdefs.h <<_ACEOF @@ -2983,29 +2983,6 @@ am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -' -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5 -$as_echo_n "checking whether to enable maintainer-specific portions of Makefiles... " >&6; } - # Check whether --enable-maintainer-mode was given. -if test "${enable_maintainer_mode+set}" = set; then : - enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval -else - USE_MAINTAINER_MODE=no -fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" >&5 -$as_echo "$USE_MAINTAINER_MODE" >&6; } - if test $USE_MAINTAINER_MODE = yes; then - MAINTAINER_MODE_TRUE= - MAINTAINER_MODE_FALSE='#' -else - MAINTAINER_MODE_TRUE='#' - MAINTAINER_MODE_FALSE= -fi - - MAINT=$MAINTAINER_MODE_TRUE - - - # Require X.Org macros 1.8 or later for MAN_SUBSTS set by XORG_MANPAGE_SECTIONS @@ -11514,7 +11491,8 @@ else ;; *) lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` - if test -n "$lt_cv_sys_max_cmd_len"; then + if test -n "$lt_cv_sys_max_cmd_len" && \ + test undefined != "$lt_cv_sys_max_cmd_len"; then lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` else @@ -13050,7 +13028,14 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*) LD="${LD-ld} -m elf_i386_fbsd" ;; x86_64-*linux*) - LD="${LD-ld} -m elf_i386" + case `/usr/bin/file conftest.o` in + *x86-64*) + LD="${LD-ld} -m elf32_x86_64" + ;; + *) + LD="${LD-ld} -m elf_i386" + ;; + esac ;; ppc64-*linux*|powerpc64-*linux*) LD="${LD-ld} -m elf32ppclinux" @@ -18261,6 +18246,72 @@ else fi +# Check for atomic intrinsics +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for native atomic primitives" >&5 +$as_echo_n "checking for native atomic primitives... " >&6; } +if ${intel_cv_atomic_primitives+:} false; then : + $as_echo_n "(cached) " >&6 +else + + intel_cv_atomic_primitives="none" + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int atomic_add(int i) { return __sync_fetch_and_add (&i, 1); } +int atomic_cmpxchg(int i, int j, int k) { return __sync_val_compare_and_swap (&i, j, k); } + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + intel_cv_atomic_primitives="Intel" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + + if test "x$intel_cv_atomic_primitives" = "xnone"; then + ac_fn_c_check_header_mongrel "$LINENO" "atomic_ops.h" "ac_cv_header_atomic_ops_h" "$ac_includes_default" +if test "x$ac_cv_header_atomic_ops_h" = xyes; then : + intel_cv_atomic_primitives="libatomic-ops" +fi + + + fi + + # atomic functions defined in <atomic.h> & libc on Solaris + if test "x$intel_cv_atomic_primitives" = "xnone"; then + ac_fn_c_check_func "$LINENO" "atomic_cas_uint" "ac_cv_func_atomic_cas_uint" +if test "x$ac_cv_func_atomic_cas_uint" = xyes; then : + intel_cv_atomic_primitives="Solaris" +fi + + fi + + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $intel_cv_atomic_primitives" >&5 +$as_echo "$intel_cv_atomic_primitives" >&6; } +if test "x$intel_cv_atomic_primitives" = xIntel; then + +$as_echo "#define HAVE_ATOMIC_PRIMITIVES 1" >>confdefs.h + +fi +if test "x$intel_cv_atomic_primitives" = "xlibatomic-ops"; then + +$as_echo "#define HAVE_LIB_ATOMIC_OPS 1" >>confdefs.h + +fi + +if test "x$intel_cv_atomic_primitives" = "xnone"; then + as_fn_error $? "xf86-video-intel depends upon atomic operations, which were not found for your compiler/cpu. Try compiling with -march=native, or install the libatomics-op-dev package." "$LINENO" 5 +fi + # Check whether --enable-udev was given. if test "${enable_udev+set}" = set; then : enableval=$enable_udev; UDEV="$enableval" @@ -18341,7 +18392,7 @@ else $as_echo "yes" >&6; } udev=yes fi - if test x$UDEV == xyes -a x$udev != xyes; then + if test x$UDEV = xyes -a x$udev != xyes; then as_fn_error $? "udev support requested but not found (libudev)" "$LINENO" 5 fi if test x$udev = xyes; then @@ -18473,7 +18524,7 @@ fi required_xorg_xserver_version=1.6 -required_pixman_version=0.24 +required_pixman_version=0.16 if pkg-config --exists 'pixman-1 >= 0.27.1'; then @@ -18481,6 +18532,12 @@ $as_echo "#define HAS_PIXMAN_GLYPHS 1" >>confdefs.h fi +if pkg-config --exists 'pixman-1 >= 0.24.0'; then + +$as_echo "#define HAS_PIXMAN_TRIANGLES 1" >>confdefs.h + +fi + # Check whether --enable-sna was given. if test "${enable_sna+set}" = set; then : enableval=$enable_sna; SNA="$enableval" @@ -18507,7 +18564,6 @@ if test "x$SNA" = "xauto" && pkg-config --exists "xorg-server >= 1.10"; then SNA=yes fi if test "x$SNA" != "xno"; then - required_xorg_xserver_version=1.10 $as_echo "#define USE_SNA 1" >>confdefs.h @@ -18529,21 +18585,19 @@ $as_echo "$SNA" >&6; } if test "${enable_uxa+set}" = set; then : enableval=$enable_uxa; UXA="$enableval" else - UXA=yes + UXA=auto fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to include UXA support" >&5 $as_echo_n "checking whether to include UXA support... " >&6; } - if test x$UXA != xno; then - UXA_TRUE= - UXA_FALSE='#' -else - UXA_TRUE='#' - UXA_FALSE= +if test "x$UXA" = "xauto"; then + if ! pkg-config --exists 'libdrm_intel >= 2.4.29'; then + UXA=no + fi + if ! 
pkg-config --exists 'pixman-1 >= 0.24.0'; then + UXA=no + fi fi - -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $UXA" >&5 -$as_echo "$UXA" >&6; } if test "x$UXA" != "xno"; then $as_echo "#define USE_UXA 1" >>confdefs.h @@ -18639,8 +18693,20 @@ else $as_echo "yes" >&6; } fi + required_pixman_version=0.24 + UXA=yes +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $UXA" >&5 +$as_echo "$UXA" >&6; } + if test x$UXA != xno; then + UXA_TRUE= + UXA_FALSE='#' +else + UXA_TRUE='#' + UXA_FALSE= fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to include GLAMOR support" >&5 $as_echo_n "checking whether to include GLAMOR support... " >&6; } # Check whether --enable-glamor was given. @@ -19278,12 +19344,12 @@ if test -n "$DRM_CFLAGS"; then pkg_cv_DRM_CFLAGS="$DRM_CFLAGS" elif test -n "$PKG_CONFIG"; then if test -n "$PKG_CONFIG" && \ - { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libdrm >= 2.4.24\""; } >&5 - ($PKG_CONFIG --exists --print-errors "libdrm >= 2.4.24") 2>&5 + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libdrm >= 2.4.20\""; } >&5 + ($PKG_CONFIG --exists --print-errors "libdrm >= 2.4.20") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then - pkg_cv_DRM_CFLAGS=`$PKG_CONFIG --cflags "libdrm >= 2.4.24" 2>/dev/null` + pkg_cv_DRM_CFLAGS=`$PKG_CONFIG --cflags "libdrm >= 2.4.20" 2>/dev/null` test "x$?" != "x0" && pkg_failed=yes else pkg_failed=yes @@ -19295,12 +19361,12 @@ if test -n "$DRM_LIBS"; then pkg_cv_DRM_LIBS="$DRM_LIBS" elif test -n "$PKG_CONFIG"; then if test -n "$PKG_CONFIG" && \ - { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libdrm >= 2.4.24\""; } >&5 - ($PKG_CONFIG --exists --print-errors "libdrm >= 2.4.24") 2>&5 + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libdrm >= 2.4.20\""; } >&5 + ($PKG_CONFIG --exists --print-errors "libdrm >= 2.4.20") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then - pkg_cv_DRM_LIBS=`$PKG_CONFIG --libs "libdrm >= 2.4.24" 2>/dev/null` + pkg_cv_DRM_LIBS=`$PKG_CONFIG --libs "libdrm >= 2.4.20" 2>/dev/null` test "x$?" != "x0" && pkg_failed=yes else pkg_failed=yes @@ -19321,14 +19387,14 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - DRM_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libdrm >= 2.4.24" 2>&1` + DRM_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libdrm >= 2.4.20" 2>&1` else - DRM_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libdrm >= 2.4.24" 2>&1` + DRM_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libdrm >= 2.4.20" 2>&1` fi # Put the nasty error message in config.log where it belongs echo "$DRM_PKG_ERRORS" >&5 - as_fn_error $? "Package requirements (libdrm >= 2.4.24) were not met: + as_fn_error $? 
"Package requirements (libdrm >= 2.4.20) were not met: $DRM_PKG_ERRORS @@ -19702,12 +19768,12 @@ if test -n "$XVMCLIB_CFLAGS"; then pkg_cv_XVMCLIB_CFLAGS="$XVMCLIB_CFLAGS" elif test -n "$PKG_CONFIG"; then if test -n "$PKG_CONFIG" && \ - { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"xvmc xext xfixes dri2proto x11-xcb xcb-dri2 xcb-aux\""; } >&5 - ($PKG_CONFIG --exists --print-errors "xvmc xext xfixes dri2proto x11-xcb xcb-dri2 xcb-aux") 2>&5 + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"xvmc dri2proto\""; } >&5 + ($PKG_CONFIG --exists --print-errors "xvmc dri2proto") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then - pkg_cv_XVMCLIB_CFLAGS=`$PKG_CONFIG --cflags "xvmc xext xfixes dri2proto x11-xcb xcb-dri2 xcb-aux" 2>/dev/null` + pkg_cv_XVMCLIB_CFLAGS=`$PKG_CONFIG --cflags "xvmc dri2proto" 2>/dev/null` test "x$?" != "x0" && pkg_failed=yes else pkg_failed=yes @@ -19719,12 +19785,12 @@ if test -n "$XVMCLIB_LIBS"; then pkg_cv_XVMCLIB_LIBS="$XVMCLIB_LIBS" elif test -n "$PKG_CONFIG"; then if test -n "$PKG_CONFIG" && \ - { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"xvmc xext xfixes dri2proto x11-xcb xcb-dri2 xcb-aux\""; } >&5 - ($PKG_CONFIG --exists --print-errors "xvmc xext xfixes dri2proto x11-xcb xcb-dri2 xcb-aux") 2>&5 + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"xvmc dri2proto\""; } >&5 + ($PKG_CONFIG --exists --print-errors "xvmc dri2proto") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then - pkg_cv_XVMCLIB_LIBS=`$PKG_CONFIG --libs "xvmc xext xfixes dri2proto x11-xcb xcb-dri2 xcb-aux" 2>/dev/null` + pkg_cv_XVMCLIB_LIBS=`$PKG_CONFIG --libs "xvmc dri2proto" 2>/dev/null` test "x$?" != "x0" && pkg_failed=yes else pkg_failed=yes @@ -19745,9 +19811,9 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - XVMCLIB_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "xvmc xext xfixes dri2proto x11-xcb xcb-dri2 xcb-aux" 2>&1` + XVMCLIB_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "xvmc dri2proto" 2>&1` else - XVMCLIB_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "xvmc xext xfixes dri2proto x11-xcb xcb-dri2 xcb-aux" 2>&1` + XVMCLIB_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "xvmc dri2proto" 2>&1` fi # Put the nasty error message in config.log where it belongs echo "$XVMCLIB_PKG_ERRORS" >&5 @@ -19762,7 +19828,78 @@ else XVMCLIB_LIBS=$pkg_cv_XVMCLIB_LIBS { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } - XVMC=yes + +fi + +pkg_failed=no +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for XCB" >&5 +$as_echo_n "checking for XCB... " >&6; } + +if test -n "$XCB_CFLAGS"; then + pkg_cv_XCB_CFLAGS="$XCB_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"x11-xcb xcb-dri2 xcb-aux\""; } >&5 + ($PKG_CONFIG --exists --print-errors "x11-xcb xcb-dri2 xcb-aux") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_XCB_CFLAGS=`$PKG_CONFIG --cflags "x11-xcb xcb-dri2 xcb-aux" 2>/dev/null` + test "x$?" 
!= "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi +if test -n "$XCB_LIBS"; then + pkg_cv_XCB_LIBS="$XCB_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"x11-xcb xcb-dri2 xcb-aux\""; } >&5 + ($PKG_CONFIG --exists --print-errors "x11-xcb xcb-dri2 xcb-aux") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_XCB_LIBS=`$PKG_CONFIG --libs "x11-xcb xcb-dri2 xcb-aux" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + XCB_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "x11-xcb xcb-dri2 xcb-aux" 2>&1` + else + XCB_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "x11-xcb xcb-dri2 xcb-aux" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$XCB_PKG_ERRORS" >&5 + + XVMC=no +elif test $pkg_failed = untried; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + XVMC=no +else + XCB_CFLAGS=$pkg_cv_XCB_CFLAGS + XCB_LIBS=$pkg_cv_XCB_LIBS + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to include XvMC support" >&5 @@ -19818,7 +19955,7 @@ else DEBUG_FALSE= fi - if test x$FULL_DEBUG == xfull; then + if test x$DEBUG = xfull; then FULL_DEBUG_TRUE= FULL_DEBUG_FALSE='#' else @@ -19909,6 +20046,11 @@ $as_echo "#define HAVE_VALGRIND 1" >>confdefs.h fi fi +if test "x$DEBUG" = xsync; then + +$as_echo "#define DEBUG_SYNC 1" >>confdefs.h + +fi if test "x$DEBUG" = xmemory; then $as_echo "#define DEBUG_MEMORY 1" >>confdefs.h @@ -20055,10 +20197,6 @@ else am__EXEEXT_FALSE= fi -if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then - as_fn_error $? "conditional \"MAINTAINER_MODE\" was never defined. -Usually this means the macro was only invoked conditionally." "$LINENO" 5 -fi if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then as_fn_error $? "conditional \"AMDEP\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 @@ -20532,7 +20670,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by xf86-video-intel $as_me 2.20.10, which was +This file was extended by xf86-video-intel $as_me 2.21.2, which was generated by GNU Autoconf 2.69. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -20598,7 +20736,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -xf86-video-intel config.status 2.20.10 +xf86-video-intel config.status 2.21.2 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index 972d9188e..3a4b6dbcb 100644 --- a/configure.ac +++ b/configure.ac @@ -23,7 +23,7 @@ # Initialize Autoconf AC_PREREQ([2.60]) AC_INIT([xf86-video-intel], - [2.20.10], + [2.21.2], [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg], [xf86-video-intel]) AC_CONFIG_SRCDIR([Makefile.am]) @@ -32,7 +32,6 @@ AC_CONFIG_AUX_DIR(.) # Initialize Automake AM_INIT_AUTOMAKE([foreign dist-bzip2]) -AM_MAINTAINER_MODE # Require X.Org macros 1.8 or later for MAN_SUBSTS set by XORG_MANPAGE_SECTIONS m4_ifndef([XORG_MACROS_VERSION], @@ -105,6 +104,40 @@ if test x$ASM != "xno"; then fi AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes) +# Check for atomic intrinsics +AC_CACHE_CHECK([for native atomic primitives], intel_cv_atomic_primitives, +[ + intel_cv_atomic_primitives="none" + + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ +int atomic_add(int i) { return __sync_fetch_and_add (&i, 1); } +int atomic_cmpxchg(int i, int j, int k) { return __sync_val_compare_and_swap (&i, j, k); } + ]],[[]])], + [intel_cv_atomic_primitives="Intel"],[]) + + if test "x$intel_cv_atomic_primitives" = "xnone"; then + AC_CHECK_HEADER([atomic_ops.h], intel_cv_atomic_primitives="libatomic-ops") + fi + + # atomic functions defined in <atomic.h> & libc on Solaris + if test "x$intel_cv_atomic_primitives" = "xnone"; then + AC_CHECK_FUNC([atomic_cas_uint], + intel_cv_atomic_primitives="Solaris") + fi + +]) +if test "x$intel_cv_atomic_primitives" = xIntel; then + AC_DEFINE(HAVE_ATOMIC_PRIMITIVES, 1, + [Enable if your compiler supports the Intel __sync_* atomic primitives]) +fi +if test "x$intel_cv_atomic_primitives" = "xlibatomic-ops"; then + AC_DEFINE(HAVE_LIB_ATOMIC_OPS, 1, [Enable if you have libatomic-ops-dev installed]) +fi + +if test "x$intel_cv_atomic_primitives" = "xnone"; then + AC_MSG_ERROR([xf86-video-intel depends upon atomic operations, which were not found for your compiler/cpu. 
Try compiling with -march=native, or install the libatomics-op-dev package.]) +fi + AC_ARG_ENABLE(udev, AS_HELP_STRING([--disable-udev], [Disable udev-based monitor hotplug detection [default=auto]]), @@ -113,7 +146,7 @@ AC_ARG_ENABLE(udev, if test x$UDEV != "xno"; then PKG_CHECK_MODULES(UDEV, [libudev], [udev=yes], [udev=no]) - if test x$UDEV == xyes -a x$udev != xyes; then + if test x$UDEV = xyes -a x$udev != xyes; then AC_MSG_ERROR([udev support requested but not found (libudev)]) fi if test x$udev = xyes; then @@ -151,12 +184,16 @@ AC_ARG_ENABLE(ums-only, AS_HELP_STRING([--enable-ums-only], [UMS_ONLY=no]) required_xorg_xserver_version=1.6 -required_pixman_version=0.24 +required_pixman_version=0.16 if pkg-config --exists 'pixman-1 >= 0.27.1'; then AC_DEFINE([HAS_PIXMAN_GLYPHS], 1, [Enable pixman glyph cache]) fi +if pkg-config --exists 'pixman-1 >= 0.24.0'; then + AC_DEFINE([HAS_PIXMAN_TRIANGLES], 1, [Enable pixman triangle rasterisation]) +fi + AC_ARG_ENABLE(sna, AS_HELP_STRING([--enable-sna], [Enable SandyBridge's New Acceleration (SNA) [default=auto]]), @@ -168,7 +205,6 @@ if test "x$SNA" = "xauto" && pkg-config --exists "xorg-server >= 1.10"; then SNA=yes fi if test "x$SNA" != "xno"; then - required_xorg_xserver_version=1.10 AC_DEFINE(USE_SNA, 1, [Enable SNA support]) fi AC_MSG_CHECKING([whether to include SNA support]) @@ -179,14 +215,24 @@ AC_ARG_ENABLE(uxa, AS_HELP_STRING([--enable-uxa], [Enable Unified Acceleration Architecture (UXA) [default=yes]]), [UXA="$enableval"], - [UXA=yes]) + [UXA=auto]) AC_MSG_CHECKING([whether to include UXA support]) -AM_CONDITIONAL(UXA, test x$UXA != xno) -AC_MSG_RESULT([$UXA]) +if test "x$UXA" = "xauto"; then + if ! pkg-config --exists 'libdrm_intel >= 2.4.29'; then + UXA=no + fi + if ! pkg-config --exists 'pixman-1 >= 0.24.0'; then + UXA=no + fi +fi if test "x$UXA" != "xno"; then AC_DEFINE(USE_UXA, 1, [Enable UXA support]) PKG_CHECK_MODULES(DRMINTEL, [libdrm_intel >= 2.4.29]) + required_pixman_version=0.24 + UXA=yes fi +AC_MSG_RESULT([$UXA]) +AM_CONDITIONAL(UXA, test x$UXA != xno) AC_MSG_CHECKING([whether to include GLAMOR support]) AC_ARG_ENABLE(glamor, @@ -314,7 +360,7 @@ XORG_DRIVER_CHECK_EXT(XF86DRI, xextproto x11) XORG_DRIVER_CHECK_EXT(DPMSExtension, xextproto) # Obtain compiler/linker options for the driver dependencies -PKG_CHECK_MODULES(DRM, [libdrm >= 2.4.24]) # libdrm_intel is checked separately +PKG_CHECK_MODULES(DRM, [libdrm >= 2.4.20]) # libdrm_intel is checked separately PKG_CHECK_MODULES(DRI, [xf86driproto], , DRI=no) PKG_CHECK_MODULES(DRI2, [dri2proto >= 2.6],, DRI2=no) PKG_CHECK_MODULES(PCIACCESS, [pciaccess >= 0.10]) @@ -370,9 +416,8 @@ AM_CONDITIONAL(DRI2, test "x$DRI2" = xyes) AC_MSG_RESULT([$DRI2]) if test "$XVMC" = yes; then - PKG_CHECK_MODULES(XVMCLIB, - [xvmc xext xfixes dri2proto x11-xcb xcb-dri2 xcb-aux], - [XVMC=yes], [XVMC=no]) + PKG_CHECK_MODULES(XVMCLIB, [xvmc dri2proto], [], [XVMC=no]) + PKG_CHECK_MODULES(XCB, [x11-xcb xcb-dri2 xcb-aux], [], [XVMC=no]) fi AC_MSG_CHECKING([whether to include XvMC support]) AC_MSG_RESULT([$XVMC]) @@ -391,7 +436,7 @@ if test "x$UMS_ONLY" = xyes; then fi AM_CONDITIONAL(DEBUG, test x$DEBUG != xno) -AM_CONDITIONAL(FULL_DEBUG, test x$FULL_DEBUG == xfull) +AM_CONDITIONAL(FULL_DEBUG, test x$DEBUG = xfull) if test "x$DEBUG" = xno; then AC_DEFINE(NDEBUG,1,[Disable internal debugging]) fi @@ -401,6 +446,9 @@ if test "x$DEBUG" != xno; then AC_DEFINE([HAVE_VALGRIND], 1, [Use valgrind intrinsics to suppress false warnings]) fi fi +if test "x$DEBUG" = xsync; then + AC_DEFINE(DEBUG_SYNC,1,[Enable 
synchronous rendering for debugging]) +fi if test "x$DEBUG" = xmemory; then AC_DEFINE(DEBUG_MEMORY,1,[Enable memory debugging]) fi diff --git a/man/Makefile.in b/man/Makefile.in index 29efd9588..278ae4405 100644 --- a/man/Makefile.in +++ b/man/Makefile.in @@ -196,7 +196,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MAN_SUBSTS = @MAN_SUBSTS@ @@ -235,6 +234,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@ VERSION = @VERSION@ X11_CFLAGS = @X11_CFLAGS@ X11_LIBS = @X11_LIBS@ +XCB_CFLAGS = @XCB_CFLAGS@ +XCB_LIBS = @XCB_LIBS@ XORG_CFLAGS = @XORG_CFLAGS@ XORG_LIBS = @XORG_LIBS@ XORG_MAN_PAGE = @XORG_MAN_PAGE@ @@ -304,7 +305,7 @@ all: all-am .SUFFIXES: .SUFFIXES: .$(DRIVER_MAN_SUFFIX) .man -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -329,9 +330,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) +$(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): diff --git a/man/intel.man b/man/intel.man index 0942dc1c0..fbd0230aa 100644 --- a/man/intel.man +++ b/man/intel.man @@ -116,6 +116,24 @@ The following driver .B Options are supported for the 830M and later chipsets: .TP +.BI "Option \*qNoAccel\*q \*q" boolean \*q +Disable or enable acceleration. +.IP +Default: acceleration is enabled. +.TP +.BI "Option \*qAccelMethod\*q \*q" string \*q +Select acceleration method. +There are a couple of backends available for accelerating the DDX. \*qUXA\*q (Unified +Acceleration Architecture) is the mature backend that was introduced to support +the GEM driver model. It is in the process of being superseded by \*qSNA\*q +(Sandybridge's New Acceleration). Until that process is complete, the ability to +choose which backend to use remains for backwards compatibility. +In addition, there are a pair of sub-options to limit the acceleration for +debugging use. Specify \*qoff\*q to disable all acceleration, or \*qblt\*q to +disable render acceleration and only use the BLT engine. 
+.IP +Default: use UXA (render acceleration) +.TP .BI "Option \*qVideoKey\*q \*q" integer \*q This is the same as the .B \*qColorKey\*q diff --git a/src/Makefile.in b/src/Makefile.in index 3c5a911f7..e28de984e 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -343,7 +343,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MAN_SUBSTS = @MAN_SUBSTS@ @@ -382,6 +381,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@ VERSION = @VERSION@ X11_CFLAGS = @X11_CFLAGS@ X11_LIBS = @X11_LIBS@ +XCB_CFLAGS = @XCB_CFLAGS@ +XCB_LIBS = @XCB_LIBS@ XORG_CFLAGS = @XORG_CFLAGS@ XORG_LIBS = @XORG_LIBS@ XORG_MAN_PAGE = @XORG_MAN_PAGE@ @@ -463,7 +464,7 @@ all: all-recursive .SUFFIXES: .SUFFIXES: .c .lo .o .obj -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -488,9 +489,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) +$(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): install-intel_drv_laLTLIBRARIES: $(intel_drv_la_LTLIBRARIES) diff --git a/src/compat-api.h b/src/compat-api.h index 6b7657241..6d147c74d 100644 --- a/src/compat-api.h +++ b/src/compat-api.h @@ -28,6 +28,10 @@ #ifndef COMPAT_API_H #define COMPAT_API_H +#include <xorg-server.h> +#include <xorgVersion.h> + +#include <picturestr.h> #ifndef GLYPH_HAS_GLYPH_PICTURE_ACCESSOR #define GetGlyphPicture(g, s) GlyphPicture((g))[(s)->myNum] #define SetGlyphPicture(g, s, p) GlyphPicture((g))[(s)->myNum] = p @@ -103,4 +107,54 @@ #endif +#ifndef INCLUDE_LEGACY_REGION_DEFINES +#define RegionCreate(r, s) REGION_CREATE(NULL, r, s) +#define RegionBreak(r) REGION_BREAK(NULL, r) +#define RegionSizeof REGION_SZOF +#define RegionBoxptr REGION_BOXPTR +#define RegionEnd REGION_END +#define RegionExtents(r) REGION_EXTENTS(NULL, r) +#define RegionRects REGION_RECTS +#define RegionNumRects REGION_NUM_RECTS +#define RegionContainsRect(r, b) RECT_IN_REGION(NULL, r, b) +#define RegionContainsPoint(r, x, y, b) POINT_IN_REGION(NULL, r, x, y, b) +#define RegionCopy(res, r) REGION_COPY(NULL, res, r) +#define RegionIntersect(res, r1, r2) REGION_INTERSECT(NULL, res, r1, r2) +#define RegionUnion(res, r1, r2) REGION_UNION(NULL, res, r1, r2) +#define RegionTranslate(r, x, y) REGION_TRANSLATE(NULL, r, x, y) +#define RegionUninit(r) REGION_UNINIT(NULL, r) +#define region_from_bitmap BITMAP_TO_REGION +#define RegionNil REGION_NIL +#define RegionNull(r) REGION_NULL(NULL, r) +#define RegionNotEmpty(r) REGION_NOTEMPTY(NULL, r) +#define RegionEmpty(r) REGION_EMPTY(NULL, r) +#define RegionDestroy(r) REGION_DESTROY(NULL, r) +#else +#define region_from_bitmap BitmapToRegion +#endif + +#ifndef _X_UNUSED +#define _X_UNUSED +#endif + +#if HAS_DEVPRIVATEKEYREC +#define __get_private(p, key) dixGetPrivateAddr(&(p)->devPrivates, &(key)) +#else +#define __get_private(p, key) dixLookupPrivate(&(p)->devPrivates, &(key)) +typedef int DevPrivateKeyRec; +static inline void 
FreePixmap(PixmapPtr pixmap) +{ + dixFreePrivates(pixmap->devPrivates); + free(pixmap); +} +#endif + +#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,9,99,902,0) +#define SourceValidate(d, x, y, w, h, mode) \ + if ((d)->pScreen->SourceValidate) (d)->pScreen->SourceValidate(d, x, y, w, h, mode) +#else +#define SourceValidate(d, x, y, w, h, mode) \ + if ((d)->pScreen->SourceValidate) (d)->pScreen->SourceValidate(d, x, y, w, h) +#endif + #endif diff --git a/src/i965_3d.c b/src/i965_3d.c index a18db1251..fe2d9aa6b 100644 --- a/src/i965_3d.c +++ b/src/i965_3d.c @@ -35,7 +35,7 @@ void gen6_upload_invariant_states(intel_screen_private *intel) { - Bool ivb = INTEL_INFO(intel)->gen >= 70; + Bool ivb = INTEL_INFO(intel)->gen >= 070; OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2)); OUT_BATCH(BRW_PIPE_CONTROL_IS_FLUSH | @@ -280,7 +280,7 @@ gen7_upload_bypass_states(intel_screen_private *intel) void gen6_upload_vs_state(intel_screen_private *intel) { - Bool ivb = INTEL_INFO(intel)->gen >= 70; + Bool ivb = INTEL_INFO(intel)->gen >= 070; /* disable VS constant buffer */ OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | ((ivb ? 7 : 5) - 2)); OUT_BATCH(0); diff --git a/src/i965_render.c b/src/i965_render.c index 42b195992..39698b0dc 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -1054,7 +1054,7 @@ i965_create_sampler_state(intel_screen_private *intel, sampler_state_extend_t mask_extend, drm_intel_bo * border_color_bo) { - if (INTEL_INFO(intel)->gen < 70) + if (INTEL_INFO(intel)->gen < 070) return gen4_create_sampler_state(intel, src_filter, src_extend, mask_filter, mask_extend, border_color_bo); @@ -1417,7 +1417,7 @@ i965_set_picture_surface_state(intel_screen_private *intel, PicturePtr picture, PixmapPtr pixmap, Bool is_dst) { - if (INTEL_INFO(intel)->gen < 70) + if (INTEL_INFO(intel)->gen < 070) return gen4_set_picture_surface_state(intel, picture, pixmap, is_dst); return gen7_set_picture_surface_state(intel, picture, pixmap, is_dst); } @@ -1571,7 +1571,7 @@ static void i965_emit_composite_state(struct intel_screen_private *intel) } /* Match Mesa driver setup */ - if (INTEL_INFO(intel)->gen >= 45) + if (INTEL_INFO(intel)->gen >= 045) OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); else OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D); @@ -1751,7 +1751,7 @@ static Bool i965_composite_check_aperture(intel_screen_private *intel) render_state->gen6_depth_stencil_bo, }; - if (INTEL_INFO(intel)->gen >= 60) + if (INTEL_INFO(intel)->gen >= 060) return drm_intel_bufmgr_check_aperture_space(gen6_bo_table, ARRAY_SIZE(gen6_bo_table)) == 0; else @@ -2181,7 +2181,7 @@ static void i965_select_vertex_buffer(struct intel_screen_private *intel) if (intel->vertex_id & (1 << id)) return; - if (INTEL_INFO(intel)->gen >= 70) + if (INTEL_INFO(intel)->gen >= 070) modifyenable = GEN7_VB0_ADDRESS_MODIFYENABLE; /* Set up the pointer to our (single) vertex buffer */ @@ -2190,7 +2190,7 @@ static void i965_select_vertex_buffer(struct intel_screen_private *intel) /* XXX could use multiple vbo to reduce relocations if * frequently switching between vertex sizes, like rgb10text. 
*/ - if (INTEL_INFO(intel)->gen >= 60) { + if (INTEL_INFO(intel)->gen >= 060) { OUT_BATCH((id << GEN6_VB0_BUFFER_INDEX_SHIFT) | GEN6_VB0_VERTEXDATA | modifyenable | @@ -2201,7 +2201,7 @@ static void i965_select_vertex_buffer(struct intel_screen_private *intel) (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT)); } OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); - if (INTEL_INFO(intel)->gen >= 50) + if (INTEL_INFO(intel)->gen >= 050) OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, sizeof(intel->vertex_ptr) - 1); @@ -2252,7 +2252,7 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, if (intel->needs_render_state_emit) { i965_bind_surfaces(intel); - if (INTEL_INFO(intel)->gen >= 60) + if (INTEL_INFO(intel)->gen >= 060) gen6_emit_composite_state(intel); else i965_emit_composite_state(intel); @@ -2271,7 +2271,7 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, i965_select_vertex_buffer(intel); if (intel->vertex_offset == 0) { - if (INTEL_INFO(intel)->gen >= 70) { + if (INTEL_INFO(intel)->gen >= 070) { OUT_BATCH(BRW_3DPRIMITIVE | (7 - 2)); OUT_BATCH(BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST); @@ -2298,7 +2298,7 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, w, h); intel->vertex_index += 3; - if (INTEL_INFO(intel)->gen < 50) { + if (INTEL_INFO(intel)->gen < 050) { /* XXX OMG! */ i965_vertex_flush(intel); OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); @@ -2355,7 +2355,7 @@ void gen4_render_state_init(ScrnInfoPtr scrn) assert(intel->gen4_render_state != NULL); } - if (INTEL_INFO(intel)->gen >= 60) + if (INTEL_INFO(intel)->gen >= 060) return gen6_render_state_init(scrn); render = intel->gen4_render_state; @@ -2601,7 +2601,7 @@ gen6_composite_cc_state_pointers(intel_screen_private *intel, cc_bo = render_state->cc_state_bo; depth_stencil_bo = render_state->gen6_depth_stencil_bo; } - if (INTEL_INFO(intel)->gen >= 70) { + if (INTEL_INFO(intel)->gen >= 070) { gen7_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset); } else { gen6_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset); @@ -2619,7 +2619,7 @@ gen6_composite_sampler_state_pointers(intel_screen_private *intel, intel->gen6_render_state.samplers = bo; - if (INTEL_INFO(intel)->gen >= 70) + if (INTEL_INFO(intel)->gen >= 070) gen7_upload_sampler_state_pointers(intel, bo); else gen6_upload_sampler_state_pointers(intel, bo); @@ -2628,7 +2628,7 @@ gen6_composite_sampler_state_pointers(intel_screen_private *intel, static void gen6_composite_wm_constants(intel_screen_private *intel) { - Bool ivb = INTEL_INFO(intel)->gen >= 70; + Bool ivb = INTEL_INFO(intel)->gen >= 070; /* disable WM constant buffer */ OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | ((ivb ? 
7 : 5) - 2)); OUT_BATCH(0); @@ -2652,7 +2652,7 @@ gen6_composite_sf_state(intel_screen_private *intel, intel->gen6_render_state.num_sf_outputs = num_sf_outputs; - if (INTEL_INFO(intel)->gen >= 70) + if (INTEL_INFO(intel)->gen >= 070) gen7_upload_sf_state(intel, num_sf_outputs, 1); else gen6_upload_sf_state(intel, num_sf_outputs, 1); @@ -2839,7 +2839,7 @@ gen6_emit_composite_state(struct intel_screen_private *intel) sampler_state_extend_t mask_extend = composite_op->mask_extend; Bool is_affine = composite_op->is_affine; Bool has_mask = intel->render_mask != NULL; - Bool ivb = INTEL_INFO(intel)->gen >= 70; + Bool ivb = INTEL_INFO(intel)->gen >= 070; uint32_t src, dst; drm_intel_bo *ps_sampler_state_bo = render->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend]; diff --git a/src/i965_video.c b/src/i965_video.c index 3276788fb..65f60612a 100644 --- a/src/i965_video.c +++ b/src/i965_video.c @@ -897,7 +897,7 @@ i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * surface_state_binding_tab /* brw_debug (scrn, "before base address modify"); */ /* Match Mesa driver setup */ - if (INTEL_INFO(intel)->gen >= 45) + if (INTEL_INFO(intel)->gen >= 045) OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); else OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D); @@ -1428,7 +1428,7 @@ gen6_create_vidoe_objects(ScrnInfoPtr scrn) const uint32_t *packed_ps_kernel, *planar_ps_kernel; unsigned int packed_ps_size, planar_ps_size; - if (INTEL_INFO(intel)->gen >= 70) { + if (INTEL_INFO(intel)->gen >= 070) { create_sampler_state = gen7_create_sampler_state; packed_ps_kernel = &ps_kernel_packed_static_gen7[0][0]; packed_ps_size = sizeof(ps_kernel_packed_static_gen7); @@ -1787,7 +1787,7 @@ void Gen6DisplayVideoTextured(ScrnInfoPtr scrn, PixmapPtr, drm_intel_bo *, uint32_t); - if (INTEL_INFO(intel)->gen >= 70) { + if (INTEL_INFO(intel)->gen >= 070) { create_dst_surface_state = gen7_create_dst_surface_state; create_src_surface_state = gen7_create_src_surface_state; emit_video_setup = gen7_emit_video_setup; diff --git a/src/intel.h b/src/intel.h index a5603fee6..d4c9aff21 100644 --- a/src/intel.h +++ b/src/intel.h @@ -182,7 +182,7 @@ typedef struct intel_screen_private { unsigned int batch_emit_start; /** Number of bytes to be emitted in the current BEGIN_BATCH. */ uint32_t batch_emitting; - dri_bo *batch_bo; + dri_bo *batch_bo, *last_batch_bo[2]; /** Whether we're in a section of code that can't tolerate flushing */ Bool in_batch_atomic; /** Ending batch_used that was verified by intel_start_batch_atomic() */ @@ -366,6 +366,7 @@ extern Bool intel_mode_pre_init(ScrnInfoPtr pScrn, int fd, int cpp); extern void intel_mode_init(struct intel_screen_private *intel); extern void intel_mode_disable_unused_functions(ScrnInfoPtr scrn); extern void intel_mode_remove_fb(intel_screen_private *intel); +extern void intel_mode_close(intel_screen_private *intel); extern void intel_mode_fini(intel_screen_private *intel); extern int intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, xf86CrtcPtr crtc); @@ -552,6 +553,9 @@ intel_get_transformed_coordinates_3d(int x, int y, PictTransformPtr transform, float *x_out, float *y_out, float *z_out); static inline void +intel_debug_fallback(ScrnInfoPtr scrn, const char *format, ...) _X_ATTRIBUTE_PRINTF(2, 3); + +static inline void intel_debug_fallback(ScrnInfoPtr scrn, const char *format, ...) 
{ intel_screen_private *intel = intel_get_screen_private(scrn); diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c index 46f22bc36..a44a15632 100644 --- a/src/intel_batchbuffer.c +++ b/src/intel_batchbuffer.c @@ -67,17 +67,26 @@ void intel_next_vertex(intel_screen_private *intel) dri_bo_alloc(intel->bufmgr, "vertex", sizeof (intel->vertex_ptr), 4096); } -static void intel_next_batch(ScrnInfoPtr scrn) +static dri_bo *bo_alloc(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); - + int size = 4 * 4096; /* The 865 has issues with larger-than-page-sized batch buffers. */ if (IS_I865G(intel)) - intel->batch_bo = - dri_bo_alloc(intel->bufmgr, "batch", 4096, 4096); - else - intel->batch_bo = - dri_bo_alloc(intel->bufmgr, "batch", 4096 * 4, 4096); + size = 4096; + return dri_bo_alloc(intel->bufmgr, "batch", size, 4096); +} + +static void intel_next_batch(ScrnInfoPtr scrn, int mode) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + dri_bo *tmp; + + drm_intel_gem_bo_clear_relocs(intel->batch_bo, 0); + + tmp = intel->last_batch_bo[mode]; + intel->last_batch_bo[mode] = intel->batch_bo; + intel->batch_bo = tmp; intel->batch_used = 0; @@ -95,12 +104,25 @@ void intel_batch_init(ScrnInfoPtr scrn) intel->batch_emitting = 0; intel->vertex_id = 0; - intel_next_batch(scrn); + intel->last_batch_bo[0] = bo_alloc(scrn); + intel->last_batch_bo[1] = bo_alloc(scrn); + + intel->batch_bo = bo_alloc(scrn); + intel->batch_used = 0; + intel->last_3d = LAST_3D_OTHER; } void intel_batch_teardown(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); + int i; + + for (i = 0; i < ARRAY_SIZE(intel->last_batch_bo); i++) { + if (intel->last_batch_bo[i] != NULL) { + dri_bo_unreference(intel->last_batch_bo[i]); + intel->last_batch_bo[i] = NULL; + } + } if (intel->batch_bo != NULL) { dri_bo_unreference(intel->batch_bo); @@ -162,7 +184,7 @@ void intel_batch_emit_flush(ScrnInfoPtr scrn) assert (!intel->in_batch_atomic); /* Big hammer, look to the pipelined flushes in future. */ - if ((INTEL_INFO(intel)->gen >= 60)) { + if ((INTEL_INFO(intel)->gen >= 060)) { if (intel->current_batch == BLT_BATCH) { BEGIN_BATCH_BLT(4); OUT_BATCH(MI_FLUSH_DW | 2); @@ -171,7 +193,7 @@ void intel_batch_emit_flush(ScrnInfoPtr scrn) OUT_BATCH(0); ADVANCE_BATCH(); } else { - if ((INTEL_INFO(intel)->gen == 60)) { + if ((INTEL_INFO(intel)->gen == 060)) { /* HW-Workaround for Sandybdrige */ intel_emit_post_sync_nonzero_flush(scrn); } else { @@ -187,7 +209,7 @@ void intel_batch_emit_flush(ScrnInfoPtr scrn) } } else { flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE; - if (INTEL_INFO(intel)->gen >= 40) + if (INTEL_INFO(intel)->gen >= 040) flags = 0; BEGIN_BATCH(1); @@ -239,22 +261,21 @@ void intel_batch_submit(ScrnInfoPtr scrn) } if (ret != 0) { - if (ret == -EIO) { - static int once; - - /* The GPU has hung and unlikely to recover by this point. */ - if (!once) { + static int once; + if (!once) { + if (ret == -EIO) { + /* The GPU has hung and unlikely to recover by this point. */ xf86DrvMsg(scrn->scrnIndex, X_ERROR, "Detected a hung GPU, disabling acceleration.\n"); xf86DrvMsg(scrn->scrnIndex, X_ERROR, "When reporting this, please include i915_error_state from debugfs and the full dmesg.\n"); - uxa_set_force_fallback(xf86ScrnToScreen(scrn), TRUE); - intel->force_fallback = TRUE; - once = 1; + } else { + /* The driver is broken. 
*/ + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Failed to submit batch buffer, expect rendering corruption: %s.\n ", + strerror(-ret)); } - } else { - xf86DrvMsg(scrn->scrnIndex, X_ERROR, - "Failed to submit batch buffer, expect rendering corruption " - "or even a frozen display: %s.\n", - strerror(-ret)); + uxa_set_force_fallback(xf86ScrnToScreen(scrn), TRUE); + intel->force_fallback = TRUE; + once = 1; } } @@ -273,8 +294,7 @@ void intel_batch_submit(ScrnInfoPtr scrn) if (intel->debug_flush & DEBUG_FLUSH_WAIT) drm_intel_bo_wait_rendering(intel->batch_bo); - dri_bo_unreference(intel->batch_bo); - intel_next_batch(scrn); + intel_next_batch(scrn, intel->current_batch == I915_EXEC_BLT); if (intel->batch_commit_notify) intel->batch_commit_notify(intel); diff --git a/src/intel_display.c b/src/intel_display.c index d58e6e0b6..5ee955ee6 100644 --- a/src/intel_display.c +++ b/src/intel_display.c @@ -31,6 +31,7 @@ #include <sys/types.h> #include <sys/stat.h> +#include <sys/poll.h> #include <fcntl.h> #include <unistd.h> #include <errno.h> @@ -270,6 +271,7 @@ intel_output_backlight_init(xf86OutputPtr output) intel_output->backlight_iface = str; intel_output->backlight_max = intel_output_backlight_get_max(output); if (intel_output->backlight_max > 0) { + intel_output->backlight_active_level = intel_output_backlight_get(output); xf86DrvMsg(output->scrn->scrnIndex, X_CONFIG, "found backlight control interface %s\n", path); return; @@ -493,6 +495,8 @@ intel_crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, ErrorF("failed to add fb\n"); return FALSE; } + + drm_intel_bo_disable_reuse(intel->front_buffer); } saved_mode = crtc->mode; @@ -597,6 +601,8 @@ intel_crtc_shadow_allocate(xf86CrtcPtr crtc, int width, int height) return NULL; } + drm_intel_bo_disable_reuse(intel_crtc->rotate_bo); + intel_crtc->rotate_pitch = rotate_pitch; return intel_crtc->rotate_bo; } @@ -723,6 +729,8 @@ intel_set_scanout_pixmap(xf86CrtcPtr crtc, PixmapPtr ppix) ErrorF("have front buffer\n"); } + drm_intel_bo_disable_reuse(bo); + intel_crtc->scanout_pixmap = ppix; return drmModeAddFB(intel->drmSubFD, ppix->drawable.width, ppix->drawable.height, ppix->drawable.depth, @@ -1494,6 +1502,7 @@ intel_xf86crtc_resize(ScrnInfoPtr scrn, int width, int height) if (ret) goto fail; + drm_intel_bo_disable_reuse(intel->front_buffer); intel->front_pitch = pitch; intel->front_tiling = tiling; @@ -1555,6 +1564,7 @@ intel_do_pageflip(intel_screen_private *intel, new_front->handle, &new_fb_id)) goto error_out; + drm_intel_bo_disable_reuse(new_front); intel_glamor_flush(intel); intel_batch_submit(scrn); @@ -1822,6 +1832,26 @@ intel_mode_remove_fb(intel_screen_private *intel) } } +static Bool has_pending_events(int fd) +{ + struct pollfd pfd; + pfd.fd = fd; + pfd.events = POLLIN; + return poll(&pfd, 1, 0) == 1; +} + +void +intel_mode_close(intel_screen_private *intel) +{ + struct intel_mode *mode = intel->modes; + + if (mode == NULL) + return; + + while (has_pending_events(mode->fd)) + drmHandleEvent(mode->fd, &mode->event_context); +} + void intel_mode_fini(intel_screen_private *intel) { diff --git a/src/intel_dri.c b/src/intel_dri.c index 867a4653f..f3512034a 100644 --- a/src/intel_dri.c +++ b/src/intel_dri.c @@ -451,7 +451,7 @@ I830DRI2CopyRegion(DrawablePtr drawable, RegionPtr pRegion, /* Wait for the scanline to be outside the region to be copied */ if (scrn->vtSema && pixmap_is_scanout(get_drawable_pixmap(dst)) && - intel->swapbuffers_wait && INTEL_INFO(intel)->gen < 60) { + intel->swapbuffers_wait && INTEL_INFO(intel)->gen < 060) { BoxPtr 
diff --git a/src/intel_dri.c b/src/intel_dri.c index 867a4653f..f3512034a 100644 --- a/src/intel_dri.c +++ b/src/intel_dri.c @@ -451,7 +451,7 @@ I830DRI2CopyRegion(DrawablePtr drawable, RegionPtr pRegion, /* Wait for the scanline to be outside the region to be copied */ if (scrn->vtSema && pixmap_is_scanout(get_drawable_pixmap(dst)) && - intel->swapbuffers_wait && INTEL_INFO(intel)->gen < 60) { + intel->swapbuffers_wait && INTEL_INFO(intel)->gen < 060) { BoxPtr box; BoxRec crtcbox; int y1, y2; @@ -485,20 +485,20 @@ I830DRI2CopyRegion(DrawablePtr drawable, RegionPtr pRegion, * of extra time for the blitter to start up and * do its job for a full height blit */ - if (full_height && INTEL_INFO(intel)->gen < 40) + if (full_height && INTEL_INFO(intel)->gen < 040) y2 -= 2; if (pipe == 0) { event = MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW; load_scan_lines_pipe = MI_LOAD_SCAN_LINES_DISPLAY_PIPEA; - if (full_height && INTEL_INFO(intel)->gen >= 40) + if (full_height && INTEL_INFO(intel)->gen >= 040) event = MI_WAIT_FOR_PIPEA_SVBLANK; } else { event = MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW; load_scan_lines_pipe = MI_LOAD_SCAN_LINES_DISPLAY_PIPEB; - if (full_height && INTEL_INFO(intel)->gen >= 40) + if (full_height && INTEL_INFO(intel)->gen >= 040) event = MI_WAIT_FOR_PIPEB_SVBLANK; } @@ -547,6 +547,23 @@ I830DRI2CopyRegion(DrawablePtr drawable, RegionPtr pRegion, intel_batch_submit(scrn); } +static void +I830DRI2FallbackBlitSwap(DrawablePtr drawable, + DRI2BufferPtr dst, + DRI2BufferPtr src) +{ + BoxRec box; + RegionRec region; + + box.x1 = 0; + box.y1 = 0; + box.x2 = drawable->width; + box.y2 = drawable->height; + REGION_INIT(pScreen, &region, &box, 0); + + I830DRI2CopyRegion(drawable, &region, dst, src); +} + #if DRI2INFOREC_VERSION >= 4 static void I830DRI2ReferenceBuffer(DRI2Buffer2Ptr buffer) @@ -996,17 +1013,8 @@ void I830DRI2FrameEventHandler(unsigned int frame, unsigned int tv_sec, /* else fall through to exchange/blit */ case DRI2_SWAP: { - BoxRec box; - RegionRec region; - - box.x1 = 0; - box.y1 = 0; - box.x2 = drawable->width; - box.y2 = drawable->height; - REGION_INIT(pScreen, &region, &box, 0); - - I830DRI2CopyRegion(drawable, - &region, swap_info->front, swap_info->back); + I830DRI2FallbackBlitSwap(drawable, + swap_info->front, swap_info->back); DRI2SwapComplete(swap_info->client, drawable, frame, tv_sec, tv_usec, DRI2_BLIT_COMPLETE, swap_info->client ? swap_info->event_complete : NULL, @@ -1089,17 +1097,10 @@ void I830DRI2FlipEventHandler(unsigned int frame, unsigned int tv_sec, i830_dri2_del_frame_event(chain_drawable, chain); } else if (!can_exchange(chain_drawable, chain->front, chain->back) || !I830DRI2ScheduleFlip(intel, chain_drawable, chain)) { - BoxRec box; - RegionRec region; - - box.x1 = 0; - box.y1 = 0; - box.x2 = chain_drawable->width; - box.y2 = chain_drawable->height; - REGION_INIT(pScreen, &region, &box, 0); + I830DRI2FallbackBlitSwap(chain_drawable, + chain->front, + chain->back); - I830DRI2CopyRegion(chain_drawable, &region, - chain->front, chain->back); DRI2SwapComplete(chain->client, chain_drawable, frame, tv_sec, tv_usec, DRI2_BLIT_COMPLETE, chain->client ? chain->event_complete : NULL, @@ -1162,8 +1163,6 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, DRI2FrameEventPtr swap_info = NULL; enum DRI2FrameEventType swap_type = DRI2_SWAP; CARD64 current_msc; - BoxRec box; - RegionRec region; /* Drawable not displayed... just complete the swap */ if (pipe == -1) @@ -1231,7 +1230,13 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, * the swap. */ if (divisor == 0 || current_msc < *target_msc) { - if (flip && I830DRI2ScheduleFlip(intel, draw, swap_info)) + /* + * If we can, schedule the flip directly from here rather + * than waiting for an event from the kernel for the current + * (or a past) MSC. + */
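The new test that follows this comment only takes the shortcut when no divisor constrains the swap and the target MSC has already been reached; otherwise the driver still queues a vblank event and completes the swap from its event handler. A compilable sketch of that decision, with stub functions standing in for I830DRI2ScheduleFlip() and the drmWaitVBlank request (names and stubs are illustrative):

#include <stdio.h>

typedef unsigned long long u64;

static int schedule_flip(void) { puts("flip now"); return 1; }	/* stands in for I830DRI2ScheduleFlip() */
static void queue_vblank(u64 t) { printf("wait for vblank %llu\n", t); }	/* stands in for the drmWaitVBlank request */

/* Mirrors the condition above: flip immediately only when no divisor
 * was requested and the target MSC has already been reached. */
static void schedule_swap(int flip, u64 divisor, u64 current_msc, u64 target_msc)
{
	if (divisor == 0 || current_msc < target_msc) {
		if (flip && divisor == 0 && current_msc >= target_msc &&
		    schedule_flip())
			return;
		queue_vblank(target_msc);
	}
}

int main(void)
{
	schedule_swap(1, 0, 10, 8);	/* target already passed: flips now */
	schedule_swap(1, 0, 10, 12);	/* target in the future: waits for vblank */
	return 0;
}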
+ if (flip && divisor == 0 && current_msc >= *target_msc && + I830DRI2ScheduleFlip(intel, draw, swap_info)) return TRUE; vbl.request.type = @@ -1313,14 +1318,7 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, return TRUE; blit_fallback: - box.x1 = 0; - box.y1 = 0; - box.x2 = draw->width; - box.y2 = draw->height; - REGION_INIT(pScreen, &region, &box, 0); - - I830DRI2CopyRegion(draw, &region, front, back); - + I830DRI2FallbackBlitSwap(draw, front, back); DRI2SwapComplete(client, draw, 0, 0, 0, DRI2_BLIT_COMPLETE, func, data); if (swap_info) i830_dri2_del_frame_event(draw, swap_info); @@ -1515,6 +1513,17 @@ out_complete: static int dri2_server_generation; #endif +static const char *dri_driver_name(intel_screen_private *intel) +{ + const char *s = xf86GetOptValString(intel->Options, OPTION_DRI); + Bool dummy; + + if (s == NULL || xf86getBoolValue(&dummy, s)) + return INTEL_INFO(intel)->gen < 040 ? "i915" : "i965"; + + return s; +} + Bool I830DRI2ScreenInit(ScreenPtr screen) { ScrnInfoPtr scrn = xf86ScreenToScrn(screen); @@ -1564,7 +1573,7 @@ Bool I830DRI2ScreenInit(ScreenPtr screen) intel->deviceName = drmGetDeviceNameFromFd(intel->drmSubFD); memset(&info, '\0', sizeof(info)); info.fd = intel->drmSubFD; - info.driverName = INTEL_INFO(intel)->gen < 40 ? "i915" : "i965"; + info.driverName = dri_driver_name(intel); info.deviceName = intel->deviceName; #if DRI2INFOREC_VERSION == 1 diff --git a/src/intel_driver.c b/src/intel_driver.c index 65a50088e..780710624 100644 --- a/src/intel_driver.c +++ b/src/intel_driver.c @@ -221,11 +221,19 @@ static Bool I830GetEarlyOptions(ScrnInfoPtr scrn) return TRUE; } +static Bool intel_option_cast_string_to_bool(intel_screen_private *intel, + int id, Bool val) +{ + xf86getBoolValue(&val, xf86GetOptValString(intel->Options, id)); + return val; +} + static void intel_check_dri_option(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); + intel->directRenderingType = DRI_NONE; - if (!xf86ReturnOptValBool(intel->Options, OPTION_DRI, TRUE)) + if (!intel_option_cast_string_to_bool(intel, OPTION_DRI, TRUE)) intel->directRenderingType = DRI_DISABLED; if (scrn->depth != 16 && scrn->depth != 24 && scrn->depth != 30) { @@ -317,7 +325,7 @@ static int intel_init_bufmgr(intel_screen_private *intel) list_init(&intel->batch_pixmaps); - if ((INTEL_INFO(intel)->gen == 60)) { + if ((INTEL_INFO(intel)->gen == 060)) { intel->wa_scratch_bo = drm_intel_bo_alloc(intel->bufmgr, "wa scratch", 4096, 4096); @@ -397,13 +405,14 @@ static Bool can_accelerate_blt(struct intel_screen_private *intel) if (INTEL_INFO(intel)->gen == -1) return FALSE; - if (xf86ReturnOptValBool(intel->Options, OPTION_ACCEL_DISABLE, FALSE)) { + if (xf86ReturnOptValBool(intel->Options, OPTION_ACCEL_DISABLE, FALSE) || + !intel_option_cast_string_to_bool(intel, OPTION_ACCEL_METHOD, TRUE)) { xf86DrvMsg(intel->scrn->scrnIndex, X_CONFIG, "Disabling hardware acceleration.\n"); return FALSE; } - if (INTEL_INFO(intel)->gen == 60) { + if (INTEL_INFO(intel)->gen == 060) { struct pci_device *const device = intel->PciInfo; /* Sandybridge rev07 locks up easily, even with the @@ -418,7 +427,7 @@ static Bool can_accelerate_blt(struct intel_screen_private *intel) } } - if (INTEL_INFO(intel)->gen >= 60) { + if (INTEL_INFO(intel)->gen >= 060) { drm_i915_getparam_t gp; int value; @@ -579,7 +588,7 @@ static Bool I830PreInit(ScrnInfoPtr scrn, int flags) intel->has_relaxed_fencing = xf86ReturnOptValBool(intel->Options, OPTION_RELAXED_FENCING, - INTEL_INFO(intel)->gen >= 33); +
INTEL_INFO(intel)->gen >= 033); /* And override the user if there is no kernel support */ if (intel->has_relaxed_fencing) intel->has_relaxed_fencing = has_relaxed_fencing(intel); @@ -677,7 +686,7 @@ void IntelEmitInvarientState(ScrnInfoPtr scrn) } #ifdef INTEL_PIXMAP_SHARING -static Bool +static void redisplay_dirty(ScreenPtr screen, PixmapDirtyUpdatePtr dirty) { ScrnInfoPtr scrn = xf86ScreenToScrn(screen); @@ -686,8 +695,19 @@ redisplay_dirty(ScreenPtr screen, PixmapDirtyUpdatePtr dirty) int was_blocked; PixmapRegionInit(&pixregion, dirty->slave_dst->master_pixmap); + RegionTranslate(&pixregion, dirty->x, dirty->y); + RegionIntersect(&pixregion, &pixregion, DamageRegion(dirty->damage)); + RegionTranslate(&pixregion, -dirty->x, -dirty->y); + was_blocked = RegionNil(&pixregion); + DamageRegionAppend(&dirty->slave_dst->drawable, &pixregion); + RegionUninit(&pixregion); + if (was_blocked) + return; + PixmapRegionInit(&pixregion, dirty->slave_dst->master_pixmap); PixmapSyncDirtyHelper(dirty, &pixregion); + RegionUninit(&pixregion); + intel_batch_submit(scrn); if (!intel->has_prime_vmap_flush) { drm_intel_bo *bo = intel_get_pixmap_bo(dirty->slave_dst->master_pixmap); @@ -695,10 +715,10 @@ redisplay_dirty(ScreenPtr screen, PixmapDirtyUpdatePtr dirty) drm_intel_bo_map(bo, FALSE); drm_intel_bo_unmap(bo); xf86UnblockSIGIO(was_blocked); - } - DamageRegionAppend(&dirty->slave_dst->drawable, &pixregion); - RegionUninit(&pixregion); - return 0; + } + + DamageRegionProcessPending(&dirty->slave_dst->drawable); + return; } static void @@ -710,7 +730,6 @@ intel_dirty_update(ScreenPtr screen) if (xorg_list_is_empty(&screen->pixmap_dirty_list)) return; - ErrorF("list is not empty\n"); xorg_list_for_each_entry(ent, &screen->pixmap_dirty_list, ent) { region = DamageRegion(ent->damage); if (RegionNotEmpty(region)) { @@ -921,7 +940,7 @@ I830ScreenInit(SCREEN_INIT_ARGS_DECL) intel_batch_init(scrn); - if (INTEL_INFO(intel)->gen >= 40) + if (INTEL_INFO(intel)->gen >= 040) gen4_render_state_init(scrn); miClearVisualTypes(); @@ -1014,7 +1033,7 @@ I830ScreenInit(SCREEN_INIT_ARGS_DECL) xf86DPMSInit(screen, xf86DPMSSet, 0); #ifdef INTEL_XVMC - if (INTEL_INFO(intel)->gen >= 40) + if (INTEL_INFO(intel)->gen >= 040) intel->XvMCEnabled = TRUE; from = ((intel->directRenderingType == DRI_DRI2) && xf86GetOptValBool(intel->Options, OPTION_XVMC, @@ -1139,6 +1158,8 @@ static Bool I830CloseScreen(CLOSE_SCREEN_ARGS_DECL) I830UeventFini(scrn); #endif + intel_mode_close(intel); + DeleteCallback(&FlushCallback, intel_flush_callback, scrn); intel_glamor_close_screen(screen); @@ -1174,7 +1195,7 @@ static Bool I830CloseScreen(CLOSE_SCREEN_ARGS_DECL) intel_batch_teardown(scrn); - if (INTEL_INFO(intel)->gen >= 40) + if (INTEL_INFO(intel)->gen >= 040) gen4_render_state_cleanup(scrn); xf86_cursors_fini(screen); diff --git a/src/intel_driver.h b/src/intel_driver.h index b7190620d..c98025bac 100644 --- a/src/intel_driver.h +++ b/src/intel_driver.h @@ -230,6 +230,9 @@ #define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D3A #define PCI_CHIP_VALLEYVIEW_PO 0x0f30 +#define PCI_CHIP_VALLEYVIEW_1 0x0f31 +#define PCI_CHIP_VALLEYVIEW_2 0x0f32 +#define PCI_CHIP_VALLEYVIEW_3 0x0f33 #endif @@ -249,7 +252,7 @@ #define CHIP_REVISION(p) (p)->revision #define INTEL_INFO(intel) ((intel)->info) -#define IS_GENx(intel, X) (INTEL_INFO(intel)->gen >= 10*(X) && INTEL_INFO(intel)->gen < 10*((X)+1)) +#define IS_GENx(intel, X) (INTEL_INFO(intel)->gen >= 8*(X) && INTEL_INFO(intel)->gen < 8*((X)+1)) #define IS_GEN1(intel) IS_GENx(intel, 1) #define IS_GEN2(intel) IS_GENx(intel, 
2) #define IS_GEN3(intel) IS_GENx(intel, 3) @@ -257,7 +260,7 @@ #define IS_GEN5(intel) IS_GENx(intel, 5) #define IS_GEN6(intel) IS_GENx(intel, 6) #define IS_GEN7(intel) IS_GENx(intel, 7) -#define IS_HSW(intel) (INTEL_INFO(intel)->gen == 75) +#define IS_HSW(intel) (INTEL_INFO(intel)->gen == 075) /* Some chips have specific errata (or limits) that we need to workaround. */ #define IS_I830(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I830_M) @@ -270,8 +273,8 @@ #define IS_965_Q(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I965_Q) /* supports Y tiled surfaces (pre-965 Mesa isn't ready yet) */ -#define SUPPORTS_YTILING(pI810) (INTEL_INFO(intel)->gen >= 40) -#define HAS_BLT(pI810) (INTEL_INFO(intel)->gen >= 60) +#define SUPPORTS_YTILING(pI810) (INTEL_INFO(intel)->gen >= 040) +#define HAS_BLT(pI810) (INTEL_INFO(intel)->gen >= 060) struct intel_device_info { int gen; diff --git a/src/intel_hwmc.c b/src/intel_hwmc.c index af8bd8134..25978d22a 100644 --- a/src/intel_hwmc.c +++ b/src/intel_hwmc.c @@ -75,11 +75,11 @@ static int create_context(ScrnInfoPtr scrn, XvMCContextPtr pContext, contextRec->type = XVMC_I915_MPEG2_MC; contextRec->i915.use_phys_addr = 0; } else { - if (INTEL_INFO(intel)->gen >= 45) + if (INTEL_INFO(intel)->gen >= 045) contextRec->type = XVMC_I965_MPEG2_VLD; else contextRec->type = XVMC_I965_MPEG2_MC; - contextRec->i965.is_g4x = INTEL_INFO(intel)->gen == 45; + contextRec->i965.is_g4x = INTEL_INFO(intel)->gen == 045; contextRec->i965.is_965_q = IS_965_Q(intel); contextRec->i965.is_igdng = IS_GEN5(intel); } @@ -227,7 +227,7 @@ Bool intel_xvmc_adaptor_init(ScreenPtr pScreen) name = "i915_xvmc", pAdapt->num_surfaces = ARRAY_SIZE(surface_info_i915); pAdapt->surfaces = surface_info_i915; - } else if (INTEL_INFO(intel)->gen >= 45) { + } else if (INTEL_INFO(intel)->gen >= 045) { name = "xvmc_vld", pAdapt->num_surfaces = ARRAY_SIZE(surface_info_vld); pAdapt->surfaces = surface_info_vld; diff --git a/src/intel_memory.c b/src/intel_memory.c index f08ebdd01..e51fa33a9 100644 --- a/src/intel_memory.c +++ b/src/intel_memory.c @@ -94,7 +94,7 @@ unsigned long intel_get_fence_size(intel_screen_private *intel, unsigned long si unsigned long i; unsigned long start; - if (INTEL_INFO(intel)->gen >= 40 || intel->has_relaxed_fencing) { + if (INTEL_INFO(intel)->gen >= 040 || intel->has_relaxed_fencing) { /* The 965 can have fences at any page boundary. */ return ALIGN(size, 4096); } else { @@ -127,7 +127,7 @@ intel_get_fence_pitch(intel_screen_private *intel, unsigned long pitch, return pitch; /* 965+ is flexible */ - if (INTEL_INFO(intel)->gen >= 40) + if (INTEL_INFO(intel)->gen >= 040) return ALIGN(pitch, tile_width); /* Pre-965 needs power of two tile width */ @@ -173,7 +173,7 @@ static inline int intel_pad_drawable_width(int width) static size_t agp_aperture_size(struct pci_device *dev, int gen) { - return dev->regions[gen < 30 ? 0 : 2].size; + return dev->regions[gen < 030 ? 
0 : 2].size; } static void intel_set_gem_max_sizes(ScrnInfoPtr scrn) diff --git a/src/intel_module.c b/src/intel_module.c index e6ca964d6..141f77afe 100644 --- a/src/intel_module.c +++ b/src/intel_module.c @@ -56,62 +56,62 @@ static const struct intel_device_info intel_generic_info = { }; static const struct intel_device_info intel_i81x_info = { - .gen = 10, + .gen = 010, }; static const struct intel_device_info intel_i830_info = { - .gen = 20, + .gen = 020, }; static const struct intel_device_info intel_i845_info = { - .gen = 20, + .gen = 020, }; static const struct intel_device_info intel_i855_info = { - .gen = 21, + .gen = 021, }; static const struct intel_device_info intel_i865_info = { - .gen = 22, + .gen = 022, }; static const struct intel_device_info intel_i915_info = { - .gen = 30, + .gen = 030, }; static const struct intel_device_info intel_i945_info = { - .gen = 31, + .gen = 031, }; static const struct intel_device_info intel_g33_info = { - .gen = 33, + .gen = 033, }; static const struct intel_device_info intel_i965_info = { - .gen = 40, + .gen = 040, }; static const struct intel_device_info intel_g4x_info = { - .gen = 45, + .gen = 045, }; static const struct intel_device_info intel_ironlake_info = { - .gen = 50, + .gen = 050, }; static const struct intel_device_info intel_sandybridge_info = { - .gen = 60, + .gen = 060, }; static const struct intel_device_info intel_ivybridge_info = { - .gen = 70, + .gen = 070, }; static const struct intel_device_info intel_valleyview_info = { - .gen = 70, + .gen = 071, }; static const struct intel_device_info intel_haswell_info = { - .gen = 75, + .gen = 075, }; -static const SymTabRec _intel_chipsets[] = { +static const SymTabRec intel_chipsets[] = { {PCI_CHIP_I810, "i810"}, {PCI_CHIP_I810_DC100, "i810-dc100"}, {PCI_CHIP_I810_E, "i810e"}, @@ -199,9 +199,7 @@ static const SymTabRec _intel_chipsets[] = { {PCI_CHIP_VALLEYVIEW_PO, "ValleyView PO board" }, {-1, NULL} }; -#define NUM_CHIPSETS (sizeof(_intel_chipsets) / sizeof(_intel_chipsets[0])) - -static SymTabRec *intel_chipsets = (SymTabRec *) _intel_chipsets; +#define NUM_CHIPSETS (sizeof(intel_chipsets) / sizeof(intel_chipsets[0])) #define INTEL_DEVICE_MATCH(d,i) \ { 0x8086, (d), PCI_MATCH_ANY, PCI_MATCH_ANY, 0x3 << 16, 0xff << 16, (intptr_t)(i) } @@ -308,6 +306,9 @@ static const struct pci_id_match intel_device_match[] = { INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT2_PLUS, &intel_haswell_info ), INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_PO, &intel_valleyview_info ), + INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_1, &intel_valleyview_info ), + INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_2, &intel_valleyview_info ), + INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_3, &intel_valleyview_info ), INTEL_DEVICE_MATCH (PCI_MATCH_ANY, &intel_generic_info ), #endif @@ -383,7 +384,7 @@ static Bool intel_driver_func(ScrnInfoPtr pScrn, } } -static Bool has_kernel_mode_setting(struct pci_device *dev) +static Bool has_kernel_mode_setting(const struct pci_device *dev) { char id[20]; int ret, fd; @@ -418,7 +419,6 @@ static Bool has_kernel_mode_setting(struct pci_device *dev) if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) ret = FALSE; } - close(fd); } @@ -465,50 +465,15 @@ static enum accel_method { UXA, SNA } get_accel_method(void) } #endif -/* - * intel_pci_probe -- - * - * Look through the PCI bus to find cards that are intel boards. - * Setup the dispatch table for the rest of the driver functions. 
- * - */ -static Bool intel_pci_probe(DriverPtr driver, - int entity_num, - struct pci_device *device, - intptr_t match_data) +static Bool +intel_scrn_create(DriverPtr driver, + int entity_num, + intptr_t match_data, + unsigned flags) { ScrnInfoPtr scrn; - PciChipsets intel_pci_chipsets[NUM_CHIPSETS]; - unsigned i; - - if (!has_kernel_mode_setting(device)) { -#if KMS_ONLY - return FALSE; -#else - switch (DEVICE_ID(device)) { - case PCI_CHIP_I810: - case PCI_CHIP_I810_DC100: - case PCI_CHIP_I810_E: - case PCI_CHIP_I815: - break; - default: - return FALSE; - } -#endif - } - for (i = 0; i < NUM_CHIPSETS; i++) { - intel_pci_chipsets[i].numChipset = intel_chipsets[i].token; - intel_pci_chipsets[i].PCIid = intel_chipsets[i].token; -#if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,6,99,0,0) - intel_pci_chipsets[i].resList = RES_SHARED_VGA; -#else - intel_pci_chipsets[i].dummy = NULL; -#endif - } - - scrn = xf86ConfigPciEntity(NULL, 0, entity_num, intel_pci_chipsets, - NULL, NULL, NULL, NULL, NULL); + scrn = xf86AllocateScreen(driver, flags); if (scrn == NULL) return FALSE; @@ -518,14 +483,13 @@ static Bool intel_pci_probe(DriverPtr driver, scrn->driverPrivate = (void *)(match_data | 1); scrn->Probe = NULL; + if (xf86IsEntitySharable(entity_num)) + xf86SetEntityShared(entity_num); + xf86AddEntityToScreen(scrn, entity_num); + #if !KMS_ONLY - switch (DEVICE_ID(device)) { - case PCI_CHIP_I810: - case PCI_CHIP_I810_DC100: - case PCI_CHIP_I810_E: - case PCI_CHIP_I815: + if ((unsigned)((struct intel_device_info *)match_data)->gen < 020) return lg_i810_init(scrn); - } #endif #if !UMS_ONLY @@ -533,7 +497,6 @@ static Bool intel_pci_probe(DriverPtr driver, #if USE_SNA case SNA: return sna_init_scrn(scrn, entity_num); #endif - #if USE_UXA case UXA: return intel_init_scrn(scrn); #endif @@ -545,6 +508,37 @@ static Bool intel_pci_probe(DriverPtr driver, return FALSE; } +/* + * intel_pci_probe -- + * + * Look through the PCI bus to find cards that are intel boards. + * Setup the dispatch table for the rest of the driver functions. 
+ * + */ +static Bool intel_pci_probe(DriverPtr driver, + int entity_num, + struct pci_device *device, + intptr_t match_data) +{ + if (!has_kernel_mode_setting(device)) { +#if KMS_ONLY + return FALSE; +#else + switch (DEVICE_ID(device)) { + case PCI_CHIP_I810: + case PCI_CHIP_I810_DC100: + case PCI_CHIP_I810_E: + case PCI_CHIP_I815: + break; + default: + return FALSE; + } +#endif + } + + return intel_scrn_create(driver, entity_num, match_data, 0); +} + #ifdef XSERVER_PLATFORM_BUS static Bool intel_platform_probe(DriverPtr driver, @@ -552,13 +546,14 @@ intel_platform_probe(DriverPtr driver, struct xf86_platform_device *dev, intptr_t match_data) { - ScrnInfoPtr scrn = NULL; - char *path = xf86_get_platform_device_attrib(dev, ODEV_ATTRIB_PATH); unsigned scrn_flags = 0; if (!dev->pdev) return FALSE; + if (!has_kernel_mode_setting(dev->pdev)) + return FALSE; + /* Allow ourselves to act as a slaved output if not primary */ if (flags & PLATFORM_PROBE_GPU_SCREEN) { flags &= ~PLATFORM_PROBE_GPU_SCREEN; @@ -569,37 +564,7 @@ intel_platform_probe(DriverPtr driver, if (flags) return FALSE; - scrn = xf86AllocateScreen(driver, scrn_flags); - if (scrn == NULL) - return FALSE; - - scrn->driverVersion = INTEL_VERSION; - scrn->driverName = INTEL_DRIVER_NAME; - scrn->name = INTEL_NAME; - scrn->driverPrivate = (void *)(match_data | 1); - scrn->Probe = NULL; - - if (xf86IsEntitySharable(entity_num)) - xf86SetEntityShared(entity_num); - xf86AddEntityToScreen(scrn, entity_num); - - xf86DrvMsg(scrn->scrnIndex, X_INFO, - "using device path '%s'\n", path ? path : "Default device"); - -#if !UMS_ONLY - switch (get_accel_method()) { -#if USE_SNA - case SNA: return sna_init_scrn(scrn, entity_num); -#endif -#if USE_UXA - case UXA: return intel_init_scrn(scrn); -#endif - - default: break; - } -#endif - - return FALSE; + return intel_scrn_create(driver, entity_num, match_data, scrn_flags); } #endif diff --git a/src/intel_options.c b/src/intel_options.c index dcab9e729..fda2e8b0f 100644 --- a/src/intel_options.c +++ b/src/intel_options.c @@ -8,12 +8,13 @@ const OptionInfoRec intel_options[] = { {OPTION_ACCEL_DISABLE, "NoAccel", OPTV_BOOLEAN, {0}, 0}, {OPTION_ACCEL_METHOD, "AccelMethod", OPTV_STRING, {0}, 0}, {OPTION_BACKLIGHT, "Backlight", OPTV_STRING, {0}, 0}, - {OPTION_DRI, "DRI", OPTV_BOOLEAN, {0}, 1}, + {OPTION_DRI, "DRI", OPTV_STRING, {0}, 0}, {OPTION_COLOR_KEY, "ColorKey", OPTV_INTEGER, {0}, 0}, {OPTION_VIDEO_KEY, "VideoKey", OPTV_INTEGER, {0}, 0}, {OPTION_TILING_2D, "Tiling", OPTV_BOOLEAN, {0}, 1}, {OPTION_TILING_FB, "LinearFramebuffer", OPTV_BOOLEAN, {0}, 0}, {OPTION_SWAPBUFFERS_WAIT, "SwapbuffersWait", OPTV_BOOLEAN, {0}, 1}, + {OPTION_TRIPLE_BUFFER, "TripleBuffer", OPTV_BOOLEAN, {0}, 1}, {OPTION_PREFER_OVERLAY, "XvPreferOverlay", OPTV_BOOLEAN, {0}, 0}, {OPTION_HOTPLUG, "HotPlug", OPTV_BOOLEAN, {0}, 1}, {OPTION_RELAXED_FENCING,"RelaxedFencing", OPTV_BOOLEAN, {0}, 1}, @@ -21,9 +22,7 @@ const OptionInfoRec intel_options[] = { {OPTION_XVMC, "XvMC", OPTV_BOOLEAN, {0}, 1}, #endif #ifdef USE_SNA - {OPTION_THROTTLE, "Throttle", OPTV_BOOLEAN, {0}, 1}, {OPTION_ZAPHOD, "ZaphodHeads", OPTV_STRING, {0}, 0}, - {OPTION_DELAYED_FLUSH, "DelayedFlush", OPTV_BOOLEAN, {0}, 1}, {OPTION_TEAR_FREE, "TearFree", OPTV_BOOLEAN, {0}, 0}, {OPTION_CRTC_PIXMAPS, "PerCrtcPixmaps", OPTV_BOOLEAN, {0}, 0}, #endif @@ -33,7 +32,6 @@ const OptionInfoRec intel_options[] = { {OPTION_DEBUG_FLUSH_CACHES, "DebugFlushCaches", OPTV_BOOLEAN, {0}, 0}, {OPTION_DEBUG_WAIT, "DebugWait", OPTV_BOOLEAN, {0}, 0}, {OPTION_BUFFER_CACHE, "BufferCache", OPTV_BOOLEAN, {0}, 
1}, - {OPTION_TRIPLE_BUFFER, "TripleBuffer", OPTV_BOOLEAN, {0}, 1}, #endif {-1, NULL, OPTV_NONE, {0}, 0} }; diff --git a/src/intel_options.h b/src/intel_options.h index 3b5262a55..8fa7a8fb8 100644 --- a/src/intel_options.h +++ b/src/intel_options.h @@ -20,6 +20,7 @@ enum intel_options { OPTION_TILING_2D, OPTION_TILING_FB, OPTION_SWAPBUFFERS_WAIT, + OPTION_TRIPLE_BUFFER, OPTION_PREFER_OVERLAY, OPTION_HOTPLUG, OPTION_RELAXED_FENCING, @@ -28,9 +29,7 @@ enum intel_options { #define INTEL_XVMC 1 #endif #ifdef USE_SNA - OPTION_THROTTLE, OPTION_ZAPHOD, - OPTION_DELAYED_FLUSH, OPTION_TEAR_FREE, OPTION_CRTC_PIXMAPS, #endif @@ -40,7 +39,6 @@ enum intel_options { OPTION_DEBUG_FLUSH_CACHES, OPTION_DEBUG_WAIT, OPTION_BUFFER_CACHE, - OPTION_TRIPLE_BUFFER, #endif NUM_OPTIONS, }; diff --git a/src/intel_uxa.c b/src/intel_uxa.c index 6d202c776..2f141735d 100644 --- a/src/intel_uxa.c +++ b/src/intel_uxa.c @@ -170,7 +170,7 @@ intel_uxa_pixmap_compute_size(PixmapPtr pixmap, pitch = (w * pixmap->drawable.bitsPerPixel + 7) / 8; pitch = ALIGN(pitch, 64); size = pitch * ALIGN (h, 2); - if (INTEL_INFO(intel)->gen < 40) { + if (INTEL_INFO(intel)->gen < 040) { /* Gen 2/3 has a maximum stride for tiling of * 8192 bytes. */ @@ -209,7 +209,7 @@ intel_uxa_pixmap_compute_size(PixmapPtr pixmap, tile_height = 8; else tile_height = 32; - aligned_h = ALIGN(h, tile_height); + aligned_h = ALIGN(h, 2*tile_height); *stride = intel_get_fence_pitch(intel, ALIGN(pitch, 512), @@ -331,7 +331,7 @@ static void intel_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2) cmd |= XY_COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB; - if (INTEL_INFO(intel)->gen >= 40 && intel_pixmap_tiled(pixmap)) { + if (INTEL_INFO(intel)->gen >= 040 && intel_pixmap_tiled(pixmap)) { assert((pitch % 512) == 0); pitch >>= 2; cmd |= XY_COLOR_BLT_TILED; @@ -470,7 +470,7 @@ intel_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1, XY_SRC_COPY_BLT_WRITE_ALPHA | XY_SRC_COPY_BLT_WRITE_RGB; - if (INTEL_INFO(intel)->gen >= 40) { + if (INTEL_INFO(intel)->gen >= 040) { if (intel_pixmap_tiled(dest)) { assert((dst_pitch % 512) == 0); dst_pitch >>= 2; @@ -1281,7 +1281,7 @@ intel_limits_init(intel_screen_private *intel) * the front, which will have an appropriate pitch/offset already set up, * so UXA doesn't need to worry. */ - if (INTEL_INFO(intel)->gen >= 40) { + if (INTEL_INFO(intel)->gen >= 040) { intel->accel_pixmap_offset_alignment = 4 * 2; intel->accel_max_x = 8192; intel->accel_max_y = 8192; @@ -1292,6 +1292,17 @@ intel_limits_init(intel_screen_private *intel) } } +static Bool intel_option_accel_blt(intel_screen_private *intel) +{ + const char *s; + + s = xf86GetOptValString(intel->Options, OPTION_ACCEL_METHOD); + if (s == NULL) + return FALSE; + + return strcasecmp(s, "blt") == 0; +} + Bool intel_uxa_init(ScreenPtr screen) { ScrnInfoPtr scrn = xf86ScreenToScrn(screen); @@ -1338,7 +1349,8 @@ Bool intel_uxa_init(ScreenPtr screen) intel->uxa_driver->done_copy = intel_uxa_done; /* Composite */ - if (IS_GEN2(intel)) { + if (intel_option_accel_blt(intel)) { + } else if (IS_GEN2(intel)) { intel->uxa_driver->check_composite = i830_check_composite; intel->uxa_driver->check_composite_target = i830_check_composite_target; intel->uxa_driver->check_composite_texture = i830_check_composite_texture; diff --git a/src/intel_video.c b/src/intel_video.c index 09782aa5d..6cce18240 100644 --- a/src/intel_video.c +++ b/src/intel_video.c @@ -353,7 +353,7 @@ void I830InitVideo(ScreenPtr screen) * supported hardware. 
*/ if (scrn->bitsPerPixel >= 16 && - INTEL_INFO(intel)->gen >= 30) { + INTEL_INFO(intel)->gen >= 030) { texturedAdaptor = I830SetupImageVideoTextured(screen); if (texturedAdaptor != NULL) { xf86DrvMsg(scrn->scrnIndex, X_INFO, @@ -436,7 +436,7 @@ static XF86VideoAdaptorPtr I830SetupImageVideoOverlay(ScreenPtr screen) adapt->pPortPrivates[0].ptr = (pointer) (adaptor_priv); adapt->nAttributes = NUM_ATTRIBUTES; - if (INTEL_INFO(intel)->gen >= 30) + if (INTEL_INFO(intel)->gen >= 030) adapt->nAttributes += GAMMA_ATTRIBUTES; /* has gamma */ adapt->pAttributes = xnfalloc(sizeof(XF86AttributeRec) * adapt->nAttributes); @@ -445,7 +445,7 @@ static XF86VideoAdaptorPtr I830SetupImageVideoOverlay(ScreenPtr screen) memcpy((char *)att, (char *)Attributes, sizeof(XF86AttributeRec) * NUM_ATTRIBUTES); att += NUM_ATTRIBUTES; - if (INTEL_INFO(intel)->gen >= 30) { + if (INTEL_INFO(intel)->gen >= 030) { memcpy((char *)att, (char *)GammaAttributes, sizeof(XF86AttributeRec) * GAMMA_ATTRIBUTES); } @@ -495,7 +495,7 @@ static XF86VideoAdaptorPtr I830SetupImageVideoOverlay(ScreenPtr screen) /* Allow the pipe to be switched from pipe A to B when in clone mode */ xvPipe = MAKE_ATOM("XV_PIPE"); - if (INTEL_INFO(intel)->gen >= 30) { + if (INTEL_INFO(intel)->gen >= 030) { xvGamma0 = MAKE_ATOM("XV_GAMMA0"); xvGamma1 = MAKE_ATOM("XV_GAMMA1"); xvGamma2 = MAKE_ATOM("XV_GAMMA2"); @@ -681,17 +681,17 @@ I830SetPortAttributeOverlay(ScrnInfoPtr scrn, adaptor_priv->desired_crtc = NULL; else adaptor_priv->desired_crtc = xf86_config->crtc[value]; - } else if (attribute == xvGamma0 && (INTEL_INFO(intel)->gen >= 30)) { + } else if (attribute == xvGamma0 && (INTEL_INFO(intel)->gen >= 030)) { adaptor_priv->gamma0 = value; - } else if (attribute == xvGamma1 && (INTEL_INFO(intel)->gen >= 30)) { + } else if (attribute == xvGamma1 && (INTEL_INFO(intel)->gen >= 030)) { adaptor_priv->gamma1 = value; - } else if (attribute == xvGamma2 && (INTEL_INFO(intel)->gen >= 30)) { + } else if (attribute == xvGamma2 && (INTEL_INFO(intel)->gen >= 030)) { adaptor_priv->gamma2 = value; - } else if (attribute == xvGamma3 && (INTEL_INFO(intel)->gen >= 30)) { + } else if (attribute == xvGamma3 && (INTEL_INFO(intel)->gen >= 030)) { adaptor_priv->gamma3 = value; - } else if (attribute == xvGamma4 && (INTEL_INFO(intel)->gen >= 30)) { + } else if (attribute == xvGamma4 && (INTEL_INFO(intel)->gen >= 030)) { adaptor_priv->gamma4 = value; - } else if (attribute == xvGamma5 && (INTEL_INFO(intel)->gen >= 30)) { + } else if (attribute == xvGamma5 && (INTEL_INFO(intel)->gen >= 030)) { adaptor_priv->gamma5 = value; } else if (attribute == xvColorKey) { adaptor_priv->colorKey = value; @@ -704,7 +704,7 @@ I830SetPortAttributeOverlay(ScrnInfoPtr scrn, attribute == xvGamma2 || attribute == xvGamma3 || attribute == xvGamma4 || - attribute == xvGamma5) && (INTEL_INFO(intel)->gen >= 30)) { + attribute == xvGamma5) && (INTEL_INFO(intel)->gen >= 030)) { OVERLAY_DEBUG("GAMMA\n"); } @@ -739,17 +739,17 @@ I830GetPortAttribute(ScrnInfoPtr scrn, if (c == xf86_config->num_crtc) c = -1; *value = c; - } else if (attribute == xvGamma0 && (INTEL_INFO(intel)->gen >= 30)) { + } else if (attribute == xvGamma0 && (INTEL_INFO(intel)->gen >= 030)) { *value = adaptor_priv->gamma0; - } else if (attribute == xvGamma1 && (INTEL_INFO(intel)->gen >= 30)) { + } else if (attribute == xvGamma1 && (INTEL_INFO(intel)->gen >= 030)) { *value = adaptor_priv->gamma1; - } else if (attribute == xvGamma2 && (INTEL_INFO(intel)->gen >= 30)) { + } else if (attribute == xvGamma2 && (INTEL_INFO(intel)->gen >= 030)) { 
*value = adaptor_priv->gamma2; - } else if (attribute == xvGamma3 && (INTEL_INFO(intel)->gen >= 30)) { + } else if (attribute == xvGamma3 && (INTEL_INFO(intel)->gen >= 030)) { *value = adaptor_priv->gamma3; - } else if (attribute == xvGamma4 && (INTEL_INFO(intel)->gen >= 30)) { + } else if (attribute == xvGamma4 && (INTEL_INFO(intel)->gen >= 030)) { *value = adaptor_priv->gamma4; - } else if (attribute == xvGamma5 && (INTEL_INFO(intel)->gen >= 30)) { + } else if (attribute == xvGamma5 && (INTEL_INFO(intel)->gen >= 030)) { *value = adaptor_priv->gamma5; } else if (attribute == xvColorKey) { *value = adaptor_priv->colorKey; @@ -1313,18 +1313,18 @@ intel_wait_for_scanline(ScrnInfoPtr scrn, PixmapPtr pixmap, * of extra time for the blitter to start up and * do its job for a full height blit */ - if (full_height && INTEL_INFO(intel)->gen < 40) + if (full_height && INTEL_INFO(intel)->gen < 040) y2 -= 2; if (pipe == 0) { pipe = MI_LOAD_SCAN_LINES_DISPLAY_PIPEA; event = MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW; - if (full_height && INTEL_INFO(intel)->gen >= 40) + if (full_height && INTEL_INFO(intel)->gen >= 040) event = MI_WAIT_FOR_PIPEA_SVBLANK; } else { pipe = MI_LOAD_SCAN_LINES_DISPLAY_PIPEB; event = MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW; - if (full_height && INTEL_INFO(intel)->gen >= 40) + if (full_height && INTEL_INFO(intel)->gen >= 040) event = MI_WAIT_FOR_PIPEB_SVBLANK; } @@ -1381,7 +1381,7 @@ intel_setup_dst_params(ScrnInfoPtr scrn, intel_adaptor_private *adaptor_priv, sh if (adaptor_priv->textured) { pitchAlign = 4; } else { - if (INTEL_INFO(intel)->gen >= 40) + if (INTEL_INFO(intel)->gen >= 040) /* Actually the alignment is 64 bytes, too. But the * stride must be at least 512 bytes. Take the easy fix * and align on 512 bytes unconditionally. */ @@ -1561,16 +1561,16 @@ I830PutImageTextured(ScrnInfoPtr scrn, return BadAlloc; } - if (crtc && adaptor_priv->SyncToVblank != 0 && INTEL_INFO(intel)->gen < 60) { + if (crtc && adaptor_priv->SyncToVblank != 0 && INTEL_INFO(intel)->gen < 060) { intel_wait_for_scanline(scrn, pixmap, crtc, clipBoxes); } - if (INTEL_INFO(intel)->gen >= 60) { + if (INTEL_INFO(intel)->gen >= 060) { Gen6DisplayVideoTextured(scrn, adaptor_priv, id, clipBoxes, width, height, dstPitch, dstPitch2, src_w, src_h, drw_w, drw_h, pixmap); - } else if (INTEL_INFO(intel)->gen >= 40) { + } else if (INTEL_INFO(intel)->gen >= 040) { I965DisplayVideoTextured(scrn, adaptor_priv, id, clipBoxes, width, height, dstPitch, dstPitch2, src_w, src_h, diff --git a/src/legacy/Makefile.in b/src/legacy/Makefile.in index a086138e1..928cf6401 100644 --- a/src/legacy/Makefile.in +++ b/src/legacy/Makefile.in @@ -218,7 +218,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MAN_SUBSTS = @MAN_SUBSTS@ @@ -257,6 +256,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@ VERSION = @VERSION@ X11_CFLAGS = @X11_CFLAGS@ X11_LIBS = @X11_LIBS@ +XCB_CFLAGS = @XCB_CFLAGS@ +XCB_LIBS = @XCB_LIBS@ XORG_CFLAGS = @XORG_CFLAGS@ XORG_LIBS = @XORG_LIBS@ XORG_MAN_PAGE = @XORG_MAN_PAGE@ @@ -324,7 +325,7 @@ EXTRA_DIST = README all: all-recursive .SUFFIXES: -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -349,9 +350,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd 
$(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) +$(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): diff --git a/src/legacy/i810/Makefile.in b/src/legacy/i810/Makefile.in index 8f339e3fc..c992feb5f 100644 --- a/src/legacy/i810/Makefile.in +++ b/src/legacy/i810/Makefile.in @@ -237,7 +237,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MAN_SUBSTS = @MAN_SUBSTS@ @@ -276,6 +275,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@ VERSION = @VERSION@ X11_CFLAGS = @X11_CFLAGS@ X11_LIBS = @X11_LIBS@ +XCB_CFLAGS = @XCB_CFLAGS@ +XCB_LIBS = @XCB_LIBS@ XORG_CFLAGS = @XORG_CFLAGS@ XORG_LIBS = @XORG_LIBS@ XORG_MAN_PAGE = @XORG_MAN_PAGE@ @@ -349,7 +350,7 @@ all: all-recursive .SUFFIXES: .SUFFIXES: .c .lo .o .obj -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -374,9 +375,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) +$(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): diff --git a/src/legacy/i810/xvmc/Makefile.in b/src/legacy/i810/xvmc/Makefile.in index 6e03d10c7..830d67767 100644 --- a/src/legacy/i810/xvmc/Makefile.in +++ b/src/legacy/i810/xvmc/Makefile.in @@ -206,7 +206,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MAN_SUBSTS = @MAN_SUBSTS@ @@ -245,6 +244,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@ VERSION = @VERSION@ X11_CFLAGS = @X11_CFLAGS@ X11_LIBS = @X11_LIBS@ +XCB_CFLAGS = @XCB_CFLAGS@ +XCB_LIBS = @XCB_LIBS@ XORG_CFLAGS = @XORG_CFLAGS@ XORG_LIBS = @XORG_LIBS@ XORG_MAN_PAGE = @XORG_MAN_PAGE@ @@ -314,7 +315,7 @@ all: all-am .SUFFIXES: .SUFFIXES: .c .lo .o .obj -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -339,9 +340,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) +$(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): install-libLTLIBRARIES: $(lib_LTLIBRARIES) diff --git a/src/render_program/Makefile.am 
b/src/render_program/Makefile.am index 8b82b2e0e..1298625fd 100644 --- a/src/render_program/Makefile.am +++ b/src/render_program/Makefile.am @@ -191,21 +191,21 @@ if HAVE_GEN4ASM SUFFIXES = .g4a .g4b .g5a .g5b .g6a .g6b .g7a .g7b .g4a.g4b: - m4 -I$(srcdir) -s $< > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m + $(AM_V_GEN)m4 -I$(srcdir) -s $< > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m .g5a.g5b: - m4 -I$(srcdir) -s $< > $*.g5m && @INTEL_GEN4ASM@ -g 5 -o $@ $*.g5m && rm $*.g5m + $(AM_V_GEN)m4 -I$(srcdir) -s $< > $*.g5m && @INTEL_GEN4ASM@ -g 5 -o $@ $*.g5m && rm $*.g5m .g6a.g6b: - m4 -I$(srcdir) -s $< > $*.g6m && @INTEL_GEN4ASM@ -g 6 -o $@ $*.g6m && rm $*.g6m + $(AM_V_GEN)m4 -I$(srcdir) -s $< > $*.g6m && @INTEL_GEN4ASM@ -g 6 -o $@ $*.g6m && rm $*.g6m .g7a.g7b: - m4 -I$(srcdir) -s $< > $*.g7m && @INTEL_GEN4ASM@ -g 7 -o $@ $*.g7m && rm $*.g7m + $(AM_V_GEN)m4 -I$(srcdir) -s $< > $*.g7m && @INTEL_GEN4ASM@ -g 7 -o $@ $*.g7m && rm $*.g7m -$(INTEL_G4B): $(INTEL_G4I) -$(INTEL_G5B): $(INTEL_G4I) -$(INTEL_G6B): $(INTEL_G4I) $(INTEL_G6I) -$(INTEL_G7B): $(INTEL_G4I) $(INTEL_G6I) +$(INTEL_G4B): $(INTEL_GEN4ASM) $(INTEL_G4I) +$(INTEL_G5B): $(INTEL_GEN4ASM) $(INTEL_G4I) +$(INTEL_G6B): $(INTEL_GEN4ASM) $(INTEL_G4I) $(INTEL_G6I) +$(INTEL_G7B): $(INTEL_GEN4ASM) $(INTEL_G4I) $(INTEL_G6I) BUILT_SOURCES= $(INTEL_G4B) $(INTEL_G5B) $(INTEL_G6B) $(INTEL_G7B) diff --git a/src/render_program/Makefile.in b/src/render_program/Makefile.in index c079921f3..c941f240c 100644 --- a/src/render_program/Makefile.in +++ b/src/render_program/Makefile.in @@ -143,7 +143,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MAN_SUBSTS = @MAN_SUBSTS@ @@ -182,6 +181,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@ VERSION = @VERSION@ X11_CFLAGS = @X11_CFLAGS@ X11_LIBS = @X11_LIBS@ +XCB_CFLAGS = @XCB_CFLAGS@ +XCB_LIBS = @XCB_LIBS@ XORG_CFLAGS = @XORG_CFLAGS@ XORG_LIBS = @XORG_LIBS@ XORG_MAN_PAGE = @XORG_MAN_PAGE@ @@ -435,7 +436,7 @@ all: $(BUILT_SOURCES) .SUFFIXES: .SUFFIXES: .g4a .g4b .g5a .g5b .g6a .g6b .g7a .g7b -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -460,9 +461,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) +$(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): @@ -627,21 +628,21 @@ uninstall-am: uninstall uninstall-am @HAVE_GEN4ASM_TRUE@.g4a.g4b: -@HAVE_GEN4ASM_TRUE@ m4 -I$(srcdir) -s $< > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m +@HAVE_GEN4ASM_TRUE@ $(AM_V_GEN)m4 -I$(srcdir) -s $< > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m @HAVE_GEN4ASM_TRUE@.g5a.g5b: -@HAVE_GEN4ASM_TRUE@ m4 -I$(srcdir) -s $< > $*.g5m && @INTEL_GEN4ASM@ -g 5 -o $@ $*.g5m && rm $*.g5m +@HAVE_GEN4ASM_TRUE@ $(AM_V_GEN)m4 
-I$(srcdir) -s $< > $*.g5m && @INTEL_GEN4ASM@ -g 5 -o $@ $*.g5m && rm $*.g5m @HAVE_GEN4ASM_TRUE@.g6a.g6b: -@HAVE_GEN4ASM_TRUE@ m4 -I$(srcdir) -s $< > $*.g6m && @INTEL_GEN4ASM@ -g 6 -o $@ $*.g6m && rm $*.g6m +@HAVE_GEN4ASM_TRUE@ $(AM_V_GEN)m4 -I$(srcdir) -s $< > $*.g6m && @INTEL_GEN4ASM@ -g 6 -o $@ $*.g6m && rm $*.g6m @HAVE_GEN4ASM_TRUE@.g7a.g7b: -@HAVE_GEN4ASM_TRUE@ m4 -I$(srcdir) -s $< > $*.g7m && @INTEL_GEN4ASM@ -g 7 -o $@ $*.g7m && rm $*.g7m +@HAVE_GEN4ASM_TRUE@ $(AM_V_GEN)m4 -I$(srcdir) -s $< > $*.g7m && @INTEL_GEN4ASM@ -g 7 -o $@ $*.g7m && rm $*.g7m -@HAVE_GEN4ASM_TRUE@$(INTEL_G4B): $(INTEL_G4I) -@HAVE_GEN4ASM_TRUE@$(INTEL_G5B): $(INTEL_G4I) -@HAVE_GEN4ASM_TRUE@$(INTEL_G6B): $(INTEL_G4I) $(INTEL_G6I) -@HAVE_GEN4ASM_TRUE@$(INTEL_G7B): $(INTEL_G4I) $(INTEL_G6I) +@HAVE_GEN4ASM_TRUE@$(INTEL_G4B): $(INTEL_GEN4ASM) $(INTEL_G4I) +@HAVE_GEN4ASM_TRUE@$(INTEL_G5B): $(INTEL_GEN4ASM) $(INTEL_G4I) +@HAVE_GEN4ASM_TRUE@$(INTEL_G6B): $(INTEL_GEN4ASM) $(INTEL_G4I) $(INTEL_G6I) +@HAVE_GEN4ASM_TRUE@$(INTEL_G7B): $(INTEL_GEN4ASM) $(INTEL_G4I) $(INTEL_G6I) @HAVE_GEN4ASM_TRUE@clean-local: @HAVE_GEN4ASM_TRUE@ -rm -f $(INTEL_G4B) $(INTEL_G4B_GEN5) diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am index 306996b57..c74c904dc 100644 --- a/src/sna/Makefile.am +++ b/src/sna/Makefile.am @@ -34,9 +34,11 @@ AM_CFLAGS += @VALGRIND_CFLAGS@ endif noinst_LTLIBRARIES = libsna.la +libsna_la_LDFLAGS = -pthread libsna_la_LIBADD = @UDEV_LIBS@ -lm @DRM_LIBS@ brw/libbrw.la fb/libfb.la libsna_la_SOURCES = \ + atomic.h \ blt.c \ compiler.h \ kgem.c \ @@ -62,6 +64,8 @@ libsna_la_SOURCES = \ sna_trapezoids.c \ sna_tiling.c \ sna_transform.c \ + sna_threads.c \ + sna_vertex.c \ sna_video.c \ sna_video.h \ sna_video_overlay.c \ @@ -73,6 +77,10 @@ libsna_la_SOURCES = \ gen3_render.h \ gen4_render.c \ gen4_render.h \ + gen4_source.c \ + gen4_source.h \ + gen4_vertex.c \ + gen4_vertex.h \ gen5_render.c \ gen5_render.h \ gen6_render.c \ diff --git a/src/sna/Makefile.in b/src/sna/Makefile.in index 978d36e1d..7b80b60be 100644 --- a/src/sna/Makefile.in +++ b/src/sna/Makefile.in @@ -106,18 +106,19 @@ am__DEPENDENCIES_1 = @DRI2_TRUE@am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) libsna_la_DEPENDENCIES = brw/libbrw.la fb/libfb.la \ $(am__DEPENDENCIES_2) -am__libsna_la_SOURCES_DIST = blt.c compiler.h kgem.c kgem.h rop.h \ - sna.h sna_accel.c sna_blt.c sna_composite.c sna_damage.c \ +am__libsna_la_SOURCES_DIST = atomic.h blt.c compiler.h kgem.c kgem.h \ + rop.h sna.h sna_accel.c sna_blt.c sna_composite.c sna_damage.c \ sna_damage.h sna_display.c sna_driver.c sna_glyphs.c \ sna_gradient.c sna_io.c sna_module.h sna_render.c sna_render.h \ sna_render_inline.h sna_reg.h sna_stream.c sna_trapezoids.c \ - sna_tiling.c sna_transform.c sna_video.c sna_video.h \ - sna_video_overlay.c sna_video_sprite.c sna_video_textured.c \ - gen2_render.c gen2_render.h gen3_render.c gen3_render.h \ - gen4_render.c gen4_render.h gen5_render.c gen5_render.h \ - gen6_render.c gen6_render.h gen7_render.c gen7_render.h \ - sna_dri.c sna_video_hwmc.h sna_video_hwmc.c kgem_debug.c \ - kgem_debug.h kgem_debug_gen2.c kgem_debug_gen3.c \ + sna_tiling.c sna_transform.c sna_threads.c sna_vertex.c \ + sna_video.c sna_video.h sna_video_overlay.c sna_video_sprite.c \ + sna_video_textured.c gen2_render.c gen2_render.h gen3_render.c \ + gen3_render.h gen4_render.c gen4_render.h gen4_source.c \ + gen4_source.h gen4_vertex.c gen4_vertex.h gen5_render.c \ + gen5_render.h gen6_render.c gen6_render.h gen7_render.c \ + gen7_render.h sna_dri.c sna_video_hwmc.h sna_video_hwmc.c \ + 
kgem_debug.c kgem_debug.h kgem_debug_gen2.c kgem_debug_gen3.c \ kgem_debug_gen4.c kgem_debug_gen5.c kgem_debug_gen6.c \ kgem_debug_gen7.c @DRI2_TRUE@am__objects_1 = sna_dri.lo @@ -130,14 +131,18 @@ am_libsna_la_OBJECTS = blt.lo kgem.lo sna_accel.lo sna_blt.lo \ sna_composite.lo sna_damage.lo sna_display.lo sna_driver.lo \ sna_glyphs.lo sna_gradient.lo sna_io.lo sna_render.lo \ sna_stream.lo sna_trapezoids.lo sna_tiling.lo sna_transform.lo \ - sna_video.lo sna_video_overlay.lo sna_video_sprite.lo \ - sna_video_textured.lo gen2_render.lo gen3_render.lo \ - gen4_render.lo gen5_render.lo gen6_render.lo gen7_render.lo \ - $(am__objects_1) $(am__objects_2) $(am__objects_3) + sna_threads.lo sna_vertex.lo sna_video.lo sna_video_overlay.lo \ + sna_video_sprite.lo sna_video_textured.lo gen2_render.lo \ + gen3_render.lo gen4_render.lo gen4_source.lo gen4_vertex.lo \ + gen5_render.lo gen6_render.lo gen7_render.lo $(am__objects_1) \ + $(am__objects_2) $(am__objects_3) libsna_la_OBJECTS = $(am_libsna_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent +libsna_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(libsna_la_LDFLAGS) $(LDFLAGS) -o $@ DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) depcomp = $(SHELL) $(top_srcdir)/depcomp am__depfiles_maybe = depfiles @@ -281,7 +286,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MAN_SUBSTS = @MAN_SUBSTS@ @@ -320,6 +324,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@ VERSION = @VERSION@ X11_CFLAGS = @X11_CFLAGS@ X11_LIBS = @X11_LIBS@ +XCB_CFLAGS = @XCB_CFLAGS@ +XCB_LIBS = @XCB_LIBS@ XORG_CFLAGS = @XORG_CFLAGS@ XORG_LIBS = @XORG_LIBS@ XORG_MAN_PAGE = @XORG_MAN_PAGE@ @@ -383,24 +389,27 @@ AM_CFLAGS = @CWARNFLAGS@ -I$(top_srcdir)/src \ -I$(top_srcdir)/src/render_program @XORG_CFLAGS@ @UDEV_CFLAGS@ \ @DRM_CFLAGS@ $(NULL) $(am__append_1) $(am__append_2) noinst_LTLIBRARIES = libsna.la +libsna_la_LDFLAGS = -pthread libsna_la_LIBADD = @UDEV_LIBS@ -lm @DRM_LIBS@ brw/libbrw.la \ fb/libfb.la $(am__append_4) -libsna_la_SOURCES = blt.c compiler.h kgem.c kgem.h rop.h sna.h \ - sna_accel.c sna_blt.c sna_composite.c sna_damage.c \ +libsna_la_SOURCES = atomic.h blt.c compiler.h kgem.c kgem.h rop.h \ + sna.h sna_accel.c sna_blt.c sna_composite.c sna_damage.c \ sna_damage.h sna_display.c sna_driver.c sna_glyphs.c \ sna_gradient.c sna_io.c sna_module.h sna_render.c sna_render.h \ sna_render_inline.h sna_reg.h sna_stream.c sna_trapezoids.c \ - sna_tiling.c sna_transform.c sna_video.c sna_video.h \ - sna_video_overlay.c sna_video_sprite.c sna_video_textured.c \ - gen2_render.c gen2_render.h gen3_render.c gen3_render.h \ - gen4_render.c gen4_render.h gen5_render.c gen5_render.h \ - gen6_render.c gen6_render.h gen7_render.c gen7_render.h \ - $(NULL) $(am__append_3) $(am__append_5) $(am__append_6) + sna_tiling.c sna_transform.c sna_threads.c sna_vertex.c \ + sna_video.c sna_video.h sna_video_overlay.c sna_video_sprite.c \ + sna_video_textured.c gen2_render.c gen2_render.h gen3_render.c \ + gen3_render.h gen4_render.c gen4_render.h gen4_source.c \ + gen4_source.h gen4_vertex.c gen4_vertex.h gen5_render.c \ + gen5_render.h gen6_render.c gen6_render.h gen7_render.c \ + gen7_render.h $(NULL) $(am__append_3) $(am__append_5) \ + $(am__append_6) all: all-recursive .SUFFIXES: .SUFFIXES: .c .lo .o .obj -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ 
$(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -425,9 +434,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) +$(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): @@ -440,7 +449,7 @@ clean-noinstLTLIBRARIES: rm -f "$${dir}/so_locations"; \ done libsna.la: $(libsna_la_OBJECTS) $(libsna_la_DEPENDENCIES) $(EXTRA_libsna_la_DEPENDENCIES) - $(AM_V_CCLD)$(LINK) $(libsna_la_OBJECTS) $(libsna_la_LIBADD) $(LIBS) + $(AM_V_CCLD)$(libsna_la_LINK) $(libsna_la_OBJECTS) $(libsna_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) @@ -452,6 +461,8 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen2_render.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen3_render.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen4_render.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen4_source.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen4_vertex.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen5_render.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen6_render.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen7_render.Plo@am__quote@ @@ -475,9 +486,11 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_io.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_render.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_stream.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_threads.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_tiling.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_transform.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_trapezoids.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_vertex.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_video.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_video_hwmc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sna_video_overlay.Plo@am__quote@ diff --git a/src/sna/atomic.h b/src/sna/atomic.h new file mode 100644 index 000000000..306dc6db8 --- /dev/null +++ b/src/sna/atomic.h @@ -0,0 +1,89 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifndef ATOMIC_H +#define ATOMIC_H + +#if HAVE_ATOMIC_PRIMITIVES + +#define HAS_ATOMIC_OPS 1 + +typedef struct { + int atomic; +} atomic_t; + +# define atomic_read(x) ((x)->atomic) +# define atomic_set(x, val) ((x)->atomic = (val)) +# define atomic_inc(x) ((void) __sync_fetch_and_add (&(x)->atomic, 1)) +# define atomic_dec_and_test(x) (__sync_fetch_and_add (&(x)->atomic, -1) == 1) +# define atomic_add(x, v) ((void) __sync_add_and_fetch(&(x)->atomic, (v))) +# define atomic_dec(x, v) ((void) __sync_sub_and_fetch(&(x)->atomic, (v))) +# define atomic_cmpxchg(x, oldv, newv) __sync_val_compare_and_swap (&(x)->atomic, oldv, newv) + +#endif + +#if HAVE_LIB_ATOMIC_OPS +#include <atomic_ops.h> + +#define HAS_ATOMIC_OPS 1 + +typedef struct { + AO_t atomic; +} atomic_t; + +# define atomic_read(x) AO_load_full(&(x)->atomic) +# define atomic_set(x, val) AO_store_full(&(x)->atomic, (val)) +# define atomic_inc(x) ((void) AO_fetch_and_add1_full(&(x)->atomic)) +# define atomic_add(x, v) ((void) AO_fetch_and_add_full(&(x)->atomic, (v))) +# define atomic_dec(x, v) ((void) AO_fetch_and_add_full(&(x)->atomic, -(v))) +# define atomic_dec_and_test(x) (AO_fetch_and_sub1_full(&(x)->atomic) == 1) +# define atomic_cmpxchg(x, oldv, newv) AO_compare_and_swap_full(&(x)->atomic, oldv, newv) + +#endif + +#if defined(__sun) && !defined(HAS_ATOMIC_OPS) /* Solaris & OpenSolaris */ + +#include <sys/atomic.h> +#define HAS_ATOMIC_OPS 1 + +typedef struct { uint_t atomic; } atomic_t; + +# define atomic_read(x) (int) ((x)->atomic) +# define atomic_set(x, val) ((x)->atomic = (uint_t)(val)) +# define atomic_inc(x) (atomic_inc_uint (&(x)->atomic)) +# define atomic_dec_and_test(x) (atomic_dec_uint_nv(&(x)->atomic) == 1) +# define atomic_add(x, v) (atomic_add_int(&(x)->atomic, (v))) +# define atomic_dec(x, v) (atomic_add_int(&(x)->atomic, -(v))) +# define atomic_cmpxchg(x, oldv, newv) atomic_cas_uint (&(x)->atomic, oldv, newv) + +#endif + +#if ! HAS_ATOMIC_OPS +#error xf86-video-intel requires atomic operations, please define them for your CPU/compiler. 
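The new src/sna/atomic.h above selects one of three back ends (GCC __sync builtins, libatomic_ops, or the Solaris <sys/atomic.h> primitives) behind a single small atomic_t API. The typical consumer is a reference count, where atomic_dec_and_test() returns true only for the caller that drops the count from 1 to 0. A usage sketch against the __sync variant follows; the macros restate the header's definitions, while the refcounted object is illustrative and not something this patch adds.

#include <stdio.h>
#include <stdlib.h>

typedef struct { int atomic; } atomic_t;
#define atomic_set(x, val) ((x)->atomic = (val))
#define atomic_inc(x) ((void)__sync_fetch_and_add(&(x)->atomic, 1))
#define atomic_dec_and_test(x) (__sync_fetch_and_add(&(x)->atomic, -1) == 1)

struct object {
	atomic_t refcnt;	/* illustrative refcounted object */
};

static struct object *object_get(struct object *o)
{
	atomic_inc(&o->refcnt);
	return o;
}

static void object_put(struct object *o)
{
	/* __sync_fetch_and_add returns the value *before* the decrement,
	 * so the test fires exactly once, for the final reference. */
	if (atomic_dec_and_test(&o->refcnt))
		free(o);
}

int main(void)
{
	struct object *o = malloc(sizeof(*o));

	atomic_set(&o->refcnt, 1);
	object_get(o);		/* count: 2 */
	object_put(o);		/* count: 1, still alive */
	object_put(o);		/* count: 0, freed here */
	printf("refcount demo done\n");
	return 0;
}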
+#endif + +#endif diff --git a/src/sna/brw/Makefile.in b/src/sna/brw/Makefile.in index 1c8fbf3dd..8d70764e8 100644 --- a/src/sna/brw/Makefile.in +++ b/src/sna/brw/Makefile.in @@ -203,7 +203,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MAN_SUBSTS = @MAN_SUBSTS@ @@ -242,6 +241,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@ VERSION = @VERSION@ X11_CFLAGS = @X11_CFLAGS@ X11_LIBS = @X11_LIBS@ +XCB_CFLAGS = @XCB_CFLAGS@ +XCB_LIBS = @XCB_LIBS@ XORG_CFLAGS = @XORG_CFLAGS@ XORG_LIBS = @XORG_LIBS@ XORG_MAN_PAGE = @XORG_MAN_PAGE@ @@ -331,7 +332,7 @@ all: all-am .SUFFIXES: .SUFFIXES: .c .lo .o .obj -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -356,9 +357,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) +$(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): diff --git a/src/sna/brw/brw_disasm.c b/src/sna/brw/brw_disasm.c index e6da17454..ea6155c8e 100644 --- a/src/sna/brw/brw_disasm.c +++ b/src/sna/brw/brw_disasm.c @@ -875,16 +875,17 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen) string(file, ")"); } - if (inst->header.opcode == BRW_OPCODE_SEND && gen < 60) + if (inst->header.opcode == BRW_OPCODE_SEND && gen < 060) format(file, " %d", inst->header.destreg__conditionalmod); if (opcode[inst->header.opcode].ndst > 0) { pad(file, 16); dest(file, inst); - } else if (gen >= 60 && (inst->header.opcode == BRW_OPCODE_IF || - inst->header.opcode == BRW_OPCODE_ELSE || - inst->header.opcode == BRW_OPCODE_ENDIF || - inst->header.opcode == BRW_OPCODE_WHILE)) { + } else if (gen >= 060 && + (inst->header.opcode == BRW_OPCODE_IF || + inst->header.opcode == BRW_OPCODE_ELSE || + inst->header.opcode == BRW_OPCODE_ENDIF || + inst->header.opcode == BRW_OPCODE_WHILE)) { format(file, " %d", inst->bits1.branch_gen6.jump_count); } @@ -901,9 +902,9 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen) inst->header.opcode == BRW_OPCODE_SENDC) { enum brw_message_target target; - if (gen >= 60) + if (gen >= 060) target = inst->header.destreg__conditionalmod; - else if (gen >= 50) + else if (gen >= 050) target = inst->bits2.send_gen5.sfid; else target = inst->bits3.generic.msg_target; @@ -912,7 +913,7 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen) pad (file, 16); space = 0; - if (gen >= 60) { + if (gen >= 060) { control (file, "target function", target_function_gen6, target, &space); } else { @@ -934,19 +935,19 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen) inst->bits3.math.precision, &space); break; case BRW_SFID_SAMPLER: - if (gen >= 70) { + if (gen >= 070) { format (file, " (%d, %d, %d, %d)", inst->bits3.sampler_gen7.binding_table_index, inst->bits3.sampler_gen7.sampler, inst->bits3.sampler_gen7.msg_type, inst->bits3.sampler_gen7.simd_mode); - } else if (gen >= 50) { + } else if 
(gen >= 050) { format (file, " (%d, %d, %d, %d)", inst->bits3.sampler_gen5.binding_table_index, inst->bits3.sampler_gen5.sampler, inst->bits3.sampler_gen5.msg_type, inst->bits3.sampler_gen5.simd_mode); - } else if (gen >= 45) { + } else if (gen >= 045) { format (file, " (%d, %d)", inst->bits3.sampler_g4x.binding_table_index, inst->bits3.sampler_g4x.sampler); @@ -961,13 +962,13 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen) } break; case BRW_SFID_DATAPORT_READ: - if (gen >= 60) { + if (gen >= 060) { format (file, " (%d, %d, %d, %d)", inst->bits3.gen6_dp.binding_table_index, inst->bits3.gen6_dp.msg_control, inst->bits3.gen6_dp.msg_type, inst->bits3.gen6_dp.send_commit_msg); - } else if (gen >= 45) { + } else if (gen >= 045) { format (file, " (%d, %d, %d)", inst->bits3.dp_read_gen5.binding_table_index, inst->bits3.dp_read_gen5.msg_control, @@ -981,7 +982,7 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen) break; case BRW_SFID_DATAPORT_WRITE: - if (gen >= 70) { + if (gen >= 070) { format (file, " ("); control (file, "DP rc message type", @@ -992,7 +993,7 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen) inst->bits3.gen7_dp.binding_table_index, inst->bits3.gen7_dp.msg_control, inst->bits3.gen7_dp.msg_type); - } else if (gen >= 60) { + } else if (gen >= 060) { format (file, " ("); control (file, "DP rc message type", @@ -1015,14 +1016,14 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen) break; case BRW_SFID_URB: - if (gen >= 50) { + if (gen >= 050) { format (file, " %d", inst->bits3.urb_gen5.offset); } else { format (file, " %d", inst->bits3.urb.offset); } space = 1; - if (gen >= 50) { + if (gen >= 050) { control (file, "urb opcode", urb_opcode, inst->bits3.urb_gen5.opcode, &space); } @@ -1051,7 +1052,7 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen) } if (space) string (file, " "); - if (gen >= 50) { + if (gen >= 050) { format (file, "mlen %d", inst->bits3.generic_gen5.msg_length); format (file, " rlen %d", @@ -1068,13 +1069,13 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen) string(file, "{"); space = 1; control(file, "access mode", access_mode, inst->header.access_mode, &space); - if (gen >= 60) + if (gen >= 060) control(file, "write enable control", wectrl, inst->header.mask_control, &space); else control(file, "mask control", mask_ctrl, inst->header.mask_control, &space); control(file, "dependency control", dep_ctrl, inst->header.dependency_control, &space); - if (gen >= 60) + if (gen >= 060) qtr_ctrl(file, inst); else { if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED && @@ -1089,7 +1090,7 @@ void brw_disasm(FILE *file, const struct brw_instruction *inst, int gen) } control(file, "thread control", thread_ctrl, inst->header.thread_control, &space); - if (gen >= 60) + if (gen >= 060) control(file, "acc write control", accwr, inst->header.acc_wr_control, &space); if (inst->header.opcode == BRW_OPCODE_SEND || inst->header.opcode == BRW_OPCODE_SENDC) diff --git a/src/sna/brw/brw_eu.c b/src/sna/brw/brw_eu.c index 7c32ea191..9bd8ba5dc 100644 --- a/src/sna/brw/brw_eu.c +++ b/src/sna/brw/brw_eu.c @@ -79,7 +79,7 @@ void brw_set_compression_control(struct brw_compile *p, { p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED); - if (p->gen >= 60) { + if (p->gen >= 060) { /* Since we don't use the 32-wide support in gen6, we translate * the pre-gen6 compression control here. 
*/ diff --git a/src/sna/brw/brw_eu.h b/src/sna/brw/brw_eu.h index 65e66d5ec..24ab599ad 100644 --- a/src/sna/brw/brw_eu.h +++ b/src/sna/brw/brw_eu.h @@ -1862,7 +1862,7 @@ static inline void brw_set_saturate(struct brw_compile *p, unsigned value) static inline void brw_set_acc_write_control(struct brw_compile *p, unsigned value) { - if (p->gen >= 60) + if (p->gen >= 060) p->current->header.acc_wr_control = value; } @@ -1938,7 +1938,7 @@ static inline void brw_##OP(struct brw_compile *p, \ rnd = brw_next_insn(p, BRW_OPCODE_##OP); \ brw_set_dest(p, rnd, dest); \ brw_set_src0(p, rnd, src); \ - if (p->gen < 60) { \ + if (p->gen < 060) { \ /* turn on round-increments */ \ rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \ add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ diff --git a/src/sna/brw/brw_eu_emit.c b/src/sna/brw/brw_eu_emit.c index 3f01ae7b7..5c0b30654 100644 --- a/src/sna/brw/brw_eu_emit.c +++ b/src/sna/brw/brw_eu_emit.c @@ -61,7 +61,7 @@ gen6_resolve_implied_move(struct brw_compile *p, struct brw_reg *src, unsigned msg_reg_nr) { - if (p->gen < 60) + if (p->gen < 060) return; if (src->file == BRW_MESSAGE_REGISTER_FILE) @@ -88,7 +88,7 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) * Since we're pretending to have 16 MRFs anyway, we may as well use the * registers required for messages with EOT. */ - if (p->gen >= 70 && reg->file == BRW_MESSAGE_REGISTER_FILE) { + if (p->gen >= 070 && reg->file == BRW_MESSAGE_REGISTER_FILE) { reg->file = BRW_GENERAL_REGISTER_FILE; reg->nr += 111; } @@ -378,13 +378,13 @@ brw_set_message_descriptor(struct brw_compile *p, { brw_set_src1(p, inst, brw_imm_d(0)); - if (p->gen >= 50) { + if (p->gen >= 050) { inst->bits3.generic_gen5.header_present = header_present; inst->bits3.generic_gen5.response_length = response_length; inst->bits3.generic_gen5.msg_length = msg_length; inst->bits3.generic_gen5.end_of_thread = end_of_thread; - if (p->gen >= 60) { + if (p->gen >= 060) { /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */ inst->header.destreg__conditionalmod = sfid; } else { @@ -439,7 +439,7 @@ static void brw_set_math_message(struct brw_compile *p, brw_set_message_descriptor(p, insn, BRW_SFID_MATH, msg_length, response_length, false, false); - if (p->gen == 50) { + if (p->gen == 050) { insn->bits3.math_gen5.function = function; insn->bits3.math_gen5.int_type = integer_type; insn->bits3.math_gen5.precision = low_precision; @@ -485,7 +485,7 @@ static void brw_set_urb_message(struct brw_compile *p, { brw_set_message_descriptor(p, insn, BRW_SFID_URB, msg_length, response_length, true, end_of_thread); - if (p->gen >= 70) { + if (p->gen >= 070) { insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */ insn->bits3.urb_gen7.offset = offset; assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE); @@ -493,7 +493,7 @@ static void brw_set_urb_message(struct brw_compile *p, /* per_slot_offset = 0 makes it ignore offsets in message header */ insn->bits3.urb_gen7.per_slot_offset = 0; insn->bits3.urb_gen7.complete = complete; - } else if (p->gen >= 50) { + } else if (p->gen >= 050) { insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */ insn->bits3.urb_gen5.offset = offset; insn->bits3.urb_gen5.swizzle_control = swizzle_control; @@ -525,13 +525,13 @@ brw_set_dp_write_message(struct brw_compile *p, { unsigned sfid; - if (p->gen >= 70) { + if (p->gen >= 070) { /* Use the Render Cache for RT writes; otherwise use the Data Cache */ if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE) sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; else 
sfid = GEN7_SFID_DATAPORT_DATA_CACHE; - } else if (p->gen >= 60) { + } else if (p->gen >= 060) { /* Use the render cache for all write messages. */ sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; } else { @@ -542,18 +542,18 @@ brw_set_dp_write_message(struct brw_compile *p, msg_length, response_length, header_present, end_of_thread); - if (p->gen >= 70) { + if (p->gen >= 070) { insn->bits3.gen7_dp.binding_table_index = binding_table_index; insn->bits3.gen7_dp.msg_control = msg_control; insn->bits3.gen7_dp.last_render_target = last_render_target; insn->bits3.gen7_dp.msg_type = msg_type; - } else if (p->gen >= 60) { + } else if (p->gen >= 060) { insn->bits3.gen6_dp.binding_table_index = binding_table_index; insn->bits3.gen6_dp.msg_control = msg_control; insn->bits3.gen6_dp.last_render_target = last_render_target; insn->bits3.gen6_dp.msg_type = msg_type; insn->bits3.gen6_dp.send_commit_msg = send_commit_msg; - } else if (p->gen >= 50) { + } else if (p->gen >= 050) { insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; insn->bits3.dp_write_gen5.msg_control = msg_control; insn->bits3.dp_write_gen5.last_render_target = last_render_target; @@ -580,9 +580,9 @@ brw_set_dp_read_message(struct brw_compile *p, { unsigned sfid; - if (p->gen >= 70) { + if (p->gen >= 070) { sfid = GEN7_SFID_DATAPORT_DATA_CACHE; - } else if (p->gen >= 60) { + } else if (p->gen >= 060) { if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE) sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; else @@ -595,23 +595,23 @@ brw_set_dp_read_message(struct brw_compile *p, msg_length, response_length, true, false); - if (p->gen >= 70) { + if (p->gen >= 070) { insn->bits3.gen7_dp.binding_table_index = binding_table_index; insn->bits3.gen7_dp.msg_control = msg_control; insn->bits3.gen7_dp.last_render_target = 0; insn->bits3.gen7_dp.msg_type = msg_type; - } else if (p->gen >= 60) { + } else if (p->gen >= 060) { insn->bits3.gen6_dp.binding_table_index = binding_table_index; insn->bits3.gen6_dp.msg_control = msg_control; insn->bits3.gen6_dp.last_render_target = 0; insn->bits3.gen6_dp.msg_type = msg_type; insn->bits3.gen6_dp.send_commit_msg = 0; - } else if (p->gen >= 50) { + } else if (p->gen >= 050) { insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; insn->bits3.dp_read_gen5.msg_control = msg_control; insn->bits3.dp_read_gen5.msg_type = msg_type; insn->bits3.dp_read_gen5.target_cache = target_cache; - } else if (p->gen >= 45) { + } else if (p->gen >= 045) { insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/ @@ -638,17 +638,17 @@ static void brw_set_sampler_message(struct brw_compile *p, msg_length, response_length, header_present, false); - if (p->gen >= 70) { + if (p->gen >= 070) { insn->bits3.sampler_gen7.binding_table_index = binding_table_index; insn->bits3.sampler_gen7.sampler = sampler; insn->bits3.sampler_gen7.msg_type = msg_type; insn->bits3.sampler_gen7.simd_mode = simd_mode; - } else if (p->gen >= 50) { + } else if (p->gen >= 050) { insn->bits3.sampler_gen5.binding_table_index = binding_table_index; insn->bits3.sampler_gen5.sampler = sampler; insn->bits3.sampler_gen5.msg_type = msg_type; insn->bits3.sampler_gen5.simd_mode = simd_mode; - } else if (p->gen >= 45) { + } else if (p->gen >= 045) { insn->bits3.sampler_g4x.binding_table_index = binding_table_index; insn->bits3.sampler_g4x.sampler = sampler; insn->bits3.sampler_g4x.msg_type = msg_type; @@ -706,11 +706,11 @@ 
brw_IF(struct brw_compile *p, unsigned execute_size) insn = brw_next_insn(p, BRW_OPCODE_IF); /* Override the defaults for this instruction: */ - if (p->gen < 60) { + if (p->gen < 060) { brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(p, insn, brw_ip_reg()); brw_set_src1(p, insn, brw_imm_d(0x0)); - } else if (p->gen < 70) { + } else if (p->gen < 070) { brw_set_dest(p, insn, brw_imm_w(0)); insn->bits1.branch_gen6.jump_count = 0; brw_set_src0(p, insn, __retype_d(brw_null_reg())); @@ -827,7 +827,7 @@ patch_IF_ELSE(struct brw_compile *p, /* Jump count is for 64bit data chunk each, so one 128bit instruction * requires 2 chunks. */ - if (p->gen >= 50) + if (p->gen >= 050) br = 2; assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF); @@ -835,7 +835,7 @@ patch_IF_ELSE(struct brw_compile *p, if (else_inst == NULL) { /* Patch IF -> ENDIF */ - if (p->gen < 60) { + if (p->gen < 060) { /* Turn it into an IFF, which means no mask stack operations for * all-false and jumping past the ENDIF. */ @@ -843,7 +843,7 @@ patch_IF_ELSE(struct brw_compile *p, if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1); if_inst->bits3.if_else.pop_count = 0; if_inst->bits3.if_else.pad0 = 0; - } else if (p->gen < 70) { + } else if (p->gen < 070) { /* As of gen6, there is no IFF and IF must point to the ENDIF. */ if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst); } else { @@ -854,23 +854,23 @@ patch_IF_ELSE(struct brw_compile *p, else_inst->header.execution_size = if_inst->header.execution_size; /* Patch IF -> ELSE */ - if (p->gen < 60) { + if (p->gen < 060) { if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst); if_inst->bits3.if_else.pop_count = 0; if_inst->bits3.if_else.pad0 = 0; - } else if (p->gen <= 70) { + } else if (p->gen <= 070) { if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1); } /* Patch ELSE -> ENDIF */ - if (p->gen < 60) { + if (p->gen < 060) { /* BRW_OPCODE_ELSE pre-gen6 should point just past the * matching ENDIF. */ else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1); else_inst->bits3.if_else.pop_count = 1; else_inst->bits3.if_else.pad0 = 0; - } else if (p->gen < 70) { + } else if (p->gen < 070) { /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. 
*/ else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst); } else { @@ -890,11 +890,11 @@ brw_ELSE(struct brw_compile *p) insn = brw_next_insn(p, BRW_OPCODE_ELSE); - if (p->gen < 60) { + if (p->gen < 060) { brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(p, insn, brw_ip_reg()); brw_set_src1(p, insn, brw_imm_d(0x0)); - } else if (p->gen < 70) { + } else if (p->gen < 070) { brw_set_dest(p, insn, brw_imm_w(0)); insn->bits1.branch_gen6.jump_count = 0; brw_set_src0(p, insn, __retype_d(brw_null_reg())); @@ -938,11 +938,11 @@ brw_ENDIF(struct brw_compile *p) insn = brw_next_insn(p, BRW_OPCODE_ENDIF); - if (p->gen < 60) { + if (p->gen < 060) { brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0))); brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0))); brw_set_src1(p, insn, brw_imm_d(0x0)); - } else if (p->gen < 70) { + } else if (p->gen < 070) { brw_set_dest(p, insn, brw_imm_w(0)); brw_set_src0(p, insn, __retype_d(brw_null_reg())); brw_set_src1(p, insn, __retype_d(brw_null_reg())); @@ -957,11 +957,11 @@ brw_ENDIF(struct brw_compile *p) insn->header.thread_control = BRW_THREAD_SWITCH; /* Also pop item off the stack in the endif instruction: */ - if (p->gen < 60) { + if (p->gen < 060) { insn->bits3.if_else.jump_count = 0; insn->bits3.if_else.pop_count = 1; insn->bits3.if_else.pad0 = 0; - } else if (p->gen < 70) { + } else if (p->gen < 070) { insn->bits1.branch_gen6.jump_count = 2; } else { insn->bits3.break_cont.jip = 2; @@ -974,7 +974,7 @@ struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) struct brw_instruction *insn; insn = brw_next_insn(p, BRW_OPCODE_BREAK); - if (p->gen >= 60) { + if (p->gen >= 060) { brw_set_dest(p, insn, __retype_d(brw_null_reg())); brw_set_src0(p, insn, __retype_d(brw_null_reg())); brw_set_src1(p, insn, brw_imm_d(0x0)); @@ -1041,7 +1041,7 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) */ struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size) { - if (p->gen >= 60 || p->single_program_flow) { + if (p->gen >= 060 || p->single_program_flow) { return &p->store[p->nr_insn]; } else { struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_DO); @@ -1068,10 +1068,10 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *insn; unsigned br = 1; - if (p->gen >= 50) + if (p->gen >= 050) br = 2; - if (p->gen >= 70) { + if (p->gen >= 070) { insn = brw_next_insn(p, BRW_OPCODE_WHILE); brw_set_dest(p, insn, __retype_d(brw_null_reg())); @@ -1080,7 +1080,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, insn->bits3.break_cont.jip = br * (do_insn - insn); insn->header.execution_size = BRW_EXECUTE_8; - } else if (p->gen >= 60) { + } else if (p->gen >= 060) { insn = brw_next_insn(p, BRW_OPCODE_WHILE); brw_set_dest(p, insn, brw_imm_w(0)); @@ -1126,7 +1126,7 @@ void brw_land_fwd_jump(struct brw_compile *p, struct brw_instruction *landing = &p->store[p->nr_insn]; unsigned jmpi = 1; - if (p->gen >= 50) + if (p->gen >= 050) jmpi = 2; assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); @@ -1195,7 +1195,7 @@ void brw_math(struct brw_compile *p, unsigned data_type, unsigned precision) { - if (p->gen >= 60) { + if (p->gen >= 060) { struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH); assert(dest.file == BRW_GENERAL_REGISTER_FILE); @@ -1294,7 +1294,7 @@ void brw_math_16(struct brw_compile *p, { struct brw_instruction *insn; - if (p->gen >= 60) { + if (p->gen >= 060) { insn = brw_next_insn(p, BRW_OPCODE_MATH); /* Math is the same ISA format as other opcodes, except 
that CondModifier @@ -1362,7 +1362,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p, uint32_t msg_control, msg_type; int mlen; - if (p->gen >= 60) + if (p->gen >= 060) offset /= 16; mrf = __retype_ud(mrf); @@ -1418,7 +1418,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p, * protection. Our use of DP writes is all about register * spilling within a thread. */ - if (p->gen >= 60) { + if (p->gen >= 060) { dest = __retype_uw(vec16(brw_null_reg())); send_commit_msg = 0; } else { @@ -1427,13 +1427,13 @@ void brw_oword_block_write_scratch(struct brw_compile *p, } brw_set_dest(p, insn, dest); - if (p->gen >= 60) { + if (p->gen >= 060) { brw_set_src0(p, insn, mrf); } else { brw_set_src0(p, insn, brw_null_reg()); } - if (p->gen >= 60) + if (p->gen >= 060) msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; else msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; @@ -1470,7 +1470,7 @@ brw_oword_block_read_scratch(struct brw_compile *p, uint32_t msg_control; int rlen; - if (p->gen >= 60) + if (p->gen >= 060) offset /= 16; mrf = __retype_ud(mrf); @@ -1507,7 +1507,7 @@ brw_oword_block_read_scratch(struct brw_compile *p, insn->header.destreg__conditionalmod = mrf.nr; brw_set_dest(p, insn, dest); /* UW? */ - if (p->gen >= 60) { + if (p->gen >= 060) { brw_set_src0(p, insn, mrf); } else { brw_set_src0(p, insn, brw_null_reg()); @@ -1538,7 +1538,7 @@ void brw_oword_block_read(struct brw_compile *p, struct brw_instruction *insn; /* On newer hardware, offset is in units of owords. */ - if (p->gen >= 60) + if (p->gen >= 060) offset /= 16; mrf = __retype_ud(mrf); @@ -1562,7 +1562,7 @@ void brw_oword_block_read(struct brw_compile *p, dest = __retype_uw(vec8(dest)); brw_set_dest(p, insn, dest); - if (p->gen >= 60) { + if (p->gen >= 060) { brw_set_src0(p, insn, mrf); } else { brw_set_src0(p, insn, brw_null_reg()); @@ -1634,7 +1634,7 @@ void brw_dp_READ_4_vs(struct brw_compile *p, struct brw_instruction *insn; unsigned msg_reg_nr = 1; - if (p->gen >= 60) + if (p->gen >= 060) location /= 16; /* Setup MRF[1] with location/offset into const buffer */ @@ -1655,7 +1655,7 @@ void brw_dp_READ_4_vs(struct brw_compile *p, insn->header.mask_control = BRW_MASK_DISABLE; brw_set_dest(p, insn, dest); - if (p->gen >= 60) { + if (p->gen >= 060) { brw_set_src0(p, insn, brw_message_reg(msg_reg_nr)); } else { brw_set_src0(p, insn, brw_null_reg()); @@ -1710,9 +1710,9 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, brw_set_dest(p, insn, dest); brw_set_src0(p, insn, src); - if (p->gen >= 60) + if (p->gen >= 060) msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; - else if (p->gen >= 45) + else if (p->gen >= 045) msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; else msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; @@ -1747,7 +1747,7 @@ void brw_fb_WRITE(struct brw_compile *p, else dest = __retype_uw(vec8(brw_null_reg())); - if (p->gen >= 60 && binding_table_index == 0) { + if (p->gen >= 060 && binding_table_index == 0) { insn = brw_next_insn(p, BRW_OPCODE_SENDC); } else { insn = brw_next_insn(p, BRW_OPCODE_SEND); @@ -1756,7 +1756,7 @@ void brw_fb_WRITE(struct brw_compile *p, insn->header.predicate_control = 0; insn->header.compression_control = BRW_COMPRESSION_NONE; - if (p->gen >= 60) { + if (p->gen >= 060) { /* headerless version, just submit color payload */ src0 = brw_message_reg(msg_reg_nr); @@ -1802,7 +1802,7 @@ void brw_SAMPLE(struct brw_compile *p, { assert(writemask); - if (p->gen < 50 || writemask != WRITEMASK_XYZW) { + if (p->gen < 050 || writemask != 
WRITEMASK_XYZW) { struct brw_reg m1 = brw_message_reg(msg_reg_nr); writemask = ~writemask & WRITEMASK_XYZW; @@ -1828,7 +1828,7 @@ void brw_SAMPLE(struct brw_compile *p, insn = brw_next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - if (p->gen < 60) + if (p->gen < 060) insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(p, insn, dest); @@ -1865,7 +1865,7 @@ void brw_urb_WRITE(struct brw_compile *p, gen6_resolve_implied_move(p, &src0, msg_reg_nr); - if (p->gen >= 70) { + if (p->gen >= 070) { /* Enable Channel Masks in the URB_WRITE_HWORD message header */ brw_push_insn_state(p); brw_set_access_mode(p, BRW_ALIGN_1); @@ -1883,7 +1883,7 @@ void brw_urb_WRITE(struct brw_compile *p, brw_set_src0(p, insn, src0); brw_set_src1(p, insn, brw_imm_d(0)); - if (p->gen <= 60) + if (p->gen <= 060) insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_urb_message(p, @@ -1931,7 +1931,7 @@ brw_find_loop_end(struct brw_compile *p, int start) struct brw_instruction *insn = &p->store[ip]; if (insn->header.opcode == BRW_OPCODE_WHILE) { - int jip = p->gen <= 70 ? insn->bits1.branch_gen6.jump_count + int jip = p->gen <= 070 ? insn->bits1.branch_gen6.jump_count : insn->bits3.break_cont.jip; if (ip + jip / br <= start) return ip; @@ -1950,7 +1950,7 @@ brw_set_uip_jip(struct brw_compile *p) int ip; int br = 2; - if (p->gen <= 60) + if (p->gen <= 060) return; for (ip = 0; ip < p->nr_insn; ip++) { @@ -1961,7 +1961,7 @@ brw_set_uip_jip(struct brw_compile *p) insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); /* Gen7 UIP points to WHILE; Gen6 points just after it */ insn->bits3.break_cont.uip = - br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 70 ? 1 : 0)); + br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 070 ? 1 : 0)); break; case BRW_OPCODE_CONTINUE: insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); @@ -1991,7 +1991,7 @@ void brw_ff_sync(struct brw_compile *p, brw_set_src0(p, insn, src0); brw_set_src1(p, insn, brw_imm_d(0)); - if (p->gen < 60) + if (p->gen < 060) insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_ff_sync_message(p, diff --git a/src/sna/brw/brw_wm.c b/src/sna/brw/brw_wm.c index f54e55efe..e8dc6ac47 100644 --- a/src/sna/brw/brw_wm.c +++ b/src/sna/brw/brw_wm.c @@ -41,15 +41,15 @@ static void brw_wm_affine_st(struct brw_compile *p, int dw, if (dw == 16) { brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); - uv = p->gen >= 60 ? 6 : 3; + uv = p->gen >= 060 ? 6 : 3; } else { brw_set_compression_control(p, BRW_COMPRESSION_NONE); - uv = p->gen >= 60 ? 4 : 3; + uv = p->gen >= 060 ? 4 : 3; } uv += 2*channel; msg++; - if (p->gen >= 60) { + if (p->gen >= 060) { brw_PLN(p, brw_message_reg(msg), brw_vec1_grf(uv, 0), @@ -96,7 +96,7 @@ static int brw_wm_sample(struct brw_compile *p, int dw, int len; len = dw == 16 ? 
4 : 2; - if (p->gen >= 60) { + if (p->gen >= 060) { header = false; src0 = brw_message_reg(++msg); } else { @@ -125,7 +125,7 @@ static int brw_wm_sample__alpha(struct brw_compile *p, int dw, rlen = 2; } - if (p->gen >= 60) + if (p->gen >= 060) src0 = brw_message_reg(msg); else src0 = brw_vec8_grf(0, 0); @@ -182,7 +182,7 @@ static void brw_fb_write(struct brw_compile *p, int dw) msg_len = 4; } - if (p->gen < 60) { + if (p->gen < 060) { brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); @@ -197,7 +197,7 @@ static void brw_fb_write(struct brw_compile *p, int dw) insn->header.predicate_control = 0; insn->header.compression_control = BRW_COMPRESSION_NONE; - if (p->gen >= 60) { + if (p->gen >= 060) { msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; src0 = brw_message_reg(2); header = false; @@ -219,7 +219,7 @@ static void brw_wm_write(struct brw_compile *p, int dw, int src) { int n; - if (dw == 8 && p->gen >= 60) { + if (dw == 8 && p->gen >= 060) { /* XXX pixel execution mask? */ brw_set_compression_control(p, BRW_COMPRESSION_NONE); @@ -233,11 +233,11 @@ static void brw_wm_write(struct brw_compile *p, int dw, int src) brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); for (n = 0; n < 4; n++) { - if (p->gen >= 60) { + if (p->gen >= 060) { brw_MOV(p, brw_message_reg(2 + 2*n), brw_vec8_grf(src + 2*n, 0)); - } else if (p->gen >= 45 && dw == 16) { + } else if (p->gen >= 045 && dw == 16) { brw_MOV(p, brw_message_reg(2 + n + BRW_MRF_COMPR4), brw_vec8_grf(src + 2*n, 0)); @@ -265,7 +265,7 @@ static void brw_wm_write__mask(struct brw_compile *p, int dw, { int n; - if (dw == 8 && p->gen >= 60) { + if (dw == 8 && p->gen >= 060) { brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MUL(p, @@ -291,12 +291,12 @@ static void brw_wm_write__mask(struct brw_compile *p, int dw, brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); for (n = 0; n < 4; n++) { - if (p->gen >= 60) { + if (p->gen >= 060) { brw_MUL(p, brw_message_reg(2 + 2*n), brw_vec8_grf(src + 2*n, 0), brw_vec8_grf(mask, 0)); - } else if (p->gen >= 45 && dw == 16) { + } else if (p->gen >= 045 && dw == 16) { brw_MUL(p, brw_message_reg(2 + n + BRW_MRF_COMPR4), brw_vec8_grf(src + 2*n, 0), @@ -327,7 +327,7 @@ static void brw_wm_write__opacity(struct brw_compile *p, int dw, { int n; - if (dw == 8 && p->gen >= 60) { + if (dw == 8 && p->gen >= 060) { brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MUL(p, @@ -353,12 +353,12 @@ static void brw_wm_write__opacity(struct brw_compile *p, int dw, brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); for (n = 0; n < 4; n++) { - if (p->gen >= 60) { + if (p->gen >= 060) { brw_MUL(p, brw_message_reg(2 + 2*n), brw_vec8_grf(src + 2*n, 0), brw_vec1_grf(mask, 3)); - } else if (p->gen >= 45 && dw == 16) { + } else if (p->gen >= 045 && dw == 16) { brw_MUL(p, brw_message_reg(2 + n + BRW_MRF_COMPR4), brw_vec8_grf(src + 2*n, 0), @@ -389,7 +389,7 @@ static void brw_wm_write__mask_ca(struct brw_compile *p, int dw, { int n; - if (dw == 8 && p->gen >= 60) { + if (dw == 8 && p->gen >= 060) { brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MUL(p, @@ -415,12 +415,12 @@ static void brw_wm_write__mask_ca(struct brw_compile *p, int dw, brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); for (n = 0; n < 4; n++) { - if (p->gen >= 60) { + if (p->gen >= 060) { brw_MUL(p, brw_message_reg(2 + 2*n), brw_vec8_grf(src + 2*n, 0), brw_vec8_grf(mask + 2*n, 0)); - } else if (p->gen >= 45 && dw == 16) { + } else if 
(p->gen >= 045 && dw == 16) { brw_MUL(p, brw_message_reg(2 + n + BRW_MRF_COMPR4), brw_vec8_grf(src + 2*n, 0), @@ -449,7 +449,7 @@ done: bool brw_wm_kernel__affine(struct brw_compile *p, int dispatch) { - if (p->gen < 60) + if (p->gen < 060) brw_wm_xy(p, dispatch); brw_wm_write(p, dispatch, brw_wm_affine(p, dispatch, 0, 1, 12)); @@ -461,7 +461,7 @@ brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch) { int src, mask; - if (p->gen < 60) + if (p->gen < 060) brw_wm_xy(p, dispatch); src = brw_wm_affine(p, dispatch, 0, 1, 12); @@ -476,7 +476,7 @@ brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch) { int src, mask; - if (p->gen < 60) + if (p->gen < 060) brw_wm_xy(p, dispatch); src = brw_wm_affine(p, dispatch, 0, 1, 12); @@ -491,7 +491,7 @@ brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch) { int src, mask; - if (p->gen < 60) + if (p->gen < 060) brw_wm_xy(p, dispatch); src = brw_wm_affine__alpha(p, dispatch, 0, 1, 12); @@ -510,15 +510,15 @@ static void brw_wm_projective_st(struct brw_compile *p, int dw, if (dw == 16) { brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); - uv = p->gen >= 60 ? 6 : 3; + uv = p->gen >= 060 ? 6 : 3; } else { brw_set_compression_control(p, BRW_COMPRESSION_NONE); - uv = p->gen >= 60 ? 4 : 3; + uv = p->gen >= 060 ? 4 : 3; } uv += 2*channel; msg++; - if (p->gen >= 60) { + if (p->gen >= 060) { /* First compute 1/z */ brw_PLN(p, brw_message_reg(msg), @@ -594,7 +594,7 @@ static int brw_wm_projective__alpha(struct brw_compile *p, int dw, bool brw_wm_kernel__projective(struct brw_compile *p, int dispatch) { - if (p->gen < 60) + if (p->gen < 060) brw_wm_xy(p, dispatch); brw_wm_write(p, dispatch, brw_wm_projective(p, dispatch, 0, 1, 12)); @@ -606,7 +606,7 @@ brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch) { int src, mask; - if (p->gen < 60) + if (p->gen < 060) brw_wm_xy(p, dispatch); src = brw_wm_projective(p, dispatch, 0, 1, 12); @@ -621,7 +621,7 @@ brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch) { int src, mask; - if (p->gen < 60) + if (p->gen < 060) brw_wm_xy(p, dispatch); src = brw_wm_projective(p, dispatch, 0, 1, 12); @@ -636,7 +636,7 @@ brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch) { int src, mask; - if (p->gen < 60) + if (p->gen < 060) brw_wm_xy(p, dispatch); src = brw_wm_projective__alpha(p, dispatch, 0, 1, 12); @@ -651,9 +651,9 @@ brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch) { int src, mask; - if (p->gen < 60) { + if (p->gen < 060) { brw_wm_xy(p, dispatch); - mask = 4; + mask = 5; } else mask = dispatch == 16 ? 8 : 6; @@ -668,9 +668,9 @@ brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch) { int src, mask; - if (p->gen < 60) { + if (p->gen < 060) { brw_wm_xy(p, dispatch); - mask = 4; + mask = 5; } else mask = dispatch == 16 ? 
8 : 6; diff --git a/src/sna/compiler.h b/src/sna/compiler.h index ff80365eb..b985f2bca 100644 --- a/src/sna/compiler.h +++ b/src/sna/compiler.h @@ -36,6 +36,7 @@ #define fastcall __attribute__((regparm(3))) #define must_check __attribute__((warn_unused_result)) #define constant __attribute__((const)) +#define __packed__ __attribute__((__packed__)) #else #define likely(expr) (expr) #define unlikely(expr) (expr) @@ -44,6 +45,7 @@ #define fastcall #define must_check #define constant +#define __packed__ #endif #ifdef HAVE_VALGRIND diff --git a/src/sna/fb/Makefile.in b/src/sna/fb/Makefile.in index d9ca271f0..d21411b96 100644 --- a/src/sna/fb/Makefile.in +++ b/src/sna/fb/Makefile.in @@ -182,7 +182,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MAN_SUBSTS = @MAN_SUBSTS@ @@ -221,6 +220,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@ VERSION = @VERSION@ X11_CFLAGS = @X11_CFLAGS@ X11_LIBS = @X11_LIBS@ +XCB_CFLAGS = @XCB_CFLAGS@ +XCB_LIBS = @XCB_LIBS@ XORG_CFLAGS = @XORG_CFLAGS@ XORG_LIBS = @XORG_LIBS@ XORG_MAN_PAGE = @XORG_MAN_PAGE@ @@ -319,7 +320,7 @@ all: all-am .SUFFIXES: .SUFFIXES: .c .lo .o .obj -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -344,9 +345,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) +$(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): diff --git a/src/sna/fb/fb.h b/src/sna/fb/fb.h index e58e03967..d99453da6 100644 --- a/src/sna/fb/fb.h +++ b/src/sna/fb/fb.h @@ -33,6 +33,7 @@ #include <gcstruct.h> #include <colormap.h> #include <windowstr.h> +#include <regionstr.h> #include <stdbool.h> #include <pixman.h> @@ -45,6 +46,8 @@ #include "sfb.h" +#include "../../compat-api.h" + #define WRITE(ptr, val) (*(ptr) = (val)) #define READ(ptr) (*(ptr)) @@ -294,12 +297,12 @@ extern DevPrivateKeyRec sna_window_key; static inline FbGCPrivate *fb_gc(GCPtr gc) { - return dixGetPrivateAddr(&gc->devPrivates, &sna_gc_key); + return (FbGCPrivate *)__get_private(gc, sna_gc_key); } static inline PixmapPtr fbGetWindowPixmap(WindowPtr window) { - return *(PixmapPtr *)dixGetPrivateAddr(&window->devPrivates, &sna_window_key); + return *(PixmapPtr *)__get_private(window, sna_window_key); } #ifdef ROOTLESS @@ -360,8 +363,14 @@ static inline PixmapPtr fbGetWindowPixmap(WindowPtr window) * XFree86 empties the root BorderClip when the VT is inactive, * here's a macro which uses that to disable GetImage and GetSpans */ + +#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,10,0,0,0) #define fbWindowEnabled(pWin) \ - RegionNotEmpty(&(pWin)->drawable.pScreen->root->borderClip) + RegionNotEmpty(&(pWin)->drawable.pScreen->root->borderClip) +#else +#define fbWindowEnabled(pWin) \ + RegionNotEmpty(&WindowTable[(pWin)->drawable.pScreen->myNum]->borderClip) +#endif #define fbDrawableEnabled(drawable) \ ((drawable)->type == DRAWABLE_PIXMAP ? 
\ TRUE : fbWindowEnabled((WindowPtr) drawable)) diff --git a/src/sna/fb/fbbitmap.c b/src/sna/fb/fbbitmap.c index 7c037fe36..2ea92a992 100644 --- a/src/sna/fb/fbbitmap.c +++ b/src/sna/fb/fbbitmap.c @@ -25,21 +25,50 @@ #include "fb.h" +static Bool region_grow(RegionPtr region) +{ + RegDataPtr data; + int n; + + n = 16; + if (!region->data) { + region->data = malloc(RegionSizeof(n)); + if (!region->data) + return RegionBreak(region); + region->data->numRects = 1; + *RegionBoxptr(region) = region->extents; + } else if (!region->data->size) { + region->data = malloc(RegionSizeof(n)); + if (!region->data) + return RegionBreak(region); + region->data->numRects = 0; + } else { + n = 2 * region->data->numRects; + data = (RegDataPtr) realloc(region->data, RegionSizeof(n)); + if (!data) + return RegionBreak(region); + region->data = data; + } + region->data->size = n; + return TRUE; +} + static inline void add(RegionPtr region, int16_t x1, int16_t y1, int16_t x2, int16_t y2) { BoxPtr r; - if (region->data->numRects == region->data->size) - RegionRectAlloc(region, 1); + if (region->data->numRects == region->data->size && + !region_grow(region)) + return; r = RegionBoxptr(region) + region->data->numRects++; r->x1 = x1; r->y1 = y1; r->x2 = x2; r->y2 = y2; - DBG(("%s[%d/%d]: (%d, %d), (%d, %d)\n", + DBG(("%s[%ld/%ld]: (%d, %d), (%d, %d)\n", __FUNCTION__, - region->data->numRects, region->data->size, + (long)region->data->numRects, (long)region->data->size, x1, y1, x2, y2)); if (x1 < region->extents.x1) @@ -149,11 +178,11 @@ fbBitmapToRegion(PixmapPtr pixmap) } else region->extents.x1 = region->extents.x2 = 0; - DBG(("%s: region extents=(%d, %d), (%d, %d) x %d\n", + DBG(("%s: region extents=(%d, %d), (%d, %d) x %ld\n", __FUNCTION__, region->extents.x1, region->extents.y1, region->extents.x2, region->extents.y2, - RegionNumRects(region))); + (long)RegionNumRects(region))); return region; } diff --git a/src/sna/fb/fbblt.c b/src/sna/fb/fbblt.c index d4d20b68b..5ad2e2e25 100644 --- a/src/sna/fb/fbblt.c +++ b/src/sna/fb/fbblt.c @@ -270,7 +270,7 @@ fbBlt(FbBits *srcLine, FbStride srcStride, int srcX, int alu, FbBits pm, int bpp, Bool reverse, Bool upsidedown) { - DBG(("%s %dx%d, alu=%d, pm=%d, bpp=%d (reverse=%d, upsidedown=%d)\n", + DBG(("%s %dx%d, alu=%d, pm=%x, bpp=%d (reverse=%d, upsidedown=%d)\n", __FUNCTION__, width, height, alu, pm, bpp, reverse, upsidedown)); if (alu == GXcopy && pm == FB_ALLONES && ((srcX|dstX|width) & 7) == 0) { @@ -285,9 +285,9 @@ fbBlt(FbBits *srcLine, FbStride srcStride, int srcX, s += srcX >> 3; d += dstX >> 3; - DBG(("%s fast blt, src_stride=%d, dst_stride=%d, width=%d (offset=%d)\n", + DBG(("%s fast blt, src_stride=%d, dst_stride=%d, width=%d (offset=%ld)\n", __FUNCTION__, - srcStride, dstStride, width, s - d)); + srcStride, dstStride, width, (long)(s - d))); if (width == srcStride && width == dstStride) { width *= height; diff --git a/src/sna/fb/fbpict.c b/src/sna/fb/fbpict.c index a2038518e..906a5f316 100644 --- a/src/sna/fb/fbpict.c +++ b/src/sna/fb/fbpict.c @@ -26,25 +26,19 @@ #include <string.h> #include "fb.h" - -#include <picturestr.h> -#include <mipict.h> #include "fbpict.h" static void SourceValidateOnePicture(PicturePtr picture) { DrawablePtr drawable = picture->pDrawable; - ScreenPtr screen; if (!drawable) return; - screen = drawable->pScreen; - if (screen->SourceValidate) - screen->SourceValidate(drawable, - 0, 0, drawable->width, drawable->height, - picture->subWindowMode); + SourceValidate(drawable, + 0, 0, drawable->width, drawable->height, + 
picture->subWindowMode); } static void diff --git a/src/sna/fb/fbpict.h b/src/sna/fb/fbpict.h index 1ce09df25..560138533 100644 --- a/src/sna/fb/fbpict.h +++ b/src/sna/fb/fbpict.h @@ -24,17 +24,23 @@ #ifndef FBPICT_H #define FBPICT_H +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <picturestr.h> + #include "sfb.h" extern void fbComposite(CARD8 op, - PicturePtr pSrc, - PicturePtr pMask, - PicturePtr pDst, - INT16 xSrc, - INT16 ySrc, - INT16 xMask, - INT16 yMask, INT16 xDst, INT16 yDst, CARD16 width, CARD16 height); + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, INT16 ySrc, + INT16 xMask, INT16 yMask, + INT16 xDst, INT16 yDst, + CARD16 width, CARD16 height); extern pixman_image_t *image_from_pict(PicturePtr pict, Bool has_clip, diff --git a/src/sna/fb/fbpoint.c b/src/sna/fb/fbpoint.c index 3df79a261..c5f0f876f 100644 --- a/src/sna/fb/fbpoint.c +++ b/src/sna/fb/fbpoint.c @@ -93,10 +93,10 @@ fbPolyPoint(DrawablePtr drawable, GCPtr gc, int xoff, int yoff, FbBits and, FbBits xor); - DBG(("%s x %d, clip=[(%d, %d), (%d, %d)]x%d\n", __FUNCTION__, n, + DBG(("%s x %d, clip=[(%d, %d), (%d, %d)]x%ld\n", __FUNCTION__, n, gc->pCompositeClip->extents.x1, gc->pCompositeClip->extents.y1, gc->pCompositeClip->extents.x2, gc->pCompositeClip->extents.y2, - RegionNumRects(gc->pCompositeClip))); + (long)RegionNumRects(gc->pCompositeClip))); if (mode == CoordModePrevious) fbFixCoordModePrevious(n, pt); diff --git a/src/sna/fb/fbseg.c b/src/sna/fb/fbseg.c index 5b8173f08..67ad38958 100644 --- a/src/sna/fb/fbseg.c +++ b/src/sna/fb/fbseg.c @@ -353,7 +353,8 @@ fbSelectBres(DrawablePtr drawable, GCPtr gc) FbBres *bres; DBG(("%s: line=%d, fill=%d, and=%lx, bgand=%lx\n", - __FUNCTION__, gc->lineStyle, gc->fillStyle, pgc->and, pgc->bgand)); + __FUNCTION__, gc->lineStyle, gc->fillStyle, + (long)pgc->and, (long)pgc->bgand)); assert(gc->lineWidth == 0); if (gc->lineStyle == LineSolid) { diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c index 501266241..4d92adcf7 100644 --- a/src/sna/gen2_render.c +++ b/src/sna/gen2_render.c @@ -46,9 +46,6 @@ #define NO_FILL_ONE 0 #define NO_FILL_BOXES 0 -#define PREFER_BLT_FILL 1 -#define PREFER_BLT_COPY 1 - #define MAX_3D_SIZE 2048 #define MAX_3D_PITCH 8192 @@ -175,7 +172,7 @@ gen2_get_card_format(struct sna *sna, uint32_t format) if (i8xx_tex_formats[i].fmt == format) return i8xx_tex_formats[i].card_fmt; - if (sna->kgem.gen < 21) { + if (sna->kgem.gen < 021) { /* Whilst these are not directly supported on 830/845, * we only enable them when we can implicitly convert * them to a supported variant through the texture @@ -203,7 +200,7 @@ gen2_check_format(struct sna *sna, PicturePtr p) if (i8xx_tex_formats[i].fmt == p->format) return true; - if (sna->kgem.gen > 21) { + if (sna->kgem.gen > 021) { for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) if (i85x_tex_formats[i].fmt == p->format) return true; @@ -396,6 +393,15 @@ gen2_get_blend_factors(const struct sna_composite_op *op, cblend |= TB0C_OP_MODULATE; ablend |= TB0A_OP_MODULATE; + } else if (op->mask.is_solid) { + cblend |= TB0C_ARG2_SEL_DIFFUSE; + ablend |= TB0A_ARG2_SEL_DIFFUSE; + + if (op->dst.format == PICT_a8 || !op->has_component_alpha) + cblend |= TB0C_ARG2_REPLICATE_ALPHA; + + cblend |= TB0C_OP_MODULATE; + ablend |= TB0A_OP_MODULATE; } else { cblend |= TB0C_OP_ARG1; ablend |= TB0A_OP_ARG1; @@ -504,6 +510,7 @@ static void gen2_emit_invariant(struct sna *sna) ENABLE_TEX_CACHE); BATCH(_3DSTATE_STIPPLE); + BATCH(0); BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) | TEXPIPE_COLOR | @@ -536,9 +543,9 @@ 
static void gen2_emit_invariant(struct sna *sna) } static void -gen2_get_batch(struct sna *sna) +gen2_get_batch(struct sna *sna, const struct sna_composite_op *op) { - kgem_set_mode(&sna->kgem, KGEM_RENDER); + kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); if (!kgem_check_batch(&sna->kgem, INVARIANT_SIZE+40)) { DBG(("%s: flushing batch: size %d > %d\n", @@ -574,7 +581,7 @@ static void gen2_emit_target(struct sna *sna, const struct sna_composite_op *op) { assert(!too_large(op->dst.width, op->dst.height)); assert(op->dst.bo->pitch >= 8 && op->dst.bo->pitch <= MAX_3D_PITCH); - assert(sna->render_state.gen2.vertex_offset == 0); + assert(sna->render.vertex_offset == 0); if (sna->render_state.gen2.target == op->dst.bo->unique_id) { kgem_bo_mark_dirty(op->dst.bo); @@ -662,7 +669,7 @@ static void gen2_emit_composite_state(struct sna *sna, uint32_t cblend, ablend; int tex; - gen2_get_batch(sna); + gen2_get_batch(sna, op); if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo) @@ -726,6 +733,12 @@ static void gen2_emit_composite_state(struct sna *sna, else texcoordfmt |= TEXCOORDFMT_3D << (2*tex); gen2_emit_texture(sna, &op->mask, tex++); + } else if (op->mask.is_solid) { + if (op->mask.u.gen2.pixel != sna->render_state.gen2.diffuse) { + BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); + BATCH(op->mask.u.gen2.pixel); + sna->render_state.gen2.diffuse = op->mask.u.gen2.pixel; + } } v = _3DSTATE_VERTEX_FORMAT_2_CMD | texcoordfmt; @@ -749,9 +762,9 @@ gen2_emit_composite_linear(struct sna *sna, { float v; - v = (x * channel->u.gen2.linear_dx + - y * channel->u.gen2.linear_dy + - channel->u.gen2.linear_offset); + v = (x * channel->u.linear.dx + + y * channel->u.linear.dy + + channel->u.linear.offset); DBG(("%s: (%d, %d) -> %f\n", __FUNCTION__, x, y, v)); VERTEX(v); VERTEX(v); @@ -895,33 +908,30 @@ gen2_emit_composite_primitive_affine(struct sna *sna, const struct sna_composite_rectangles *r) { PictTransform *transform = op->src.transform; - int16_t dst_x = r->dst.x + op->dst.x; - int16_t dst_y = r->dst.y + op->dst.y; int src_x = r->src.x + (int)op->src.offset[0]; int src_y = r->src.y + (int)op->src.offset[1]; - float sx, sy; + float *v; - _sna_get_transformed_coordinates(src_x + r->width, src_y + r->height, - transform, - &sx, &sy); + v = (float *)sna->kgem.batch + sna->kgem.nbatch; + sna->kgem.nbatch += 12; - gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); - VERTEX(sx * op->src.scale[0]); - VERTEX(sy * op->src.scale[1]); + v[8] = v[4] = r->dst.x + op->dst.x; + v[0] = v[4] + r->width; - _sna_get_transformed_coordinates(src_x, src_y + r->height, - transform, - &sx, &sy); - gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); - VERTEX(sx * op->src.scale[0]); - VERTEX(sy * op->src.scale[1]); + v[9] = r->dst.y + op->dst.y; + v[5] = v[1] = v[9] + r->height; - _sna_get_transformed_coordinates(src_x, src_y, - transform, - &sx, &sy); - gen2_emit_composite_dstcoord(sna, dst_x, dst_y); - VERTEX(sx * op->src.scale[0]); - VERTEX(sy * op->src.scale[1]); + _sna_get_transformed_scaled(src_x + r->width, src_y + r->height, + transform, op->src.scale, + &v[2], &v[3]); + + _sna_get_transformed_scaled(src_x, src_y + r->height, + transform, op->src.scale, + &v[6], &v[7]); + + _sna_get_transformed_scaled(src_x, src_y, + transform, op->src.scale, + &v[10], &v[11]); } fastcall static void @@ -959,7 +969,7 @@ static void gen2_magic_ca_pass(struct sna *sna, return; DBG(("%s: batch=%x, vertex=%x\n", __FUNCTION__, - sna->kgem.nbatch, 
sna->render_state.gen2.vertex_offset)); + sna->kgem.nbatch, sna->render.vertex_offset)); assert(op->mask.bo); assert(op->has_component_alpha); @@ -978,7 +988,7 @@ static void gen2_magic_ca_pass(struct sna *sna, BATCH(ablend); sna->render_state.gen2.ls2 = 0; - src = sna->kgem.batch + sna->render_state.gen2.vertex_offset; + src = sna->kgem.batch + sna->render.vertex_offset; dst = sna->kgem.batch + sna->kgem.nbatch; n = 1 + sna->render.vertex_index; sna->kgem.nbatch += n; @@ -993,12 +1003,12 @@ static void gen2_vertex_flush(struct sna *sna, if (sna->render.vertex_index == 0) return; - sna->kgem.batch[sna->render_state.gen2.vertex_offset] |= + sna->kgem.batch[sna->render.vertex_offset] |= sna->render.vertex_index - 1; gen2_magic_ca_pass(sna, op); - sna->render_state.gen2.vertex_offset = 0; + sna->render.vertex_offset = 0; sna->render.vertex_index = 0; } @@ -1006,7 +1016,6 @@ inline static int gen2_get_rectangles(struct sna *sna, const struct sna_composite_op *op, int want) { - struct gen2_render_state *state = &sna->render_state.gen2; int rem = batch_space(sna), size, need; DBG(("%s: want=%d, floats_per_vertex=%d, rem=%d\n", @@ -1030,16 +1039,17 @@ inline static int gen2_get_rectangles(struct sna *sna, } rem -= need; - if (state->vertex_offset == 0) { + if (sna->render.vertex_offset == 0) { if ((sna->kgem.batch[sna->kgem.nbatch-1] & ~0xffff) == (PRIM3D_INLINE | PRIM3D_RECTLIST)) { uint32_t *b = &sna->kgem.batch[sna->kgem.nbatch-1]; + assert(*b & 0xffff); sna->render.vertex_index = 1 + (*b & 0xffff); *b = PRIM3D_INLINE | PRIM3D_RECTLIST; - state->vertex_offset = sna->kgem.nbatch - 1; + sna->render.vertex_offset = sna->kgem.nbatch - 1; assert(!op->need_magic_ca_pass); } else { - state->vertex_offset = sna->kgem.nbatch; + sna->render.vertex_offset = sna->kgem.nbatch; BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST); } } @@ -1144,6 +1154,7 @@ gen2_composite_solid_init(struct sna *sna, channel->filter = PictFilterNearest; channel->repeat = RepeatNormal; channel->is_solid = true; + channel->is_affine = true; channel->width = 1; channel->height = 1; channel->pict_format = PICT_a8r8g8b8; @@ -1251,12 +1262,12 @@ gen2_composite_linear_init(struct sna *sna, dx /= sf; dy /= sf; - channel->u.gen2.linear_dx = dx; - channel->u.gen2.linear_dy = dy; - channel->u.gen2.linear_offset = -dx*(x0+dst_x-x) + -dy*(y0+dst_y-y); + channel->u.linear.dx = dx; + channel->u.linear.dy = dy; + channel->u.linear.offset = -dx*(x0+dst_x-x) + -dy*(y0+dst_y-y); DBG(("%s: dx=%f, dy=%f, offset=%f\n", - __FUNCTION__, dx, dy, channel->u.gen2.linear_offset)); + __FUNCTION__, dx, dy, channel->u.linear.offset)); return channel->bo != NULL; } @@ -1304,7 +1315,8 @@ static bool gen2_check_card_format(struct sna *sna, PicturePtr picture, struct sna_composite_channel *channel, - int x, int y, int w, int h) + int x, int y, int w, int h, + bool *fixup_alpha) { uint32_t format = picture->format; unsigned int i; @@ -1316,7 +1328,7 @@ gen2_check_card_format(struct sna *sna, for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) { if (i85x_tex_formats[i].fmt == format) { - if (sna->kgem.gen >= 21) + if (sna->kgem.gen >= 021) return true; if (source_is_covered(picture, x, y, w,h)) { @@ -1324,10 +1336,12 @@ gen2_check_card_format(struct sna *sna, return true; } + *fixup_alpha = true; return false; } } + *fixup_alpha = false; return false; } @@ -1343,6 +1357,7 @@ gen2_composite_picture(struct sna *sna, PixmapPtr pixmap; uint32_t color; int16_t dx, dy; + bool fixup_alpha; DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", __FUNCTION__, x, y, w, h, dst_x, dst_y)); @@ 
-1417,9 +1432,9 @@ gen2_composite_picture(struct sna *sna, } else channel->transform = picture->transform; - if (!gen2_check_card_format(sna, picture, channel, x, y, w ,h)) + if (!gen2_check_card_format(sna, picture, channel, x, y, w ,h, &fixup_alpha)) return sna_render_picture_convert(sna, picture, channel, pixmap, - x, y, w, h, dst_x, dst_y); + x, y, w, h, dst_x, dst_y, fixup_alpha); channel->pict_format = picture->format; if (too_large(pixmap->drawable.width, pixmap->drawable.height)) @@ -1499,49 +1514,6 @@ gen2_composite_set_target(struct sna *sna, } static bool -try_blt(struct sna *sna, - PicturePtr dst, - PicturePtr src, - int width, int height) -{ - uint32_t color; - - if (sna->kgem.mode != KGEM_RENDER) { - DBG(("%s: already performing BLT\n", __FUNCTION__)); - return true; - } - - if (too_large(width, height)) { - DBG(("%s: operation too large for 3D pipe (%d, %d)\n", - __FUNCTION__, width, height)); - return true; - } - - if (too_large(dst->pDrawable->width, dst->pDrawable->height)) { - DBG(("%s: target too large for 3D pipe (%d, %d)\n", - __FUNCTION__, - dst->pDrawable->width, dst->pDrawable->height)); - return true; - } - - /* If it is a solid, try to use the BLT paths */ - if (sna_picture_is_solid(src, &color)) - return true; - - if (!src->pDrawable) - return false; - - if (too_large(src->pDrawable->width, src->pDrawable->height)) { - DBG(("%s: source too large for 3D pipe (%d, %d)\n", - __FUNCTION__, - src->pDrawable->width, src->pDrawable->height)); - return true; - } - - return !is_gpu(src->pDrawable); -} - -static bool is_unhandled_gradient(PicturePtr picture) { if (picture->pDrawable) @@ -1563,12 +1535,6 @@ has_alphamap(PicturePtr p) } static bool -untransformed(PicturePtr p) -{ - return !p->transform || pixman_transform_is_int_translate(p->transform); -} - -static bool need_upload(PicturePtr p) { return p->pDrawable && unattached(p->pDrawable) && untransformed(p); @@ -1614,7 +1580,6 @@ gen2_composite_fallback(struct sna *sna, PicturePtr mask, PicturePtr dst) { - struct sna_pixmap *priv; PixmapPtr src_pixmap; PixmapPtr mask_pixmap; PixmapPtr dst_pixmap; @@ -1653,8 +1618,7 @@ gen2_composite_fallback(struct sna *sna, } /* If anything is on the GPU, push everything out to the GPU */ - priv = sna_pixmap(dst_pixmap); - if (priv && priv->gpu_damage && !priv->clear) { + if (dst_use_gpu(dst_pixmap)) { DBG(("%s: dst is already on the GPU, try to use GPU\n", __FUNCTION__)); return false; @@ -1689,14 +1653,14 @@ gen2_composite_fallback(struct sna *sna, if (too_large(dst_pixmap->drawable.width, dst_pixmap->drawable.height) && - (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage))) { + dst_is_cpu(dst_pixmap)) { DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); return true; } DBG(("%s: dst is not on the GPU and the operation should not fallback\n", __FUNCTION__)); - return false; + return dst_use_cpu(dst_pixmap); } static int @@ -1709,6 +1673,12 @@ reuse_source(struct sna *sna, if (src_x != msk_x || src_y != msk_y) return false; + if (sna_picture_is_solid(mask, &color)) + return gen2_composite_solid_init(sna, mc, color); + + if (sc->is_solid) + return false; + if (src == mask) { DBG(("%s: mask is source\n", __FUNCTION__)); *mc = *sc; @@ -1716,12 +1686,6 @@ reuse_source(struct sna *sna, return true; } - if (sna_picture_is_solid(mask, &color)) - return gen2_composite_solid_init(sna, mc, color); - - if (sc->is_solid) - return false; - if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) return false; @@ -1773,13 +1737,8 @@ gen2_render_composite(struct sna *sna, return 
false; } - /* Try to use the BLT engine unless it implies a - * 3D -> 2D context switch. - */ if (mask == NULL && - try_blt(sna, dst, src, width, height) && - sna_blt_composite(sna, - op, src, dst, + sna_blt_composite(sna, op, src, dst, src_x, src_y, dst_x, dst_y, width, height, @@ -1805,6 +1764,8 @@ gen2_render_composite(struct sna *sna, } tmp->op = op; + + sna_render_composite_redirect_init(tmp); if (too_large(tmp->dst.width, tmp->dst.height) || tmp->dst.bo->pitch > MAX_3D_PITCH) { if (!sna_render_composite_redirect(sna, tmp, @@ -1818,6 +1779,8 @@ gen2_render_composite(struct sna *sna, dst_x, dst_y, dst->polyMode == PolyModePrecise)) { case -1: + DBG(("%s: fallback -- unable to prepare source\n", + __FUNCTION__)); goto cleanup_dst; case 0: gen2_composite_solid_init(sna, &tmp->src, 0); @@ -1841,6 +1804,8 @@ gen2_render_composite(struct sna *sna, dst_x, dst_y, dst->polyMode == PolyModePrecise)) { case -1: + DBG(("%s: fallback -- unable to prepare mask\n", + __FUNCTION__)); goto cleanup_src; case 0: gen2_composite_solid_init(sna, &tmp->mask, 0); @@ -1857,8 +1822,12 @@ gen2_render_composite(struct sna *sna, tmp->has_component_alpha = true; if (gen2_blend_op[op].src_alpha && (gen2_blend_op[op].src_blend != BLENDFACTOR_ZERO)) { - if (op != PictOpOver) - return false; + if (op != PictOpOver) { + DBG(("%s: fallback -- unsupported CA blend (src_blend=%d)\n", + __FUNCTION__, + gen2_blend_op[op].src_blend)); + goto cleanup_src; + } tmp->need_magic_ca_pass = true; tmp->op = PictOpOutReverse; @@ -1866,8 +1835,12 @@ gen2_render_composite(struct sna *sna, } /* convert solid to a texture (pure convenience) */ - if (tmp->mask.is_solid) + if (tmp->mask.is_solid && tmp->src.is_solid) { + assert(tmp->mask.is_affine); tmp->mask.bo = sna_render_get_solid(sna, tmp->mask.u.gen2.pixel); + if (!tmp->mask.bo) + goto cleanup_src; + } } tmp->floats_per_vertex = 2; @@ -1880,18 +1853,27 @@ gen2_render_composite(struct sna *sna, tmp->prim_emit = gen2_emit_composite_primitive; if (tmp->mask.bo) { if (tmp->mask.transform == NULL) { - if (tmp->src.is_solid) + if (tmp->src.is_solid) { + assert(tmp->floats_per_rect == 12); tmp->prim_emit = gen2_emit_composite_primitive_constant_identity_mask; + } } } else { - if (tmp->src.is_solid) + if (tmp->src.is_solid) { + assert(tmp->floats_per_rect == 6); tmp->prim_emit = gen2_emit_composite_primitive_constant; - else if (tmp->src.is_linear) + } else if (tmp->src.is_linear) { + assert(tmp->floats_per_rect == 12); tmp->prim_emit = gen2_emit_composite_primitive_linear; - else if (tmp->src.transform == NULL) + } else if (tmp->src.transform == NULL) { + assert(tmp->floats_per_rect == 12); tmp->prim_emit = gen2_emit_composite_primitive_identity; - else if (tmp->src.is_affine) + } else if (tmp->src.is_affine) { + assert(tmp->floats_per_rect == 12); + tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; + tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; tmp->prim_emit = gen2_emit_composite_primitive_affine; + } } tmp->blt = gen2_render_composite_blt; @@ -1905,8 +1887,11 @@ gen2_render_composite(struct sna *sna, kgem_submit(&sna->kgem); if (!kgem_check_bo(&sna->kgem, tmp->dst.bo, tmp->src.bo, tmp->mask.bo, - NULL)) + NULL)) { + DBG(("%s: fallback, operation does not fit into GTT\n", + __FUNCTION__)); goto cleanup_mask; + } } gen2_emit_composite_state(sna, tmp); @@ -2016,8 +2001,8 @@ gen2_emit_composite_spans_primitive_affine_source(struct sna *sna, { PictTransform *transform = op->base.src.transform; uint32_t alpha = (uint8_t)(255 * opacity) << 24; - float x, y, *v; - + float *v; + v 
= (float *)sna->kgem.batch + sna->kgem.nbatch; sna->kgem.nbatch += 15; @@ -2029,26 +2014,20 @@ gen2_emit_composite_spans_primitive_affine_source(struct sna *sna, *((uint32_t *)v + 7) = alpha; *((uint32_t *)v + 12) = alpha; - _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x2, - (int)op->base.src.offset[1] + box->y2, - transform, - &x, &y); - v[3] = x * op->base.src.scale[0]; - v[4] = y * op->base.src.scale[1]; + _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2, + (int)op->base.src.offset[1] + box->y2, + transform, op->base.src.scale, + &v[3], &v[4]); - _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1, - (int)op->base.src.offset[1] + box->y2, - transform, - &x, &y); - v[8] = x * op->base.src.scale[0]; - v[9] = y * op->base.src.scale[1]; + _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, + (int)op->base.src.offset[1] + box->y2, + transform, op->base.src.scale, + &v[8], &v[9]); - _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1, - (int)op->base.src.offset[1] + box->y1, - transform, - &x, &y); - v[13] = x * op->base.src.scale[0]; - v[14] = y * op->base.src.scale[1]; + _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, + (int)op->base.src.offset[1] + box->y1, + transform, op->base.src.scale, + &v[13], &v[14]); } static void @@ -2131,7 +2110,7 @@ static void gen2_emit_composite_spans_state(struct sna *sna, { uint32_t unwind; - gen2_get_batch(sna); + gen2_get_batch(sna, &op->base); gen2_emit_target(sna, &op->base); unwind = sna->kgem.nbatch; @@ -2248,7 +2227,7 @@ gen2_check_composite_spans(struct sna *sna, return false; if (need_tiling(sna, width, height)) { - if (!is_gpu(dst->pDrawable)) { + if (!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { DBG(("%s: fallback, tiled operation not on GPU\n", __FUNCTION__)); return false; @@ -2289,6 +2268,8 @@ gen2_render_composite_spans(struct sna *sna, } tmp->base.op = op; + + sna_render_composite_redirect_init(&tmp->base); if (too_large(tmp->base.dst.width, tmp->base.dst.height) || tmp->base.dst.bo->pitch > MAX_3D_PITCH) { if (!sna_render_composite_redirect(sna, &tmp->base, @@ -2321,8 +2302,11 @@ gen2_render_composite_spans(struct sna *sna, tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 
2 : 3; if (tmp->base.src.transform == NULL) tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source; - else if (tmp->base.src.is_affine) + else if (tmp->base.src.is_affine) { + tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2]; + tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2]; tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source; + } } tmp->base.mask.bo = NULL; tmp->base.floats_per_rect = 3*tmp->base.floats_per_vertex; @@ -2387,7 +2371,7 @@ static void gen2_emit_fill_composite_state(struct sna *sna, { uint32_t ls1; - gen2_get_batch(sna); + gen2_get_batch(sna, op); gen2_emit_target(sna, op); ls1 = sna->kgem.nbatch; @@ -2443,24 +2427,6 @@ gen2_render_fill_boxes_try_blt(struct sna *sna, pixel, box, n); } -static inline bool prefer_blt_fill(struct sna *sna) -{ -#if PREFER_BLT_FILL - return true; -#else - return sna->kgem.mode != KGEM_RENDER; -#endif -} - -static inline bool prefer_blt_copy(struct sna *sna, unsigned flags) -{ -#if PREFER_BLT_COPY - return true; -#else - return sna->kgem.mode != KGEM_RENDER; -#endif -} - static bool gen2_render_fill_boxes(struct sna *sna, CARD8 op, @@ -2483,6 +2449,11 @@ gen2_render_fill_boxes(struct sna *sna, dst, dst_bo, box, n); #endif + if (gen2_render_fill_boxes_try_blt(sna, op, format, color, + dst, dst_bo, + box, n)) + return true; + DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n", __FUNCTION__, op, (int)format, @@ -2493,11 +2464,6 @@ gen2_render_fill_boxes(struct sna *sna, !gen2_check_dst_format(format)) { DBG(("%s: try blt, too large or incompatible destination\n", __FUNCTION__)); - if (gen2_render_fill_boxes_try_blt(sna, op, format, color, - dst, dst_bo, - box, n)) - return true; - if (!gen2_check_dst_format(format)) return false; @@ -2506,12 +2472,6 @@ gen2_render_fill_boxes(struct sna *sna, dst, dst_bo, box, n); } - if (prefer_blt_fill(sna) && - gen2_render_fill_boxes_try_blt(sna, op, format, color, - dst, dst_bo, - box, n)) - return true; - if (op == PictOpClear) pixel = 0; else if (!sna_get_pixel_from_rgba(&pixel, @@ -2572,7 +2532,7 @@ static void gen2_emit_fill_state(struct sna *sna, { uint32_t ls1; - gen2_get_batch(sna); + gen2_get_batch(sna, op); gen2_emit_target(sna, op); ls1 = sna->kgem.nbatch; @@ -2683,8 +2643,7 @@ gen2_render_fill(struct sna *sna, uint8_t alu, #endif /* Prefer to use the BLT if already engaged */ - if (prefer_blt_fill(sna) && - sna_blt_fill(sna, alu, + if (sna_blt_fill(sna, alu, dst_bo, dst->drawable.bitsPerPixel, color, tmp)) @@ -2693,10 +2652,7 @@ gen2_render_fill(struct sna *sna, uint8_t alu, /* Must use the BLT if we can't RENDER... */ if (too_large(dst->drawable.width, dst->drawable.height) || dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH) - return sna_blt_fill(sna, alu, - dst_bo, dst->drawable.bitsPerPixel, - color, - tmp); + return false; tmp->base.op = alu; tmp->base.dst.pixmap = dst; @@ -2761,16 +2717,14 @@ gen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, #endif /* Prefer to use the BLT if already engaged */ - if (prefer_blt_fill(sna) && - gen2_render_fill_one_try_blt(sna, dst, bo, color, + if (gen2_render_fill_one_try_blt(sna, dst, bo, color, x1, y1, x2, y2, alu)) return true; /* Must use the BLT if we can't RENDER... 
*/ if (too_large(dst->drawable.width, dst->drawable.height) || bo->pitch < 8 || bo->pitch > MAX_3D_PITCH) - return gen2_render_fill_one_try_blt(sna, dst, bo, color, - x1, y1, x2, y2, alu); + return false; if (!kgem_check_bo(&sna->kgem, bo, NULL)) { kgem_submit(&sna->kgem); @@ -2865,7 +2819,7 @@ static void gen2_emit_copy_state(struct sna *sna, const struct sna_composite_op { uint32_t ls1, v; - gen2_get_batch(sna); + gen2_get_batch(sna, op); if (kgem_bo_is_dirty(op->src.bo)) { if (op->src.bo == op->dst.bo) @@ -2925,8 +2879,7 @@ gen2_render_copy_boxes(struct sna *sna, uint8_t alu, DBG(("%s (%d, %d)->(%d, %d) x %d\n", __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n)); - if (prefer_blt_copy(sna, flags) && - sna_blt_compare_depth(&src->drawable, &dst->drawable) && + if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && sna_blt_copy_boxes(sna, alu, src_bo, src_dx, src_dy, dst_bo, dst_dx, dst_dy, @@ -3091,8 +3044,7 @@ gen2_render_copy(struct sna *sna, uint8_t alu, #endif /* Prefer to use the BLT */ - if (prefer_blt_copy(sna, 0) && - sna_blt_compare_depth(&src->drawable, &dst->drawable) && + if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && sna_blt_copy(sna, alu, src_bo, dst_bo, dst->drawable.bitsPerPixel, @@ -3145,7 +3097,6 @@ gen2_render_reset(struct sna *sna) { sna->render_state.gen2.need_invariant = true; sna->render_state.gen2.logic_op_enabled = 0; - sna->render_state.gen2.vertex_offset = 0; sna->render_state.gen2.target = 0; sna->render_state.gen2.ls1 = 0; @@ -3160,6 +3111,7 @@ static void gen2_render_flush(struct sna *sna) { assert(sna->render.vertex_index == 0); + assert(sna->render.vertex_offset == 0); } static void @@ -3168,13 +3120,13 @@ gen2_render_context_switch(struct kgem *kgem, { struct sna *sna = container_of(kgem, struct sna, kgem); - if (!kgem->mode) + if (!kgem->nbatch) return; /* Reload BLT registers following a lost context */ sna->blt_state.fill_bo = 0; - if (kgem_is_idle(kgem)) { + if (kgem_ring_is_idle(kgem, kgem->ring)) { DBG(("%s: GPU idle, flushing\n", __FUNCTION__)); _kgem_submit(kgem); } @@ -3191,10 +3143,12 @@ bool gen2_render_init(struct sna *sna) */ #if !NO_COMPOSITE render->composite = gen2_render_composite; + render->prefer_gpu |= PREFER_GPU_RENDER; #endif #if !NO_COMPOSITE_SPANS render->check_composite_spans = gen2_check_composite_spans; render->composite_spans = gen2_render_composite_spans; + render->prefer_gpu |= PREFER_GPU_SPANS; #endif render->fill_boxes = gen2_render_fill_boxes; render->fill = gen2_render_fill; diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c index 7c303f419..95d44ab56 100644 --- a/src/sna/gen3_render.c +++ b/src/sna/gen3_render.c @@ -431,6 +431,26 @@ gen3_emit_composite_primitive_constant(struct sna *sna, } fastcall static void +gen3_emit_composite_boxes_constant(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + v[0] = box->x2; + v[1] = box->y2; + + v[2] = box->x1; + v[3] = box->y2; + + v[4] = box->x1; + v[5] = box->y1; + + box++; + v += 6; + } while (--nbox); +} + +fastcall static void gen3_emit_composite_primitive_identity_gradient(struct sna *sna, const struct sna_composite_op *op, const struct sna_composite_rectangles *r) @@ -457,6 +477,32 @@ gen3_emit_composite_primitive_identity_gradient(struct sna *sna, } fastcall static void +gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + v[0] = box->x2; + v[1] = box->y2; + v[2] = box->x2 + op->src.offset[0]; + v[3] = box->y2 + op->src.offset[1]; 
+ + v[4] = box->x1; + v[5] = box->y2; + v[6] = box->x1 + op->src.offset[0]; + v[7] = box->y2 + op->src.offset[1]; + + v[8] = box->x1; + v[9] = box->y1; + v[10] = box->x1 + op->src.offset[0]; + v[11] = box->y1 + op->src.offset[1]; + + v += 12; + box++; + } while (--nbox); +} + +fastcall static void gen3_emit_composite_primitive_affine_gradient(struct sna *sna, const struct sna_composite_op *op, const struct sna_composite_rectangles *r) @@ -494,6 +540,40 @@ gen3_emit_composite_primitive_affine_gradient(struct sna *sna, } fastcall static void +gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + const PictTransform *transform = op->src.transform; + + do { + v[0] = box->x2; + v[1] = box->y2; + sna_get_transformed_coordinates(box->x2 + op->src.offset[0], + box->y2 + op->src.offset[1], + transform, + &v[2], &v[3]); + + v[4] = box->x1; + v[5] = box->y2; + sna_get_transformed_coordinates(box->x1 + op->src.offset[0], + box->y2 + op->src.offset[1], + transform, + &v[6], &v[7]); + + v[8] = box->x1; + v[9] = box->y1; + sna_get_transformed_coordinates(box->x1 + op->src.offset[0], + box->y1 + op->src.offset[1], + transform, + &v[10], &v[11]); + + box++; + v += 12; + } while (--nbox); +} + +fastcall static void gen3_emit_composite_primitive_identity_source(struct sna *sna, const struct sna_composite_op *op, const struct sna_composite_rectangles *r) @@ -519,6 +599,28 @@ gen3_emit_composite_primitive_identity_source(struct sna *sna, } fastcall static void +gen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + v[0] = box->x2 + op->dst.x; + v[8] = v[4] = box->x1 + op->dst.x; + v[5] = v[1] = box->y2 + op->dst.y; + v[9] = box->y1 + op->dst.y; + + v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; + v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; + + v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; + v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; + + v += 12; + box++; + } while (--nbox); +} + +fastcall static void gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna, const struct sna_composite_op *op, const struct sna_composite_rectangles *r) @@ -544,6 +646,28 @@ gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna, } fastcall static void +gen3_emit_composite_boxes_identity_source_no_offset(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + v[0] = box->x2; + v[8] = v[4] = box->x1; + v[5] = v[1] = box->y2; + v[9] = box->y1; + + v[10] = v[6] = box->x1 * op->src.scale[0]; + v[2] = box->x2 * op->src.scale[0]; + + v[11] = box->y1 * op->src.scale[1]; + v[7] = v[3] = box->y2 * op->src.scale[1]; + + v += 12; + box++; + } while (--nbox); +} + +fastcall static void gen3_emit_composite_primitive_affine_source(struct sna *sna, const struct sna_composite_op *op, const struct sna_composite_rectangles *r) @@ -553,29 +677,60 @@ gen3_emit_composite_primitive_affine_source(struct sna *sna, int16_t dst_y = r->dst.y + op->dst.y; int src_x = r->src.x + (int)op->src.offset[0]; int src_y = r->src.y + (int)op->src.offset[1]; - float sx, sy; + float *v; - _sna_get_transformed_coordinates(src_x + r->width, src_y + r->height, - transform, - &sx, &sy); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; - gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); - OUT_VERTEX(sx * op->src.scale[0]); - OUT_VERTEX(sy * 
op->src.scale[1]); + v[0] = dst_x + r->width; + v[5] = v[1] = dst_y + r->height; + v[8] = v[4] = dst_x; + v[9] = dst_y; - _sna_get_transformed_coordinates(src_x, src_y + r->height, - transform, - &sx, &sy); - gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); - OUT_VERTEX(sx * op->src.scale[0]); - OUT_VERTEX(sy * op->src.scale[1]); + _sna_get_transformed_scaled(src_x + r->width, src_y + r->height, + transform, op->src.scale, + &v[2], &v[3]); - _sna_get_transformed_coordinates(src_x, src_y, - transform, - &sx, &sy); - gen3_emit_composite_dstcoord(sna, dst_x, dst_y); - OUT_VERTEX(sx * op->src.scale[0]); - OUT_VERTEX(sy * op->src.scale[1]); + _sna_get_transformed_scaled(src_x, src_y + r->height, + transform, op->src.scale, + &v[6], &v[7]); + + _sna_get_transformed_scaled(src_x, src_y, + transform, op->src.scale, + &v[10], &v[11]); +} + +fastcall static void +gen3_emit_composite_boxes_affine_source(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + const PictTransform *transform = op->src.transform; + + do { + v[0] = box->x2; + v[5] = v[1] = box->y2; + v[8] = v[4] = box->x1; + v[9] = box->y1; + + _sna_get_transformed_scaled(box->x2 + op->src.offset[0], + box->y2 + op->src.offset[1], + transform, op->src.scale, + &v[2], &v[3]); + + _sna_get_transformed_scaled(box->x1 + op->src.offset[0], + box->y2 + op->src.offset[1], + transform, op->src.scale, + &v[6], &v[7]); + + _sna_get_transformed_scaled(box->x1 + op->src.offset[0], + box->y1 + op->src.offset[1], + transform, op->src.scale, + &v[10], &v[11]); + + v += 12; + box++; + } while (--nbox); } fastcall static void @@ -929,13 +1084,6 @@ gen3_composite_emit_shader(struct sna *sna, if (mask->u.gen3.type == SHADER_NONE) mask = NULL; - if (mask && src->is_opaque && - gen3_blend_op[blend].src_alpha && - op->has_component_alpha) { - src = mask; - mask = NULL; - } - id = (src->u.gen3.type | src->is_affine << 4 | src->alpha_fixup << 5 | @@ -1298,9 +1446,9 @@ static void gen3_emit_invariant(struct sna *sna) #define MAX_OBJECTS 3 /* worst case: dst + src + mask */ static void -gen3_get_batch(struct sna *sna) +gen3_get_batch(struct sna *sna, const struct sna_composite_op *op) { - kgem_set_mode(&sna->kgem, KGEM_RENDER); + kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); if (!kgem_check_batch(&sna->kgem, 200)) { DBG(("%s: flushing batch: size %d > %d\n", @@ -1389,7 +1537,7 @@ static void gen3_emit_composite_state(struct sna *sna, unsigned int tex_count, n; uint32_t ss2; - gen3_get_batch(sna); + gen3_get_batch(sna, op); if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo) @@ -1578,11 +1726,11 @@ static void gen3_emit_composite_state(struct sna *sna, gen3_composite_emit_shader(sna, op, op->op); } -static void gen3_magic_ca_pass(struct sna *sna, +static bool gen3_magic_ca_pass(struct sna *sna, const struct sna_composite_op *op) { if (!op->need_magic_ca_pass) - return; + return false; DBG(("%s(%d)\n", __FUNCTION__, sna->render.vertex_index - sna->render.vertex_start)); @@ -1596,23 +1744,24 @@ static void gen3_magic_ca_pass(struct sna *sna, OUT_BATCH(sna->render.vertex_start); sna->render_state.gen3.last_blend = 0; + return true; } static void gen3_vertex_flush(struct sna *sna) { - assert(sna->render_state.gen3.vertex_offset); + assert(sna->render.vertex_offset); DBG(("%s[%x] = %d\n", __FUNCTION__, - 4*sna->render_state.gen3.vertex_offset, + 4*sna->render.vertex_offset, sna->render.vertex_index - sna->render.vertex_start)); - 
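/* _sna_get_transformed_scaled, used throughout this rewrite, folds the
 * source transform and the 1/texture-size scale factors into a single
 * call, writing both texture coordinates at once instead of returning
 * them for the caller to scale. A float-only sketch of the affine case
 * (the real helper takes a fixed-point PictTransform; this local matrix
 * form is an assumption for illustration): */
static void transformed_scaled(int x, int y,
			       const float m[3][3], const float scale[2],
			       float *ox, float *oy)
{
	/* affine: m[2][0] == m[2][1] == 0; any constant w in m[2][2] is
	 * pre-divided into scale[] by the caller, as this patch does when
	 * it selects the affine emitters */
	*ox = (m[0][0] * x + m[0][1] * y + m[0][2]) * scale[0];
	*oy = (m[1][0] * x + m[1][1] * y + m[1][2]) * scale[1];
}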
sna->kgem.batch[sna->render_state.gen3.vertex_offset] = + sna->kgem.batch[sna->render.vertex_offset] = PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL | (sna->render.vertex_index - sna->render.vertex_start); - sna->kgem.batch[sna->render_state.gen3.vertex_offset + 1] = + sna->kgem.batch[sna->render.vertex_offset + 1] = sna->render.vertex_start; - sna->render_state.gen3.vertex_offset = 0; + sna->render.vertex_offset = 0; } static int gen3_vertex_finish(struct sna *sna) @@ -1622,24 +1771,27 @@ static int gen3_vertex_finish(struct sna *sna) DBG(("%s: used=%d/%d, vbo active? %d\n", __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size, sna->render.vbo ? sna->render.vbo->handle : 0)); + assert(sna->render.vertex_offset == 0); assert(sna->render.vertex_used); assert(sna->render.vertex_used <= sna->render.vertex_size); + sna_vertex_wait__locked(&sna->render); + bo = sna->render.vbo; if (bo) { - if (sna->render_state.gen3.vertex_offset) - gen3_vertex_flush(sna); - DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0])); - sna->kgem.batch[sna->render.vertex_reloc[0]] = - kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0], - bo, I915_GEM_DOMAIN_VERTEX << 16, 0); + if (sna->render.vertex_reloc[0]) { + sna->kgem.batch[sna->render.vertex_reloc[0]] = + kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0], + bo, I915_GEM_DOMAIN_VERTEX << 16, 0); - sna->render.vertex_reloc[0] = 0; + sna->render.vertex_reloc[0] = 0; + } sna->render.vertex_used = 0; sna->render.vertex_index = 0; + sna->render.vbo = NULL; kgem_bo_destroy(&sna->kgem, bo); } @@ -1671,15 +1823,14 @@ static void gen3_vertex_close(struct sna *sna) struct kgem_bo *bo, *free_bo = NULL; unsigned int delta = 0; - assert(sna->render_state.gen3.vertex_offset == 0); + assert(sna->render.vertex_offset == 0); + if (sna->render.vertex_reloc[0] == 0) + return; DBG(("%s: used=%d/%d, vbo active? %d\n", __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size, sna->render.vbo ? 
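/* gen3_vertex_flush above back-patches a primitive packet that was
 * emitted as placeholders: gen3_rectangle_begin reserves two MI_NOOP
 * dwords and records their offset, and only once the final vertex count
 * is known are the real command and start index written over them. The
 * idiom reduced to a sketch (toy globals; prim_cmd stands in for
 * PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL): */
static unsigned batch[1024];
static int nbatch, vertex_offset, vertex_start, vertex_index;

static void begin_rects(void)
{
	vertex_offset = nbatch;
	batch[nbatch++] = 0;	/* placeholder for the primitive command */
	batch[nbatch++] = 0;	/* placeholder for the start index */
	vertex_start = vertex_index; /* vertex_index advances elsewhere */
}

static void flush_rects(unsigned prim_cmd)
{
	batch[vertex_offset] = prim_cmd | (vertex_index - vertex_start);
	batch[vertex_offset + 1] = vertex_start;
	vertex_offset = 0;	/* mark the packet closed */
}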
sna->render.vbo->handle : 0));
 
-	if (sna->render.vertex_used == 0)
-		return;
-
 	bo = sna->render.vbo;
 	if (bo) {
 		if (sna->render.vertex_size - sna->render.vertex_used < 64) {
@@ -1713,7 +1864,8 @@ static void gen3_vertex_close(struct sna *sna)
 			DBG(("%s: new vbo: %d\n", __FUNCTION__,
 			     sna->render.vertex_used));
 			bo = kgem_create_linear(&sna->kgem,
-						4*sna->render.vertex_used, 0);
+						4*sna->render.vertex_used,
+						CREATE_NO_THROTTLE);
 			if (bo) {
 				assert(bo->snoop == false);
 				kgem_bo_write(&sna->kgem, bo,
@@ -1724,15 +1876,11 @@ static void gen3_vertex_close(struct sna *sna)
 		}
 	}
 
-	DBG(("%s: reloc = %d\n", __FUNCTION__,
-	     sna->render.vertex_reloc[0]));
-
-	if (sna->render.vertex_reloc[0]) {
-		sna->kgem.batch[sna->render.vertex_reloc[0]] =
-			kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
-				       bo, I915_GEM_DOMAIN_VERTEX << 16, delta);
-		sna->render.vertex_reloc[0] = 0;
-	}
+	DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0]));
+	sna->kgem.batch[sna->render.vertex_reloc[0]] =
+		kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
+			       bo, I915_GEM_DOMAIN_VERTEX << 16, delta);
+	sna->render.vertex_reloc[0] = 0;
 
 	if (sna->render.vbo == NULL) {
 		DBG(("%s: resetting vbo\n", __FUNCTION__));
@@ -1752,6 +1900,9 @@ static bool gen3_rectangle_begin(struct sna *sna,
 	struct gen3_render_state *state = &sna->render_state.gen3;
 	int ndwords, i1_cmd = 0, i1_len = 0;
 
+	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
+		return true;
+
 	ndwords = 2;
 	if (op->need_magic_ca_pass)
 		ndwords += 100;
@@ -1774,14 +1925,15 @@ static bool gen3_rectangle_begin(struct sna *sna,
 		}
 	}
 
-	if (sna->kgem.nbatch == 2 + state->last_vertex_offset) {
-		state->vertex_offset = state->last_vertex_offset;
+	if (sna->kgem.nbatch == 2 + state->last_vertex_offset &&
+	    !op->need_magic_ca_pass) {
+		sna->render.vertex_offset = state->last_vertex_offset;
 	} else {
-		state->vertex_offset = sna->kgem.nbatch;
+		sna->render.vertex_offset = sna->kgem.nbatch;
 		OUT_BATCH(MI_NOOP); /* to be filled later */
 		OUT_BATCH(MI_NOOP);
 		sna->render.vertex_start = sna->render.vertex_index;
-		state->last_vertex_offset = state->vertex_offset;
+		state->last_vertex_offset = sna->render.vertex_offset;
 	}
 
 	return true;
@@ -1790,13 +1942,28 @@ static int gen3_get_rectangles__flush(struct sna *sna,
 				       const struct sna_composite_op *op)
 {
+	/* Prevent discarding the new vbo after lock contention */
+	if (sna_vertex_wait__locked(&sna->render)) {
+		int rem = vertex_space(sna);
+		if (rem > op->floats_per_rect)
+			return rem;
+	}
+
 	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ?
105: 5)) return 0; if (!kgem_check_reloc_and_exec(&sna->kgem, 1)) return 0; - if (op->need_magic_ca_pass && sna->render.vbo) - return 0; + if (sna->render.vertex_offset) { + gen3_vertex_flush(sna); + if (gen3_magic_ca_pass(sna, op)) { + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0); + OUT_BATCH(gen3_get_blend_cntl(op->op, + op->has_component_alpha, + op->dst.format)); + gen3_composite_emit_shader(sna, op, op->op); + } + } return gen3_vertex_finish(sna); } @@ -1822,7 +1989,7 @@ start: goto flush; } - if (unlikely(sna->render_state.gen3.vertex_offset == 0 && + if (unlikely(sna->render.vertex_offset == 0 && !gen3_rectangle_begin(sna, op))) goto flush; @@ -1836,12 +2003,15 @@ start: flush: DBG(("%s: flushing batch\n", __FUNCTION__)); - if (sna->render_state.gen3.vertex_offset) { + if (sna->render.vertex_offset) { gen3_vertex_flush(sna); gen3_magic_ca_pass(sna, op); } + sna_vertex_wait__locked(&sna->render); _kgem_submit(&sna->kgem); gen3_emit_composite_state(sna, op); + assert(sna->render.vertex_offset == 0); + assert(sna->render.vertex_reloc[0] == 0); goto start; } @@ -1886,9 +2056,9 @@ gen3_render_composite_box(struct sna *sna, } static void -gen3_render_composite_boxes(struct sna *sna, - const struct sna_composite_op *op, - const BoxRec *box, int nbox) +gen3_render_composite_boxes__blt(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) { DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n", __FUNCTION__, nbox, @@ -1922,12 +2092,66 @@ gen3_render_composite_boxes(struct sna *sna, } static void +gen3_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen3_get_rectangles(sna, op, nbox); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + } while (nbox); +} + +static void +gen3_render_composite_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen3_get_rectangles(sna, op, nbox); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +static void gen3_render_composite_done(struct sna *sna, const struct sna_composite_op *op) { DBG(("%s()\n", __FUNCTION__)); - if (sna->render_state.gen3.vertex_offset) { + if (sna->render.vertex_offset) { gen3_vertex_flush(sna); gen3_magic_ca_pass(sna, op); } @@ -1971,7 +2195,6 @@ gen3_render_reset(struct sna *sna) state->floats_per_vertex = 0; state->last_floats_per_vertex = 0; state->last_vertex_offset = 0; - state->vertex_offset = 0; if (sna->render.vbo != NULL && !kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) { @@ -1979,6 +2202,9 @@ gen3_render_reset(struct sna *sna) __FUNCTION__, sna->render.vbo->presumed_offset)); discard_vbo(sna); } + + 
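/* The __thread emitters above follow one protocol: reserve vertex space
 * while holding the lock, mark the emission as in flight, then fill the
 * vertices outside the lock so other threads can reserve concurrently.
 * Modelled with pthreads below; the sna_vertex_lock/acquire/release
 * primitives are defined elsewhere in sna, so the counter-and-condvar
 * reading is an assumption -- only the call pattern is taken from this
 * patch: */
#include <pthread.h>

static pthread_mutex_t vlock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t vcond = PTHREAD_COND_INITIALIZER;
static int vactive, vused;

static void fill(int base, int n)
{
	(void)base; (void)n;	/* heavy vertex writes would go here */
}

static void emit_threaded(int nbox, int floats_per_rect)
{
	pthread_mutex_lock(&vlock);
	do {
		int n = nbox < 8 ? nbox : 8;	/* space found under the lock */
		int base = vused;
		vused += n * floats_per_rect;
		nbox -= n;

		vactive++;			/* "acquire": writer in flight */
		pthread_mutex_unlock(&vlock);

		fill(base, n);			/* emit outside the lock */

		pthread_mutex_lock(&vlock);
		if (--vactive == 0)		/* "release": wake any flusher */
			pthread_cond_broadcast(&vcond);
	} while (nbox);
	pthread_mutex_unlock(&vlock);
}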
sna->render.vertex_reloc[0] = 0; + sna->render.vertex_offset = 0; } static void @@ -2401,7 +2627,8 @@ gen3_composite_picture(struct sna *sna, if (!gen3_composite_channel_set_format(channel, picture->format) && !gen3_composite_channel_set_xformat(picture, channel, x, y, w, h)) return sna_render_picture_convert(sna, picture, channel, pixmap, - x, y, w, h, dst_x, dst_y); + x, y, w, h, dst_x, dst_y, + false); if (too_large(pixmap->drawable.width, pixmap->drawable.height)) { DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n", @@ -2431,7 +2658,7 @@ source_use_blt(struct sna *sna, PicturePtr picture) if (too_large(picture->pDrawable->width, picture->pDrawable->height)) return true; - return !is_gpu(picture->pDrawable); + return !is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER); } static bool @@ -2589,12 +2816,6 @@ has_alphamap(PicturePtr p) } static bool -untransformed(PicturePtr p) -{ - return !p->transform || pixman_transform_is_int_translate(p->transform); -} - -static bool need_upload(PicturePtr p) { return p->pDrawable && unattached(p->pDrawable) && untransformed(p); @@ -2641,7 +2862,6 @@ gen3_composite_fallback(struct sna *sna, PicturePtr mask, PicturePtr dst) { - struct sna_pixmap *priv; PixmapPtr src_pixmap; PixmapPtr mask_pixmap; PixmapPtr dst_pixmap; @@ -2681,17 +2901,16 @@ gen3_composite_fallback(struct sna *sna, if (mask && mask->componentAlpha && PICT_FORMAT_RGB(mask->format) && - op != PictOpOver && - gen3_blend_op[op].src_blend != BLENDFACT_ZERO) - { + gen3_blend_op[op].src_alpha && + gen3_blend_op[op].src_blend != BLENDFACT_ZERO && + op != PictOpOver) { DBG(("%s: component-alpha mask with op=%d, should fallback\n", __FUNCTION__, op)); return true; } /* If anything is on the GPU, push everything out to the GPU */ - priv = sna_pixmap(dst_pixmap); - if (priv && priv->gpu_damage && !priv->clear) { + if (dst_use_gpu(dst_pixmap)) { DBG(("%s: dst is already on the GPU, try to use GPU\n", __FUNCTION__)); return false; @@ -2726,14 +2945,14 @@ gen3_composite_fallback(struct sna *sna, if (too_large(dst_pixmap->drawable.width, dst_pixmap->drawable.height) && - (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage))) { + dst_is_cpu(dst_pixmap)) { DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); return true; } - DBG(("%s: dst is not on the GPU and the operation should not fallback\n", - __FUNCTION__)); - return false; + DBG(("%s: dst is not on the GPU and the operation should not fallback: use-cpu? 
%d\n",
+	     __FUNCTION__, dst_use_cpu(dst_pixmap)));
+	return dst_use_cpu(dst_pixmap);
 }
 
 static int
@@ -2922,13 +3141,12 @@ gen3_render_composite(struct sna *sna,
 			tmp->mask.u.gen3.type = SHADER_NONE;
 			tmp->has_component_alpha = false;
 		} else if (gen3_blend_op[op].src_alpha &&
-			   (gen3_blend_op[op].src_blend != BLENDFACT_ZERO)) {
+			   gen3_blend_op[op].src_blend != BLENDFACT_ZERO) {
 			if (op != PictOpOver)
 				goto cleanup_mask;
 
 			tmp->need_magic_ca_pass = true;
 			tmp->op = PictOpOutReverse;
-			sna->render.vertex_start = sna->render.vertex_index;
 		}
 	} else {
 		if (tmp->mask.is_opaque) {
@@ -2978,22 +3196,33 @@ gen3_render_composite(struct sna *sna,
 		case SHADER_WHITE:
 		case SHADER_CONSTANT:
 			tmp->prim_emit = gen3_emit_composite_primitive_constant;
+			tmp->emit_boxes = gen3_emit_composite_boxes_constant;
 			break;
 		case SHADER_LINEAR:
 		case SHADER_RADIAL:
-			if (tmp->src.transform == NULL)
+			if (tmp->src.transform == NULL) {
 				tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
-			else if (tmp->src.is_affine)
+				tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient;
+			} else if (tmp->src.is_affine) {
 				tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
+				tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient;
+			}
 			break;
 		case SHADER_TEXTURE:
 			if (tmp->src.transform == NULL) {
-				if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0)
+				if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
 					tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset;
-				else
+					tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset;
+				} else {
 					tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
+					tmp->emit_boxes = gen3_emit_composite_boxes_identity_source;
+				}
+			} else if (tmp->src.is_affine) {
+				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
+				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
 				tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
+				tmp->emit_boxes = gen3_emit_composite_boxes_affine_source;
+			}
 			break;
 		}
 	} else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) {
@@ -3024,7 +3253,11 @@ gen3_render_composite(struct sna *sna,
 	tmp->blt = gen3_render_composite_blt;
 	tmp->box = gen3_render_composite_box;
-	tmp->boxes = gen3_render_composite_boxes;
+	tmp->boxes = gen3_render_composite_boxes__blt;
+	if (tmp->emit_boxes) {
+		tmp->boxes = gen3_render_composite_boxes;
+		tmp->thread_boxes = gen3_render_composite_boxes__thread;
+	}
 	tmp->done = gen3_render_composite_done;
 
 	if (!kgem_check_bo(&sna->kgem,
@@ -3084,6 +3317,26 @@ gen3_emit_composite_spans_primitive_zero(struct sna *sna,
 }
 
 fastcall static void
+gen3_emit_composite_spans_primitive_zero__boxes(const struct sna_composite_spans_op *op,
+						const struct sna_opacity_box *b,
+						int nbox, float *v)
+{
+	do {
+		v[0] = op->base.dst.x + b->box.x2;
+		v[1] = op->base.dst.y + b->box.y2;
+
+		v[2] = op->base.dst.x + b->box.x1;
+		v[3] = v[1];
+
+		v[4] = v[2];
+		v[5] = op->base.dst.y + b->box.y1;
+
+		v += 6;
+		b++;
+	} while (--nbox);
+}
+
+
 fastcall static void
 gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna,
 						   const struct sna_composite_spans_op *op,
 						   const BoxRec *box,
@@ -3099,6 +3352,22 @@ gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna,
 }
 
 fastcall static void
+gen3_emit_composite_spans_primitive_zero_no_offset__boxes(const struct sna_composite_spans_op *op,
+							  const struct sna_opacity_box *b,
+							  int nbox, float *v)
+{
+	do {
+		v[0] = b->box.x2;
+		v[3] = v[1] = b->box.y2;
+		v[4] = v[2] = b->box.x1;
+		v[5] = 
b->box.y1; + + b++; + v += 6; + } while (--nbox); +} + +fastcall static void gen3_emit_composite_spans_primitive_constant(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, @@ -3115,6 +3384,24 @@ gen3_emit_composite_spans_primitive_constant(struct sna *sna, } fastcall static void +gen3_emit_composite_spans_primitive_constant__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + do { + v[0] = op->base.dst.x + b->box.x2; + v[6] = v[3] = op->base.dst.x + b->box.x1; + v[4] = v[1] = op->base.dst.y + b->box.y2; + v[7] = op->base.dst.y + b->box.y1; + v[8] = v[5] = v[2] = b->alpha; + + v += 9; + b++; + } while (--nbox); +} + +fastcall static void gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, @@ -3131,6 +3418,23 @@ gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna, } fastcall static void +gen3_emit_composite_spans_primitive_constant_no_offset__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, float *v) +{ + do { + v[0] = b->box.x2; + v[6] = v[3] = b->box.x1; + v[4] = v[1] = b->box.y2; + v[7] = b->box.y1; + v[8] = v[5] = v[2] = b->alpha; + + v += 9; + b++; + } while (--nbox); +} + +fastcall static void gen3_emit_composite_spans_primitive_identity_source(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, @@ -3159,13 +3463,43 @@ gen3_emit_composite_spans_primitive_identity_source(struct sna *sna, } fastcall static void +gen3_emit_composite_spans_primitive_identity_source__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + do { + v[0] = op->base.dst.x + b->box.x2; + v[1] = op->base.dst.y + b->box.y2; + v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0]; + v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1]; + v[4] = b->alpha; + + v[5] = op->base.dst.x + b->box.x1; + v[6] = v[1]; + v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0]; + v[8] = v[3]; + v[9] = b->alpha; + + v[10] = v[5]; + v[11] = op->base.dst.y + b->box.y1; + v[12] = v[7]; + v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1]; + v[14] = b->alpha; + + v += 15; + b++; + } while (--nbox); +} + +fastcall static void gen3_emit_composite_spans_primitive_affine_source(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, float opacity) { PictTransform *transform = op->base.src.transform; - float x, y, *v; + float *v; v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 15; @@ -3174,30 +3508,56 @@ gen3_emit_composite_spans_primitive_affine_source(struct sna *sna, v[6] = v[1] = op->base.dst.y + box->y2; v[10] = v[5] = op->base.dst.x + box->x1; v[11] = op->base.dst.y + box->y1; - v[4] = opacity; - v[9] = opacity; - v[14] = opacity; - - _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x2, - (int)op->base.src.offset[1] + box->y2, - transform, - &x, &y); - v[2] = x * op->base.src.scale[0]; - v[3] = y * op->base.src.scale[1]; + v[14] = v[9] = v[4] = opacity; + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2, + (int)op->base.src.offset[1] + box->y2, + transform, op->base.src.scale, + &v[2], &v[3]); + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, + (int)op->base.src.offset[1] + box->y2, + transform, op->base.src.scale, + &v[7], &v[8]); + + 
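/* The spans __boxes emitters take struct sna_opacity_box -- inferred
 * from the accesses above to be a BoxRec paired with a per-box alpha --
 * and write three vertices per rectangle. For a textured source that is
 * five floats per vertex, fifteen per box; an illustrative view of the
 * layout only, the driver itself writes raw floats: */
struct span_vertex { float x, y, u, v, alpha; };
struct span_rect   { struct span_vertex br, bl, tl; };	/* 15 floats */
/* The constant and zero shaders drop u/v: 3 floats per vertex, 9 per box. */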
_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, + (int)op->base.src.offset[1] + box->y1, + transform, op->base.src.scale, + &v[12], &v[13]); +} - _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1, - (int)op->base.src.offset[1] + box->y2, - transform, - &x, &y); - v[7] = x * op->base.src.scale[0]; - v[8] = y * op->base.src.scale[1]; +fastcall static void +gen3_emit_composite_spans_primitive_affine_source__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + PictTransform *transform = op->base.src.transform; - _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1, - (int)op->base.src.offset[1] + box->y1, - transform, - &x, &y); - v[12] = x * op->base.src.scale[0]; - v[13] = y * op->base.src.scale[1]; + do { + v[0] = op->base.dst.x + b->box.x2; + v[6] = v[1] = op->base.dst.y + b->box.y2; + v[10] = v[5] = op->base.dst.x + b->box.x1; + v[11] = op->base.dst.y + b->box.y1; + v[14] = v[9] = v[4] = b->alpha; + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2, + (int)op->base.src.offset[1] + b->box.y2, + transform, op->base.src.scale, + &v[2], &v[3]); + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1, + (int)op->base.src.offset[1] + b->box.y2, + transform, op->base.src.scale, + &v[7], &v[8]); + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1, + (int)op->base.src.offset[1] + b->box.y1, + transform, op->base.src.scale, + &v[12], &v[13]); + v += 15; + b++; + } while (--nbox); } fastcall static void @@ -3229,6 +3589,36 @@ gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna, } fastcall static void +gen3_emit_composite_spans_primitive_identity_gradient__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + do { + v[0] = op->base.dst.x + b->box.x2; + v[1] = op->base.dst.y + b->box.y2; + v[2] = op->base.src.offset[0] + b->box.x2; + v[3] = op->base.src.offset[1] + b->box.y2; + v[4] = b->alpha; + + v[5] = op->base.dst.x + b->box.x1; + v[6] = v[1]; + v[7] = op->base.src.offset[0] + b->box.x1; + v[8] = v[3]; + v[9] = b->alpha; + + v[10] = v[5]; + v[11] = op->base.dst.y + b->box.y1; + v[12] = v[7]; + v[13] = op->base.src.offset[1] + b->box.y1; + v[14] = b->alpha; + + v += 15; + b++; + } while (--nbox); +} + +fastcall static void gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, @@ -3264,6 +3654,43 @@ gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna, } fastcall static void +gen3_emit_composite_spans_primitive_affine_gradient__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + PictTransform *transform = op->base.src.transform; + + do { + v[0] = op->base.dst.x + b->box.x2; + v[1] = op->base.dst.y + b->box.y2; + _sna_get_transformed_coordinates((int)op->base.src.offset[0] + b->box.x2, + (int)op->base.src.offset[1] + b->box.y2, + transform, + &v[2], &v[3]); + v[4] = b->alpha; + + v[5] = op->base.dst.x + b->box.x1; + v[6] = v[1]; + _sna_get_transformed_coordinates((int)op->base.src.offset[0] + b->box.x1, + (int)op->base.src.offset[1] + b->box.y2, + transform, + &v[7], &v[8]); + v[9] = b->alpha; + + v[10] = v[5]; + v[11] = op->base.dst.y + b->box.y1; + _sna_get_transformed_coordinates((int)op->base.src.offset[0] + b->box.x1, + (int)op->base.src.offset[1] + b->box.y1, + transform, + &v[12], &v[13]); + v[14] = 
b->alpha; + v += 15; + b++; + } while (--nbox); +} + +fastcall static void gen3_emit_composite_spans_primitive(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, @@ -3308,6 +3735,48 @@ gen3_render_composite_spans_constant_box(struct sna *sna, } fastcall static void +gen3_render_composite_spans_constant_thread_boxes(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * 9; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + do { + v[0] = box->box.x2; + v[6] = v[3] = box->box.x1; + v[4] = v[1] = box->box.y2; + v[7] = box->box.y1; + v[8] = v[5] = v[2] = box->alpha; + v += 9; + box++; + } while (--nbox_this_time); + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +fastcall static void gen3_render_composite_spans_box(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, float opacity) @@ -3355,10 +3824,45 @@ gen3_render_composite_spans_boxes(struct sna *sna, } fastcall static void +gen3_render_composite_spans_boxes__thread(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +fastcall static void gen3_render_composite_spans_done(struct sna *sna, const struct sna_composite_spans_op *op) { - if (sna->render_state.gen3.vertex_offset) + if (sna->render.vertex_offset) gen3_vertex_flush(sna); DBG(("%s()\n", __FUNCTION__)); @@ -3380,12 +3884,11 @@ gen3_check_composite_spans(struct sna *sna, if (gen3_composite_fallback(sna, op, src, NULL, dst)) return false; - if (need_tiling(sna, width, height)) { - if (!is_gpu(dst->pDrawable)) { - DBG(("%s: fallback, tiled operation not on GPU\n", - __FUNCTION__)); - return false; - } + if (need_tiling(sna, width, height) && + !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { + DBG(("%s: fallback, tiled operation not on GPU\n", + __FUNCTION__)); + return false; } return true; @@ -3458,37 +3961,58 @@ gen3_render_composite_spans(struct sna *sna, no_offset = tmp->base.dst.x == 0 && tmp->base.dst.y == 0; tmp->box = gen3_render_composite_spans_box; tmp->boxes = gen3_render_composite_spans_boxes; + tmp->thread_boxes = gen3_render_composite_spans_boxes__thread; tmp->done = 
gen3_render_composite_spans_done; tmp->prim_emit = gen3_emit_composite_spans_primitive; switch (tmp->base.src.u.gen3.type) { case SHADER_NONE: assert(0); case SHADER_ZERO: - tmp->prim_emit = no_offset ? gen3_emit_composite_spans_primitive_zero_no_offset : gen3_emit_composite_spans_primitive_zero; + if (no_offset) { + tmp->prim_emit = gen3_emit_composite_spans_primitive_zero_no_offset; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero_no_offset__boxes; + } else { + tmp->prim_emit = gen3_emit_composite_spans_primitive_zero; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero__boxes; + } break; case SHADER_BLACK: case SHADER_WHITE: case SHADER_CONSTANT: if (no_offset) { tmp->box = gen3_render_composite_spans_constant_box; + tmp->thread_boxes = gen3_render_composite_spans_constant_thread_boxes; tmp->prim_emit = gen3_emit_composite_spans_primitive_constant_no_offset; - } else + tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant_no_offset__boxes; + } else { tmp->prim_emit = gen3_emit_composite_spans_primitive_constant; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__boxes; + } break; case SHADER_LINEAR: case SHADER_RADIAL: - if (tmp->base.src.transform == NULL) + if (tmp->base.src.transform == NULL) { tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient; - else if (tmp->base.src.is_affine) + tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__boxes; + } else if (tmp->base.src.is_affine) { tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__boxes; + } break; case SHADER_TEXTURE: - if (tmp->base.src.transform == NULL) + if (tmp->base.src.transform == NULL) { tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source; - else if (tmp->base.src.is_affine) + tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__boxes; + } else if (tmp->base.src.is_affine) { + tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2]; + tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2]; tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__boxes; + } break; } + if (tmp->emit_boxes == NULL) + tmp->thread_boxes = NULL; tmp->base.mask.bo = NULL; @@ -3528,7 +4052,8 @@ gen3_emit_video_state(struct sna *sna, struct sna_video_frame *frame, PixmapPtr pixmap, struct kgem_bo *dst_bo, - int width, int height) + int width, int height, + bool bilinear) { struct gen3_render_state *state = &sna->render_state.gen3; uint32_t id, ms3, rewind; @@ -3841,9 +4366,9 @@ gen3_emit_video_state(struct sna *sna, } static void -gen3_video_get_batch(struct sna *sna) +gen3_video_get_batch(struct sna *sna, struct kgem_bo *bo) { - kgem_set_mode(&sna->kgem, KGEM_RENDER); + kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); if (!kgem_check_batch(&sna->kgem, 120) || !kgem_check_reloc(&sna->kgem, 4) || @@ -3875,18 +4400,18 @@ gen3_render_video(struct sna *sna, RegionPtr dstRegion, short src_w, short src_h, short drw_w, short drw_h, + short dx, short dy, PixmapPtr pixmap) { struct sna_pixmap *priv = sna_pixmap(pixmap); BoxPtr pbox = REGION_RECTS(dstRegion); int nbox = REGION_NUM_RECTS(dstRegion); - int dxo = dstRegion->extents.x1; - int dyo = dstRegion->extents.y1; - int width = dstRegion->extents.x2 - dxo; - int height = dstRegion->extents.y2 - dyo; + int width = dstRegion->extents.x2 - dstRegion->extents.x1; + int height = dstRegion->extents.y2 - 
dstRegion->extents.y1; float src_scale_x, src_scale_y; int pix_xoff, pix_yoff; struct kgem_bo *dst_bo; + bool bilinear; int copy = 0; DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, src_w, src_h, drw_w, drw_h)); @@ -3908,8 +4433,8 @@ gen3_render_video(struct sna *sna, if (!dst_bo) return false; - pix_xoff = -dxo; - pix_yoff = -dyo; + pix_xoff = -dstRegion->extents.x1; + pix_yoff = -dstRegion->extents.y1; copy = 1; } else { width = pixmap->drawable.width; @@ -3927,22 +4452,24 @@ gen3_render_video(struct sna *sna, #endif } + bilinear = src_w != drw_w || src_h != drw_h; + src_scale_x = ((float)src_w / frame->width) / drw_w; src_scale_y = ((float)src_h / frame->height) / drw_h; DBG(("%s: src offset=(%d, %d), scale=(%f, %f), dst offset=(%d, %d)\n", __FUNCTION__, - dxo, dyo, src_scale_x, src_scale_y, pix_xoff, pix_yoff)); + dx, dy, src_scale_x, src_scale_y, pix_xoff, pix_yoff)); - gen3_video_get_batch(sna); + gen3_video_get_batch(sna, dst_bo); gen3_emit_video_state(sna, video, frame, pixmap, - dst_bo, width, height); + dst_bo, width, height, bilinear); do { int nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4); if (nbox_this_time == 0) { - gen3_video_get_batch(sna); + gen3_video_get_batch(sna, dst_bo); gen3_emit_video_state(sna, video, frame, pixmap, - dst_bo, width, height); + dst_bo, width, height, bilinear); nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4); } nbox -= nbox_this_time; @@ -3962,20 +4489,20 @@ gen3_render_video(struct sna *sna, /* bottom right */ OUT_BATCH_F(box_x2 + pix_xoff); OUT_BATCH_F(box_y2 + pix_yoff); - OUT_BATCH_F((box_x2 - dxo) * src_scale_x); - OUT_BATCH_F((box_y2 - dyo) * src_scale_y); + OUT_BATCH_F((box_x2 - dx) * src_scale_x); + OUT_BATCH_F((box_y2 - dy) * src_scale_y); /* bottom left */ OUT_BATCH_F(box_x1 + pix_xoff); OUT_BATCH_F(box_y2 + pix_yoff); - OUT_BATCH_F((box_x1 - dxo) * src_scale_x); - OUT_BATCH_F((box_y2 - dyo) * src_scale_y); + OUT_BATCH_F((box_x1 - dx) * src_scale_x); + OUT_BATCH_F((box_y2 - dy) * src_scale_y); /* top left */ OUT_BATCH_F(box_x1 + pix_xoff); OUT_BATCH_F(box_y1 + pix_yoff); - OUT_BATCH_F((box_x1 - dxo) * src_scale_x); - OUT_BATCH_F((box_y1 - dyo) * src_scale_y); + OUT_BATCH_F((box_x1 - dx) * src_scale_x); + OUT_BATCH_F((box_y1 - dy) * src_scale_y); } } while (nbox); @@ -3988,7 +4515,7 @@ gen3_render_video(struct sna *sna, pix_yoff = 0; #endif sna_blt_copy_boxes(sna, GXcopy, - dst_bo, -dxo, -dyo, + dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1, priv->gpu_bo, pix_xoff, pix_yoff, pixmap->drawable.bitsPerPixel, REGION_RECTS(dstRegion), @@ -4207,7 +4734,7 @@ gen3_render_copy_blt(struct sna *sna, static void gen3_render_copy_done(struct sna *sna, const struct sna_copy_op *op) { - if (sna->render_state.gen3.vertex_offset) + if (sna->render.vertex_offset) gen3_vertex_flush(sna); } @@ -4500,7 +5027,7 @@ gen3_render_fill_op_boxes(struct sna *sna, static void gen3_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) { - if (sna->render_state.gen3.vertex_offset) + if (sna->render.vertex_offset) gen3_vertex_flush(sna); } @@ -4661,6 +5188,9 @@ gen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, static void gen3_render_flush(struct sna *sna) { gen3_vertex_close(sna); + + assert(sna->render.vertex_reloc[0] == 0); + assert(sna->render.vertex_offset == 0); } static void @@ -4674,10 +5204,12 @@ bool gen3_render_init(struct sna *sna) #if !NO_COMPOSITE render->composite = gen3_render_composite; + render->prefer_gpu |= PREFER_GPU_RENDER; #endif #if !NO_COMPOSITE_SPANS render->check_composite_spans = 
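/* The video texture-coordinate maths above, worked through with invented
 * numbers: src_w = 640 source pixels held in a 1024-texel-wide (padded)
 * frame, drawn to drw_w = 1280 destination pixels gives
 *
 *	src_scale_x = (640.0 / 1024) / 1280;
 *
 * so a destination x of dx + 1280 (the right-hand edge) maps to
 * (dx + 1280 - dx) * src_scale_x == 640.0/1024 in normalised texture
 * space: the last valid texel of the padded frame, never the padding
 * beyond it. */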
gen3_check_composite_spans; render->composite_spans = gen3_render_composite_spans; + render->prefer_gpu |= PREFER_GPU_SPANS; #endif render->video = gen3_render_video; diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c index ceef528f0..53fe52a92 100644 --- a/src/sna/gen4_render.c +++ b/src/sna/gen4_render.c @@ -42,13 +42,15 @@ #include "brw/brw.h" #include "gen4_render.h" +#include "gen4_source.h" +#include "gen4_vertex.h" /* gen4 has a serious issue with its shaders that we need to flush * after every rectangle... So until that is resolved, prefer * the BLT engine. */ -#define PREFER_BLT 1 -#define FLUSH_EVERY_VERTEX 1 +#define FORCE_SPANS 0 +#define FORCE_NONRECTILINEAR_SPANS -1 #define NO_COMPOSITE 0 #define NO_COMPOSITE_SPANS 0 @@ -59,19 +61,6 @@ #define NO_FILL_BOXES 0 #define NO_VIDEO 0 -#if FLUSH_EVERY_VERTEX -#define _FLUSH() do { \ - gen4_vertex_flush(sna); \ - OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); \ -} while (0) -#define FLUSH(OP) do { \ - if ((OP)->mask.bo == NULL) _FLUSH(); \ -} while (0) -#else -#define _FLUSH() -#define FLUSH(OP) -#endif - #define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) /* Set up a default static partitioning of the URB, which is supposed to @@ -100,15 +89,9 @@ #define SF_KERNEL_NUM_GRF 16 #define PS_KERNEL_NUM_GRF 32 -static const struct gt_info { - uint32_t max_sf_threads; - uint32_t max_wm_threads; - uint32_t urb_size; -} gen4_gt_info = { - 24, 32, 256, -}, g4x_gt_info = { - 24, 50, 384, -}; +#define GEN4_MAX_SF_THREADS 24 +#define GEN4_MAX_WM_THREADS 32 +#define G4X_MAX_WM_THREADS 50 static const uint32_t ps_kernel_packed_static[][4] = { #include "exa_wm_xy.g4b" @@ -225,21 +208,20 @@ gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) return base + !is_affine; } -static void gen4_magic_ca_pass(struct sna *sna, +static bool gen4_magic_ca_pass(struct sna *sna, const struct sna_composite_op *op) { struct gen4_render_state *state = &sna->render_state.gen4; if (!op->need_magic_ca_pass) - return; + return false; + + assert(sna->render.vertex_index > sna->render.vertex_start); DBG(("%s: CA fixup\n", __FUNCTION__)); assert(op->mask.bo != NULL); assert(op->has_component_alpha); - if (FLUSH_EVERY_VERTEX) - OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); - gen4_emit_pipelined_pointers(sna, op, PictOpAdd, gen4_choose_composite_kernel(PictOpAdd, true, true, op->is_affine)); @@ -256,154 +238,9 @@ static void gen4_magic_ca_pass(struct sna *sna, OUT_BATCH(0); /* index buffer offset, ignored */ state->last_primitive = sna->kgem.nbatch; + return true; } -static void gen4_vertex_flush(struct sna *sna) -{ - if (sna->render_state.gen4.vertex_offset == 0) - return; - - DBG(("%s[%x] = %d\n", __FUNCTION__, - 4*sna->render_state.gen4.vertex_offset, - sna->render.vertex_index - sna->render.vertex_start)); - sna->kgem.batch[sna->render_state.gen4.vertex_offset] = - sna->render.vertex_index - sna->render.vertex_start; - sna->render_state.gen4.vertex_offset = 0; -} - -static int gen4_vertex_finish(struct sna *sna) -{ - struct kgem_bo *bo; - unsigned int i; - - assert(sna->render.vertex_used); - assert(sna->render.nvertex_reloc); - - /* Note: we only need dword alignment (currently) */ - - bo = sna->render.vbo; - if (bo) { - gen4_vertex_flush(sna); - - for (i = 0; i < sna->render.nvertex_reloc; i++) { - DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, - i, sna->render.vertex_reloc[i])); - - sna->kgem.batch[sna->render.vertex_reloc[i]] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i], bo, - I915_GEM_DOMAIN_VERTEX << 16, 
- 0); - } - - sna->render.nvertex_reloc = 0; - sna->render.vertex_used = 0; - sna->render.vertex_index = 0; - sna->render_state.gen4.vb_id = 0; - - kgem_bo_destroy(&sna->kgem, bo); - } - - sna->render.vertices = NULL; - sna->render.vbo = kgem_create_linear(&sna->kgem, - 256*1024, CREATE_GTT_MAP); - if (sna->render.vbo) - sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo); - if (sna->render.vertices == NULL) { - if (sna->render.vbo) - kgem_bo_destroy(&sna->kgem, sna->render.vbo); - sna->render.vbo = NULL; - return 0; - } - - if (sna->render.vertex_used) { - memcpy(sna->render.vertices, - sna->render.vertex_data, - sizeof(float)*sna->render.vertex_used); - } - sna->render.vertex_size = 64 * 1024 - 1; - return sna->render.vertex_size - sna->render.vertex_used; -} - -static void gen4_vertex_close(struct sna *sna) -{ - struct kgem_bo *bo, *free_bo = NULL; - unsigned int i, delta = 0; - - assert(sna->render_state.gen4.vertex_offset == 0); - if (!sna->render_state.gen4.vb_id) - return; - - DBG(("%s: used=%d, vbo active? %d\n", - __FUNCTION__, sna->render.vertex_used, sna->render.vbo != NULL)); - - bo = sna->render.vbo; - if (bo) { - if (sna->render.vertex_size - sna->render.vertex_used < 64) { - DBG(("%s: discarding full vbo\n", __FUNCTION__)); - sna->render.vbo = NULL; - sna->render.vertices = sna->render.vertex_data; - sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); - free_bo = bo; - } else if (IS_CPU_MAP(bo->map)) { - DBG(("%s: converting CPU map to GTT\n", __FUNCTION__)); - sna->render.vertices = - kgem_bo_map__gtt(&sna->kgem, sna->render.vbo); - if (sna->render.vertices == NULL) { - sna->render.vbo = NULL; - sna->render.vertices = sna->render.vertex_data; - sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); - free_bo = bo; - } - } - } else { - if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) { - DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__, - sna->render.vertex_used, sna->kgem.nbatch)); - memcpy(sna->kgem.batch + sna->kgem.nbatch, - sna->render.vertex_data, - sna->render.vertex_used * 4); - delta = sna->kgem.nbatch * 4; - bo = NULL; - sna->kgem.nbatch += sna->render.vertex_used; - } else { - bo = kgem_create_linear(&sna->kgem, - 4*sna->render.vertex_used, 0); - if (bo && !kgem_bo_write(&sna->kgem, bo, - sna->render.vertex_data, - 4*sna->render.vertex_used)) { - kgem_bo_destroy(&sna->kgem, bo); - bo = NULL; - } - DBG(("%s: new vbo: %d\n", __FUNCTION__, - sna->render.vertex_used)); - free_bo = bo; - } - } - - assert(sna->render.nvertex_reloc); - for (i = 0; i < sna->render.nvertex_reloc; i++) { - DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, - i, sna->render.vertex_reloc[i])); - - sna->kgem.batch[sna->render.vertex_reloc[i]] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i], bo, - I915_GEM_DOMAIN_VERTEX << 16, - delta); - } - sna->render.nvertex_reloc = 0; - - if (sna->render.vbo == NULL) { - sna->render.vertex_used = 0; - sna->render.vertex_index = 0; - } - - if (free_bo) - kgem_bo_destroy(&sna->kgem, free_bo); -} - - static uint32_t gen4_get_blend(int op, bool has_component_alpha, uint32_t dst_format) @@ -635,6 +472,17 @@ static bool gen4_check_repeat(PicturePtr picture) } } +static uint32_t +gen4_tiling_bits(uint32_t tiling) +{ + switch (tiling) { + default: assert(0); + case I915_TILING_NONE: return 0; + case I915_TILING_X: return GEN4_SURFACE_TILED; + case I915_TILING_Y: return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y; + } +} + /** * Sets up the common fields for a surface state buffer for the given * picture in the 
given surface state buffer. @@ -647,11 +495,11 @@ gen4_bind_bo(struct sna *sna, uint32_t format, bool is_dst) { - struct gen4_surface_state *ss; uint32_t domains; uint16_t offset; + uint32_t *ss; - assert(!kgem_bo_is_snoop(bo)); + assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo)); /* After the first bind, we manage the cache domains within the batch */ offset = kgem_bo_get_binding(bo, format); @@ -663,340 +511,58 @@ gen4_bind_bo(struct sna *sna, offset = sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); - ss = memset(sna->kgem.batch + offset, 0, sizeof(*ss)); + ss = sna->kgem.batch + offset; - ss->ss0.surface_type = GEN4_SURFACE_2D; - ss->ss0.surface_format = format; + ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT | + GEN4_SURFACE_BLEND_ENABLED | + format << GEN4_SURFACE_FORMAT_SHIFT); if (is_dst) domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER; else domains = I915_GEM_DOMAIN_SAMPLER << 16; + ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0); - ss->ss0.data_return_format = GEN4_SURFACERETURNFORMAT_FLOAT32; - ss->ss0.color_blend = 1; - ss->ss1.base_addr = - kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0); - - ss->ss2.height = height - 1; - ss->ss2.width = width - 1; - ss->ss3.pitch = bo->pitch - 1; - ss->ss3.tiled_surface = bo->tiling != I915_TILING_NONE; - ss->ss3.tile_walk = bo->tiling == I915_TILING_Y; + ss[2] = ((width - 1) << GEN4_SURFACE_WIDTH_SHIFT | + (height - 1) << GEN4_SURFACE_HEIGHT_SHIFT); + ss[3] = (gen4_tiling_bits(bo->tiling) | + (bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT); + ss[4] = 0; + ss[5] = 0; kgem_bo_set_binding(bo, format, offset); DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", - offset, bo->handle, ss->ss1.base_addr, - ss->ss0.surface_format, width, height, bo->pitch, bo->tiling, + offset, bo->handle, ss[1], + format, width, height, bo->pitch, bo->tiling, domains & 0xffff ? 
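/* gen4_bind_bo above memoises surface state per (bo, format):
 * kgem_bo_get_binding returns the offset of a state block written
 * earlier so the batch does not fill up with duplicates. The core of
 * the idea as a toy single-slot cache (the real kgem keeps a list per
 * bo; emit_surface_state here is a hypothetical stand-in): */
struct toy_bo { unsigned format; unsigned short binding; };

extern unsigned short emit_surface_state(struct toy_bo *bo, unsigned format);

static unsigned short bind_bo(struct toy_bo *bo, unsigned format)
{
	if (bo->binding && bo->format == format)
		return bo->binding;	/* reuse the state written earlier */

	bo->binding = emit_surface_state(bo, format);
	bo->format = format;
	return bo->binding;
}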
"render" : "sampler")); return offset * sizeof(uint32_t); } -fastcall static void -gen4_emit_composite_primitive_solid(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - float *v; - union { - struct sna_coordinate p; - float f; - } dst; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - v[1] = 1.; - v[2] = 1.; - - dst.p.x = r->dst.x; - v[3] = dst.f; - v[4] = 0.; - v[5] = 1.; - - dst.p.y = r->dst.y; - v[6] = dst.f; - v[7] = 0.; - v[8] = 0.; -} - -fastcall static void -gen4_emit_composite_primitive_identity_source(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - const float *sf = op->src.scale; - float sx, sy, *v; - union { - struct sna_coordinate p; - float f; - } dst; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; - - sx = r->src.x + op->src.offset[0]; - sy = r->src.y + op->src.offset[1]; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - v[1] = (sx + r->width) * sf[0]; - v[2] = (sy + r->height) * sf[1]; - - dst.p.x = r->dst.x; - v[3] = dst.f; - v[4] = sx * sf[0]; - v[5] = v[2]; - - dst.p.y = r->dst.y; - v[6] = dst.f; - v[7] = v[4]; - v[8] = sy * sf[1]; -} - -fastcall static void -gen4_emit_composite_primitive_affine_source(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - union { - struct sna_coordinate p; - float f; - } dst; - float *v; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x + r->width, - op->src.offset[1] + r->src.y + r->height, - op->src.transform, - &v[1], &v[2]); - v[1] *= op->src.scale[0]; - v[2] *= op->src.scale[1]; - - dst.p.x = r->dst.x; - v[3] = dst.f; - _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x, - op->src.offset[1] + r->src.y + r->height, - op->src.transform, - &v[4], &v[5]); - v[4] *= op->src.scale[0]; - v[5] *= op->src.scale[1]; - - dst.p.y = r->dst.y; - v[6] = dst.f; - _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x, - op->src.offset[1] + r->src.y, - op->src.transform, - &v[7], &v[8]); - v[7] *= op->src.scale[0]; - v[8] *= op->src.scale[1]; -} - -fastcall static void -gen4_emit_composite_primitive_identity_source_mask(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - union { - struct sna_coordinate p; - float f; - } dst; - float src_x, src_y; - float msk_x, msk_y; - float w, h; - float *v; - - src_x = r->src.x + op->src.offset[0]; - src_y = r->src.y + op->src.offset[1]; - msk_x = r->mask.x + op->mask.offset[0]; - msk_y = r->mask.y + op->mask.offset[1]; - w = r->width; - h = r->height; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 15; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - v[1] = (src_x + w) * op->src.scale[0]; - v[2] = (src_y + h) * op->src.scale[1]; - v[3] = (msk_x + w) * op->mask.scale[0]; - v[4] = (msk_y + h) * op->mask.scale[1]; - - dst.p.x = r->dst.x; - v[5] = dst.f; - v[6] = src_x * op->src.scale[0]; - v[7] = v[2]; - v[8] = msk_x * op->mask.scale[0]; - v[9] = v[4]; - - dst.p.y = r->dst.y; - v[10] = dst.f; - v[11] = v[6]; - v[12] = src_y * 
op->src.scale[1]; - v[13] = v[8]; - v[14] = msk_y * op->mask.scale[1]; -} - -fastcall static void -gen4_emit_composite_primitive(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3]; - bool is_affine = op->is_affine; - const float *src_sf = op->src.scale; - const float *mask_sf = op->mask.scale; - - if (is_affine) { - sna_get_transformed_coordinates(r->src.x + op->src.offset[0], - r->src.y + op->src.offset[1], - op->src.transform, - &src_x[0], - &src_y[0]); - - sna_get_transformed_coordinates(r->src.x + op->src.offset[0], - r->src.y + op->src.offset[1] + r->height, - op->src.transform, - &src_x[1], - &src_y[1]); - - sna_get_transformed_coordinates(r->src.x + op->src.offset[0] + r->width, - r->src.y + op->src.offset[1] + r->height, - op->src.transform, - &src_x[2], - &src_y[2]); - } else { - sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0], - r->src.y + op->src.offset[1], - op->src.transform, - &src_x[0], - &src_y[0], - &src_w[0]); - sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0], - r->src.y + op->src.offset[1] + r->height, - op->src.transform, - &src_x[1], - &src_y[1], - &src_w[1]); - sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0] + r->width, - r->src.y + op->src.offset[1] + r->height, - op->src.transform, - &src_x[2], - &src_y[2], - &src_w[2]); - } - - if (op->mask.bo) { - if (is_affine) { - sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0], - r->mask.y + op->mask.offset[1], - op->mask.transform, - &mask_x[0], - &mask_y[0]); - - sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0], - r->mask.y + op->mask.offset[1] + r->height, - op->mask.transform, - &mask_x[1], - &mask_y[1]); - - sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0] + r->width, - r->mask.y + op->mask.offset[1] + r->height, - op->mask.transform, - &mask_x[2], - &mask_y[2]); - } else { - sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0], - r->mask.y + op->mask.offset[1], - op->mask.transform, - &mask_x[0], - &mask_y[0], - &mask_w[0]); - sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0], - r->mask.y + op->mask.offset[1] + r->height, - op->mask.transform, - &mask_x[1], - &mask_y[1], - &mask_w[1]); - sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0] + r->width, - r->mask.y + op->mask.offset[1] + r->height, - op->mask.transform, - &mask_x[2], - &mask_y[2], - &mask_w[2]); - } - } - - OUT_VERTEX(r->dst.x + r->width, r->dst.y + r->height); - OUT_VERTEX_F(src_x[2] * src_sf[0]); - OUT_VERTEX_F(src_y[2] * src_sf[1]); - if (!is_affine) - OUT_VERTEX_F(src_w[2]); - if (op->mask.bo) { - OUT_VERTEX_F(mask_x[2] * mask_sf[0]); - OUT_VERTEX_F(mask_y[2] * mask_sf[1]); - if (!is_affine) - OUT_VERTEX_F(mask_w[2]); - } - - OUT_VERTEX(r->dst.x, r->dst.y + r->height); - OUT_VERTEX_F(src_x[1] * src_sf[0]); - OUT_VERTEX_F(src_y[1] * src_sf[1]); - if (!is_affine) - OUT_VERTEX_F(src_w[1]); - if (op->mask.bo) { - OUT_VERTEX_F(mask_x[1] * mask_sf[0]); - OUT_VERTEX_F(mask_y[1] * mask_sf[1]); - if (!is_affine) - OUT_VERTEX_F(mask_w[1]); - } - - OUT_VERTEX(r->dst.x, r->dst.y); - OUT_VERTEX_F(src_x[0] * src_sf[0]); - OUT_VERTEX_F(src_y[0] * src_sf[1]); - if (!is_affine) - OUT_VERTEX_F(src_w[0]); - if (op->mask.bo) { - OUT_VERTEX_F(mask_x[0] * mask_sf[0]); - OUT_VERTEX_F(mask_y[0] * mask_sf[1]); - if (!is_affine) - OUT_VERTEX_F(mask_w[0]); - } -} - static void gen4_emit_vertex_buffer(struct sna *sna, const struct 
sna_composite_op *op)
 {
 	int id = op->u.gen4.ve_id;
 
+	assert((sna->render.vb_id & (1 << id)) == 0);
+
 	OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
 	OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
 		  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
+	assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
 	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
 	OUT_BATCH(0);
 	OUT_BATCH(0);
 	OUT_BATCH(0);
 
-	sna->render_state.gen4.vb_id |= 1 << id;
+	sna->render.vb_id |= 1 << id;
 }
 
 static void gen4_emit_primitive(struct sna *sna)
 {
 	if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) {
-		sna->render_state.gen4.vertex_offset = sna->kgem.nbatch - 5;
+		sna->render.vertex_offset = sna->kgem.nbatch - 5;
 		return;
 	}
 
@@ -1005,7 +571,7 @@ static void gen4_emit_primitive(struct sna *sna)
 		  (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
 		  (0 << 9) |
 		  4);
-	sna->render_state.gen4.vertex_offset = sna->kgem.nbatch;
+	sna->render.vertex_offset = sna->kgem.nbatch;
 	OUT_BATCH(0);	/* vertex count, to be filled in later */
 	OUT_BATCH(sna->render.vertex_index);
 	OUT_BATCH(1);	/* single instance */
@@ -1022,19 +588,20 @@ static bool gen4_rectangle_begin(struct sna *sna,
 	int id = op->u.gen4.ve_id;
 	int ndwords;
 
+	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
+		return true;
+
 	/* 7xpipelined pointers + 6xprimitive + 1xflush */
 	ndwords = op->need_magic_ca_pass? 20 : 6;
-	if (FLUSH_EVERY_VERTEX)
-		ndwords += 1;
-	if ((sna->render_state.gen4.vb_id & (1 << id)) == 0)
+	if ((sna->render.vb_id & (1 << id)) == 0)
 		ndwords += 5;
 	if (!kgem_check_batch(&sna->kgem, ndwords))
 		return false;
 
-	if ((sna->render_state.gen4.vb_id & (1 << id)) == 0)
+	if ((sna->render.vb_id & (1 << id)) == 0)
 		gen4_emit_vertex_buffer(sna, op);
-	if (sna->render_state.gen4.vertex_offset == 0)
+	if (sna->render.vertex_offset == 0)
 		gen4_emit_primitive(sna);
 
 	return true;
@@ -1043,14 +610,28 @@ static bool gen4_rectangle_begin(struct sna *sna,
 static int gen4_get_rectangles__flush(struct sna *sna,
 				      const struct sna_composite_op *op)
 {
-	if (!kgem_check_batch(&sna->kgem, (FLUSH_EVERY_VERTEX || op->need_magic_ca_pass) ? 25 : 6))
+	/* Prevent discarding the new vbo after lock contention */
+	if (sna_vertex_wait__locked(&sna->render)) {
+		int rem = vertex_space(sna);
+		if (rem > op->floats_per_rect)
+			return rem;
+	}
+
+	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ?
25 : 6)) return 0; - if (!kgem_check_reloc_and_exec(&sna->kgem, 1)) + if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) return 0; if (op->need_magic_ca_pass && sna->render.vbo) return 0; + if (sna->render.vertex_offset) { + gen4_vertex_flush(sna); + if (gen4_magic_ca_pass(sna, op)) + gen4_emit_pipelined_pointers(sna, op, op->op, + op->u.gen4.wm_kernel); + } + return gen4_vertex_finish(sna); } @@ -1063,7 +644,7 @@ inline static int gen4_get_rectangles(struct sna *sna, start: rem = vertex_space(sna); - if (rem < op->floats_per_rect) { + if (unlikely(rem < op->floats_per_rect)) { DBG(("flushing vbo for %s: %d < %d\n", __FUNCTION__, rem, op->floats_per_rect)); rem = gen4_get_rectangles__flush(sna, op); @@ -1071,7 +652,7 @@ start: goto flush; } - if (unlikely(sna->render_state.gen4.vertex_offset == 0 && + if (unlikely(sna->render.vertex_offset == 0 && !gen4_rectangle_begin(sna, op))) goto flush; @@ -1082,18 +663,18 @@ start: return want; flush: - if (sna->render_state.gen4.vertex_offset) { + if (sna->render.vertex_offset) { gen4_vertex_flush(sna); gen4_magic_ca_pass(sna, op); } + sna_vertex_wait__locked(&sna->render); _kgem_submit(&sna->kgem); emit_state(sna, op); goto start; } static uint32_t * -gen4_composite_get_binding_table(struct sna *sna, - uint16_t *offset) +gen4_composite_get_binding_table(struct sna *sna, uint16_t *offset) { sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); @@ -1129,6 +710,9 @@ gen4_emit_urb(struct sna *sna) urb_cs_start = urb_sf_start + urb_sf_size; urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; + while ((sna->kgem.nbatch & 15) > 12) + OUT_BATCH(MI_NOOP); + OUT_BATCH(GEN4_URB_FENCE | UF0_CS_REALLOC | UF0_SF_REALLOC | @@ -1176,7 +760,7 @@ gen4_emit_invariant(struct sna *sna) { assert(sna->kgem.surface == sna->kgem.batch_size); - if (sna->kgem.gen >= 45) + if (sna->kgem.gen >= 045) OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); else OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D); @@ -1187,9 +771,9 @@ gen4_emit_invariant(struct sna *sna) } static void -gen4_get_batch(struct sna *sna) +gen4_get_batch(struct sna *sna, const struct sna_composite_op *op) { - kgem_set_mode(&sna->kgem, KGEM_RENDER); + kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) { DBG(("%s: flushing batch: %d < %d+%d\n", @@ -1245,11 +829,11 @@ gen4_emit_pipelined_pointers(struct sna *sna, const struct sna_composite_op *op, int blend, int kernel) { - uint32_t key; uint16_t sp, bp; + uint32_t key; DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n", - __FUNCTION__, op->mask.bo != NULL, + __FUNCTION__, op->u.gen4.ve_id & 2, op->src.filter, op->src.repeat, op->mask.filter, op->mask.repeat, kernel, blend, op->has_component_alpha, (int)op->dst.format)); @@ -1260,8 +844,7 @@ gen4_emit_pipelined_pointers(struct sna *sna, bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format); DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp)); - - key = sp | bp << 16; + key = sp | (uint32_t)bp << 16; if (key == sna->render_state.gen4.last_pipelined_pointers) return; @@ -1269,7 +852,7 @@ gen4_emit_pipelined_pointers(struct sna *sna, OUT_BATCH(sna->render_state.gen4.vs); OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */ OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */ - OUT_BATCH(sna->render_state.gen4.sf[op->mask.bo != NULL]); + OUT_BATCH(sna->render_state.gen4.sf); OUT_BATCH(sna->render_state.gen4.wm + sp); OUT_BATCH(sna->render_state.gen4.cc + bp); @@ -1277,7 +860,7 @@ 
gen4_emit_pipelined_pointers(struct sna *sna, gen4_emit_urb(sna); } -static void +static bool gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op) { uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); @@ -1288,7 +871,8 @@ gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op) if (sna->render_state.gen4.drawrect_limit == limit && sna->render_state.gen4.drawrect_offset == offset) - return; + return true; + sna->render_state.gen4.drawrect_offset = offset; sna->render_state.gen4.drawrect_limit = limit; @@ -1296,6 +880,7 @@ gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op) OUT_BATCH(0); OUT_BATCH(limit); OUT_BATCH(offset); + return false; } static void @@ -1309,65 +894,108 @@ gen4_emit_vertex_elements(struct sna *sna, * texture coordinate 1 if (has_mask is true): same as above */ struct gen4_render_state *render = &sna->render_state.gen4; - bool has_mask = op->mask.bo != NULL; - int nelem = has_mask ? 2 : 1; - int selem; - uint32_t w_component; - uint32_t src_format; + uint32_t src_format, dw; int id = op->u.gen4.ve_id; if (render->ve_id == id) return; - render->ve_id = id; - if (op->is_affine) { - src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT; - w_component = GEN4_VFCOMPONENT_STORE_1_FLT; - selem = 2; - } else { - src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT; - w_component = GEN4_VFCOMPONENT_STORE_SRC; - selem = 3; - } - /* The VUE layout * dword 0-3: position (x, y, 1.0, 1.0), * dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0) * [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0) */ - OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + nelem) - 1)); + OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1)); /* x,y */ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | - 0 << VE0_OFFSET_SHIFT); /* offsets vb in bytes */ - OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | - GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | - GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT | - GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT | - (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */ + 0 << VE0_OFFSET_SHIFT); + OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | + VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | + VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT | + VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT | + (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* u0, v0, w0 */ + /* u0, v0, w0 */ + DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3)); + dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; + switch (id & 3) { + default: + assert(0); + case 0: + src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT; + break; + case 1: + src_format = GEN4_SURFACEFORMAT_R32_FLOAT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; + dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT; + break; + case 2: + src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT; + break; + case 3: + src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT; + dw 
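/* (id & 3) appears to encode how many floats the first texcoord
		 * channel carries per vertex and (id >> 2) how many the second
		 * does, matching the DBG traces above, e.g.:
		 *
		 *   nsrc  = ve_id & 3;   (0: 16-bit x/y only, 1..3: float count)
		 *   nmask = ve_id >> 2;  (0: no second channel)
		 *
		 * so case 3 is the projective source carrying u, v and w: */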
|= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; + break; + } OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | src_format << VE0_FORMAT_SHIFT | - 4 << VE0_OFFSET_SHIFT); /* offset vb in bytes */ - OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | - GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | - w_component << VE1_VFCOMPONENT_2_SHIFT | - GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT | - (2*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */ + 4 << VE0_OFFSET_SHIFT); + OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* u1, v1, w1 */ - if (has_mask) { + if (id >> 2) { + unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float); + DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, + id >> 2, src_offset)); + dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; + switch (id >> 2) { + case 1: + src_format = GEN4_SURFACEFORMAT_R32_FLOAT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; + dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT; + break; + default: + assert(0); + case 2: + src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT; + break; + case 3: + src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; + break; + } OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | src_format << VE0_FORMAT_SHIFT | - ((1 + selem) * 4) << VE0_OFFSET_SHIFT); /* vb offset in bytes */ - OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | - GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | - w_component << VE1_VFCOMPONENT_2_SHIFT | - GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT | - (3*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */ + src_offset << VE0_OFFSET_SHIFT); + OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); + } else { + OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | + 0 << VE0_OFFSET_SHIFT); + OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT | + VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT | + VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | + VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT | + 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); } } @@ -1376,32 +1004,37 @@ gen4_emit_state(struct sna *sna, const struct sna_composite_op *op, uint16_t wm_binding_table) { - if (FLUSH_EVERY_VERTEX) - OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); - - gen4_emit_drawing_rectangle(sna, op); - gen4_emit_binding_table(sna, wm_binding_table); - gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel); - gen4_emit_vertex_elements(sna, op); + bool flush; + flush = wm_binding_table & 1; if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { - DBG(("%s: flushing dirty (%d, %d)\n", __FUNCTION__, + DBG(("%s: flushing dirty (%d, %d), forced? 
%d\n", __FUNCTION__, kgem_bo_is_dirty(op->src.bo), - kgem_bo_is_dirty(op->mask.bo))); + kgem_bo_is_dirty(op->mask.bo), + flush)); OUT_BATCH(MI_FLUSH); kgem_clear_dirty(&sna->kgem); kgem_bo_mark_dirty(op->dst.bo); + flush = false; } + flush &= gen4_emit_drawing_rectangle(sna, op); + if (flush && op->op > PictOpSrc) + OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); + + gen4_emit_binding_table(sna, wm_binding_table & ~1); + gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel); + gen4_emit_vertex_elements(sna, op); } static void gen4_bind_surfaces(struct sna *sna, const struct sna_composite_op *op) { + bool dirty = kgem_bo_is_dirty(op->dst.bo); uint32_t *binding_table; uint16_t offset; - gen4_get_batch(sna); + gen4_get_batch(sna, op); binding_table = gen4_composite_get_binding_table(sna, &offset); @@ -1415,7 +1048,8 @@ gen4_bind_surfaces(struct sna *sna, op->src.bo, op->src.width, op->src.height, op->src.card_format, false); - if (op->mask.bo) + if (op->mask.bo) { + assert(op->u.gen4.ve_id >> 2); binding_table[2] = gen4_bind_bo(sna, op->mask.bo, @@ -1423,6 +1057,7 @@ gen4_bind_surfaces(struct sna *sna, op->mask.height, op->mask.card_format, false); + } if (sna->kgem.surface == offset && *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table && @@ -1432,7 +1067,7 @@ gen4_bind_surfaces(struct sna *sna, offset = sna->render_state.gen4.surface_table; } - gen4_emit_state(sna, op, offset); + gen4_emit_state(sna, op, offset | dirty); } fastcall static void @@ -1449,9 +1084,6 @@ gen4_render_composite_blt(struct sna *sna, gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces); op->prim_emit(sna, op, r); - - /* XXX are the shaders fubar? */ - FLUSH(op); } fastcall static void @@ -1461,19 +1093,25 @@ gen4_render_composite_box(struct sna *sna, { struct sna_composite_rectangles r; + DBG((" %s: (%d, %d), (%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + + gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces); + r.dst.x = box->x1; r.dst.y = box->y1; r.width = box->x2 - box->x1; r.height = box->y2 - box->y1; r.mask = r.src = r.dst; - gen4_render_composite_blt(sna, op, &r); + op->prim_emit(sna, op, &r); } static void -gen4_render_composite_boxes(struct sna *sna, - const struct sna_composite_op *op, - const BoxRec *box, int nbox) +gen4_render_composite_boxes__blt(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) { DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n", __FUNCTION__, nbox, op->dst.x, op->dst.y, @@ -1483,16 +1121,84 @@ gen4_render_composite_boxes(struct sna *sna, op->mask.width, op->mask.height)); do { - struct sna_composite_rectangles r; - - r.dst.x = box->x1; - r.dst.y = box->y1; - r.width = box->x2 - box->x1; - r.height = box->y2 - box->y1; - r.mask = r.src = r.dst; - gen4_render_composite_blt(sna, op, &r); - box++; - } while (--nbox); + int nbox_this_time; + + nbox_this_time = gen4_get_rectangles(sna, op, nbox, + gen4_bind_surfaces); + nbox -= nbox_this_time; + + do { + struct sna_composite_rectangles r; + + DBG((" %s: (%d, %d), (%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + + r.dst.x = box->x1; + r.dst.y = box->y1; + r.width = box->x2 - box->x1; + r.height = box->y2 - box->y1; + r.mask = r.src = r.dst; + op->prim_emit(sna, op, &r); + box++; + } while (--nbox_this_time); + } while (nbox); +} + +static void +gen4_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: 
nbox=%d\n", __FUNCTION__, nbox)); + + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen4_get_rectangles(sna, op, nbox, + gen4_bind_surfaces); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + } while (nbox); +} + +static void +gen4_render_composite_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen4_get_rectangles(sna, op, nbox, + gen4_bind_surfaces); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); } #ifndef MAX @@ -1533,6 +1239,7 @@ static uint32_t gen4_bind_video_source(struct sna *sna, static void gen4_video_bind_surfaces(struct sna *sna, const struct sna_composite_op *op) { + bool dirty = kgem_bo_is_dirty(op->dst.bo); struct sna_video_frame *frame = op->priv; uint32_t src_surf_format; uint32_t src_surf_base[6]; @@ -1574,10 +1281,9 @@ static void gen4_video_bind_surfaces(struct sna *sna, n_src = 1; } - gen4_get_batch(sna); + gen4_get_batch(sna, op); binding_table = gen4_composite_get_binding_table(sna, &offset); - binding_table[0] = gen4_bind_bo(sna, op->dst.bo, op->dst.width, op->dst.height, @@ -1594,7 +1300,7 @@ static void gen4_video_bind_surfaces(struct sna *sna, src_surf_format); } - gen4_emit_state(sna, op, offset); + gen4_emit_state(sna, op, offset | dirty); } static bool @@ -1604,10 +1310,11 @@ gen4_render_video(struct sna *sna, RegionPtr dstRegion, short src_w, short src_h, short drw_w, short drw_h, + short dx, short dy, PixmapPtr pixmap) { struct sna_composite_op tmp; - int nbox, dxo, dyo, pix_xoff, pix_yoff; + int nbox, pix_xoff, pix_yoff; float src_scale_x, src_scale_y; struct sna_pixmap *priv; BoxPtr box; @@ -1627,13 +1334,16 @@ gen4_render_video(struct sna *sna, tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth); tmp.dst.bo = priv->gpu_bo; - tmp.src.filter = SAMPLER_FILTER_BILINEAR; + if (src_w == drw_w && src_h == drw_h) + tmp.src.filter = SAMPLER_FILTER_NEAREST; + else + tmp.src.filter = SAMPLER_FILTER_BILINEAR; tmp.src.repeat = SAMPLER_EXTEND_PAD; tmp.src.bo = frame->bo; tmp.mask.bo = NULL; tmp.u.gen4.wm_kernel = is_planar_fourcc(frame->id) ? 
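/* planar fourccs bind one surface per Y/U/V plane (up to the six
	   surface states above), packed fourccs a single interleaved
	   surface, hence the two sampler kernels: */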
WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED; - tmp.u.gen4.ve_id = 1; + tmp.u.gen4.ve_id = 2; tmp.is_affine = true; tmp.floats_per_vertex = 3; tmp.floats_per_rect = 9; @@ -1658,9 +1368,6 @@ gen4_render_video(struct sna *sna, pix_yoff = 0; #endif - dxo = dstRegion->extents.x1; - dyo = dstRegion->extents.y1; - /* Use normalized texture coordinates */ src_scale_x = ((float)src_w / frame->width) / (float)drw_w; src_scale_y = ((float)src_h / frame->height) / (float)drw_h; @@ -1678,18 +1385,16 @@ gen4_render_video(struct sna *sna, gen4_get_rectangles(sna, &tmp, 1, gen4_video_bind_surfaces); OUT_VERTEX(r.x2, r.y2); - OUT_VERTEX_F((box->x2 - dxo) * src_scale_x); - OUT_VERTEX_F((box->y2 - dyo) * src_scale_y); + OUT_VERTEX_F((box->x2 - dx) * src_scale_x); + OUT_VERTEX_F((box->y2 - dy) * src_scale_y); OUT_VERTEX(r.x1, r.y2); - OUT_VERTEX_F((box->x1 - dxo) * src_scale_x); - OUT_VERTEX_F((box->y2 - dyo) * src_scale_y); + OUT_VERTEX_F((box->x1 - dx) * src_scale_x); + OUT_VERTEX_F((box->y2 - dy) * src_scale_y); OUT_VERTEX(r.x1, r.y1); - OUT_VERTEX_F((box->x1 - dxo) * src_scale_x); - OUT_VERTEX_F((box->y1 - dyo) * src_scale_y); - - _FLUSH(); + OUT_VERTEX_F((box->x1 - dx) * src_scale_x); + OUT_VERTEX_F((box->y1 - dy) * src_scale_y); if (!DAMAGE_IS_ALL(priv->gpu_damage)) { sna_damage_add_box(&priv->gpu_damage, &r); @@ -1703,141 +1408,6 @@ gen4_render_video(struct sna *sna, return true; } -static bool -gen4_composite_solid_init(struct sna *sna, - struct sna_composite_channel *channel, - uint32_t color) -{ - channel->filter = PictFilterNearest; - channel->repeat = RepeatNormal; - channel->is_affine = true; - channel->is_solid = true; - channel->transform = NULL; - channel->width = 1; - channel->height = 1; - channel->card_format = GEN4_SURFACEFORMAT_B8G8R8A8_UNORM; - - channel->bo = sna_render_get_solid(sna, color); - - channel->scale[0] = channel->scale[1] = 1; - channel->offset[0] = channel->offset[1] = 0; - return channel->bo != NULL; -} - -static bool -gen4_composite_linear_init(struct sna *sna, - PicturePtr picture, - struct sna_composite_channel *channel, - int x, int y, - int w, int h, - int dst_x, int dst_y) -{ - PictLinearGradient *linear = - (PictLinearGradient *)picture->pSourcePict; - pixman_fixed_t tx, ty; - float x0, y0, sf; - float dx, dy; - - DBG(("%s: p1=(%f, %f), p2=(%f, %f), src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n", - __FUNCTION__, - pixman_fixed_to_double(linear->p1.x), pixman_fixed_to_double(linear->p1.y), - pixman_fixed_to_double(linear->p2.x), pixman_fixed_to_double(linear->p2.y), - x, y, dst_x, dst_y, w, h)); - - if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y) - return 0; - - if (!sna_transform_is_affine(picture->transform)) { - DBG(("%s: fallback due to projective transform\n", - __FUNCTION__)); - return sna_render_picture_fixup(sna, picture, channel, - x, y, w, h, dst_x, dst_y); - } - - channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear); - if (!channel->bo) - return 0; - - channel->filter = PictFilterNearest; - channel->repeat = picture->repeat ? 
picture->repeatType : RepeatNone; - channel->width = channel->bo->pitch / 4; - channel->height = 1; - channel->pict_format = PICT_a8r8g8b8; - - channel->scale[0] = channel->scale[1] = 1; - channel->offset[0] = channel->offset[1] = 0; - - if (sna_transform_is_translation(picture->transform, &tx, &ty)) { - dx = pixman_fixed_to_double(linear->p2.x - linear->p1.x); - dy = pixman_fixed_to_double(linear->p2.y - linear->p1.y); - - x0 = pixman_fixed_to_double(linear->p1.x); - y0 = pixman_fixed_to_double(linear->p1.y); - - if (tx | ty) { - x0 -= pixman_fixed_to_double(tx); - y0 -= pixman_fixed_to_double(ty); - } - } else { - struct pixman_f_vector p1, p2; - struct pixman_f_transform m, inv; - - pixman_f_transform_from_pixman_transform(&m, picture->transform); - DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n", - __FUNCTION__, - m.m[0][0], m.m[0][1], m.m[0][2], - m.m[1][0], m.m[1][1], m.m[1][2], - m.m[2][0], m.m[2][1], m.m[2][2])); - if (!pixman_f_transform_invert(&inv, &m)) - return 0; - - p1.v[0] = pixman_fixed_to_double(linear->p1.x); - p1.v[1] = pixman_fixed_to_double(linear->p1.y); - p1.v[2] = 1.; - pixman_f_transform_point(&inv, &p1); - - p2.v[0] = pixman_fixed_to_double(linear->p2.x); - p2.v[1] = pixman_fixed_to_double(linear->p2.y); - p2.v[2] = 1.; - pixman_f_transform_point(&inv, &p2); - - DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n", - __FUNCTION__, - p1.v[0], p1.v[1], p1.v[2], - p2.v[0], p2.v[1], p2.v[2])); - - dx = p2.v[0] - p1.v[0]; - dy = p2.v[1] - p1.v[1]; - - x0 = p1.v[0]; - y0 = p1.v[1]; - } - - sf = dx*dx + dy*dy; - dx /= sf; - dy /= sf; - - channel->embedded_transform.matrix[0][0] = pixman_double_to_fixed(dx); - channel->embedded_transform.matrix[0][1] = pixman_double_to_fixed(dy); - channel->embedded_transform.matrix[0][2] = -pixman_double_to_fixed(dx*(x0+dst_x-x) + dy*(y0+dst_y-y)); - - channel->embedded_transform.matrix[1][0] = 0; - channel->embedded_transform.matrix[1][1] = 0; - channel->embedded_transform.matrix[1][2] = pixman_double_to_fixed(.5); - - channel->embedded_transform.matrix[2][0] = 0; - channel->embedded_transform.matrix[2][1] = 0; - channel->embedded_transform.matrix[2][2] = pixman_fixed_1; - - channel->transform = &channel->embedded_transform; - channel->is_affine = 1; - - DBG(("%s: dx=%f, dy=%f, offset=%f\n", - __FUNCTION__, dx, dy, -dx*(x0-x+dst_x) + -dy*(y0-y+dst_y))); - - return channel->bo != NULL; -} - static int gen4_composite_picture(struct sna *sna, PicturePtr picture, @@ -1858,16 +1428,16 @@ gen4_composite_picture(struct sna *sna, channel->card_format = -1; if (sna_picture_is_solid(picture, &color)) - return gen4_composite_solid_init(sna, channel, color); + return gen4_channel_init_solid(sna, channel, color); if (picture->pDrawable == NULL) { int ret; if (picture->pSourcePict->type == SourcePictTypeLinear) - return gen4_composite_linear_init(sna, picture, channel, - x, y, - w, h, - dst_x, dst_y); + return gen4_channel_init_linear(sna, picture, channel, + x, y, + w, h, + dst_x, dst_y); DBG(("%s -- fixup, gradient\n", __FUNCTION__)); ret = -1; @@ -1922,7 +1492,8 @@ gen4_composite_picture(struct sna *sna, channel->card_format = gen4_get_card_format(picture->format); if (channel->card_format == -1) return sna_render_picture_convert(sna, picture, channel, pixmap, - x, y, w, h, dst_x, dst_y); + x, y, w, h, dst_x, dst_y, + false); if (too_large(pixmap->drawable.width, pixmap->drawable.height)) return sna_render_picture_extract(sna, picture, channel, @@ -1950,7 +1521,7 @@ gen4_render_composite_done(struct sna *sna, { DBG(("%s()\n", 
__FUNCTION__)); - if (sna->render_state.gen4.vertex_offset) { + if (sna->render.vertex_offset) { gen4_vertex_flush(sna); gen4_magic_ca_pass(sna, op); } @@ -1964,54 +1535,49 @@ gen4_render_composite_done(struct sna *sna, } static bool -gen4_composite_set_target(PicturePtr dst, struct sna_composite_op *op) +gen4_composite_set_target(struct sna *sna, + struct sna_composite_op *op, + PicturePtr dst, + int x, int y, int w, int h) { - struct sna_pixmap *priv; - - if (!gen4_check_dst_format(dst->format)) { - DBG(("%s: incompatible render target format %08x\n", - __FUNCTION__, dst->format)); - return false; - } + BoxRec box; op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); op->dst.width = op->dst.pixmap->drawable.width; op->dst.height = op->dst.pixmap->drawable.height; op->dst.format = dst->format; - priv = sna_pixmap_force_to_gpu(op->dst.pixmap, MOVE_READ | MOVE_WRITE); - if (priv == NULL) - return false; + if (w && h) { + box.x1 = x; + box.y1 = y; + box.x2 = x + w; + box.y2 = y + h; + } else + sna_render_picture_extents(dst, &box); - op->dst.bo = priv->gpu_bo; - op->damage = &priv->gpu_damage; - if (sna_damage_is_all(&priv->gpu_damage, op->dst.width, op->dst.height)) - op->damage = NULL; - DBG(("%s: all-damaged=%d, damage=%p\n", __FUNCTION__, - sna_damage_is_all(&priv->gpu_damage, op->dst.width, op->dst.height), - op->damage)); + op->dst.bo = sna_drawable_use_bo (dst->pDrawable, + PREFER_GPU | FORCE_GPU | RENDER_GPU, + &box, &op->damage); + if (op->dst.bo == NULL) + return false; get_drawable_deltas(dst->pDrawable, op->dst.pixmap, &op->dst.x, &op->dst.y); - return true; -} -static inline bool -picture_is_cpu(PicturePtr picture) -{ - if (!picture->pDrawable) - return false; + DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", + __FUNCTION__, + op->dst.pixmap, (int)op->dst.format, + op->dst.width, op->dst.height, + op->dst.bo->pitch, + op->dst.x, op->dst.y, + op->damage ? *op->damage : (void *)-1)); - return !is_gpu(picture->pDrawable); -} + assert(op->dst.bo->proxy == NULL); + + if (too_large(op->dst.width, op->dst.height) && + !sna_render_composite_redirect(sna, op, x, y, w, h)) + return false; -static inline bool prefer_blt(struct sna *sna) -{ -#if PREFER_BLT return true; - (void)sna; -#else - return sna->kgem.mode != KGEM_RENDER; -#endif } static bool @@ -2019,7 +1585,7 @@ try_blt(struct sna *sna, PicturePtr dst, PicturePtr src, int width, int height) { - if (prefer_blt(sna)) { + if (sna->kgem.mode != KGEM_RENDER) { DBG(("%s: already performing BLT\n", __FUNCTION__)); return true; } @@ -2038,7 +1604,7 @@ try_blt(struct sna *sna, return true; /* is the source picture only in cpu memory e.g. a shm pixmap? 
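If so, prefer the blitter rather than forcing an upload of the
	   shared-memory pixmap onto the GPU just for this one operation.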
*/ - return picture_is_cpu(src); + return picture_is_cpu(sna, src); } static bool @@ -2060,15 +1626,10 @@ has_alphamap(PicturePtr p) } static bool -untransformed(PicturePtr p) +need_upload(struct sna *sna, PicturePtr p) { - return !p->transform || pixman_transform_is_int_translate(p->transform); -} - -static bool -need_upload(PicturePtr p) -{ - return p->pDrawable && untransformed(p) && !is_gpu(p->pDrawable); + return p->pDrawable && untransformed(p) && + !is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER); } static bool @@ -2084,11 +1645,14 @@ source_is_busy(PixmapPtr pixmap) if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) return true; + if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) + return true; + return priv->gpu_damage && !priv->cpu_damage; } static bool -source_fallback(PicturePtr p, PixmapPtr pixmap) +source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap) { if (sna_picture_is_solid(p, NULL)) return false; @@ -2103,7 +1667,7 @@ source_fallback(PicturePtr p, PixmapPtr pixmap) if (pixmap && source_is_busy(pixmap)) return false; - return has_alphamap(p) || !gen4_check_filter(p) || need_upload(p); + return has_alphamap(p) || !gen4_check_filter(p) || need_upload(sna, p); } static bool @@ -2112,7 +1676,6 @@ gen4_composite_fallback(struct sna *sna, PicturePtr mask, PicturePtr dst) { - struct sna_pixmap *priv; PixmapPtr src_pixmap; PixmapPtr mask_pixmap; PixmapPtr dst_pixmap; @@ -2127,11 +1690,11 @@ gen4_composite_fallback(struct sna *sna, dst_pixmap = get_drawable_pixmap(dst->pDrawable); src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; - src_fallback = source_fallback(src, src_pixmap); + src_fallback = source_fallback(sna, src, src_pixmap); if (mask) { mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; - mask_fallback = source_fallback(mask, mask_pixmap); + mask_fallback = source_fallback(sna, mask, mask_pixmap); } else { mask_pixmap = NULL; mask_fallback = false; @@ -2151,8 +1714,7 @@ gen4_composite_fallback(struct sna *sna, } /* If anything is on the GPU, push everything out to the GPU */ - priv = sna_pixmap(dst_pixmap); - if (priv && priv->gpu_damage && !priv->clear) { + if (dst_use_gpu(dst_pixmap)) { DBG(("%s: dst is already on the GPU, try to use GPU\n", __FUNCTION__)); return false; @@ -2187,14 +1749,14 @@ gen4_composite_fallback(struct sna *sna, if (too_large(dst_pixmap->drawable.width, dst_pixmap->drawable.height) && - (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage))) { + dst_is_cpu(dst_pixmap)) { DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); return true; } DBG(("%s: dst is not on the GPU and the operation should not fallback\n", __FUNCTION__)); - return false; + return dst_use_cpu(dst_pixmap); } static int @@ -2215,7 +1777,7 @@ reuse_source(struct sna *sna, } if (sna_picture_is_solid(mask, &color)) - return gen4_composite_solid_init(sna, mc, color); + return gen4_channel_init_solid(sna, mc, color); if (sc->is_solid) return false; @@ -2291,15 +1853,13 @@ gen4_render_composite(struct sna *sna, width, height, tmp); - if (!gen4_composite_set_target(dst, tmp)) - return false; - sna_render_reduce_damage(tmp, dst_x, dst_y, width, height); - - if (too_large(tmp->dst.width, tmp->dst.height) && - !sna_render_composite_redirect(sna, tmp, - dst_x, dst_y, width, height)) + if (!gen4_composite_set_target(sna, tmp, dst, + dst_x, dst_y, width, height)) { + DBG(("%s: failed to set composite target\n", __FUNCTION__)); return false; + } + tmp->op = op; switch (gen4_composite_picture(sna, src, &tmp->src, src_x, src_y, 
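/* judging by the switch below, this returns -1 on failure, 0 when
				      * the source reduces to a constant (fixed up as solid), and 1
				      * when the channel is ready for use: */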
width, height, @@ -2309,7 +1869,7 @@ gen4_render_composite(struct sna *sna, DBG(("%s: failed to prepare source\n", __FUNCTION__)); goto cleanup_dst; case 0: - if (!gen4_composite_solid_init(sna, &tmp->src, 0)) + if (!gen4_channel_init_solid(sna, &tmp->src, 0)) goto cleanup_dst; /* fall through to fixup */ case 1: @@ -2323,12 +1883,10 @@ gen4_render_composite(struct sna *sna, break; } - tmp->op = op; tmp->is_affine = tmp->src.is_affine; tmp->has_component_alpha = false; tmp->need_magic_ca_pass = false; - tmp->prim_emit = gen4_emit_composite_primitive; if (mask) { if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { tmp->has_component_alpha = true; @@ -2363,7 +1921,7 @@ gen4_render_composite(struct sna *sna, DBG(("%s: failed to prepare mask\n", __FUNCTION__)); goto cleanup_src; case 0: - if (!gen4_composite_solid_init(sna, &tmp->mask, 0)) + if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) goto cleanup_src; /* fall through to fixup */ case 1: @@ -2373,33 +1931,22 @@ gen4_render_composite(struct sna *sna, } tmp->is_affine &= tmp->mask.is_affine; - - if (tmp->src.transform == NULL && tmp->mask.transform == NULL) - tmp->prim_emit = gen4_emit_composite_primitive_identity_source_mask; - - tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine; - } else { - if (tmp->src.is_solid) - tmp->prim_emit = gen4_emit_composite_primitive_solid; - else if (tmp->src.transform == NULL) - tmp->prim_emit = gen4_emit_composite_primitive_identity_source; - else if (tmp->src.is_affine) - tmp->prim_emit = gen4_emit_composite_primitive_affine_source; - - tmp->floats_per_vertex = 3 + !tmp->is_affine; } - tmp->floats_per_rect = 3*tmp->floats_per_vertex; tmp->u.gen4.wm_kernel = gen4_choose_composite_kernel(tmp->op, tmp->mask.bo != NULL, tmp->has_component_alpha, tmp->is_affine); - tmp->u.gen4.ve_id = (tmp->mask.bo != NULL) << 1 | tmp->is_affine; + tmp->u.gen4.ve_id = gen4_choose_composite_emitter(tmp); tmp->blt = gen4_render_composite_blt; tmp->box = gen4_render_composite_box; - tmp->boxes = gen4_render_composite_boxes; + tmp->boxes = gen4_render_composite_boxes__blt; + if (tmp->emit_boxes) { + tmp->boxes = gen4_render_composite_boxes; + tmp->thread_boxes = gen4_render_composite_boxes__thread; + } tmp->done = gen4_render_composite_done; if (!kgem_check_bo(&sna->kgem, @@ -2428,127 +1975,7 @@ cleanup_dst: return false; } -/* A poor man's span interface. But better than nothing? 
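Each span is emitted as an independent rectangle, with its opacity
   carried through the vertex stream to the OPACITY wm kernel.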
*/ #if !NO_COMPOSITE_SPANS -inline static void -gen4_emit_composite_texcoord(struct sna *sna, - const struct sna_composite_channel *channel, - int16_t x, int16_t y) -{ - float t[3]; - - if (channel->is_affine) { - sna_get_transformed_coordinates(x + channel->offset[0], - y + channel->offset[1], - channel->transform, - &t[0], &t[1]); - OUT_VERTEX_F(t[0] * channel->scale[0]); - OUT_VERTEX_F(t[1] * channel->scale[1]); - } else { - t[0] = t[1] = 0; t[2] = 1; - sna_get_transformed_coordinates_3d(x + channel->offset[0], - y + channel->offset[1], - channel->transform, - &t[0], &t[1], &t[2]); - OUT_VERTEX_F(t[0] * channel->scale[0]); - OUT_VERTEX_F(t[1] * channel->scale[1]); - OUT_VERTEX_F(t[2]); - } -} - -inline static void -gen4_emit_composite_texcoord_affine(struct sna *sna, - const struct sna_composite_channel *channel, - int16_t x, int16_t y) -{ - float t[2]; - - sna_get_transformed_coordinates(x + channel->offset[0], - y + channel->offset[1], - channel->transform, - &t[0], &t[1]); - OUT_VERTEX_F(t[0] * channel->scale[0]); - OUT_VERTEX_F(t[1] * channel->scale[1]); -} - -inline static void -gen4_emit_composite_spans_vertex(struct sna *sna, - const struct sna_composite_spans_op *op, - int16_t x, int16_t y) -{ - OUT_VERTEX(x, y); - gen4_emit_composite_texcoord(sna, &op->base.src, x, y); -} - -fastcall static void -gen4_emit_composite_spans_primitive(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - gen4_emit_composite_spans_vertex(sna, op, box->x2, box->y2); - OUT_VERTEX_F(opacity); - OUT_VERTEX_F(1); - if (!op->base.is_affine) - OUT_VERTEX_F(1); - - gen4_emit_composite_spans_vertex(sna, op, box->x1, box->y2); - OUT_VERTEX_F(opacity); - OUT_VERTEX_F(1); - if (!op->base.is_affine) - OUT_VERTEX_F(1); - - gen4_emit_composite_spans_vertex(sna, op, box->x1, box->y1); - OUT_VERTEX_F(opacity); - OUT_VERTEX_F(0); - if (!op->base.is_affine) - OUT_VERTEX_F(1); -} - -fastcall static void -gen4_emit_composite_spans_solid(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - OUT_VERTEX(box->x2, box->y2); - OUT_VERTEX_F(1); OUT_VERTEX_F(1); - OUT_VERTEX_F(opacity); OUT_VERTEX_F(1); - - OUT_VERTEX(box->x1, box->y2); - OUT_VERTEX_F(0); OUT_VERTEX_F(1); - OUT_VERTEX_F(opacity); OUT_VERTEX_F(1); - - OUT_VERTEX(box->x1, box->y1); - OUT_VERTEX_F(0); OUT_VERTEX_F(0); - OUT_VERTEX_F(opacity); OUT_VERTEX_F(0); -} - -fastcall static void -gen4_emit_composite_spans_affine(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - OUT_VERTEX(box->x2, box->y2); - gen4_emit_composite_texcoord_affine(sna, &op->base.src, - box->x2, box->y2); - OUT_VERTEX_F(opacity); - OUT_VERTEX_F(1); - - OUT_VERTEX(box->x1, box->y2); - gen4_emit_composite_texcoord_affine(sna, &op->base.src, - box->x1, box->y2); - OUT_VERTEX_F(opacity); - OUT_VERTEX_F(1); - - OUT_VERTEX(box->x1, box->y1); - gen4_emit_composite_texcoord_affine(sna, &op->base.src, - box->x1, box->y1); - OUT_VERTEX_F(opacity); - OUT_VERTEX_F(0); -} - fastcall static void gen4_render_composite_spans_box(struct sna *sna, const struct sna_composite_spans_op *op, @@ -2580,22 +2007,69 @@ gen4_render_composite_spans_boxes(struct sna *sna, op->base.dst.x, op->base.dst.y)); do { - gen4_render_composite_spans_box(sna, op, box++, opacity); - } while (--nbox); + int nbox_this_time; + + nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox, + gen4_bind_surfaces); + nbox -= nbox_this_time; + + do { + DBG((" %s: (%d, %d) x (%d, %d)\n", 
__FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1)); + + op->prim_emit(sna, op, box++, opacity); + } while (--nbox_this_time); + } while (nbox); +} + +fastcall static void +gen4_render_composite_spans_boxes__thread(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox, + gen4_bind_surfaces); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); } fastcall static void gen4_render_composite_spans_done(struct sna *sna, const struct sna_composite_spans_op *op) { - gen4_vertex_flush(sna); + if (sna->render.vertex_offset) + gen4_vertex_flush(sna); DBG(("%s()\n", __FUNCTION__)); - kgem_bo_destroy(&sna->kgem, op->base.mask.bo); - if (op->base.src.bo) - kgem_bo_destroy(&sna->kgem, op->base.src.bo); - + kgem_bo_destroy(&sna->kgem, op->base.src.bo); sna_render_composite_redirect_done(sna, &op->base); } @@ -2605,17 +2079,43 @@ gen4_check_composite_spans(struct sna *sna, int16_t width, int16_t height, unsigned flags) { - if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) - return false; + DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n", + __FUNCTION__, op, width, height, flags)); if (op >= ARRAY_SIZE(gen4_blend_op)) return false; - if (gen4_composite_fallback(sna, src, NULL, dst)) + if (gen4_composite_fallback(sna, src, NULL, dst)) { + DBG(("%s: operation would fallback\n", __FUNCTION__)); return false; + } - if (need_tiling(sna, width, height) && !is_gpu(dst->pDrawable)) + if (need_tiling(sna, width, height) && + !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { + DBG(("%s: fallback, tiled operation not on GPU\n", + __FUNCTION__)); return false; + } + + if (FORCE_SPANS) + return FORCE_SPANS > 0; + + if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) { + struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable); + assert(priv); + + if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) + return true; + + if (flags & COMPOSITE_SPANS_INPLACE_HINT) + return false; + + if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0 && + dst->format == PICT_a8) + return false; + + return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo); + } return true; } @@ -2645,15 +2145,9 @@ gen4_render_composite_spans(struct sna *sna, } tmp->base.op = op; - if (!gen4_composite_set_target(dst, &tmp->base)) + if (!gen4_composite_set_target(sna, &tmp->base, dst, + dst_x, dst_y, width, height)) return false; - sna_render_reduce_damage(&tmp->base, dst_x, dst_y, width, height); - - if (too_large(tmp->base.dst.width, tmp->base.dst.height)) { - if (!sna_render_composite_redirect(sna, &tmp->base, - dst_x, dst_y, width, height)) - return false; - } switch (gen4_composite_picture(sna, src, &tmp->base.src, src_x, src_y, @@ -2663,7 +2157,7 @@ gen4_render_composite_spans(struct sna *sna, case -1: goto cleanup_dst; case 0: - if (!gen4_composite_solid_init(sna, 
&tmp->base.src, 0)) + if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) goto cleanup_dst; /* fall through to fixup */ case 1: @@ -2671,27 +2165,21 @@ gen4_render_composite_spans(struct sna *sna, break; } - tmp->base.mask.bo = sna_render_get_solid(sna, 0); - if (tmp->base.mask.bo == NULL) - goto cleanup_src; + tmp->base.mask.bo = NULL; + tmp->base.mask.filter = SAMPLER_FILTER_NEAREST; + tmp->base.mask.repeat = SAMPLER_EXTEND_NONE; tmp->base.is_affine = tmp->base.src.is_affine; tmp->base.has_component_alpha = false; tmp->base.need_magic_ca_pass = false; - tmp->prim_emit = gen4_emit_composite_spans_primitive; - if (tmp->base.src.is_solid) - tmp->prim_emit = gen4_emit_composite_spans_solid; - else if (tmp->base.is_affine) - tmp->prim_emit = gen4_emit_composite_spans_affine; - tmp->base.floats_per_vertex = 5 + 2*!tmp->base.is_affine; - tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex; - - tmp->base.u.gen5.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine; - tmp->base.u.gen4.ve_id = 1 << 1 | tmp->base.is_affine; + tmp->base.u.gen4.ve_id = gen4_choose_spans_emitter(tmp); + tmp->base.u.gen4.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine; tmp->box = gen4_render_composite_spans_box; tmp->boxes = gen4_render_composite_spans_boxes; + if (tmp->emit_boxes) + tmp->thread_boxes = gen4_render_composite_spans_boxes__thread; tmp->done = gen4_render_composite_spans_done; if (!kgem_check_bo(&sna->kgem, @@ -2721,10 +2209,11 @@ cleanup_dst: static void gen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op) { + bool dirty = kgem_bo_is_dirty(op->dst.bo); uint32_t *binding_table; uint16_t offset; - gen4_get_batch(sna); + gen4_get_batch(sna, op); binding_table = gen4_composite_get_binding_table(sna, &offset); @@ -2745,7 +2234,7 @@ gen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op) offset = sna->render_state.gen4.surface_table; } - gen4_emit_state(sna, op, offset); + gen4_emit_state(sna, op, offset | dirty); } static void @@ -2768,19 +2257,6 @@ gen4_render_copy_one(struct sna *sna, OUT_VERTEX(dx, dy); OUT_VERTEX_F(sx*op->src.scale[0]); OUT_VERTEX_F(sy*op->src.scale[1]); - - _FLUSH(); -} - -static inline bool prefer_blt_copy(struct sna *sna, unsigned flags) -{ -#if PREFER_BLT - return true; - (void)sna; -#else - return sna->kgem.mode != KGEM_RENDER; -#endif - (void)flags; } static bool @@ -2793,8 +2269,7 @@ gen4_render_copy_boxes(struct sna *sna, uint8_t alu, DBG(("%s x %d\n", __FUNCTION__, n)); - if (prefer_blt_copy(sna, flags) && - sna_blt_compare_depth(&src->drawable, &dst->drawable) && + if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && sna_blt_copy_boxes(sna, alu, src_bo, src_dx, src_dy, dst_bo, dst_dx, dst_dy, @@ -2899,7 +2374,7 @@ fallback_blt: tmp.floats_per_vertex = 3; tmp.floats_per_rect = 9; tmp.u.gen4.wm_kernel = WM_KERNEL; - tmp.u.gen4.ve_id = 1; + tmp.u.gen4.ve_id = 2; if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { kgem_submit(&sna->kgem); @@ -2936,6 +2411,14 @@ fallback_tiled_dst: if (tmp.redirect.real_bo) kgem_bo_destroy(&sna->kgem, tmp.dst.bo); fallback_tiled: + if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && + sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n)) + return true; + return sna_tiling_copy_boxes(sna, alu, src, src_bo, src_dx, src_dy, dst, dst_bo, dst_dx, dst_dy, @@ -2955,7 +2438,8 @@ gen4_render_copy_blt(struct sna *sna, static void gen4_render_copy_done(struct sna *sna, const struct sna_copy_op *op) { - 
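/* flush only when a primitive is still open -- render.vertex_offset
	 * doubles as the "primitive in flight" marker throughout this file: */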
gen4_vertex_flush(sna); + if (sna->render.vertex_offset) + gen4_vertex_flush(sna); } static bool @@ -2970,8 +2454,7 @@ gen4_render_copy(struct sna *sna, uint8_t alu, dst->drawable.serialNumber, alu)); - if (prefer_blt(sna) && - sna_blt_compare_depth(&src->drawable, &dst->drawable) && + if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && sna_blt_copy(sna, alu, src_bo, dst_bo, dst->drawable.bitsPerPixel, @@ -3021,7 +2504,7 @@ fallback: op->base.floats_per_vertex = 3; op->base.floats_per_rect = 9; op->base.u.gen4.wm_kernel = WM_KERNEL; - op->base.u.gen4.ve_id = 1; + op->base.u.gen4.ve_id = 2; if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { kgem_submit(&sna->kgem); @@ -3047,56 +2530,20 @@ fallback: } static void -gen4_fill_bind_surfaces(struct sna *sna, const struct sna_composite_op *op) -{ - uint32_t *binding_table; - uint16_t offset; - - gen4_get_batch(sna); - - binding_table = gen4_composite_get_binding_table(sna, &offset); - - binding_table[0] = - gen4_bind_bo(sna, - op->dst.bo, op->dst.width, op->dst.height, - gen4_get_dest_format(op->dst.format), - true); - binding_table[1] = - gen4_bind_bo(sna, - op->src.bo, 1, 1, - GEN4_SURFACEFORMAT_B8G8R8A8_UNORM, - false); - - if (sna->kgem.surface == offset && - *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table) { - sna->kgem.surface += - sizeof(struct gen4_surface_state_padded)/sizeof(uint32_t); - offset = sna->render_state.gen4.surface_table; - } - - gen4_emit_state(sna, op, offset); -} - -static void gen4_render_fill_rectangle(struct sna *sna, const struct sna_composite_op *op, int x, int y, int w, int h) { - gen4_get_rectangles(sna, op, 1, gen4_fill_bind_surfaces); + gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces); OUT_VERTEX(x+w, y+h); - OUT_VERTEX_F(1); - OUT_VERTEX_F(1); + OUT_VERTEX_F(.5); OUT_VERTEX(x, y+h); - OUT_VERTEX_F(0); - OUT_VERTEX_F(1); + OUT_VERTEX_F(.5); OUT_VERTEX(x, y); - OUT_VERTEX_F(0); - OUT_VERTEX_F(0); - - _FLUSH(); + OUT_VERTEX_F(.5); } static bool @@ -3116,10 +2563,7 @@ gen4_render_fill_boxes(struct sna *sna, return false; } - if (op <= PictOpSrc && - (prefer_blt(sna) || - too_large(dst->drawable.width, dst->drawable.height) || - !gen4_check_dst_format(format))) { + if (op <= PictOpSrc) { uint8_t alu = GXinvalid; pixel = 0; @@ -3170,13 +2614,11 @@ gen4_render_fill_boxes(struct sna *sna, tmp.dst.format = format; tmp.dst.bo = dst_bo; - tmp.src.bo = sna_render_get_solid(sna, pixel); - tmp.src.filter = SAMPLER_FILTER_NEAREST; - tmp.src.repeat = SAMPLER_EXTEND_REPEAT; + gen4_channel_init_solid(sna, &tmp.src, pixel); tmp.is_affine = true; - tmp.floats_per_vertex = 3; - tmp.floats_per_rect = 9; + tmp.floats_per_vertex = 2; + tmp.floats_per_rect = 6; tmp.u.gen4.wm_kernel = WM_KERNEL; tmp.u.gen4.ve_id = 1; @@ -3185,7 +2627,7 @@ gen4_render_fill_boxes(struct sna *sna, assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); } - gen4_fill_bind_surfaces(sna, &tmp); + gen4_bind_surfaces(sna, &tmp); gen4_align_vertex(sna, &tmp); do { @@ -3235,7 +2677,8 @@ gen4_render_fill_op_boxes(struct sna *sna, static void gen4_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) { - gen4_vertex_flush(sna); + if (sna->render.vertex_offset) + gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, op->base.src.bo); } @@ -3245,8 +2688,7 @@ gen4_render_fill(struct sna *sna, uint8_t alu, uint32_t color, struct sna_fill_op *op) { - if (prefer_blt(sna) && - sna_blt_fill(sna, alu, + if (sna_blt_fill(sna, alu, dst_bo, dst->drawable.bitsPerPixel, color, op)) @@ -3274,20 +2716,14 @@ 
gen4_render_fill(struct sna *sna, uint8_t alu, op->base.need_magic_ca_pass = 0; op->base.has_component_alpha = 0; - op->base.src.bo = - sna_render_get_solid(sna, - sna_rgba_for_color(color, - dst->drawable.depth)); - op->base.src.filter = SAMPLER_FILTER_NEAREST; - op->base.src.repeat = SAMPLER_EXTEND_REPEAT; - + gen4_channel_init_solid(sna, &op->base.src, + sna_rgba_for_color(color, + dst->drawable.depth)); op->base.mask.bo = NULL; - op->base.mask.filter = SAMPLER_FILTER_NEAREST; - op->base.mask.repeat = SAMPLER_EXTEND_NONE; op->base.is_affine = true; - op->base.floats_per_vertex = 3; - op->base.floats_per_rect = 9; + op->base.floats_per_vertex = 2; + op->base.floats_per_rect = 6; op->base.u.gen4.wm_kernel = WM_KERNEL; op->base.u.gen4.ve_id = 1; @@ -3296,7 +2732,7 @@ gen4_render_fill(struct sna *sna, uint8_t alu, assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); } - gen4_fill_bind_surfaces(sna, &op->base); + gen4_bind_surfaces(sna, &op->base); gen4_align_vertex(sna, &op->base); op->blt = gen4_render_fill_op_blt; @@ -3356,32 +2792,29 @@ gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, tmp.dst.bo = bo; tmp.dst.x = tmp.dst.y = 0; - tmp.src.bo = - sna_render_get_solid(sna, - sna_rgba_for_color(color, - dst->drawable.depth)); - tmp.src.filter = SAMPLER_FILTER_NEAREST; - tmp.src.repeat = SAMPLER_EXTEND_REPEAT; - + gen4_channel_init_solid(sna, &tmp.src, + sna_rgba_for_color(color, + dst->drawable.depth)); tmp.mask.bo = NULL; - tmp.mask.filter = SAMPLER_FILTER_NEAREST; - tmp.mask.repeat = SAMPLER_EXTEND_NONE; tmp.is_affine = true; - tmp.floats_per_vertex = 3; - tmp.floats_per_rect = 9; - tmp.has_component_alpha = 0; + tmp.floats_per_vertex = 2; + tmp.floats_per_rect = 6; + tmp.has_component_alpha = false; tmp.need_magic_ca_pass = false; tmp.u.gen4.wm_kernel = WM_KERNEL; tmp.u.gen4.ve_id = 1; if (!kgem_check_bo(&sna->kgem, bo, NULL)) { - _kgem_submit(&sna->kgem); - assert(kgem_check_bo(&sna->kgem, bo, NULL)); + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, bo, NULL)) { + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + return false; + } } - gen4_fill_bind_surfaces(sna, &tmp); + gen4_bind_surfaces(sna, &tmp); gen4_align_vertex(sna, &tmp); gen4_render_fill_rectangle(sna, &tmp, x1, y1, x2 - x1, y2 - y1); @@ -3396,6 +2829,9 @@ static void gen4_render_flush(struct sna *sna) { gen4_vertex_close(sna); + + assert(sna->render.vb_id == 0); + assert(sna->render.vertex_offset == 0); } static void @@ -3438,7 +2874,6 @@ static void gen4_render_reset(struct sna *sna) { sna->render_state.gen4.needs_invariant = true; sna->render_state.gen4.needs_urb = true; - sna->render_state.gen4.vb_id = 0; sna->render_state.gen4.ve_id = -1; sna->render_state.gen4.last_primitive = -1; sna->render_state.gen4.last_pipelined_pointers = -1; @@ -3452,6 +2887,10 @@ static void gen4_render_reset(struct sna *sna) DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); discard_vbo(sna); } + + sna->render.vertex_offset = 0; + sna->render.nvertex_reloc = 0; + sna->render.vb_id = 0; } static void gen4_render_fini(struct sna *sna) @@ -3473,8 +2912,7 @@ static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream) } static uint32_t gen4_create_sf_state(struct sna_static_stream *stream, - const struct gt_info *info, - uint32_t kernel) + int gen, uint32_t kernel) { struct gen4_sf_unit_state *sf; @@ -3488,7 +2926,7 @@ static uint32_t gen4_create_sf_state(struct sna_static_stream *stream, /* don't smash vertex header, read start from dw8 */ sf->thread3.urb_entry_read_offset = 1; 
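/* with the vertex header skipped via the read offset above, the SF
	 * kernel presumably sees its first attribute payload in g3: */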
sf->thread3.dispatch_grf_start_reg = 3; - sf->thread4.max_threads = info->max_sf_threads - 1; + sf->thread4.max_threads = GEN4_MAX_SF_THREADS - 1; sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; sf->thread4.nr_urb_entries = URB_SF_ENTRIES; sf->sf5.viewport_transform = false; /* skip viewport */ @@ -3519,7 +2957,7 @@ static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream, } static void gen4_init_wm_state(struct gen4_wm_unit_state *wm, - const struct gt_info *info, + int gen, bool has_mask, uint32_t kernel, uint32_t sampler) @@ -3540,7 +2978,7 @@ static void gen4_init_wm_state(struct gen4_wm_unit_state *wm, wm->wm4.sampler_state_pointer = sampler >> 5; wm->wm4.sampler_count = 1; - wm->wm5.max_threads = info->max_wm_threads - 1; + wm->wm5.max_threads = gen >= 045 ? G4X_MAX_WM_THREADS - 1 : GEN4_MAX_WM_THREADS - 1; wm->wm5.transposed_urb_read = 0; wm->wm5.thread_dispatch_enable = 1; /* just use 16-pixel dispatch (4 subspans), don't need to change kernel @@ -3560,23 +2998,11 @@ static void gen4_init_wm_state(struct gen4_wm_unit_state *wm, } } -static uint32_t gen4_create_cc_viewport(struct sna_static_stream *stream) -{ - struct gen4_cc_viewport vp; - - vp.min_depth = -1.e35; - vp.max_depth = 1.e35; - - return sna_static_stream_add(stream, &vp, sizeof(vp), 32); -} - static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream) { uint8_t *ptr, *base; - uint32_t vp; int i, j; - vp = gen4_create_cc_viewport(stream); base = ptr = sna_static_stream_map(stream, GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64, @@ -3589,7 +3015,6 @@ static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream) state->cc3.blend_enable = !(j == GEN4_BLENDFACTOR_ZERO && i == GEN4_BLENDFACTOR_ONE); - state->cc4.cc_viewport_state_offset = vp >> 5; state->cc5.logicop_func = 0xc; /* COPY */ state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD; @@ -3616,15 +3041,9 @@ static bool gen4_render_setup(struct sna *sna) struct gen4_render_state *state = &sna->render_state.gen4; struct sna_static_stream general; struct gen4_wm_unit_state_padded *wm_state; - const struct gt_info *info; - uint32_t sf[2], wm[KERNEL_COUNT]; + uint32_t sf, wm[KERNEL_COUNT]; int i, j, k, l, m; - if (sna->kgem.gen == 45) - info = &g4x_gt_info; - else - info = &gen4_gt_info; - sna_static_stream_init(&general); /* Zero pad the start. 
If you see an offset of 0x0 in the batchbuffer @@ -3632,8 +3051,7 @@ static bool gen4_render_setup(struct sna *sna) */ null_create(&general); - sf[0] = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__nomask); - sf[1] = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask); + sf = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask); for (m = 0; m < KERNEL_COUNT; m++) { if (wm_kernels[m].size) { wm[m] = sna_static_stream_add(&general, @@ -3648,8 +3066,7 @@ static bool gen4_render_setup(struct sna *sna) } state->vs = gen4_create_vs_unit_state(&general); - state->sf[0] = gen4_create_sf_state(&general, info, sf[0]); - state->sf[1] = gen4_create_sf_state(&general, info, sf[1]); + state->sf = gen4_create_sf_state(&general, sna->kgem.gen, sf); wm_state = sna_static_stream_map(&general, sizeof(*wm_state) * KERNEL_COUNT * @@ -3669,7 +3086,8 @@ static bool gen4_render_setup(struct sna *sna) k, l); for (m = 0; m < KERNEL_COUNT; m++) { - gen4_init_wm_state(&wm_state->state, info, + gen4_init_wm_state(&wm_state->state, + sna->kgem.gen, wm_kernels[m].has_mask, wm[m], sampler_state); wm_state++; @@ -3695,10 +3113,13 @@ bool gen4_render_init(struct sna *sna) #if !NO_COMPOSITE sna->render.composite = gen4_render_composite; + sna->render.prefer_gpu |= PREFER_GPU_RENDER; #endif #if !NO_COMPOSITE_SPANS sna->render.check_composite_spans = gen4_check_composite_spans; sna->render.composite_spans = gen4_render_composite_spans; + if (0) + sna->render.prefer_gpu |= PREFER_GPU_SPANS; #endif #if !NO_VIDEO diff --git a/src/sna/gen4_render.h b/src/sna/gen4_render.h index 49d232e88..53c7fc2f7 100644 --- a/src/sna/gen4_render.h +++ b/src/sna/gen4_render.h @@ -25,8 +25,8 @@ * **************************************************************************/ -#ifndef GEN5_RENDER_H -#define GEN5_RENDER_H +#ifndef GEN4_RENDER_H +#define GEN4_RENDER_H #define GEN4_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \ ((Pipeline) << 27) | \ @@ -661,15 +661,14 @@ #define GEN4_VERTEXBUFFER_ACCESS_VERTEXDATA 0 #define GEN4_VERTEXBUFFER_ACCESS_INSTANCEDATA 1 -#define GEN4_VFCOMPONENT_NOSTORE 0 -#define GEN4_VFCOMPONENT_STORE_SRC 1 -#define GEN4_VFCOMPONENT_STORE_0 2 -#define GEN4_VFCOMPONENT_STORE_1_FLT 3 -#define GEN4_VFCOMPONENT_STORE_1_INT 4 -#define GEN4_VFCOMPONENT_STORE_VID 5 -#define GEN4_VFCOMPONENT_STORE_IID 6 -#define GEN4_VFCOMPONENT_STORE_PID 7 - +#define VFCOMPONENT_NOSTORE 0 +#define VFCOMPONENT_STORE_SRC 1 +#define VFCOMPONENT_STORE_0 2 +#define VFCOMPONENT_STORE_1_FLT 3 +#define VFCOMPONENT_STORE_1_INT 4 +#define VFCOMPONENT_STORE_VID 5 +#define VFCOMPONENT_STORE_IID 6 +#define VFCOMPONENT_STORE_PID 7 /* Execution Unit (EU) defines @@ -725,8 +724,8 @@ #define GEN4_INSTRUCTION_NORMAL 0 #define GEN4_INSTRUCTION_SATURATE 1 -#define GEN4_MASK_ENABLE 0 -#define GEN4_MASK_DISABLE 1 +#define _MASK_ENABLE 0 +#define _MASK_DISABLE 1 #define GEN4_OPCODE_MOV 1 #define GEN4_OPCODE_SEL 2 @@ -2043,6 +2042,54 @@ struct gen4_surface_state } ss5; }; +/* Surface state DW0 */ +#define GEN4_SURFACE_RC_READ_WRITE (1 << 8) +#define GEN4_SURFACE_MIPLAYOUT_SHIFT 10 +#define GEN4_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define GEN4_SURFACE_MIPMAPLAYOUT_RIGHT 1 +#define GEN4_SURFACE_CUBEFACE_ENABLES 0x3f +#define GEN4_SURFACE_BLEND_ENABLED (1 << 13) +#define GEN4_SURFACE_WRITEDISABLE_B_SHIFT 14 +#define GEN4_SURFACE_WRITEDISABLE_G_SHIFT 15 +#define GEN4_SURFACE_WRITEDISABLE_R_SHIFT 16 +#define GEN4_SURFACE_WRITEDISABLE_A_SHIFT 17 +#define GEN4_SURFACE_FORMAT_SHIFT 18 +#define GEN4_SURFACE_FORMAT_MASK _MASK(26, 18) + +#define 
GEN4_SURFACE_TYPE_SHIFT 29 +#define GEN4_SURFACE_TYPE_MASK _MASK(31, 29) +#define GEN4_SURFACE_1D 0 +#define GEN4_SURFACE_2D 1 +#define GEN4_SURFACE_3D 2 +#define GEN4_SURFACE_CUBE 3 +#define GEN4_SURFACE_BUFFER 4 +#define GEN4_SURFACE_NULL 7 + +/* Surface state DW2 */ +#define GEN4_SURFACE_HEIGHT_SHIFT 19 +#define GEN4_SURFACE_HEIGHT_MASK _MASK(31, 19) +#define GEN4_SURFACE_WIDTH_SHIFT 6 +#define GEN4_SURFACE_WIDTH_MASK _MASK(18, 6) +#define GEN4_SURFACE_LOD_SHIFT 2 +#define GEN4_SURFACE_LOD_MASK _MASK(5, 2) + +/* Surface state DW3 */ +#define GEN4_SURFACE_DEPTH_SHIFT 21 +#define GEN4_SURFACE_DEPTH_MASK _MASK(31, 21) +#define GEN4_SURFACE_PITCH_SHIFT 3 +#define GEN4_SURFACE_PITCH_MASK _MASK(19, 3) +#define GEN4_SURFACE_TILED (1 << 1) +#define GEN4_SURFACE_TILED_Y (1 << 0) + +/* Surface state DW4 */ +#define GEN4_SURFACE_MIN_LOD_SHIFT 28 +#define GEN4_SURFACE_MIN_LOD_MASK _MASK(31, 28) + +/* Surface state DW5 */ +#define GEN4_SURFACE_X_OFFSET_SHIFT 25 +#define GEN4_SURFACE_X_OFFSET_MASK _MASK(31, 25) +#define GEN4_SURFACE_Y_OFFSET_SHIFT 20 +#define GEN4_SURFACE_Y_OFFSET_MASK _MASK(23, 20) struct gen4_vertex_buffer_state diff --git a/src/sna/gen4_source.c b/src/sna/gen4_source.c new file mode 100644 index 000000000..749de8d60 --- /dev/null +++ b/src/sna/gen4_source.c @@ -0,0 +1,179 @@ +/* + * Copyright © 2011,2012,2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_render.h" +#include "sna_render_inline.h" +#include "gen4_source.h" +#include "gen4_render.h" + +bool +gen4_channel_init_solid(struct sna *sna, + struct sna_composite_channel *channel, + uint32_t color) +{ + channel->filter = PictFilterNearest; + channel->repeat = RepeatNormal; + channel->is_affine = true; + channel->is_solid = true; + channel->is_opaque = (color >> 24) == 0xff; + channel->transform = NULL; + channel->width = 1; + channel->height = 1; + channel->pict_format = PICT_a8r8g8b8; + channel->card_format = GEN4_SURFACEFORMAT_B8G8R8A8_UNORM; + + channel->bo = sna_render_get_solid(sna, color); + + channel->scale[0] = channel->scale[1] = 1; + channel->offset[0] = channel->offset[1] = 0; + return channel->bo != NULL; +} + +bool +gen4_channel_init_linear(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int x, int y, + int w, int h, + int dst_x, int dst_y) +{ + PictLinearGradient *linear = + (PictLinearGradient *)picture->pSourcePict; + pixman_fixed_t tx, ty; + float x0, y0, sf; + float dx, dy; + + DBG(("%s: p1=(%f, %f), p2=(%f, %f), src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n", + __FUNCTION__, + pixman_fixed_to_double(linear->p1.x), pixman_fixed_to_double(linear->p1.y), + pixman_fixed_to_double(linear->p2.x), pixman_fixed_to_double(linear->p2.y), + x, y, dst_x, dst_y, w, h)); + + if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y) + return 0; + + if (!sna_transform_is_affine(picture->transform)) { + DBG(("%s: fallback due to projective transform\n", + __FUNCTION__)); + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + } + + channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear); + if (!channel->bo) + return 0; + + channel->filter = PictFilterNearest; + channel->repeat = picture->repeat ? 
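/* Aside: the remainder of this function reduces an affine linear
 * gradient to a single scalar per pixel. A minimal sketch of the math
 * (illustrative only, with ad hoc names; not code from this patch):
 *
 *     float gradient_t(float x, float y,
 *                      float p1x, float p1y, float p2x, float p2y)
 *     {
 *         float dx = p2x - p1x, dy = p2y - p1y;
 *         float len2 = dx*dx + dy*dy;   // the 'sf' below
 *         dx /= len2; dy /= len2;
 *         // project (x,y) - p1 onto the gradient axis:
 *         // t == 0 at p1, t == 1 at p2
 *         return dx*(x - p1x) + dy*(y - p1y);
 *     }
 *
 * The embedded_transform built below bakes exactly this into row 0 of
 * a pixman transform, with row 1 fixed at 0.5, so sampling the
 * one-texel-high gradient bo at (t, 0.5) fetches the interpolated
 * colour. */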
picture->repeatType : RepeatNone; + channel->width = channel->bo->pitch / 4; + channel->height = 1; + channel->pict_format = PICT_a8r8g8b8; + channel->card_format = GEN4_SURFACEFORMAT_B8G8R8A8_UNORM; + channel->is_linear = 1; + channel->is_affine = 1; + + channel->scale[0] = channel->scale[1] = 1; + channel->offset[0] = channel->offset[1] = 0; + + if (sna_transform_is_translation(picture->transform, &tx, &ty)) { + dx = pixman_fixed_to_double(linear->p2.x - linear->p1.x); + dy = pixman_fixed_to_double(linear->p2.y - linear->p1.y); + + x0 = pixman_fixed_to_double(linear->p1.x); + y0 = pixman_fixed_to_double(linear->p1.y); + + if (tx | ty) { + x0 -= pixman_fixed_to_double(tx); + y0 -= pixman_fixed_to_double(ty); + } + } else { + struct pixman_f_vector p1, p2; + struct pixman_f_transform m, inv; + + pixman_f_transform_from_pixman_transform(&m, picture->transform); + DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n", + __FUNCTION__, + m.m[0][0], m.m[0][1], m.m[0][2], + m.m[1][0], m.m[1][1], m.m[1][2], + m.m[2][0], m.m[2][1], m.m[2][2])); + if (!pixman_f_transform_invert(&inv, &m)) + return 0; + + p1.v[0] = pixman_fixed_to_double(linear->p1.x); + p1.v[1] = pixman_fixed_to_double(linear->p1.y); + p1.v[2] = 1.; + pixman_f_transform_point(&inv, &p1); + + p2.v[0] = pixman_fixed_to_double(linear->p2.x); + p2.v[1] = pixman_fixed_to_double(linear->p2.y); + p2.v[2] = 1.; + pixman_f_transform_point(&inv, &p2); + + DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n", + __FUNCTION__, + p1.v[0], p1.v[1], p1.v[2], + p2.v[0], p2.v[1], p2.v[2])); + + dx = p2.v[0] - p1.v[0]; + dy = p2.v[1] - p1.v[1]; + + x0 = p1.v[0]; + y0 = p1.v[1]; + } + + sf = dx*dx + dy*dy; + dx /= sf; + dy /= sf; + + channel->u.linear.dx = dx; + channel->u.linear.dy = dy; + channel->u.linear.offset = -dx*(x0+dst_x-x) + -dy*(y0+dst_y-y); + + channel->embedded_transform.matrix[0][0] = pixman_double_to_fixed(dx); + channel->embedded_transform.matrix[0][1] = pixman_double_to_fixed(dy); + channel->embedded_transform.matrix[0][2] = pixman_double_to_fixed(channel->u.linear.offset); + + channel->embedded_transform.matrix[1][0] = 0; + channel->embedded_transform.matrix[1][1] = 0; + channel->embedded_transform.matrix[1][2] = pixman_double_to_fixed(.5); + + channel->embedded_transform.matrix[2][0] = 0; + channel->embedded_transform.matrix[2][1] = 0; + channel->embedded_transform.matrix[2][2] = pixman_fixed_1; + + channel->transform = &channel->embedded_transform; + + DBG(("%s: dx=%f, dy=%f, offset=%f\n", + __FUNCTION__, dx, dy, channel->u.linear.offset)); + + return channel->bo != NULL; +} diff --git a/src/sna/gen4_source.h b/src/sna/gen4_source.h new file mode 100644 index 000000000..c73afaca9 --- /dev/null +++ b/src/sna/gen4_source.h @@ -0,0 +1,22 @@ +#ifndef GEN4_SOURCE_H +#define GEN4_SOURCE_H + +#include "compiler.h" + +#include "sna.h" +#include "sna_render.h" + +bool +gen4_channel_init_solid(struct sna *sna, + struct sna_composite_channel *channel, + uint32_t color); + +bool +gen4_channel_init_linear(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int x, int y, + int w, int h, + int dst_x, int dst_y); + +#endif /* GEN4_SOURCE_H */ diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c new file mode 100644 index 000000000..5062ebdf0 --- /dev/null +++ b/src/sna/gen4_vertex.c @@ -0,0 +1,1543 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + 
* to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_render.h" +#include "sna_render_inline.h" +#include "gen4_vertex.h" + +void gen4_vertex_flush(struct sna *sna) +{ + DBG(("%s[%x] = %d\n", __FUNCTION__, + 4*sna->render.vertex_offset, + sna->render.vertex_index - sna->render.vertex_start)); + + assert(sna->render.vertex_offset); + assert(sna->render.vertex_index > sna->render.vertex_start); + + sna->kgem.batch[sna->render.vertex_offset] = + sna->render.vertex_index - sna->render.vertex_start; + sna->render.vertex_offset = 0; +} + +int gen4_vertex_finish(struct sna *sna) +{ + struct kgem_bo *bo; + unsigned int i; + unsigned hint, size; + + DBG(("%s: used=%d / %d\n", __FUNCTION__, + sna->render.vertex_used, sna->render.vertex_size)); + assert(sna->render.vertex_offset == 0); + assert(sna->render.vertex_used); + + sna_vertex_wait__locked(&sna->render); + + /* Note: we only need dword alignment (currently) */ + + bo = sna->render.vbo; + if (bo) { + for (i = 0; i < sna->render.nvertex_reloc; i++) { + DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, + i, sna->render.vertex_reloc[i])); + + sna->kgem.batch[sna->render.vertex_reloc[i]] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i], bo, + I915_GEM_DOMAIN_VERTEX << 16, + 0); + } + + assert(!sna->render.active); + sna->render.nvertex_reloc = 0; + sna->render.vertex_used = 0; + sna->render.vertex_index = 0; + sna->render.vbo = NULL; + sna->render.vb_id = 0; + + kgem_bo_destroy(&sna->kgem, bo); + } + + hint = CREATE_GTT_MAP; + if (bo) + hint |= CREATE_CACHED | CREATE_NO_THROTTLE; + + size = 256*1024; + assert(!sna->render.active); + sna->render.vertices = NULL; + sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint); + while (sna->render.vbo == NULL && size > 16*1024) { + size /= 2; + sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint); + } + if (sna->render.vbo == NULL) + sna->render.vbo = kgem_create_linear(&sna->kgem, + 256*1024, CREATE_GTT_MAP); + if (sna->render.vbo) + sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo); + if (sna->render.vertices == NULL) { + if (sna->render.vbo) { + kgem_bo_destroy(&sna->kgem, sna->render.vbo); + sna->render.vbo = NULL; + } + sna->render.vertices = sna->render.vertex_data; + sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); + return 0; + } + + if (sna->render.vertex_used) { + DBG(("%s: copying initial buffer x %d to handle=%d\n", + __FUNCTION__, + sna->render.vertex_used, + sna->render.vbo->handle)); + 
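/* The allocation ladder above requests a GTT-mappable 256KiB vbo
 * (cached and unthrottled when replacing an existing one), halves the
 * request down to 16KiB under memory pressure, and finally falls back
 * to the static vertex_data[] array, in which case 0 is returned and
 * the caller must flush. Typical caller shape, per
 * gen5_get_rectangles() further down (a sketch, not new code):
 *
 *     rem = vertex_space(sna);
 *     if (unlikely(rem < op->floats_per_rect))
 *         rem = gen5_get_rectangles__flush(sna, op);
 *         // ...which ends in gen4_vertex_finish(sna)
 */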
assert(sizeof(float)*sna->render.vertex_used <= + __kgem_bo_size(sna->render.vbo)); + memcpy(sna->render.vertices, + sna->render.vertex_data, + sizeof(float)*sna->render.vertex_used); + } + + size = __kgem_bo_size(sna->render.vbo)/4; + if (size >= UINT16_MAX) + size = UINT16_MAX - 1; + + DBG(("%s: create vbo handle=%d, size=%d\n", + __FUNCTION__, sna->render.vbo->handle, size)); + + sna->render.vertex_size = size; + return sna->render.vertex_size - sna->render.vertex_used; +} + +void gen4_vertex_close(struct sna *sna) +{ + struct kgem_bo *bo, *free_bo = NULL; + unsigned int i, delta = 0; + + assert(sna->render.vertex_offset == 0); + if (!sna->render.vb_id) + return; + + DBG(("%s: used=%d, vbo active? %d, vb=%x, nreloc=%d\n", + __FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0, + sna->render.vb_id, sna->render.nvertex_reloc)); + + assert(!sna->render.active); + + bo = sna->render.vbo; + if (bo) { + if (sna->render.vertex_size - sna->render.vertex_used < 64) { + DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); + sna->render.vbo = NULL; + sna->render.vertices = sna->render.vertex_data; + sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); + free_bo = bo; + } else if (IS_CPU_MAP(bo->map) && !sna->kgem.has_llc) { + DBG(("%s: converting CPU map to GTT\n", __FUNCTION__)); + sna->render.vertices = + kgem_bo_map__gtt(&sna->kgem, sna->render.vbo); + if (sna->render.vertices == NULL) { + sna->render.vbo = NULL; + sna->render.vertices = sna->render.vertex_data; + sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); + free_bo = bo; + } + + } + } else { + if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) { + DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__, + sna->render.vertex_used, sna->kgem.nbatch)); + memcpy(sna->kgem.batch + sna->kgem.nbatch, + sna->render.vertex_data, + sna->render.vertex_used * 4); + delta = sna->kgem.nbatch * 4; + bo = NULL; + sna->kgem.nbatch += sna->render.vertex_used; + } else { + bo = kgem_create_linear(&sna->kgem, + 4*sna->render.vertex_used, + CREATE_NO_THROTTLE); + if (bo && !kgem_bo_write(&sna->kgem, bo, + sna->render.vertex_data, + 4*sna->render.vertex_used)) { + kgem_bo_destroy(&sna->kgem, bo); + bo = NULL; + } + DBG(("%s: new vbo: %d\n", __FUNCTION__, + sna->render.vertex_used)); + free_bo = bo; + } + } + + assert(sna->render.nvertex_reloc); + for (i = 0; i < sna->render.nvertex_reloc; i++) { + DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, + i, sna->render.vertex_reloc[i])); + + sna->kgem.batch[sna->render.vertex_reloc[i]] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i], bo, + I915_GEM_DOMAIN_VERTEX << 16, + delta); + } + sna->render.nvertex_reloc = 0; + sna->render.vb_id = 0; + + if (sna->render.vbo == NULL) { + assert(!sna->render.active); + sna->render.vertex_used = 0; + sna->render.vertex_index = 0; + assert(sna->render.vertices == sna->render.vertex_data); + assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data)); + } + + if (free_bo) + kgem_bo_destroy(&sna->kgem, free_bo); +} + +/* specialised vertex emission routines */ + +#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) /* XXX assert(!too_large(x, y)); */ +#define OUT_VERTEX_F(v) vertex_emit(sna, v) + +inline static float +compute_linear(const struct sna_composite_channel *channel, + int16_t x, int16_t y) +{ + return ((x+channel->offset[0]) * channel->u.linear.dx + + (y+channel->offset[1]) * channel->u.linear.dy + + channel->u.linear.offset); +} + +inline static void 
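/* Worked example for compute_linear() above (values illustrative):
 * a gradient from p1=(0,0) to p2=(100,0) yields u.linear.dx = 0.01,
 * u.linear.dy = 0 and u.linear.offset = 0 per the setup in
 * gen4_source.c, so a pixel at x == 50 evaluates to t = 0.5, the
 * midpoint of the gradient ramp. */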
+emit_texcoord(struct sna *sna, + const struct sna_composite_channel *channel, + int16_t x, int16_t y) +{ + if (channel->is_solid) { + OUT_VERTEX_F(x); + return; + } + + x += channel->offset[0]; + y += channel->offset[1]; + + if (channel->is_affine) { + float s, t; + + sna_get_transformed_coordinates(x, y, + channel->transform, + &s, &t); + OUT_VERTEX_F(s * channel->scale[0]); + OUT_VERTEX_F(t * channel->scale[1]); + } else { + float s, t, w; + + sna_get_transformed_coordinates_3d(x, y, + channel->transform, + &s, &t, &w); + OUT_VERTEX_F(s * channel->scale[0]); + OUT_VERTEX_F(t * channel->scale[1]); + OUT_VERTEX_F(w); + } +} + +inline static void +emit_vertex(struct sna *sna, + const struct sna_composite_op *op, + int16_t srcX, int16_t srcY, + int16_t mskX, int16_t mskY, + int16_t dstX, int16_t dstY) +{ + OUT_VERTEX(dstX, dstY); + emit_texcoord(sna, &op->src, srcX, srcY); +} + +fastcall static void +emit_primitive(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + emit_vertex(sna, op, + r->src.x + r->width, r->src.y + r->height, + r->mask.x + r->width, r->mask.y + r->height, + r->dst.x + r->width, r->dst.y + r->height); + emit_vertex(sna, op, + r->src.x, r->src.y + r->height, + r->mask.x, r->mask.y + r->height, + r->dst.x, r->dst.y + r->height); + emit_vertex(sna, op, + r->src.x, r->src.y, + r->mask.x, r->mask.y, + r->dst.x, r->dst.y); +} + +inline static void +emit_vertex_mask(struct sna *sna, + const struct sna_composite_op *op, + int16_t srcX, int16_t srcY, + int16_t mskX, int16_t mskY, + int16_t dstX, int16_t dstY) +{ + OUT_VERTEX(dstX, dstY); + emit_texcoord(sna, &op->src, srcX, srcY); + emit_texcoord(sna, &op->mask, mskX, mskY); +} + +fastcall static void +emit_primitive_mask(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + emit_vertex_mask(sna, op, + r->src.x + r->width, r->src.y + r->height, + r->mask.x + r->width, r->mask.y + r->height, + r->dst.x + r->width, r->dst.y + r->height); + emit_vertex_mask(sna, op, + r->src.x, r->src.y + r->height, + r->mask.x, r->mask.y + r->height, + r->dst.x, r->dst.y + r->height); + emit_vertex_mask(sna, op, + r->src.x, r->src.y, + r->mask.x, r->mask.y, + r->dst.x, r->dst.y); +} + +fastcall static void +emit_primitive_solid(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + assert(op->floats_per_rect == 6); + assert((sna->render.vertex_used % 2) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[2] = dst.f; + dst.p.y = r->dst.y; + v[4] = dst.f; + + v[5] = v[3] = v[1] = .5; +} + +fastcall static void +emit_boxes_solid(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[2] = dst.f; + dst.p.y = box->y1; + v[4] = dst.f; + + v[5] = v[3] = v[1] = .5; + box++; + v += 6; + } while (--nbox); +} + +fastcall static void +emit_primitive_linear(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + assert(op->floats_per_rect == 6); + 
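/* The dst union above is a bit-packing trick, not a conversion: the
 * two int16_t coordinates are stored raw into the 32-bit float slot,
 * and the vertex fetch is presumably configured to decode that dword
 * back into a pair of signed 16-bit values (hence OUT_VERTEX() ->
 * vertex_emit_2s()). A sketch:
 *
 *     dst.p.x = 100; dst.p.y = 50;
 *     v[0] = dst.f;   // bit pattern of (100,50), not a numeric value
 *
 * This packs the whole position attribute into a single float. */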
assert((sna->render.vertex_used % 2) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[2] = dst.f; + dst.p.y = r->dst.y; + v[4] = dst.f; + + v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); + v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); + v[5] = compute_linear(&op->src, r->src.x, r->src.y); +} + +fastcall static void +emit_boxes_linear(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + union { + struct sna_coordinate p; + float f; + } dst; + + do { + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[2] = dst.f; + dst.p.y = box->y1; + v[4] = dst.f; + + v[1] = compute_linear(&op->src, box->x2, box->y2); + v[3] = compute_linear(&op->src, box->x1, box->y2); + v[5] = compute_linear(&op->src, box->x1, box->y1); + + v += 6; + box++; + } while (--nbox); +} + +fastcall static void +emit_primitive_identity_source(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + assert(op->floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[3] = dst.f; + dst.p.y = r->dst.y; + v[6] = dst.f; + + v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; + v[1] = v[4] + r->width * op->src.scale[0]; + + v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; + v[5] = v[2] = v[8] + r->height * op->src.scale[1]; +} + +fastcall static void +emit_boxes_identity_source(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[3] = dst.f; + dst.p.y = box->y1; + v[6] = dst.f; + + v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; + v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; + + v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; + v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; + + v += 9; + box++; + } while (--nbox); +} + +fastcall static void +emit_primitive_simple_source(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + int16_t tx = op->src.offset[0]; + int16_t ty = op->src.offset[1]; + + assert(op->floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*3; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; + v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; + + dst.p.x = r->dst.x; + v[3] = dst.f; + v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; + + dst.p.y = r->dst.y; + v[6] = dst.f; + v[8] = 
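/* 'simple' here means affine with no rotation (see the
 * sna_affine_transform_is_rotation() check in the emitter chooser
 * below), so each texcoord collapses to one multiply-add per axis:
 * s = ((x + tx)*xx + x0)*sx, and likewise for t, avoiding the generic
 * matrix path of emit_primitive_affine_source(). */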
((r->src.y + ty) * yy + y0) * sy; +} + +fastcall static void +emit_boxes_simple_source(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + int16_t tx = op->src.offset[0]; + int16_t ty = op->src.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = ((box->x2 + tx) * xx + x0) * sx; + v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; + + dst.p.x = box->x1; + v[3] = dst.f; + v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; + + dst.p.y = box->y1; + v[6] = dst.f; + v[8] = ((box->y1 + ty) * yy + y0) * sy; + + v += 9; + box++; + } while (--nbox); +} + +fastcall static void +emit_primitive_affine_source(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + assert(op->floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width, + op->src.offset[1] + r->src.y + r->height, + op->src.transform, op->src.scale, + &v[1], &v[2]); + + dst.p.x = r->dst.x; + v[3] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, + op->src.offset[1] + r->src.y + r->height, + op->src.transform, op->src.scale, + &v[4], &v[5]); + + dst.p.y = r->dst.y; + v[6] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, + op->src.offset[1] + r->src.y, + op->src.transform, op->src.scale, + &v[7], &v[8]); +} + +fastcall static void +emit_boxes_affine_source(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + box->x2, + op->src.offset[1] + box->y2, + op->src.transform, op->src.scale, + &v[1], &v[2]); + + dst.p.x = box->x1; + v[3] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + box->x1, + op->src.offset[1] + box->y2, + op->src.transform, op->src.scale, + &v[4], &v[5]); + + dst.p.y = box->y1; + v[6] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + box->x1, + op->src.offset[1] + box->y1, + op->src.transform, op->src.scale, + &v[7], &v[8]); + box++; + v += 9; + } while (--nbox); +} + +fastcall static void +emit_primitive_identity_mask(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float msk_x, msk_y; + float w, h; + float *v; + + msk_x = r->mask.x + op->mask.offset[0]; + msk_y = r->mask.y + op->mask.offset[1]; + w = r->width; + h = r->height; + + DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", + __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); + + assert(op->floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[2] = (msk_x + w) * op->mask.scale[0]; + v[7] = v[3] 
= (msk_y + h) * op->mask.scale[1]; + + dst.p.x = r->dst.x; + v[4] = dst.f; + v[10] = v[6] = msk_x * op->mask.scale[0]; + + dst.p.y = r->dst.y; + v[8] = dst.f; + v[11] = msk_y * op->mask.scale[1]; + + v[9] = v[5] = v[1] = .5; +} + +fastcall static void +emit_boxes_identity_mask(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + float msk_x = op->mask.offset[0]; + float msk_y = op->mask.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[2] = (msk_x + box->x2) * op->mask.scale[0]; + v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; + + dst.p.x = box->x1; + v[4] = dst.f; + v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; + + dst.p.y = box->y1; + v[8] = dst.f; + v[11] = (msk_y + box->y1) * op->mask.scale[1]; + + v[9] = v[5] = v[1] = .5; + v += 12; + box++; + } while (--nbox); +} + +fastcall static void +emit_primitive_linear_identity_mask(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float msk_x, msk_y; + float w, h; + float *v; + + msk_x = r->mask.x + op->mask.offset[0]; + msk_y = r->mask.y + op->mask.offset[1]; + w = r->width; + h = r->height; + + DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", + __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); + + assert(op->floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[2] = (msk_x + w) * op->mask.scale[0]; + v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; + + dst.p.x = r->dst.x; + v[4] = dst.f; + v[10] = v[6] = msk_x * op->mask.scale[0]; + + dst.p.y = r->dst.y; + v[8] = dst.f; + v[11] = msk_y * op->mask.scale[1]; + + v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); + v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height); + v[9] = compute_linear(&op->src, r->src.x, r->src.y); +} + +fastcall static void +emit_boxes_linear_identity_mask(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + float msk_x = op->mask.offset[0]; + float msk_y = op->mask.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[2] = (msk_x + box->x2) * op->mask.scale[0]; + v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; + + dst.p.x = box->x1; + v[4] = dst.f; + v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; + + dst.p.y = box->y1; + v[8] = dst.f; + v[11] = (msk_y + box->y1) * op->mask.scale[1]; + + v[1] = compute_linear(&op->src, box->x2, box->y2); + v[5] = compute_linear(&op->src, box->x1, box->y2); + v[9] = compute_linear(&op->src, box->x1, box->y1); + + v += 12; + box++; + } while (--nbox); +} + +fastcall static void +emit_primitive_identity_source_mask(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float src_x, src_y; + float msk_x, msk_y; + float w, h; + float *v; + + src_x = r->src.x + op->src.offset[0]; + src_y = r->src.y + op->src.offset[1]; + msk_x = r->mask.x + op->mask.offset[0]; + msk_y = r->mask.y + op->mask.offset[1]; + w = r->width; + h = r->height; + + assert(op->floats_per_rect == 15); + assert((sna->render.vertex_used % 5) == 0); + v = sna->render.vertices + 
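/* Five floats per vertex here: packed dst xy, src s/t, mask s/t.
 * Only the bottom-right corner computes all four texcoords; the other
 * corners reuse shared components (v[7] = v[2], v[11] = v[6], ...)
 * because the rectangle edges are axis-aligned in source space when
 * both transforms are identities. */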
sna->render.vertex_used; + sna->render.vertex_used += 15; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = (src_x + w) * op->src.scale[0]; + v[2] = (src_y + h) * op->src.scale[1]; + v[3] = (msk_x + w) * op->mask.scale[0]; + v[4] = (msk_y + h) * op->mask.scale[1]; + + dst.p.x = r->dst.x; + v[5] = dst.f; + v[6] = src_x * op->src.scale[0]; + v[7] = v[2]; + v[8] = msk_x * op->mask.scale[0]; + v[9] = v[4]; + + dst.p.y = r->dst.y; + v[10] = dst.f; + v[11] = v[6]; + v[12] = src_y * op->src.scale[1]; + v[13] = v[8]; + v[14] = msk_y * op->mask.scale[1]; +} + +fastcall static void +emit_primitive_simple_source_identity(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + int16_t tx = op->src.offset[0]; + int16_t ty = op->src.offset[1]; + float msk_x = r->mask.x + op->mask.offset[0]; + float msk_y = r->mask.y + op->mask.offset[1]; + float w = r->width, h = r->height; + + assert(op->floats_per_rect == 15); + assert((sna->render.vertex_used % 5) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*5; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; + v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; + v[3] = (msk_x + w) * op->mask.scale[0]; + v[4] = (msk_y + h) * op->mask.scale[1]; + + dst.p.x = r->dst.x; + v[5] = dst.f; + v[6] = ((r->src.x + tx) * xx + x0) * sx; + v[7] = v[2]; + v[8] = msk_x * op->mask.scale[0]; + v[9] = v[4]; + + dst.p.y = r->dst.y; + v[10] = dst.f; + v[11] = v[6]; + v[12] = ((r->src.y + ty) * yy + y0) * sy; + v[13] = v[8]; + v[14] = msk_y * op->mask.scale[1]; +} + +fastcall static void +emit_primitive_affine_source_identity(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + float msk_x = r->mask.x + op->mask.offset[0]; + float msk_y = r->mask.y + op->mask.offset[1]; + float w = r->width, h = r->height; + + assert(op->floats_per_rect == 15); + assert((sna->render.vertex_used % 5) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*5; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width, + op->src.offset[1] + r->src.y + r->height, + op->src.transform, op->src.scale, + &v[1], &v[2]); + v[3] = (msk_x + w) * op->mask.scale[0]; + v[4] = (msk_y + h) * op->mask.scale[1]; + + dst.p.x = r->dst.x; + v[5] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, + op->src.offset[1] + r->src.y + r->height, + op->src.transform, op->src.scale, + &v[6], &v[7]); + v[8] = msk_x * op->mask.scale[0]; + v[9] = v[4]; + + dst.p.y = r->dst.y; + v[10] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, + op->src.offset[1] + r->src.y, + op->src.transform, op->src.scale, + &v[11], &v[12]); + v[13] = v[8]; + v[14] = msk_y * op->mask.scale[1]; +} + +inline static void +emit_composite_texcoord_affine(struct sna *sna, + const struct sna_composite_channel *channel, + int16_t x, int16_t y) +{ + 
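/* Summary of the per-vertex layouts handed out by
 * gen4_choose_composite_emitter() below: each vertex is one packed
 * dst xy float plus, per channel, 1 float (solid: a constant .5, or a
 * linear-gradient t), 2 floats (affine s/t) or 3 (projective s/t/w).
 * floats_per_rect is always 3 * floats_per_vertex, since a rectangle
 * is emitted as a 3-vertex primitive (cf. the RECTLIST topology in
 * gen5_emit_primitive() further down). */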
float t[2]; + + sna_get_transformed_coordinates(x + channel->offset[0], + y + channel->offset[1], + channel->transform, + &t[0], &t[1]); + OUT_VERTEX_F(t[0] * channel->scale[0]); + OUT_VERTEX_F(t[1] * channel->scale[1]); +} + + +unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp) +{ + unsigned vb; + + if (tmp->mask.bo) { + if (tmp->mask.transform == NULL) { + if (tmp->src.is_solid) { + DBG(("%s: solid, identity mask\n", __FUNCTION__)); + tmp->prim_emit = emit_primitive_identity_mask; + tmp->emit_boxes = emit_boxes_identity_mask; + tmp->floats_per_vertex = 4; + vb = 1 | 2 << 2; + } else if (tmp->src.is_linear) { + DBG(("%s: linear, identity mask\n", __FUNCTION__)); + tmp->prim_emit = emit_primitive_linear_identity_mask; + tmp->emit_boxes = emit_boxes_linear_identity_mask; + tmp->floats_per_vertex = 4; + vb = 1 | 2 << 2; + } else if (tmp->src.transform == NULL) { + DBG(("%s: identity source, identity mask\n", __FUNCTION__)); + tmp->prim_emit = emit_primitive_identity_source_mask; + tmp->floats_per_vertex = 5; + vb = 2 << 2 | 2; + } else if (tmp->src.is_affine) { + tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; + tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; + if (!sna_affine_transform_is_rotation(tmp->src.transform)) { + DBG(("%s: simple src, identity mask\n", __FUNCTION__)); + tmp->prim_emit = emit_primitive_simple_source_identity; + } else { + DBG(("%s: affine src, identity mask\n", __FUNCTION__)); + tmp->prim_emit = emit_primitive_affine_source_identity; + } + tmp->floats_per_vertex = 5; + vb = 2 << 2 | 2; + } else { + DBG(("%s: projective source, identity mask\n", __FUNCTION__)); + tmp->prim_emit = emit_primitive_mask; + tmp->floats_per_vertex = 6; + vb = 2 << 2 | 3; + } + } else { + tmp->prim_emit = emit_primitive_mask; + tmp->floats_per_vertex = 1; + vb = 0; + if (tmp->mask.is_solid) { + tmp->floats_per_vertex += 1; + vb |= 1 << 2; + } else if (tmp->mask.is_affine) { + tmp->floats_per_vertex += 2; + vb |= 2 << 2; + }else { + tmp->floats_per_vertex += 3; + vb |= 3 << 2; + } + if (tmp->src.is_solid) { + tmp->floats_per_vertex += 1; + vb |= 1; + } else if (tmp->src.is_affine) { + tmp->floats_per_vertex += 2; + vb |= 2 ; + }else { + tmp->floats_per_vertex += 3; + vb |= 3; + } + DBG(("%s: general mask: floats-per-vertex=%d, vb=%x\n", + __FUNCTION__,tmp->floats_per_vertex, vb)); + } + } else { + if (tmp->src.is_solid) { + DBG(("%s: solid, no mask\n", __FUNCTION__)); + tmp->prim_emit = emit_primitive_solid; + tmp->emit_boxes = emit_boxes_solid; + if (tmp->src.is_opaque && tmp->op == PictOpOver) + tmp->op = PictOpSrc; + tmp->floats_per_vertex = 2; + vb = 1; + } else if (tmp->src.is_linear) { + DBG(("%s: linear, no mask\n", __FUNCTION__)); + tmp->prim_emit = emit_primitive_linear; + tmp->emit_boxes = emit_boxes_linear; + tmp->floats_per_vertex = 2; + vb = 1; + } else if (tmp->src.transform == NULL) { + DBG(("%s: identity src, no mask\n", __FUNCTION__)); + tmp->prim_emit = emit_primitive_identity_source; + tmp->emit_boxes = emit_boxes_identity_source; + tmp->floats_per_vertex = 3; + vb = 2; + } else if (tmp->src.is_affine) { + tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; + tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; + if (!sna_affine_transform_is_rotation(tmp->src.transform)) { + DBG(("%s: simple src, no mask\n", __FUNCTION__)); + tmp->prim_emit = emit_primitive_simple_source; + tmp->emit_boxes = emit_boxes_simple_source; + } else { + DBG(("%s: affine src, no mask\n", __FUNCTION__)); + tmp->prim_emit = emit_primitive_affine_source; + 
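/* The returned vb value packs the vertex-element signature: bits 1:0
 * give the source component count (1 solid, 2 affine, 3 projective)
 * and bits 3:2 the mask's. It becomes op->u.gen4/gen5.ve_id, and
 * render.vb_id tracks which signature the current vertex buffer was
 * bound with (see the 1 << id test in gen5_emit_vertex_buffer below).
 * E.g. identity source with identity mask: vb = 2 << 2 | 2 = 0xa and
 * floats_per_vertex = 1 + 2 + 2 = 5. */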
tmp->emit_boxes = emit_boxes_affine_source; + } + tmp->floats_per_vertex = 3; + vb = 2; + } else { + DBG(("%s: projective src, no mask\n", __FUNCTION__)); + assert(!tmp->src.is_solid); + tmp->prim_emit = emit_primitive; + tmp->floats_per_vertex = 4; + vb = 3; + } + } + tmp->floats_per_rect = 3 * tmp->floats_per_vertex; + + return vb; +} + +inline static void +emit_span_vertex(struct sna *sna, + const struct sna_composite_spans_op *op, + int16_t x, int16_t y) +{ + OUT_VERTEX(x, y); + emit_texcoord(sna, &op->base.src, x, y); +} + +fastcall static void +emit_composite_spans_primitive(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + emit_span_vertex(sna, op, box->x2, box->y2); + OUT_VERTEX_F(opacity); + + emit_span_vertex(sna, op, box->x1, box->y2); + OUT_VERTEX_F(opacity); + + emit_span_vertex(sna, op, box->x1, box->y1); + OUT_VERTEX_F(opacity); +} + +fastcall static void +emit_span_solid(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + assert(op->base.floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*3; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + + dst.p.x = box->x1; + v[3] = dst.f; + + dst.p.y = box->y1; + v[6] = dst.f; + + v[7] = v[4] = v[1] = .5; + v[8] = v[5] = v[2] = opacity; +} + +fastcall static void +emit_span_boxes_solid(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + + dst.p.x = b->box.x1; + v[3] = dst.f; + + dst.p.y = b->box.y1; + v[6] = dst.f; + + v[7] = v[4] = v[1] = .5; + v[8] = v[5] = v[2] = b->alpha; + + v += 9; + b++; + } while (--nbox); +} + +fastcall static void +emit_span_identity(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + assert(op->base.floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*4; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = (box->x2 + tx) * sx; + v[6] = v[2] = (box->y2 + ty) * sy; + + dst.p.x = box->x1; + v[4] = dst.f; + v[9] = v[5] = (box->x1 + tx) * sx; + + dst.p.y = box->y1; + v[8] = dst.f; + v[10] = (box->y1 + ty) * sy; + + v[11] = v[7] = v[3] = opacity; +} + +fastcall static void +emit_span_boxes_identity(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + v[1] = (b->box.x2 + tx) * sx; + v[6] = v[2] = (b->box.y2 + ty) * sy; + + dst.p.x = b->box.x1; + v[4] = dst.f; + v[9] = v[5] = (b->box.x1 + tx) * sx; + + dst.p.y = b->box.y1; + v[8] = dst.f; + v[10] = (b->box.y1 + ty) * sy; + + v[11] = v[7] 
= v[3] = b->alpha; + + v += 12; + b++; + } while (--nbox); +} + +fastcall static void +emit_span_simple(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float xx = op->base.src.transform->matrix[0][0]; + float x0 = op->base.src.transform->matrix[0][2]; + float yy = op->base.src.transform->matrix[1][1]; + float y0 = op->base.src.transform->matrix[1][2]; + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + assert(op->base.floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*4; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = ((box->x2 + tx) * xx + x0) * sx; + v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy; + + dst.p.x = box->x1; + v[4] = dst.f; + v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx; + + dst.p.y = box->y1; + v[8] = dst.f; + v[10] = ((box->y1 + ty) * yy + y0) * sy; + + v[11] = v[7] = v[3] = opacity; +} + +fastcall static void +emit_span_boxes_simple(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + float xx = op->base.src.transform->matrix[0][0]; + float x0 = op->base.src.transform->matrix[0][2]; + float yy = op->base.src.transform->matrix[1][1]; + float y0 = op->base.src.transform->matrix[1][2]; + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + v[1] = ((b->box.x2 + tx) * xx + x0) * sx; + v[6] = v[2] = ((b->box.y2 + ty) * yy + y0) * sy; + + dst.p.x = b->box.x1; + v[4] = dst.f; + v[9] = v[5] = ((b->box.x1 + tx) * xx + x0) * sx; + + dst.p.y = b->box.y1; + v[8] = dst.f; + v[10] = ((b->box.y1 + ty) * yy + y0) * sy; + + v[11] = v[7] = v[3] = b->alpha; + + v += 12; + b++; + } while (--nbox); +} + +fastcall static void +emit_span_affine(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + assert(op->base.floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x2, + op->base.src.offset[1] + box->y2, + op->base.src.transform, + op->base.src.scale, + &v[1], &v[2]); + + dst.p.x = box->x1; + v[4] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, + op->base.src.offset[1] + box->y2, + op->base.src.transform, + op->base.src.scale, + &v[5], &v[6]); + + dst.p.y = box->y1; + v[8] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, + op->base.src.offset[1] + box->y1, + op->base.src.transform, + op->base.src.scale, + &v[9], &v[10]); + + v[11] = v[7] = v[3] = opacity; +} + +fastcall static void +emit_span_boxes_affine(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + 
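/* Each emit_boxes_* variant is the batched form of its emit_span_* or
 * emit_primitive_* counterpart: it writes nbox rectangles straight
 * into a caller-supplied float array using the same per-vertex
 * layout, letting the caller reserve vertex space once and fill many
 * opacity boxes per call. */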
_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
+ op->base.src.offset[1] + b->box.y2,
+ op->base.src.transform,
+ op->base.src.scale,
+ &v[1], &v[2]);
+
+ dst.p.x = b->box.x1;
+ v[4] = dst.f;
+ _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
+ op->base.src.offset[1] + b->box.y2,
+ op->base.src.transform,
+ op->base.src.scale,
+ &v[5], &v[6]);
+
+ dst.p.y = b->box.y1;
+ v[8] = dst.f;
+ _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
+ op->base.src.offset[1] + b->box.y1,
+ op->base.src.transform,
+ op->base.src.scale,
+ &v[9], &v[10]);
+
+ v[11] = v[7] = v[3] = b->alpha;
+
+ v += 12;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
+emit_span_linear(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float *v;
+
+ assert(op->base.floats_per_rect == 9);
+ assert((sna->render.vertex_used % 3) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ dst.p.x = box->x1;
+ v[3] = dst.f;
+ dst.p.y = box->y1;
+ v[6] = dst.f;
+
+ v[1] = compute_linear(&op->base.src, box->x2, box->y2);
+ v[4] = compute_linear(&op->base.src, box->x1, box->y2);
+ v[7] = compute_linear(&op->base.src, box->x1, box->y1);
+
+ v[8] = v[5] = v[2] = opacity;
+}
+
+fastcall static void
+emit_span_boxes_linear(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = b->box.x2;
+ dst.p.y = b->box.y2;
+ v[0] = dst.f;
+ dst.p.x = b->box.x1;
+ v[3] = dst.f;
+ dst.p.y = b->box.y1;
+ v[6] = dst.f;
+
+ v[1] = compute_linear(&op->base.src, b->box.x2, b->box.y2);
+ v[4] = compute_linear(&op->base.src, b->box.x1, b->box.y2);
+ v[7] = compute_linear(&op->base.src, b->box.x1, b->box.y1);
+
+ v[8] = v[5] = v[2] = b->alpha;
+
+ v += 9;
+ b++;
+ } while (--nbox);
+}
+
+inline static uint32_t
+gen4_choose_spans_vertex_buffer(const struct sna_composite_op *op)
+{
+ int id = op->src.is_solid ?
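/* Span vertex signatures, matching gen4_choose_spans_emitter() below:
 * id 1 solid (3 floats: xy, .5, opacity), id 2 affine (4 floats: xy,
 * s, t, opacity), id 3 projective (5 floats: xy, s, t, w, opacity).
 * The opacity float occupies the mask slot, hence vb = 1 << 2 | id. */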
1 : 2 + !op->src.is_affine; + DBG(("%s: id=%x (%d, 1)\n", __FUNCTION__, 1 << 2 | id, id)); + return 1 << 2 | id; +} + +unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp) +{ + unsigned vb; + + if (tmp->base.src.is_solid) { + tmp->prim_emit = emit_span_solid; + tmp->emit_boxes = emit_span_boxes_solid; + tmp->base.floats_per_vertex = 3; + vb = 1 << 2 | 1; + } else if (tmp->base.src.is_linear) { + tmp->prim_emit = emit_span_linear; + tmp->emit_boxes = emit_span_boxes_linear; + tmp->base.floats_per_vertex = 3; + vb = 1 << 2 | 1; + } else if (tmp->base.src.transform == NULL) { + tmp->prim_emit = emit_span_identity; + tmp->emit_boxes = emit_span_boxes_identity; + tmp->base.floats_per_vertex = 4; + vb = 1 << 2 | 2; + } else if (tmp->base.is_affine) { + tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2]; + tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2]; + if (!sna_affine_transform_is_rotation(tmp->base.src.transform)) { + tmp->prim_emit = emit_span_simple; + tmp->emit_boxes = emit_span_boxes_simple; + } else { + tmp->prim_emit = emit_span_affine; + tmp->emit_boxes = emit_span_boxes_affine; + } + tmp->base.floats_per_vertex = 4; + vb = 1 << 2 | 2; + } else { + tmp->prim_emit = emit_composite_spans_primitive; + tmp->base.floats_per_vertex = 5; + vb = 1 << 2 | 3; + } + tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex; + return vb; +} diff --git a/src/sna/gen4_vertex.h b/src/sna/gen4_vertex.h new file mode 100644 index 000000000..431b545eb --- /dev/null +++ b/src/sna/gen4_vertex.h @@ -0,0 +1,16 @@ +#ifndef GEN4_VERTEX_H +#define GEN4_VERTEX_H + +#include "compiler.h" + +#include "sna.h" +#include "sna_render.h" + +void gen4_vertex_flush(struct sna *sna); +int gen4_vertex_finish(struct sna *sna); +void gen4_vertex_close(struct sna *sna); + +unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp); +unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp); + +#endif /* GEN4_VERTEX_H */ diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c index 5d559377b..6e1199638 100644 --- a/src/sna/gen5_render.c +++ b/src/sna/gen5_render.c @@ -42,7 +42,10 @@ #include "brw/brw.h" #include "gen5_render.h" +#include "gen4_source.h" +#include "gen4_vertex.h" +#define NO_COMPOSITE 0 #define NO_COMPOSITE_SPANS 0 #define PREFER_BLT_FILL 1 @@ -196,17 +199,19 @@ gen5_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) return base + !is_affine; } -static void gen5_magic_ca_pass(struct sna *sna, +static bool gen5_magic_ca_pass(struct sna *sna, const struct sna_composite_op *op) { struct gen5_render_state *state = &sna->render_state.gen5; if (!op->need_magic_ca_pass) - return; + return false; assert(sna->render.vertex_index > sna->render.vertex_start); DBG(("%s: CA fixup\n", __FUNCTION__)); + assert(op->mask.bo != NULL); + assert(op->has_component_alpha); gen5_emit_pipelined_pointers (sna, op, PictOpAdd, @@ -225,162 +230,7 @@ static void gen5_magic_ca_pass(struct sna *sna, OUT_BATCH(0); /* index buffer offset, ignored */ state->last_primitive = sna->kgem.nbatch; -} - -static void gen5_vertex_flush(struct sna *sna) -{ - assert(sna->render_state.gen5.vertex_offset); - assert(sna->render.vertex_index > sna->render.vertex_start); - - DBG(("%s[%x] = %d\n", __FUNCTION__, - 4*sna->render_state.gen5.vertex_offset, - sna->render.vertex_index - sna->render.vertex_start)); - sna->kgem.batch[sna->render_state.gen5.vertex_offset] = - sna->render.vertex_index - sna->render.vertex_start; - sna->render_state.gen5.vertex_offset = 0; 
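/* Orientation for this run of deletions: gen5_render.c drops its
 * private vertex-buffer helpers (gen5_vertex_flush/finish/close) and
 * its per-operation gen5_emit_composite_primitive_* emitters; the
 * later hunks in this file redirect the vertex handling to the shared
 * gen4_vertex_flush()/gen4_vertex_finish() routines added above, with
 * the emitters now provided by gen4_vertex.c, and move vertex_offset
 * and vb_id out of gen5 render state into the common sna->render. */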
-} - -static int gen5_vertex_finish(struct sna *sna) -{ - struct kgem_bo *bo; - unsigned int i; - - assert(sna->render.vertex_used); - assert(sna->render.nvertex_reloc); - - /* Note: we only need dword alignment (currently) */ - - bo = sna->render.vbo; - if (bo) { - if (sna->render_state.gen5.vertex_offset) - gen5_vertex_flush(sna); - - for (i = 0; i < sna->render.nvertex_reloc; i++) { - DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, - i, sna->render.vertex_reloc[i])); - - sna->kgem.batch[sna->render.vertex_reloc[i]] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i], bo, - I915_GEM_DOMAIN_VERTEX << 16, - 0); - sna->kgem.batch[sna->render.vertex_reloc[i]+1] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i]+1, bo, - I915_GEM_DOMAIN_VERTEX << 16, - sna->render.vertex_used * 4 - 1); - } - - sna->render.nvertex_reloc = 0; - sna->render.vertex_used = 0; - sna->render.vertex_index = 0; - sna->render_state.gen5.vb_id = 0; - - kgem_bo_destroy(&sna->kgem, bo); - } - - sna->render.vertices = NULL; - sna->render.vbo = kgem_create_linear(&sna->kgem, - 256*1024, CREATE_GTT_MAP); - if (sna->render.vbo) - sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo); - if (sna->render.vertices == NULL) { - if (sna->render.vbo) - kgem_bo_destroy(&sna->kgem, sna->render.vbo); - sna->render.vbo = NULL; - return 0; - } - - if (sna->render.vertex_used) { - memcpy(sna->render.vertices, - sna->render.vertex_data, - sizeof(float)*sna->render.vertex_used); - } - sna->render.vertex_size = 64 * 1024 - 1; - return sna->render.vertex_size - sna->render.vertex_used; -} - -static void gen5_vertex_close(struct sna *sna) -{ - struct kgem_bo *bo, *free_bo = NULL; - unsigned int i, delta = 0; - - assert(sna->render_state.gen5.vertex_offset == 0); - if (!sna->render_state.gen5.vb_id) - return; - - DBG(("%s: used=%d, vbo active? 
%d\n", - __FUNCTION__, sna->render.vertex_used, sna->render.vbo != NULL)); - - bo = sna->render.vbo; - if (bo) { - if (sna->render.vertex_size - sna->render.vertex_used < 64) { - DBG(("%s: discarding full vbo\n", __FUNCTION__)); - sna->render.vbo = NULL; - sna->render.vertices = sna->render.vertex_data; - sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); - free_bo = bo; - } else if (IS_CPU_MAP(bo->map)) { - DBG(("%s: converting CPU map to GTT\n", __FUNCTION__)); - sna->render.vertices = - kgem_bo_map__gtt(&sna->kgem, sna->render.vbo); - if (sna->render.vertices == NULL) { - sna->render.vbo = NULL; - sna->render.vertices = sna->render.vertex_data; - sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); - free_bo = bo; - } - } - } else { - if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) { - DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__, - sna->render.vertex_used, sna->kgem.nbatch)); - memcpy(sna->kgem.batch + sna->kgem.nbatch, - sna->render.vertex_data, - sna->render.vertex_used * 4); - delta = sna->kgem.nbatch * 4; - bo = NULL; - sna->kgem.nbatch += sna->render.vertex_used; - } else { - bo = kgem_create_linear(&sna->kgem, - 4*sna->render.vertex_used, 0); - if (bo && !kgem_bo_write(&sna->kgem, bo, - sna->render.vertex_data, - 4*sna->render.vertex_used)) { - kgem_bo_destroy(&sna->kgem, bo); - bo = NULL; - } - DBG(("%s: new vbo: %d\n", __FUNCTION__, - sna->render.vertex_used)); - free_bo = bo; - } - } - - assert(sna->render.nvertex_reloc); - for (i = 0; i < sna->render.nvertex_reloc; i++) { - DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, - i, sna->render.vertex_reloc[i])); - - sna->kgem.batch[sna->render.vertex_reloc[i]] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i], bo, - I915_GEM_DOMAIN_VERTEX << 16, - delta); - sna->kgem.batch[sna->render.vertex_reloc[i]+1] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i]+1, bo, - I915_GEM_DOMAIN_VERTEX << 16, - delta + sna->render.vertex_used * 4 - 1); - } - sna->render.nvertex_reloc = 0; - - if (sna->render.vbo == NULL) { - sna->render.vertex_used = 0; - sna->render.vertex_index = 0; - } - - if (free_bo) - kgem_bo_destroy(&sna->kgem, free_bo); + return true; } static uint32_t gen5_get_blend(int op, @@ -679,310 +529,29 @@ gen5_bind_bo(struct sna *sna, return offset * sizeof(uint32_t); } -fastcall static void -gen5_emit_composite_primitive_solid(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - float *v; - union { - struct sna_coordinate p; - float f; - } dst; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - v[1] = 1.; - v[2] = 1.; - - dst.p.x = r->dst.x; - v[3] = dst.f; - v[4] = 0.; - v[5] = 1.; - - dst.p.y = r->dst.y; - v[6] = dst.f; - v[7] = 0.; - v[8] = 0.; -} - -fastcall static void -gen5_emit_composite_primitive_identity_source(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - const float *sf = op->src.scale; - float sx, sy, *v; - union { - struct sna_coordinate p; - float f; - } dst; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; - - sx = r->src.x + op->src.offset[0]; - sy = r->src.y + op->src.offset[1]; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - v[1] = (sx + r->width) * sf[0]; - v[5] = v[2] = (sy + r->height) * sf[1]; - - dst.p.x = r->dst.x; - v[3] = dst.f; - v[7] = 
v[4] = sx * sf[0]; - - dst.p.y = r->dst.y; - v[6] = dst.f; - v[8] = sy * sf[1]; -} - -fastcall static void -gen5_emit_composite_primitive_affine_source(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - union { - struct sna_coordinate p; - float f; - } dst; - float *v; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x + r->width, - op->src.offset[1] + r->src.y + r->height, - op->src.transform, - &v[1], &v[2]); - v[1] *= op->src.scale[0]; - v[2] *= op->src.scale[1]; - - dst.p.x = r->dst.x; - v[3] = dst.f; - _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x, - op->src.offset[1] + r->src.y + r->height, - op->src.transform, - &v[4], &v[5]); - v[4] *= op->src.scale[0]; - v[5] *= op->src.scale[1]; - - dst.p.y = r->dst.y; - v[6] = dst.f; - _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x, - op->src.offset[1] + r->src.y, - op->src.transform, - &v[7], &v[8]); - v[7] *= op->src.scale[0]; - v[8] *= op->src.scale[1]; -} - -fastcall static void -gen5_emit_composite_primitive_identity_source_mask(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - union { - struct sna_coordinate p; - float f; - } dst; - float src_x, src_y; - float msk_x, msk_y; - float w, h; - float *v; - - src_x = r->src.x + op->src.offset[0]; - src_y = r->src.y + op->src.offset[1]; - msk_x = r->mask.x + op->mask.offset[0]; - msk_y = r->mask.y + op->mask.offset[1]; - w = r->width; - h = r->height; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 15; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - v[1] = (src_x + w) * op->src.scale[0]; - v[2] = (src_y + h) * op->src.scale[1]; - v[3] = (msk_x + w) * op->mask.scale[0]; - v[4] = (msk_y + h) * op->mask.scale[1]; - - dst.p.x = r->dst.x; - v[5] = dst.f; - v[6] = src_x * op->src.scale[0]; - v[7] = v[2]; - v[8] = msk_x * op->mask.scale[0]; - v[9] = v[4]; - - dst.p.y = r->dst.y; - v[10] = dst.f; - v[11] = v[6]; - v[12] = src_y * op->src.scale[1]; - v[13] = v[8]; - v[14] = msk_y * op->mask.scale[1]; -} - -fastcall static void -gen5_emit_composite_primitive(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3]; - bool is_affine = op->is_affine; - const float *src_sf = op->src.scale; - const float *mask_sf = op->mask.scale; - - if (is_affine) { - sna_get_transformed_coordinates(r->src.x + op->src.offset[0], - r->src.y + op->src.offset[1], - op->src.transform, - &src_x[0], - &src_y[0]); - - sna_get_transformed_coordinates(r->src.x + op->src.offset[0], - r->src.y + op->src.offset[1] + r->height, - op->src.transform, - &src_x[1], - &src_y[1]); - - sna_get_transformed_coordinates(r->src.x + op->src.offset[0] + r->width, - r->src.y + op->src.offset[1] + r->height, - op->src.transform, - &src_x[2], - &src_y[2]); - } else { - sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0], - r->src.y + op->src.offset[1], - op->src.transform, - &src_x[0], - &src_y[0], - &src_w[0]); - sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0], - r->src.y + op->src.offset[1] + r->height, - op->src.transform, - &src_x[1], - &src_y[1], - &src_w[1]); - sna_get_transformed_coordinates_3d(r->src.x + 
op->src.offset[0] + r->width, - r->src.y + op->src.offset[1] + r->height, - op->src.transform, - &src_x[2], - &src_y[2], - &src_w[2]); - } - - if (op->mask.bo) { - if (is_affine) { - sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0], - r->mask.y + op->mask.offset[1], - op->mask.transform, - &mask_x[0], - &mask_y[0]); - - sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0], - r->mask.y + op->mask.offset[1] + r->height, - op->mask.transform, - &mask_x[1], - &mask_y[1]); - - sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0] + r->width, - r->mask.y + op->mask.offset[1] + r->height, - op->mask.transform, - &mask_x[2], - &mask_y[2]); - } else { - sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0], - r->mask.y + op->mask.offset[1], - op->mask.transform, - &mask_x[0], - &mask_y[0], - &mask_w[0]); - - sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0], - r->mask.y + op->mask.offset[1] + r->height, - op->mask.transform, - &mask_x[1], - &mask_y[1], - &mask_w[1]); - sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0] + r->width, - r->mask.y + op->mask.offset[1] + r->height, - op->mask.transform, - &mask_x[2], - &mask_y[2], - &mask_w[2]); - } - } - - OUT_VERTEX(r->dst.x + r->width, r->dst.y + r->height); - OUT_VERTEX_F(src_x[2] * src_sf[0]); - OUT_VERTEX_F(src_y[2] * src_sf[1]); - if (!is_affine) - OUT_VERTEX_F(src_w[2]); - if (op->mask.bo) { - OUT_VERTEX_F(mask_x[2] * mask_sf[0]); - OUT_VERTEX_F(mask_y[2] * mask_sf[1]); - if (!is_affine) - OUT_VERTEX_F(mask_w[2]); - } - - OUT_VERTEX(r->dst.x, r->dst.y + r->height); - OUT_VERTEX_F(src_x[1] * src_sf[0]); - OUT_VERTEX_F(src_y[1] * src_sf[1]); - if (!is_affine) - OUT_VERTEX_F(src_w[1]); - if (op->mask.bo) { - OUT_VERTEX_F(mask_x[1] * mask_sf[0]); - OUT_VERTEX_F(mask_y[1] * mask_sf[1]); - if (!is_affine) - OUT_VERTEX_F(mask_w[1]); - } - - OUT_VERTEX(r->dst.x, r->dst.y); - OUT_VERTEX_F(src_x[0] * src_sf[0]); - OUT_VERTEX_F(src_y[0] * src_sf[1]); - if (!is_affine) - OUT_VERTEX_F(src_w[0]); - if (op->mask.bo) { - OUT_VERTEX_F(mask_x[0] * mask_sf[0]); - OUT_VERTEX_F(mask_y[0] * mask_sf[1]); - if (!is_affine) - OUT_VERTEX_F(mask_w[0]); - } -} - static void gen5_emit_vertex_buffer(struct sna *sna, const struct sna_composite_op *op) { int id = op->u.gen5.ve_id; - assert((unsigned)id <= 3); + assert((sna->render.vb_id & (1 << id)) == 0); OUT_BATCH(GEN5_3DSTATE_VERTEX_BUFFERS | 3); - OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA | + OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA | (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT)); + assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc)); sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; OUT_BATCH(0); - OUT_BATCH(0); + OUT_BATCH(~0); /* max address: disabled */ OUT_BATCH(0); - sna->render_state.gen5.vb_id |= 1 << id; + sna->render.vb_id |= 1 << id; } static void gen5_emit_primitive(struct sna *sna) { if (sna->kgem.nbatch == sna->render_state.gen5.last_primitive) { - sna->render_state.gen5.vertex_offset = sna->kgem.nbatch - 5; + sna->render.vertex_offset = sna->kgem.nbatch - 5; return; } @@ -991,7 +560,7 @@ static void gen5_emit_primitive(struct sna *sna) (_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) | (0 << 9) | 4); - sna->render_state.gen5.vertex_offset = sna->kgem.nbatch; + sna->render.vertex_offset = sna->kgem.nbatch; OUT_BATCH(0); /* vertex count, to be filled in later */ OUT_BATCH(sna->render.vertex_index); OUT_BATCH(1); /* single instance */ @@ -1008,18 
+577,19 @@ static bool gen5_rectangle_begin(struct sna *sna,
 	int id = op->u.gen5.ve_id;
 	int ndwords;
 
-	assert((unsigned)id <= 3);
+	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
+		return true;
 
 	ndwords = op->need_magic_ca_pass ? 20 : 6;
-	if ((sna->render_state.gen5.vb_id & (1 << id)) == 0)
+	if ((sna->render.vb_id & (1 << id)) == 0)
 		ndwords += 5;
 	if (!kgem_check_batch(&sna->kgem, ndwords))
 		return false;
 
-	if ((sna->render_state.gen5.vb_id & (1 << id)) == 0)
+	if ((sna->render.vb_id & (1 << id)) == 0)
 		gen5_emit_vertex_buffer(sna, op);
-	if (sna->render_state.gen5.vertex_offset == 0)
+	if (sna->render.vertex_offset == 0)
 		gen5_emit_primitive(sna);
 
 	return true;
@@ -1028,17 +598,26 @@ static bool gen5_rectangle_begin(struct sna *sna,
 static int gen5_get_rectangles__flush(struct sna *sna,
 				      const struct sna_composite_op *op)
 {
+	/* Prevent discarding the new vbo after lock contention */
+	if (sna_vertex_wait__locked(&sna->render)) {
+		int rem = vertex_space(sna);
+		if (rem > op->floats_per_rect)
+			return rem;
+	}
+
 	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 20 : 6))
 		return 0;
-	if (!kgem_check_exec(&sna->kgem, 1))
-		return 0;
-	if (!kgem_check_reloc(&sna->kgem, 2))
+	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
 		return 0;
 
-	if (op->need_magic_ca_pass && sna->render.vbo)
-		return 0;
+	if (sna->render.vertex_offset) {
+		gen4_vertex_flush(sna);
+		if (gen5_magic_ca_pass(sna, op))
+			gen5_emit_pipelined_pointers(sna, op, op->op,
+						     op->u.gen5.wm_kernel);
+	}
 
-	return gen5_vertex_finish(sna);
+	return gen4_vertex_finish(sna);
 }
 
 inline static int gen5_get_rectangles(struct sna *sna,
@@ -1051,7 +630,7 @@ inline static int gen5_get_rectangles(struct sna *sna,
 start:
 	rem = vertex_space(sna);
-	if (rem < op->floats_per_rect) {
+	if (unlikely(rem < op->floats_per_rect)) {
 		DBG(("flushing vbo for %s: %d < %d\n",
 		     __FUNCTION__, rem, op->floats_per_rect));
 		rem = gen5_get_rectangles__flush(sna, op);
@@ -1059,21 +638,22 @@ start:
 		goto flush;
 	}
 
-	if (unlikely(sna->render_state.gen5.vertex_offset == 0 &&
+	if (unlikely(sna->render.vertex_offset == 0 &&
 		     !gen5_rectangle_begin(sna, op)))
 		goto flush;
 
-	if (want * op->floats_per_rect > rem)
+	if (want > 1 && want * op->floats_per_rect > rem)
 		want = rem / op->floats_per_rect;
 
 	sna->render.vertex_index += 3*want;
 	return want;
 
flush:
-	if (sna->render_state.gen5.vertex_offset) {
-		gen5_vertex_flush(sna);
+	if (sna->render.vertex_offset) {
+		gen4_vertex_flush(sna);
 		gen5_magic_ca_pass(sna, op);
 	}
+	sna_vertex_wait__locked(&sna->render);
 	_kgem_submit(&sna->kgem);
 	emit_state(sna, op);
 	goto start;
@@ -1083,18 +663,15 @@ static uint32_t *
 gen5_composite_get_binding_table(struct sna *sna,
 				 uint16_t *offset)
 {
-	uint32_t *table;
-
 	sna->kgem.surface -=
 		sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
-	/* Clear all surplus entries to zero in case of prefetch */
-	table = memset(sna->kgem.batch + sna->kgem.surface,
-		       0, sizeof(struct gen5_surface_state_padded));
-
-	*offset = sna->kgem.surface;
 
 	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
 
-	return table;
+	/* Clear all surplus entries to zero in case of prefetch */
+	*offset = sna->kgem.surface;
+	return memset(sna->kgem.batch + sna->kgem.surface,
+		      0, sizeof(struct gen5_surface_state_padded));
 }
 
 static void
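The rewritten gen5_get_rectangles__flush() above hinges on re-validating after a wait: while this context slept on the vertex lock, another thread may have installed a fresh vbo, so the free-space test is repeated before the buffer is flushed and replaced. A minimal, self-contained sketch of that re-check-after-wait pattern, using a plain pthread mutex/condvar in place of sna's sna_vertex_wait__locked() machinery (all names below are invented for illustration):

	#include <pthread.h>

	struct vb_pool {
		pthread_mutex_t lock;
		pthread_cond_t cond;
		int active;		/* threads still writing vertices */
		int used, size;		/* floats consumed / capacity */
	};

	/* Returns the space still free once all writers have drained; the
	 * caller only discards the buffer when this drops below its need. */
	static int vb_wait_and_recheck(struct vb_pool *p, int needed)
	{
		int rem;

		pthread_mutex_lock(&p->lock);
		while (p->active)
			pthread_cond_wait(&p->cond, &p->lock);	/* may sleep */
		/* Re-check: the pool may have been refreshed while we slept. */
		rem = p->size - p->used;
		pthread_mutex_unlock(&p->lock);
		return rem >= needed ? rem : 0;
	}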
@@ -1181,9 +758,9 @@ gen5_emit_invariant(struct sna *sna)
 }
 
 static void
-gen5_get_batch(struct sna *sna)
+gen5_get_batch(struct sna *sna, const struct sna_composite_op *op)
 {
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
 
 	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
 		DBG(("%s: flushing batch: %d < %d+%d\n",
@@ -1200,9 +777,10 @@ gen5_get_batch(struct sna *sna)
 
 static void gen5_align_vertex(struct sna *sna, const struct sna_composite_op *op)
 {
+	assert(op->floats_per_rect == 3*op->floats_per_vertex);
 	if (op->floats_per_vertex != sna->render_state.gen5.floats_per_vertex) {
 		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
-			gen5_vertex_finish(sna);
+			gen4_vertex_finish(sna);
 
 		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
 		     sna->render_state.gen5.floats_per_vertex,
@@ -1215,12 +793,12 @@ gen5_align_vertex(struct sna *sna, const struct sna_composite_op *op)
 	}
 }
 
-static bool
+static void
 gen5_emit_binding_table(struct sna *sna, uint16_t offset)
 {
 	if (!DBG_NO_STATE_CACHE &&
 	    sna->render_state.gen5.surface_table == offset)
-		return false;
+		return;
 
 	sna->render_state.gen5.surface_table = offset;
 
@@ -1232,8 +810,6 @@ gen5_emit_binding_table(struct sna *sna, uint16_t offset)
 	OUT_BATCH(0);		/* sf */
 	/* Only the PS uses the binding table */
 	OUT_BATCH(offset*4);
-
-	return true;
 }
 
 static bool
@@ -1241,33 +817,36 @@ gen5_emit_pipelined_pointers(struct sna *sna,
 			     const struct sna_composite_op *op,
 			     int blend, int kernel)
 {
-	uint16_t offset = sna->kgem.nbatch, last;
+	uint16_t sp, bp;
+	uint32_t key;
+
+	DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n",
+	     __FUNCTION__, op->u.gen5.ve_id & 2,
+	     op->src.filter, op->src.repeat,
+	     op->mask.filter, op->mask.repeat,
+	     kernel, blend, op->has_component_alpha, (int)op->dst.format));
+
+	sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
+			    op->mask.filter, op->mask.repeat,
+			    kernel);
+	bp = gen5_get_blend(blend, op->has_component_alpha, op->dst.format);
+
+	DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
+	key = sp | (uint32_t)bp << 16 | (op->mask.bo != NULL) << 31;
+	if (key == sna->render_state.gen5.last_pipelined_pointers)
+		return false;
+
 	OUT_BATCH(GEN5_3DSTATE_PIPELINED_POINTERS | 5);
 	OUT_BATCH(sna->render_state.gen5.vs);
 	OUT_BATCH(GEN5_GS_DISABLE); /* passthrough */
 	OUT_BATCH(GEN5_CLIP_DISABLE); /* passthrough */
 	OUT_BATCH(sna->render_state.gen5.sf[op->mask.bo != NULL]);
-	OUT_BATCH(sna->render_state.gen5.wm +
-		  SAMPLER_OFFSET(op->src.filter, op->src.repeat,
-				 op->mask.filter, op->mask.repeat,
-				 kernel));
-	OUT_BATCH(sna->render_state.gen5.cc +
-		  gen5_get_blend(blend, op->has_component_alpha, op->dst.format));
-
-	last = sna->render_state.gen5.last_pipelined_pointers;
-	if (!DBG_NO_STATE_CACHE && last &&
-	    sna->kgem.batch[offset + 1] == sna->kgem.batch[last + 1] &&
-	    sna->kgem.batch[offset + 3] == sna->kgem.batch[last + 3] &&
-	    sna->kgem.batch[offset + 4] == sna->kgem.batch[last + 4] &&
-	    sna->kgem.batch[offset + 5] == sna->kgem.batch[last + 5] &&
-	    sna->kgem.batch[offset + 6] == sna->kgem.batch[last + 6]) {
-		sna->kgem.nbatch = offset;
-		return false;
-	} else {
-		sna->render_state.gen5.last_pipelined_pointers = offset;
-		return true;
-	}
+	OUT_BATCH(sna->render_state.gen5.wm + sp);
+	OUT_BATCH(sna->render_state.gen5.cc + bp);
+
+	sna->render_state.gen5.last_pipelined_pointers = key;
+	return true;
 }
 
 static void
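The gen5_emit_pipelined_pointers() rewrite above swaps an awkward cache (emit seven dwords into the batch, compare them against the previous emission, rewind nbatch on a hit) for a key computed up front: the sampler offset in the low 16 bits, the blend offset in bits 16-30, and the has-mask flag in bit 31. A single comparison then decides whether anything is written at all. A condensed sketch of the scheme, with simplified types and an assumed emit() helper standing in for the OUT_BATCH sequence:

	#include <stdbool.h>
	#include <stdint.h>

	struct pp_state { uint32_t last_key; };

	static void emit(uint16_t sp, uint16_t bp, bool has_mask)
	{
		/* stands in for the seven OUT_BATCH() dwords above */
	}

	static bool emit_pipelined_pointers(struct pp_state *st,
					    uint16_t sp, uint16_t bp,
					    bool has_mask)
	{
		uint32_t key = sp | (uint32_t)bp << 16 | (uint32_t)has_mask << 31;

		if (key == st->last_key)
			return false;	/* state unchanged: emit nothing */

		emit(sp, bp, has_mask);
		st->last_key = key;
		return true;	/* caller must re-emit dependent state (URB) */
	}

Compared with the old scheme, the cache hit no longer costs seven batch writes plus a rewind; it is one integer compare.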
@@ -1304,28 +883,16 @@ gen5_emit_vertex_elements(struct sna *sna,
 	 *    texture coordinate 1 if (has_mask is true): same as above
 	 */
 	struct gen5_render_state *render = &sna->render_state.gen5;
-	bool has_mask = op->mask.bo != NULL;
-	bool is_affine = op->is_affine;
-	int nelem = has_mask ? 2 : 1;
-	int selem = is_affine ? 2 : 3;
-	uint32_t w_component;
-	uint32_t src_format;
 	int id = op->u.gen5.ve_id;
+	bool has_mask = id >> 2;
+	uint32_t format, dw;
 
-	assert((unsigned)id <= 3);
 	if (!DBG_NO_STATE_CACHE && render->ve_id == id)
 		return;
 
+	DBG(("%s: changing %d -> %d\n", __FUNCTION__, render->ve_id, id));
 	render->ve_id = id;
 
-	if (is_affine) {
-		src_format = GEN5_SURFACEFORMAT_R32G32_FLOAT;
-		w_component = GEN5_VFCOMPONENT_STORE_1_FLT;
-	} else {
-		src_format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT;
-		w_component = GEN5_VFCOMPONENT_STORE_SRC;
-	}
-
 	/* The VUE layout
 	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
 	 *    dword 4-7: position (x, y, 1.0, 1.0),
@@ -1335,43 +902,92 @@ gen5_emit_vertex_elements(struct sna *sna,
 	 *    dword 4-15 are fetched from vertex buffer
 	 */
 	OUT_BATCH(GEN5_3DSTATE_VERTEX_ELEMENTS |
-		  ((2 * (2 + nelem)) + 1 - 2));
+		  ((2 * (has_mask ? 4 : 3)) + 1 - 2));
 
 	OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
 		  (GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT) |
 		  (0 << VE0_OFFSET_SHIFT));
-	OUT_BATCH((GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
-		  (GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
-		  (GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
-		  (GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
+	OUT_BATCH((VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
+		  (VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
+		  (VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
+		  (VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
 
 	/* x,y */
-	OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
-		  (GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT) |
-		  (0 << VE0_OFFSET_SHIFT)); /* offsets vb in bytes */
-	OUT_BATCH((GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
-		  (GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
-		  (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
-		  (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+		  GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
+		  0 << VE0_OFFSET_SHIFT);
+	OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
+		  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
+		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
+		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
 
 	/* u0, v0, w0 */
-	OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
-		  (src_format << VE0_FORMAT_SHIFT) |
-		  (4 << VE0_OFFSET_SHIFT));	/* offset vb in bytes */
-	OUT_BATCH((GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
-		  (GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
-		  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
-		  (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+	DBG(("%s: id=%d, first channel %d floats, offset=4b\n", __FUNCTION__,
+	     id, id & 3));
+	dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
+	switch (id & 3) {
+	default:
+		assert(0);
+	case 0:
+		format = GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT;
+		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
+		break;
+	case 1:
+		format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
+		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+		dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
+		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
+		break;
+	case 2:
+		format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT;
+		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+		dw |= VFCOMPONENT_STORE_SRC
<< VE1_VFCOMPONENT_1_SHIFT; + dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT; + break; + case 3: + format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT << VE0_FORMAT_SHIFT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; + break; + } + OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + format | 4 << VE0_OFFSET_SHIFT); + OUT_BATCH(dw); /* u1, v1, w1 */ if (has_mask) { - OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | - (src_format << VE0_FORMAT_SHIFT) | - (((1 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */ - OUT_BATCH((GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | - (GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | - (w_component << VE1_VFCOMPONENT_2_SHIFT) | - (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); + unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float); + DBG(("%s: id=%x, second channel %d floats, offset=%db\n", __FUNCTION__, + id, id >> 2, offset)); + dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; + switch (id >> 2) { + case 1: + format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; + dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT; + break; + default: + assert(0); + case 2: + format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT; + break; + case 3: + format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT << VE0_FORMAT_SHIFT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; + break; + } + OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + format | offset << VE0_OFFSET_SHIFT); + OUT_BATCH(dw); } } @@ -1380,23 +996,21 @@ gen5_emit_state(struct sna *sna, const struct sna_composite_op *op, uint16_t offset) { - bool flush; + if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { + DBG(("%s: flushing dirty (%d, %d)\n", __FUNCTION__, + kgem_bo_is_dirty(op->src.bo), + kgem_bo_is_dirty(op->mask.bo))); + OUT_BATCH(MI_FLUSH); + kgem_clear_dirty(&sna->kgem); + kgem_bo_mark_dirty(op->dst.bo); + } /* drawrect must be first for Ironlake BLT workaround */ gen5_emit_drawing_rectangle(sna, op); - - flush = gen5_emit_binding_table(sna, offset); - if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel)) { + gen5_emit_binding_table(sna, offset); + if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel)) gen5_emit_urb(sna); - flush = true; - } gen5_emit_vertex_elements(sna, op); - - if (flush || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { - OUT_BATCH(MI_FLUSH); - kgem_clear_dirty(&sna->kgem); - kgem_bo_mark_dirty(op->dst.bo); - } } static void gen5_bind_surfaces(struct sna *sna, @@ -1405,7 +1019,7 @@ static void gen5_bind_surfaces(struct sna *sna, uint32_t *binding_table; uint16_t offset; - gen5_get_batch(sna); + gen5_get_batch(sna, op); binding_table = gen5_composite_get_binding_table(sna, &offset); @@ -1419,7 +1033,8 @@ static void gen5_bind_surfaces(struct sna *sna, op->src.bo, op->src.width, op->src.height, op->src.card_format, false); - if (op->mask.bo) + if (op->mask.bo) { + 
assert(op->u.gen5.ve_id >> 2); binding_table[2] = gen5_bind_bo(sna, op->mask.bo, @@ -1427,6 +1042,7 @@ static void gen5_bind_surfaces(struct sna *sna, op->mask.height, op->mask.card_format, false); + } if (sna->kgem.surface == offset && *(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t*)binding_table && @@ -1478,9 +1094,9 @@ gen5_render_composite_box(struct sna *sna, } static void -gen5_render_composite_boxes(struct sna *sna, - const struct sna_composite_op *op, - const BoxRec *box, int nbox) +gen5_render_composite_boxes__blt(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) { DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n", __FUNCTION__, nbox, op->dst.x, op->dst.y, @@ -1514,6 +1130,62 @@ gen5_render_composite_boxes(struct sna *sna, } while (nbox); } +static void +gen5_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen5_get_rectangles(sna, op, nbox, + gen5_bind_surfaces); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + } while (nbox); +} + +static void +gen5_render_composite_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen5_get_rectangles(sna, op, nbox, + gen5_bind_surfaces); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + #ifndef MAX #define MAX(a,b) ((a) > (b) ? 
(a) : (b)) #endif @@ -1559,9 +1231,8 @@ static void gen5_video_bind_surfaces(struct sna *sna, int src_height[6]; int src_pitch[6]; uint32_t *binding_table; - int n_src, n; uint16_t offset; - + int n_src, n; src_surf_base[0] = 0; src_surf_base[1] = 0; @@ -1594,9 +1265,9 @@ static void gen5_video_bind_surfaces(struct sna *sna, n_src = 1; } - gen5_get_batch(sna); - binding_table = gen5_composite_get_binding_table(sna, &offset); + gen5_get_batch(sna, op); + binding_table = gen5_composite_get_binding_table(sna, &offset); binding_table[0] = gen5_bind_bo(sna, op->dst.bo, op->dst.width, op->dst.height, @@ -1623,10 +1294,11 @@ gen5_render_video(struct sna *sna, RegionPtr dstRegion, short src_w, short src_h, short drw_w, short drw_h, + short dx, short dy, PixmapPtr pixmap) { struct sna_composite_op tmp; - int nbox, dxo, dyo, pix_xoff, pix_yoff; + int nbox, pix_xoff, pix_yoff; float src_scale_x, src_scale_y; struct sna_pixmap *priv; BoxPtr box; @@ -1646,13 +1318,16 @@ gen5_render_video(struct sna *sna, tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth); tmp.dst.bo = priv->gpu_bo; - tmp.src.filter = SAMPLER_FILTER_BILINEAR; + if (src_w == drw_w && src_h == drw_h) + tmp.src.filter = SAMPLER_FILTER_NEAREST; + else + tmp.src.filter = SAMPLER_FILTER_BILINEAR; tmp.src.repeat = SAMPLER_EXTEND_PAD; tmp.src.bo = frame->bo; tmp.mask.bo = NULL; tmp.u.gen5.wm_kernel = is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED; - tmp.u.gen5.ve_id = 1; + tmp.u.gen5.ve_id = 2; tmp.is_affine = true; tmp.floats_per_vertex = 3; tmp.floats_per_rect = 9; @@ -1677,9 +1352,6 @@ gen5_render_video(struct sna *sna, pix_yoff = 0; #endif - dxo = dstRegion->extents.x1; - dyo = dstRegion->extents.y1; - /* Use normalized texture coordinates */ src_scale_x = ((float)src_w / frame->width) / (float)drw_w; src_scale_y = ((float)src_h / frame->height) / (float)drw_h; @@ -1697,16 +1369,16 @@ gen5_render_video(struct sna *sna, gen5_get_rectangles(sna, &tmp, 1, gen5_video_bind_surfaces); OUT_VERTEX(r.x2, r.y2); - OUT_VERTEX_F((box->x2 - dxo) * src_scale_x); - OUT_VERTEX_F((box->y2 - dyo) * src_scale_y); + OUT_VERTEX_F((box->x2 - dx) * src_scale_x); + OUT_VERTEX_F((box->y2 - dy) * src_scale_y); OUT_VERTEX(r.x1, r.y2); - OUT_VERTEX_F((box->x1 - dxo) * src_scale_x); - OUT_VERTEX_F((box->y2 - dyo) * src_scale_y); + OUT_VERTEX_F((box->x1 - dx) * src_scale_x); + OUT_VERTEX_F((box->y2 - dy) * src_scale_y); OUT_VERTEX(r.x1, r.y1); - OUT_VERTEX_F((box->x1 - dxo) * src_scale_x); - OUT_VERTEX_F((box->y1 - dyo) * src_scale_y); + OUT_VERTEX_F((box->x1 - dx) * src_scale_x); + OUT_VERTEX_F((box->y1 - dy) * src_scale_y); if (!DAMAGE_IS_ALL(priv->gpu_damage)) { sna_damage_add_box(&priv->gpu_damage, &r); @@ -1716,146 +1388,11 @@ gen5_render_video(struct sna *sna, } priv->clear = false; - gen5_vertex_flush(sna); + gen4_vertex_flush(sna); return true; } static int -gen5_composite_solid_init(struct sna *sna, - struct sna_composite_channel *channel, - uint32_t color) -{ - channel->filter = PictFilterNearest; - channel->repeat = RepeatNormal; - channel->is_affine = true; - channel->is_solid = true; - channel->transform = NULL; - channel->width = 1; - channel->height = 1; - channel->card_format = GEN5_SURFACEFORMAT_B8G8R8A8_UNORM; - - channel->bo = sna_render_get_solid(sna, color); - - channel->scale[0] = channel->scale[1] = 1; - channel->offset[0] = channel->offset[1] = 0; - return channel->bo != NULL; -} - -static bool -gen5_composite_linear_init(struct sna *sna, - PicturePtr picture, - struct sna_composite_channel *channel, - 
int x, int y, - int w, int h, - int dst_x, int dst_y) -{ - PictLinearGradient *linear = - (PictLinearGradient *)picture->pSourcePict; - pixman_fixed_t tx, ty; - float x0, y0, sf; - float dx, dy; - - DBG(("%s: p1=(%f, %f), p2=(%f, %f), src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n", - __FUNCTION__, - pixman_fixed_to_double(linear->p1.x), pixman_fixed_to_double(linear->p1.y), - pixman_fixed_to_double(linear->p2.x), pixman_fixed_to_double(linear->p2.y), - x, y, dst_x, dst_y, w, h)); - - if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y) - return 0; - - if (!sna_transform_is_affine(picture->transform)) { - DBG(("%s: fallback due to projective transform\n", - __FUNCTION__)); - return sna_render_picture_fixup(sna, picture, channel, - x, y, w, h, dst_x, dst_y); - } - - channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear); - if (!channel->bo) - return 0; - - channel->filter = PictFilterNearest; - channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; - channel->width = channel->bo->pitch / 4; - channel->height = 1; - channel->pict_format = PICT_a8r8g8b8; - - channel->scale[0] = channel->scale[1] = 1; - channel->offset[0] = channel->offset[1] = 0; - - if (sna_transform_is_translation(picture->transform, &tx, &ty)) { - dx = pixman_fixed_to_double(linear->p2.x - linear->p1.x); - dy = pixman_fixed_to_double(linear->p2.y - linear->p1.y); - - x0 = pixman_fixed_to_double(linear->p1.x); - y0 = pixman_fixed_to_double(linear->p1.y); - - if (tx | ty) { - x0 -= pixman_fixed_to_double(tx); - y0 -= pixman_fixed_to_double(ty); - } - } else { - struct pixman_f_vector p1, p2; - struct pixman_f_transform m, inv; - - pixman_f_transform_from_pixman_transform(&m, picture->transform); - DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n", - __FUNCTION__, - m.m[0][0], m.m[0][1], m.m[0][2], - m.m[1][0], m.m[1][1], m.m[1][2], - m.m[2][0], m.m[2][1], m.m[2][2])); - if (!pixman_f_transform_invert(&inv, &m)) - return 0; - - p1.v[0] = pixman_fixed_to_double(linear->p1.x); - p1.v[1] = pixman_fixed_to_double(linear->p1.y); - p1.v[2] = 1.; - pixman_f_transform_point(&inv, &p1); - - p2.v[0] = pixman_fixed_to_double(linear->p2.x); - p2.v[1] = pixman_fixed_to_double(linear->p2.y); - p2.v[2] = 1.; - pixman_f_transform_point(&inv, &p2); - - DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n", - __FUNCTION__, - p1.v[0], p1.v[1], p1.v[2], - p2.v[0], p2.v[1], p2.v[2])); - - dx = p2.v[0] - p1.v[0]; - dy = p2.v[1] - p1.v[1]; - - x0 = p1.v[0]; - y0 = p1.v[1]; - } - - sf = dx*dx + dy*dy; - dx /= sf; - dy /= sf; - - channel->embedded_transform.matrix[0][0] = pixman_double_to_fixed(dx); - channel->embedded_transform.matrix[0][1] = pixman_double_to_fixed(dy); - channel->embedded_transform.matrix[0][2] = -pixman_double_to_fixed(dx*(x0+dst_x-x) + dy*(y0+dst_y-y)); - - channel->embedded_transform.matrix[1][0] = 0; - channel->embedded_transform.matrix[1][1] = 0; - channel->embedded_transform.matrix[1][2] = pixman_double_to_fixed(.5); - - channel->embedded_transform.matrix[2][0] = 0; - channel->embedded_transform.matrix[2][1] = 0; - channel->embedded_transform.matrix[2][2] = pixman_fixed_1; - - channel->transform = &channel->embedded_transform; - channel->is_affine = 1; - - DBG(("%s: dx=%f, dy=%f, offset=%f\n", - __FUNCTION__, dx, dy, -dx*(x0-x+dst_x) + -dy*(y0-y+dst_y))); - - return channel->bo != NULL; -} - -static int gen5_composite_picture(struct sna *sna, PicturePtr picture, struct sna_composite_channel *channel, @@ -1875,16 +1412,16 @@ gen5_composite_picture(struct sna *sna, 
 	channel->card_format = -1;
 
 	if (sna_picture_is_solid(picture, &color))
-		return gen5_composite_solid_init(sna, channel, color);
+		return gen4_channel_init_solid(sna, channel, color);
 
 	if (picture->pDrawable == NULL) {
 		int ret;
 
 		if (picture->pSourcePict->type == SourcePictTypeLinear)
-			return gen5_composite_linear_init(sna, picture, channel,
-							  x, y,
-							  w, h,
-							  dst_x, dst_y);
+			return gen4_channel_init_linear(sna, picture, channel,
+							x, y,
							w, h,
+							dst_x, dst_y);
 
 		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
 		ret = -1;
@@ -1935,7 +1472,8 @@ gen5_composite_picture(struct sna *sna,
 	channel->card_format = gen5_get_card_format(picture->format);
 	if (channel->card_format == -1)
 		return sna_render_picture_convert(sna, picture, channel, pixmap,
-						  x, y, w, h, dst_x, dst_y);
+						  x, y, w, h, dst_x, dst_y,
+						  false);
 
 	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
 		return sna_render_picture_extract(sna, picture, channel,
@@ -1957,8 +1495,8 @@ static void
 gen5_render_composite_done(struct sna *sna,
 			   const struct sna_composite_op *op)
 {
-	if (sna->render_state.gen5.vertex_offset) {
-		gen5_vertex_flush(sna);
+	if (sna->render.vertex_offset) {
+		gen4_vertex_flush(sna);
 		gen5_magic_ca_pass(sna,op);
 	}
@@ -1981,10 +1519,9 @@ gen5_composite_set_target(struct sna *sna,
 	BoxRec box;
 
 	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
-	op->dst.format = dst->format;
-	op->dst.width = op->dst.pixmap->drawable.width;
+	op->dst.width = op->dst.pixmap->drawable.width;
 	op->dst.height = op->dst.pixmap->drawable.height;
-
+	op->dst.format = dst->format;
 	if (w && h) {
 		box.x1 = x;
 		box.y1 = y;
@@ -2019,15 +1556,6 @@ gen5_composite_set_target(struct sna *sna,
 	return true;
 }
 
-static inline bool
-picture_is_cpu(PicturePtr picture)
-{
-	if (!picture->pDrawable)
-		return false;
-
-	return !is_gpu(picture->pDrawable);
-}
-
 static bool
 try_blt(struct sna *sna,
 	PicturePtr dst, PicturePtr src,
@@ -2052,7 +1580,7 @@ try_blt(struct sna *sna,
 		return true;
 
 	/* is the source picture only in cpu memory e.g. a shm pixmap? */
-	return picture_is_cpu(src);
+	return picture_is_cpu(sna, src);
 }
 
 static bool
@@ -2077,15 +1605,10 @@ has_alphamap(PicturePtr p)
 }
 
 static bool
-untransformed(PicturePtr p)
+need_upload(struct sna *sna, PicturePtr p)
 {
-	return !p->transform || pixman_transform_is_int_translate(p->transform);
-}
-
-static bool
-need_upload(PicturePtr p)
-{
-	return p->pDrawable && untransformed(p) && !is_gpu(p->pDrawable);
+	return p->pDrawable && untransformed(p) &&
+		!is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
 }
 
 static bool
@@ -2108,7 +1631,7 @@ source_is_busy(PixmapPtr pixmap)
 }
 
 static bool
-source_fallback(PicturePtr p, PixmapPtr pixmap)
+source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap)
 {
 	if (sna_picture_is_solid(p, NULL))
 		return false;
@@ -2121,7 +1644,7 @@ source_fallback(PicturePtr p, PixmapPtr pixmap)
 	if (pixmap && source_is_busy(pixmap))
 		return false;
 
-	return has_alphamap(p) || !gen5_check_filter(p) || need_upload(p);
+	return has_alphamap(p) || !gen5_check_filter(p) || need_upload(sna, p);
 }
 
 static bool
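The hunks above thread the sna pointer through the fallback predicates (picture_is_cpu(), need_upload(), source_fallback()) so they can consult per-device preference hints, and the hunks that follow make gen5_composite_fallback() delegate its destination checks to dst_use_gpu()/dst_use_cpu() instead of poking at sna_pixmap damage directly. The policy stays "render where the destination already lives". A self-contained model of that decision, with an invented struct standing in for sna's damage tracking:

	#include <stdbool.h>

	struct dst_state {
		bool gpu_damage;	/* pending rendering in the GPU copy */
		bool cpu_damage;	/* pending rendering in the CPU copy */
		bool clear;		/* known cleared; cheap either way */
	};

	/* true -> composite on the GPU, false -> fall back to software */
	static bool composite_use_gpu(const struct dst_state *dst,
				      bool src_fallback, bool mask_fallback)
	{
		if (dst->gpu_damage && !dst->clear)
			return true;	/* dst already lives on the GPU */
		if (src_fallback || mask_fallback)
			return false;	/* sources are CPU-bound, e.g. shm */
		return !dst->cpu_damage;
	}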
@@ -2130,7 +1653,6 @@ gen5_composite_fallback(struct sna *sna,
 		       PicturePtr mask,
 		       PicturePtr dst)
 {
-	struct sna_pixmap *priv;
 	PixmapPtr src_pixmap;
 	PixmapPtr mask_pixmap;
 	PixmapPtr dst_pixmap;
@@ -2145,11 +1667,11 @@ gen5_composite_fallback(struct sna *sna,
 	dst_pixmap = get_drawable_pixmap(dst->pDrawable);
 
 	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
-	src_fallback = source_fallback(src, src_pixmap);
+	src_fallback = source_fallback(sna, src, src_pixmap);
 
 	if (mask) {
 		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
-		mask_fallback = source_fallback(mask, mask_pixmap);
+		mask_fallback = source_fallback(sna, mask, mask_pixmap);
 	} else {
 		mask_pixmap = NULL;
 		mask_fallback = false;
 	}
@@ -2169,8 +1691,7 @@ gen5_composite_fallback(struct sna *sna,
 	}
 
 	/* If anything is on the GPU, push everything out to the GPU */
-	priv = sna_pixmap(dst_pixmap);
-	if (priv && priv->gpu_damage && !priv->clear) {
+	if (dst_use_gpu(dst_pixmap)) {
 		DBG(("%s: dst is already on the GPU, try to use GPU\n",
 		     __FUNCTION__));
 		return false;
@@ -2205,14 +1726,14 @@ gen5_composite_fallback(struct sna *sna,
 
 	if (too_large(dst_pixmap->drawable.width,
 		      dst_pixmap->drawable.height) &&
-	    (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage))) {
+	    dst_is_cpu(dst_pixmap)) {
 		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
 		return true;
 	}
 
 	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
 	     __FUNCTION__));
-	return false;
+	return dst_use_cpu(dst_pixmap);
 }
 
 static int
@@ -2233,7 +1754,7 @@ reuse_source(struct sna *sna,
 	}
 
 	if (sna_picture_is_solid(mask, &color))
-		return gen5_composite_solid_init(sna, mc, color);
+		return gen4_channel_init_solid(sna, mc, color);
 
 	if (sc->is_solid)
 		return false;
@@ -2318,6 +1839,7 @@ gen5_render_composite(struct sna *sna,
 	}
 
 	DBG(("%s: preparing source\n", __FUNCTION__));
+	tmp->op = op;
 	switch (gen5_composite_picture(sna, src, &tmp->src,
 				       src_x, src_y,
 				       width, height,
@@ -2327,7 +1849,7 @@ gen5_render_composite(struct sna *sna,
 		DBG(("%s: failed to prepare source picture\n", __FUNCTION__));
 		goto cleanup_dst;
 	case 0:
-		if (!gen5_composite_solid_init(sna, &tmp->src, 0))
+		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
 			goto cleanup_dst;
 		/* fall through to fixup */
 	case 1:
@@ -2341,12 +1863,10 @@ gen5_render_composite(struct sna *sna,
 		break;
 	}
 
-	tmp->op = op;
 	tmp->is_affine = tmp->src.is_affine;
 	tmp->has_component_alpha = false;
 	tmp->need_magic_ca_pass = false;
 
-	tmp->prim_emit = gen5_emit_composite_primitive;
 	if (mask) {
 		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
 			tmp->has_component_alpha = true;
@@ -2380,7 +1900,7 @@ gen5_render_composite(struct sna *sna,
 			DBG(("%s: failed to prepare mask picture\n", __FUNCTION__));
 			goto cleanup_src;
 		case 0:
-			if (!gen5_composite_solid_init(sna, &tmp->mask, 0))
+			if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
 				goto cleanup_src;
 			/* fall through to fixup */
 		case 1:
@@ -2390,33 +1910,22 @@ gen5_render_composite(struct sna *sna,
 		}
 
 		tmp->is_affine &= tmp->mask.is_affine;
-
-		if (tmp->src.transform == NULL && tmp->mask.transform == NULL)
-			tmp->prim_emit = gen5_emit_composite_primitive_identity_source_mask;
-
-		tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine;
-	} else {
-		if (tmp->src.is_solid)
-			tmp->prim_emit = gen5_emit_composite_primitive_solid;
-		else if (tmp->src.transform == NULL)
-			tmp->prim_emit = gen5_emit_composite_primitive_identity_source;
-		else if (tmp->src.is_affine)
-			tmp->prim_emit = gen5_emit_composite_primitive_affine_source;
-
-		tmp->floats_per_vertex = 3 + !tmp->is_affine;
 	}
-	tmp->floats_per_rect = 3*tmp->floats_per_vertex;
 
 	tmp->u.gen5.wm_kernel =
 		gen5_choose_composite_kernel(tmp->op,
 					     tmp->mask.bo != NULL,
 					     tmp->has_component_alpha,
 					     tmp->is_affine);
-	tmp->u.gen5.ve_id = (tmp->mask.bo != NULL) << 1 | tmp->is_affine;
+	tmp->u.gen5.ve_id = gen4_choose_composite_emitter(tmp);
 
 	tmp->blt   =
gen5_render_composite_blt; tmp->box = gen5_render_composite_box; - tmp->boxes = gen5_render_composite_boxes; + tmp->boxes = gen5_render_composite_boxes__blt; + if (tmp->emit_boxes) { + tmp->boxes = gen5_render_composite_boxes; + tmp->thread_boxes = gen5_render_composite_boxes__thread; + } tmp->done = gen5_render_composite_done; if (!kgem_check_bo(&sna->kgem, @@ -2444,125 +1953,6 @@ cleanup_dst: } #if !NO_COMPOSITE_SPANS -inline static void -gen5_emit_composite_texcoord(struct sna *sna, - const struct sna_composite_channel *channel, - int16_t x, int16_t y) -{ - float t[3]; - - if (channel->is_affine) { - sna_get_transformed_coordinates(x + channel->offset[0], - y + channel->offset[1], - channel->transform, - &t[0], &t[1]); - OUT_VERTEX_F(t[0] * channel->scale[0]); - OUT_VERTEX_F(t[1] * channel->scale[1]); - } else { - t[0] = t[1] = 0; t[2] = 1; - sna_get_transformed_coordinates_3d(x + channel->offset[0], - y + channel->offset[1], - channel->transform, - &t[0], &t[1], &t[2]); - OUT_VERTEX_F(t[0] * channel->scale[0]); - OUT_VERTEX_F(t[1] * channel->scale[1]); - OUT_VERTEX_F(t[2]); - } -} - -inline static void -gen5_emit_composite_texcoord_affine(struct sna *sna, - const struct sna_composite_channel *channel, - int16_t x, int16_t y) -{ - float t[2]; - - sna_get_transformed_coordinates(x + channel->offset[0], - y + channel->offset[1], - channel->transform, - &t[0], &t[1]); - OUT_VERTEX_F(t[0] * channel->scale[0]); - OUT_VERTEX_F(t[1] * channel->scale[1]); -} - -inline static void -gen5_emit_composite_spans_vertex(struct sna *sna, - const struct sna_composite_spans_op *op, - int16_t x, int16_t y) -{ - OUT_VERTEX(x, y); - gen5_emit_composite_texcoord(sna, &op->base.src, x, y); -} - -fastcall static void -gen5_emit_composite_spans_primitive(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - gen5_emit_composite_spans_vertex(sna, op, box->x2, box->y2); - OUT_VERTEX_F(opacity); - OUT_VERTEX_F(1); - if (!op->base.is_affine) - OUT_VERTEX_F(1); - - gen5_emit_composite_spans_vertex(sna, op, box->x1, box->y2); - OUT_VERTEX_F(opacity); - OUT_VERTEX_F(1); - if (!op->base.is_affine) - OUT_VERTEX_F(1); - - gen5_emit_composite_spans_vertex(sna, op, box->x1, box->y1); - OUT_VERTEX_F(opacity); - OUT_VERTEX_F(0); - if (!op->base.is_affine) - OUT_VERTEX_F(1); -} - -fastcall static void -gen5_emit_composite_spans_solid(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - OUT_VERTEX(box->x2, box->y2); - OUT_VERTEX_F(1); OUT_VERTEX_F(1); - OUT_VERTEX_F(opacity); OUT_VERTEX_F(1); - - OUT_VERTEX(box->x1, box->y2); - OUT_VERTEX_F(0); OUT_VERTEX_F(1); - OUT_VERTEX_F(opacity); OUT_VERTEX_F(1); - - OUT_VERTEX(box->x1, box->y1); - OUT_VERTEX_F(0); OUT_VERTEX_F(0); - OUT_VERTEX_F(opacity); OUT_VERTEX_F(0); -} - -fastcall static void -gen5_emit_composite_spans_affine(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - OUT_VERTEX(box->x2, box->y2); - gen5_emit_composite_texcoord_affine(sna, &op->base.src, - box->x2, box->y2); - OUT_VERTEX_F(opacity); - OUT_VERTEX_F(1); - - OUT_VERTEX(box->x1, box->y2); - gen5_emit_composite_texcoord_affine(sna, &op->base.src, - box->x1, box->y2); - OUT_VERTEX_F(opacity); - OUT_VERTEX_F(1); - - OUT_VERTEX(box->x1, box->y1); - gen5_emit_composite_texcoord_affine(sna, &op->base.src, - box->x1, box->y1); - OUT_VERTEX_F(opacity); - OUT_VERTEX_F(0); -} - fastcall static void gen5_render_composite_spans_box(struct sna *sna, const struct 
sna_composite_spans_op *op, @@ -2612,18 +2002,51 @@ gen5_render_composite_spans_boxes(struct sna *sna, } fastcall static void +gen5_render_composite_spans_boxes__thread(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox, + gen5_bind_surfaces); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +fastcall static void gen5_render_composite_spans_done(struct sna *sna, const struct sna_composite_spans_op *op) { - if (sna->render_state.gen5.vertex_offset) - gen5_vertex_flush(sna); + if (sna->render.vertex_offset) + gen4_vertex_flush(sna); DBG(("%s()\n", __FUNCTION__)); - kgem_bo_destroy(&sna->kgem, op->base.mask.bo); - if (op->base.src.bo) - kgem_bo_destroy(&sna->kgem, op->base.src.bo); - + kgem_bo_destroy(&sna->kgem, op->base.src.bo); sna_render_composite_redirect_done(sna, &op->base); } @@ -2633,21 +2056,39 @@ gen5_check_composite_spans(struct sna *sna, int16_t width, int16_t height, unsigned flags) { - if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) - return false; + DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n", + __FUNCTION__, op, width, height, flags)); if (op >= ARRAY_SIZE(gen5_blend_op)) return false; - if (gen5_composite_fallback(sna, src, NULL, dst)) + if (gen5_composite_fallback(sna, src, NULL, dst)) { + DBG(("%s: operation would fallback\n", __FUNCTION__)); return false; + } - if (need_tiling(sna, width, height)) { - if (!is_gpu(dst->pDrawable)) { - DBG(("%s: fallback, tiled operation not on GPU\n", - __FUNCTION__)); + if (need_tiling(sna, width, height) && + !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { + DBG(("%s: fallback, tiled operation not on GPU\n", + __FUNCTION__)); + return false; + } + + if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) { + struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable); + assert(priv); + + if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) + return true; + + if (flags & COMPOSITE_SPANS_INPLACE_HINT) return false; - } + + if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0 && + dst->format == PICT_a8) + return false; + + return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo); } return true; @@ -2690,7 +2131,7 @@ gen5_render_composite_spans(struct sna *sna, case -1: goto cleanup_dst; case 0: - if (!gen5_composite_solid_init(sna, &tmp->base.src, 0)) + if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) goto cleanup_dst; /* fall through to fixup */ case 1: @@ -2698,27 +2139,19 @@ gen5_render_composite_spans(struct sna *sna, break; } - tmp->base.mask.bo = sna_render_get_solid(sna, 0); - if (tmp->base.mask.bo == NULL) - goto cleanup_src; + tmp->base.mask.bo = NULL; tmp->base.is_affine = tmp->base.src.is_affine; tmp->base.has_component_alpha = false; tmp->base.need_magic_ca_pass = false; - tmp->prim_emit = gen5_emit_composite_spans_primitive; - if 
(tmp->base.src.is_solid)
-		tmp->prim_emit = gen5_emit_composite_spans_solid;
-	else if (tmp->base.is_affine)
-		tmp->prim_emit = gen5_emit_composite_spans_affine;
-	tmp->base.floats_per_vertex = 5 + 2*!tmp->base.is_affine;
-	tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;
-
+	tmp->base.u.gen5.ve_id = gen4_choose_spans_emitter(tmp);
 	tmp->base.u.gen5.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine;
-	tmp->base.u.gen5.ve_id = 1 << 1 | tmp->base.is_affine;
 
 	tmp->box   = gen5_render_composite_spans_box;
 	tmp->boxes = gen5_render_composite_spans_boxes;
+	if (tmp->emit_boxes)
+		tmp->thread_boxes = gen5_render_composite_spans_boxes__thread;
 	tmp->done  = gen5_render_composite_spans_done;
 
 	if (!kgem_check_bo(&sna->kgem,
@@ -2752,7 +2185,7 @@ gen5_copy_bind_surfaces(struct sna *sna,
 	uint32_t *binding_table;
 	uint16_t offset;
 
-	gen5_get_batch(sna);
+	gen5_get_batch(sna, op);
 
 	binding_table = gen5_composite_get_binding_table(sna, &offset);
 
@@ -2846,7 +2279,6 @@ fallback_blt:
 			if (box[i].y2 > extents.y2)
 				extents.y2 = box[i].y2;
 		}
-
 		if (!sna_render_composite_redirect(sna, &tmp,
 						   extents.x1 + dst_dx,
 						   extents.y1 + dst_dy,
@@ -2893,7 +2325,7 @@ fallback_blt:
 	tmp.floats_per_vertex = 3;
 	tmp.floats_per_rect = 9;
 	tmp.u.gen5.wm_kernel = WM_KERNEL;
-	tmp.u.gen5.ve_id = 1;
+	tmp.u.gen5.ve_id = 2;
 
 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
 		kgem_submit(&sna->kgem);
@@ -2939,7 +2371,7 @@ fallback_blt:
 		} while (--n_this_time);
 	} while (n);
 
-	gen5_vertex_flush(sna);
+	gen4_vertex_flush(sna);
 	sna_render_composite_redirect_done(sna, &tmp);
 	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
 	return true;
@@ -2950,6 +2382,14 @@ fallback_tiled_dst:
 	if (tmp.redirect.real_bo)
 		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
+	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
+	    sna_blt_copy_boxes(sna, alu,
+			       src_bo, src_dx, src_dy,
+			       dst_bo, dst_dx, dst_dy,
+			       dst->drawable.bitsPerPixel,
+			       box, n))
+		return true;
+
 	return sna_tiling_copy_boxes(sna, alu,
 				     src, src_bo, src_dx, src_dy,
 				     dst, dst_bo, dst_dx, dst_dy,
@@ -2985,8 +2425,8 @@ static void
 gen5_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
 {
-	if (sna->render_state.gen5.vertex_offset)
-		gen5_vertex_flush(sna);
+	if (sna->render.vertex_offset)
+		gen4_vertex_flush(sna);
 
 	DBG(("%s()\n", __FUNCTION__));
 }
@@ -3049,9 +2489,9 @@ fallback:
 	op->base.floats_per_vertex = 3;
 	op->base.floats_per_rect = 9;
 	op->base.u.gen5.wm_kernel = WM_KERNEL;
-	op->base.u.gen5.ve_id = 1;
+	op->base.u.gen5.ve_id = 2;
 
-	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
+	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
 		kgem_submit(&sna->kgem);
 		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
 			goto fallback;
@@ -3081,7 +2521,7 @@ gen5_fill_bind_surfaces(struct sna *sna,
 	uint32_t *binding_table;
 	uint16_t offset;
 
-	gen5_get_batch(sna);
+	gen5_get_batch(sna, op);
 
 	binding_table = gen5_composite_get_binding_table(sna, &offset);
 
@@ -3168,16 +2608,19 @@ gen5_render_fill_boxes(struct sna *sna,
 					      dst, dst_bo,
 					      box, n);
 	}
 
-	if (op == PictOpClear)
+	if (op == PictOpClear) {
 		pixel = 0;
-	else if (!sna_get_pixel_from_rgba(&pixel,
-					  color->red,
-					  color->green,
-					  color->blue,
-					  color->alpha,
-					  PICT_a8r8g8b8))
+		op = PictOpSrc;
+	} else if (!sna_get_pixel_from_rgba(&pixel,
+					    color->red,
+					    color->green,
+					    color->blue,
+					    color->alpha,
+					    PICT_a8r8g8b8))
 		return false;
 
+	DBG(("%s(%08x x %d)\n", __FUNCTION__, pixel, n));
+
 	memset(&tmp, 0, sizeof(tmp));
 
 	tmp.op = op;
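In the fill hunks that follow, every solid-fill path stops emitting a full (u, v) texture coordinate per vertex and writes a single 0.5 instead, with floats_per_vertex dropping from 3 to 2 and floats_per_rect from 9 to 6. A solid fill samples a 1x1 REPEAT surface, so any coordinate fetches the same texel and one dummy float suffices. The arithmetic, as a trivial sketch (the RECTLIST topology used by these paths consumes three vertices per rectangle):

	enum { VERTS_PER_RECT = 3 };	/* _3DPRIM_RECTLIST */

	static int floats_per_rect(int floats_per_vertex)
	{
		return VERTS_PER_RECT * floats_per_vertex;
	}

	/* before: floats_per_rect(3) == 9; after: floats_per_rect(2) == 6,
	 * i.e. a third less vertex-buffer traffic for every filled box. */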
@@ -3193,8 +2636,8 @@
 	tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
 
 	tmp.is_affine = true;
-	tmp.floats_per_vertex = 3;
-	tmp.floats_per_rect = 9;
+	tmp.floats_per_vertex = 2;
+	tmp.floats_per_rect = 6;
 	tmp.u.gen5.wm_kernel = WM_KERNEL;
 	tmp.u.gen5.ve_id = 1;
@@ -3217,22 +2660,19 @@ gen5_render_fill_boxes(struct sna *sna,
 			DBG(("	(%d, %d), (%d, %d)\n",
 			     box->x1, box->y1, box->x2, box->y2));
 			OUT_VERTEX(box->x2, box->y2);
-			OUT_VERTEX_F(1);
-			OUT_VERTEX_F(1);
+			OUT_VERTEX_F(.5);
 
 			OUT_VERTEX(box->x1, box->y2);
-			OUT_VERTEX_F(0);
-			OUT_VERTEX_F(1);
+			OUT_VERTEX_F(.5);
 
 			OUT_VERTEX(box->x1, box->y1);
-			OUT_VERTEX_F(0);
-			OUT_VERTEX_F(0);
+			OUT_VERTEX_F(.5);
 
 			box++;
 		} while (--n_this_time);
 	} while (n);
 
-	gen5_vertex_flush(sna);
+	gen4_vertex_flush(sna);
 	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
 	return true;
 }
@@ -3247,16 +2687,13 @@ gen5_render_fill_op_blt(struct sna *sna,
 	gen5_get_rectangles(sna, &op->base, 1, gen5_fill_bind_surfaces);
 
 	OUT_VERTEX(x+w, y+h);
-	OUT_VERTEX_F(1);
-	OUT_VERTEX_F(1);
+	OUT_VERTEX_F(.5);
 
 	OUT_VERTEX(x, y+h);
-	OUT_VERTEX_F(0);
-	OUT_VERTEX_F(1);
+	OUT_VERTEX_F(.5);
 
 	OUT_VERTEX(x, y);
-	OUT_VERTEX_F(0);
-	OUT_VERTEX_F(0);
+	OUT_VERTEX_F(.5);
 }
 
 fastcall static void
@@ -3270,16 +2707,13 @@ gen5_render_fill_op_box(struct sna *sna,
 	gen5_get_rectangles(sna, &op->base, 1, gen5_fill_bind_surfaces);
 
 	OUT_VERTEX(box->x2, box->y2);
-	OUT_VERTEX_F(1);
-	OUT_VERTEX_F(1);
+	OUT_VERTEX_F(.5);
 
 	OUT_VERTEX(box->x1, box->y2);
-	OUT_VERTEX_F(0);
-	OUT_VERTEX_F(1);
+	OUT_VERTEX_F(.5);
 
 	OUT_VERTEX(box->x1, box->y1);
-	OUT_VERTEX_F(0);
-	OUT_VERTEX_F(0);
+	OUT_VERTEX_F(.5);
 }
 
 fastcall static void
@@ -3300,16 +2734,13 @@ gen5_render_fill_op_boxes(struct sna *sna,
 		do {
 			OUT_VERTEX(box->x2, box->y2);
-			OUT_VERTEX_F(1);
-			OUT_VERTEX_F(1);
+			OUT_VERTEX_F(.5);
 
 			OUT_VERTEX(box->x1, box->y2);
-			OUT_VERTEX_F(0);
-			OUT_VERTEX_F(1);
+			OUT_VERTEX_F(.5);
 
 			OUT_VERTEX(box->x1, box->y1);
-			OUT_VERTEX_F(0);
-			OUT_VERTEX_F(0);
+			OUT_VERTEX_F(.5);
 
 			box++;
 		} while (--nbox_this_time);
 	} while (nbox);
@@ -3319,8 +2750,8 @@ static void
 gen5_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
 {
-	if (sna->render_state.gen5.vertex_offset)
-		gen5_vertex_flush(sna);
+	if (sna->render.vertex_offset)
+		gen4_vertex_flush(sna);
 	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
 
 	DBG(("%s()\n", __FUNCTION__));
@@ -3375,8 +2806,8 @@ gen5_render_fill(struct sna *sna, uint8_t alu,
 	op->base.mask.repeat = SAMPLER_EXTEND_NONE;
 
 	op->base.is_affine = true;
-	op->base.floats_per_vertex = 3;
-	op->base.floats_per_rect = 9;
+	op->base.floats_per_vertex = 2;
+	op->base.floats_per_rect = 6;
 	op->base.u.gen5.wm_kernel = WM_KERNEL;
 	op->base.u.gen5.ve_id = 1;
@@ -3463,8 +2894,8 @@ gen5_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 	tmp.mask.repeat = SAMPLER_EXTEND_NONE;
 
 	tmp.is_affine = true;
-	tmp.floats_per_vertex = 3;
-	tmp.floats_per_rect = 9;
+	tmp.floats_per_vertex = 2;
+	tmp.floats_per_rect = 6;
 	tmp.has_component_alpha = 0;
 	tmp.need_magic_ca_pass = false;
@@ -3472,7 +2903,11 @@ gen5_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 	tmp.u.gen5.ve_id = 1;
 
 	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
-		_kgem_submit(&sna->kgem);
+		kgem_submit(&sna->kgem);
+		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
+			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+			return false;
+		}
 		assert(kgem_check_bo(&sna->kgem, bo, NULL));
 	}
@@ -3483,18 +2918,15 @@ gen5_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 	DBG(("	(%d, %d), (%d, %d)\n", x1, y1, x2, y2));
 	OUT_VERTEX(x2, y2);
-	OUT_VERTEX_F(1);
-	OUT_VERTEX_F(1);
+	OUT_VERTEX_F(.5);
 
 	OUT_VERTEX(x1, y2);
-	OUT_VERTEX_F(0);
-
OUT_VERTEX_F(1); + OUT_VERTEX_F(.5); OUT_VERTEX(x1, y1); - OUT_VERTEX_F(0); - OUT_VERTEX_F(0); + OUT_VERTEX_F(.5); - gen5_vertex_flush(sna); + gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, tmp.src.bo); return true; @@ -3503,14 +2935,17 @@ gen5_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, static void gen5_render_flush(struct sna *sna) { - gen5_vertex_close(sna); + gen4_vertex_close(sna); + + assert(sna->render.vb_id == 0); + assert(sna->render.vertex_offset == 0); } static void gen5_render_context_switch(struct kgem *kgem, int new_mode) { - if (!kgem->mode) + if (!kgem->nbatch) return; /* WaNonPipelinedStateCommandFlush @@ -3529,7 +2964,7 @@ gen5_render_context_switch(struct kgem *kgem, sna->render_state.gen5.drawrect_limit = -1; } - if (kgem_is_idle(kgem)) { + if (kgem_ring_is_idle(kgem, kgem->ring)) { DBG(("%s: GPU idle, flushing\n", __FUNCTION__)); _kgem_submit(kgem); } @@ -3574,7 +3009,6 @@ gen5_render_expire(struct kgem *kgem) static void gen5_render_reset(struct sna *sna) { sna->render_state.gen5.needs_invariant = true; - sna->render_state.gen5.vb_id = 0; sna->render_state.gen5.ve_id = -1; sna->render_state.gen5.last_primitive = -1; sna->render_state.gen5.last_pipelined_pointers = 0; @@ -3588,6 +3022,10 @@ static void gen5_render_reset(struct sna *sna) DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); discard_vbo(sna); } + + sna->render.vertex_offset = 0; + sna->render.nvertex_reloc = 0; + sna->render.vb_id = 0; } static void gen5_render_fini(struct sna *sna) @@ -3703,23 +3141,11 @@ static void gen5_init_wm_state(struct gen5_wm_unit_state *state, state->thread1.binding_table_entry_count = 0; } -static uint32_t gen5_create_cc_viewport(struct sna_static_stream *stream) -{ - struct gen5_cc_viewport vp; - - vp.min_depth = -1.e35; - vp.max_depth = 1.e35; - - return sna_static_stream_add(stream, &vp, sizeof(vp), 32); -} - static uint32_t gen5_create_cc_unit_state(struct sna_static_stream *stream) { uint8_t *ptr, *base; - uint32_t vp; int i, j; - vp = gen5_create_cc_viewport(stream); base = ptr = sna_static_stream_map(stream, GEN5_BLENDFACTOR_COUNT*GEN5_BLENDFACTOR_COUNT*64, @@ -3732,7 +3158,6 @@ static uint32_t gen5_create_cc_unit_state(struct sna_static_stream *stream) state->cc3.blend_enable = !(j == GEN5_BLENDFACTOR_ZERO && i == GEN5_BLENDFACTOR_ONE); - state->cc4.cc_viewport_state_offset = vp >> 5; state->cc5.logicop_func = 0xc; /* COPY */ state->cc5.ia_blend_function = GEN5_BLENDFUNCTION_ADD; @@ -3816,8 +3241,7 @@ static bool gen5_render_setup(struct sna *sna) for (m = 0; m < KERNEL_COUNT; m++) { gen5_init_wm_state(&wm_state->state, wm_kernels[m].has_mask, - wm[m], - sampler_state); + wm[m], sampler_state); wm_state++; } } @@ -3840,10 +3264,15 @@ bool gen5_render_init(struct sna *sna) sna->kgem.retire = gen5_render_retire; sna->kgem.expire = gen5_render_expire; +#if !NO_COMPOSITE sna->render.composite = gen5_render_composite; + sna->render.prefer_gpu |= PREFER_GPU_RENDER; +#endif #if !NO_COMPOSITE_SPANS sna->render.check_composite_spans = gen5_check_composite_spans; sna->render.composite_spans = gen5_render_composite_spans; + if (DEVICE_ID(sna->PciInfo) == 0x0044) + sna->render.prefer_gpu |= PREFER_GPU_SPANS; #endif sna->render.video = gen5_render_video; diff --git a/src/sna/gen5_render.h b/src/sna/gen5_render.h index b6e5b0c2e..0f6bae6b6 100644 --- a/src/sna/gen5_render.h +++ b/src/sna/gen5_render.h @@ -749,15 +749,14 @@ #define GEN5_VERTEXBUFFER_ACCESS_VERTEXDATA 0 #define GEN5_VERTEXBUFFER_ACCESS_INSTANCEDATA 1 -#define GEN5_VFCOMPONENT_NOSTORE 0 
-#define GEN5_VFCOMPONENT_STORE_SRC 1 -#define GEN5_VFCOMPONENT_STORE_0 2 -#define GEN5_VFCOMPONENT_STORE_1_FLT 3 -#define GEN5_VFCOMPONENT_STORE_1_INT 4 -#define GEN5_VFCOMPONENT_STORE_VID 5 -#define GEN5_VFCOMPONENT_STORE_IID 6 -#define GEN5_VFCOMPONENT_STORE_PID 7 - +#define VFCOMPONENT_NOSTORE 0 +#define VFCOMPONENT_STORE_SRC 1 +#define VFCOMPONENT_STORE_0 2 +#define VFCOMPONENT_STORE_1_FLT 3 +#define VFCOMPONENT_STORE_1_INT 4 +#define VFCOMPONENT_STORE_VID 5 +#define VFCOMPONENT_STORE_IID 6 +#define VFCOMPONENT_STORE_PID 7 /* Execution Unit (EU) defines @@ -1990,50 +1989,43 @@ struct gen5_sampler_legacy_border_color { uint8_t color[4]; }; -struct gen5_sampler_state -{ - - struct - { - unsigned int shadow_function:3; - unsigned int lod_bias:11; - unsigned int min_filter:3; - unsigned int mag_filter:3; - unsigned int mip_filter:2; - unsigned int base_level:5; +struct gen5_sampler_state { + struct { + unsigned int shadow_function:3; + unsigned int lod_bias:11; + unsigned int min_filter:3; + unsigned int mag_filter:3; + unsigned int mip_filter:2; + unsigned int base_level:5; unsigned int pad:1; - unsigned int lod_preclamp:1; - unsigned int border_color_mode:1; + unsigned int lod_preclamp:1; + unsigned int border_color_mode:1; unsigned int pad0:1; - unsigned int disable:1; + unsigned int disable:1; } ss0; - struct - { - unsigned int r_wrap_mode:3; - unsigned int t_wrap_mode:3; - unsigned int s_wrap_mode:3; + struct { + unsigned int r_wrap_mode:3; + unsigned int t_wrap_mode:3; + unsigned int s_wrap_mode:3; unsigned int pad:3; - unsigned int max_lod:10; - unsigned int min_lod:10; + unsigned int max_lod:10; + unsigned int min_lod:10; } ss1; - - struct - { + struct { unsigned int pad:5; - unsigned int border_color_pointer:27; + unsigned int border_color_pointer:27; } ss2; - - struct - { - unsigned int pad:19; - unsigned int max_aniso:3; - unsigned int chroma_key_mode:1; - unsigned int chroma_key_index:2; - unsigned int chroma_key_enable:1; - unsigned int monochrome_filter_width:3; - unsigned int monochrome_filter_height:3; + + struct { + uint32_t pad:13; + uint32_t address_round:6; + uint32_t max_aniso:3; + uint32_t chroma_key_mode:1; + uint32_t chroma_key_index:2; + uint32_t chroma_key_enable:1; + uint32_t mbz:6; } ss3; }; diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c index fd7f2958b..3855f0449 100644 --- a/src/sna/gen6_render.c +++ b/src/sna/gen6_render.c @@ -42,6 +42,8 @@ #include "brw/brw.h" #include "gen6_render.h" +#include "gen4_source.h" +#include "gen4_vertex.h" #define NO_COMPOSITE 0 #define NO_COMPOSITE_SPANS 0 @@ -186,10 +188,6 @@ static const struct blendinfo { #define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX) #define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX) -#define VIDEO_SAMPLER \ - SAMPLER_OFFSET(SAMPLER_FILTER_BILINEAR, SAMPLER_EXTEND_PAD, \ - SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE) - #define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0) #define GEN6_BLEND(f) (((f) >> 0) & 0xfff0) #define GEN6_KERNEL(f) (((f) >> 16) & 0xf) @@ -437,7 +435,7 @@ gen6_emit_viewports(struct sna *sna) (4 - 2)); OUT_BATCH(0); OUT_BATCH(0); - OUT_BATCH(sna->render_state.gen6.cc_vp); + OUT_BATCH(0); } static void @@ -734,7 +732,7 @@ gen6_emit_vertex_elements(struct sna *sna, * texture coordinate 1 if (has_mask is true): same as above */ struct gen6_render_state *render = &sna->render_state.gen6; - uint32_t src_format, dw, offset; + uint32_t src_format, dw; 
int id = GEN6_VERTEX(op->u.gen6.flags); bool has_mask; @@ -744,40 +742,6 @@ gen6_emit_vertex_elements(struct sna *sna, return; render->ve_id = id; - if (id == VERTEX_2s2s) { - DBG(("%s: setup COPY\n", __FUNCTION__)); - - OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS | - ((2 * (1 + 2)) + 1 - 2)); - - OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | - GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT | - 0 << VE0_OFFSET_SHIFT); - OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT | - GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT | - GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | - GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT); - - /* x,y */ - OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | - GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | - 0 << VE0_OFFSET_SHIFT); - OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | - GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | - GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | - GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); - - /* u0, v0, w0 */ - OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | - GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | - 4 << VE0_OFFSET_SHIFT); - OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | - GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | - GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | - GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); - return; - } - /* The VUE layout * dword 0-3: pad (0.0, 0.0, 0.0. 0.0) * dword 4-7: position (x, y, 1.0, 1.0), @@ -806,20 +770,25 @@ gen6_emit_vertex_elements(struct sna *sna, GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); - offset = 4; /* u0, v0, w0 */ - DBG(("%s: first channel %d floats, offset=%d\n", __FUNCTION__, id & 3, offset)); + DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3)); dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; switch (id & 3) { + default: + assert(0); + case 0: + src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; + break; case 1: src_format = GEN6_SURFACEFORMAT_R32_FLOAT; dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; break; - default: - assert(0); case 2: src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT; dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; @@ -835,15 +804,15 @@ gen6_emit_vertex_elements(struct sna *sna, } OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | src_format << VE0_FORMAT_SHIFT | - offset << VE0_OFFSET_SHIFT); + 4 << VE0_OFFSET_SHIFT); OUT_BATCH(dw); - offset += (id & 3) * sizeof(float); /* u1, v1, w1 */ if (has_mask) { - DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset)); + unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float); + DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset)); dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; - switch ((id >> 2) & 3) { + switch (id >> 2) { case 1: src_format = GEN6_SURFACEFORMAT_R32_FLOAT; dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; @@ -917,13 +886,13 @@ 
gen6_emit_state(struct sna *sna, sna->render_state.gen6.first_state_packet = false; } -static void gen6_magic_ca_pass(struct sna *sna, +static bool gen6_magic_ca_pass(struct sna *sna, const struct sna_composite_op *op) { struct gen6_render_state *state = &sna->render_state.gen6; if (!op->need_magic_ca_pass) - return; + return false; DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__, sna->render.vertex_start, sna->render.vertex_index)); @@ -949,163 +918,7 @@ static void gen6_magic_ca_pass(struct sna *sna, OUT_BATCH(0); /* index buffer offset, ignored */ state->last_primitive = sna->kgem.nbatch; -} - -static void gen6_vertex_flush(struct sna *sna) -{ - assert(sna->render_state.gen6.vertex_offset); - - DBG(("%s[%x] = %d\n", __FUNCTION__, - 4*sna->render_state.gen6.vertex_offset, - sna->render.vertex_index - sna->render.vertex_start)); - sna->kgem.batch[sna->render_state.gen6.vertex_offset] = - sna->render.vertex_index - sna->render.vertex_start; - sna->render_state.gen6.vertex_offset = 0; -} - -static int gen6_vertex_finish(struct sna *sna) -{ - struct kgem_bo *bo; - unsigned int i; - - DBG(("%s: used=%d / %d\n", __FUNCTION__, - sna->render.vertex_used, sna->render.vertex_size)); - assert(sna->render.vertex_used); - assert(sna->render.nvertex_reloc); - - /* Note: we only need dword alignment (currently) */ - - bo = sna->render.vbo; - if (bo) { - if (sna->render_state.gen6.vertex_offset) - gen6_vertex_flush(sna); - - for (i = 0; i < sna->render.nvertex_reloc; i++) { - DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, - i, sna->render.vertex_reloc[i])); - - sna->kgem.batch[sna->render.vertex_reloc[i]] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i], bo, - I915_GEM_DOMAIN_VERTEX << 16, - 0); - sna->kgem.batch[sna->render.vertex_reloc[i]+1] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i]+1, bo, - I915_GEM_DOMAIN_VERTEX << 16, - sna->render.vertex_used * 4 - 1); - } - - sna->render.nvertex_reloc = 0; - sna->render.vertex_used = 0; - sna->render.vertex_index = 0; - sna->render_state.gen6.vb_id = 0; - - kgem_bo_destroy(&sna->kgem, bo); - } - - sna->render.vertices = NULL; - sna->render.vbo = kgem_create_linear(&sna->kgem, - 256*1024, CREATE_GTT_MAP); - if (sna->render.vbo) - sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo); - if (sna->render.vertices == NULL) { - if (sna->render.vbo) - kgem_bo_destroy(&sna->kgem, sna->render.vbo); - sna->render.vbo = NULL; - return 0; - } - - DBG(("%s: create vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); - - kgem_bo_sync__cpu(&sna->kgem, sna->render.vbo); - if (sna->render.vertex_used) { - DBG(("%s: copying initial buffer x %d to handle=%d\n", - __FUNCTION__, - sna->render.vertex_used, - sna->render.vbo->handle)); - memcpy(sna->render.vertices, - sna->render.vertex_data, - sizeof(float)*sna->render.vertex_used); - } - sna->render.vertex_size = 64 * 1024 - 1; - return sna->render.vertex_size - sna->render.vertex_used; -} - -static void gen6_vertex_close(struct sna *sna) -{ - struct kgem_bo *bo, *free_bo = NULL; - unsigned int i, delta = 0; - - assert(sna->render_state.gen6.vertex_offset == 0); - - if (!sna->render_state.gen6.vb_id) - return; - - DBG(("%s: used=%d, vbo active? %d\n", - __FUNCTION__, sna->render.vertex_used, sna->render.vbo ? 
sna->render.vbo->handle : 0)); - - bo = sna->render.vbo; - if (bo) { - if (sna->render.vertex_size - sna->render.vertex_used < 64) { - DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); - sna->render.vbo = NULL; - sna->render.vertices = sna->render.vertex_data; - sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); - free_bo = bo; - } - } else { - if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) { - DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__, - sna->render.vertex_used, sna->kgem.nbatch)); - memcpy(sna->kgem.batch + sna->kgem.nbatch, - sna->render.vertex_data, - sna->render.vertex_used * 4); - delta = sna->kgem.nbatch * 4; - bo = NULL; - sna->kgem.nbatch += sna->render.vertex_used; - } else { - bo = kgem_create_linear(&sna->kgem, - 4*sna->render.vertex_used, 0); - if (bo && !kgem_bo_write(&sna->kgem, bo, - sna->render.vertex_data, - 4*sna->render.vertex_used)) { - kgem_bo_destroy(&sna->kgem, bo); - bo = NULL; - } - DBG(("%s: new vbo: %d\n", __FUNCTION__, - sna->render.vertex_used)); - free_bo = bo; - } - } - - assert(sna->render.nvertex_reloc); - for (i = 0; i < sna->render.nvertex_reloc; i++) { - DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, - i, sna->render.vertex_reloc[i])); - - sna->kgem.batch[sna->render.vertex_reloc[i]] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i], bo, - I915_GEM_DOMAIN_VERTEX << 16, - delta); - sna->kgem.batch[sna->render.vertex_reloc[i]+1] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i]+1, bo, - I915_GEM_DOMAIN_VERTEX << 16, - delta + sna->render.vertex_used * 4 - 1); - } - sna->render.nvertex_reloc = 0; - - if (sna->render.vbo == NULL) { - sna->render.vertex_used = 0; - sna->render.vertex_index = 0; - assert(sna->render.vertices == sna->render.vertex_data); - assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data)); - } - - if (free_bo) - kgem_bo_destroy(&sna->kgem, free_bo); + return true; } typedef struct gen6_surface_state_padded { @@ -1193,16 +1006,6 @@ sampler_fill_init(struct gen6_sampler_state *ss) sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); } -static uint32_t gen6_create_cc_viewport(struct sna_static_stream *stream) -{ - struct gen6_cc_viewport vp; - - vp.min_depth = -1.e35; - vp.max_depth = 1.e35; - - return sna_static_stream_add(stream, &vp, sizeof(vp), 32); -} - static uint32_t gen6_tiling_bits(uint32_t tiling) { @@ -1229,9 +1032,10 @@ gen6_bind_bo(struct sna *sna, uint32_t *ss; uint32_t domains; uint16_t offset; + uint32_t is_scanout = is_dst && bo->scanout; /* After the first bind, we manage the cache domains within the batch */ - offset = kgem_bo_get_binding(bo, format); + offset = kgem_bo_get_binding(bo, format | is_scanout << 31); if (offset) { DBG(("[%x] bo(handle=%d), format=%d, reuse %s binding\n", offset, bo->handle, format, @@ -1258,9 +1062,9 @@ gen6_bind_bo(struct sna *sna, ss[3] = (gen6_tiling_bits(bo->tiling) | (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT); ss[4] = 0; - ss[5] = 0; + ss[5] = is_scanout ? 
0 : 3 << 16; - kgem_bo_set_binding(bo, format, offset); + kgem_bo_set_binding(bo, format | is_scanout << 31, offset); DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", offset, bo->handle, ss[1], @@ -1270,254 +1074,6 @@ gen6_bind_bo(struct sna *sna, return offset * sizeof(uint32_t); } -fastcall static void -gen6_emit_composite_primitive_solid(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - float *v; - union { - struct sna_coordinate p; - float f; - } dst; - - DBG(("%s: [%d+9] = (%d, %d)x(%d, %d)\n", __FUNCTION__, - sna->render.vertex_used, r->dst.x, r->dst.y, r->width, r->height)); - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; - assert(sna->render.vertex_used <= sna->render.vertex_size); - assert(!too_large(op->dst.x + r->dst.x + r->width, - op->dst.y + r->dst.y + r->height)); - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - dst.p.x = r->dst.x; - v[3] = dst.f; - dst.p.y = r->dst.y; - v[6] = dst.f; - - v[5] = v[2] = v[1] = 1.; - v[8] = v[7] = v[4] = 0.; -} - -fastcall static void -gen6_emit_composite_primitive_identity_source(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - union { - struct sna_coordinate p; - float f; - } dst; - float *v; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - dst.p.x = r->dst.x; - v[3] = dst.f; - dst.p.y = r->dst.y; - v[6] = dst.f; - - v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; - v[1] = v[4] + r->width * op->src.scale[0]; - - v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; - v[5] = v[2] = v[8] + r->height * op->src.scale[1]; -} - -fastcall static void -gen6_emit_composite_primitive_simple_source(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - float *v; - union { - struct sna_coordinate p; - float f; - } dst; - - float xx = op->src.transform->matrix[0][0]; - float x0 = op->src.transform->matrix[0][2]; - float yy = op->src.transform->matrix[1][1]; - float y0 = op->src.transform->matrix[1][2]; - float sx = op->src.scale[0]; - float sy = op->src.scale[1]; - int16_t tx = op->src.offset[0]; - int16_t ty = op->src.offset[1]; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 3*3; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; - v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; - - dst.p.x = r->dst.x; - v[3] = dst.f; - v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; - - dst.p.y = r->dst.y; - v[6] = dst.f; - v[8] = ((r->src.y + ty) * yy + y0) * sy; -} - -fastcall static void -gen6_emit_composite_primitive_affine_source(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - union { - struct sna_coordinate p; - float f; - } dst; - float *v; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x + r->width, - op->src.offset[1] + r->src.y + r->height, - op->src.transform, - &v[1], &v[2]); - v[1] *= op->src.scale[0]; - v[2] *= op->src.scale[1]; - - dst.p.x = r->dst.x; - 
v[3] = dst.f; - _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x, - op->src.offset[1] + r->src.y + r->height, - op->src.transform, - &v[4], &v[5]); - v[4] *= op->src.scale[0]; - v[5] *= op->src.scale[1]; - - dst.p.y = r->dst.y; - v[6] = dst.f; - _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x, - op->src.offset[1] + r->src.y, - op->src.transform, - &v[7], &v[8]); - v[7] *= op->src.scale[0]; - v[8] *= op->src.scale[1]; -} - -fastcall static void -gen6_emit_composite_primitive_identity_source_mask(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - union { - struct sna_coordinate p; - float f; - } dst; - float src_x, src_y; - float msk_x, msk_y; - float w, h; - float *v; - - src_x = r->src.x + op->src.offset[0]; - src_y = r->src.y + op->src.offset[1]; - msk_x = r->mask.x + op->mask.offset[0]; - msk_y = r->mask.y + op->mask.offset[1]; - w = r->width; - h = r->height; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 15; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - v[1] = (src_x + w) * op->src.scale[0]; - v[2] = (src_y + h) * op->src.scale[1]; - v[3] = (msk_x + w) * op->mask.scale[0]; - v[4] = (msk_y + h) * op->mask.scale[1]; - - dst.p.x = r->dst.x; - v[5] = dst.f; - v[6] = src_x * op->src.scale[0]; - v[7] = v[2]; - v[8] = msk_x * op->mask.scale[0]; - v[9] = v[4]; - - dst.p.y = r->dst.y; - v[10] = dst.f; - v[11] = v[6]; - v[12] = src_y * op->src.scale[1]; - v[13] = v[8]; - v[14] = msk_y * op->mask.scale[1]; -} - -inline static void -gen6_emit_composite_texcoord(struct sna *sna, - const struct sna_composite_channel *channel, - int16_t x, int16_t y) -{ - x += channel->offset[0]; - y += channel->offset[1]; - - if (channel->is_affine) { - float s, t; - - sna_get_transformed_coordinates(x, y, - channel->transform, - &s, &t); - OUT_VERTEX_F(s * channel->scale[0]); - OUT_VERTEX_F(t * channel->scale[1]); - } else { - float s, t, w; - - sna_get_transformed_coordinates_3d(x, y, - channel->transform, - &s, &t, &w); - OUT_VERTEX_F(s * channel->scale[0]); - OUT_VERTEX_F(t * channel->scale[1]); - OUT_VERTEX_F(w); - } -} - -static void -gen6_emit_composite_vertex(struct sna *sna, - const struct sna_composite_op *op, - int16_t srcX, int16_t srcY, - int16_t mskX, int16_t mskY, - int16_t dstX, int16_t dstY) -{ - OUT_VERTEX(dstX, dstY); - gen6_emit_composite_texcoord(sna, &op->src, srcX, srcY); - gen6_emit_composite_texcoord(sna, &op->mask, mskX, mskY); -} - -fastcall static void -gen6_emit_composite_primitive(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - gen6_emit_composite_vertex(sna, op, - r->src.x + r->width, r->src.y + r->height, - r->mask.x + r->width, r->mask.y + r->height, - r->dst.x + r->width, r->dst.y + r->height); - gen6_emit_composite_vertex(sna, op, - r->src.x, r->src.y + r->height, - r->mask.x, r->mask.y + r->height, - r->dst.x, r->dst.y + r->height); - gen6_emit_composite_vertex(sna, op, - r->src.x, r->src.y, - r->mask.x, r->mask.y, - r->dst.x, r->dst.y); -} - static void gen6_emit_vertex_buffer(struct sna *sna, const struct sna_composite_op *op) { @@ -1528,10 +1084,10 @@ static void gen6_emit_vertex_buffer(struct sna *sna, 4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT); sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; OUT_BATCH(0); - OUT_BATCH(0); + OUT_BATCH(~0); /* max address: disabled */ OUT_BATCH(0); - sna->render_state.gen6.vb_id |= 1 << id; + 
sna->render.vb_id |= 1 << id; } static void gen6_emit_primitive(struct sna *sna) @@ -1541,7 +1097,7 @@ static void gen6_emit_primitive(struct sna *sna) __FUNCTION__, sna->render.vertex_start, sna->render.vertex_index)); - sna->render_state.gen6.vertex_offset = sna->kgem.nbatch - 5; + sna->render.vertex_offset = sna->kgem.nbatch - 5; return; } @@ -1550,7 +1106,7 @@ static void gen6_emit_primitive(struct sna *sna) _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT | 0 << 9 | 4); - sna->render_state.gen6.vertex_offset = sna->kgem.nbatch; + sna->render.vertex_offset = sna->kgem.nbatch; OUT_BATCH(0); /* vertex count, to be filled in later */ OUT_BATCH(sna->render.vertex_index); OUT_BATCH(1); /* single instance */ @@ -1569,13 +1125,16 @@ static bool gen6_rectangle_begin(struct sna *sna, int id = 1 << GEN6_VERTEX(op->u.gen6.flags); int ndwords; + if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) + return true; + ndwords = op->need_magic_ca_pass ? 60 : 6; - if ((sna->render_state.gen6.vb_id & id) == 0) + if ((sna->render.vb_id & id) == 0) ndwords += 5; if (!kgem_check_batch(&sna->kgem, ndwords)) return false; - if ((sna->render_state.gen6.vb_id & id) == 0) + if ((sna->render.vb_id & id) == 0) gen6_emit_vertex_buffer(sna, op); gen6_emit_primitive(sna); @@ -1585,17 +1144,30 @@ static bool gen6_rectangle_begin(struct sna *sna, static int gen6_get_rectangles__flush(struct sna *sna, const struct sna_composite_op *op) { + /* Preventing discarding new vbo after lock contention */ + if (sna_vertex_wait__locked(&sna->render)) { + int rem = vertex_space(sna); + if (rem > op->floats_per_rect) + return rem; + } + if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5)) return 0; - if (!kgem_check_exec(&sna->kgem, 1)) - return 0; - if (!kgem_check_reloc(&sna->kgem, 2)) + if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) return 0; - if (op->need_magic_ca_pass && sna->render.vbo) - return 0; + if (sna->render.vertex_offset) { + gen4_vertex_flush(sna); + if (gen6_magic_ca_pass(sna, op)) { + gen6_emit_flush(sna); + gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)); + gen6_emit_wm(sna, + GEN6_KERNEL(op->u.gen6.flags), + GEN6_VERTEX(op->u.gen6.flags) >> 2); + } + } - return gen6_vertex_finish(sna); + return gen4_vertex_finish(sna); } inline static int gen6_get_rectangles(struct sna *sna, @@ -1607,7 +1179,7 @@ inline static int gen6_get_rectangles(struct sna *sna, start: rem = vertex_space(sna); - if (rem < op->floats_per_rect) { + if (unlikely(rem < op->floats_per_rect)) { DBG(("flushing vbo for %s: %d < %d\n", __FUNCTION__, rem, op->floats_per_rect)); rem = gen6_get_rectangles__flush(sna, op); @@ -1615,7 +1187,7 @@ start: goto flush; } - if (unlikely(sna->render_state.gen6.vertex_offset == 0 && + if (unlikely(sna->render.vertex_offset == 0 && !gen6_rectangle_begin(sna, op))) goto flush; @@ -1627,10 +1199,11 @@ start: return want; flush: - if (sna->render_state.gen6.vertex_offset) { - gen6_vertex_flush(sna); + if (sna->render.vertex_offset) { + gen4_vertex_flush(sna); gen6_magic_ca_pass(sna, op); } + sna_vertex_wait__locked(&sna->render); _kgem_submit(&sna->kgem); emit_state(sna, op); goto start; @@ -1653,20 +1226,10 @@ inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna, return table; } -static uint32_t -gen6_choose_composite_vertex_buffer(const struct sna_composite_op *op) -{ - int id = 2 + !op->is_affine; - if (op->mask.bo) - id |= id << 2; - assert(id > 0 && id < 16); - return id; -} - -static void -gen6_get_batch(struct sna *sna) +static bool 
+gen6_get_batch(struct sna *sna, const struct sna_composite_op *op) { - kgem_set_mode(&sna->kgem, KGEM_RENDER); + kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) { DBG(("%s: flushing batch: %d < %d+%d\n", @@ -1678,6 +1241,8 @@ gen6_get_batch(struct sna *sna) if (sna->render_state.gen6.needs_invariant) gen6_emit_invariant(sna); + + return kgem_bo_is_dirty(op->dst.bo); } static void gen6_emit_composite_state(struct sna *sna, @@ -1687,8 +1252,7 @@ static void gen6_emit_composite_state(struct sna *sna, uint16_t offset; bool dirty; - gen6_get_batch(sna); - dirty = kgem_bo_is_dirty(op->dst.bo); + dirty = gen6_get_batch(sna, op); binding_table = gen6_composite_get_binding_table(sna, &offset); @@ -1726,11 +1290,10 @@ static void gen6_emit_composite_state(struct sna *sna, static void gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op) { - assert (sna->render_state.gen6.vertex_offset == 0); + assert (sna->render.vertex_offset == 0); if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) { if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect) - /* XXX propagate failure */ - gen6_vertex_finish(sna); + gen4_vertex_finish(sna); DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n", sna->render_state.gen6.floats_per_vertex, @@ -1741,6 +1304,7 @@ gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op) sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex; sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex; } + assert((sna->render.vertex_used % op->floats_per_vertex) == 0); } fastcall static void @@ -1775,9 +1339,9 @@ gen6_render_composite_box(struct sna *sna, } static void -gen6_render_composite_boxes(struct sna *sna, - const struct sna_composite_op *op, - const BoxRec *box, int nbox) +gen6_render_composite_boxes__blt(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) { DBG(("composite_boxes(%d)\n", nbox)); @@ -1807,6 +1371,62 @@ gen6_render_composite_boxes(struct sna *sna, } while (nbox); } +static void +gen6_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen6_get_rectangles(sna, op, nbox, + gen6_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + } while (nbox); +} + +static void +gen6_render_composite_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen6_get_rectangles(sna, op, nbox, + gen6_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + #ifndef MAX #define MAX(a,b) ((a) > (b) ? 
(a) : (b)) #endif @@ -1887,8 +1507,7 @@ static void gen6_emit_video_state(struct sna *sna, bool dirty; int n_src, n; - gen6_get_batch(sna); - dirty = kgem_bo_is_dirty(op->dst.bo); + dirty = gen6_get_batch(sna, op); src_surf_base[0] = 0; src_surf_base[1] = 0; @@ -1949,12 +1568,14 @@ gen6_render_video(struct sna *sna, RegionPtr dstRegion, short src_w, short src_h, short drw_w, short drw_h, + short dx, short dy, PixmapPtr pixmap) { struct sna_composite_op tmp; - int nbox, dxo, dyo, pix_xoff, pix_yoff; + int nbox, pix_xoff, pix_yoff; float src_scale_x, src_scale_y; struct sna_pixmap *priv; + unsigned filter; BoxPtr box; DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", @@ -1983,15 +1604,22 @@ gen6_render_video(struct sna *sna, tmp.floats_per_vertex = 3; tmp.floats_per_rect = 9; + if (src_w == drw_w && src_h == drw_h) + filter = SAMPLER_FILTER_NEAREST; + else + filter = SAMPLER_FILTER_BILINEAR; + tmp.u.gen6.flags = - GEN6_SET_FLAGS(VIDEO_SAMPLER, NO_BLEND, + GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, + SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), + NO_BLEND, is_planar_fourcc(frame->id) ? GEN6_WM_KERNEL_VIDEO_PLANAR : GEN6_WM_KERNEL_VIDEO_PACKED, 2); tmp.priv = frame; - kgem_set_mode(&sna->kgem, KGEM_RENDER); + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { kgem_submit(&sna->kgem); assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)); @@ -2012,9 +1640,6 @@ gen6_render_video(struct sna *sna, pix_yoff = 0; #endif - dxo = dstRegion->extents.x1; - dyo = dstRegion->extents.y1; - /* Use normalized texture coordinates */ src_scale_x = ((float)src_w / frame->width) / (float)drw_w; src_scale_y = ((float)src_h / frame->height) / (float)drw_h; @@ -2032,16 +1657,16 @@ gen6_render_video(struct sna *sna, gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state); OUT_VERTEX(r.x2, r.y2); - OUT_VERTEX_F((box->x2 - dxo) * src_scale_x); - OUT_VERTEX_F((box->y2 - dyo) * src_scale_y); + OUT_VERTEX_F((box->x2 - dx) * src_scale_x); + OUT_VERTEX_F((box->y2 - dy) * src_scale_y); OUT_VERTEX(r.x1, r.y2); - OUT_VERTEX_F((box->x1 - dxo) * src_scale_x); - OUT_VERTEX_F((box->y2 - dyo) * src_scale_y); + OUT_VERTEX_F((box->x1 - dx) * src_scale_x); + OUT_VERTEX_F((box->y2 - dy) * src_scale_y); OUT_VERTEX(r.x1, r.y1); - OUT_VERTEX_F((box->x1 - dxo) * src_scale_x); - OUT_VERTEX_F((box->y1 - dyo) * src_scale_y); + OUT_VERTEX_F((box->x1 - dx) * src_scale_x); + OUT_VERTEX_F((box->y1 - dy) * src_scale_y); if (!DAMAGE_IS_ALL(priv->gpu_damage)) { sna_damage_add_box(&priv->gpu_damage, &r); @@ -2051,148 +1676,10 @@ gen6_render_video(struct sna *sna, } priv->clear = false; - gen6_vertex_flush(sna); + gen4_vertex_flush(sna); return true; } -static bool -gen6_composite_solid_init(struct sna *sna, - struct sna_composite_channel *channel, - uint32_t color) -{ - DBG(("%s: color=%x\n", __FUNCTION__, color)); - - channel->filter = PictFilterNearest; - channel->repeat = RepeatNormal; - channel->is_affine = true; - channel->is_solid = true; - channel->is_opaque = (color >> 24) == 0xff; - channel->transform = NULL; - channel->width = 1; - channel->height = 1; - channel->card_format = GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; - - channel->bo = sna_render_get_solid(sna, color); - - channel->scale[0] = channel->scale[1] = 1; - channel->offset[0] = channel->offset[1] = 0; - return channel->bo != NULL; -} - -static bool -gen6_composite_linear_init(struct sna *sna, - PicturePtr picture, - struct sna_composite_channel *channel, - int x, int y, - int w, 
int h, - int dst_x, int dst_y) -{ - PictLinearGradient *linear = - (PictLinearGradient *)picture->pSourcePict; - pixman_fixed_t tx, ty; - float x0, y0, sf; - float dx, dy; - - DBG(("%s: p1=(%f, %f), p2=(%f, %f), src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n", - __FUNCTION__, - pixman_fixed_to_double(linear->p1.x), pixman_fixed_to_double(linear->p1.y), - pixman_fixed_to_double(linear->p2.x), pixman_fixed_to_double(linear->p2.y), - x, y, dst_x, dst_y, w, h)); - - if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y) - return 0; - - if (!sna_transform_is_affine(picture->transform)) { - DBG(("%s: fallback due to projective transform\n", - __FUNCTION__)); - return sna_render_picture_fixup(sna, picture, channel, - x, y, w, h, dst_x, dst_y); - } - - channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear); - if (!channel->bo) - return 0; - - channel->filter = PictFilterNearest; - channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; - channel->width = channel->bo->pitch / 4; - channel->height = 1; - channel->pict_format = PICT_a8r8g8b8; - - channel->scale[0] = channel->scale[1] = 1; - channel->offset[0] = channel->offset[1] = 0; - - if (sna_transform_is_translation(picture->transform, &tx, &ty)) { - dx = pixman_fixed_to_double(linear->p2.x - linear->p1.x); - dy = pixman_fixed_to_double(linear->p2.y - linear->p1.y); - - x0 = pixman_fixed_to_double(linear->p1.x); - y0 = pixman_fixed_to_double(linear->p1.y); - - if (tx | ty) { - x0 -= pixman_fixed_to_double(tx); - y0 -= pixman_fixed_to_double(ty); - } - } else { - struct pixman_f_vector p1, p2; - struct pixman_f_transform m, inv; - - pixman_f_transform_from_pixman_transform(&m, picture->transform); - DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n", - __FUNCTION__, - m.m[0][0], m.m[0][1], m.m[0][2], - m.m[1][0], m.m[1][1], m.m[1][2], - m.m[2][0], m.m[2][1], m.m[2][2])); - if (!pixman_f_transform_invert(&inv, &m)) - return 0; - - p1.v[0] = pixman_fixed_to_double(linear->p1.x); - p1.v[1] = pixman_fixed_to_double(linear->p1.y); - p1.v[2] = 1.; - pixman_f_transform_point(&inv, &p1); - - p2.v[0] = pixman_fixed_to_double(linear->p2.x); - p2.v[1] = pixman_fixed_to_double(linear->p2.y); - p2.v[2] = 1.; - pixman_f_transform_point(&inv, &p2); - - DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n", - __FUNCTION__, - p1.v[0], p1.v[1], p1.v[2], - p2.v[0], p2.v[1], p2.v[2])); - - dx = p2.v[0] - p1.v[0]; - dy = p2.v[1] - p1.v[1]; - - x0 = p1.v[0]; - y0 = p1.v[1]; - } - - sf = dx*dx + dy*dy; - dx /= sf; - dy /= sf; - - channel->embedded_transform.matrix[0][0] = pixman_double_to_fixed(dx); - channel->embedded_transform.matrix[0][1] = pixman_double_to_fixed(dy); - channel->embedded_transform.matrix[0][2] = -pixman_double_to_fixed(dx*(x0+dst_x-x) + dy*(y0+dst_y-y)); - - channel->embedded_transform.matrix[1][0] = 0; - channel->embedded_transform.matrix[1][1] = 0; - channel->embedded_transform.matrix[1][2] = pixman_double_to_fixed(.5); - - channel->embedded_transform.matrix[2][0] = 0; - channel->embedded_transform.matrix[2][1] = 0; - channel->embedded_transform.matrix[2][2] = pixman_fixed_1; - - channel->transform = &channel->embedded_transform; - channel->is_affine = 1; - - DBG(("%s: dx=%f, dy=%f, offset=%f\n", - __FUNCTION__, dx, dy, -dx*(x0-x+dst_x) + -dy*(y0-y+dst_y))); - - return channel->bo != NULL; -} - static int gen6_composite_picture(struct sna *sna, PicturePtr picture, @@ -2213,16 +1700,16 @@ gen6_composite_picture(struct sna *sna, channel->card_format = -1; if (sna_picture_is_solid(picture, &color)) - 
return gen6_composite_solid_init(sna, channel, color); + return gen4_channel_init_solid(sna, channel, color); if (picture->pDrawable == NULL) { int ret; if (picture->pSourcePict->type == SourcePictTypeLinear) - return gen6_composite_linear_init(sna, picture, channel, - x, y, - w, h, - dst_x, dst_y); + return gen4_channel_init_linear(sna, picture, channel, + x, y, + w, h, + dst_x, dst_y); DBG(("%s -- fixup, gradient\n", __FUNCTION__)); ret = -1; @@ -2273,7 +1760,8 @@ gen6_composite_picture(struct sna *sna, channel->card_format = gen6_get_card_format(picture->format); if (channel->card_format == (unsigned)-1) return sna_render_picture_convert(sna, picture, channel, pixmap, - x, y, w, h, dst_x, dst_y); + x, y, w, h, dst_x, dst_y, + false); if (too_large(pixmap->drawable.width, pixmap->drawable.height)) { DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__, @@ -2300,8 +1788,9 @@ static void gen6_render_composite_done(struct sna *sna, { DBG(("%s\n", __FUNCTION__)); - if (sna->render_state.gen6.vertex_offset) { - gen6_vertex_flush(sna); + assert(!sna->render.active); + if (sna->render.vertex_offset) { + gen4_vertex_flush(sna); gen6_magic_ca_pass(sna, op); } @@ -2360,17 +1849,11 @@ gen6_composite_set_target(struct sna *sna, return true; } -static bool prefer_blt_ring(struct sna *sna) -{ - if (PREFER_RENDER) - return PREFER_RENDER < 0; - - return sna->kgem.ring != KGEM_RENDER; -} - -static bool can_switch_to_blt(struct sna *sna) +inline static bool can_switch_to_blt(struct sna *sna, + struct kgem_bo *bo, + unsigned flags) { - if (sna->kgem.ring == KGEM_BLT) + if (sna->kgem.ring != KGEM_RENDER) return true; if (NO_RING_SWITCH) @@ -2379,7 +1862,13 @@ static bool can_switch_to_blt(struct sna *sna) if (!sna->kgem.has_semaphores) return false; - return sna->kgem.mode == KGEM_NONE || kgem_is_idle(&sna->kgem); + if (flags & COPY_LAST) + return true; + + if (bo && RQ_IS_BLT(bo->rq)) + return true; + + return kgem_ring_is_idle(&sna->kgem, KGEM_BLT); } static inline bool untiled_tlb_miss(struct kgem_bo *bo) @@ -2387,9 +1876,19 @@ static inline bool untiled_tlb_miss(struct kgem_bo *bo) return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096; } -static bool prefer_blt_bo(struct sna *sna, struct kgem_bo *bo) +static int prefer_blt_bo(struct sna *sna, struct kgem_bo *bo) { - return untiled_tlb_miss(bo) && bo->pitch < MAXSHORT; + if (bo->rq) + return RQ_IS_BLT(bo->rq) ? 
1 : -1; + + return bo->tiling == I915_TILING_NONE || bo->scanout; +} + +inline static bool prefer_blt_ring(struct sna *sna, + struct kgem_bo *bo, + unsigned flags) +{ + return can_switch_to_blt(sna, bo, flags); } static bool @@ -2397,7 +1896,7 @@ try_blt(struct sna *sna, PicturePtr dst, PicturePtr src, int width, int height) { - if (prefer_blt_ring(sna)) { + if (sna->kgem.ring == KGEM_BLT) { DBG(("%s: already performing BLT\n", __FUNCTION__)); return true; } @@ -2408,7 +1907,7 @@ try_blt(struct sna *sna, return true; } - if (can_switch_to_blt(sna) && sna_picture_is_solid(src, NULL)) + if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, NULL, 0)) return true; return false; @@ -2436,12 +1935,6 @@ has_alphamap(PicturePtr p) } static bool -untransformed(PicturePtr p) -{ - return !p->transform || pixman_transform_is_int_translate(p->transform); -} - -static bool need_upload(PicturePtr p) { return p->pDrawable && unattached(p->pDrawable) && untransformed(p); @@ -2487,7 +1980,6 @@ gen6_composite_fallback(struct sna *sna, PicturePtr mask, PicturePtr dst) { - struct sna_pixmap *priv; PixmapPtr src_pixmap; PixmapPtr mask_pixmap; PixmapPtr dst_pixmap; @@ -2526,10 +2018,7 @@ gen6_composite_fallback(struct sna *sna, } /* If anything is on the GPU, push everything out to the GPU */ - priv = sna_pixmap(dst_pixmap); - if (priv && - ((priv->gpu_damage && !priv->clear) || - (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)))) { + if (dst_use_gpu(dst_pixmap)) { DBG(("%s: dst is already on the GPU, try to use GPU\n", __FUNCTION__)); return false; @@ -2564,14 +2053,14 @@ gen6_composite_fallback(struct sna *sna, if (too_large(dst_pixmap->drawable.width, dst_pixmap->drawable.height) && - (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage))) { + dst_is_cpu(dst_pixmap)) { DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); return true; } DBG(("%s: dst is not on the GPU and the operation should not fallback\n", __FUNCTION__)); - return false; + return dst_use_cpu(dst_pixmap); } static int @@ -2592,7 +2081,7 @@ reuse_source(struct sna *sna, } if (sna_picture_is_solid(mask, &color)) - return gen6_composite_solid_init(sna, mc, color); + return gen4_channel_init_solid(sna, mc, color); if (sc->is_solid) return false; @@ -2635,11 +2124,14 @@ prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp) if (sna->kgem.ring == KGEM_BLT) return true; - if (!prefer_blt_ring(sna)) + if (untiled_tlb_miss(tmp->dst.bo) || + untiled_tlb_miss(tmp->src.bo)) + return true; + + if (!prefer_blt_ring(sna, tmp->dst.bo, 0)) return false; - return (prefer_blt_bo(sna, tmp->dst.bo) || - prefer_blt_bo(sna, tmp->src.bo)); + return (prefer_blt_bo(sna, tmp->dst.bo) | prefer_blt_bo(sna, tmp->src.bo)) > 0; } static bool @@ -2696,7 +2188,7 @@ gen6_render_composite(struct sna *sna, case -1: goto cleanup_dst; case 0: - if (!gen6_composite_solid_init(sna, &tmp->src, 0)) + if (!gen4_channel_init_solid(sna, &tmp->src, 0)) goto cleanup_dst; /* fall through to fixup */ case 1: @@ -2720,7 +2212,6 @@ gen6_render_composite(struct sna *sna, tmp->mask.filter = SAMPLER_FILTER_NEAREST; tmp->mask.repeat = SAMPLER_EXTEND_NONE; - tmp->prim_emit = gen6_emit_composite_primitive; if (mask) { if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { tmp->has_component_alpha = true; @@ -2750,7 +2241,7 @@ gen6_render_composite(struct sna *sna, case -1: goto cleanup_src; case 0: - if (!gen6_composite_solid_init(sna, &tmp->mask, 0)) + if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) goto cleanup_src; /* fall through to fixup */ case 1: @@ -2760,40 
+2251,7 @@ gen6_render_composite(struct sna *sna, } tmp->is_affine &= tmp->mask.is_affine; - - if (tmp->src.transform == NULL && tmp->mask.transform == NULL) - tmp->prim_emit = gen6_emit_composite_primitive_identity_source_mask; - - tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine; - } else { - if (tmp->src.is_solid) { - DBG(("%s: choosing gen6_emit_composite_primitive_solid\n", - __FUNCTION__)); - tmp->prim_emit = gen6_emit_composite_primitive_solid; - if (tmp->src.is_opaque && op == PictOpOver) - tmp->op = PictOpSrc; - } else if (tmp->src.transform == NULL) { - DBG(("%s: choosing gen6_emit_composite_primitive_identity_source\n", - __FUNCTION__)); - tmp->prim_emit = gen6_emit_composite_primitive_identity_source; - } else if (tmp->src.is_affine) { - if (tmp->src.transform->matrix[0][1] == 0 && - tmp->src.transform->matrix[1][0] == 0) { - tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; - tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; - DBG(("%s: choosing gen6_emit_composite_primitive_simple_source\n", - __FUNCTION__)); - tmp->prim_emit = gen6_emit_composite_primitive_simple_source; - } else { - DBG(("%s: choosing gen6_emit_composite_primitive_affine_source\n", - __FUNCTION__)); - tmp->prim_emit = gen6_emit_composite_primitive_affine_source; - } - } - - tmp->floats_per_vertex = 3 + !tmp->is_affine; } - tmp->floats_per_rect = 3 * tmp->floats_per_vertex; tmp->u.gen6.flags = GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, @@ -2807,14 +2265,18 @@ gen6_render_composite(struct sna *sna, tmp->mask.bo != NULL, tmp->has_component_alpha, tmp->is_affine), - gen6_choose_composite_vertex_buffer(tmp)); + gen4_choose_composite_emitter(tmp)); tmp->blt = gen6_render_composite_blt; tmp->box = gen6_render_composite_box; - tmp->boxes = gen6_render_composite_boxes; + tmp->boxes = gen6_render_composite_boxes__blt; + if (tmp->emit_boxes) { + tmp->boxes = gen6_render_composite_boxes; + tmp->thread_boxes = gen6_render_composite_boxes__thread; + } tmp->done = gen6_render_composite_done; - kgem_set_mode(&sna->kgem, KGEM_RENDER); + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); if (!kgem_check_bo(&sna->kgem, tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) { @@ -2843,167 +2305,6 @@ cleanup_dst: } #if !NO_COMPOSITE_SPANS -inline static void -gen6_emit_composite_texcoord_affine(struct sna *sna, - const struct sna_composite_channel *channel, - int16_t x, int16_t y) -{ - float t[2]; - - sna_get_transformed_coordinates(x + channel->offset[0], - y + channel->offset[1], - channel->transform, - &t[0], &t[1]); - OUT_VERTEX_F(t[0] * channel->scale[0]); - OUT_VERTEX_F(t[1] * channel->scale[1]); -} - -inline static void -gen6_emit_composite_spans_vertex(struct sna *sna, - const struct sna_composite_spans_op *op, - int16_t x, int16_t y) -{ - OUT_VERTEX(x, y); - gen6_emit_composite_texcoord(sna, &op->base.src, x, y); -} - -fastcall static void -gen6_emit_composite_spans_primitive(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - gen6_emit_composite_spans_vertex(sna, op, box->x2, box->y2); - OUT_VERTEX_F(opacity); - - gen6_emit_composite_spans_vertex(sna, op, box->x1, box->y2); - OUT_VERTEX_F(opacity); - - gen6_emit_composite_spans_vertex(sna, op, box->x1, box->y1); - OUT_VERTEX_F(opacity); -} - -fastcall static void -gen6_emit_composite_spans_solid(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - OUT_VERTEX(box->x2, box->y2); - OUT_VERTEX_F(1); OUT_VERTEX_F(1); - OUT_VERTEX_F(opacity); - - OUT_VERTEX(box->x1, 
box->y2); - OUT_VERTEX_F(0); OUT_VERTEX_F(1); - OUT_VERTEX_F(opacity); - - OUT_VERTEX(box->x1, box->y1); - OUT_VERTEX_F(0); OUT_VERTEX_F(0); - OUT_VERTEX_F(opacity); -} - -fastcall static void -gen6_emit_composite_spans_identity(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - float *v; - union { - struct sna_coordinate p; - float f; - } dst; - - float sx = op->base.src.scale[0]; - float sy = op->base.src.scale[1]; - int16_t tx = op->base.src.offset[0]; - int16_t ty = op->base.src.offset[1]; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 3*4; - assert(sna->render.vertex_used <= sna->render.vertex_size); - - dst.p.x = box->x2; - dst.p.y = box->y2; - v[0] = dst.f; - v[1] = (box->x2 + tx) * sx; - v[6] = v[2] = (box->y2 + ty) * sy; - - dst.p.x = box->x1; - v[4] = dst.f; - v[9] = v[5] = (box->x1 + tx) * sx; - - dst.p.y = box->y1; - v[8] = dst.f; - v[10] = (box->y1 + ty) * sy; - - v[11] = v[7] = v[3] = opacity; -} - -fastcall static void -gen6_emit_composite_spans_simple(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - float *v; - union { - struct sna_coordinate p; - float f; - } dst; - - float xx = op->base.src.transform->matrix[0][0]; - float x0 = op->base.src.transform->matrix[0][2]; - float yy = op->base.src.transform->matrix[1][1]; - float y0 = op->base.src.transform->matrix[1][2]; - float sx = op->base.src.scale[0]; - float sy = op->base.src.scale[1]; - int16_t tx = op->base.src.offset[0]; - int16_t ty = op->base.src.offset[1]; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 3*4; - assert(sna->render.vertex_used <= sna->render.vertex_size); - - dst.p.x = box->x2; - dst.p.y = box->y2; - v[0] = dst.f; - v[1] = ((box->x2 + tx) * xx + x0) * sx; - v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy; - - dst.p.x = box->x1; - v[4] = dst.f; - v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx; - - dst.p.y = box->y1; - v[8] = dst.f; - v[10] = ((box->y1 + ty) * yy + y0) * sy; - - v[11] = v[7] = v[3] = opacity; -} - -fastcall static void -gen6_emit_composite_spans_affine(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - OUT_VERTEX(box->x2, box->y2); - gen6_emit_composite_texcoord_affine(sna, &op->base.src, - box->x2, box->y2); - OUT_VERTEX_F(opacity); - - OUT_VERTEX(box->x1, box->y2); - gen6_emit_composite_texcoord_affine(sna, &op->base.src, - box->x1, box->y2); - OUT_VERTEX_F(opacity); - - OUT_VERTEX(box->x1, box->y1); - gen6_emit_composite_texcoord_affine(sna, &op->base.src, - box->x1, box->y1); - OUT_VERTEX_F(opacity); -} - fastcall static void gen6_render_composite_spans_box(struct sna *sna, const struct sna_composite_spans_op *op, @@ -3053,13 +2354,50 @@ gen6_render_composite_spans_boxes(struct sna *sna, } fastcall static void +gen6_render_composite_spans_boxes__thread(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, + gen6_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * 
op->base.floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +fastcall static void gen6_render_composite_spans_done(struct sna *sna, const struct sna_composite_spans_op *op) { DBG(("%s()\n", __FUNCTION__)); + assert(!sna->render.active); - if (sna->render_state.gen6.vertex_offset) - gen6_vertex_flush(sna); + if (sna->render.vertex_offset) + gen4_vertex_flush(sna); if (op->base.src.bo) kgem_bo_destroy(&sna->kgem, op->base.src.bo); @@ -3070,23 +2408,38 @@ gen6_render_composite_spans_done(struct sna *sna, static bool gen6_check_composite_spans(struct sna *sna, uint8_t op, PicturePtr src, PicturePtr dst, - int16_t width, int16_t height, unsigned flags) + int16_t width, int16_t height, + unsigned flags) { - if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) - return false; + DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n", + __FUNCTION__, op, width, height, flags)); if (op >= ARRAY_SIZE(gen6_blend_op)) return false; - if (gen6_composite_fallback(sna, src, NULL, dst)) + if (gen6_composite_fallback(sna, src, NULL, dst)) { + DBG(("%s: operation would fallback\n", __FUNCTION__)); return false; + } - if (need_tiling(sna, width, height)) { - if (!is_gpu(dst->pDrawable)) { - DBG(("%s: fallback, tiled operation not on GPU\n", - __FUNCTION__)); + if (need_tiling(sna, width, height) && + !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { + DBG(("%s: fallback, tiled operation not on GPU\n", + __FUNCTION__)); + return false; + } + + if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) { + struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable); + assert(priv); + + if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) + return true; + + if (flags & COMPOSITE_SPANS_INPLACE_HINT) return false; - } + + return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo); } return true; @@ -3129,7 +2482,7 @@ gen6_render_composite_spans(struct sna *sna, case -1: goto cleanup_dst; case 0: - if (!gen6_composite_solid_init(sna, &tmp->base.src, 0)) + if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) goto cleanup_dst; /* fall through to fixup */ case 1: @@ -3141,23 +2494,6 @@ gen6_render_composite_spans(struct sna *sna, tmp->base.is_affine = tmp->base.src.is_affine; tmp->base.need_magic_ca_pass = false; - tmp->prim_emit = gen6_emit_composite_spans_primitive; - if (tmp->base.src.is_solid) { - tmp->prim_emit = gen6_emit_composite_spans_solid; - } else if (tmp->base.src.transform == NULL) { - tmp->prim_emit = gen6_emit_composite_spans_identity; - } else if (tmp->base.is_affine) { - if (tmp->base.src.transform->matrix[0][1] == 0 && - tmp->base.src.transform->matrix[1][0] == 0) { - tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2]; - tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2]; - tmp->prim_emit = gen6_emit_composite_spans_simple; - } else - tmp->prim_emit = gen6_emit_composite_spans_affine; - } - tmp->base.floats_per_vertex = 4 + !tmp->base.is_affine; - tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex; - tmp->base.u.gen6.flags = GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter, tmp->base.src.repeat, @@ -3165,13 +2501,15 @@ gen6_render_composite_spans(struct sna *sna, SAMPLER_EXTEND_PAD), gen6_get_blend(tmp->base.op, false, tmp->base.dst.format), GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine, - 1 << 2 | (2+!tmp->base.is_affine)); + 
gen4_choose_spans_emitter(tmp)); tmp->box = gen6_render_composite_spans_box; tmp->boxes = gen6_render_composite_spans_boxes; + if (tmp->emit_boxes) + tmp->thread_boxes = gen6_render_composite_spans_boxes__thread; tmp->done = gen6_render_composite_spans_done; - kgem_set_mode(&sna->kgem, KGEM_RENDER); + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); if (!kgem_check_bo(&sna->kgem, tmp->base.dst.bo, tmp->base.src.bo, NULL)) { @@ -3205,8 +2543,7 @@ gen6_emit_copy_state(struct sna *sna, uint16_t offset; bool dirty; - gen6_get_batch(sna); - dirty = kgem_bo_is_dirty(op->dst.bo); + dirty = gen6_get_batch(sna, op); binding_table = gen6_composite_get_binding_table(sna, &offset); @@ -3235,13 +2572,27 @@ static inline bool prefer_blt_copy(struct sna *sna, struct kgem_bo *dst_bo, unsigned flags) { + if (flags & COPY_SYNC) + return false; + if (PREFER_RENDER) return PREFER_RENDER > 0; - return (sna->kgem.ring == KGEM_BLT || - (flags & COPY_LAST && sna->kgem.mode == KGEM_NONE) || - prefer_blt_bo(sna, src_bo) || - prefer_blt_bo(sna, dst_bo)); + if (sna->kgem.ring == KGEM_BLT) + return true; + + if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) + return true; + + if (untiled_tlb_miss(src_bo) || + untiled_tlb_miss(dst_bo)) + return true; + + if (!prefer_blt_ring(sna, dst_bo, flags)) + return false; + + return (prefer_blt_bo(sna, src_bo) >= 0 && + prefer_blt_bo(sna, dst_bo) > 0); } inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents) @@ -3322,7 +2673,7 @@ fallback_blt: if (too_large(extents.x2-extents.x1, extents.y2-extents.y1)) goto fallback_blt; - if ((flags & COPY_LAST || can_switch_to_blt(sna)) && + if (can_switch_to_blt(sna, dst_bo, flags) && sna_blt_compare_depth(&src->drawable, &dst->drawable) && sna_blt_copy_boxes(sna, alu, src_bo, src_dx, src_dy, @@ -3429,7 +2780,7 @@ fallback_blt: assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER); assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX); - kgem_set_mode(&sna->kgem, KGEM_RENDER); + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { kgem_submit(&sna->kgem); if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { @@ -3472,7 +2823,7 @@ fallback_blt: } while (--n_this_time); } while (n); - gen6_vertex_flush(sna); + gen4_vertex_flush(sna); sna_render_composite_redirect_done(sna, &tmp); if (tmp.src.bo != src_bo) kgem_bo_destroy(&sna->kgem, tmp.src.bo); @@ -3485,6 +2836,14 @@ fallback_tiled_dst: if (tmp.redirect.real_bo) kgem_bo_destroy(&sna->kgem, tmp.dst.bo); fallback_tiled: + if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && + sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n)) + return true; + return sna_tiling_copy_boxes(sna, alu, src, src_bo, src_dx, src_dy, dst, dst_bo, dst_dx, dst_dy, @@ -3519,8 +2878,9 @@ gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op) { DBG(("%s()\n", __FUNCTION__)); - if (sna->render_state.gen6.vertex_offset) - gen6_vertex_flush(sna); + assert(!sna->render.active); + if (sna->render.vertex_offset) + gen4_vertex_flush(sna); } static bool @@ -3585,7 +2945,7 @@ fallback: assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER); assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX); - kgem_set_mode(&sna->kgem, KGEM_RENDER); + kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { kgem_submit(&sna->kgem); if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) @@ 
-3608,8 +2968,7 @@ gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) uint16_t offset; bool dirty; - gen6_get_batch(sna); - dirty = kgem_bo_is_dirty(op->dst.bo); + dirty = gen6_get_batch(sna, op); binding_table = gen6_composite_get_binding_table(sna, &offset); @@ -3640,9 +2999,10 @@ static inline bool prefer_blt_fill(struct sna *sna, if (PREFER_RENDER) return PREFER_RENDER < 0; - return (can_switch_to_blt(sna) || - prefer_blt_ring(sna) || - untiled_tlb_miss(bo)); + if (untiled_tlb_miss(bo)) + return true; + + return prefer_blt_ring(sna, bo, 0) || prefer_blt_bo(sna, bo) >= 0; } static bool @@ -3773,7 +3133,7 @@ gen6_render_fill_boxes(struct sna *sna, } while (--n_this_time); } while (n); - gen6_vertex_flush(sna); + gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, tmp.src.bo); sna_render_composite_redirect_done(sna, &tmp); return true; @@ -3866,8 +3226,9 @@ gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op) { DBG(("%s()\n", __FUNCTION__)); - if (sna->render_state.gen6.vertex_offset) - gen6_vertex_flush(sna); + assert(!sna->render.active); + if (sna->render.vertex_offset) + gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, op->base.src.bo); } @@ -3999,8 +3360,11 @@ gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); if (!kgem_check_bo(&sna->kgem, bo, NULL)) { - _kgem_submit(&sna->kgem); - assert(kgem_check_bo(&sna->kgem, bo, NULL)); + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, bo, NULL)) { + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + return false; + } } gen6_emit_fill_state(sna, &tmp); @@ -4021,7 +3385,7 @@ gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, v[7] = v[2] = v[3] = 1; v[6] = v[10] = v[11] = 0; - gen6_vertex_flush(sna); + gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, tmp.src.bo); return true; @@ -4082,8 +3446,11 @@ gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); if (!kgem_check_bo(&sna->kgem, bo, NULL)) { - _kgem_submit(&sna->kgem); - assert(kgem_check_bo(&sna->kgem, bo, NULL)); + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, bo, NULL)) { + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + return false; + } } gen6_emit_fill_state(sna, &tmp); @@ -4103,7 +3470,7 @@ gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) v[7] = v[2] = v[3] = 1; v[6] = v[10] = v[11] = 0; - gen6_vertex_flush(sna); + gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, tmp.src.bo); return true; @@ -4111,20 +3478,20 @@ gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) static void gen6_render_flush(struct sna *sna) { - gen6_vertex_close(sna); + gen4_vertex_close(sna); + + assert(sna->render.vb_id == 0); + assert(sna->render.vertex_offset == 0); } static void gen6_render_context_switch(struct kgem *kgem, int new_mode) { - if (!new_mode) - return; - - DBG(("%s: from %d to %d\n", __FUNCTION__, kgem->mode, new_mode)); - - if (kgem->mode) - kgem_submit(kgem); + if (kgem->nbatch) { + DBG(("%s: from %d to %d\n", __FUNCTION__, kgem->mode, new_mode)); + _kgem_submit(kgem); + } kgem->ring = new_mode; } @@ -4154,6 +3521,7 @@ gen6_render_expire(struct kgem *kgem) if (sna->render.vbo && !sna->render.vertex_used) { DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); kgem_bo_destroy(kgem, sna->render.vbo); + assert(!sna->render.active); sna->render.vbo = NULL; sna->render.vertices = sna->render.vertex_data; 
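/*
 * Note: the vertex bookkeeping that used to live in render_state.gen6
 * (vertex_offset, vb_id, nvertex_reloc) is now kept in the shared
 * sna->render state so that the common gen4_vertex_flush()/
 * gen4_vertex_finish()/gen4_vertex_close() helpers can service this
 * path. A minimal sketch of the expire/reset invariant under that
 * assumption (the exact struct layout lives in sna_render.h):
 *
 *	sna->render.vertex_offset = 0;	// no 3DPRIMITIVE awaiting its vertex count
 *	sna->render.nvertex_reloc = 0;	// no relocations point into the vbo
 *	sna->render.vb_id = 0;		// no vertex buffers currently bound
 */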
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); @@ -4166,7 +3534,6 @@ static void gen6_render_reset(struct sna *sna) { sna->render_state.gen6.needs_invariant = true; sna->render_state.gen6.first_state_packet = true; - sna->render_state.gen6.vb_id = 0; sna->render_state.gen6.ve_id = 3 << 2; sna->render_state.gen6.last_primitive = -1; @@ -4177,6 +3544,10 @@ static void gen6_render_reset(struct sna *sna) sna->render_state.gen6.drawrect_offset = -1; sna->render_state.gen6.drawrect_limit = -1; sna->render_state.gen6.surface_table = -1; + + sna->render.vertex_offset = 0; + sna->render.nvertex_reloc = 0; + sna->render.vb_id = 0; } static void gen6_render_fini(struct sna *sna) @@ -4184,6 +3555,16 @@ static void gen6_render_fini(struct sna *sna) { kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo); } +static bool is_gt2(struct sna *sna) +{ + return DEVICE_ID(sna->PciInfo) & 0x30; +} + +static bool is_mobile(struct sna *sna) +{ + return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6; +} + static bool gen6_render_setup(struct sna *sna) { struct gen6_render_state *state = &sna->render_state.gen6; @@ -4192,7 +3573,7 @@ static bool gen6_render_setup(struct sna *sna) int i, j, k, l, m; state->info = &gt1_info; - if (DEVICE_ID(sna->PciInfo) & 0x20) + if (is_gt2(sna)) state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */ sna_static_stream_init(&general); @@ -4256,7 +3637,6 @@ static bool gen6_render_setup(struct sna *sna) } } - state->cc_vp = gen6_create_cc_viewport(&general); state->cc_blend = gen6_composite_create_blend_state(&general); state->general_bo = sna_static_stream_fini(sna, &general); @@ -4274,10 +3654,14 @@ bool gen6_render_init(struct sna *sna) #if !NO_COMPOSITE sna->render.composite = gen6_render_composite; + sna->render.prefer_gpu |= PREFER_GPU_RENDER; + #endif #if !NO_COMPOSITE_SPANS sna->render.check_composite_spans = gen6_check_composite_spans; sna->render.composite_spans = gen6_render_composite_spans; + if (is_mobile(sna)) + sna->render.prefer_gpu |= PREFER_GPU_SPANS; #endif sna->render.video = gen6_render_video; diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c index 4d94c803c..f05d6f926 100644 --- a/src/sna/gen7_render.c +++ b/src/sna/gen7_render.c @@ -42,6 +42,8 @@ #include "brw/brw.h" #include "gen7_render.h" +#include "gen4_source.h" +#include "gen4_vertex.h" #define NO_COMPOSITE 0 #define NO_COMPOSITE_SPANS 0 @@ -113,6 +115,24 @@ static const struct gt_info hsw_gt_info = { .urb = { 128, 64, 64 }, }; +static const struct gt_info hsw_gt1_info = { + .max_vs_threads = 70, + .max_gs_threads = 70, + .max_wm_threads = + (102 - 1) << HSW_PS_MAX_THREADS_SHIFT | + 1 << HSW_PS_SAMPLE_MASK_SHIFT, + .urb = { 128, 640, 256 }, +}; + +static const struct gt_info hsw_gt2_info = { + .max_vs_threads = 280, + .max_gs_threads = 280, + .max_wm_threads = + (204 - 1) << HSW_PS_MAX_THREADS_SHIFT | + 1 << HSW_PS_SAMPLE_MASK_SHIFT, + .urb = { 256, 1664, 640 }, +}; + static const uint32_t ps_kernel_packed[][4] = { #include "exa_wm_src_affine.g7b" #include "exa_wm_src_sample_argb.g7b" @@ -209,10 +229,6 @@ static const struct blendinfo { #define FILL_FLAGS(op, format) GEN7_SET_FLAGS(FILL_SAMPLER, gen7_get_blend((op), false, (format)), GEN7_WM_KERNEL_NOMASK, FILL_VERTEX) #define FILL_FLAGS_NOBLEND GEN7_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN7_WM_KERNEL_NOMASK, FILL_VERTEX) -#define VIDEO_SAMPLER \ - SAMPLER_OFFSET(SAMPLER_FILTER_BILINEAR, SAMPLER_EXTEND_PAD, \ - SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE) - #define GEN7_SAMPLER(f) (((f) >> 16) & 0xfff0) #define GEN7_BLEND(f) 
(((f) >> 0) & 0x7ff0) #define GEN7_READS_DST(f) (((f) >> 15) & 1) @@ -627,9 +643,9 @@ gen7_emit_cc_invariant(struct sna *sna) OUT_BATCH(0); #endif - assert(is_aligned(sna->render_state.gen7.cc_vp, 32)); + /* XXX clear to be safe */ OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); - OUT_BATCH(sna->render_state.gen7.cc_vp); + OUT_BATCH(0); } static void @@ -865,7 +881,7 @@ gen7_emit_vertex_elements(struct sna *sna, * texture coordinate 1 if (has_mask is true): same as above */ struct gen7_render_state *render = &sna->render_state.gen7; - uint32_t src_format, dw, offset; + uint32_t src_format, dw; int id = GEN7_VERTEX(op->u.gen7.flags); bool has_mask; @@ -875,39 +891,6 @@ gen7_emit_vertex_elements(struct sna *sna, return; render->ve_id = id; - if (id == VERTEX_2s2s) { - DBG(("%s: setup COPY\n", __FUNCTION__)); - - OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS | - ((2 * (1 + 2)) + 1 - 2)); - - OUT_BATCH(VERTEX_2s2s << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | - GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT | - 0 << GEN7_VE0_OFFSET_SHIFT); - OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT | - GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT | - GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT | - GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT); - - /* x,y */ - OUT_BATCH(VERTEX_2s2s << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | - GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT | - 0 << GEN7_VE0_OFFSET_SHIFT); /* offsets vb in bytes */ - OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT | - GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT | - GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT | - GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT); - - OUT_BATCH(VERTEX_2s2s << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | - GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT | - 4 << GEN7_VE0_OFFSET_SHIFT); /* offset vb in bytes */ - OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT | - GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT | - GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT | - GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT); - return; - } - /* The VUE layout * dword 0-3: pad (0.0, 0.0, 0.0. 
0.0) * dword 4-7: position (x, y, 1.0, 1.0), @@ -936,20 +919,25 @@ gen7_emit_vertex_elements(struct sna *sna, GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT | GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT | GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT); - offset = 4; /* u0, v0, w0 */ - DBG(("%s: first channel %d floats, offset=%d\n", __FUNCTION__, id & 3, offset)); + DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3)); dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT; switch (id & 3) { + default: + assert(0); + case 0: + src_format = GEN7_SURFACEFORMAT_R16G16_SSCALED; + dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; + break; case 1: src_format = GEN7_SURFACEFORMAT_R32_FLOAT; dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT; dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; break; - default: - assert(0); case 2: src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT; dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; @@ -965,15 +953,15 @@ gen7_emit_vertex_elements(struct sna *sna, } OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | src_format << GEN7_VE0_FORMAT_SHIFT | - offset << GEN7_VE0_OFFSET_SHIFT); + 4 << GEN7_VE0_OFFSET_SHIFT); OUT_BATCH(dw); - offset += (id & 3) * sizeof(float); /* u1, v1, w1 */ if (has_mask) { - DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset)); + unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float); + DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset)); dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT; - switch ((id >> 2) & 3) { + switch (id >> 2) { case 1: src_format = GEN7_SURFACEFORMAT_R32_FLOAT; dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; @@ -1064,20 +1052,22 @@ gen7_emit_state(struct sna *sna, sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags); } -static void gen7_magic_ca_pass(struct sna *sna, +static bool gen7_magic_ca_pass(struct sna *sna, const struct sna_composite_op *op) { struct gen7_render_state *state = &sna->render_state.gen7; if (!op->need_magic_ca_pass) - return; + return false; DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__, sna->render.vertex_start, sna->render.vertex_index)); gen7_emit_pipe_invalidate(sna); - gen7_emit_cc(sna, gen7_get_blend(PictOpAdd, true, op->dst.format)); + gen7_emit_cc(sna, + GEN7_BLEND(gen7_get_blend(PictOpAdd, true, + op->dst.format))); gen7_emit_wm(sna, gen7_choose_composite_kernel(PictOpAdd, true, true, @@ -1092,155 +1082,7 @@ static void gen7_magic_ca_pass(struct sna *sna, OUT_BATCH(0); /* index buffer offset, ignored */ state->last_primitive = sna->kgem.nbatch; -} - -static void gen7_vertex_flush(struct sna *sna) -{ - assert(sna->render_state.gen7.vertex_offset); - - DBG(("%s[%x] = %d\n", __FUNCTION__, - 4*sna->render_state.gen7.vertex_offset, - sna->render.vertex_index - sna->render.vertex_start)); - sna->kgem.batch[sna->render_state.gen7.vertex_offset] = - sna->render.vertex_index - sna->render.vertex_start; - sna->render_state.gen7.vertex_offset = 0; -} - -static int gen7_vertex_finish(struct sna *sna) -{ - struct kgem_bo *bo; - unsigned int i; - - assert(sna->render.vertex_used); - assert(sna->render.nvertex_reloc); - - /* Note: we only need dword 
alignment (currently) */ - - bo = sna->render.vbo; - if (bo) { - if (sna->render_state.gen7.vertex_offset) - gen7_vertex_flush(sna); - - for (i = 0; i < sna->render.nvertex_reloc; i++) { - DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, - i, sna->render.vertex_reloc[i])); - - sna->kgem.batch[sna->render.vertex_reloc[i]] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i], bo, - I915_GEM_DOMAIN_VERTEX << 16, - 0); - sna->kgem.batch[sna->render.vertex_reloc[i]+1] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i]+1, bo, - I915_GEM_DOMAIN_VERTEX << 16, - sna->render.vertex_used * 4 - 1); - } - - sna->render.nvertex_reloc = 0; - sna->render.vertex_used = 0; - sna->render.vertex_index = 0; - sna->render_state.gen7.vb_id = 0; - - kgem_bo_destroy(&sna->kgem, bo); - } - - sna->render.vertices = NULL; - sna->render.vbo = kgem_create_linear(&sna->kgem, - 256*1024, CREATE_GTT_MAP); - if (sna->render.vbo) - sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo); - if (sna->render.vertices == NULL) { - if (sna->render.vbo) - kgem_bo_destroy(&sna->kgem, sna->render.vbo); - sna->render.vbo = NULL; - return 0; - } - - kgem_bo_sync__cpu(&sna->kgem, sna->render.vbo); - if (sna->render.vertex_used) { - memcpy(sna->render.vertices, - sna->render.vertex_data, - sizeof(float)*sna->render.vertex_used); - } - sna->render.vertex_size = 64 * 1024 - 1; - return sna->render.vertex_size - sna->render.vertex_used; -} - -static void gen7_vertex_close(struct sna *sna) -{ - struct kgem_bo *bo, *free_bo = NULL; - unsigned int i, delta = 0; - - assert(sna->render_state.gen7.vertex_offset == 0); - - if (!sna->render_state.gen7.vb_id) - return; - - DBG(("%s: used=%d, vbo active? %d\n", - __FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0)); - - bo = sna->render.vbo; - if (bo) { - if (sna->render.vertex_size - sna->render.vertex_used < 64) { - DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); - sna->render.vbo = NULL; - sna->render.vertices = sna->render.vertex_data; - sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); - free_bo = bo; - } - } else { - if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) { - DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__, - sna->render.vertex_used, sna->kgem.nbatch)); - memcpy(sna->kgem.batch + sna->kgem.nbatch, - sna->render.vertex_data, - sna->render.vertex_used * 4); - delta = sna->kgem.nbatch * 4; - bo = NULL; - sna->kgem.nbatch += sna->render.vertex_used; - } else { - bo = kgem_create_linear(&sna->kgem, - 4*sna->render.vertex_used, 0); - if (bo && !kgem_bo_write(&sna->kgem, bo, - sna->render.vertex_data, - 4*sna->render.vertex_used)) { - kgem_bo_destroy(&sna->kgem, bo); - bo = NULL; - } - DBG(("%s: new vbo: %d\n", __FUNCTION__, - sna->render.vertex_used)); - free_bo = bo; - } - } - - assert(sna->render.nvertex_reloc); - for (i = 0; i < sna->render.nvertex_reloc; i++) { - DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, - i, sna->render.vertex_reloc[i])); - - sna->kgem.batch[sna->render.vertex_reloc[i]] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i], bo, - I915_GEM_DOMAIN_VERTEX << 16, - delta); - sna->kgem.batch[sna->render.vertex_reloc[i]+1] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i]+1, bo, - I915_GEM_DOMAIN_VERTEX << 16, - delta + sna->render.vertex_used * 4 - 1); - } - sna->render.nvertex_reloc = 0; - - if (sna->render.vbo == NULL) { - sna->render.vertex_used = 0; - sna->render.vertex_index = 0; - assert(sna->render.vertices == 
sna->render.vertex_data); - assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data)); - } - - if (free_bo) - kgem_bo_destroy(&sna->kgem, free_bo); + return true; } static void null_create(struct sna_static_stream *stream) @@ -1315,16 +1157,6 @@ sampler_fill_init(struct gen7_sampler_state *ss) sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); } -static uint32_t gen7_create_cc_viewport(struct sna_static_stream *stream) -{ - struct gen7_cc_viewport vp; - - vp.min_depth = -1.e35; - vp.max_depth = 1.e35; - - return sna_static_stream_add(stream, &vp, sizeof(vp), 32); -} - static uint32_t gen7_tiling_bits(uint32_t tiling) { @@ -1351,11 +1183,12 @@ gen7_bind_bo(struct sna *sna, uint32_t *ss; uint32_t domains; int offset; + uint32_t is_scanout = is_dst && bo->scanout; COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32); /* After the first bind, we manage the cache domains within the batch */ - offset = kgem_bo_get_binding(bo, format); + offset = kgem_bo_get_binding(bo, format | is_scanout << 31); if (offset) { if (is_dst) kgem_bo_mark_dirty(bo); @@ -1377,13 +1210,13 @@ gen7_bind_bo(struct sna *sna, (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT); ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT; ss[4] = 0; - ss[5] = 0; + ss[5] = is_scanout ? 0 : 3 << 16; ss[6] = 0; ss[7] = 0; - if (sna->kgem.gen == 75) + if (sna->kgem.gen == 075) ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); - kgem_bo_set_binding(bo, format, offset); + kgem_bo_set_binding(bo, format | is_scanout << 31, offset); DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", offset, bo->handle, ss[1], @@ -1393,251 +1226,6 @@ gen7_bind_bo(struct sna *sna, return offset * sizeof(uint32_t); } -fastcall static void -gen7_emit_composite_primitive_solid(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - float *v; - union { - struct sna_coordinate p; - float f; - } dst; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; - assert(sna->render.vertex_used <= sna->render.vertex_size); - assert(!too_large(op->dst.x + r->dst.x + r->width, - op->dst.y + r->dst.y + r->height)); - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - dst.p.x = r->dst.x; - v[3] = dst.f; - dst.p.y = r->dst.y; - v[6] = dst.f; - - v[5] = v[2] = v[1] = 1.; - v[8] = v[7] = v[4] = 0.; -} - -fastcall static void -gen7_emit_composite_primitive_identity_source(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - union { - struct sna_coordinate p; - float f; - } dst; - float *v; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - dst.p.x = r->dst.x; - v[3] = dst.f; - dst.p.y = r->dst.y; - v[6] = dst.f; - - v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; - v[1] = v[4] + r->width * op->src.scale[0]; - - v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; - v[5] = v[2] = v[8] + r->height * op->src.scale[1]; -} - -fastcall static void -gen7_emit_composite_primitive_simple_source(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - float *v; - union { - struct sna_coordinate p; - float f; - } dst; - - float xx = op->src.transform->matrix[0][0]; - float x0 = op->src.transform->matrix[0][2]; - float yy = op->src.transform->matrix[1][1]; 
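
The emitters being removed here all share one trick, visible in their union { struct sna_coordinate p; float f; } locals: a pair of 16-bit destination coordinates is packed into a single float vertex component, and each rectangle is emitted as only three RECTLIST vertices, the hardware inferring the fourth corner. A minimal standalone sketch of the packing (the union is a stand-in for struct sna_coordinate; the layout is simplified to one float per vertex):

	#include <stdint.h>

	union packed_coord {
		struct { int16_t x, y; } p;	/* stand-in for struct sna_coordinate */
		float f;
	};

	/* Emit the three RECTLIST corners (x2,y2), (x1,y2), (x1,y1);
	 * the hardware reconstructs (x2,y1) itself. */
	static void emit_rect(float *v, int16_t x1, int16_t y1,
			      int16_t x2, int16_t y2)
	{
		union packed_coord dst;

		dst.p.x = x2; dst.p.y = y2;
		v[0] = dst.f;
		dst.p.x = x1;
		v[1] = dst.f;
		dst.p.y = y1;
		v[2] = dst.f;
	}
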
- float y0 = op->src.transform->matrix[1][2]; - float sx = op->src.scale[0]; - float sy = op->src.scale[1]; - int16_t tx = op->src.offset[0]; - int16_t ty = op->src.offset[1]; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 3*3; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; - v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; - - dst.p.x = r->dst.x; - v[3] = dst.f; - v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; - - dst.p.y = r->dst.y; - v[6] = dst.f; - v[8] = ((r->src.y + ty) * yy + y0) * sy; -} - -fastcall static void -gen7_emit_composite_primitive_affine_source(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - union { - struct sna_coordinate p; - float f; - } dst; - float *v; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x + r->width, - op->src.offset[1] + r->src.y + r->height, - op->src.transform, - &v[1], &v[2]); - v[1] *= op->src.scale[0]; - v[2] *= op->src.scale[1]; - - dst.p.x = r->dst.x; - v[3] = dst.f; - _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x, - op->src.offset[1] + r->src.y + r->height, - op->src.transform, - &v[4], &v[5]); - v[4] *= op->src.scale[0]; - v[5] *= op->src.scale[1]; - - dst.p.y = r->dst.y; - v[6] = dst.f; - _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x, - op->src.offset[1] + r->src.y, - op->src.transform, - &v[7], &v[8]); - v[7] *= op->src.scale[0]; - v[8] *= op->src.scale[1]; -} - -fastcall static void -gen7_emit_composite_primitive_identity_source_mask(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - union { - struct sna_coordinate p; - float f; - } dst; - float src_x, src_y; - float msk_x, msk_y; - float w, h; - float *v; - - src_x = r->src.x + op->src.offset[0]; - src_y = r->src.y + op->src.offset[1]; - msk_x = r->mask.x + op->mask.offset[0]; - msk_y = r->mask.y + op->mask.offset[1]; - w = r->width; - h = r->height; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 15; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - v[1] = (src_x + w) * op->src.scale[0]; - v[2] = (src_y + h) * op->src.scale[1]; - v[3] = (msk_x + w) * op->mask.scale[0]; - v[4] = (msk_y + h) * op->mask.scale[1]; - - dst.p.x = r->dst.x; - v[5] = dst.f; - v[6] = src_x * op->src.scale[0]; - v[7] = v[2]; - v[8] = msk_x * op->mask.scale[0]; - v[9] = v[4]; - - dst.p.y = r->dst.y; - v[10] = dst.f; - v[11] = v[6]; - v[12] = src_y * op->src.scale[1]; - v[13] = v[8]; - v[14] = msk_y * op->mask.scale[1]; -} - -inline static void -gen7_emit_composite_texcoord(struct sna *sna, - const struct sna_composite_channel *channel, - int16_t x, int16_t y) -{ - x += channel->offset[0]; - y += channel->offset[1]; - - if (channel->is_affine) { - float s, t; - - sna_get_transformed_coordinates(x, y, - channel->transform, - &s, &t); - OUT_VERTEX_F(s * channel->scale[0]); - OUT_VERTEX_F(t * channel->scale[1]); - } else { - float s, t, w; - - sna_get_transformed_coordinates_3d(x, y, - channel->transform, - &s, &t, &w); - OUT_VERTEX_F(s * channel->scale[0]); - OUT_VERTEX_F(t * channel->scale[1]); - OUT_VERTEX_F(w); - } -} - -static void -gen7_emit_composite_vertex(struct sna *sna, - const 
struct sna_composite_op *op, - int16_t srcX, int16_t srcY, - int16_t mskX, int16_t mskY, - int16_t dstX, int16_t dstY) -{ - OUT_VERTEX(dstX, dstY); - gen7_emit_composite_texcoord(sna, &op->src, srcX, srcY); - gen7_emit_composite_texcoord(sna, &op->mask, mskX, mskY); -} - -fastcall static void -gen7_emit_composite_primitive(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - gen7_emit_composite_vertex(sna, op, - r->src.x + r->width, r->src.y + r->height, - r->mask.x + r->width, r->mask.y + r->height, - r->dst.x + r->width, r->dst.y + r->height); - gen7_emit_composite_vertex(sna, op, - r->src.x, r->src.y + r->height, - r->mask.x, r->mask.y + r->height, - r->dst.x, r->dst.y + r->height); - gen7_emit_composite_vertex(sna, op, - r->src.x, r->src.y, - r->mask.x, r->mask.y, - r->dst.x, r->dst.y); -} - static void gen7_emit_vertex_buffer(struct sna *sna, const struct sna_composite_op *op) { @@ -1650,22 +1238,22 @@ static void gen7_emit_vertex_buffer(struct sna *sna, 4*op->floats_per_vertex << GEN7_VB0_BUFFER_PITCH_SHIFT); sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; OUT_BATCH(0); - OUT_BATCH(0); + OUT_BATCH(~0); /* max address: disabled */ OUT_BATCH(0); - sna->render_state.gen7.vb_id |= 1 << id; + sna->render.vb_id |= 1 << id; } static void gen7_emit_primitive(struct sna *sna) { if (sna->kgem.nbatch == sna->render_state.gen7.last_primitive) { - sna->render_state.gen7.vertex_offset = sna->kgem.nbatch - 5; + sna->render.vertex_offset = sna->kgem.nbatch - 5; return; } OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2)); OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST); - sna->render_state.gen7.vertex_offset = sna->kgem.nbatch; + sna->render.vertex_offset = sna->kgem.nbatch; OUT_BATCH(0); /* vertex count, to be filled in later */ OUT_BATCH(sna->render.vertex_index); OUT_BATCH(1); /* single instance */ @@ -1682,13 +1270,16 @@ static bool gen7_rectangle_begin(struct sna *sna, int id = 1 << GEN7_VERTEX(op->u.gen7.flags); int ndwords; + if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) + return true; + ndwords = op->need_magic_ca_pass ? 60 : 6; - if ((sna->render_state.gen7.vb_id & id) == 0) + if ((sna->render.vb_id & id) == 0) ndwords += 5; if (!kgem_check_batch(&sna->kgem, ndwords)) return false; - if ((sna->render_state.gen7.vb_id & id) == 0) + if ((sna->render.vb_id & id) == 0) gen7_emit_vertex_buffer(sna, op); gen7_emit_primitive(sna); @@ -1698,17 +1289,28 @@ static bool gen7_rectangle_begin(struct sna *sna, static int gen7_get_rectangles__flush(struct sna *sna, const struct sna_composite_op *op) { + /* Prevent discarding the new vbo after lock contention */ + if (sna_vertex_wait__locked(&sna->render)) { + int rem = vertex_space(sna); + if (rem > op->floats_per_rect) + return rem; + } + if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ?
65 : 6)) return 0; - if (!kgem_check_exec(&sna->kgem, 1)) - return 0; - if (!kgem_check_reloc(&sna->kgem, 2)) + if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) return 0; - if (op->need_magic_ca_pass && sna->render.vbo) - return 0; + if (sna->render.vertex_offset) { + gen4_vertex_flush(sna); + if (gen7_magic_ca_pass(sna, op)) { + gen7_emit_pipe_invalidate(sna); + gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags)); + gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags)); + } + } - return gen7_vertex_finish(sna); + return gen4_vertex_finish(sna); } inline static int gen7_get_rectangles(struct sna *sna, @@ -1720,7 +1322,7 @@ inline static int gen7_get_rectangles(struct sna *sna, start: rem = vertex_space(sna); - if (rem < op->floats_per_rect) { + if (unlikely(rem < op->floats_per_rect)) { DBG(("flushing vbo for %s: %d < %d\n", __FUNCTION__, rem, op->floats_per_rect)); rem = gen7_get_rectangles__flush(sna, op); @@ -1728,7 +1330,7 @@ start: goto flush; } - if (unlikely(sna->render_state.gen7.vertex_offset == 0 && + if (unlikely(sna->render.vertex_offset == 0 && !gen7_rectangle_begin(sna, op))) goto flush; @@ -1740,10 +1342,11 @@ start: return want; flush: - if (sna->render_state.gen7.vertex_offset) { - gen7_vertex_flush(sna); + if (sna->render.vertex_offset) { + gen4_vertex_flush(sna); gen7_magic_ca_pass(sna, op); } + sna_vertex_wait__locked(&sna->render); _kgem_submit(&sna->kgem); emit_state(sna, op); goto start; @@ -1766,20 +1369,10 @@ inline static uint32_t *gen7_composite_get_binding_table(struct sna *sna, return table; } -static uint32_t -gen7_choose_composite_vertex_buffer(const struct sna_composite_op *op) -{ - int id = 2 + !op->is_affine; - if (op->mask.bo) - id |= id << 2; - assert(id > 0 && id < 16); - return id; -} - static void -gen7_get_batch(struct sna *sna) +gen7_get_batch(struct sna *sna, const struct sna_composite_op *op) { - kgem_set_mode(&sna->kgem, KGEM_RENDER); + kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) { DBG(("%s: flushing batch: %d < %d+%d\n", @@ -1802,7 +1395,7 @@ static void gen7_emit_composite_state(struct sna *sna, uint32_t *binding_table; uint16_t offset; - gen7_get_batch(sna); + gen7_get_batch(sna, op); binding_table = gen7_composite_get_binding_table(sna, &offset); @@ -1842,7 +1435,7 @@ gen7_align_vertex(struct sna *sna, const struct sna_composite_op *op) { if (op->floats_per_vertex != sna->render_state.gen7.floats_per_vertex) { if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect) - gen7_vertex_finish(sna); + gen4_vertex_finish(sna); DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n", sna->render_state.gen7.floats_per_vertex, @@ -1887,9 +1480,9 @@ gen7_render_composite_box(struct sna *sna, } static void -gen7_render_composite_boxes(struct sna *sna, - const struct sna_composite_op *op, - const BoxRec *box, int nbox) +gen7_render_composite_boxes__blt(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) { DBG(("composite_boxes(%d)\n", nbox)); @@ -1919,6 +1512,62 @@ gen7_render_composite_boxes(struct sna *sna, } while (nbox); } +static void +gen7_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen7_get_rectangles(sna, op, nbox, + gen7_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + 
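
The rewritten boxes paths reserve a contiguous slice of the vertex buffer up front and then let op->emit_boxes() fill it in, instead of emitting one rectangle at a time. The reservation step in isolation looks like this (a sketch; struct vb is a simplified stand-in for sna->render):

	#include <assert.h>

	struct vb {
		float *vertices;
		int used, size;
	};

	/* Reserve room for n rectangles of floats_per_rect floats each;
	 * the caller has already ensured they fit (gen7_get_rectangles). */
	static float *reserve_rects(struct vb *vb, int n, int floats_per_rect)
	{
		float *v = vb->vertices + vb->used;

		vb->used += n * floats_per_rect;
		assert(vb->used <= vb->size);
		return v;	/* emit_boxes() writes its vertices here */
	}
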
sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + } while (nbox); +} + +static void +gen7_render_composite_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen7_get_rectangles(sna, op, nbox, + gen7_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + #ifndef MAX #define MAX(a,b) ((a) > (b) ? (a) : (b)) #endif @@ -1998,7 +1647,7 @@ static void gen7_emit_video_state(struct sna *sna, uint16_t offset; int n_src, n; - gen7_get_batch(sna); + gen7_get_batch(sna, op); src_surf_base[0] = 0; src_surf_base[1] = 0; @@ -2059,12 +1708,14 @@ gen7_render_video(struct sna *sna, RegionPtr dstRegion, short src_w, short src_h, short drw_w, short drw_h, + short dx, short dy, PixmapPtr pixmap) { struct sna_composite_op tmp; - int nbox, dxo, dyo, pix_xoff, pix_yoff; + int nbox, pix_xoff, pix_yoff; float src_scale_x, src_scale_y; struct sna_pixmap *priv; + unsigned filter; BoxPtr box; DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", @@ -2093,15 +1744,22 @@ gen7_render_video(struct sna *sna, tmp.floats_per_vertex = 3; tmp.floats_per_rect = 9; + if (src_w == drw_w && src_h == drw_h) + filter = SAMPLER_FILTER_NEAREST; + else + filter = SAMPLER_FILTER_BILINEAR; + tmp.u.gen7.flags = - GEN7_SET_FLAGS(VIDEO_SAMPLER, NO_BLEND, + GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, + SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), + NO_BLEND, is_planar_fourcc(frame->id) ? 
GEN7_WM_KERNEL_VIDEO_PLANAR : GEN7_WM_KERNEL_VIDEO_PACKED, 2); tmp.priv = frame; - kgem_set_mode(&sna->kgem, KGEM_RENDER); + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { kgem_submit(&sna->kgem); assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)); @@ -2122,9 +1780,6 @@ gen7_render_video(struct sna *sna, pix_yoff = 0; #endif - dxo = dstRegion->extents.x1; - dyo = dstRegion->extents.y1; - /* Use normalized texture coordinates */ src_scale_x = ((float)src_w / frame->width) / (float)drw_w; src_scale_y = ((float)src_h / frame->height) / (float)drw_h; @@ -2142,16 +1797,16 @@ gen7_render_video(struct sna *sna, gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state); OUT_VERTEX(r.x2, r.y2); - OUT_VERTEX_F((box->x2 - dxo) * src_scale_x); - OUT_VERTEX_F((box->y2 - dyo) * src_scale_y); + OUT_VERTEX_F((box->x2 - dx) * src_scale_x); + OUT_VERTEX_F((box->y2 - dy) * src_scale_y); OUT_VERTEX(r.x1, r.y2); - OUT_VERTEX_F((box->x1 - dxo) * src_scale_x); - OUT_VERTEX_F((box->y2 - dyo) * src_scale_y); + OUT_VERTEX_F((box->x1 - dx) * src_scale_x); + OUT_VERTEX_F((box->y2 - dy) * src_scale_y); OUT_VERTEX(r.x1, r.y1); - OUT_VERTEX_F((box->x1 - dxo) * src_scale_x); - OUT_VERTEX_F((box->y1 - dyo) * src_scale_y); + OUT_VERTEX_F((box->x1 - dx) * src_scale_x); + OUT_VERTEX_F((box->y1 - dy) * src_scale_y); if (!DAMAGE_IS_ALL(priv->gpu_damage)) { sna_damage_add_box(&priv->gpu_damage, &r); @@ -2161,148 +1816,10 @@ gen7_render_video(struct sna *sna, } priv->clear = false; - gen7_vertex_flush(sna); + gen4_vertex_flush(sna); return true; } -static bool -gen7_composite_solid_init(struct sna *sna, - struct sna_composite_channel *channel, - uint32_t color) -{ - DBG(("%s: color=%x\n", __FUNCTION__, color)); - - channel->filter = PictFilterNearest; - channel->repeat = RepeatNormal; - channel->is_affine = true; - channel->is_solid = true; - channel->is_opaque = (color >> 24) == 0xff; - channel->transform = NULL; - channel->width = 1; - channel->height = 1; - channel->card_format = GEN7_SURFACEFORMAT_B8G8R8A8_UNORM; - - channel->bo = sna_render_get_solid(sna, color); - - channel->scale[0] = channel->scale[1] = 1; - channel->offset[0] = channel->offset[1] = 0; - return channel->bo != NULL; -} - -static bool -gen7_composite_linear_init(struct sna *sna, - PicturePtr picture, - struct sna_composite_channel *channel, - int x, int y, - int w, int h, - int dst_x, int dst_y) -{ - PictLinearGradient *linear = - (PictLinearGradient *)picture->pSourcePict; - pixman_fixed_t tx, ty; - float x0, y0, sf; - float dx, dy; - - DBG(("%s: p1=(%f, %f), p2=(%f, %f), src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n", - __FUNCTION__, - pixman_fixed_to_double(linear->p1.x), pixman_fixed_to_double(linear->p1.y), - pixman_fixed_to_double(linear->p2.x), pixman_fixed_to_double(linear->p2.y), - x, y, dst_x, dst_y, w, h)); - - if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y) - return 0; - - if (!sna_transform_is_affine(picture->transform)) { - DBG(("%s: fallback due to projective transform\n", - __FUNCTION__)); - return sna_render_picture_fixup(sna, picture, channel, - x, y, w, h, dst_x, dst_y); - } - - channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear); - if (!channel->bo) - return 0; - - channel->filter = PictFilterNearest; - channel->repeat = picture->repeat ? 
picture->repeatType : RepeatNone; - channel->width = channel->bo->pitch / 4; - channel->height = 1; - channel->pict_format = PICT_a8r8g8b8; - - channel->scale[0] = channel->scale[1] = 1; - channel->offset[0] = channel->offset[1] = 0; - - if (sna_transform_is_translation(picture->transform, &tx, &ty)) { - dx = pixman_fixed_to_double(linear->p2.x - linear->p1.x); - dy = pixman_fixed_to_double(linear->p2.y - linear->p1.y); - - x0 = pixman_fixed_to_double(linear->p1.x); - y0 = pixman_fixed_to_double(linear->p1.y); - - if (tx | ty) { - x0 -= pixman_fixed_to_double(tx); - y0 -= pixman_fixed_to_double(ty); - } - } else { - struct pixman_f_vector p1, p2; - struct pixman_f_transform m, inv; - - pixman_f_transform_from_pixman_transform(&m, picture->transform); - DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n", - __FUNCTION__, - m.m[0][0], m.m[0][1], m.m[0][2], - m.m[1][0], m.m[1][1], m.m[1][2], - m.m[2][0], m.m[2][1], m.m[2][2])); - if (!pixman_f_transform_invert(&inv, &m)) - return 0; - - p1.v[0] = pixman_fixed_to_double(linear->p1.x); - p1.v[1] = pixman_fixed_to_double(linear->p1.y); - p1.v[2] = 1.; - pixman_f_transform_point(&inv, &p1); - - p2.v[0] = pixman_fixed_to_double(linear->p2.x); - p2.v[1] = pixman_fixed_to_double(linear->p2.y); - p2.v[2] = 1.; - pixman_f_transform_point(&inv, &p2); - - DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n", - __FUNCTION__, - p1.v[0], p1.v[1], p1.v[2], - p2.v[0], p2.v[1], p2.v[2])); - - dx = p2.v[0] - p1.v[0]; - dy = p2.v[1] - p1.v[1]; - - x0 = p1.v[0]; - y0 = p1.v[1]; - } - - sf = dx*dx + dy*dy; - dx /= sf; - dy /= sf; - - channel->embedded_transform.matrix[0][0] = pixman_double_to_fixed(dx); - channel->embedded_transform.matrix[0][1] = pixman_double_to_fixed(dy); - channel->embedded_transform.matrix[0][2] = -pixman_double_to_fixed(dx*(x0+dst_x-x) + dy*(y0+dst_y-y)); - - channel->embedded_transform.matrix[1][0] = 0; - channel->embedded_transform.matrix[1][1] = 0; - channel->embedded_transform.matrix[1][2] = pixman_double_to_fixed(.5); - - channel->embedded_transform.matrix[2][0] = 0; - channel->embedded_transform.matrix[2][1] = 0; - channel->embedded_transform.matrix[2][2] = pixman_fixed_1; - - channel->transform = &channel->embedded_transform; - channel->is_affine = 1; - - DBG(("%s: dx=%f, dy=%f, offset=%f\n", - __FUNCTION__, dx, dy, -dx*(x0-x+dst_x) + -dy*(y0-y+dst_y))); - - return channel->bo != NULL; -} - static int gen7_composite_picture(struct sna *sna, PicturePtr picture, @@ -2323,16 +1840,16 @@ gen7_composite_picture(struct sna *sna, channel->card_format = -1; if (sna_picture_is_solid(picture, &color)) - return gen7_composite_solid_init(sna, channel, color); + return gen4_channel_init_solid(sna, channel, color); if (picture->pDrawable == NULL) { int ret; if (picture->pSourcePict->type == SourcePictTypeLinear) - return gen7_composite_linear_init(sna, picture, channel, - x, y, - w, h, - dst_x, dst_y); + return gen4_channel_init_linear(sna, picture, channel, + x, y, + w, h, + dst_x, dst_y); DBG(("%s -- fixup, gradient\n", __FUNCTION__)); ret = -1; @@ -2383,7 +1900,8 @@ gen7_composite_picture(struct sna *sna, channel->card_format = gen7_get_card_format(picture->format); if (channel->card_format == (unsigned)-1) return sna_render_picture_convert(sna, picture, channel, pixmap, - x, y, w, h, dst_x, dst_y); + x, y, w, h, dst_x, dst_y, + false); if (too_large(pixmap->drawable.width, pixmap->drawable.height)) { DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__, @@ -2408,8 +1926,8 @@ static void gen7_composite_channel_convert(struct 
sna_composite_channel *channel static void gen7_render_composite_done(struct sna *sna, const struct sna_composite_op *op) { - if (sna->render_state.gen7.vertex_offset) { - gen7_vertex_flush(sna); + if (sna->render.vertex_offset) { + gen4_vertex_flush(sna); gen7_magic_ca_pass(sna, op); } @@ -2469,9 +1987,11 @@ gen7_composite_set_target(struct sna *sna, return true; } -inline static bool can_switch_to_blt(struct sna *sna) +inline static bool can_switch_to_blt(struct sna *sna, + struct kgem_bo *bo, + unsigned flags) { - if (sna->kgem.ring == KGEM_BLT) + if (sna->kgem.ring != KGEM_RENDER) return true; if (NO_RING_SWITCH) @@ -2480,7 +2000,13 @@ inline static bool can_switch_to_blt(struct sna *sna) if (!sna->kgem.has_semaphores) return false; - return sna->kgem.mode == KGEM_NONE || kgem_is_idle(&sna->kgem); + if (flags & COPY_LAST) + return true; + + if (bo && RQ_IS_BLT(bo->rq)) + return true; + + return kgem_ring_is_idle(&sna->kgem, KGEM_BLT); } static inline bool untiled_tlb_miss(struct kgem_bo *bo) @@ -2488,14 +2014,19 @@ static inline bool untiled_tlb_miss(struct kgem_bo *bo) return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096; } -static bool prefer_blt_bo(struct sna *sna, struct kgem_bo *bo) +static int prefer_blt_bo(struct sna *sna, struct kgem_bo *bo) { - return untiled_tlb_miss(bo) && bo->pitch < MAXSHORT; + if (bo->rq) + return RQ_IS_BLT(bo->rq) ? 1 : -1; + + return bo->tiling == I915_TILING_NONE || bo->scanout; } -inline static bool prefer_blt_ring(struct sna *sna) +inline static bool prefer_blt_ring(struct sna *sna, + struct kgem_bo *bo, + unsigned flags) { - return sna->kgem.ring != KGEM_RENDER || can_switch_to_blt(sna); + return can_switch_to_blt(sna, bo, flags); } static bool @@ -2514,17 +2045,8 @@ try_blt(struct sna *sna, return true; } - if (can_switch_to_blt(sna)) { - if (sna_picture_is_solid(src, NULL)) - return true; - - if (dst->pDrawable == src->pDrawable) - return true; - - if (src->pDrawable && - get_drawable_pixmap(dst->pDrawable) == get_drawable_pixmap(src->pDrawable)) - return true; - } + if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, NULL, 0)) + return true; return false; } @@ -2551,12 +2073,6 @@ has_alphamap(PicturePtr p) } static bool -untransformed(PicturePtr p) -{ - return !p->transform || pixman_transform_is_int_translate(p->transform); -} - -static bool need_upload(PicturePtr p) { return p->pDrawable && unattached(p->pDrawable) && untransformed(p); @@ -2602,7 +2118,6 @@ gen7_composite_fallback(struct sna *sna, PicturePtr mask, PicturePtr dst) { - struct sna_pixmap *priv; PixmapPtr src_pixmap; PixmapPtr mask_pixmap; PixmapPtr dst_pixmap; @@ -2641,10 +2156,7 @@ gen7_composite_fallback(struct sna *sna, } /* If anything is on the GPU, push everything out to the GPU */ - priv = sna_pixmap(dst_pixmap); - if (priv && - ((priv->gpu_damage && !priv->clear) || - (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)))) { + if (dst_use_gpu(dst_pixmap)) { DBG(("%s: dst is already on the GPU, try to use GPU\n", __FUNCTION__)); return false; @@ -2679,14 +2191,14 @@ gen7_composite_fallback(struct sna *sna, if (too_large(dst_pixmap->drawable.width, dst_pixmap->drawable.height) && - (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage))) { + dst_is_cpu(dst_pixmap)) { DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); return true; } DBG(("%s: dst is not on the GPU and the operation should not fallback\n", __FUNCTION__)); - return false; + return dst_use_cpu(dst_pixmap); } static int @@ -2707,7 +2219,7 @@ reuse_source(struct sna *sna, } if 
(sna_picture_is_solid(mask, &color)) - return gen7_composite_solid_init(sna, mc, color); + return gen4_channel_init_solid(sna, mc, color); if (sc->is_solid) return false; @@ -2750,11 +2262,14 @@ prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp) if (sna->kgem.ring == KGEM_BLT) return true; - if (!prefer_blt_ring(sna)) + if (untiled_tlb_miss(tmp->dst.bo) || + untiled_tlb_miss(tmp->src.bo)) + return true; + + if (!prefer_blt_ring(sna, tmp->dst.bo, 0)) return false; - return (prefer_blt_bo(sna, tmp->dst.bo) || - prefer_blt_bo(sna, tmp->src.bo)); + return (prefer_blt_bo(sna, tmp->dst.bo) | prefer_blt_bo(sna, tmp->src.bo)) > 0; } static bool @@ -2811,7 +2326,7 @@ gen7_render_composite(struct sna *sna, case -1: goto cleanup_dst; case 0: - if (!gen7_composite_solid_init(sna, &tmp->src, 0)) + if (!gen4_channel_init_solid(sna, &tmp->src, 0)) goto cleanup_dst; /* fall through to fixup */ case 1: @@ -2835,7 +2350,6 @@ gen7_render_composite(struct sna *sna, tmp->mask.filter = SAMPLER_FILTER_NEAREST; tmp->mask.repeat = SAMPLER_EXTEND_NONE; - tmp->prim_emit = gen7_emit_composite_primitive; if (mask) { if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { tmp->has_component_alpha = true; @@ -2865,7 +2379,7 @@ gen7_render_composite(struct sna *sna, case -1: goto cleanup_src; case 0: - if (!gen7_composite_solid_init(sna, &tmp->mask, 0)) + if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) goto cleanup_src; /* fall through to fixup */ case 1: @@ -2875,31 +2389,7 @@ gen7_render_composite(struct sna *sna, } tmp->is_affine &= tmp->mask.is_affine; - - if (tmp->src.transform == NULL && tmp->mask.transform == NULL) - tmp->prim_emit = gen7_emit_composite_primitive_identity_source_mask; - - tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine; - } else { - if (tmp->src.is_solid) { - tmp->prim_emit = gen7_emit_composite_primitive_solid; - if (tmp->src.is_opaque && op == PictOpOver) - tmp->op = PictOpSrc; - } else if (tmp->src.transform == NULL) - tmp->prim_emit = gen7_emit_composite_primitive_identity_source; - else if (tmp->src.is_affine) { - if (tmp->src.transform->matrix[0][1] == 0 && - tmp->src.transform->matrix[1][0] == 0) { - tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; - tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; - tmp->prim_emit = gen7_emit_composite_primitive_simple_source; - } else - tmp->prim_emit = gen7_emit_composite_primitive_affine_source; - } - - tmp->floats_per_vertex = 3 + !tmp->is_affine; } - tmp->floats_per_rect = 3 * tmp->floats_per_vertex; tmp->u.gen7.flags = GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, @@ -2913,14 +2403,18 @@ gen7_render_composite(struct sna *sna, tmp->mask.bo != NULL, tmp->has_component_alpha, tmp->is_affine), - gen7_choose_composite_vertex_buffer(tmp)); + gen4_choose_composite_emitter(tmp)); tmp->blt = gen7_render_composite_blt; tmp->box = gen7_render_composite_box; - tmp->boxes = gen7_render_composite_boxes; + tmp->boxes = gen7_render_composite_boxes__blt; + if (tmp->emit_boxes){ + tmp->boxes = gen7_render_composite_boxes; + tmp->thread_boxes = gen7_render_composite_boxes__thread; + } tmp->done = gen7_render_composite_done; - kgem_set_mode(&sna->kgem, KGEM_RENDER); + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); if (!kgem_check_bo(&sna->kgem, tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) { @@ -2949,167 +2443,6 @@ cleanup_dst: } #if !NO_COMPOSITE_SPANS -inline static void -gen7_emit_composite_texcoord_affine(struct sna *sna, - const struct sna_composite_channel *channel, - int16_t x, int16_t y) -{ - float t[2]; - - 
sna_get_transformed_coordinates(x + channel->offset[0], - y + channel->offset[1], - channel->transform, - &t[0], &t[1]); - OUT_VERTEX_F(t[0] * channel->scale[0]); - OUT_VERTEX_F(t[1] * channel->scale[1]); -} - -inline static void -gen7_emit_composite_spans_vertex(struct sna *sna, - const struct sna_composite_spans_op *op, - int16_t x, int16_t y) -{ - OUT_VERTEX(x, y); - gen7_emit_composite_texcoord(sna, &op->base.src, x, y); -} - -fastcall static void -gen7_emit_composite_spans_primitive(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - gen7_emit_composite_spans_vertex(sna, op, box->x2, box->y2); - OUT_VERTEX_F(opacity); - - gen7_emit_composite_spans_vertex(sna, op, box->x1, box->y2); - OUT_VERTEX_F(opacity); - - gen7_emit_composite_spans_vertex(sna, op, box->x1, box->y1); - OUT_VERTEX_F(opacity); -} - -fastcall static void -gen7_emit_composite_spans_solid(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - OUT_VERTEX(box->x2, box->y2); - OUT_VERTEX_F(1); OUT_VERTEX_F(1); - OUT_VERTEX_F(opacity); - - OUT_VERTEX(box->x1, box->y2); - OUT_VERTEX_F(0); OUT_VERTEX_F(1); - OUT_VERTEX_F(opacity); - - OUT_VERTEX(box->x1, box->y1); - OUT_VERTEX_F(0); OUT_VERTEX_F(0); - OUT_VERTEX_F(opacity); -} - -fastcall static void -gen7_emit_composite_spans_identity(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - float *v; - union { - struct sna_coordinate p; - float f; - } dst; - - float sx = op->base.src.scale[0]; - float sy = op->base.src.scale[1]; - int16_t tx = op->base.src.offset[0]; - int16_t ty = op->base.src.offset[1]; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 3*4; - assert(sna->render.vertex_used <= sna->render.vertex_size); - - dst.p.x = box->x2; - dst.p.y = box->y2; - v[0] = dst.f; - v[1] = (box->x2 + tx) * sx; - v[6] = v[2] = (box->y2 + ty) * sy; - - dst.p.x = box->x1; - v[4] = dst.f; - v[9] = v[5] = (box->x1 + tx) * sx; - - dst.p.y = box->y1; - v[8] = dst.f; - v[10] = (box->y1 + ty) * sy; - - v[11] = v[7] = v[3] = opacity; -} - -fastcall static void -gen7_emit_composite_spans_simple(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - float *v; - union { - struct sna_coordinate p; - float f; - } dst; - - float xx = op->base.src.transform->matrix[0][0]; - float x0 = op->base.src.transform->matrix[0][2]; - float yy = op->base.src.transform->matrix[1][1]; - float y0 = op->base.src.transform->matrix[1][2]; - float sx = op->base.src.scale[0]; - float sy = op->base.src.scale[1]; - int16_t tx = op->base.src.offset[0]; - int16_t ty = op->base.src.offset[1]; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 3*4; - assert(sna->render.vertex_used <= sna->render.vertex_size); - - dst.p.x = box->x2; - dst.p.y = box->y2; - v[0] = dst.f; - v[1] = ((box->x2 + tx) * xx + x0) * sx; - v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy; - - dst.p.x = box->x1; - v[4] = dst.f; - v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx; - - dst.p.y = box->y1; - v[8] = dst.f; - v[10] = ((box->y1 + ty) * yy + y0) * sy; - - v[11] = v[7] = v[3] = opacity; -} - -fastcall static void -gen7_emit_composite_spans_affine(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - OUT_VERTEX(box->x2, box->y2); - gen7_emit_composite_texcoord_affine(sna, &op->base.src, - box->x2, box->y2); - OUT_VERTEX_F(opacity); - - 
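
Each spans vertex carries the packed destination position, the source texture coordinates and the span opacity; the removed specializations differ only in how the texcoord is computed. For the identity case (no transform) the whole emitter reduces to the following sketch, mirroring gen7_emit_composite_spans_identity above with a fixed stride of 4 floats per vertex:

	#include <stdint.h>

	/* v[] holds 12 floats = 3 vertices; v[0], v[4], v[8] receive the
	 * packed destination corners, written separately by the caller. */
	static void emit_span_identity(float *v,
				       int16_t x1, int16_t y1,
				       int16_t x2, int16_t y2,
				       float sx, float sy,	/* 1 / texture size */
				       int16_t tx, int16_t ty,	/* channel offset */
				       float opacity)
	{
		v[1] = (x2 + tx) * sx;
		v[6] = v[2] = (y2 + ty) * sy;
		v[9] = v[5] = (x1 + tx) * sx;
		v[10] = (y1 + ty) * sy;
		v[11] = v[7] = v[3] = opacity;
	}
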
OUT_VERTEX(box->x1, box->y2); - gen7_emit_composite_texcoord_affine(sna, &op->base.src, - box->x1, box->y2); - OUT_VERTEX_F(opacity); - - OUT_VERTEX(box->x1, box->y1); - gen7_emit_composite_texcoord_affine(sna, &op->base.src, - box->x1, box->y1); - OUT_VERTEX_F(opacity); -} - fastcall static void gen7_render_composite_spans_box(struct sna *sna, const struct sna_composite_spans_op *op, @@ -3159,11 +2492,47 @@ gen7_render_composite_spans_boxes(struct sna *sna, } fastcall static void +gen7_render_composite_spans_boxes__thread(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox, + gen7_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +fastcall static void gen7_render_composite_spans_done(struct sna *sna, const struct sna_composite_spans_op *op) { - if (sna->render_state.gen7.vertex_offset) - gen7_vertex_flush(sna); + if (sna->render.vertex_offset) + gen4_vertex_flush(sna); DBG(("%s()\n", __FUNCTION__)); @@ -3184,12 +2553,11 @@ gen7_check_composite_spans(struct sna *sna, if (gen7_composite_fallback(sna, src, NULL, dst)) return false; - if (need_tiling(sna, width, height)) { - if (!is_gpu(dst->pDrawable)) { - DBG(("%s: fallback, tiled operation not on GPU\n", - __FUNCTION__)); - return false; - } + if (need_tiling(sna, width, height) && + !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { + DBG(("%s: fallback, tiled operation not on GPU\n", + __FUNCTION__)); + return false; } return true; @@ -3232,7 +2600,7 @@ gen7_render_composite_spans(struct sna *sna, case -1: goto cleanup_dst; case 0: - if (!gen7_composite_solid_init(sna, &tmp->base.src, 0)) + if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) goto cleanup_dst; /* fall through to fixup */ case 1: @@ -3244,23 +2612,6 @@ gen7_render_composite_spans(struct sna *sna, tmp->base.is_affine = tmp->base.src.is_affine; tmp->base.need_magic_ca_pass = false; - tmp->prim_emit = gen7_emit_composite_spans_primitive; - if (tmp->base.src.is_solid) { - tmp->prim_emit = gen7_emit_composite_spans_solid; - } else if (tmp->base.src.transform == NULL) { - tmp->prim_emit = gen7_emit_composite_spans_identity; - } else if (tmp->base.is_affine) { - if (tmp->base.src.transform->matrix[0][1] == 0 && - tmp->base.src.transform->matrix[1][0] == 0) { - tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2]; - tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2]; - tmp->prim_emit = gen7_emit_composite_spans_simple; - } else - tmp->prim_emit = gen7_emit_composite_spans_affine; - } - tmp->base.floats_per_vertex = 4 + !tmp->base.is_affine; - tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex; - tmp->base.u.gen7.flags = GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter, tmp->base.src.repeat, @@ -3268,13 +2619,15 @@ gen7_render_composite_spans(struct sna 
*sna, SAMPLER_EXTEND_PAD), gen7_get_blend(tmp->base.op, false, tmp->base.dst.format), GEN7_WM_KERNEL_OPACITY | !tmp->base.is_affine, - 1 << 2 | (2+!tmp->base.is_affine)); + gen4_choose_spans_emitter(tmp)); tmp->box = gen7_render_composite_spans_box; tmp->boxes = gen7_render_composite_spans_boxes; + if (tmp->emit_boxes) + tmp->thread_boxes = gen7_render_composite_spans_boxes__thread; tmp->done = gen7_render_composite_spans_done; - kgem_set_mode(&sna->kgem, KGEM_RENDER); + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); if (!kgem_check_bo(&sna->kgem, tmp->base.dst.bo, tmp->base.src.bo, NULL)) { @@ -3307,7 +2660,7 @@ gen7_emit_copy_state(struct sna *sna, uint32_t *binding_table; uint16_t offset; - gen7_get_batch(sna); + gen7_get_batch(sna, op); binding_table = gen7_composite_get_binding_table(sna, &offset); @@ -3337,10 +2690,23 @@ static inline bool prefer_blt_copy(struct sna *sna, struct kgem_bo *dst_bo, unsigned flags) { - return (sna->kgem.ring == KGEM_BLT || - (flags & COPY_LAST && sna->kgem.mode == KGEM_NONE) || - prefer_blt_bo(sna, src_bo) || - prefer_blt_bo(sna, dst_bo)); + if (sna->kgem.ring == KGEM_BLT) + return true; + + assert((flags & COPY_SYNC) == 0); + + if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) + return true; + + if (untiled_tlb_miss(src_bo) || + untiled_tlb_miss(dst_bo)) + return true; + + if (!prefer_blt_ring(sna, dst_bo, flags)) + return false; + + return (prefer_blt_bo(sna, src_bo) >= 0 && + prefer_blt_bo(sna, dst_bo) > 0); } inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents) @@ -3386,8 +2752,8 @@ gen7_render_copy_boxes(struct sna *sna, uint8_t alu, struct sna_composite_op tmp; BoxRec extents; - DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n", - __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, + DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? 
%d\n", + __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags, src_bo == dst_bo, overlaps(sna, src_bo, src_dx, src_dy, @@ -3421,7 +2787,7 @@ fallback_blt: if (too_large(extents.x2-extents.x1, extents.y2-extents.y1)) goto fallback_blt; - if ((flags & COPY_LAST || can_switch_to_blt(sna)) && + if (can_switch_to_blt(sna, dst_bo, flags) && sna_blt_compare_depth(&src->drawable, &dst->drawable) && sna_blt_copy_boxes(sna, alu, src_bo, src_dx, src_dy, @@ -3523,7 +2889,7 @@ fallback_blt: tmp.u.gen7.flags = COPY_FLAGS(alu); - kgem_set_mode(&sna->kgem, KGEM_RENDER); + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { kgem_submit(&sna->kgem); if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) @@ -3563,7 +2929,7 @@ fallback_blt: } while (--n_this_time); } while (n); - gen7_vertex_flush(sna); + gen4_vertex_flush(sna); sna_render_composite_redirect_done(sna, &tmp); if (tmp.src.bo != src_bo) kgem_bo_destroy(&sna->kgem, tmp.src.bo); @@ -3576,6 +2942,14 @@ fallback_tiled_dst: if (tmp.redirect.real_bo) kgem_bo_destroy(&sna->kgem, tmp.dst.bo); fallback_tiled: + if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && + sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n)) + return true; + return sna_tiling_copy_boxes(sna, alu, src, src_bo, src_dx, src_dy, dst, dst_bo, dst_dx, dst_dy, @@ -3608,8 +2982,8 @@ gen7_render_copy_blt(struct sna *sna, static void gen7_render_copy_done(struct sna *sna, const struct sna_copy_op *op) { - if (sna->render_state.gen7.vertex_offset) - gen7_vertex_flush(sna); + if (sna->render.vertex_offset) + gen4_vertex_flush(sna); } static bool @@ -3671,7 +3045,7 @@ fallback: op->base.u.gen7.flags = COPY_FLAGS(alu); - kgem_set_mode(&sna->kgem, KGEM_RENDER); + kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { kgem_submit(&sna->kgem); if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) @@ -3699,7 +3073,7 @@ gen7_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) * specific kernel. 
*/ - gen7_get_batch(sna); + gen7_get_batch(sna, op); binding_table = gen7_composite_get_binding_table(sna, &offset); @@ -3727,7 +3101,10 @@ gen7_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) static inline bool prefer_blt_fill(struct sna *sna, struct kgem_bo *bo) { - return prefer_blt_ring(sna) || untiled_tlb_miss(bo); + if (untiled_tlb_miss(bo)) + return true; + + return prefer_blt_ring(sna, bo, 0) || prefer_blt_bo(sna, bo) >= 0; } static bool @@ -3822,6 +3199,7 @@ gen7_render_fill_boxes(struct sna *sna, tmp.u.gen7.flags = FILL_FLAGS(op, format); + kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { kgem_submit(&sna->kgem); assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); @@ -3855,7 +3233,7 @@ gen7_render_fill_boxes(struct sna *sna, } while (--n_this_time); } while (n); - gen7_vertex_flush(sna); + gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, tmp.src.bo); sna_render_composite_redirect_done(sna, &tmp); return true; @@ -3946,8 +3324,8 @@ gen7_render_fill_op_boxes(struct sna *sna, static void gen7_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) { - if (sna->render_state.gen7.vertex_offset) - gen7_vertex_flush(sna); + if (sna->render.vertex_offset) + gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, op->base.src.bo); } @@ -3995,6 +3373,7 @@ gen7_render_fill(struct sna *sna, uint8_t alu, op->base.u.gen7.flags = FILL_FLAGS_NOBLEND; + kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { kgem_submit(&sna->kgem); assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); @@ -4072,9 +3451,13 @@ gen7_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, tmp.u.gen7.flags = FILL_FLAGS_NOBLEND; + kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); if (!kgem_check_bo(&sna->kgem, bo, NULL)) { - _kgem_submit(&sna->kgem); - assert(kgem_check_bo(&sna->kgem, bo, NULL)); + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, bo, NULL)) { + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + return false; + } } gen7_emit_fill_state(sna, &tmp); @@ -4095,7 +3478,7 @@ gen7_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, v[7] = v[2] = v[3] = 1; v[6] = v[10] = v[11] = 0; - gen7_vertex_flush(sna); + gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, tmp.src.bo); return true; @@ -4152,9 +3535,13 @@ gen7_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) tmp.u.gen7.flags = FILL_FLAGS_NOBLEND; + kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); if (!kgem_check_bo(&sna->kgem, bo, NULL)) { - _kgem_submit(&sna->kgem); - assert(kgem_check_bo(&sna->kgem, bo, NULL)); + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, bo, NULL)) { + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + return false; + } } gen7_emit_fill_state(sna, &tmp); @@ -4174,7 +3561,7 @@ gen7_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) v[7] = v[2] = v[3] = 1; v[6] = v[10] = v[11] = 0; - gen7_vertex_flush(sna); + gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, tmp.src.bo); return true; @@ -4182,20 +3569,20 @@ gen7_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) static void gen7_render_flush(struct sna *sna) { - gen7_vertex_close(sna); + gen4_vertex_close(sna); + + assert(sna->render.vb_id == 0); + assert(sna->render.vertex_offset == 0); } static void gen7_render_context_switch(struct kgem *kgem, int new_mode) { - if (!new_mode) - return; - - if (kgem->mode) { + if (kgem->nbatch) { DBG(("%s: switch rings %d -> %d\n", __FUNCTION__, kgem->mode, new_mode)); -
kgem_submit(kgem); + _kgem_submit(kgem); } kgem->ring = new_mode; @@ -4238,7 +3625,6 @@ static void gen7_render_reset(struct sna *sna) { sna->render_state.gen7.emit_flush = false; sna->render_state.gen7.needs_invariant = true; - sna->render_state.gen7.vb_id = 0; sna->render_state.gen7.ve_id = 3 << 2; sna->render_state.gen7.last_primitive = -1; @@ -4249,6 +3635,10 @@ static void gen7_render_reset(struct sna *sna) sna->render_state.gen7.drawrect_offset = -1; sna->render_state.gen7.drawrect_limit = -1; sna->render_state.gen7.surface_table = -1; + + sna->render.vertex_offset = 0; + sna->render.nvertex_reloc = 0; + sna->render.vb_id = 0; } static void gen7_render_fini(struct sna *sna) @@ -4256,6 +3646,16 @@ static void gen7_render_fini(struct sna *sna) kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo); } +static bool is_gt2(struct sna *sna) +{ + return DEVICE_ID(sna->PciInfo) & 0x20; +} + +static bool is_mobile(struct sna *sna) +{ + return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6; +} + static bool gen7_render_setup(struct sna *sna) { struct gen7_render_state *state = &sna->render_state.gen7; @@ -4263,15 +3663,20 @@ static bool gen7_render_setup(struct sna *sna) struct gen7_sampler_state *ss; int i, j, k, l, m; - if (sna->kgem.gen == 70) { + if (sna->kgem.gen == 070) { state->info = &ivb_gt_info; if (DEVICE_ID(sna->PciInfo) & 0xf) { state->info = &ivb_gt1_info; - if (DEVICE_ID(sna->PciInfo) & 0x20) + if (is_gt2(sna)) state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */ } - } else if (sna->kgem.gen == 75) { + } else if (sna->kgem.gen == 075) { state->info = &hsw_gt_info; + if (DEVICE_ID(sna->PciInfo) & 0xf) { + state->info = &hsw_gt1_info; + if (is_gt2(sna)) + state->info = &hsw_gt2_info; + } } else return false; @@ -4331,7 +3736,6 @@ static bool gen7_render_setup(struct sna *sna) } } - state->cc_vp = gen7_create_cc_viewport(&general); state->cc_blend = gen7_composite_create_blend_state(&general); state->general_bo = sna_static_stream_fini(sna, &general); @@ -4349,10 +3753,13 @@ bool gen7_render_init(struct sna *sna) #if !NO_COMPOSITE sna->render.composite = gen7_render_composite; + sna->render.prefer_gpu |= PREFER_GPU_RENDER; #endif #if !NO_COMPOSITE_SPANS sna->render.check_composite_spans = gen7_check_composite_spans; sna->render.composite_spans = gen7_render_composite_spans; + if (is_mobile(sna)) + sna->render.prefer_gpu |= PREFER_GPU_SPANS; #endif sna->render.video = gen7_render_video; diff --git a/src/sna/kgem.c b/src/sna/kgem.c index 46c898f79..86a2dfcde 100644 --- a/src/sna/kgem.c +++ b/src/sna/kgem.c @@ -69,10 +69,26 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define DBG_NO_UPLOAD_ACTIVE 0 #define DBG_NO_MAP_UPLOAD 0 #define DBG_NO_RELAXED_FENCING 0 +#define DBG_NO_SECURE_BATCHES 0 +#define DBG_NO_PINNED_BATCHES 0 +#define DBG_NO_FAST_RELOC 0 +#define DBG_NO_HANDLE_LUT 0 #define DBG_DUMP 0 +#ifndef DEBUG_SYNC +#define DEBUG_SYNC 0 +#endif + #define SHOW_BATCH 0 +#if 0 +#define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__)) +#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__)) +#else +#define ASSERT_IDLE(kgem__, handle__) +#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) +#endif + /* Worst case seems to be 965gm where we cannot write within a cacheline that * is being simultaneously being read by the GPU, or within the sampler * prefetch. 
In general, the chipsets seem to have a requirement that sampler @@ -93,7 +109,20 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 2) #define __MAP_TYPE(ptr) ((uintptr_t)(ptr) & 3) -#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20 +#define MAKE_REQUEST(rq, ring) ((struct kgem_request *)((uintptr_t)(rq) | (ring))) + +#define LOCAL_I915_PARAM_HAS_BLT 11 +#define LOCAL_I915_PARAM_HAS_RELAXED_FENCING 12 +#define LOCAL_I915_PARAM_HAS_RELAXED_DELTA 15 +#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20 +#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23 +#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24 +#define LOCAL_I915_PARAM_HAS_NO_RELOC 25 +#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26 + +#define LOCAL_I915_EXEC_IS_PINNED (1<<10) +#define LOCAL_I915_EXEC_NO_RELOC (1<<11) +#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12) #define LOCAL_I915_GEM_USERPTR 0x32 #define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr) @@ -101,7 +130,8 @@ struct local_i915_gem_userptr { uint64_t user_ptr; uint32_t user_size; uint32_t flags; -#define I915_USERPTR_READ_ONLY 0x1 +#define I915_USERPTR_READ_ONLY (1<<0) +#define I915_USERPTR_UNSYNCHRONIZED (1<<31) uint32_t handle; }; @@ -170,13 +200,13 @@ static void kgem_sna_flush(struct kgem *kgem) sna_render_flush_solid(sna); } -static int gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) +static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) { struct drm_i915_gem_set_tiling set_tiling; int ret; if (DBG_NO_TILING) - return I915_TILING_NONE; + return false; VG_CLEAR(set_tiling); do { @@ -186,7 +216,7 @@ static int gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); - return set_tiling.tiling_mode; + return ret == 0; } static bool gem_set_cacheing(int fd, uint32_t handle, int cacheing) @@ -206,14 +236,17 @@ static uint32_t gem_userptr(int fd, void *ptr, int size, int read_only) VG_CLEAR(arg); arg.user_ptr = (uintptr_t)ptr; arg.user_size = size; - arg.flags = 0; + arg.flags = I915_USERPTR_UNSYNCHRONIZED; if (read_only) arg.flags |= I915_USERPTR_READ_ONLY; if (drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) { - DBG(("%s: failed to map %p + %d bytes: %d\n", - __FUNCTION__, ptr, size, errno)); - return 0; + arg.flags &= ~I915_USERPTR_UNSYNCHRONIZED; + if (drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) { + DBG(("%s: failed to map %p + %d bytes: %d\n", + __FUNCTION__, ptr, size, errno)); + return 0; + } } return arg.handle; @@ -262,6 +295,11 @@ retry_gtt: if (kgem_expire_cache(kgem)) goto retry_gtt; + if (kgem->need_expire) { + kgem_cleanup_cache(kgem); + goto retry_gtt; + } + return NULL; } @@ -274,6 +312,11 @@ retry_mmap: if (__kgem_throttle_retire(kgem, 0)) goto retry_mmap; + if (kgem->need_expire) { + kgem_cleanup_cache(kgem); + goto retry_mmap; + } + ptr = NULL; } @@ -345,8 +388,7 @@ static int gem_read(int fd, uint32_t handle, const void *dst, return 0; } -static bool -kgem_busy(struct kgem *kgem, int handle) +bool __kgem_busy(struct kgem *kgem, int handle) { struct drm_i915_gem_busy busy; @@ -360,26 +402,23 @@ kgem_busy(struct kgem *kgem, int handle) return busy.busy; } -void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo) +static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo) { - DBG(("%s: handle=%d, domain=%d\n", - __FUNCTION__, bo->handle, bo->domain)); - 
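
The reworked kgem_bo_retire() below only drops a bo's busy tracking once the kernel confirms it idle through the I915_GEM_BUSY ioctl wrapped by __kgem_busy(); the query itself is just the following (a sketch of the wrapper, with an ioctl error treated as idle):

	#include <stdbool.h>
	#include <stdint.h>
	#include <string.h>
	#include <xf86drm.h>
	#include <i915_drm.h>

	static bool bo_is_busy(int fd, uint32_t handle)
	{
		struct drm_i915_gem_busy busy;

		memset(&busy, 0, sizeof(busy));
		busy.handle = handle;
		if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
			return false;	/* query failed: assume idle */
		return busy.busy != 0;
	}
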
assert(bo->flush || !kgem_busy(kgem, bo->handle)); - - if (bo->rq) - kgem_retire(kgem); - - if (bo->exec == NULL) { - DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d\n", - __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL)); - assert(list_is_empty(&bo->vma)); - bo->rq = NULL; - list_del(&bo->request); + DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n", + __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL, + __kgem_busy(kgem, bo->handle))); + assert(bo->exec == NULL); + assert(list_is_empty(&bo->vma)); - bo->needs_flush = false; + if (bo->rq) { + if (!__kgem_busy(kgem, bo->handle)) { + __kgem_bo_clear_busy(bo); + kgem_retire(kgem); + } + } else { + assert(!bo->needs_flush); + ASSERT_IDLE(kgem, bo->handle); } - - bo->domain = DOMAIN_NONE; } bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo, @@ -387,15 +426,18 @@ bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo, { assert(bo->refcnt); assert(!bo->purged); - assert(bo->flush || !kgem_busy(kgem, bo->handle)); assert(bo->proxy == NULL); + ASSERT_IDLE(kgem, bo->handle); assert(length <= bytes(bo)); if (gem_write(kgem->fd, bo->handle, 0, length, data)) return false; DBG(("%s: flush=%d, domain=%d\n", __FUNCTION__, bo->flush, bo->domain)); - kgem_bo_retire(kgem, bo); + if (bo->exec == NULL) { + kgem_bo_retire(kgem, bo); + bo->domain = DOMAIN_NONE; + } return true; } @@ -490,10 +532,19 @@ static void gem_close(int fd, uint32_t handle) constant inline static unsigned long __fls(unsigned long word) { +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86__) || defined(__x86_64__)) asm("bsr %1,%0" : "=r" (word) : "rm" (word)); return word; +#else + unsigned int v = 0; + + while (word >>= 1) + v++; + + return v; +#endif } constant inline static int cache_bucket(int num_pages) @@ -509,6 +560,7 @@ static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo, bo->refcnt = 1; bo->handle = handle; + bo->target_handle = -1; num_pages(bo) = num_pages; bucket(bo) = cache_bucket(num_pages); bo->reusable = true; @@ -536,9 +588,7 @@ static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages) return __kgem_bo_init(bo, handle, num_pages); } -static struct kgem_request _kgem_static_request; - -static struct kgem_request *__kgem_request_alloc(void) +static struct kgem_request *__kgem_request_alloc(struct kgem *kgem) { struct kgem_request *rq; @@ -548,7 +598,7 @@ static struct kgem_request *__kgem_request_alloc(void) } else { rq = malloc(sizeof(*rq)); if (rq == NULL) - rq = &_kgem_static_request; + rq = &kgem->static_request; } list_init(&rq->buffers); @@ -567,11 +617,15 @@ static void __kgem_request_free(struct kgem_request *rq) static struct list *inactive(struct kgem *kgem, int num_pages) { + assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE); + assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS); return &kgem->inactive[cache_bucket(num_pages)]; } static struct list *active(struct kgem *kgem, int num_pages, int tiling) { + assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE); + assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS); return &kgem->active[cache_bucket(num_pages)][tiling]; } @@ -581,7 +635,7 @@ agp_aperture_size(struct pci_device *dev, unsigned gen) /* XXX assume that only future chipsets are unknown and follow * the post gen2 PCI layout. */ - return dev->regions[gen < 30 ? 0 : 2].size; + return dev->regions[gen < 030 ? 
0 : 2].size; } static size_t @@ -636,6 +690,35 @@ static int gem_param(struct kgem *kgem, int name) return v; } +static bool test_has_execbuffer2(struct kgem *kgem) +{ + struct drm_i915_gem_execbuffer2 execbuf; + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffer_count = 1; + + return (drmIoctl(kgem->fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2, + &execbuf) == -1 && + errno == EFAULT); +} + +static bool test_has_no_reloc(struct kgem *kgem) +{ + if (DBG_NO_FAST_RELOC) + return false; + + return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0; +} + +static bool test_has_handle_lut(struct kgem *kgem) +{ + if (DBG_NO_HANDLE_LUT) + return false; + + return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0; +} + static bool test_has_semaphores_enabled(struct kgem *kgem) { FILE *file; @@ -674,6 +757,9 @@ static bool is_hw_supported(struct kgem *kgem, if (DBG_NO_HW) return false; + if (!test_has_execbuffer2(kgem)) + return false; + if (kgem->gen == (unsigned)-1) /* unknown chipset, assume future gen */ return kgem->has_blt; @@ -682,12 +768,12 @@ static bool is_hw_supported(struct kgem *kgem, * hw acceleration. */ - if (kgem->gen == 60 && dev->revision < 8) { + if (kgem->gen == 060 && dev->revision < 8) { /* pre-production SNB with dysfunctional BLT */ return false; } - if (kgem->gen >= 60) /* Only if the kernel supports the BLT ring */ + if (kgem->gen >= 060) /* Only if the kernel supports the BLT ring */ return kgem->has_blt; return true; @@ -695,11 +781,11 @@ static bool is_hw_supported(struct kgem *kgem, static bool test_has_relaxed_fencing(struct kgem *kgem) { - if (kgem->gen < 40) { + if (kgem->gen < 040) { if (DBG_NO_RELAXED_FENCING) return false; - return gem_param(kgem, I915_PARAM_HAS_RELAXED_FENCING) > 0; + return gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_FENCING) > 0; } else return true; } @@ -716,7 +802,7 @@ static bool test_has_llc(struct kgem *kgem) #endif if (has_llc == -1) { DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__)); - has_llc = kgem->gen >= 60; + has_llc = kgem->gen >= 060; } return has_llc; @@ -731,7 +817,7 @@ static bool test_has_cacheing(struct kgem *kgem) return false; /* Incoherent blt and sampler hangs the GPU */ - if (kgem->gen == 40) + if (kgem->gen == 040) return false; handle = gem_create(kgem->fd, 1); @@ -753,7 +839,7 @@ static bool test_has_userptr(struct kgem *kgem) return false; /* Incoherent blt and sampler hangs the GPU */ - if (kgem->gen == 40) + if (kgem->gen == 040) return false; ptr = malloc(PAGE_SIZE); @@ -767,13 +853,101 @@ static bool test_has_userptr(struct kgem *kgem) #endif } +static bool test_has_secure_batches(struct kgem *kgem) +{ + if (DBG_NO_SECURE_BATCHES) + return false; + + return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0; +} + +static bool test_has_pinned_batches(struct kgem *kgem) +{ + if (DBG_NO_PINNED_BATCHES) + return false; + + return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0; +} + static int kgem_get_screen_index(struct kgem *kgem) { struct sna *sna = container_of(kgem, struct sna, kgem); return sna->scrn->scrnIndex; } -void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen) +static bool kgem_init_pinned_batches(struct kgem *kgem) +{ + int count[2] = { 16, 4 }; + int size[2] = { 1, 4 }; + int n, i; + + if (kgem->wedged) + return true; + + for (n = 0; n < ARRAY_SIZE(count); n++) { + for (i = 0; i < count[n]; i++) { + struct drm_i915_gem_pin pin; + struct kgem_bo *bo; + + VG_CLEAR(pin); + + pin.handle = 
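/*
 * test_has_execbuffer2() above probes for the ioctl with a request that
 * is deliberately invalid: buffer_count = 1 but a NULL buffers pointer.
 * A kernel that implements EXECBUFFER2 gets as far as copying the
 * buffer list from user space and fails with EFAULT; a kernel that does
 * not recognize the ioctl rejects it earlier with a different errno.
 * The idiom in isolation (name and framing are illustrative, assuming
 * <errno.h>):
 */
static bool sketch_kernel_knows_ioctl(int fd, unsigned long request, void *bogus_arg)
{
        /* EFAULT means the kernel accepted the ioctl and went on to
         * read our intentionally bogus argument block. */
        return drmIoctl(fd, request, bogus_arg) == -1 && errno == EFAULT;
}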
gem_create(kgem->fd, size[n]); + if (pin.handle == 0) + goto err; + + DBG(("%s: new handle=%d, num_pages=%d\n", + __FUNCTION__, pin.handle, size[n])); + + bo = __kgem_bo_alloc(pin.handle, size[n]); + if (bo == NULL) { + gem_close(kgem->fd, pin.handle); + goto err; + } + + pin.alignment = 0; + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin)) { + gem_close(kgem->fd, pin.handle); + goto err; + } + bo->presumed_offset = pin.offset; + debug_alloc__bo(kgem, bo); + list_add(&bo->list, &kgem->pinned_batches[n]); + } + } + + return true; + +err: + for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) { + while (!list_is_empty(&kgem->pinned_batches[n])) { + kgem_bo_destroy(kgem, + list_first_entry(&kgem->pinned_batches[n], + struct kgem_bo, list)); + } + } + + /* For simplicity populate the lists with a single unpinned bo */ + for (n = 0; n < ARRAY_SIZE(count); n++) { + struct kgem_bo *bo; + uint32_t handle; + + handle = gem_create(kgem->fd, size[n]); + if (handle == 0) + break; + + bo = __kgem_bo_alloc(handle, size[n]); + if (bo == NULL) { + gem_close(kgem->fd, handle); + break; + } + + debug_alloc__bo(kgem, bo); + list_add(&bo->list, &kgem->pinned_batches[n]); + } + return false; +} + +void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) { struct drm_i915_gem_get_aperture aperture; size_t totalram; @@ -787,12 +961,36 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen) kgem->fd = fd; kgem->gen = gen; - kgem->has_blt = gem_param(kgem, I915_PARAM_HAS_BLT) > 0; + list_init(&kgem->requests[0]); + list_init(&kgem->requests[1]); + list_init(&kgem->batch_buffers); + list_init(&kgem->active_buffers); + list_init(&kgem->flushing); + list_init(&kgem->large); + list_init(&kgem->large_inactive); + list_init(&kgem->snoop); + list_init(&kgem->scanout); + for (i = 0; i < ARRAY_SIZE(kgem->pinned_batches); i++) + list_init(&kgem->pinned_batches[i]); + for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) + list_init(&kgem->inactive[i]); + for (i = 0; i < ARRAY_SIZE(kgem->active); i++) { + for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++) + list_init(&kgem->active[i][j]); + } + for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) { + for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++) + list_init(&kgem->vma[i].inactive[j]); + } + kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE; + kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE; + + kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0; DBG(("%s: has BLT ring? %d\n", __FUNCTION__, kgem->has_blt)); kgem->has_relaxed_delta = - gem_param(kgem, I915_PARAM_HAS_RELAXED_DELTA) > 0; + gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0; DBG(("%s: has relaxed delta? %d\n", __FUNCTION__, kgem->has_relaxed_delta)); @@ -812,16 +1010,32 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen) DBG(("%s: has userptr? %d\n", __FUNCTION__, kgem->has_userptr)); + kgem->has_no_reloc = test_has_no_reloc(kgem); + DBG(("%s: has no-reloc? %d\n", __FUNCTION__, + kgem->has_no_reloc)); + + kgem->has_handle_lut = test_has_handle_lut(kgem); + DBG(("%s: has handle-lut? %d\n", __FUNCTION__, + kgem->has_handle_lut)); + kgem->has_semaphores = false; if (kgem->has_blt && test_has_semaphores_enabled(kgem)) kgem->has_semaphores = true; DBG(("%s: semaphores enabled? %d\n", __FUNCTION__, kgem->has_semaphores)); - kgem->can_blt_cpu = gen >= 30; + kgem->can_blt_cpu = gen >= 030; DBG(("%s: can blt to cpu? 
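/*
 * The pool built by kgem_init_pinned_batches() holds two buckets of
 * pre-pinned batch buffers -- sixteen 1-page batches and four 4-page
 * batches -- which kgem_create_batch() later recycles instead of
 * allocating a fresh bo per submission.  Bucket selection is simply the
 * required batch size; as a sketch:
 */
static inline int sketch_pinned_bucket(int bytes_needed)
{
        return bytes_needed > 4096;     /* 0: 4 KiB batches, 1: 16 KiB batches */
}
/*
 * If pinning fails, the same lists are repopulated with ordinary
 * unpinned bos so the submission path keeps a single code path, at the
 * cost of an explicit sync when a recycled batch is still busy.
 */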
%d\n", __FUNCTION__, kgem->can_blt_cpu)); + kgem->has_secure_batches = test_has_secure_batches(kgem); + DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__, + kgem->has_secure_batches)); + + kgem->has_pinned_batches = test_has_pinned_batches(kgem); + DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__, + kgem->has_pinned_batches)); + if (!is_hw_supported(kgem, dev)) { xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, "Detected unsupported/dysfunctional hardware, disabling acceleration.\n"); @@ -833,47 +1047,35 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen) } kgem->batch_size = ARRAY_SIZE(kgem->batch); - if (gen == 22) + if (gen == 020 && !kgem->has_pinned_batches) + /* Limited to what we can pin */ + kgem->batch_size = 4*1024; + if (gen == 022) /* 865g cannot handle a batch spanning multiple pages */ kgem->batch_size = PAGE_SIZE / sizeof(uint32_t); - if (gen >= 70 && gen < 80) + if ((gen >> 3) == 7) kgem->batch_size = 16*1024; if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024) kgem->batch_size = 4*1024; + if (!kgem_init_pinned_batches(kgem) && gen == 020) { + xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, + "Unable to reserve memory for GPU, disabling acceleration.\n"); + kgem->wedged = 1; + } + DBG(("%s: maximum batch size? %d\n", __FUNCTION__, kgem->batch_size)); kgem->min_alignment = 4; - if (gen < 40) + if (gen < 040) kgem->min_alignment = 64; kgem->half_cpu_cache_pages = cpu_cache_size() >> 13; DBG(("%s: half cpu cache %d pages\n", __FUNCTION__, kgem->half_cpu_cache_pages)); - list_init(&kgem->requests[0]); - list_init(&kgem->requests[1]); - list_init(&kgem->batch_buffers); - list_init(&kgem->active_buffers); - list_init(&kgem->flushing); - list_init(&kgem->large); - list_init(&kgem->large_inactive); - list_init(&kgem->snoop); - for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) - list_init(&kgem->inactive[i]); - for (i = 0; i < ARRAY_SIZE(kgem->active); i++) { - for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++) - list_init(&kgem->active[i][j]); - } - for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) { - for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++) - list_init(&kgem->vma[i].inactive[j]); - } - kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE; - kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE; - - kgem->next_request = __kgem_request_alloc(); + kgem->next_request = __kgem_request_alloc(kgem); DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? 
%d\n", __FUNCTION__, !DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_cacheing), @@ -885,10 +1087,15 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen) if (aperture.aper_size == 0) aperture.aper_size = 64*1024*1024; + DBG(("%s: aperture size %lld, available now %lld\n", + __FUNCTION__, + (long long)aperture.aper_size, + (long long)aperture.aper_available_size)); + kgem->aperture_total = aperture.aper_size; kgem->aperture_high = aperture.aper_size * 3/4; kgem->aperture_low = aperture.aper_size * 1/3; - if (gen < 33) { + if (gen < 033) { /* Severe alignment penalties */ kgem->aperture_high /= 2; kgem->aperture_low /= 2; @@ -907,21 +1114,15 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen) kgem->buffer_size = 64 * 1024; while (kgem->buffer_size < kgem->aperture_mappable >> 10) kgem->buffer_size *= 2; + if (kgem->buffer_size >> 12 > kgem->half_cpu_cache_pages) + kgem->buffer_size = kgem->half_cpu_cache_pages << 12; DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__, kgem->buffer_size, kgem->buffer_size / 1024)); - kgem->max_object_size = 2 * aperture.aper_size / 3; + kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10; kgem->max_gpu_size = kgem->max_object_size; if (!kgem->has_llc) kgem->max_gpu_size = MAX_CACHE_SIZE; - if (gen < 40) { - /* If we have to use fences for blitting, we have to make - * sure we can fit them into the aperture. - */ - kgem->max_gpu_size = kgem->aperture_mappable / 2; - if (kgem->max_gpu_size > kgem->aperture_low) - kgem->max_gpu_size = kgem->aperture_low; - } totalram = total_ram_size(); if (totalram == 0) { @@ -935,12 +1136,9 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen) if (kgem->max_gpu_size > totalram / 4) kgem->max_gpu_size = totalram / 4; - half_gpu_max = kgem->max_gpu_size / 2; - if (kgem->gen >= 40) - kgem->max_cpu_size = half_gpu_max; - else - kgem->max_cpu_size = kgem->max_object_size; + kgem->max_cpu_size = kgem->max_object_size; + half_gpu_max = kgem->max_gpu_size / 2; kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2; if (kgem->max_copy_tile_size > half_gpu_max) kgem->max_copy_tile_size = half_gpu_max; @@ -981,6 +1179,14 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen) if ((int)kgem->fence_max < 0) kgem->fence_max = 5; /* minimum safe value for all hw */ DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max)); + + kgem->batch_flags_base = 0; + if (kgem->has_no_reloc) + kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC; + if (kgem->has_handle_lut) + kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT; + if (kgem->has_pinned_batches) + kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED; } /* XXX hopefully a good approximation */ @@ -1013,9 +1219,9 @@ static uint32_t kgem_untiled_pitch(struct kgem *kgem, void kgem_get_tile_size(struct kgem *kgem, int tiling, int *tile_width, int *tile_height, int *tile_size) { - if (kgem->gen <= 30) { + if (kgem->gen <= 030) { if (tiling) { - if (kgem->gen < 30) { + if (kgem->gen < 030) { *tile_width = 128; *tile_height = 16; *tile_size = 2048; @@ -1064,14 +1270,14 @@ static uint32_t kgem_surface_size(struct kgem *kgem, assert(width <= MAXSHORT); assert(height <= MAXSHORT); - if (kgem->gen <= 30) { + if (kgem->gen <= 030) { if (tiling) { - if (kgem->gen < 30) { + if (kgem->gen < 030) { tile_width = 128; - tile_height = 16; + tile_height = 32; } else { tile_width = 512; - tile_height = 8; + tile_height = 16; } } else { tile_width = 2 * bpp >> 3; @@ -1087,19 +1293,21 @@ static 
uint32_t kgem_surface_size(struct kgem *kgem, kgem_pitch_alignment(kgem, flags)); tile_height = 2; break; + + /* XXX align to an even tile row */ case I915_TILING_X: tile_width = 512; - tile_height = 8; + tile_height = 16; break; case I915_TILING_Y: tile_width = 128; - tile_height = 32; + tile_height = 64; break; } *pitch = ALIGN(width * bpp / 8, tile_width); height = ALIGN(height, tile_height); - if (kgem->gen >= 40) + if (kgem->gen >= 040) return PAGE_ALIGN(*pitch * height); /* If it is too wide for the blitter, don't even bother. */ @@ -1120,7 +1328,7 @@ static uint32_t kgem_surface_size(struct kgem *kgem, return PAGE_ALIGN(size); /* We need to allocate a pot fence region for a tiled buffer. */ - if (kgem->gen < 30) + if (kgem->gen < 030) tile_width = 512 * 1024; else tile_width = 1024 * 1024; @@ -1134,18 +1342,19 @@ static uint32_t kgem_aligned_height(struct kgem *kgem, { uint32_t tile_height; - if (kgem->gen <= 30) { - tile_height = tiling ? kgem->gen < 30 ? 16 : 8 : 1; + if (kgem->gen <= 030) { + tile_height = tiling ? kgem->gen < 030 ? 32 : 16 : 1; } else switch (tiling) { + /* XXX align to an even tile row */ default: case I915_TILING_NONE: - tile_height = 2; + tile_height = 1; break; case I915_TILING_X: - tile_height = 8; + tile_height = 16; break; case I915_TILING_Y: - tile_height = 32; + tile_height = 64; break; } @@ -1161,6 +1370,7 @@ kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo) __FUNCTION__, bo->handle, kgem->nexec)); assert(kgem->nexec < ARRAY_SIZE(kgem->exec)); + bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle; exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec)); exec->handle = bo->handle; exec->offset = bo->presumed_offset; @@ -1170,10 +1380,10 @@ kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo) return exec; } -void _kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo) +static void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo) { bo->exec = kgem_add_handle(kgem, bo); - bo->rq = kgem->next_request; + bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring); list_move_tail(&bo->request, &kgem->next_request->buffers); @@ -1194,14 +1404,30 @@ static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo) { int n; - for (n = 0; n < kgem->nreloc; n++) { - if (kgem->reloc[n].target_handle == 0) { - kgem->reloc[n].target_handle = bo->handle; - kgem->reloc[n].presumed_offset = bo->presumed_offset; - kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = - kgem->reloc[n].delta + bo->presumed_offset; + if (kgem->nreloc__self == 0) + return; + + for (n = 0; n < kgem->nreloc__self; n++) { + int i = kgem->reloc__self[n]; + assert(kgem->reloc[i].target_handle == ~0U); + kgem->reloc[i].target_handle = bo->target_handle; + kgem->reloc[i].presumed_offset = bo->presumed_offset; + kgem->batch[kgem->reloc[i].offset/sizeof(kgem->batch[0])] = + kgem->reloc[i].delta + bo->presumed_offset; + } + + if (n == 256) { + for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) { + if (kgem->reloc[n].target_handle == ~0U) { + kgem->reloc[n].target_handle = bo->target_handle; + kgem->reloc[n].presumed_offset = bo->presumed_offset; + kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = + kgem->reloc[n].delta + bo->presumed_offset; + } } + } + } static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo) @@ -1284,11 +1510,12 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem, assert(bo->rq == NULL); assert(bo->exec == NULL); assert(bo->domain != DOMAIN_GPU); - assert(!kgem_busy(kgem, bo->handle)); assert(!bo->proxy); 
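/*
 * kgem_add_bo() above tags the request pointer with the ring it was
 * submitted on via MAKE_REQUEST() (defined near the top of this diff),
 * exploiting the alignment of struct kgem_request so the low bits are
 * free.  The matching decoders do not appear in these hunks; they
 * presumably look like:
 */
#define RQ(rq)      ((struct kgem_request *)((uintptr_t)(rq) & ~3))    /* assumed */
#define RQ_RING(rq) ((uintptr_t)(rq) & 3)                              /* assumed */
/*
 * This is why later assertions compare RQ(bo->rq) rather than bo->rq
 * itself, and why "bo->rq = (void *)kgem" works as a distinct sentinel
 * for bos parked on the flushing list.
 */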
assert(!bo->io); + assert(!bo->scanout); assert(!bo->needs_flush); assert(list_is_empty(&bo->vma)); + ASSERT_IDLE(kgem, bo->handle); kgem->need_expire = true; @@ -1302,7 +1529,7 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem, if (bo->map) { int type = IS_CPU_MAP(bo->map); if (bucket(bo) >= NUM_CACHE_BUCKETS || - (!type && !kgem_bo_is_mappable(kgem, bo))) { + (!type && !__kgem_bo_is_mappable(kgem, bo))) { munmap(MAP(bo->map), bytes(bo)); bo->map = NULL; } @@ -1313,6 +1540,32 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem, } } +static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo) +{ + struct kgem_bo *base; + + if (!bo->io) + return bo; + + assert(!bo->snoop); + base = malloc(sizeof(*base)); + if (base) { + DBG(("%s: transferring io handle=%d to bo\n", + __FUNCTION__, bo->handle)); + /* transfer the handle to a minimum bo */ + memcpy(base, bo, sizeof(*base)); + base->io = false; + list_init(&base->list); + list_replace(&bo->request, &base->request); + list_replace(&bo->vma, &base->vma); + free(bo); + bo = base; + } else + bo->reusable = false; + + return bo; +} + inline static void kgem_bo_remove_from_inactive(struct kgem *kgem, struct kgem_bo *bo) { @@ -1335,16 +1588,14 @@ inline static void kgem_bo_remove_from_active(struct kgem *kgem, list_del(&bo->list); assert(bo->rq != NULL); - if (bo->rq == &_kgem_static_request) + if (bo->rq == (void *)kgem) list_del(&bo->request); assert(list_is_empty(&bo->vma)); } static void kgem_bo_clear_scanout(struct kgem *kgem, struct kgem_bo *bo) { - if (!bo->scanout) - return; - + assert(bo->scanout); assert(bo->proxy == NULL); DBG(("%s: handle=%d, fb=%d (reusable=%d)\n", @@ -1356,7 +1607,6 @@ static void kgem_bo_clear_scanout(struct kgem *kgem, struct kgem_bo *bo) } bo->scanout = false; - bo->needs_flush = true; bo->flush = false; bo->reusable = true; @@ -1376,6 +1626,20 @@ static void _kgem_bo_delete_buffer(struct kgem *kgem, struct kgem_bo *bo) io->used = bo->delta; } +static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo) +{ + assert(bo->refcnt == 0); + assert(bo->exec == NULL); + assert(bo->scanout); + assert(bo->delta); + assert(!bo->snoop); + assert(!bo->io); + + DBG(("%s: moving %d [fb %d] to scanout cachee\n", __FUNCTION__, + bo->handle, bo->delta)); + list_move(&bo->list, &kgem->scanout); +} + static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo) { assert(bo->refcnt == 0); @@ -1416,6 +1680,7 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) list_for_each_entry(bo, &kgem->snoop, list) { assert(bo->refcnt == 0); assert(bo->snoop); + assert(!bo->scanout); assert(bo->proxy == NULL); assert(bo->tiling == I915_TILING_NONE); assert(bo->rq == NULL); @@ -1462,7 +1727,6 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) assert(bo->proxy == NULL); bo->binding.offset = 0; - kgem_bo_clear_scanout(kgem, bo); if (DBG_NO_CACHE) goto destroy; @@ -1471,39 +1735,22 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) DBG(("%s: handle=%d is snooped\n", __FUNCTION__, bo->handle)); assert(!bo->flush); assert(list_is_empty(&bo->list)); + if (bo->exec == NULL && bo->rq && !__kgem_busy(kgem, bo->handle)) + __kgem_bo_clear_busy(bo); if (bo->rq == NULL) { - if (bo->needs_flush && kgem_busy(kgem, bo->handle)) { - DBG(("%s: handle=%d is snooped, tracking until free\n", - __FUNCTION__, bo->handle)); - list_add(&bo->request, &kgem->flushing); - bo->rq = &_kgem_static_request; - } - } - if (bo->rq == NULL) + 
assert(!bo->needs_flush); kgem_bo_move_to_snoop(kgem, bo); + } return; } - if (bo->io) { - struct kgem_bo *base; - - assert(!bo->snoop); - base = malloc(sizeof(*base)); - if (base) { - DBG(("%s: transferring io handle=%d to bo\n", - __FUNCTION__, bo->handle)); - /* transfer the handle to a minimum bo */ - memcpy(base, bo, sizeof(*base)); - base->io = false; - list_init(&base->list); - list_replace(&bo->request, &base->request); - list_replace(&bo->vma, &base->vma); - free(bo); - bo = base; - } else - bo->reusable = false; + if (bo->scanout) { + kgem_bo_move_to_scanout(kgem, bo); + return; } + if (bo->io) + bo = kgem_bo_replace_io(bo); if (!bo->reusable) { DBG(("%s: handle=%d, not reusable\n", __FUNCTION__, bo->handle)); @@ -1519,6 +1766,20 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) assert(bo->io == false); assert(bo->scanout == false); + if (bo->exec && kgem->nexec == 1) { + DBG(("%s: only handle in batch, discarding last operations\n", + __FUNCTION__)); + assert(bo->exec == &kgem->exec[0]); + assert(kgem->exec[0].handle == bo->handle); + assert(RQ(bo->rq) == kgem->next_request); + bo->refcnt = 1; + kgem_reset(kgem); + bo->refcnt = 0; + } + + if (bo->rq && bo->exec == NULL && !__kgem_busy(kgem, bo->handle)) + __kgem_bo_clear_busy(bo); + if (bo->rq) { struct list *cache; @@ -1534,26 +1795,6 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) assert(bo->exec == NULL); assert(list_is_empty(&bo->request)); - if (bo->needs_flush) { - if ((bo->needs_flush = kgem_busy(kgem, bo->handle))) { - struct list *cache; - - DBG(("%s: handle=%d -> flushing\n", - __FUNCTION__, bo->handle)); - - list_add(&bo->request, &kgem->flushing); - if (bucket(bo) < NUM_CACHE_BUCKETS) - cache = &kgem->active[bucket(bo)][bo->tiling]; - else - cache = &kgem->large; - list_add(&bo->list, cache); - bo->rq = &_kgem_static_request; - return; - } - - bo->domain = DOMAIN_NONE; - } - if (!IS_CPU_MAP(bo->map)) { if (!kgem_bo_set_purgeable(kgem, bo)) goto destroy; @@ -1627,27 +1868,27 @@ static bool kgem_retire__flushing(struct kgem *kgem) bool retired = false; list_for_each_entry_safe(bo, next, &kgem->flushing, request) { - assert(bo->rq == &_kgem_static_request); + assert(bo->rq == (void *)kgem); assert(bo->exec == NULL); - if (kgem_busy(kgem, bo->handle)) + if (__kgem_busy(kgem, bo->handle)) break; - bo->needs_flush = false; - bo->domain = DOMAIN_NONE; - bo->rq = NULL; - list_del(&bo->request); + __kgem_bo_clear_busy(bo); - if (!bo->refcnt) { - if (bo->snoop) { - kgem_bo_move_to_snoop(kgem, bo); - } else if (kgem_bo_set_purgeable(kgem, bo)) { - assert(bo->reusable); - kgem_bo_move_to_inactive(kgem, bo); - retired = true; - } else - kgem_bo_free(kgem, bo); - } + if (bo->refcnt) + continue; + + if (bo->snoop) { + kgem_bo_move_to_snoop(kgem, bo); + } else if (bo->scanout) { + kgem_bo_move_to_scanout(kgem, bo); + } else if ((bo = kgem_bo_replace_io(bo))->reusable && + kgem_bo_set_purgeable(kgem, bo)) { + kgem_bo_move_to_inactive(kgem, bo); + retired = true; + } else + kgem_bo_free(kgem, bo); } #if HAS_DEBUG_FULL { @@ -1658,149 +1899,143 @@ static bool kgem_retire__flushing(struct kgem *kgem) } #endif + kgem->need_retire |= !list_is_empty(&kgem->flushing); + return retired; } -static bool kgem_retire__requests(struct kgem *kgem) + +static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq) { - struct kgem_bo *bo; bool retired = false; - int n; - for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) { - while (!list_is_empty(&kgem->requests[n])) { - struct kgem_request *rq; + 
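/*
 * The "only handle in batch" shortcut above discards a batch whose sole
 * effect was on the bo now being destroyed.  The refcnt dance is
 * deliberate: kgem_reset() walks the aborted request's buffer list and
 * can free an unreferenced bo, so temporarily holding "bo->refcnt = 1"
 * keeps this bo alive through the reset; the caller then releases it
 * through the normal cache paths.
 */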
DBG(("%s: request %d complete\n", + __FUNCTION__, rq->bo->handle)); - rq = list_first_entry(&kgem->requests[n], - struct kgem_request, - list); - if (kgem_busy(kgem, rq->bo->handle)) - break; - - DBG(("%s: request %d complete\n", - __FUNCTION__, rq->bo->handle)); + while (!list_is_empty(&rq->buffers)) { + struct kgem_bo *bo; - while (!list_is_empty(&rq->buffers)) { - bo = list_first_entry(&rq->buffers, - struct kgem_bo, - request); + bo = list_first_entry(&rq->buffers, + struct kgem_bo, + request); - assert(bo->rq == rq); - assert(bo->exec == NULL); - assert(bo->domain == DOMAIN_GPU); - - list_del(&bo->request); - - if (bo->needs_flush) - bo->needs_flush = kgem_busy(kgem, bo->handle); - if (bo->needs_flush) { - DBG(("%s: moving %d to flushing\n", - __FUNCTION__, bo->handle)); - list_add(&bo->request, &kgem->flushing); - bo->rq = &_kgem_static_request; - } else { - bo->domain = DOMAIN_NONE; - bo->rq = NULL; - } + assert(RQ(bo->rq) == rq); + assert(bo->exec == NULL); + assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE); - if (bo->refcnt) - continue; + list_del(&bo->request); - if (bo->snoop) { - if (bo->needs_flush) { - list_add(&bo->request, &kgem->flushing); - bo->rq = &_kgem_static_request; - } else { - kgem_bo_move_to_snoop(kgem, bo); - } - continue; - } + if (bo->needs_flush) + bo->needs_flush = __kgem_busy(kgem, bo->handle); + if (bo->needs_flush) { + DBG(("%s: moving %d to flushing\n", + __FUNCTION__, bo->handle)); + list_add(&bo->request, &kgem->flushing); + bo->rq = (void *)kgem; + continue; + } - if (!bo->reusable) { - DBG(("%s: closing %d\n", - __FUNCTION__, bo->handle)); - kgem_bo_free(kgem, bo); - continue; - } + bo->domain = DOMAIN_NONE; + bo->rq = NULL; + if (bo->refcnt) + continue; - if (!bo->needs_flush) { - if (kgem_bo_set_purgeable(kgem, bo)) { - kgem_bo_move_to_inactive(kgem, bo); - retired = true; - } else { - DBG(("%s: closing %d\n", - __FUNCTION__, bo->handle)); - kgem_bo_free(kgem, bo); - } - } - } + if (bo->snoop) { + kgem_bo_move_to_snoop(kgem, bo); + } else if (bo->scanout) { + kgem_bo_move_to_scanout(kgem, bo); + } else if ((bo = kgem_bo_replace_io(bo))->reusable && + kgem_bo_set_purgeable(kgem, bo)) { + kgem_bo_move_to_inactive(kgem, bo); + retired = true; + } else { + DBG(("%s: closing %d\n", + __FUNCTION__, bo->handle)); + kgem_bo_free(kgem, bo); + } + } - assert(rq->bo->rq == NULL); - assert(list_is_empty(&rq->bo->request)); - - if (--rq->bo->refcnt == 0) { - if (kgem_bo_set_purgeable(kgem, rq->bo)) { - kgem_bo_move_to_inactive(kgem, rq->bo); - retired = true; - } else { - DBG(("%s: closing %d\n", - __FUNCTION__, rq->bo->handle)); - kgem_bo_free(kgem, rq->bo); - } - } + assert(rq->bo->rq == NULL); + assert(list_is_empty(&rq->bo->request)); - __kgem_request_free(rq); - kgem->num_requests--; + if (--rq->bo->refcnt == 0) { + if (kgem_bo_set_purgeable(kgem, rq->bo)) { + kgem_bo_move_to_inactive(kgem, rq->bo); + retired = true; + } else { + DBG(("%s: closing %d\n", + __FUNCTION__, rq->bo->handle)); + kgem_bo_free(kgem, rq->bo); } + } -#if HAS_DEBUG_FULL - { - int count = 0; + __kgem_request_free(rq); + return retired; +} - list_for_each_entry(bo, &kgem->requests[n], request) - count++; +static bool kgem_retire__requests_ring(struct kgem *kgem, int ring) +{ + bool retired = false; - bo = NULL; - if (!list_is_empty(&kgem->requests[n])) - bo = list_first_entry(&kgem->requests[n], - struct kgem_request, - list)->bo; + while (!list_is_empty(&kgem->requests[ring])) { + struct kgem_request *rq; - ErrorF("%s: ring=%d, %d outstanding requests, oldest=%d\n", - 
__FUNCTION__, n, count, bo ? bo->handle : 0); - } -#endif + rq = list_first_entry(&kgem->requests[ring], + struct kgem_request, + list); + if (__kgem_busy(kgem, rq->bo->handle)) + break; + + retired |= __kgem_retire_rq(kgem, rq); } #if HAS_DEBUG_FULL { + struct kgem_bo *bo; int count = 0; - for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) - list_for_each_entry(bo, &kgem->requests[n], request) - count++; + list_for_each_entry(bo, &kgem->requests[ring], request) + count++; - assert(count == kgem->num_requests); + bo = NULL; + if (!list_is_empty(&kgem->requests[ring])) + bo = list_first_entry(&kgem->requests[ring], + struct kgem_request, + list)->bo; + + ErrorF("%s: ring=%d, %d outstanding requests, oldest=%d\n", + __FUNCTION__, ring, count, bo ? bo->handle : 0); } #endif return retired; } +static bool kgem_retire__requests(struct kgem *kgem) +{ + bool retired = false; + int n; + + for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) { + retired |= kgem_retire__requests_ring(kgem, n); + kgem->need_retire |= !list_is_empty(&kgem->requests[n]); + } + + return retired; +} + bool kgem_retire(struct kgem *kgem) { bool retired = false; DBG(("%s\n", __FUNCTION__)); + kgem->need_retire = false; + retired |= kgem_retire__flushing(kgem); - if (kgem->num_requests) - retired |= kgem_retire__requests(kgem); + retired |= kgem_retire__requests(kgem); retired |= kgem_retire__buffers(kgem); - kgem->need_retire = - kgem->num_requests || - !list_is_empty(&kgem->flushing); DBG(("%s -- retired=%d, need_retire=%d\n", __FUNCTION__, retired, kgem->need_retire)); @@ -1809,31 +2044,25 @@ bool kgem_retire(struct kgem *kgem) return retired; } -bool __kgem_is_idle(struct kgem *kgem) +bool __kgem_ring_is_idle(struct kgem *kgem, int ring) { - int n; + struct kgem_request *rq; - assert(kgem->num_requests); + assert(!list_is_empty(&kgem->requests[ring])); - for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) { - struct kgem_request *rq; - - if (list_is_empty(&kgem->requests[n])) - continue; + rq = list_last_entry(&kgem->requests[ring], + struct kgem_request, list); + if (__kgem_busy(kgem, rq->bo->handle)) { + DBG(("%s: last requests handle=%d still busy\n", + __FUNCTION__, rq->bo->handle)); + return false; + } - rq = list_last_entry(&kgem->requests[n], - struct kgem_request, list); - if (kgem_busy(kgem, rq->bo->handle)) { - DBG(("%s: last requests handle=%d still busy\n", - __FUNCTION__, rq->bo->handle)); - return false; - } + DBG(("%s: ring=%d idle (handle=%d)\n", + __FUNCTION__, ring, rq->bo->handle)); - DBG(("%s: ring=%d idle (handle=%d)\n", - __FUNCTION__, n, rq->bo->handle)); - } - kgem_retire__requests(kgem); - assert(kgem->num_requests == 0); + kgem_retire__requests_ring(kgem, ring); + assert(list_is_empty(&kgem->requests[ring])); return true; } @@ -1853,10 +2082,11 @@ static void kgem_commit(struct kgem *kgem) assert(!bo->purged); assert(bo->exec); assert(bo->proxy == NULL || bo->exec == &_kgem_dummy_exec); - assert(bo->rq == rq || (bo->proxy->rq == rq)); + assert(RQ(bo->rq) == rq || (RQ(bo->proxy->rq) == rq)); bo->presumed_offset = bo->exec->offset; bo->exec = NULL; + bo->target_handle = -1; if (!bo->refcnt && !bo->reusable) { assert(!bo->snoop); @@ -1870,13 +2100,14 @@ static void kgem_commit(struct kgem *kgem) if (bo->proxy) { /* proxies are not used for domain tracking */ - list_del(&bo->request); - bo->rq = NULL; bo->exec = NULL; + __kgem_bo_clear_busy(bo); } + + kgem->scanout_busy |= bo->scanout; } - if (rq == &_kgem_static_request) { + if (rq == &kgem->static_request) { struct drm_i915_gem_set_domain set_domain; 
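/*
 * The branch being opened here synchronizes immediately when the
 * request had to use the static fallback allocation: moving a bo to the
 * GTT domain blocks in the kernel until the GPU is done with it, which
 * makes SET_DOMAIN double as a "wait for this bo" call.  As a
 * standalone sketch (names illustrative, assuming <string.h>):
 */
static void sketch_bo_wait(int fd, uint32_t handle)
{
        struct drm_i915_gem_set_domain set_domain;

        memset(&set_domain, 0, sizeof(set_domain));
        set_domain.handle = handle;
        set_domain.read_domains = I915_GEM_DOMAIN_GTT;
        set_domain.write_domain = I915_GEM_DOMAIN_GTT;

        /* Blocks until outstanding GPU work on the bo has retired;
         * failure here is how a GPU hang is detected and throttled. */
        (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
}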
DBG(("%s: syncing due to allocation failure\n", __FUNCTION__)); @@ -1894,10 +2125,10 @@ static void kgem_commit(struct kgem *kgem) assert(list_is_empty(&rq->buffers)); gem_close(kgem->fd, rq->bo->handle); + kgem_cleanup_cache(kgem); } else { list_add_tail(&rq->list, &kgem->requests[rq->ring]); kgem->need_throttle = kgem->need_retire = 1; - kgem->num_requests++; } kgem->next_request = NULL; @@ -1946,13 +2177,12 @@ static void kgem_finish_buffers(struct kgem *kgem) assert(!bo->need_io); - used = ALIGN(bo->used + PAGE_SIZE-1, PAGE_SIZE); + used = ALIGN(bo->used, PAGE_SIZE); if (!DBG_NO_UPLOAD_ACTIVE && used + PAGE_SIZE <= bytes(&bo->base) && - (kgem->has_llc || !IS_CPU_MAP(bo->base.map))) { + (kgem->has_llc || !IS_CPU_MAP(bo->base.map) || bo->base.snoop)) { DBG(("%s: retaining upload buffer (%d/%d)\n", __FUNCTION__, bo->used, bytes(&bo->base))); - assert(!bo->base.snoop); bo->used = used; list_move(&bo->base.list, &kgem->active_buffers); @@ -1973,16 +2203,65 @@ static void kgem_finish_buffers(struct kgem *kgem) } assert(bo->need_io); - assert(bo->base.rq == kgem->next_request); + assert(bo->base.rq == MAKE_REQUEST(kgem->next_request, kgem->ring)); assert(bo->base.domain != DOMAIN_GPU); if (bo->base.refcnt == 1 && bo->base.size.pages.count > 1 && bo->used < bytes(&bo->base) / 2) { struct kgem_bo *shrink; + unsigned alloc = NUM_PAGES(bo->used); + + shrink = search_snoop_cache(kgem, alloc, + CREATE_INACTIVE | CREATE_NO_RETIRE); + if (shrink) { + void *map; + int n; - shrink = search_linear_cache(kgem, - PAGE_ALIGN(bo->used), + DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n", + __FUNCTION__, + bo->used, bytes(&bo->base), bytes(shrink), + bo->base.handle, shrink->handle)); + + assert(bo->used <= bytes(shrink)); + map = kgem_bo_map__cpu(kgem, shrink); + if (map) { + kgem_bo_sync__cpu(kgem, shrink); + memcpy(map, bo->mem, bo->used); + + shrink->target_handle = + kgem->has_handle_lut ? bo->base.target_handle : shrink->handle; + for (n = 0; n < kgem->nreloc; n++) { + if (kgem->reloc[n].target_handle == bo->base.target_handle) { + kgem->reloc[n].target_handle = shrink->target_handle; + kgem->reloc[n].presumed_offset = shrink->presumed_offset; + kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = + kgem->reloc[n].delta + shrink->presumed_offset; + } + } + + bo->base.exec->handle = shrink->handle; + bo->base.exec->offset = shrink->presumed_offset; + shrink->exec = bo->base.exec; + shrink->rq = bo->base.rq; + list_replace(&bo->base.request, + &shrink->request); + list_init(&bo->base.request); + shrink->needs_flush = bo->base.dirty; + + bo->base.exec = NULL; + bo->base.rq = NULL; + bo->base.dirty = false; + bo->base.needs_flush = false; + bo->used = 0; + + goto decouple; + } + + __kgem_bo_destroy(kgem, shrink); + } + + shrink = search_linear_cache(kgem, alloc, CREATE_INACTIVE | CREATE_NO_RETIRE); if (shrink) { int n; @@ -1993,40 +2272,44 @@ static void kgem_finish_buffers(struct kgem *kgem) bo->base.handle, shrink->handle)); assert(bo->used <= bytes(shrink)); - gem_write(kgem->fd, shrink->handle, - 0, bo->used, bo->mem); - - for (n = 0; n < kgem->nreloc; n++) { - if (kgem->reloc[n].target_handle == bo->base.handle) { - kgem->reloc[n].target_handle = shrink->handle; - kgem->reloc[n].presumed_offset = shrink->presumed_offset; - kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = - kgem->reloc[n].delta + shrink->presumed_offset; + if (gem_write(kgem->fd, shrink->handle, + 0, bo->used, bo->mem) == 0) { + shrink->target_handle = + kgem->has_handle_lut ? 
bo->base.target_handle : shrink->handle; + for (n = 0; n < kgem->nreloc; n++) { + if (kgem->reloc[n].target_handle == bo->base.target_handle) { + kgem->reloc[n].target_handle = shrink->target_handle; + kgem->reloc[n].presumed_offset = shrink->presumed_offset; + kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = + kgem->reloc[n].delta + shrink->presumed_offset; + } } + + bo->base.exec->handle = shrink->handle; + bo->base.exec->offset = shrink->presumed_offset; + shrink->exec = bo->base.exec; + shrink->rq = bo->base.rq; + list_replace(&bo->base.request, + &shrink->request); + list_init(&bo->base.request); + shrink->needs_flush = bo->base.dirty; + + bo->base.exec = NULL; + bo->base.rq = NULL; + bo->base.dirty = false; + bo->base.needs_flush = false; + bo->used = 0; + + goto decouple; } - bo->base.exec->handle = shrink->handle; - bo->base.exec->offset = shrink->presumed_offset; - shrink->exec = bo->base.exec; - shrink->rq = bo->base.rq; - list_replace(&bo->base.request, - &shrink->request); - list_init(&bo->base.request); - shrink->needs_flush = bo->base.dirty; - - bo->base.exec = NULL; - bo->base.rq = NULL; - bo->base.dirty = false; - bo->base.needs_flush = false; - bo->used = 0; - - goto decouple; + __kgem_bo_destroy(kgem, shrink); } } DBG(("%s: handle=%d, uploading %d/%d\n", __FUNCTION__, bo->base.handle, bo->used, bytes(&bo->base))); - assert(!kgem_busy(kgem, bo->base.handle)); + ASSERT_IDLE(kgem, bo->base.handle); assert(bo->used <= bytes(&bo->base)); gem_write(kgem->fd, bo->base.handle, 0, bo->used, bo->mem); @@ -2058,11 +2341,9 @@ static void kgem_cleanup(struct kgem *kgem) struct kgem_bo, request); - list_del(&bo->request); - bo->rq = NULL; bo->exec = NULL; - bo->domain = DOMAIN_NONE; bo->dirty = false; + __kgem_bo_clear_busy(bo); if (bo->refcnt == 0) kgem_bo_free(kgem, bo); } @@ -2071,7 +2352,6 @@ static void kgem_cleanup(struct kgem *kgem) } } - kgem->num_requests = 0; kgem_close_inactive(kgem); } @@ -2079,7 +2359,7 @@ static int kgem_batch_write(struct kgem *kgem, uint32_t handle, uint32_t size) { int ret; - assert(!kgem_busy(kgem, handle)); + ASSERT_IDLE(kgem, handle); /* If there is no surface data, just upload the batch */ if (kgem->surface == kgem->batch_size) @@ -2122,34 +2402,46 @@ void kgem_reset(struct kgem *kgem) request); list_del(&bo->request); + assert(RQ(bo->rq) == rq); + bo->binding.offset = 0; bo->exec = NULL; + bo->target_handle = -1; bo->dirty = false; - bo->rq = NULL; - bo->domain = DOMAIN_NONE; - if (!bo->refcnt) { + if (bo->needs_flush && __kgem_busy(kgem, bo->handle)) { + list_add(&bo->request, &kgem->flushing); + bo->rq = (void *)kgem; + } else + __kgem_bo_clear_busy(bo); + + if (!bo->refcnt && !bo->reusable) { + assert(!bo->snoop); DBG(("%s: discarding handle=%d\n", __FUNCTION__, bo->handle)); kgem_bo_free(kgem, bo); } } - if (kgem->next_request != &_kgem_static_request) - free(kgem->next_request); + if (rq != &kgem->static_request) { + list_init(&rq->list); + __kgem_request_free(rq); + } } kgem->nfence = 0; kgem->nexec = 0; kgem->nreloc = 0; + kgem->nreloc__self = 0; kgem->aperture = 0; kgem->aperture_fenced = 0; kgem->nbatch = 0; kgem->surface = kgem->batch_size; kgem->mode = KGEM_NONE; kgem->flush = 0; + kgem->batch_flags = kgem->batch_flags_base; - kgem->next_request = __kgem_request_alloc(); + kgem->next_request = __kgem_request_alloc(kgem); kgem_sna_reset(kgem); } @@ -2173,7 +2465,7 @@ static int compact_batch_surface(struct kgem *kgem) shrink *= sizeof(uint32_t); for (n = 0; n < kgem->nreloc; n++) { if (kgem->reloc[n].read_domains == 
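/*
 * Context for compact_batch_surface() below: the batch is filled from
 * both ends -- commands grow up from batch[0] while surface state grows
 * down from batch[batch_size] -- and the hole in the middle is cut out
 * before submission.  Self-relocations (now marked with
 * target_handle == ~0U instead of 0) point from the command stream back
 * into the surface block, so their deltas must shrink by the number of
 * bytes removed:
 *
 *   before: [ commands | ......hole...... | surface state ]
 *   after:  [ commands | surface state ]   delta -= hole bytes
 */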
I915_GEM_DOMAIN_INSTRUCTION && - kgem->reloc[n].target_handle == 0) + kgem->reloc[n].target_handle == ~0U) kgem->reloc[n].delta -= shrink; if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch) @@ -2184,6 +2476,74 @@ static int compact_batch_surface(struct kgem *kgem) return size * sizeof(uint32_t); } +static struct kgem_bo * +kgem_create_batch(struct kgem *kgem, int size) +{ + struct drm_i915_gem_set_domain set_domain; + struct kgem_bo *bo; + + if (size <= 4096) { + bo = list_first_entry(&kgem->pinned_batches[0], + struct kgem_bo, + list); + if (!bo->rq) { +out_4096: + list_move_tail(&bo->list, &kgem->pinned_batches[0]); + return kgem_bo_reference(bo); + } + + if (!__kgem_busy(kgem, bo->handle)) { + assert(RQ(bo->rq)->bo == bo); + __kgem_retire_rq(kgem, RQ(bo->rq)); + goto out_4096; + } + } + + if (size <= 16384) { + bo = list_first_entry(&kgem->pinned_batches[1], + struct kgem_bo, + list); + if (!bo->rq) { +out_16384: + list_move_tail(&bo->list, &kgem->pinned_batches[1]); + return kgem_bo_reference(bo); + } + + if (!__kgem_busy(kgem, bo->handle)) { + assert(RQ(bo->rq)->bo == bo); + __kgem_retire_rq(kgem, RQ(bo->rq)); + goto out_16384; + } + } + + if (kgem->gen == 020 && !kgem->has_pinned_batches) { + assert(size <= 16384); + + bo = list_first_entry(&kgem->pinned_batches[size > 4096], + struct kgem_bo, + list); + list_move_tail(&bo->list, &kgem->pinned_batches[size > 4096]); + + DBG(("%s: syncing due to busy batches\n", __FUNCTION__)); + + VG_CLEAR(set_domain); + set_domain.handle = bo->handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = I915_GEM_DOMAIN_GTT; + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { + DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); + kgem_throttle(kgem); + return NULL; + } + + kgem_retire(kgem); + assert(bo->rq == NULL); + return kgem_bo_reference(bo); + } + + return kgem_create_linear(kgem, size, CREATE_NO_THROTTLE); +} + void _kgem_submit(struct kgem *kgem) { struct kgem_request *rq; @@ -2212,7 +2572,7 @@ void _kgem_submit(struct kgem *kgem) kgem_finish_buffers(kgem); -#if HAS_DEBUG_FULL && SHOW_BATCH +#if SHOW_BATCH __kgem_batch_debug(kgem, batch_end); #endif @@ -2221,7 +2581,7 @@ void _kgem_submit(struct kgem *kgem) size = compact_batch_surface(kgem); else size = kgem->nbatch * sizeof(kgem->batch[0]); - rq->bo = kgem_create_linear(kgem, size, CREATE_NO_THROTTLE); + rq->bo = kgem_create_batch(kgem, size); if (rq->bo) { uint32_t handle = rq->bo->handle; int i; @@ -2233,13 +2593,14 @@ void _kgem_submit(struct kgem *kgem) kgem->exec[i].relocation_count = kgem->nreloc; kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc; kgem->exec[i].alignment = 0; - kgem->exec[i].offset = 0; + kgem->exec[i].offset = rq->bo->presumed_offset; kgem->exec[i].flags = 0; kgem->exec[i].rsvd1 = 0; kgem->exec[i].rsvd2 = 0; + rq->bo->target_handle = kgem->has_handle_lut ? 
i : handle; rq->bo->exec = &kgem->exec[i]; - rq->bo->rq = rq; /* useful sanity check */ + rq->bo->rq = MAKE_REQUEST(rq, kgem->ring); /* useful sanity check */ list_add(&rq->bo->request, &rq->buffers); rq->ring = kgem->ring == KGEM_BLT; @@ -2258,7 +2619,7 @@ void _kgem_submit(struct kgem *kgem) execbuf.num_cliprects = 0; execbuf.DR1 = 0; execbuf.DR4 = 0; - execbuf.flags = kgem->ring; + execbuf.flags = kgem->ring | kgem->batch_flags; execbuf.rsvd1 = 0; execbuf.rsvd2 = 0; @@ -2281,13 +2642,23 @@ void _kgem_submit(struct kgem *kgem) DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); } - if (ret == -1 && (errno == EIO || errno == EBUSY)) { - DBG(("%s: GPU hang detected\n", __FUNCTION__)); - kgem_throttle(kgem); - ret = 0; + if (DEBUG_SYNC && ret == 0) { + struct drm_i915_gem_set_domain set_domain; + + VG_CLEAR(set_domain); + set_domain.handle = handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = I915_GEM_DOMAIN_GTT; + + ret = drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain); } + if (ret == -1) { + DBG(("%s: GPU hang detected [%d]\n", + __FUNCTION__, errno)); + kgem_throttle(kgem); + kgem->wedged = true; + #if !NDEBUG - if (ret < 0) { ret = errno; ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d: errno=%d\n", kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface, @@ -2323,33 +2694,16 @@ void _kgem_submit(struct kgem *kgem) (int)kgem->reloc[i].presumed_offset); } - i = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666); - if (i != -1) { - i = write(i, kgem->batch, batch_end*sizeof(uint32_t)); - (void)i; - } - - FatalError("SNA: failed to submit batchbuffer, errno=%d\n", ret); - } -#endif - - if (DEBUG_FLUSH_SYNC) { - struct drm_i915_gem_set_domain set_domain; - - DBG(("%s: debug sync, starting\n", __FUNCTION__)); - - VG_CLEAR(set_domain); - set_domain.handle = handle; - set_domain.read_domains = I915_GEM_DOMAIN_GTT; - set_domain.write_domain = I915_GEM_DOMAIN_GTT; + if (DEBUG_SYNC) { + int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666); + if (fd != -1) { + write(fd, kgem->batch, batch_end*sizeof(uint32_t)); + close(fd); + } - ret = drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain); - if (ret == -1) { - DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); - kgem_throttle(kgem); + FatalError("SNA: failed to submit batchbuffer, errno=%d\n", ret); } - - DBG(("%s: debug sync, completed\n", __FUNCTION__)); +#endif } } @@ -2425,6 +2779,13 @@ bool kgem_expire_cache(struct kgem *kgem) } + while (!list_is_empty(&kgem->scanout)) { + bo = list_first_entry(&kgem->scanout, struct kgem_bo, list); + list_del(&bo->list); + kgem_bo_clear_scanout(kgem, bo); + __kgem_bo_destroy(kgem, bo); + } + expire = 0; list_for_each_entry(bo, &kgem->snoop, list) { if (bo->delta) { @@ -2619,7 +2980,7 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) return NULL; } - if (!__kgem_throttle_retire(kgem, 0)) { + if (!__kgem_throttle_retire(kgem, flags)) { DBG(("%s: nothing retired\n", __FUNCTION__)); return NULL; } @@ -2642,6 +3003,7 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) assert(bo->proxy == NULL); assert(bo->rq == NULL); assert(bo->exec == NULL); + assert(!bo->scanout); if (num_pages > num_pages(bo)) { DBG(("inactive too small: %d < %d\n", @@ -2655,8 +3017,8 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) } if (I915_TILING_NONE != bo->tiling && - gem_set_tiling(kgem->fd, bo->handle, - I915_TILING_NONE, 0) != 
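/*
 * The submission path now ORs the negotiated feature flags into the
 * execbuffer: with NO_RELOC the kernel trusts the presumed offsets and
 * skips relocation processing when nothing has moved, and with
 * HANDLE_LUT relocations name buffers by their index in the exec array
 * (bo->target_handle) rather than by global handle.  A condensed sketch
 * of the setup, assuming kgem->exec[]/reloc[] are already filled:
 */
static int sketch_submit(struct kgem *kgem, int batch_end)
{
        struct drm_i915_gem_execbuffer2 execbuf;

        memset(&execbuf, 0, sizeof(execbuf));
        execbuf.buffers_ptr = (uintptr_t)kgem->exec;   /* batch bo is exec[nexec-1] */
        execbuf.buffer_count = kgem->nexec;
        execbuf.batch_len = batch_end * sizeof(uint32_t);
        execbuf.flags = kgem->ring | kgem->batch_flags; /* ring | NO_RELOC | HANDLE_LUT | IS_PINNED */

        return drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}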
I915_TILING_NONE) + !gem_set_tiling(kgem->fd, bo->handle, + I915_TILING_NONE, 0)) continue; kgem_bo_remove_from_inactive(kgem, bo); @@ -2668,12 +3030,15 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) __FUNCTION__, bo->handle, num_pages(bo))); assert(use_active || bo->domain != DOMAIN_GPU); assert(!bo->needs_flush); - //assert(!kgem_busy(kgem, bo->handle)); + ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active); return bo; } if (flags & CREATE_EXACT) return NULL; + + if (flags & CREATE_CPU_MAP && !kgem->has_llc) + return NULL; } cache = use_active ? active(kgem, num_pages, I915_TILING_NONE) : inactive(kgem, num_pages); @@ -2682,12 +3047,13 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) assert(bo->reusable); assert(!!bo->rq == !!use_active); assert(bo->proxy == NULL); + assert(!bo->scanout); if (num_pages > num_pages(bo)) continue; if (use_active && - kgem->gen <= 40 && + kgem->gen <= 040 && bo->tiling != I915_TILING_NONE) continue; @@ -2703,11 +3069,12 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) if (first) continue; - if (gem_set_tiling(kgem->fd, bo->handle, - I915_TILING_NONE, 0) != I915_TILING_NONE) + if (!gem_set_tiling(kgem->fd, bo->handle, + I915_TILING_NONE, 0)) continue; bo->tiling = I915_TILING_NONE; + bo->pitch = 0; } if (bo->map) { @@ -2751,7 +3118,7 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) assert(list_is_empty(&bo->list)); assert(use_active || bo->domain != DOMAIN_GPU); assert(!bo->needs_flush || use_active); - //assert(use_active || !kgem_busy(kgem, bo->handle)); + ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active); return bo; } @@ -2771,7 +3138,7 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) assert(list_is_empty(&first->list)); assert(use_active || first->domain != DOMAIN_GPU); assert(!first->needs_flush || use_active); - //assert(use_active || !kgem_busy(kgem, first->handle)); + ASSERT_MAYBE_IDLE(kgem, first->handle, !use_active); return first; } @@ -2878,10 +3245,15 @@ struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags) size = (size + PAGE_SIZE - 1) / PAGE_SIZE; bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags); if (bo) { + assert(bo->domain != DOMAIN_GPU); + ASSERT_IDLE(kgem, bo->handle); bo->refcnt = 1; return bo; } + if (flags & CREATE_CACHED) + return NULL; + handle = gem_create(kgem->fd, size); if (handle == 0) return NULL; @@ -2902,7 +3274,7 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int if (DBG_NO_TILING) return tiling < 0 ? tiling : I915_TILING_NONE; - if (kgem->gen < 40) { + if (kgem->gen < 040) { if (tiling && width * bpp > 8192 * 8) { DBG(("%s: pitch too large for tliing [%d]\n", __FUNCTION__, width*bpp/8)); @@ -2910,13 +3282,17 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int goto done; } } else { + /* XXX rendering to I915_TILING_Y seems broken? 
*/ + if (kgem->gen < 050 && tiling == I915_TILING_Y) + tiling = I915_TILING_X; + if (width*bpp > (MAXSHORT-512) * 8) { - DBG(("%s: large pitch [%d], forcing TILING_X\n", - __FUNCTION__, width*bpp/8)); if (tiling > 0) tiling = -tiling; else if (tiling == 0) tiling = -I915_TILING_X; + DBG(("%s: large pitch [%d], forcing TILING [%d]\n", + __FUNCTION__, width*bpp/8, tiling)); } else if (tiling && (width|height) > 8192) { DBG(("%s: large tiled buffer [%dx%d], forcing TILING_X\n", __FUNCTION__, width, height)); @@ -2927,9 +3303,9 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int if (tiling < 0) return tiling; - if (tiling && height == 1) { - DBG(("%s: disabling tiling [%d] for single row\n", - __FUNCTION__,height)); + if (tiling && (height == 1 || width == 1)) { + DBG(("%s: disabling tiling [%dx%d] for single row/col\n", + __FUNCTION__,width, height)); tiling = I915_TILING_NONE; goto done; } @@ -3004,6 +3380,7 @@ unsigned kgem_can_create_2d(struct kgem *kgem, { uint32_t pitch, size; unsigned flags = 0; + int tiling; int bpp; DBG(("%s: %dx%d @ %d\n", __FUNCTION__, width, height, depth)); @@ -3023,33 +3400,41 @@ unsigned kgem_can_create_2d(struct kgem *kgem, size = kgem_surface_size(kgem, false, 0, width, height, bpp, I915_TILING_NONE, &pitch); - if (size > 0 && size <= kgem->max_cpu_size) - flags |= KGEM_CAN_CREATE_CPU | KGEM_CAN_CREATE_GPU; - if (size > 0 && size <= kgem->aperture_mappable/4) - flags |= KGEM_CAN_CREATE_GTT; - if (size > kgem->large_object_size) - flags |= KGEM_CAN_CREATE_LARGE; - if (size > kgem->max_object_size) { - DBG(("%s: too large (untiled) %d > %d\n", - __FUNCTION__, size, kgem->max_object_size)); - return 0; + DBG(("%s: untiled size=%d\n", __FUNCTION__, size)); + if (size > 0) { + if (size <= kgem->max_cpu_size) + flags |= KGEM_CAN_CREATE_CPU; + if (size <= kgem->max_gpu_size) + flags |= KGEM_CAN_CREATE_GPU; + if (size <= kgem->aperture_mappable/4) + flags |= KGEM_CAN_CREATE_GTT; + if (size > kgem->large_object_size) + flags |= KGEM_CAN_CREATE_LARGE; + if (size > kgem->max_object_size) { + DBG(("%s: too large (untiled) %d > %d\n", + __FUNCTION__, size, kgem->max_object_size)); + return 0; + } } - size = kgem_surface_size(kgem, false, 0, - width, height, bpp, - kgem_choose_tiling(kgem, I915_TILING_X, - width, height, bpp), - &pitch); - if (size > 0 && size <= kgem->max_gpu_size) - flags |= KGEM_CAN_CREATE_GPU; - if (size > 0 && size <= kgem->aperture_mappable/4) - flags |= KGEM_CAN_CREATE_GTT; - if (size > kgem->large_object_size) - flags |= KGEM_CAN_CREATE_LARGE; - if (size > kgem->max_object_size) { - DBG(("%s: too large (tiled) %d > %d\n", - __FUNCTION__, size, kgem->max_object_size)); - return 0; + tiling = kgem_choose_tiling(kgem, I915_TILING_X, + width, height, bpp); + if (tiling != I915_TILING_NONE) { + size = kgem_surface_size(kgem, false, 0, + width, height, bpp, tiling, + &pitch); + DBG(("%s: tiled[%d] size=%d\n", __FUNCTION__, tiling, size)); + if (size > 0 && size <= kgem->max_gpu_size) + flags |= KGEM_CAN_CREATE_GPU; + if (size > 0 && size <= kgem->aperture_mappable/4) + flags |= KGEM_CAN_CREATE_GTT; + if (size > kgem->large_object_size) + flags |= KGEM_CAN_CREATE_LARGE; + if (size > kgem->max_object_size) { + DBG(("%s: too large (tiled) %d > %d\n", + __FUNCTION__, size, kgem->max_object_size)); + return 0; + } } return flags; @@ -3060,9 +3445,9 @@ inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo) unsigned int size; assert(bo->tiling); - assert(kgem->gen < 40); + assert(kgem->gen < 040); - if (kgem->gen < 
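/*
 * The kgem_choose_tiling() heuristics after this hunk, in summary
 * (negative values mean the tiling is forced and skips later checks):
 *   - gen2/3: a pitch above 8 KiB cannot be tiled at all;
 *   - gen4:   Y tiling is demoted to X ("rendering to I915_TILING_Y
 *             seems broken");
 *   - pitches too wide for the blitter force X tiling
 *     (tiling = -I915_TILING_X);
 *   - single-row and single-column surfaces gain nothing from tiling
 *     and now fall back to I915_TILING_NONE.
 */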
30) + if (kgem->gen < 030) size = 512 * 1024; else size = 1024 * 1024; @@ -3104,6 +3489,36 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, size /= PAGE_SIZE; bucket = cache_bucket(size); + if (flags & CREATE_SCANOUT) { + list_for_each_entry(bo, &kgem->scanout, list) { + assert(bo->scanout); + assert(bo->delta); + assert(!bo->purged); + + if (size > num_pages(bo) || num_pages(bo) > 2*size) + continue; + + if (bo->tiling != tiling || + (tiling != I915_TILING_NONE && bo->pitch != pitch)) { + if (!gem_set_tiling(kgem->fd, bo->handle, + tiling, pitch)) + continue; + + bo->tiling = tiling; + bo->pitch = pitch; + } + + list_del(&bo->list); + + bo->unique_id = kgem_get_unique_id(kgem); + DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n", + bo->pitch, bo->tiling, bo->handle, bo->unique_id)); + assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + bo->refcnt = 1; + return bo; + } + } + if (bucket >= NUM_CACHE_BUCKETS) { DBG(("%s: large bo num pages=%d, bucket=%d\n", __FUNCTION__, size, bucket)); @@ -3116,10 +3531,12 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, list_for_each_entry(bo, &kgem->large, list) { assert(!bo->purged); + assert(!bo->scanout); assert(bo->refcnt == 0); assert(bo->reusable); + assert(bo->flush == true); - if (kgem->gen < 40) { + if (kgem->gen < 040) { if (bo->pitch < pitch) { DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n", bo->tiling, tiling, @@ -3134,11 +3551,12 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, continue; if (bo->pitch != pitch || bo->tiling != tiling) { - if (gem_set_tiling(kgem->fd, bo->handle, - tiling, pitch) != tiling) + if (!gem_set_tiling(kgem->fd, bo->handle, + tiling, pitch)) continue; bo->pitch = pitch; + bo->tiling = tiling; } } @@ -3157,16 +3575,19 @@ large_inactive: list_for_each_entry(bo, &kgem->large_inactive, list) { assert(bo->refcnt == 0); assert(bo->reusable); + assert(!bo->scanout); if (size > num_pages(bo)) continue; if (bo->tiling != tiling || (tiling != I915_TILING_NONE && bo->pitch != pitch)) { - if (tiling != gem_set_tiling(kgem->fd, - bo->handle, - tiling, pitch)) + if (!gem_set_tiling(kgem->fd, bo->handle, + tiling, pitch)) continue; + + bo->tiling = tiling; + bo->pitch = pitch; } if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { @@ -3201,10 +3622,12 @@ large_inactive: list_for_each_entry(bo, cache, vma) { assert(bucket(bo) == bucket); assert(bo->refcnt == 0); + assert(!bo->scanout); assert(bo->map); assert(IS_CPU_MAP(bo->map) == for_cpu); assert(bo->rq == NULL); assert(list_is_empty(&bo->request)); + assert(bo->flush == false); if (size > num_pages(bo)) { DBG(("inactive too small: %d < %d\n", @@ -3233,13 +3656,17 @@ large_inactive: DBG((" from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); assert(bo->reusable); - assert(bo->domain != DOMAIN_GPU && !kgem_busy(kgem, bo->handle)); + assert(bo->domain != DOMAIN_GPU); + ASSERT_IDLE(kgem, bo->handle); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); bo->refcnt = 1; return bo; } } while (!list_is_empty(cache) && __kgem_throttle_retire(kgem, flags)); + + if (flags & CREATE_CPU_MAP && !kgem->has_llc) + goto create; } if (flags & CREATE_INACTIVE) @@ -3260,8 +3687,10 @@ search_again: assert(bucket(bo) == bucket); assert(bo->reusable); assert(bo->tiling == tiling); + assert(bo->flush == false); + assert(!bo->scanout); - if (kgem->gen < 40) { + if (kgem->gen < 040) { if (bo->pitch < pitch) { DBG(("tiled 
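/*
 * On pre-gen4 hardware a tiled bo must be covered by a power-of-two
 * fence region: at least 512 KiB on gen2 and 1 MiB on gen3, rounded up
 * from the object size.  A 1.2 MiB X-tiled surface on gen3 therefore
 * costs a 2 MiB fence, which is why the fenced-aperture accounting in
 * the kgem_check_*_fenced() hunks below is so much more pessimistic
 * than the raw page counts.
 */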
and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n", bo->tiling, tiling, @@ -3276,9 +3705,10 @@ search_again: continue; if (bo->pitch != pitch) { - gem_set_tiling(kgem->fd, - bo->handle, - tiling, pitch); + if (!gem_set_tiling(kgem->fd, + bo->handle, + tiling, pitch)) + continue; bo->pitch = pitch; } @@ -3300,7 +3730,9 @@ search_again: assert(!bo->purged); assert(bo->refcnt == 0); assert(bo->reusable); + assert(!bo->scanout); assert(bo->tiling == tiling); + assert(bo->flush == false); if (num_pages(bo) < size) continue; @@ -3319,7 +3751,7 @@ search_again: } if (--retry && flags & CREATE_EXACT) { - if (kgem->gen >= 40) { + if (kgem->gen >= 040) { for (i = I915_TILING_NONE; i <= I915_TILING_Y; i++) { if (i == tiling) continue; @@ -3329,13 +3761,15 @@ search_again: assert(!bo->purged); assert(bo->refcnt == 0); assert(bo->reusable); + assert(!bo->scanout); + assert(bo->flush == false); if (num_pages(bo) < size) continue; - if (tiling != gem_set_tiling(kgem->fd, - bo->handle, - tiling, pitch)) + if (!gem_set_tiling(kgem->fd, + bo->handle, + tiling, pitch)) continue; kgem_bo_remove_from_active(kgem, bo); @@ -3369,6 +3803,8 @@ search_again: assert(!bo->purged); assert(bo->refcnt == 0); assert(bo->reusable); + assert(!bo->scanout); + assert(bo->flush == false); if (bo->tiling) { if (bo->pitch < pitch) { @@ -3408,6 +3844,8 @@ search_inactive: list_for_each_entry(bo, cache, list) { assert(bucket(bo) == bucket); assert(bo->reusable); + assert(!bo->scanout); + assert(bo->flush == false); if (size > num_pages(bo)) { DBG(("inactive too small: %d < %d\n", @@ -3417,9 +3855,8 @@ search_inactive: if (bo->tiling != tiling || (tiling != I915_TILING_NONE && bo->pitch != pitch)) { - if (tiling != gem_set_tiling(kgem->fd, - bo->handle, - tiling, pitch)) + if (!gem_set_tiling(kgem->fd, bo->handle, + tiling, pitch)) continue; if (bo->map) @@ -3444,7 +3881,7 @@ search_inactive: assert(bo->refcnt == 0); assert(bo->reusable); assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU); - assert((flags & CREATE_INACTIVE) == 0 || !kgem_busy(kgem, bo->handle)); + ASSERT_MAYBE_IDLE(kgem, bo->handle, flags & CREATE_INACTIVE); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); bo->refcnt = 1; return bo; @@ -3479,8 +3916,9 @@ create: bo->domain = DOMAIN_CPU; bo->unique_id = kgem_get_unique_id(kgem); bo->pitch = pitch; - if (tiling != I915_TILING_NONE) - bo->tiling = gem_set_tiling(kgem->fd, handle, tiling, pitch); + if (tiling != I915_TILING_NONE && + gem_set_tiling(kgem->fd, handle, tiling, pitch)) + bo->tiling = tiling; if (bucket >= NUM_CACHE_BUCKETS) { DBG(("%s: marking large bo for automatic flushing\n", __FUNCTION__)); @@ -3611,16 +4049,23 @@ void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) __kgem_bo_destroy(kgem, bo); } -bool __kgem_flush(struct kgem *kgem, struct kgem_bo *bo) +void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo) { + assert(bo->rq); + assert(bo->exec == NULL); + assert(bo->needs_flush); + /* The kernel will emit a flush *and* update its own flushing lists. 
*/ - if (!bo->needs_flush) - return false; + if (!__kgem_busy(kgem, bo->handle)) + __kgem_bo_clear_busy(bo); - bo->needs_flush = kgem_busy(kgem, bo->handle); DBG(("%s: handle=%d, busy?=%d\n", - __FUNCTION__, bo->handle, bo->needs_flush)); - return bo->needs_flush; + __FUNCTION__, bo->handle, bo->rq != NULL)); +} + +inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo) +{ + return kgem->nreloc && bo->rq && RQ_RING(bo->rq) != kgem->ring; } bool kgem_check_bo(struct kgem *kgem, ...) @@ -3629,22 +4074,22 @@ bool kgem_check_bo(struct kgem *kgem, ...) struct kgem_bo *bo; int num_exec = 0; int num_pages = 0; - - if (kgem_flush(kgem)) - return false; + bool flush = false; va_start(ap, kgem); while ((bo = va_arg(ap, struct kgem_bo *))) { + while (bo->proxy) + bo = bo->proxy; if (bo->exec) continue; - while (bo->proxy) { - bo = bo->proxy; - if (bo->exec) - continue; - } + if (needs_semaphore(kgem, bo)) + return false; + num_pages += num_pages(bo); num_exec++; + + flush |= bo->flush; } va_end(ap); @@ -3654,7 +4099,11 @@ bool kgem_check_bo(struct kgem *kgem, ...) if (!num_pages) return true; - if (kgem->aperture > kgem->aperture_low && kgem_is_idle(kgem)) { + if (kgem_flush(kgem, flush)) + return false; + + if (kgem->aperture > kgem->aperture_low && + kgem_ring_is_idle(kgem, kgem->ring)) { DBG(("%s: current aperture usage (%d) is greater than low water mark (%d)\n", __FUNCTION__, kgem->aperture, kgem->aperture_low)); return false; @@ -3679,46 +4128,55 @@ bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo) { uint32_t size; - if (kgem_flush(kgem)) - return false; - while (bo->proxy) bo = bo->proxy; if (bo->exec) { - if (kgem->gen < 40 && + if (kgem->gen < 040 && bo->tiling != I915_TILING_NONE && (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) { if (kgem->nfence >= kgem->fence_max) return false; + if (3*kgem->aperture_fenced > kgem->aperture_mappable && + kgem_ring_is_idle(kgem, kgem->ring)) + return false; + size = kgem->aperture_fenced; size += kgem_bo_fenced_size(kgem, bo); - if (4*size > 3*kgem->aperture_mappable) + if (3*size > 2*kgem->aperture_mappable) return false; } return true; } + if (needs_semaphore(kgem, bo)) + return false; + + if (kgem_flush(kgem, bo->flush)) + return false; + if (kgem->nexec >= KGEM_EXEC_SIZE(kgem) - 1) return false; - if (kgem->aperture > kgem->aperture_low) + if (kgem->aperture > kgem->aperture_low && + kgem_ring_is_idle(kgem, kgem->ring)) return false; if (kgem->aperture + num_pages(bo) > kgem->aperture_high) return false; - if (kgem->gen < 40 && bo->tiling != I915_TILING_NONE) { + if (kgem->gen < 040 && bo->tiling != I915_TILING_NONE) { if (kgem->nfence >= kgem->fence_max) return false; - if (2*kgem->aperture_fenced > kgem->aperture_mappable) + if (3*kgem->aperture_fenced > kgem->aperture_mappable && + kgem_ring_is_idle(kgem, kgem->ring)) return false; size = kgem->aperture_fenced; size += kgem_bo_fenced_size(kgem, bo); - if (4*size > 3*kgem->aperture_mappable) + if (3*size > 2*kgem->aperture_mappable) return false; } @@ -3733,16 +4191,14 @@ bool kgem_check_many_bo_fenced(struct kgem *kgem, ...) 
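A minimal sketch of the __kgem_busy() helper that the reworked __kgem_flush() above now polls, assuming it is a thin wrapper over the I915_GEM_BUSY ioctl; this diff only shows its declaration (in the kgem.h hunk further down), so the body below is illustrative rather than the driver's own:

#include <stdbool.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

struct kgem { int fd; };        /* reduced stand-in for the driver's struct kgem */

bool __kgem_busy(struct kgem *kgem, int handle)
{
        struct drm_i915_gem_busy busy;

        memset(&busy, 0, sizeof(busy));
        busy.handle = handle;

        /* an object the kernel no longer tracks is reported as idle */
        if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
                return false;

        return busy.busy != 0;
}

Once the poll reports idle, __kgem_bo_clear_busy() drops the request link and resets the domain, replacing the needs_flush bookkeeping the old boolean-returning __kgem_flush() relied on.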
int num_exec = 0; int num_pages = 0; int fenced_size = 0; - - if (kgem_flush(kgem)) - return false; + bool flush = false; va_start(ap, kgem); while ((bo = va_arg(ap, struct kgem_bo *))) { while (bo->proxy) bo = bo->proxy; if (bo->exec) { - if (kgem->gen >= 40 || bo->tiling == I915_TILING_NONE) + if (kgem->gen >= 040 || bo->tiling == I915_TILING_NONE) continue; if ((bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) { @@ -3753,12 +4209,17 @@ bool kgem_check_many_bo_fenced(struct kgem *kgem, ...) continue; } + if (needs_semaphore(kgem, bo)) + return false; + num_pages += num_pages(bo); num_exec++; - if (kgem->gen < 40 && bo->tiling) { + if (kgem->gen < 040 && bo->tiling) { fenced_size += kgem_bo_fenced_size(kgem, bo); num_fence++; } + + flush |= bo->flush; } va_end(ap); @@ -3766,15 +4227,20 @@ bool kgem_check_many_bo_fenced(struct kgem *kgem, ...) if (kgem->nfence + num_fence > kgem->fence_max) return false; - if (2*kgem->aperture_fenced > kgem->aperture_mappable) + if (3*kgem->aperture_fenced > kgem->aperture_mappable && + kgem_ring_is_idle(kgem, kgem->ring)) return false; - if (4*(fenced_size + kgem->aperture_fenced) > 3*kgem->aperture_mappable) + if (3*(fenced_size + kgem->aperture_fenced) > 2*kgem->aperture_mappable) return false; } if (num_pages) { - if (kgem->aperture > kgem->aperture_low) + if (kgem_flush(kgem, flush)) + return false; + + if (kgem->aperture > kgem->aperture_low && + kgem_ring_is_idle(kgem, kgem->ring)) return false; if (num_pages + kgem->aperture > kgem->aperture_high) @@ -3816,20 +4282,25 @@ uint32_t kgem_add_reloc(struct kgem *kgem, if (bo->exec == NULL) { list_move_tail(&bo->request, &kgem->next_request->buffers); - bo->rq = kgem->next_request; + bo->rq = MAKE_REQUEST(kgem->next_request, + kgem->ring); bo->exec = &_kgem_dummy_exec; } + if (read_write_domain & 0x7fff && !bo->dirty) + __kgem_bo_mark_dirty(bo); + bo = bo->proxy; assert(bo->refcnt); assert(!bo->purged); } if (bo->exec == NULL) - _kgem_add_bo(kgem, bo); - assert(bo->rq == kgem->next_request); + kgem_add_bo(kgem, bo); + assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring)); + assert(RQ_RING(bo->rq) == kgem->ring); - if (kgem->gen < 40 && read_write_domain & KGEM_RELOC_FENCED) { + if (kgem->gen < 040 && read_write_domain & KGEM_RELOC_FENCED) { if (bo->tiling && (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) { assert(kgem->nfence < kgem->fence_max); @@ -3841,19 +4312,21 @@ uint32_t kgem_add_reloc(struct kgem *kgem, } kgem->reloc[index].delta = delta; - kgem->reloc[index].target_handle = bo->handle; + kgem->reloc[index].target_handle = bo->target_handle; kgem->reloc[index].presumed_offset = bo->presumed_offset; - if (read_write_domain & 0x7ff) { + if (read_write_domain & 0x7fff && !bo->dirty) { assert(!bo->snoop || kgem->can_blt_cpu); - kgem_bo_mark_dirty(bo); + __kgem_bo_mark_dirty(bo); } delta += bo->presumed_offset; } else { kgem->reloc[index].delta = delta; - kgem->reloc[index].target_handle = 0; + kgem->reloc[index].target_handle = ~0U; kgem->reloc[index].presumed_offset = 0; + if (kgem->nreloc__self < 256) + kgem->reloc__self[kgem->nreloc__self++] = index; } kgem->reloc[index].read_domains = read_write_domain >> 16; kgem->reloc[index].write_domain = read_write_domain & 0x7fff; @@ -3984,7 +4457,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo) ptr = bo->map; if (ptr == NULL) { assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2); - assert(kgem->gen != 21 || bo->tiling != I915_TILING_Y); + assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y); kgem_trim_vma_cache(kgem, 
MAP_GTT, bucket(bo)); @@ -4005,7 +4478,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo) struct drm_i915_gem_set_domain set_domain; DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__, - bo->needs_flush, bo->domain, kgem_busy(kgem, bo->handle))); + bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle))); /* XXX use PROT_READ to avoid the write flush? */ @@ -4097,6 +4570,11 @@ retry: if (__kgem_throttle_retire(kgem, 0)) goto retry; + if (kgem->need_expire) { + kgem_cleanup_cache(kgem); + goto retry; + } + return NULL; } @@ -4132,6 +4610,11 @@ retry: if (__kgem_throttle_retire(kgem, 0)) goto retry; + if (kgem->need_expire) { + kgem_cleanup_cache(kgem); + goto retry; + } + return NULL; } @@ -4180,10 +4663,7 @@ uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo) */ bo->reusable = false; - /* The bo is outside of our control, so presume it is written to */ - bo->needs_flush = true; - if (bo->domain != DOMAIN_GPU) - bo->domain = DOMAIN_NONE; + kgem_bo_unclean(kgem, bo); /* Henceforth, we need to broadcast all updates to clients and * flush our rendering before doing so. @@ -4231,8 +4711,8 @@ void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo) if (bo->domain != DOMAIN_CPU) { struct drm_i915_gem_set_domain set_domain; - DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__, - bo->needs_flush, bo->domain, kgem_busy(kgem, bo->handle))); + DBG(("%s: SYNC: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__, + bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle))); VG_CLEAR(set_domain); set_domain.handle = bo->handle; @@ -4246,6 +4726,30 @@ void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo) } } +void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write) +{ + assert(bo->proxy == NULL); + kgem_bo_submit(kgem, bo); + + if (bo->domain != DOMAIN_CPU) { + struct drm_i915_gem_set_domain set_domain; + + DBG(("%s: SYNC: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__, + bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle))); + + VG_CLEAR(set_domain); + set_domain.handle = bo->handle; + set_domain.read_domains = I915_GEM_DOMAIN_CPU; + set_domain.write_domain = write ? I915_GEM_DOMAIN_CPU : 0; + + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) { + if (write || bo->needs_flush) + kgem_bo_retire(kgem, bo); + bo->domain = write ? DOMAIN_CPU : DOMAIN_NONE; + } + } +} + void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo) { assert(bo->proxy == NULL); @@ -4254,8 +4758,8 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo) if (bo->domain != DOMAIN_GTT) { struct drm_i915_gem_set_domain set_domain; - DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__, - bo->needs_flush, bo->domain, kgem_busy(kgem, bo->handle))); + DBG(("%s: SYNC: needs_flush? %d, domain? %d, busy? 
%d\n", __FUNCTION__, + bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle))); VG_CLEAR(set_domain); set_domain.handle = bo->handle; @@ -4271,10 +4775,10 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo) void kgem_clear_dirty(struct kgem *kgem) { - struct kgem_request *rq = kgem->next_request; + struct list * const buffers = &kgem->next_request->buffers; struct kgem_bo *bo; - list_for_each_entry(bo, &rq->buffers, request) { + list_for_each_entry(bo, buffers, request) { if (!bo->dirty) break; @@ -4305,6 +4809,7 @@ struct kgem_bo *kgem_create_proxy(struct kgem *kgem, bo->tiling = target->tiling; bo->pitch = target->pitch; + assert(!bo->scanout); bo->proxy = kgem_bo_reference(target); bo->delta = offset; @@ -4351,7 +4856,7 @@ static inline bool use_snoopable_buffer(struct kgem *kgem, uint32_t flags) { if ((flags & KGEM_BUFFER_WRITE) == 0) - return kgem->gen >= 30; + return kgem->gen >= 030; return true; } @@ -4425,8 +4930,6 @@ create_snoopable_buffer(struct kgem *kgem, unsigned alloc) struct kgem_buffer *bo; uint32_t handle; - assert(!kgem->has_llc); - if (kgem->has_cacheing) { struct kgem_bo *old; @@ -4524,9 +5027,6 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, /* we should never be asked to create anything TOO large */ assert(size <= kgem->max_object_size); - if (kgem->has_llc) - flags &= ~KGEM_BUFFER_INPLACE; - #if !DBG_NO_UPLOAD_CACHE list_for_each_entry(bo, &kgem->batch_buffers, base.list) { assert(bo->base.io); @@ -4580,8 +5080,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, assert(bo->base.io); assert(bo->base.refcnt >= 1); assert(bo->mmapped); - assert(!bo->base.snoop); - assert(!IS_CPU_MAP(bo->base.map) || kgem->has_llc); + assert(!IS_CPU_MAP(bo->base.map) || kgem->has_llc || bo->base.snoop); if ((bo->write & ~flags) & KGEM_BUFFER_INPLACE) { DBG(("%s: skip write %x buffer, need %x\n", @@ -4608,11 +5107,16 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, alloc = ALIGN(size, kgem->buffer_size); if (alloc > MAX_CACHE_SIZE) alloc = PAGE_ALIGN(size); + + if (alloc > kgem->aperture_mappable / 4) + flags &= ~KGEM_BUFFER_INPLACE; alloc /= PAGE_SIZE; - if (kgem->has_llc) { + + if (kgem->has_llc && + (flags & KGEM_BUFFER_WRITE_INPLACE) != KGEM_BUFFER_WRITE_INPLACE) { bo = buffer_alloc(); if (bo == NULL) - return NULL; + goto skip_llc; old = NULL; if ((flags & KGEM_BUFFER_WRITE) == 0) @@ -4630,7 +5134,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, uint32_t handle = gem_create(kgem->fd, alloc); if (handle == 0) { free(bo); - return NULL; + goto skip_llc; } __kgem_bo_init(&bo->base, handle, alloc); DBG(("%s: created LLC handle=%d for buffer\n", @@ -4646,17 +5150,14 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, if (bo->mem) { if (flags & KGEM_BUFFER_WRITE) kgem_bo_sync__cpu(kgem, &bo->base); - - alloc = num_pages(&bo->base); + flags &= ~KGEM_BUFFER_INPLACE; goto init; } else { bo->base.refcnt = 0; /* for valgrind */ kgem_bo_free(kgem, &bo->base); } } - - if (PAGE_SIZE * alloc > kgem->aperture_mappable / 4) - flags &= ~KGEM_BUFFER_INPLACE; +skip_llc: if ((flags & KGEM_BUFFER_WRITE_INPLACE) == KGEM_BUFFER_WRITE_INPLACE) { /* The issue with using a GTT upload buffer is that we may @@ -4695,7 +5196,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP); if (old == NULL) { old = search_linear_cache(kgem, alloc, CREATE_INACTIVE); - if (old && !kgem_bo_is_mappable(kgem, old)) { + if (old && !__kgem_bo_is_mappable(kgem, old)) { _kgem_bo_destroy(kgem, old); old = NULL; } @@ -4703,7 
+5204,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, if (old) { DBG(("%s: reusing handle=%d for buffer\n", __FUNCTION__, old->handle)); - assert(kgem_bo_is_mappable(kgem, old)); + assert(__kgem_bo_is_mappable(kgem, old)); assert(!old->snoop); assert(old->rq == NULL); @@ -4719,9 +5220,8 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, bo->mem = kgem_bo_map(kgem, &bo->base); if (bo->mem) { - alloc = num_pages(&bo->base); if (IS_CPU_MAP(bo->base.map)) - flags &= ~KGEM_BUFFER_INPLACE; + flags &= ~KGEM_BUFFER_INPLACE; goto init; } else { bo->base.refcnt = 0; @@ -4742,16 +5242,13 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, if (flags & KGEM_BUFFER_WRITE) kgem_bo_sync__cpu(kgem, &bo->base); flags &= ~KGEM_BUFFER_INPLACE; - alloc = num_pages(&bo->base); goto init; } - if ((flags & KGEM_BUFFER_WRITE_INPLACE) != KGEM_BUFFER_WRITE_INPLACE) { + if ((flags & KGEM_BUFFER_INPLACE) == 0) { bo = create_snoopable_buffer(kgem, alloc); - if (bo) { - flags &= ~KGEM_BUFFER_INPLACE; + if (bo) goto init; - } } } @@ -4765,8 +5262,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, if (old) { DBG(("%s: reusing ordinary handle %d for io\n", __FUNCTION__, old->handle)); - alloc = num_pages(old); - bo = buffer_alloc_with_data(alloc); + bo = buffer_alloc_with_data(num_pages(old)); if (bo == NULL) return NULL; @@ -4793,7 +5289,6 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, DBG(("%s: reusing handle=%d for buffer\n", __FUNCTION__, old->handle)); - alloc = num_pages(old); init_buffer_from_bo(bo, old); } else { uint32_t handle = gem_create(kgem->fd, alloc); @@ -4803,7 +5298,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, } DBG(("%s: created handle=%d for buffer\n", - __FUNCTION__, bo->base.handle)); + __FUNCTION__, handle)); __kgem_bo_init(&bo->base, handle, alloc); debug_alloc(kgem, alloc * PAGE_SIZE); @@ -4815,16 +5310,18 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, if (flags & KGEM_BUFFER_WRITE) { bo->mem = kgem_bo_map__cpu(kgem, &bo->base); - if (bo->mem != NULL) + if (bo->mem != NULL) { kgem_bo_sync__cpu(kgem, &bo->base); - goto init; + goto init; + } } DBG(("%s: falling back to new pwrite buffer\n", __FUNCTION__)); old = &bo->base; - bo = buffer_alloc_with_data(alloc); + bo = buffer_alloc_with_data(num_pages(old)); if (bo == NULL) { - free(old); + old->refcnt = 0; + kgem_bo_free(kgem, old); return NULL; } @@ -4839,7 +5336,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, init: bo->base.io = true; assert(bo->base.refcnt == 1); - assert(num_pages(&bo->base) == alloc); + assert(num_pages(&bo->base) >= NUM_PAGES(size)); assert(!bo->need_io || !bo->base.needs_flush); assert(!bo->need_io || bo->base.domain != DOMAIN_GPU); assert(bo->mem); @@ -4852,8 +5349,8 @@ init: assert(list_is_empty(&bo->base.list)); list_add(&bo->base.list, &kgem->batch_buffers); - DBG(("%s(pages=%d) new handle=%d, used=%d, write=%d\n", - __FUNCTION__, alloc, bo->base.handle, bo->used, bo->write)); + DBG(("%s(pages=%d [%d]) new handle=%d, used=%d, write=%d\n", + __FUNCTION__, num_pages(&bo->base), alloc, bo->base.handle, bo->used, bo->write)); done: bo->used = ALIGN(bo->used, UPLOAD_ALIGNMENT); @@ -4919,10 +5416,10 @@ struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem, struct kgem_bo *kgem_upload_source_image(struct kgem *kgem, const void *data, - BoxPtr box, + const BoxRec *box, int stride, int bpp) { - int width = box->x2 - box->x1; + int width = box->x2 - box->x1; int height = box->y2 - box->y1; struct kgem_bo *bo; void *dst; @@ -4987,7 +5484,7 @@ void 
kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo) __FUNCTION__, bo->base.needs_flush, bo->base.domain, - kgem_busy(kgem, bo->base.handle))); + __kgem_busy(kgem, bo->base.handle))); assert(!IS_CPU_MAP(bo->base.map) || bo->base.snoop || kgem->has_llc); @@ -5007,6 +5504,7 @@ void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo) return; } kgem_bo_retire(kgem, &bo->base); + bo->base.domain = DOMAIN_NONE; } uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format) @@ -5104,18 +5602,22 @@ kgem_replace_bo(struct kgem *kgem, dst->unique_id = kgem_get_unique_id(kgem); dst->refcnt = 1; - kgem_set_mode(kgem, KGEM_BLT); + kgem_set_mode(kgem, KGEM_BLT, dst); if (!kgem_check_batch(kgem, 8) || !kgem_check_reloc(kgem, 2) || !kgem_check_many_bo_fenced(kgem, src, dst, NULL)) { - _kgem_submit(kgem); + kgem_submit(kgem); + if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) { + kgem_bo_destroy(kgem, dst); + return NULL; + } _kgem_set_mode(kgem, KGEM_BLT); } br00 = XY_SRC_COPY_BLT_CMD; br13 = pitch; pitch = src->pitch; - if (kgem->gen >= 40 && src->tiling) { + if (kgem->gen >= 040 && src->tiling) { br00 |= BLT_SRC_TILED; pitch >>= 2; } diff --git a/src/sna/kgem.h b/src/sna/kgem.h index e547215bb..a23194feb 100644 --- a/src/sna/kgem.h +++ b/src/sna/kgem.h @@ -43,6 +43,12 @@ #endif struct kgem_bo { + struct kgem_request *rq; +#define RQ(rq) ((struct kgem_request *)((uintptr_t)(rq) & ~3)) +#define RQ_RING(rq) ((uintptr_t)(rq) & 3) +#define RQ_IS_BLT(rq) (RQ_RING(rq) == KGEM_BLT) + struct drm_i915_gem_exec_object2 *exec; + struct kgem_bo *proxy; struct list list; @@ -52,8 +58,6 @@ struct kgem_bo { void *map; #define IS_CPU_MAP(ptr) ((uintptr_t)(ptr) & 1) #define IS_GTT_MAP(ptr) (ptr && ((uintptr_t)(ptr) & 1) == 0) - struct kgem_request *rq; - struct drm_i915_gem_exec_object2 *exec; struct kgem_bo_binding { struct kgem_bo_binding *next; @@ -64,6 +68,7 @@ struct kgem_bo { uint32_t unique_id; uint32_t refcnt; uint32_t handle; + uint32_t target_handle; uint32_t presumed_offset; uint32_t delta; union { @@ -126,22 +131,30 @@ struct kgem { struct list large_inactive; struct list active[NUM_CACHE_BUCKETS][3]; struct list inactive[NUM_CACHE_BUCKETS]; + struct list pinned_batches[2]; struct list snoop; + struct list scanout; struct list batch_buffers, active_buffers; struct list requests[2]; struct kgem_request *next_request; - uint32_t num_requests; + struct kgem_request static_request; struct { struct list inactive[NUM_CACHE_BUCKETS]; int16_t count; } vma[NUM_MAP_TYPES]; + uint32_t batch_flags; + uint32_t batch_flags_base; +#define I915_EXEC_SECURE (1<<9) +#define LOCAL_EXEC_OBJECT_WRITE (1<<2) + uint16_t nbatch; uint16_t surface; uint16_t nexec; uint16_t nreloc; + uint16_t nreloc__self; uint16_t nfence; uint16_t batch_size; uint16_t min_alignment; @@ -151,6 +164,7 @@ struct kgem { uint32_t need_purge:1; uint32_t need_retire:1; uint32_t need_throttle:1; + uint32_t scanout_busy:1; uint32_t busy:1; uint32_t has_userptr :1; @@ -158,8 +172,12 @@ struct kgem { uint32_t has_relaxed_fencing :1; uint32_t has_relaxed_delta :1; uint32_t has_semaphores :1; + uint32_t has_secure_batches :1; + uint32_t has_pinned_batches :1; uint32_t has_cacheing :1; uint32_t has_llc :1; + uint32_t has_no_reloc :1; + uint32_t has_handle_lut :1; uint32_t can_blt_cpu :1; @@ -179,6 +197,7 @@ struct kgem { uint32_t batch[64*1024-8]; struct drm_i915_gem_exec_object2 exec[256]; struct drm_i915_gem_relocation_entry reloc[4096]; + uint16_t reloc__self[256]; #ifdef DEBUG_MEMORY struct { @@ -200,7 +219,7 @@ struct kgem { 
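struct kgem_bo now stores the ring a buffer was last queued on in the two low bits of its request pointer, via the RQ()/RQ_RING()/RQ_IS_BLT() macros above. A standalone sketch of that tagging trick; the MAKE_REQUEST() macro used by kgem_add_reloc() earlier in this patch is not defined in the visible hunks, so the definition here is an assumption:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

struct kgem_request;    /* opaque here; allocations are at least 4-byte aligned */

#define RQ(rq)          ((struct kgem_request *)((uintptr_t)(rq) & ~3))
#define RQ_RING(rq)     ((uintptr_t)(rq) & 3)
#define MAKE_REQUEST(rq, ring)  /* assumed inverse of RQ()/RQ_RING() */ \
        ((struct kgem_request *)((uintptr_t)(rq) | (ring)))

int main(void)
{
        struct kgem_request *rq = malloc(64);           /* malloc guarantees the alignment */
        struct kgem_request *tagged = MAKE_REQUEST(rq, 2);      /* any ring id below 4 */

        assert(((uintptr_t)rq & 3) == 0);
        assert(RQ(tagged) == rq);       /* the pointer survives the round trip */
        assert(RQ_RING(tagged) == 2);   /* and so does the ring tag */

        free(RQ(tagged));               /* always untag before dereferencing or freeing */
        return 0;
}

This is why needs_semaphore() in the kgem.c hunks can compare RQ_RING(bo->rq) against kgem->ring without any extra per-bo field.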
#define KGEM_EXEC_SIZE(K) (int)(ARRAY_SIZE((K)->exec)-KGEM_EXEC_RESERVED) #define KGEM_RELOC_SIZE(K) (int)(ARRAY_SIZE((K)->reloc)-KGEM_RELOC_RESERVED) -void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen); +void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen); void kgem_reset(struct kgem *kgem); struct kgem_bo *kgem_create_map(struct kgem *kgem, @@ -218,7 +237,7 @@ struct kgem_bo *kgem_create_proxy(struct kgem *kgem, struct kgem_bo *kgem_upload_source_image(struct kgem *kgem, const void *data, - BoxPtr box, + const BoxRec *box, int stride, int bpp); void kgem_proxy_bo_attach(struct kgem_bo *bo, struct kgem_bo **ptr); @@ -245,8 +264,9 @@ enum { CREATE_SCANOUT = 0x10, CREATE_PRIME = 0x20, CREATE_TEMPORARY = 0x40, - CREATE_NO_RETIRE = 0x80, - CREATE_NO_THROTTLE = 0x100, + CREATE_CACHED = 0x80, + CREATE_NO_RETIRE = 0x100, + CREATE_NO_THROTTLE = 0x200, }; struct kgem_bo *kgem_create_2d(struct kgem *kgem, int width, @@ -264,17 +284,25 @@ uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format); void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset); int kgem_bo_get_swizzling(struct kgem *kgem, struct kgem_bo *bo); -void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo); bool kgem_retire(struct kgem *kgem); -bool __kgem_is_idle(struct kgem *kgem); + +bool __kgem_ring_is_idle(struct kgem *kgem, int ring); +static inline bool kgem_ring_is_idle(struct kgem *kgem, int ring) +{ + ring = ring == KGEM_BLT; + + if (list_is_empty(&kgem->requests[ring])) + return true; + + return __kgem_ring_is_idle(kgem, ring); +} + static inline bool kgem_is_idle(struct kgem *kgem) { - if (kgem->num_requests == 0) { - DBG(("%s: no outstanding requests\n", __FUNCTION__)); + if (!kgem->need_retire) return true; - } - return __kgem_is_idle(kgem); + return kgem_ring_is_idle(kgem, kgem->ring); } void _kgem_submit(struct kgem *kgem); @@ -284,9 +312,12 @@ static inline void kgem_submit(struct kgem *kgem) _kgem_submit(kgem); } -static inline bool kgem_flush(struct kgem *kgem) +static inline bool kgem_flush(struct kgem *kgem, bool flush) { - return kgem->flush && kgem_is_idle(kgem); + if (kgem->nreloc == 0) + return false; + + return (kgem->flush ^ flush) && kgem_ring_is_idle(kgem, kgem->ring); } static inline void kgem_bo_submit(struct kgem *kgem, struct kgem_bo *bo) @@ -295,7 +326,7 @@ static inline void kgem_bo_submit(struct kgem *kgem, struct kgem_bo *bo) _kgem_submit(kgem); } -bool __kgem_flush(struct kgem *kgem, struct kgem_bo *bo); +void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo); static inline void kgem_bo_flush(struct kgem *kgem, struct kgem_bo *bo) { kgem_bo_submit(kgem, bo); @@ -307,7 +338,7 @@ static inline void kgem_bo_flush(struct kgem *kgem, struct kgem_bo *bo) * we assume direct access. And as the usual failure is EIO, we do * not actually care. 
*/ - (void)__kgem_flush(kgem, bo); + __kgem_flush(kgem, bo); } static inline struct kgem_bo *kgem_bo_reference(struct kgem_bo *bo) @@ -327,7 +358,9 @@ static inline void kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) void kgem_clear_dirty(struct kgem *kgem); -static inline void kgem_set_mode(struct kgem *kgem, enum kgem_mode mode) +static inline void kgem_set_mode(struct kgem *kgem, + enum kgem_mode mode, + struct kgem_bo *bo) { assert(!kgem->wedged); @@ -335,6 +368,9 @@ static inline void kgem_set_mode(struct kgem *kgem, enum kgem_mode mode) kgem_submit(kgem); #endif + if (kgem->nreloc && bo->exec == NULL && kgem_ring_is_idle(kgem, kgem->ring)) + _kgem_submit(kgem); + if (kgem->mode == mode) return; @@ -346,6 +382,7 @@ static inline void _kgem_set_mode(struct kgem *kgem, enum kgem_mode mode) { assert(kgem->mode == KGEM_NONE); assert(kgem->nbatch == 0); + assert(!kgem->wedged); kgem->context_switch(kgem, mode); kgem->mode = mode; } @@ -384,33 +421,21 @@ static inline bool kgem_check_batch_with_surfaces(struct kgem *kgem, kgem_check_exec(kgem, num_surfaces); } -static inline uint32_t *kgem_get_batch(struct kgem *kgem, int num_dwords) +static inline uint32_t *kgem_get_batch(struct kgem *kgem) { - if (!kgem_check_batch(kgem, num_dwords)) + if (kgem->nreloc) { + unsigned mode = kgem->mode; _kgem_submit(kgem); + _kgem_set_mode(kgem, mode); + } return kgem->batch + kgem->nbatch; } -static inline void kgem_advance_batch(struct kgem *kgem, int num_dwords) -{ - kgem->nbatch += num_dwords; -} - bool kgem_check_bo(struct kgem *kgem, ...) __attribute__((sentinel(0))); bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo); bool kgem_check_many_bo_fenced(struct kgem *kgem, ...) __attribute__((sentinel(0))); -void _kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo); -static inline void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo) -{ - if (bo->proxy) - bo = bo->proxy; - - if (bo->exec == NULL) - _kgem_add_bo(kgem, bo); -} - #define KGEM_RELOC_FENCED 0x8000 uint32_t kgem_add_reloc(struct kgem *kgem, uint32_t pos, @@ -425,6 +450,7 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo); void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo); void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo); void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo); +void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write); void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo); void __kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr); uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo); @@ -460,7 +486,7 @@ static inline bool kgem_bo_blt_pitch_is_ok(struct kgem *kgem, struct kgem_bo *bo) { int pitch = bo->pitch; - if (kgem->gen >= 40 && bo->tiling) + if (kgem->gen >= 040 && bo->tiling) pitch /= 4; if (pitch > MAXSHORT) { DBG(("%s: can not blt to handle=%d, adjusted pitch=%d\n", @@ -483,16 +509,13 @@ static inline bool kgem_bo_can_blt(struct kgem *kgem, return kgem_bo_blt_pitch_is_ok(kgem, bo); } -static inline bool kgem_bo_is_mappable(struct kgem *kgem, - struct kgem_bo *bo) +static inline bool __kgem_bo_is_mappable(struct kgem *kgem, + struct kgem_bo *bo) { - DBG(("%s: domain=%d, offset: %d size: %d\n", - __FUNCTION__, bo->domain, bo->presumed_offset, kgem_bo_size(bo))); - if (bo->domain == DOMAIN_GTT) return true; - if (kgem->gen < 40 && bo->tiling && + if (kgem->gen < 040 && bo->tiling && bo->presumed_offset & (kgem_bo_fenced_size(kgem, bo) - 1)) return false; @@ -502,17 +525,24 @@ static inline bool 
kgem_bo_is_mappable(struct kgem *kgem, return bo->presumed_offset + kgem_bo_size(bo) <= kgem->aperture_mappable; } +static inline bool kgem_bo_is_mappable(struct kgem *kgem, + struct kgem_bo *bo) +{ + DBG(("%s: domain=%d, offset: %d size: %d\n", + __FUNCTION__, bo->domain, bo->presumed_offset, kgem_bo_size(bo))); + assert(bo->refcnt); + return __kgem_bo_is_mappable(kgem, bo); +} + static inline bool kgem_bo_mapped(struct kgem *kgem, struct kgem_bo *bo) { DBG(("%s: map=%p, tiling=%d, domain=%d\n", __FUNCTION__, bo->map, bo->tiling, bo->domain)); + assert(bo->refcnt); if (bo->map == NULL) return bo->tiling == I915_TILING_NONE && bo->domain == DOMAIN_CPU; - if (bo->tiling == I915_TILING_X && !bo->scanout && kgem->has_llc) - return IS_CPU_MAP(bo->map); - return IS_CPU_MAP(bo->map) == !bo->tiling; } @@ -524,7 +554,7 @@ static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo) if (!bo->tiling && kgem->has_llc) return true; - if (kgem->gen == 21 && bo->tiling == I915_TILING_Y) + if (kgem->gen == 021 && bo->tiling == I915_TILING_Y) return false; return kgem_bo_size(bo) <= kgem->aperture_mappable / 4; @@ -532,15 +562,32 @@ static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo) static inline bool kgem_bo_is_snoop(struct kgem_bo *bo) { + assert(bo->refcnt); while (bo->proxy) bo = bo->proxy; return bo->snoop; } +bool __kgem_busy(struct kgem *kgem, int handle); + +static inline void kgem_bo_mark_busy(struct kgem_bo *bo, int ring) +{ + bo->rq = (struct kgem_request *)((uintptr_t)bo->rq | ring); +} + +inline static void __kgem_bo_clear_busy(struct kgem_bo *bo) +{ + bo->needs_flush = false; + list_del(&bo->request); + bo->rq = NULL; + bo->domain = DOMAIN_NONE; +} + static inline bool kgem_bo_is_busy(struct kgem_bo *bo) { DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__, bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL)); + assert(bo->refcnt); return bo->rq; } @@ -548,10 +595,17 @@ static inline bool __kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo) { DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__, bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL)); - if (kgem_flush(kgem)) + assert(bo->refcnt); + + if (bo->exec) + return true; + + if (kgem_flush(kgem, bo->flush)) kgem_submit(kgem); - if (bo->rq && !bo->exec) - kgem_retire(kgem); + + if (bo->rq && !__kgem_busy(kgem, bo->handle)) + __kgem_bo_clear_busy(bo); + return kgem_bo_is_busy(bo); } @@ -560,21 +614,42 @@ static inline bool kgem_bo_is_dirty(struct kgem_bo *bo) if (bo == NULL) return false; + assert(bo->refcnt); return bo->dirty; } +static inline void kgem_bo_unclean(struct kgem *kgem, struct kgem_bo *bo) +{ + /* The bo is outside of our control, so presume it is written to */ + bo->needs_flush = true; + if (bo->rq == NULL) + bo->rq = (void *)kgem; + + if (bo->domain != DOMAIN_GPU) + bo->domain = DOMAIN_NONE; +} + +static inline void __kgem_bo_mark_dirty(struct kgem_bo *bo) +{ + DBG(("%s: handle=%d (proxy? 
%d)\n", __FUNCTION__, + bo->handle, bo->proxy != NULL)); + + bo->exec->flags |= LOCAL_EXEC_OBJECT_WRITE; + bo->needs_flush = bo->dirty = true; + list_move(&bo->request, &RQ(bo->rq)->buffers); +} + static inline void kgem_bo_mark_dirty(struct kgem_bo *bo) { + assert(bo->refcnt); do { - if (bo->dirty) - return; - - DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); assert(bo->exec); assert(bo->rq); - bo->needs_flush = bo->dirty = true; - list_move(&bo->request, &bo->rq->buffers); + if (bo->dirty) + return; + + __kgem_bo_mark_dirty(bo); } while ((bo = bo->proxy)); } @@ -600,7 +675,7 @@ bool kgem_expire_cache(struct kgem *kgem); void kgem_purge_cache(struct kgem *kgem); void kgem_cleanup_cache(struct kgem *kgem); -#if HAS_EXTRA_DEBUG +#if HAS_DEBUG_FULL void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch); #else static inline void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch) diff --git a/src/sna/kgem_debug.c b/src/sna/kgem_debug.c index 2dc1b4564..48c75889c 100644 --- a/src/sna/kgem_debug.c +++ b/src/sna/kgem_debug.c @@ -62,7 +62,7 @@ kgem_debug_get_bo_for_reloc_entry(struct kgem *kgem, return NULL; list_for_each_entry(bo, &kgem->next_request->buffers, request) - if (bo->handle == reloc->target_handle && bo->proxy == NULL) + if (bo->target_handle == reloc->target_handle && bo->proxy == NULL) break; assert(&bo->request != &kgem->next_request->buffers); @@ -74,6 +74,9 @@ static int kgem_debug_handle_is_fenced(struct kgem *kgem, uint32_t handle) { int i; + if (kgem->has_handle_lut) + return kgem->exec[handle].flags & EXEC_OBJECT_NEEDS_FENCE; + for (i = 0; i < kgem->nexec; i++) if (kgem->exec[i].handle == handle) return kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE; @@ -86,7 +89,7 @@ static int kgem_debug_handle_tiling(struct kgem *kgem, uint32_t handle) struct kgem_bo *bo; list_for_each_entry(bo, &kgem->next_request->buffers, request) - if (bo->handle == handle) + if (bo->target_handle == handle) return bo->tiling; return 0; @@ -95,7 +98,7 @@ static int kgem_debug_handle_tiling(struct kgem *kgem, uint32_t handle) void kgem_debug_print(const uint32_t *data, uint32_t offset, unsigned int index, - char *fmt, ...) + const char *fmt, ...) 
{ va_list va; char buf[240]; @@ -273,7 +276,7 @@ decode_2d(struct kgem *kgem, uint32_t offset) kgem_debug_handle_is_fenced(kgem, reloc->target_handle), kgem_debug_handle_tiling(kgem, reloc->target_handle)); kgem_debug_print(data, offset, 5, "color\n"); - assert(kgem->gen >= 40 || + assert(kgem->gen >= 040 || kgem_debug_handle_is_fenced(kgem, reloc->target_handle)); return len; @@ -321,7 +324,7 @@ decode_2d(struct kgem *kgem, uint32_t offset) reloc->read_domains, reloc->write_domain, kgem_debug_handle_is_fenced(kgem, reloc->target_handle), kgem_debug_handle_tiling(kgem, reloc->target_handle)); - assert(kgem->gen >= 40 || + assert(kgem->gen >= 040 || kgem_debug_handle_is_fenced(kgem, reloc->target_handle)); kgem_debug_print(data, offset, 5, "src (%d,%d)\n", @@ -336,7 +339,7 @@ decode_2d(struct kgem *kgem, uint32_t offset) reloc->read_domains, reloc->write_domain, kgem_debug_handle_is_fenced(kgem, reloc->target_handle), kgem_debug_handle_tiling(kgem, reloc->target_handle)); - assert(kgem->gen >= 40 || + assert(kgem->gen >= 040 || kgem_debug_handle_is_fenced(kgem, reloc->target_handle)); return len; @@ -368,18 +371,18 @@ decode_2d(struct kgem *kgem, uint32_t offset) static int (*decode_3d(int gen))(struct kgem*, uint32_t) { - if (gen >= 80) { - } else if (gen >= 70) { + if (gen >= 0100) { + } else if (gen >= 070) { return kgem_gen7_decode_3d; - } else if (gen >= 60) { + } else if (gen >= 060) { return kgem_gen6_decode_3d; - } else if (gen >= 50) { + } else if (gen >= 050) { return kgem_gen5_decode_3d; - } else if (gen >= 40) { + } else if (gen >= 040) { return kgem_gen4_decode_3d; - } else if (gen >= 30) { + } else if (gen >= 030) { return kgem_gen3_decode_3d; - } else if (gen >= 20) { + } else if (gen >= 020) { return kgem_gen2_decode_3d; } assert(0); @@ -387,18 +390,18 @@ static int (*decode_3d(int gen))(struct kgem*, uint32_t) static void (*finish_state(int gen))(struct kgem*) { - if (gen >= 80) { - } else if (gen >= 70) { + if (gen >= 0100) { + } else if (gen >= 070) { return kgem_gen7_finish_state; - } else if (gen >= 60) { + } else if (gen >= 060) { return kgem_gen6_finish_state; - } else if (gen >= 50) { + } else if (gen >= 050) { return kgem_gen5_finish_state; - } else if (gen >= 40) { + } else if (gen >= 040) { return kgem_gen4_finish_state; - } else if (gen >= 30) { + } else if (gen >= 030) { return kgem_gen3_finish_state; - } else if (gen >= 20) { + } else if (gen >= 020) { return kgem_gen2_finish_state; } assert(0); diff --git a/src/sna/kgem_debug.h b/src/sna/kgem_debug.h index 82d6f6664..a0c9fc177 100644 --- a/src/sna/kgem_debug.h +++ b/src/sna/kgem_debug.h @@ -4,7 +4,7 @@ void kgem_debug_print(const uint32_t *data, uint32_t offset, unsigned int index, - char *fmt, ...); + const char *fmt, ...); struct drm_i915_gem_relocation_entry * kgem_debug_get_reloc_entry(struct kgem *kgem, uint32_t offset); diff --git a/src/sna/kgem_debug_gen5.c b/src/sna/kgem_debug_gen5.c index e23ceb1fa..8b55dd919 100644 --- a/src/sna/kgem_debug_gen5.c +++ b/src/sna/kgem_debug_gen5.c @@ -73,7 +73,7 @@ static void gen5_update_vertex_buffer(struct kgem *kgem, const uint32_t *data) int i, size; reloc = kgem_debug_get_reloc_entry(kgem, &data[1] - kgem->batch); - if (reloc->target_handle == 0) { + if (reloc->target_handle == -1) { base = kgem->batch; size = kgem->nbatch * sizeof(uint32_t); } else { @@ -529,20 +529,19 @@ int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset) for (i = 1; i < len;) { gen5_update_vertex_elements(kgem, (i - 1)/2, data + i); - kgem_debug_print(data, offset, i, "buffer %d: 
%svalid, type 0x%04x, " - "src offset 0x%04x bytes\n", - data[i] >> 27, - data[i] & (1 << 26) ? "" : "in", - (data[i] >> 16) & 0x1ff, - data[i] & 0x07ff); + kgem_debug_print(data, offset, i, + "buffer %d: %svalid, type 0x%04x, " + "src offset 0x%04x bytes\n", + data[i] >> 27, + data[i] & (1 << 26) ? "" : "in", + (data[i] >> 16) & 0x1ff, + data[i] & 0x07ff); i++; - kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), " - "dst offset 0x%02x bytes\n", + kgem_debug_print(data, offset, i, "(%s, %s, %s, %s)\n", get_965_element_component(data[i], 0), get_965_element_component(data[i], 1), get_965_element_component(data[i], 2), - get_965_element_component(data[i], 3), - (data[i] & 0xff) * 4); + get_965_element_component(data[i], 3)); i++; } state.num_ve = (len - 1) / 2; /* XXX? */ diff --git a/src/sna/kgem_debug_gen6.c b/src/sna/kgem_debug_gen6.c index e0b09d558..7ef55d38f 100644 --- a/src/sna/kgem_debug_gen6.c +++ b/src/sna/kgem_debug_gen6.c @@ -75,11 +75,11 @@ static void gen6_update_vertex_buffer(struct kgem *kgem, const uint32_t *data) assert(i < kgem->nreloc); reloc = kgem->reloc[i].target_handle; - if (reloc == 0) { + if (reloc == -1) { base = kgem->batch; } else { list_for_each_entry(bo, &kgem->next_request->buffers, request) - if (bo->handle == reloc) + if (bo->target_handle == reloc) break; assert(&bo->request != &kgem->next_request->buffers); base = kgem_bo_map__debug(kgem, bo); @@ -643,7 +643,7 @@ int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset) case 0x6101: i = 0; kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n"); - if (kgem->gen >= 60) { + if (kgem->gen >= 060) { assert(len == 10); state_base_out(data, offset, i++, "general"); @@ -658,7 +658,7 @@ int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset) state_max_out(data, offset, i++, "instruction"); gen6_update_dynamic_buffer(kgem, offset + 3); - } else if (kgem->gen >= 50) { + } else if (kgem->gen >= 050) { assert(len == 8); state_base_out(data, offset, i++, "general"); @@ -674,7 +674,7 @@ int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset) return len; case 0x7801: - if (kgem->gen >= 60) { + if (kgem->gen >= 060) { assert(len == 4); kgem_debug_print(data, offset, 0, @@ -686,7 +686,7 @@ int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset) kgem_debug_print(data, offset, 1, "VS binding table\n"); kgem_debug_print(data, offset, 2, "GS binding table\n"); kgem_debug_print(data, offset, 3, "WM binding table\n"); - } else if (kgem->gen >= 40) { + } else if (kgem->gen >= 040) { assert(len == 6); kgem_debug_print(data, offset, 0, diff --git a/src/sna/sna.h b/src/sna/sna.h index 031be7287..b470c48a0 100644 --- a/src/sna/sna.h +++ b/src/sna/sna.h @@ -42,6 +42,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #endif #include <stdint.h> + #include "compiler.h" #include <xorg-server.h> @@ -79,11 +80,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
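Every generation check in these decode tables flips from decimal to octal: 20/30/40/50/60/70/80 become 020/030/040/050/060/070/0100, and kgem_init() now takes an unsigned gen (see the kgem.h hunk earlier). A plausible reading, inferred rather than stated in the patch, is that gen packs major and minor generation into 3-bit octal digits, so each literal reads as a version number; the helper below is illustrative only:

#include <assert.h>
#include <stdio.h>

/* assumed packing behind the octal literals: one octal digit per field */
static unsigned pack_gen(unsigned major, unsigned minor)
{
        return (major << 3) | minor;
}

int main(void)
{
        assert(pack_gen(4, 0) == 040);  /* reads as "gen 4.0" */
        assert(pack_gen(4, 5) == 045);  /* reads as "gen 4.5" */
        assert(pack_gen(8, 0) == 0100); /* matches the gen >= 0100 case above */

        printf("gen 7.5 packs to 0%o\n", pack_gen(7, 5));       /* prints 075 */
        return 0;
}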
#define DBG(x) #endif -#define DEBUG_NO_RENDER 0 #define DEBUG_NO_BLT 0 #define DEBUG_FLUSH_BATCH 0 -#define DEBUG_FLUSH_SYNC 0 #define TEST_ALL 0 #define TEST_ACCEL (TEST_ALL || 0) @@ -112,9 +111,9 @@ struct sna_pixmap { struct kgem_bo *gpu_bo, *cpu_bo; struct sna_damage *gpu_damage, *cpu_damage; void *ptr; +#define PTR(ptr) ((void*)((uintptr_t)(ptr) & ~1)) struct list list; - struct list inactive; uint32_t stride; uint32_t clear_color; @@ -127,11 +126,10 @@ struct sna_pixmap { #define PIN_SCANOUT 0x1 #define PIN_DRI 0x2 #define PIN_PRIME 0x4 + uint8_t create :4; uint8_t mapped :1; uint8_t shm :1; uint8_t clear :1; - uint8_t undamaged :1; - uint8_t create :3; uint8_t header :1; uint8_t cpu :1; }; @@ -143,6 +141,15 @@ struct sna_glyph { uint16_t size, pos; }; +static inline WindowPtr root(ScreenPtr screen) +{ +#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,10,0,0,0) + return screen->root; +#else + return WindowTable[screen->myNum]; +#endif +} + static inline PixmapPtr get_window_pixmap(WindowPtr window) { return fbGetWindowPixmap(window); @@ -160,7 +167,7 @@ extern DevPrivateKeyRec sna_pixmap_key; constant static inline struct sna_pixmap *sna_pixmap(PixmapPtr pixmap) { - return ((void **)dixGetPrivateAddr(&pixmap->devPrivates, &sna_pixmap_key))[1]; + return ((void **)__get_private(pixmap, sna_pixmap_key))[1]; } static inline struct sna_pixmap *sna_pixmap_from_drawable(DrawablePtr drawable) @@ -178,14 +185,13 @@ struct sna_gc { static inline struct sna_gc *sna_gc(GCPtr gc) { - return dixGetPrivateAddr(&gc->devPrivates, &sna_gc_key); + return (struct sna_gc *)__get_private(gc, sna_gc_key); } enum { FLUSH_TIMER = 0, THROTTLE_TIMER, EXPIRE_TIMER, - INACTIVE_TIMER, #if DEBUG_MEMORY DEBUG_MEMORY_TIMER, #endif @@ -196,10 +202,9 @@ struct sna { ScrnInfoPtr scrn; unsigned flags; -#define SNA_NO_THROTTLE 0x1 -#define SNA_NO_DELAYED_FLUSH 0x2 -#define SNA_NO_WAIT 0x4 -#define SNA_NO_FLIP 0x8 +#define SNA_NO_WAIT 0x1 +#define SNA_NO_FLIP 0x2 +#define SNA_TRIPLE_BUFFER 0x4 #define SNA_TEAR_FREE 0x10 #define SNA_FORCE_SHADOW 0x20 @@ -213,7 +218,6 @@ struct sna { struct list flush_pixmaps; struct list active_pixmaps; - struct list inactive_clock[2]; PixmapPtr front; PixmapPtr freed_pixmap; @@ -237,7 +241,6 @@ struct sna { unsigned int tiling; #define SNA_TILING_FB 0x1 #define SNA_TILING_2D 0x2 -#define SNA_TILING_3D 0x4 #define SNA_TILING_ALL (~0) EntityInfoPtr pEnt; @@ -262,7 +265,6 @@ struct sna { struct gen6_render_state gen6; struct gen7_render_state gen7; } render_state; - uint32_t have_render; bool dri_available; bool dri_open; @@ -298,6 +300,7 @@ extern void sna_mode_update(struct sna *sna); extern void sna_mode_disable_unused(struct sna *sna); extern void sna_mode_wakeup(struct sna *sna); extern void sna_mode_redisplay(struct sna *sna); +extern void sna_mode_close(struct sna *sna); extern void sna_mode_fini(struct sna *sna); extern int sna_page_flip(struct sna *sna, @@ -320,7 +323,7 @@ to_sna_from_screen(ScreenPtr screen) constant static inline struct sna * to_sna_from_pixmap(PixmapPtr pixmap) { - return ((void **)dixGetPrivateAddr(&pixmap->devPrivates, &sna_pixmap_key))[0]; + return ((void **)__get_private(pixmap, sna_pixmap_key))[0]; } constant static inline struct sna * @@ -371,10 +374,11 @@ static inline void sna_dri_vblank_handler(struct sna *sna, struct drm_event_vbla static inline void sna_dri_destroy_window(WindowPtr win) { } static inline void sna_dri_close(struct sna *sna, ScreenPtr pScreen) { } #endif +void sna_dri_pixmap_update_bo(struct sna *sna, PixmapPtr pixmap); extern int 
sna_crtc_to_pipe(xf86CrtcPtr crtc); -extern int sna_crtc_to_plane(xf86CrtcPtr crtc); -extern int sna_crtc_id(xf86CrtcPtr crtc); +extern uint32_t sna_crtc_to_plane(xf86CrtcPtr crtc); +extern uint32_t sna_crtc_id(xf86CrtcPtr crtc); CARD32 sna_format_for_depth(int depth); CARD32 sna_render_format_for_depth(int depth); @@ -438,6 +442,9 @@ void sna_pixmap_destroy(PixmapPtr pixmap); #define __MOVE_FORCE 0x40 #define __MOVE_DRI 0x80 +bool +sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int flags); + struct sna_pixmap *sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags); static inline struct sna_pixmap * sna_pixmap_force_to_gpu(PixmapPtr pixmap, unsigned flags) @@ -483,6 +490,24 @@ struct kgem_bo * sna_drawable_use_bo(DrawablePtr drawable, unsigned flags, const BoxRec *box, struct sna_damage ***damage); +inline static int16_t bound(int16_t a, uint16_t b) +{ + int v = (int)a + (int)b; + if (v > MAXSHORT) + return MAXSHORT; + return v; +} + +inline static int16_t clamp(int16_t a, int16_t b) +{ + int v = (int)a + (int)b; + if (v > MAXSHORT) + return MAXSHORT; + if (v < MINSHORT) + return MINSHORT; + return v; +} + static inline bool box_inplace(PixmapPtr pixmap, const BoxRec *box) { @@ -587,6 +612,20 @@ _sna_get_transformed_coordinates(int x, int y, *y_out = result[1] / (double)result[2]; } +static inline void +_sna_get_transformed_scaled(int x, int y, + const PictTransform *transform, const float *sf, + float *x_out, float *y_out) +{ + *x_out = sf[0] * (transform->matrix[0][0] * x + + transform->matrix[0][1] * y + + transform->matrix[0][2]); + + *y_out = sf[1] * (transform->matrix[1][0] * x + + transform->matrix[1][1] * y + + transform->matrix[1][2]); +} + void sna_get_transformed_coordinates(int x, int y, const PictTransform *transform, @@ -602,6 +641,12 @@ bool sna_transform_is_integer_translation(const PictTransform *t, int16_t *tx, int16_t *ty); bool sna_transform_is_translation(const PictTransform *t, pixman_fixed_t *tx, pixman_fixed_t *ty); +static inline bool +sna_affine_transform_is_rotation(const PictTransform *t) +{ + assert(sna_transform_is_affine(t)); + return t->matrix[0][1] | t->matrix[1][0]; +} static inline bool sna_transform_equal(const PictTransform *a, const PictTransform *b) @@ -635,7 +680,7 @@ static inline bool wedged(struct sna *sna) static inline bool can_render(struct sna *sna) { - return likely(!sna->kgem.wedged && sna->have_render); + return likely(!sna->kgem.wedged && sna->render.prefer_gpu & PREFER_GPU_RENDER); } static inline uint32_t pixmap_size(PixmapPtr pixmap) @@ -665,6 +710,15 @@ void sna_composite(CARD8 op, INT16 mask_x, INT16 mask_y, INT16 dst_x, INT16 dst_y, CARD16 width, CARD16 height); +void sna_composite_fb(CARD8 op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + RegionPtr region, + INT16 src_x, INT16 src_y, + INT16 mask_x, INT16 mask_y, + INT16 dst_x, INT16 dst_y, + CARD16 width, CARD16 height); void sna_composite_rectangles(CARD8 op, PicturePtr dst, xRenderColor *color, @@ -787,6 +841,7 @@ memcpy_xor(const void *src, void *dst, int bpp, #define SNA_CREATE_FB 0x10 #define SNA_CREATE_SCRATCH 0x11 +#define SNA_CREATE_GLYPHS 0x12 inline static bool is_power_of_two(unsigned x) { @@ -801,4 +856,22 @@ inline static bool is_clipped(const RegionRec *r, r->extents.y2 - r->extents.y1 != d->height); } +void sna_threads_init(void); +int sna_use_threads (int width, int height, int threshold); +void sna_threads_run(void (*func)(void *arg), void *arg); +void sna_threads_wait(void); + +void sna_image_composite(pixman_op_t op, + 
pixman_image_t *src, + pixman_image_t *mask, + pixman_image_t *dst, + int16_t src_x, + int16_t src_y, + int16_t mask_x, + int16_t mask_y, + int16_t dst_x, + int16_t dst_y, + uint16_t width, + uint16_t height); + #endif /* _SNA_H */ diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c index a8a0c931a..827dcf4ac 100644 --- a/src/sna/sna_accel.c +++ b/src/sna/sna_accel.c @@ -29,6 +29,7 @@ #include "config.h" #endif +#include "intel_options.h" #include "sna.h" #include "sna_reg.h" #include "rop.h" @@ -51,6 +52,8 @@ #include <sys/mman.h> #include <unistd.h> +#define FAULT_INJECTION 0 + #define FORCE_INPLACE 0 #define FORCE_FALLBACK 0 #define FORCE_FLUSH 0 @@ -60,8 +63,9 @@ #define USE_INPLACE 1 #define USE_WIDE_SPANS 0 /* -1 force CPU, 1 force GPU */ #define USE_ZERO_SPANS 1 /* -1 force CPU, 1 force GPU */ -#define USE_INACTIVE 0 #define USE_CPU_BO 1 +#define USE_USERPTR_UPLOADS 1 +#define USE_USERPTR_DOWNLOADS 1 #define MIGRATE_ALL 0 #define DBG_NO_CPU_UPLOAD 0 @@ -92,6 +96,9 @@ #define NO_TILE_8x8 0 #define NO_STIPPLE_8x8 0 +#define IS_STATIC_PTR(ptr) ((uintptr_t)(ptr) & 1) +#define MAKE_STATIC_PTR(ptr) ((void*)((uintptr_t)(ptr) | 1)) + #if 0 static void __sna_fallback_flush(DrawablePtr d) { @@ -318,6 +325,8 @@ static void assert_pixmap_damage(PixmapPtr p) if (priv == NULL) return; + assert(priv->gpu_damage == NULL || priv->gpu_bo); + if (priv->clear) { assert(DAMAGE_IS_ALL(priv->gpu_damage)); assert(priv->cpu_damage == NULL); @@ -345,7 +354,7 @@ static void assert_pixmap_damage(PixmapPtr p) _sna_damage_debug_get_region(DAMAGE_PTR(priv->cpu_damage), &cpu); RegionIntersect(®, &cpu, &gpu); - assert(!RegionNotEmpty(®)); + assert(RegionNil(®)); RegionUninit(®); RegionUninit(&gpu); @@ -405,8 +414,6 @@ static void sna_pixmap_free_gpu(struct sna *sna, struct sna_pixmap *priv) priv->mapped = false; } - list_del(&priv->inactive); - /* and reset the upload counter */ priv->source_count = SOURCE_BIAS; } @@ -433,20 +440,20 @@ sna_pixmap_alloc_cpu(struct sna *sna, pixmap->drawable.width, pixmap->drawable.height, pixmap->drawable.bitsPerPixel, - from_gpu ? 0 : CREATE_CPU_MAP | CREATE_INACTIVE); + from_gpu ? 0 : CREATE_CPU_MAP | CREATE_INACTIVE | CREATE_NO_THROTTLE); if (priv->cpu_bo) { priv->ptr = kgem_bo_map__cpu(&sna->kgem, priv->cpu_bo); - priv->stride = priv->cpu_bo->pitch; if (priv->ptr) { DBG(("%s: allocated CPU handle=%d (snooped? 
%d)\n", __FUNCTION__, priv->cpu_bo->handle, priv->cpu_bo->snoop)); + priv->stride = priv->cpu_bo->pitch; #ifdef DEBUG_MEMORY sna->debug_memory.cpu_bo_allocs++; sna->debug_memory.cpu_bo_bytes += kgem_bo_size(priv->cpu_bo); +#endif } else { kgem_bo_destroy(&sna->kgem, priv->cpu_bo); priv->cpu_bo = NULL; -#endif } } } @@ -459,17 +466,15 @@ sna_pixmap_alloc_cpu(struct sna *sna, assert(priv->ptr); done: - pixmap->devPrivate.ptr = priv->ptr; - pixmap->devKind = priv->stride; assert(priv->stride); + assert(!priv->mapped); + pixmap->devPrivate.ptr = PTR(priv->ptr); + pixmap->devKind = priv->stride; return priv->ptr != NULL; } -static void sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv) +static void __sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv) { - assert(priv->cpu_damage == NULL); - assert(list_is_empty(&priv->list)); - if (priv->cpu_bo) { DBG(("%s: discarding CPU buffer, handle=%d, size=%d\n", __FUNCTION__, priv->cpu_bo->handle, kgem_bo_size(priv->cpu_bo))); @@ -477,17 +482,29 @@ static void sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv) sna->debug_memory.cpu_bo_allocs--; sna->debug_memory.cpu_bo_bytes -= kgem_bo_size(priv->cpu_bo); #endif - if (priv->cpu_bo->flush) { - assert(priv->cpu_bo->reusable == false); + if (!priv->cpu_bo->reusable) { + assert(priv->cpu_bo->flush == true); kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo); sna_accel_watch_flush(sna, -1); } kgem_bo_destroy(&sna->kgem, priv->cpu_bo); - priv->cpu_bo = NULL; - } else + } else if (!IS_STATIC_PTR(priv->ptr)) free(priv->ptr); +} + +static void sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv) +{ + assert(priv->cpu_damage == NULL); + assert(list_is_empty(&priv->list)); + + if (IS_STATIC_PTR(priv->ptr)) + return; + __sna_pixmap_free_cpu(sna, priv); + + priv->cpu_bo = NULL; priv->ptr = NULL; + if (!priv->mapped) priv->pixmap->devPrivate.ptr = NULL; } @@ -499,14 +516,14 @@ static inline uint32_t default_tiling(PixmapPtr pixmap, struct sna *sna = to_sna_from_pixmap(pixmap); /* Try to avoid hitting the Y-tiling GTT mapping bug on 855GM */ - if (sna->kgem.gen == 21) + if (sna->kgem.gen == 021) return I915_TILING_X; /* Only on later generations was the render pipeline * more flexible than the BLT. So on gen2/3, prefer to * keep large objects accessible through the BLT. 
*/ - if (sna->kgem.gen < 40 && + if (sna->kgem.gen < 040 && (pixmap->drawable.width > sna->render.max_3d_size || pixmap->drawable.height > sna->render.max_3d_size)) return I915_TILING_X; @@ -518,7 +535,6 @@ static inline uint32_t default_tiling(PixmapPtr pixmap, DBG(("%s: entire source is damaged, using Y-tiling\n", __FUNCTION__)); sna_damage_destroy(&priv->gpu_damage); - priv->undamaged = false; return I915_TILING_Y; } @@ -611,7 +627,7 @@ struct kgem_bo *sna_pixmap_change_tiling(PixmapPtr pixmap, uint32_t tiling) static inline void sna_set_pixmap(PixmapPtr pixmap, struct sna_pixmap *sna) { - ((void **)dixGetPrivateAddr(&pixmap->devPrivates, &sna_pixmap_key))[1] = sna; + ((void **)__get_private(pixmap, sna_pixmap_key))[1] = sna; assert(sna_pixmap(pixmap) == sna); } @@ -619,7 +635,6 @@ static struct sna_pixmap * _sna_pixmap_init(struct sna_pixmap *priv, PixmapPtr pixmap) { list_init(&priv->list); - list_init(&priv->inactive); priv->source_count = SOURCE_BIAS; priv->pixmap = pixmap; @@ -664,6 +679,7 @@ bool sna_pixmap_attach_to_bo(PixmapPtr pixmap, struct kgem_bo *bo) return false; priv->gpu_bo = kgem_bo_reference(bo); + assert(priv->gpu_bo->proxy == NULL); sna_damage_all(&priv->gpu_damage, pixmap->drawable.width, pixmap->drawable.height); @@ -711,11 +727,13 @@ create_pixmap(struct sna *sna, ScreenPtr screen, datasize += adjust; } + DBG(("%s: allocating pixmap %dx%d, depth=%d, size=%ld\n", + __FUNCTION__, width, height, depth, (long)datasize)); pixmap = AllocatePixmap(screen, datasize); if (!pixmap) return NullPixmap; - ((void **)dixGetPrivateAddr(&pixmap->devPrivates, &sna_pixmap_key))[0] = sna; + ((void **)__get_private(pixmap, sna_pixmap_key))[0] = sna; assert(to_sna_from_pixmap(pixmap) == sna); pixmap->drawable.type = DRAWABLE_PIXMAP; @@ -764,7 +782,7 @@ sna_pixmap_create_shm(ScreenPtr screen, DBG(("%s(%dx%d, depth=%d, bpp=%d, pitch=%d)\n", __FUNCTION__, width, height, depth, bpp, pitch)); - if (wedged(sna) || bpp == 0 || pitch*height <= 4096) { + if (wedged(sna) || bpp == 0 || pitch*height < 4096) { fallback: pixmap = sna_pixmap_create_unattached(screen, 0, 0, depth); if (pixmap == NULL) @@ -833,6 +851,8 @@ fallback: priv->cpu = true; priv->shm = true; + priv->stride = pitch; + priv->ptr = MAKE_STATIC_PTR(addr); sna_damage_all(&priv->cpu_damage, width, height); pixmap->devKind = pitch; @@ -863,7 +883,8 @@ sna_pixmap_create_scratch(ScreenPtr screen, width, height, depth, tiling)); bpp = bits_per_pixel(depth); - if (tiling == I915_TILING_Y && !sna->have_render) + if (tiling == I915_TILING_Y && + (sna->render.prefer_gpu & PREFER_GPU_RENDER) == 0) tiling = I915_TILING_X; if (tiling == I915_TILING_Y && @@ -1108,6 +1129,7 @@ sna_create_pixmap_shared(struct sna *sna, ScreenPtr screen, assert(priv->gpu_bo->tiling == I915_TILING_NONE); assert((priv->gpu_bo->pitch & 255) == 0); + assert(!priv->mapped); pixmap->devPrivate.ptr = kgem_bo_map__async(&sna->kgem, priv->gpu_bo); if (pixmap->devPrivate.ptr == NULL) { @@ -1136,8 +1158,10 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen, { struct sna *sna = to_sna_from_screen(screen); PixmapPtr pixmap; + struct sna_pixmap *priv; unsigned flags; int pad; + void *ptr; DBG(("%s(%d, %d, %d, usage=%x)\n", __FUNCTION__, width, height, depth, usage)); @@ -1161,19 +1185,29 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen, goto fallback; } - if (!can_render(sna)) + if (unlikely((sna->render.prefer_gpu & PREFER_GPU_RENDER) == 0)) + flags &= ~KGEM_CAN_CREATE_GPU; + if (wedged(sna)) flags = 0; - if (usage == CREATE_PIXMAP_USAGE_SCRATCH) { + switch (usage) 
{ + case CREATE_PIXMAP_USAGE_SCRATCH: if (flags & KGEM_CAN_CREATE_GPU) return sna_pixmap_create_scratch(screen, width, height, depth, I915_TILING_X); else goto fallback; - } - if (usage == SNA_CREATE_SCRATCH) { + case SNA_CREATE_GLYPHS: + if (flags & KGEM_CAN_CREATE_GPU) + return sna_pixmap_create_scratch(screen, + width, height, depth, + -I915_TILING_Y); + else + goto fallback; + + case SNA_CREATE_SCRATCH: if (flags & KGEM_CAN_CREATE_GPU) return sna_pixmap_create_scratch(screen, width, height, depth, @@ -1188,7 +1222,7 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen, usage = 0; pad = PixmapBytePad(width, depth); - if (pad * height <= 4096) { + if (pad * height < 4096) { DBG(("%s: small buffer [%d], attaching to shadow pixmap\n", __FUNCTION__, pad * height)); pixmap = create_pixmap(sna, screen, @@ -1196,10 +1230,10 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen, if (pixmap == NullPixmap) return NullPixmap; - sna_pixmap_attach(pixmap); + ptr = MAKE_STATIC_PTR(pixmap->devPrivate.ptr); + pad = pixmap->devKind; + flags &= ~(KGEM_CAN_CREATE_GPU | KGEM_CAN_CREATE_CPU); } else { - struct sna_pixmap *priv; - DBG(("%s: creating GPU pixmap %dx%d, stride=%d, flags=%x\n", __FUNCTION__, width, height, pad, flags)); @@ -1212,16 +1246,19 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen, pixmap->devKind = pad; pixmap->devPrivate.ptr = NULL; - priv = sna_pixmap_attach(pixmap); - if (priv == NULL) { - free(pixmap); - goto fallback; - } + ptr = NULL; + } - priv->stride = pad; - priv->create = flags; + priv = sna_pixmap_attach(pixmap); + if (priv == NULL) { + free(pixmap); + goto fallback; } + priv->stride = pad; + priv->create = flags; + priv->ptr = ptr; + return pixmap; fallback: @@ -1235,9 +1272,10 @@ void sna_add_flush_pixmap(struct sna *sna, DBG(("%s: marking pixmap=%ld for flushing\n", __FUNCTION__, priv->pixmap->drawable.serialNumber)); assert(bo); + assert(bo->flush); list_move(&priv->list, &sna->flush_pixmaps); - if (bo->exec == NULL) { + if (bo->exec == NULL && kgem_is_idle(&sna->kgem)) { DBG(("%s: new flush bo, flushing before\n", __FUNCTION__)); kgem_submit(&sna->kgem); } @@ -1248,12 +1286,11 @@ static void __sna_free_pixmap(struct sna *sna, struct sna_pixmap *priv) { list_del(&priv->list); - list_del(&priv->inactive); sna_damage_destroy(&priv->gpu_damage); sna_damage_destroy(&priv->cpu_damage); - sna_pixmap_free_cpu(sna, priv); + __sna_pixmap_free_cpu(sna, priv); if (priv->header) { assert(!priv->shm); @@ -1308,7 +1345,8 @@ void sna_pixmap_destroy(PixmapPtr pixmap) static inline bool pixmap_inplace(struct sna *sna, PixmapPtr pixmap, - struct sna_pixmap *priv) + struct sna_pixmap *priv, + bool write_only) { if (FORCE_INPLACE) return FORCE_INPLACE > 0; @@ -1317,7 +1355,10 @@ static inline bool pixmap_inplace(struct sna *sna, return false; if (priv->mapped) - return true; + return !IS_CPU_MAP(priv->gpu_bo->map); + + if (!write_only && priv->cpu_damage) + return false; return (pixmap->devKind * pixmap->drawable.height >> 12) > sna->kgem.half_cpu_cache_pages; @@ -1332,8 +1373,12 @@ sna_pixmap_create_mappable_gpu(PixmapPtr pixmap) if (wedged(sna)) return false; + if ((priv->create & KGEM_CAN_CREATE_GTT) == 0) + return false; + assert_pixmap_damage(pixmap); + assert(priv->gpu_damage == NULL); assert(priv->gpu_bo == NULL); priv->gpu_bo = kgem_create_2d(&sna->kgem, @@ -1395,21 +1440,43 @@ static inline bool use_cpu_bo_for_upload(struct sna *sna, kgem_bo_is_busy(priv->gpu_bo), kgem_bo_is_busy(priv->cpu_bo))); + if (!priv->cpu) + return true; + if (flags & (MOVE_WRITE | MOVE_ASYNC_HINT)) 
return true; + if (priv->gpu_bo->tiling) + return true; + return kgem_bo_is_busy(priv->gpu_bo) || kgem_bo_is_busy(priv->cpu_bo); } static inline bool operate_inplace(struct sna_pixmap *priv, unsigned flags) { - if ((flags & MOVE_INPLACE_HINT) == 0 || priv->gpu_bo == NULL) + if ((flags & MOVE_INPLACE_HINT) == 0) { + DBG(("%s: no, inplace operation not suitable\n", __FUNCTION__)); + return false; + } + + assert((flags & MOVE_ASYNC_HINT) == 0); + + if ((priv->create & KGEM_CAN_CREATE_GTT) == 0) { + DBG(("%s: no, not accessible via GTT\n", __FUNCTION__)); return false; + } + + if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) { + DBG(("%s: yes, CPU is busy\n", __FUNCTION__)); + return true; + } - if (flags & MOVE_WRITE && kgem_bo_is_busy(priv->gpu_bo)) + if (flags & MOVE_WRITE && priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) { + DBG(("%s: no, GPU is busy, so stage write\n", __FUNCTION__)); return false; + } - return priv->stride != 0; + return true; } bool @@ -1437,13 +1504,15 @@ _sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags) priv->gpu_bo ? priv->gpu_bo->handle : 0, priv->gpu_damage, priv->cpu_damage, priv->clear)); + assert(priv->gpu_damage == NULL || priv->gpu_bo); + if (USE_INPLACE && (flags & MOVE_READ) == 0) { assert(flags & MOVE_WRITE); DBG(("%s: no readback, discarding gpu damage [%d], pending clear[%d]\n", __FUNCTION__, priv->gpu_damage != NULL, priv->clear)); if (priv->create & KGEM_CAN_CREATE_GPU && - pixmap_inplace(sna, pixmap, priv)) { + pixmap_inplace(sna, pixmap, priv, true)) { assert(!priv->shm); DBG(("%s: write inplace\n", __FUNCTION__)); if (priv->gpu_bo) { @@ -1460,29 +1529,26 @@ _sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags) !sna_pixmap_create_mappable_gpu(pixmap)) goto skip_inplace_map; - if (!priv->mapped) { - pixmap->devPrivate.ptr = - kgem_bo_map(&sna->kgem, priv->gpu_bo); - if (pixmap->devPrivate.ptr == NULL) - goto skip_inplace_map; + pixmap->devPrivate.ptr = + kgem_bo_map(&sna->kgem, priv->gpu_bo); + priv->mapped = pixmap->devPrivate.ptr != NULL; + if (!priv->mapped) + goto skip_inplace_map; - priv->mapped = true; - } pixmap->devKind = priv->gpu_bo->pitch; + assert(priv->gpu_bo->proxy == NULL); sna_damage_all(&priv->gpu_damage, pixmap->drawable.width, pixmap->drawable.height); sna_damage_destroy(&priv->cpu_damage); - priv->undamaged = false; priv->clear = false; priv->cpu = false; list_del(&priv->list); - if (priv->cpu_bo) { - assert(!priv->shm); - assert(!priv->cpu_bo->flush); - sna_pixmap_free_cpu(sna, priv); - } + + assert(!priv->shm); + assert(priv->cpu_bo == NULL || !priv->cpu_bo->flush); + sna_pixmap_free_cpu(sna, priv); assert_pixmap_damage(pixmap); return true; @@ -1490,6 +1556,7 @@ _sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags) skip_inplace_map: sna_damage_destroy(&priv->gpu_damage); + priv->clear = false; if (priv->cpu_bo && !priv->cpu_bo->flush && __kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) { DBG(("%s: discarding busy CPU bo\n", __FUNCTION__)); @@ -1497,10 +1564,12 @@ skip_inplace_map: assert(priv->gpu_bo == NULL || priv->gpu_damage == NULL); sna_damage_destroy(&priv->cpu_damage); - priv->undamaged = false; - - sna_pixmap_free_gpu(sna, priv); sna_pixmap_free_cpu(sna, priv); + + if (!sna_pixmap_alloc_cpu(sna, pixmap, priv, false)) + return false; + + goto mark_damage; } } @@ -1512,52 +1581,89 @@ skip_inplace_map: assert(priv->gpu_bo == NULL || priv->gpu_bo->proxy == NULL); if (operate_inplace(priv, flags) && - pixmap_inplace(sna, pixmap, priv) && - sna_pixmap_move_to_gpu(pixmap, flags)) { + pixmap_inplace(sna, 
pixmap, priv, (flags & MOVE_READ) == 0) && + (priv->gpu_bo || sna_pixmap_create_mappable_gpu(pixmap))) { kgem_bo_submit(&sna->kgem, priv->gpu_bo); - DBG(("%s: try to operate inplace\n", __FUNCTION__)); - assert(priv->cpu == false); + DBG(("%s: try to operate inplace (GTT)\n", __FUNCTION__)); + assert((flags & MOVE_READ) == 0 || priv->cpu == false); - pixmap->devPrivate.ptr = - kgem_bo_map(&sna->kgem, priv->gpu_bo); - if (pixmap->devPrivate.ptr != NULL) { - priv->mapped = true; + pixmap->devPrivate.ptr = kgem_bo_map(&sna->kgem, priv->gpu_bo); + priv->mapped = pixmap->devPrivate.ptr != NULL; + if (priv->mapped) { pixmap->devKind = priv->gpu_bo->pitch; if (flags & MOVE_WRITE) { + assert(priv->gpu_bo->proxy == NULL); sna_damage_all(&priv->gpu_damage, pixmap->drawable.width, pixmap->drawable.height); sna_damage_destroy(&priv->cpu_damage); + sna_pixmap_free_cpu(sna, priv); list_del(&priv->list); - priv->undamaged = false; priv->clear = false; } assert_pixmap_damage(pixmap); - DBG(("%s: operate inplace\n", __FUNCTION__)); + DBG(("%s: operate inplace (GTT)\n", __FUNCTION__)); return true; } - - priv->mapped = false; } if (priv->mapped) { - assert(!priv->shm); - pixmap->devPrivate.ptr = NULL; + assert(!priv->shm && priv->stride); + pixmap->devPrivate.ptr = PTR(priv->ptr); + pixmap->devKind = priv->stride; priv->mapped = false; } - if (priv->clear && priv->cpu_bo && !priv->cpu_bo->flush && + if (priv->gpu_damage && + ((flags & MOVE_ASYNC_HINT) == 0 || + !__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo)) && + priv->gpu_bo->tiling == I915_TILING_NONE && + sna_pixmap_move_to_gpu(pixmap, MOVE_READ)) { + kgem_bo_submit(&sna->kgem, priv->gpu_bo); + + DBG(("%s: try to operate inplace (CPU)\n", __FUNCTION__)); + + assert(!priv->mapped); + pixmap->devPrivate.ptr = + kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); + if (pixmap->devPrivate.ptr != NULL) { + priv->cpu = true; + priv->mapped = true; + pixmap->devKind = priv->gpu_bo->pitch; + if (flags & MOVE_WRITE) { + assert(priv->gpu_bo->proxy == NULL); + sna_damage_all(&priv->gpu_damage, + pixmap->drawable.width, + pixmap->drawable.height); + sna_damage_destroy(&priv->cpu_damage); + sna_pixmap_free_cpu(sna, priv); + list_del(&priv->list); + priv->clear = false; + } + + kgem_bo_sync__cpu_full(&sna->kgem, + priv->gpu_bo, flags & MOVE_WRITE); + assert_pixmap_damage(pixmap); + DBG(("%s: operate inplace (CPU)\n", __FUNCTION__)); + return true; + } + } + + if (((flags & MOVE_READ) == 0 || priv->clear) && + priv->cpu_bo && !priv->cpu_bo->flush && __kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) { assert(!priv->shm); - assert(DAMAGE_IS_ALL(priv->gpu_damage)); sna_pixmap_free_cpu(sna, priv); } if (pixmap->devPrivate.ptr == NULL && - !sna_pixmap_alloc_cpu(sna, pixmap, priv, priv->gpu_damage != NULL)) + !sna_pixmap_alloc_cpu(sna, pixmap, priv, + flags & MOVE_READ ? 
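/* [editor's note -- assumption, the macros are not shown in this excerpt]
 * The devPrivate.ptr bookkeeping above stores the shadow pointer as
 * MAKE_STATIC_PTR(...) at creation and unwraps it with PTR(priv->ptr)
 * when restoring, which suggests a low-bit pointer tag marking storage
 * that must not be free()d. A plausible sketch of such macros:
 *
 *     #include <stdint.h>
 *
 *     #define MAKE_STATIC_PTR(p) ((void *)((uintptr_t)(p) | 1))
 *     #define IS_STATIC_PTR(p)   ((int)((uintptr_t)(p) & 1))
 *     #define PTR(p)             ((void *)((uintptr_t)(p) & ~(uintptr_t)1))
 *
 * malloc'd storage is at least word-aligned, so bit 0 is free to carry
 * the flag.
 */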
priv->gpu_damage && !priv->clear : 0)) return false; + assert(pixmap->devPrivate.ptr); + assert(!priv->mapped); if (priv->clear) { DBG(("%s: applying clear [%08x]\n", @@ -1585,7 +1691,7 @@ skip_inplace_map: pixmap->drawable.width, pixmap->drawable.height); sna_pixmap_free_gpu(sna, priv); - priv->undamaged = false; + assert(priv->gpu_damage == NULL); priv->clear = false; } @@ -1594,6 +1700,7 @@ skip_inplace_map: int n; DBG(("%s: flushing GPU damage\n", __FUNCTION__)); + assert(priv->gpu_bo); n = sna_damage_get_boxes(priv->gpu_damage, &box); if (n) { @@ -1615,16 +1722,15 @@ skip_inplace_map: __sna_damage_destroy(DAMAGE_PTR(priv->gpu_damage)); priv->gpu_damage = NULL; - priv->undamaged = true; } if (flags & MOVE_WRITE || priv->create & KGEM_CAN_CREATE_LARGE) { +mark_damage: DBG(("%s: marking as damaged\n", __FUNCTION__)); sna_damage_all(&priv->cpu_damage, pixmap->drawable.width, pixmap->drawable.height); sna_pixmap_free_gpu(sna, priv); - priv->undamaged = false; if (priv->flush) { assert(!priv->shm); @@ -1634,25 +1740,25 @@ skip_inplace_map: done: if (flags & MOVE_WRITE) { + assert(DAMAGE_IS_ALL(priv->cpu_damage)); priv->source_count = SOURCE_BIAS; assert(priv->gpu_bo == NULL || priv->gpu_bo->proxy == NULL); if (priv->gpu_bo && priv->gpu_bo->domain != DOMAIN_GPU) { DBG(("%s: discarding inactive GPU bo\n", __FUNCTION__)); - assert(DAMAGE_IS_ALL(priv->cpu_damage)); sna_pixmap_free_gpu(sna, priv); - priv->undamaged = false; } } if (priv->cpu_bo) { if ((flags & MOVE_ASYNC_HINT) == 0) { DBG(("%s: syncing CPU bo\n", __FUNCTION__)); - kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo); + kgem_bo_sync__cpu_full(&sna->kgem, + priv->cpu_bo, flags & MOVE_WRITE); + assert(!priv->shm || !kgem_bo_is_busy(priv->cpu_bo)); } if (flags & MOVE_WRITE) { DBG(("%s: discarding GPU bo in favour of CPU bo\n", __FUNCTION__)); sna_pixmap_free_gpu(sna, priv); - priv->undamaged = false; } } priv->cpu = (flags & MOVE_ASYNC_HINT) == 0; @@ -1717,29 +1823,30 @@ static inline bool region_inplace(struct sna *sna, if (wedged(sna) && !priv->pinned) return false; - if (priv->cpu) { - DBG(("%s: no, preferring last action of CPU\n", __FUNCTION__)); - return false; - } - - if (!write_only && + if ((priv->cpu || !write_only) && region_overlaps_damage(region, priv->cpu_damage, 0, 0)) { DBG(("%s: no, uncovered CPU damage pending\n", __FUNCTION__)); return false; } - if (priv->flush) { - DBG(("%s: yes, exported via dri, will flush\n", __FUNCTION__)); - return true; + if (priv->cpu) { + DBG(("%s: no, preferring last action of CPU\n", __FUNCTION__)); + return false; } if (priv->mapped) { DBG(("%s: yes, already mapped, continuiung\n", __FUNCTION__)); + return !IS_CPU_MAP(priv->gpu_bo->map); + } + + if (priv->flush) { + DBG(("%s: yes, exported via dri, will flush\n", __FUNCTION__)); return true; } if (DAMAGE_IS_ALL(priv->gpu_damage)) { DBG(("%s: yes, already wholly damaged on the GPU\n", __FUNCTION__)); + assert(priv->gpu_bo); return true; } @@ -1785,6 +1892,8 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, return true; } + assert(priv->gpu_damage == NULL || priv->gpu_bo); + if (sna_damage_is_all(&priv->cpu_damage, pixmap->drawable.width, pixmap->drawable.height)) { @@ -1792,7 +1901,6 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, __FUNCTION__, pixmap->drawable.serialNumber)); sna_damage_destroy(&priv->gpu_damage); - priv->undamaged = false; if (flags & MOVE_WRITE) sna_pixmap_free_gpu(sna, priv); @@ -1804,6 +1912,14 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, goto out; } + if (USE_INPLACE && + (flags & (MOVE_READ | 
MOVE_ASYNC_HINT)) == 0 && + (priv->flush || box_inplace(pixmap, &region->extents))) { + DBG(("%s: marking for inplace hint (%d, %d)\n", + __FUNCTION__, priv->flush, box_inplace(pixmap, &region->extents))); + flags |= MOVE_INPLACE_HINT; + } + if (flags & MOVE_WHOLE_HINT) return _sna_pixmap_move_to_cpu(pixmap, flags); @@ -1824,132 +1940,40 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, return _sna_pixmap_move_to_cpu(pixmap, flags); } - if (USE_INPLACE && (flags & MOVE_READ) == 0) { - DBG(("%s: no read, checking to see if we can stream the write into the GPU bo\n", - __FUNCTION__)); - assert(flags & MOVE_WRITE); - - if (priv->stride && priv->gpu_bo && - kgem_bo_can_map(&sna->kgem, priv->gpu_bo) && - region_inplace(sna, pixmap, region, priv, true)) { - assert(priv->gpu_bo->proxy == NULL); - if (!__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo)) { - pixmap->devPrivate.ptr = - kgem_bo_map(&sna->kgem, priv->gpu_bo); - if (pixmap->devPrivate.ptr == NULL) { - if (dx | dy) - RegionTranslate(region, -dx, -dy); - return false; - } - - priv->mapped = true; - pixmap->devKind = priv->gpu_bo->pitch; - - sna_damage_subtract(&priv->cpu_damage, region); - if (priv->cpu_damage == NULL) { - list_del(&priv->list); - sna_damage_all(&priv->gpu_damage, - pixmap->drawable.width, - pixmap->drawable.height); - priv->undamaged = false; - } else - sna_damage_add(&priv->gpu_damage, - region); - - priv->clear = false; - priv->cpu = false; - assert_pixmap_damage(pixmap); - if (dx | dy) - RegionTranslate(region, -dx, -dy); - return true; - } - } - - if (priv->cpu_bo && !priv->cpu_bo->flush) { - if (__kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) { - sna_damage_subtract(&priv->cpu_damage, region); - if (!sna_pixmap_move_to_gpu(pixmap, MOVE_WRITE)) { - if (dx | dy) - RegionTranslate(region, -dx, -dy); - return false; - } - - assert(!priv->shm); - sna_pixmap_free_cpu(sna, priv); - } - } - - if (priv->gpu_bo == NULL && priv->stride && - sna_pixmap_choose_tiling(pixmap, DEFAULT_TILING) != I915_TILING_NONE && - region_inplace(sna, pixmap, region, priv, true) && - sna_pixmap_create_mappable_gpu(pixmap)) { - pixmap->devPrivate.ptr = - kgem_bo_map(&sna->kgem, priv->gpu_bo); - if (pixmap->devPrivate.ptr == NULL) { - if (dx | dy) - RegionTranslate(region, -dx, -dy); - return false; - } - - priv->mapped = true; - pixmap->devKind = priv->gpu_bo->pitch; - - sna_damage_subtract(&priv->cpu_damage, region); - if (priv->cpu_damage == NULL) { - list_del(&priv->list); - sna_damage_all(&priv->gpu_damage, - pixmap->drawable.width, - pixmap->drawable.height); - priv->undamaged = false; - } else - sna_damage_add(&priv->gpu_damage, region); - - assert_pixmap_damage(pixmap); - priv->clear = false; - priv->cpu = false; - if (dx | dy) - RegionTranslate(region, -dx, -dy); - return true; - } - } - if (operate_inplace(priv, flags) && - kgem_bo_can_map(&sna->kgem, priv->gpu_bo) && - region_inplace(sna, pixmap, region, priv, (flags & MOVE_READ) == 0)) { + region_inplace(sna, pixmap, region, priv, (flags & MOVE_READ) == 0) && + (priv->gpu_bo || sna_pixmap_create_mappable_gpu(pixmap))) { kgem_bo_submit(&sna->kgem, priv->gpu_bo); DBG(("%s: try to operate inplace\n", __FUNCTION__)); - pixmap->devPrivate.ptr = - kgem_bo_map(&sna->kgem, priv->gpu_bo); - if (pixmap->devPrivate.ptr != NULL) { - priv->mapped = true; + pixmap->devPrivate.ptr = kgem_bo_map(&sna->kgem, priv->gpu_bo); + priv->mapped = pixmap->devPrivate.ptr != NULL; + if (priv->mapped) { pixmap->devKind = priv->gpu_bo->pitch; - if (flags & MOVE_WRITE && - !DAMAGE_IS_ALL(priv->gpu_damage)) { - 
sna_damage_add(&priv->gpu_damage, region); - if (sna_damage_is_all(&priv->gpu_damage, - pixmap->drawable.width, - pixmap->drawable.height)) { - DBG(("%s: replaced entire pixmap, destroying CPU shadow\n", - __FUNCTION__)); - sna_damage_destroy(&priv->cpu_damage); - priv->undamaged = false; - list_del(&priv->list); - } else - sna_damage_subtract(&priv->cpu_damage, - region); + if (flags & MOVE_WRITE) { + if (!DAMAGE_IS_ALL(priv->gpu_damage)) { + sna_damage_add(&priv->gpu_damage, region); + if (sna_damage_is_all(&priv->gpu_damage, + pixmap->drawable.width, + pixmap->drawable.height)) { + DBG(("%s: replaced entire pixmap, destroying CPU shadow\n", + __FUNCTION__)); + sna_damage_destroy(&priv->cpu_damage); + list_del(&priv->list); + } else + sna_damage_subtract(&priv->cpu_damage, + region); + } + priv->clear = false; } assert_pixmap_damage(pixmap); - priv->clear = false; priv->cpu = false; if (dx | dy) RegionTranslate(region, -dx, -dy); DBG(("%s: operate inplace\n", __FUNCTION__)); return true; } - - priv->mapped = false; } if (priv->clear && flags & MOVE_WRITE) { @@ -1965,12 +1989,26 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, priv->mapped = false; } + if ((priv->clear || (flags & MOVE_READ) == 0) && + priv->cpu_bo && !priv->cpu_bo->flush && + __kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) { + sna_damage_subtract(&priv->cpu_damage, region); + if (sna_pixmap_move_to_gpu(pixmap, MOVE_READ | MOVE_ASYNC_HINT)) { + sna_damage_all(&priv->gpu_damage, + pixmap->drawable.width, + pixmap->drawable.height); + sna_pixmap_free_cpu(sna, priv); + } + } + if (pixmap->devPrivate.ptr == NULL && - !sna_pixmap_alloc_cpu(sna, pixmap, priv, priv->gpu_damage != NULL)) { + !sna_pixmap_alloc_cpu(sna, pixmap, priv, + flags & MOVE_READ ? priv->gpu_damage && !priv->clear : 0)) { if (dx | dy) RegionTranslate(region, -dx, -dy); return false; } + assert(pixmap->devPrivate.ptr); if (priv->gpu_bo == NULL) { assert(priv->gpu_damage == NULL); @@ -1979,8 +2017,8 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, assert(priv->gpu_bo->proxy == NULL); if (priv->clear) { - int n = REGION_NUM_RECTS(region); - BoxPtr box = REGION_RECTS(region); + int n = RegionNumRects(region); + BoxPtr box = RegionRects(region); DBG(("%s: pending clear, doing partial fill\n", __FUNCTION__)); if (priv->cpu_bo) { @@ -2022,6 +2060,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, DBG(("%s: forced migration\n", __FUNCTION__)); assert(pixmap_contains_damage(pixmap, priv->gpu_damage)); + assert(priv->gpu_bo); ok = false; if (use_cpu_bo_for_download(sna, priv, &priv->gpu_damage->extents)) { @@ -2038,7 +2077,6 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, box, n); } sna_damage_destroy(&priv->gpu_damage); - priv->undamaged = true; } if (priv->gpu_damage && @@ -2048,6 +2086,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, __FUNCTION__, region->extents.x2 - region->extents.x1, region->extents.y2 - region->extents.y1)); + assert(priv->gpu_bo); if (priv->cpu_damage == NULL) { if ((flags & MOVE_WRITE) == 0 && @@ -2095,9 +2134,9 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, * reads. 
*/ if (flags & MOVE_WRITE) { - int n = REGION_NUM_RECTS(region), i; - BoxPtr boxes = REGION_RECTS(region); - BoxPtr blocks = malloc(sizeof(BoxRec) * REGION_NUM_RECTS(region)); + int n = RegionNumRects(region), i; + BoxPtr boxes = RegionRects(region); + BoxPtr blocks = malloc(sizeof(BoxRec) * RegionNumRects(region)); if (blocks) { for (i = 0; i < n; i++) { blocks[i].x1 = boxes[i].x1 & ~31; @@ -2150,12 +2189,11 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, } sna_damage_destroy(&priv->gpu_damage); - priv->undamaged = true; } else if (DAMAGE_IS_ALL(priv->gpu_damage) || sna_damage_contains_box__no_reduce(priv->gpu_damage, &r->extents)) { - BoxPtr box = REGION_RECTS(r); - int n = REGION_NUM_RECTS(r); + BoxPtr box = RegionRects(r); + int n = RegionNumRects(r); bool ok = false; DBG(("%s: region wholly inside damage\n", @@ -2175,14 +2213,13 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, box, n); sna_damage_subtract(&priv->gpu_damage, r); - priv->undamaged = true; } else { RegionRec need; pixman_region_init(&need); if (sna_damage_intersect(priv->gpu_damage, r, &need)) { - BoxPtr box = REGION_RECTS(&need); - int n = REGION_NUM_RECTS(&need); + BoxPtr box = RegionRects(&need); + int n = RegionNumRects(&need); bool ok = false; DBG(("%s: region intersects damage\n", @@ -2202,7 +2239,6 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, box, n); sna_damage_subtract(&priv->gpu_damage, r); - priv->undamaged = true; RegionUninit(&need); } } @@ -2212,7 +2248,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, } done: - if (flags & MOVE_WRITE) { + if ((flags & (MOVE_WRITE | MOVE_ASYNC_HINT)) == MOVE_WRITE) { DBG(("%s: applying cpu damage\n", __FUNCTION__)); assert(!DAMAGE_IS_ALL(priv->cpu_damage)); assert_pixmap_contains_box(pixmap, RegionExtents(region)); @@ -2226,7 +2262,6 @@ done: __FUNCTION__)); sna_pixmap_free_gpu(sna, priv); } - priv->undamaged = false; } if (priv->flush) { assert(!priv->shm); @@ -2245,8 +2280,8 @@ out: } if ((flags & MOVE_ASYNC_HINT) == 0 && priv->cpu_bo) { DBG(("%s: syncing cpu bo\n", __FUNCTION__)); - kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo); - assert(!kgem_bo_is_busy(priv->cpu_bo)); + kgem_bo_sync__cpu_full(&sna->kgem, + priv->cpu_bo, flags & MOVE_WRITE); } priv->cpu = (flags & MOVE_ASYNC_HINT) == 0; assert(pixmap->devPrivate.ptr); @@ -2354,31 +2389,30 @@ static inline struct sna_pixmap * sna_pixmap_mark_active(struct sna *sna, struct sna_pixmap *priv) { assert(priv->gpu_bo); - if (USE_INACTIVE && - !priv->pinned && priv->gpu_bo->proxy == NULL && - (priv->create & KGEM_CAN_CREATE_LARGE) == 0) - list_move(&priv->inactive, &sna->active_pixmaps); return priv; } -static bool +bool sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int flags) { struct sna *sna = to_sna_from_pixmap(pixmap); struct sna_pixmap *priv = sna_pixmap(pixmap); RegionRec i, r; - DBG(("%s()\n", __FUNCTION__)); + DBG(("%s: pixmap=%ld box=(%d, %d), (%d, %d), flags=%x\n", + __FUNCTION__, pixmap->drawable.serialNumber, + box->x1, box->y1, box->x2, box->y2, flags)); + assert(box->x2 > box->x1 && box->y2 > box->y1); assert_pixmap_damage(pixmap); assert_pixmap_contains_box(pixmap, box); assert(!wedged(sna)); + assert(priv->gpu_damage == NULL || priv->gpu_bo); if (sna_damage_is_all(&priv->gpu_damage, pixmap->drawable.width, pixmap->drawable.height)) { sna_damage_destroy(&priv->cpu_damage); - priv->undamaged = false; list_del(&priv->list); goto done; } @@ -2390,7 +2424,6 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl 
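/* [editor's note -- illustration] The hunk just above records written
 * damage at a coarser granularity: each box is rounded out to 32-pixel
 * blocks ("blocks[i].x1 = boxes[i].x1 & ~31") so the damage region stays
 * small and whole cachelines are covered, at the cost of marking a little
 * more area than was strictly written. Rounding a box outward to a
 * power-of-two grid looks like this (standalone sketch):
 *
 *     typedef struct { short x1, y1, x2, y2; } box_t;
 *
 *     static void box_round_to_blocks(box_t *b, int shift)
 *     {
 *         const int mask = (1 << shift) - 1;  // e.g. shift = 5 -> 32 px
 *         b->x1 &= ~mask;
 *         b->y1 &= ~mask;
 *         b->x2 = (b->x2 + mask) & ~mask;
 *         b->y2 = (b->y2 + mask) & ~mask;
 *     }
 */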
assert_pixmap_damage(pixmap); if (priv->cpu_damage == NULL) { - priv->undamaged = false; list_del(&priv->list); return sna_pixmap_move_to_gpu(pixmap, flags); } @@ -2398,6 +2431,8 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl if (priv->gpu_bo == NULL) { unsigned create, tiling; + assert(priv->gpu_damage == NULL); + create = CREATE_INACTIVE; if (pixmap->usage_hint == SNA_CREATE_FB) create |= CREATE_EXACT | CREATE_SCANOUT; @@ -2423,6 +2458,11 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl priv->mapped = false; } + if (priv->shm) { + assert(!priv->flush); + sna_add_flush_pixmap(sna, priv, priv->cpu_bo); + } + region_set(&r, box); if (MIGRATE_ALL || region_subsumes_damage(&r, priv->cpu_damage)) { int n; @@ -2437,15 +2477,12 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl pixmap, priv->cpu_bo, 0, 0, pixmap, priv->gpu_bo, 0, 0, box, n, 0); - if (ok && priv->shm) { - assert(!priv->flush); - sna_add_flush_pixmap(sna, priv, priv->cpu_bo); - } } if (!ok) { + assert(!priv->mapped); if (pixmap->devPrivate.ptr == NULL) { - assert(priv->stride && priv->ptr); - pixmap->devPrivate.ptr = priv->ptr; + assert(priv->ptr && priv->stride); + pixmap->devPrivate.ptr = PTR(priv->ptr); pixmap->devKind = priv->stride; } if (n == 1 && !priv->pinned && @@ -2471,7 +2508,6 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl sna_damage_destroy(&priv->cpu_damage); list_del(&priv->list); - priv->undamaged = true; } else if (DAMAGE_IS_ALL(priv->cpu_damage) || sna_damage_contains_box__no_reduce(priv->cpu_damage, box)) { bool ok = false; @@ -2481,15 +2517,12 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl pixmap, priv->cpu_bo, 0, 0, pixmap, priv->gpu_bo, 0, 0, box, 1, 0); - if (ok && priv->shm) { - assert(!priv->flush); - sna_add_flush_pixmap(sna, priv, priv->cpu_bo); - } } if (!ok) { + assert(!priv->mapped); if (pixmap->devPrivate.ptr == NULL) { - assert(priv->stride && priv->ptr); - pixmap->devPrivate.ptr = priv->ptr; + assert(priv->ptr && priv->stride); + pixmap->devPrivate.ptr = PTR(priv->ptr); pixmap->devKind = priv->stride; } ok = sna_write_boxes(sna, pixmap, @@ -2503,12 +2536,11 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl return false; sna_damage_subtract(&priv->cpu_damage, &r); - priv->undamaged = true; } else if (sna_damage_intersect(priv->cpu_damage, &r, &i)) { - int n = REGION_NUM_RECTS(&i); + int n = RegionNumRects(&i); bool ok; - box = REGION_RECTS(&i); + box = RegionRects(&i); ok = false; if (use_cpu_bo_for_upload(sna, priv, 0)) { DBG(("%s: using CPU bo for upload to GPU, %d boxes\n", __FUNCTION__, n)); @@ -2516,15 +2548,12 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl pixmap, priv->cpu_bo, 0, 0, pixmap, priv->gpu_bo, 0, 0, box, n, 0); - if (ok && priv->shm) { - assert(!priv->flush); - sna_add_flush_pixmap(sna, priv, priv->cpu_bo); - } } if (!ok) { + assert(!priv->mapped); if (pixmap->devPrivate.ptr == NULL) { - assert(priv->stride && priv->ptr); - pixmap->devPrivate.ptr = priv->ptr; + assert(priv->ptr && priv->stride); + pixmap->devPrivate.ptr = PTR(priv->ptr); pixmap->devKind = priv->stride; } ok = sna_write_boxes(sna, pixmap, @@ -2538,26 +2567,21 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl return false; sna_damage_subtract(&priv->cpu_damage, &r); - priv->undamaged = true; RegionUninit(&i); } - if (priv->shm) { - 
assert(!priv->flush); - sna_add_flush_pixmap(sna, priv, priv->cpu_bo); - } - done: if (flags & MOVE_WRITE) { priv->clear = false; priv->cpu = false; - if (priv->cpu_damage == NULL && box_inplace(pixmap, box)) { + if (priv->cpu_damage == NULL && + box_inplace(pixmap, &r.extents)) { DBG(("%s: large operation on undamaged, promoting to full GPU\n", __FUNCTION__)); + assert(priv->gpu_bo->proxy == NULL); sna_damage_all(&priv->gpu_damage, pixmap->drawable.width, pixmap->drawable.height); - priv->undamaged = false; } } @@ -2582,6 +2606,8 @@ sna_drawable_use_bo(DrawablePtr drawable, unsigned flags, const BoxRec *box, box->x1, box->y1, box->x2, box->y2, flags)); + assert(box->x2 > box->x1 && box->y2 > box->y1); + assert(pixmap->refcnt); assert_pixmap_damage(pixmap); assert_drawable_contains_box(drawable, box); @@ -2604,7 +2630,7 @@ sna_drawable_use_bo(DrawablePtr drawable, unsigned flags, const BoxRec *box, flags |= PREFER_GPU; if (priv->shm) flags &= ~PREFER_GPU; - if (priv->cpu && (flags & FORCE_GPU) == 0) + if (priv->cpu && (flags & (FORCE_GPU | IGNORE_CPU)) == 0) flags &= ~PREFER_GPU; DBG(("%s: flush=%d, shm=%d, cpu=%d => flags=%x\n", @@ -2619,6 +2645,7 @@ sna_drawable_use_bo(DrawablePtr drawable, unsigned flags, const BoxRec *box, if (DAMAGE_IS_ALL(priv->gpu_damage)) { DBG(("%s: use GPU fast path (all-damaged)\n", __FUNCTION__)); assert(priv->cpu_damage == NULL); + assert(priv->gpu_bo); goto use_gpu_bo; } @@ -2692,7 +2719,6 @@ sna_drawable_use_bo(DrawablePtr drawable, unsigned flags, const BoxRec *box, sna_damage_subtract(&priv->cpu_damage, &region); if (priv->cpu_damage == NULL) { list_del(&priv->list); - priv->undamaged = false; priv->cpu = false; } } @@ -2721,6 +2747,7 @@ create_gpu_bo: region.extents.x2, region.extents.y2)); if (priv->gpu_damage) { + assert(priv->gpu_bo); if (!priv->cpu_damage) { if (sna_damage_contains_box__no_reduce(priv->gpu_damage, &region.extents)) { @@ -2779,18 +2806,19 @@ move_to_gpu: done: assert(priv->gpu_bo != NULL); + assert(priv->gpu_bo->refcnt); if (sna_damage_is_all(&priv->gpu_damage, pixmap->drawable.width, pixmap->drawable.height)) { sna_damage_destroy(&priv->cpu_damage); list_del(&priv->list); - priv->undamaged = false; *damage = NULL; } else *damage = &priv->gpu_damage; DBG(("%s: using GPU bo with damage? %d\n", __FUNCTION__, *damage != NULL)); + assert(*damage == NULL || !DAMAGE_IS_ALL(*damage)); assert(priv->gpu_bo->proxy == NULL); assert(priv->clear == false); assert(priv->cpu == false); @@ -2799,22 +2827,34 @@ done: use_gpu_bo: DBG(("%s: using whole GPU bo\n", __FUNCTION__)); assert(priv->gpu_bo != NULL); + assert(priv->gpu_bo->refcnt); assert(priv->gpu_bo->proxy == NULL); + assert(priv->gpu_damage); priv->clear = false; priv->cpu = false; - if (USE_INACTIVE && - !priv->pinned && (priv->create & KGEM_CAN_CREATE_LARGE) == 0) - list_move(&priv->inactive, - &to_sna_from_pixmap(pixmap)->active_pixmaps); *damage = NULL; return priv->gpu_bo; use_cpu_bo: - if (!USE_CPU_BO) - return NULL; + if (!USE_CPU_BO || priv->cpu_bo == NULL) { +cpu_fail: + if ((flags & FORCE_GPU) && priv->gpu_bo) { + get_drawable_deltas(drawable, pixmap, &dx, &dy); + + region.extents = *box; + region.extents.x1 += dx; + region.extents.x2 += dx; + region.extents.y1 += dy; + region.extents.y2 += dy; + region.data = NULL; + + goto move_to_gpu; + } - if (priv->cpu_bo == NULL) return NULL; + } + + assert(priv->cpu_bo->refcnt); sna = to_sna_from_pixmap(pixmap); if ((flags & FORCE_GPU) == 0 && @@ -2851,12 +2891,25 @@ use_cpu_bo: } if (!sna->kgem.can_blt_cpu) - return NULL; + goto cpu_fail; if (!sna_drawable_move_region_to_cpu(&pixmap->drawable, &region, - MOVE_READ | MOVE_ASYNC_HINT)) { + (flags & IGNORE_CPU ? MOVE_READ : 0) | MOVE_WRITE | MOVE_ASYNC_HINT)) { DBG(("%s: failed to move-to-cpu, fallback\n", __FUNCTION__)); - return NULL; + goto cpu_fail; + } + + if (priv->shm) { + assert(!priv->flush); + sna_add_flush_pixmap(sna, priv, priv->cpu_bo); + + /* As we may have flushed and retired,, recheck for busy bo */ + if ((flags & FORCE_GPU) == 0 && !kgem_bo_is_busy(priv->cpu_bo)) + return NULL; + } + if (priv->flush) { + assert(!priv->shm); + sna_add_flush_pixmap(sna, priv, priv->gpu_bo); } if (sna_damage_is_all(&priv->cpu_damage, @@ -2873,21 +2926,9 @@ use_cpu_bo: *damage = &priv->cpu_damage; } - if (priv->shm) { - assert(!priv->flush); - sna_add_flush_pixmap(sna, priv, priv->cpu_bo); - - /* As we may have flushed and retired,, recheck for busy bo */ - if ((flags & FORCE_GPU) == 0 && !kgem_bo_is_busy(priv->cpu_bo)) - return NULL; - } - if (priv->flush) { - assert(!priv->shm); - sna_add_flush_pixmap(sna, priv, priv->gpu_bo); - } - DBG(("%s: using CPU bo with damage? 
%d\n", __FUNCTION__, *damage != NULL)); + assert(damage == NULL || !DAMAGE_IS_ALL(*damage)); assert(priv->clear == false); return priv->cpu_bo; } @@ -3001,19 +3042,21 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) pixmap->drawable.height); } + assert(priv->gpu_damage == NULL || priv->gpu_bo); + if (sna_damage_is_all(&priv->gpu_damage, pixmap->drawable.width, pixmap->drawable.height)) { DBG(("%s: already all-damaged\n", __FUNCTION__)); sna_damage_destroy(&priv->cpu_damage); list_del(&priv->list); - priv->undamaged = false; - assert(priv->cpu == false); + assert(priv->cpu == false || IS_CPU_MAP(priv->gpu_bo->map)); goto active; } if (flags & MOVE_WRITE && priv->gpu_bo && priv->gpu_bo->proxy) { DBG(("%s: discarding cached upload buffer\n", __FUNCTION__)); + assert(priv->gpu_damage == NULL); kgem_bo_destroy(&sna->kgem, priv->gpu_bo); priv->gpu_bo = NULL; } @@ -3045,6 +3088,8 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) create = 0; if (priv->cpu_damage && priv->cpu_bo == NULL) create = CREATE_GTT_MAP | CREATE_INACTIVE; + if (flags & MOVE_INPLACE_HINT) + create = CREATE_GTT_MAP | CREATE_INACTIVE; priv->gpu_bo = kgem_create_2d(&sna->kgem, @@ -3055,6 +3100,7 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) } if (priv->gpu_bo == NULL) { DBG(("%s: not creating GPU bo\n", __FUNCTION__)); + assert(priv->gpu_damage == NULL); assert(list_is_empty(&priv->list)); return NULL; } @@ -3066,6 +3112,7 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) * synchronisation that takes the most time. This is * mitigated by avoiding fallbacks in the first place. */ + assert(priv->gpu_bo->proxy == NULL); sna_damage_all(&priv->gpu_damage, pixmap->drawable.width, pixmap->drawable.height); @@ -3078,17 +3125,16 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) if (priv->gpu_bo->proxy) { DBG(("%s: reusing cached upload\n", __FUNCTION__)); assert((flags & MOVE_WRITE) == 0); + assert(priv->gpu_damage == NULL); return priv; } if (priv->cpu_damage == NULL) goto done; - if (priv->mapped) { - assert(priv->stride); - pixmap->devPrivate.ptr = priv->ptr; - pixmap->devKind = priv->stride; - priv->mapped = false; + if (priv->shm) { + assert(!priv->flush); + sna_add_flush_pixmap(sna, priv, priv->cpu_bo); } n = sna_damage_get_boxes(priv->cpu_damage, &box); @@ -3098,9 +3144,6 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) assert(pixmap_contains_damage(pixmap, priv->cpu_damage)); DBG(("%s: uploading %d damage boxes\n", __FUNCTION__, n)); - if (!priv->cpu) - flags |= MOVE_ASYNC_HINT; - ok = false; if (use_cpu_bo_for_upload(sna, priv, flags)) { DBG(("%s: using CPU bo for upload to GPU\n", __FUNCTION__)); @@ -3110,9 +3153,15 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) box, n, 0); } if (!ok) { + if (priv->mapped) { + assert(priv->stride && priv->stride); + pixmap->devPrivate.ptr = PTR(priv->ptr); + pixmap->devKind = priv->stride; + priv->mapped = false; + } if (pixmap->devPrivate.ptr == NULL) { - assert(priv->stride && priv->ptr); - pixmap->devPrivate.ptr = priv->ptr; + assert(priv->ptr && priv->stride); + pixmap->devPrivate.ptr = PTR(priv->ptr); pixmap->devKind = priv->stride; } if (n == 1 && !priv->pinned && @@ -3137,15 +3186,14 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) __sna_damage_destroy(DAMAGE_PTR(priv->cpu_damage)); priv->cpu_damage = NULL; - priv->undamaged = true; - - if (priv->shm) { - assert(!priv->flush); - sna_add_flush_pixmap(sna, priv, priv->cpu_bo); - } /* For large bo, try to keep only a single copy around */ - if 
(priv->create & KGEM_CAN_CREATE_LARGE) { + if (priv->create & KGEM_CAN_CREATE_LARGE || + flags & MOVE_SOURCE_HINT) { + DBG(("%s: disposing of system copy for large/source\n", + __FUNCTION__)); + assert(!priv->shm); + assert(priv->gpu_bo->proxy == NULL); sna_damage_all(&priv->gpu_damage, pixmap->drawable.width, pixmap->drawable.height); @@ -3157,14 +3205,8 @@ done: sna_damage_reduce_all(&priv->gpu_damage, pixmap->drawable.width, pixmap->drawable.height); - if (DAMAGE_IS_ALL(priv->gpu_damage)) { - priv->undamaged = false; - if (priv->ptr) { - assert(priv->cpu_bo == NULL || !priv->cpu_bo->flush); - assert(!priv->shm); - sna_pixmap_free_cpu(sna, priv); - } - } + if (DAMAGE_IS_ALL(priv->gpu_damage)) + sna_pixmap_free_cpu(sna, priv); active: if (flags & MOVE_WRITE) @@ -3209,7 +3251,7 @@ static bool must_check sna_gc_move_to_cpu(GCPtr gc, if (gc->clientClipType == CT_PIXMAP) { PixmapPtr clip = gc->clientClip; - gc->clientClip = BitmapToRegion(gc->pScreen, clip); + gc->clientClip = region_from_bitmap(gc->pScreen, clip); gc->pScreen->DestroyPixmap(clip); gc->clientClipType = gc->clientClip ? CT_REGION : CT_NONE; changes |= GCClipMask; @@ -3346,24 +3388,6 @@ static inline void box_add_pt(BoxPtr box, int16_t x, int16_t y) box->y2 = y; } -static int16_t bound(int16_t a, uint16_t b) -{ - int v = (int)a + (int)b; - if (v > MAXSHORT) - return MAXSHORT; - return v; -} - -static int16_t clamp(int16_t a, int16_t b) -{ - int v = (int)a + (int)b; - if (v > MAXSHORT) - return MAXSHORT; - if (v < MINSHORT) - return MINSHORT; - return v; -} - static inline bool box32_to_box16(const Box32Rec *b32, BoxRec *b16) { b16->x1 = b32->x1; @@ -3394,141 +3418,10 @@ static inline void box32_add_rect(Box32Rec *box, const xRectangle *r) } static bool -sna_put_image_upload_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, - int x, int y, int w, int h, char *bits, int stride) -{ - PixmapPtr pixmap = get_drawable_pixmap(drawable); - struct sna *sna = to_sna_from_pixmap(pixmap); - struct sna_pixmap *priv = sna_pixmap(pixmap); - BoxPtr box; - int nbox; - int16_t dx, dy; - - box = REGION_RECTS(region); - nbox = REGION_NUM_RECTS(region); - - DBG(("%s: %d x [(%d, %d), (%d, %d)...]\n", - __FUNCTION__, nbox, - box->x1, box->y1, box->x2, box->y2)); - - if (gc->alu != GXcopy) - return false; - - if (priv->gpu_bo == NULL && - !sna_pixmap_create_mappable_gpu(pixmap)) - return false; - - assert(priv->gpu_bo); - assert(priv->gpu_bo->proxy == NULL); - - if (!priv->pinned && nbox == 1 && - box->x1 <= 0 && box->y1 <= 0 && - box->x2 >= pixmap->drawable.width && - box->y2 >= pixmap->drawable.height) - return sna_replace(sna, pixmap, &priv->gpu_bo, bits, stride); - - get_drawable_deltas(drawable, pixmap, &dx, &dy); - x += dx + drawable->x; - y += dy + drawable->y; - - return sna_write_boxes(sna, pixmap, - priv->gpu_bo, 0, 0, - bits, stride, -x, -y, - box, nbox); -} - -static bool upload_inplace(struct sna *sna, - PixmapPtr pixmap, - struct sna_pixmap *priv, - RegionRec *region) -{ - if (priv->shm) { - DBG(("%s: no, SHM Pixmap\n", __FUNCTION__)); - return false; - } - - if (priv->create & KGEM_CAN_CREATE_LARGE) { - if (priv->gpu_bo) { - DBG(("%s: yes, large buffer and already have GPU bo\n", - __FUNCTION__)); - return true; - } - if (priv->cpu_bo){ - DBG(("%s: no, large buffer and already have CPU bo\n", - __FUNCTION__)); - return false; - } - } - - if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo) && - !(priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))) { - DBG(("%s: yes, CPU bo is busy, but the GPU bo is not\n", __FUNCTION__)); - return true; - 
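/* [editor's note -- reference sketch] The bound()/clamp() helpers deleted
 * a little further up did saturating 16-bit coordinate arithmetic:
 * bound() adds an unsigned offset and caps only at MAXSHORT, clamp() adds
 * a signed offset and caps at both ends. A standalone equivalent using
 * <limits.h> constants instead of X's MAXSHORT/MINSHORT:
 *
 *     #include <limits.h>
 *     #include <stdint.h>
 *
 *     static int16_t clamp16(int32_t v)
 *     {
 *         if (v > SHRT_MAX) return SHRT_MAX;
 *         if (v < SHRT_MIN) return SHRT_MIN;
 *         return (int16_t)v;
 *     }
 *
 * so clamp(a, b) behaves as clamp16((int32_t)a + b); the helpers were
 * presumably dropped because no caller remained in this file.
 */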
} - - if (!region_inplace(sna, pixmap, region, priv, true)) { - DBG(("%s? no, region not suitable\n", __FUNCTION__)); - return false; - } - - if (sna->kgem.has_llc && !priv->flush) { - if (priv->cpu_bo) { - if (priv->cpu_damage && - kgem_bo_is_busy(priv->cpu_bo) && - !region_subsumes_damage(region, priv->cpu_damage)) { - DBG(("%s? yes, CPU bo is busy\n", __FUNCTION__)); - return true; - } - - DBG(("%s? no, have CPU bo\n", __FUNCTION__)); - return false; - } - - if (priv->create & KGEM_CAN_CREATE_CPU) { - DBG(("%s? no, can create CPU bo\n", __FUNCTION__)); - return false; - } - } - - if (priv->gpu_bo) { - if (priv->gpu_bo->proxy) - return false; - - if (!kgem_bo_can_map(&sna->kgem, priv->gpu_bo)) { - DBG(("%s? no, GPU bo not mappable\n", __FUNCTION__)); - return false; - } - - if (!kgem_bo_is_busy(priv->gpu_bo)) { - DBG(("%s? yes, GPU bo is idle\n", __FUNCTION__)); - return true; - } - - if (!priv->pinned && - region_subsumes_drawable(region, &pixmap->drawable)) { - DBG(("%s? yes, will replace busy GPU\n", __FUNCTION__)); - return true; - } - } - - if ((priv->create & (KGEM_CAN_CREATE_GPU | KGEM_CAN_CREATE_CPU)) == KGEM_CAN_CREATE_GPU && - region_subsumes_drawable(region, &pixmap->drawable)) { - DBG(("%s? yes, will fill fresh GPU bo\n", __FUNCTION__)); - return true; - } - - return false; -} - -static bool sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, int x, int y, int w, int h, char *bits, int stride) { PixmapPtr pixmap = get_drawable_pixmap(drawable); - struct sna *sna = to_sna_from_pixmap(pixmap); - struct sna_pixmap *priv = sna_pixmap(pixmap); - char *dst_bits; - int dst_stride; BoxRec *box; int16_t dx, dy; int n; @@ -3538,170 +3431,22 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, if (gc->alu != GXcopy) return false; - if (!priv) { - if (drawable->depth < 8) - return false; - - goto blt; - } - - /* XXX performing the upload inplace is currently about 20x slower - * for putimage10 on gen6 -- mostly due to slow page faulting in kernel. - * So we try again with vma caching and only for pixmaps who will be - * immediately flushed... 
- */ - if (upload_inplace(sna, pixmap, priv, region) && - sna_put_image_upload_blt(drawable, gc, region, - x, y, w, h, bits, stride)) { - if (!DAMAGE_IS_ALL(priv->gpu_damage)) { - DBG(("%s: marking damage\n", __FUNCTION__)); - if (region_subsumes_drawable(region, &pixmap->drawable)) - sna_damage_destroy(&priv->cpu_damage); - else - sna_damage_subtract(&priv->cpu_damage, region); - if (priv->cpu_damage == NULL) { - sna_damage_all(&priv->gpu_damage, - pixmap->drawable.width, - pixmap->drawable.height); - list_del(&priv->list); - priv->undamaged = false; - } else - sna_damage_add(&priv->gpu_damage, region); - } - - /* And mark as having a valid GTT mapping for future uploads */ - if (priv->stride && kgem_bo_can_map(&sna->kgem, priv->gpu_bo)) { - pixmap->devPrivate.ptr = - kgem_bo_map__async(&sna->kgem, priv->gpu_bo); - if (pixmap->devPrivate.ptr) { - priv->mapped = true; - pixmap->devKind = priv->gpu_bo->pitch; - } - } - - assert_pixmap_damage(pixmap); - priv->clear = false; - priv->cpu = false; - return true; - } - - if (priv->gpu_bo && priv->gpu_bo->proxy) { - DBG(("%s: discarding cached upload buffer\n", __FUNCTION__)); - kgem_bo_destroy(&sna->kgem, priv->gpu_bo); - priv->gpu_bo = NULL; - } - - if (priv->mapped) { - assert(!priv->shm); - pixmap->devPrivate.ptr = NULL; - priv->mapped = false; - } - - /* If the GPU is currently accessing the CPU pixmap, then - * we will need to wait for that to finish before we can - * modify the memory. - * - * However, we can queue some writes to the GPU bo to avoid - * the wait. Or we can try to replace the CPU bo. - */ - if (!priv->shm && priv->cpu_bo && __kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) { - assert(!priv->cpu_bo->flush); - DBG(("%s: cpu bo will stall, upload damage and discard\n", - __FUNCTION__)); - if (priv->cpu_damage) { - if (!region_subsumes_drawable(region, &pixmap->drawable)) { - sna_damage_subtract(&priv->cpu_damage, region); - if (!sna_pixmap_move_to_gpu(pixmap, MOVE_READ | MOVE_ASYNC_HINT)) - return false; - } else { - sna_damage_destroy(&priv->cpu_damage); - priv->undamaged = false; - } - } - assert(priv->cpu_damage == NULL); - sna_damage_all(&priv->gpu_damage, - pixmap->drawable.width, - pixmap->drawable.height); - sna_pixmap_free_cpu(sna, priv); - assert(pixmap->devPrivate.ptr == NULL); - } - - if (pixmap->devPrivate.ptr == NULL && - !sna_pixmap_alloc_cpu(sna, pixmap, priv, false)) - return true; - - if (priv->cpu_bo) { - DBG(("%s: syncing CPU bo\n", __FUNCTION__)); - kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo); - } - - if (priv->clear) { - DBG(("%s: applying clear [%08x]\n", - __FUNCTION__, priv->clear_color)); - - if (priv->clear_color == 0) { - memset(pixmap->devPrivate.ptr, - 0, pixmap->devKind * pixmap->drawable.height); - } else { - pixman_fill(pixmap->devPrivate.ptr, - pixmap->devKind/sizeof(uint32_t), - pixmap->drawable.bitsPerPixel, - 0, 0, - pixmap->drawable.width, - pixmap->drawable.height, - priv->clear_color); - } - - sna_damage_all(&priv->cpu_damage, - pixmap->drawable.width, - pixmap->drawable.height); - sna_pixmap_free_gpu(sna, priv); - priv->undamaged = false; - } + if (drawable->depth < 8) + return false; - if (!DAMAGE_IS_ALL(priv->cpu_damage)) { - DBG(("%s: marking damage\n", __FUNCTION__)); - if (region_subsumes_drawable(region, &pixmap->drawable)) { - DBG(("%s: replacing entire pixmap\n", __FUNCTION__)); - sna_damage_all(&priv->cpu_damage, - pixmap->drawable.width, - pixmap->drawable.height); - sna_pixmap_free_gpu(sna, priv); - priv->undamaged = false; - assert(priv->gpu_damage == NULL); - } else { - 
sna_damage_subtract(&priv->gpu_damage, region); - sna_damage_add(&priv->cpu_damage, region); - if (priv->gpu_bo && - sna_damage_is_all(&priv->cpu_damage, - pixmap->drawable.width, - pixmap->drawable.height)) { - DBG(("%s: replaced entire pixmap\n", __FUNCTION__)); - sna_pixmap_free_gpu(sna, priv); - priv->undamaged = false; - } - } - if (priv->flush) { - assert(!priv->shm); - sna_add_flush_pixmap(sna, priv, priv->gpu_bo); - } - } - assert(!priv->flush || !list_is_empty(&priv->list)); - priv->cpu = true; + if (!sna_drawable_move_region_to_cpu(&pixmap->drawable, + region, MOVE_WRITE)) + return false; -blt: get_drawable_deltas(drawable, pixmap, &dx, &dy); x += dx + drawable->x; y += dy + drawable->y; DBG(("%s: upload(%d, %d, %d, %d)\n", __FUNCTION__, x, y, w, h)); - dst_stride = pixmap->devKind; - dst_bits = pixmap->devPrivate.ptr; - /* Region is pre-clipped and translated into pixmap space */ - box = REGION_RECTS(region); - n = REGION_NUM_RECTS(region); + box = RegionRects(region); + n = RegionNumRects(region); do { DBG(("%s: copy box (%d, %d)->(%d, %d)x(%d, %d)\n", __FUNCTION__, @@ -3722,9 +3467,9 @@ blt: assert(box->x2 - x <= w); assert(box->y2 - y <= h); - memcpy_blt(bits, dst_bits, + memcpy_blt(bits, pixmap->devPrivate.ptr, pixmap->drawable.bitsPerPixel, - stride, dst_stride, + stride, pixmap->devKind, box->x1 - x, box->y1 - y, box->x1, box->y1, box->x2 - box->x1, box->y2 - box->y1); @@ -3790,11 +3535,11 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, x += dx + drawable->x; y += dy + drawable->y; - kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_set_mode(&sna->kgem, KGEM_BLT, bo); /* Region is pre-clipped and translated into pixmap space */ - box = REGION_RECTS(region); - n = REGION_NUM_RECTS(region); + box = RegionRects(region); + n = RegionNumRects(region); do { int bx1 = (box->x1 - x) & ~7; int bx2 = (box->x2 - x + 7) & ~7; @@ -3810,7 +3555,9 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, if (!kgem_check_batch(&sna->kgem, 8) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc_and_exec(&sna->kgem, 2)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, bo)) + return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } @@ -3840,7 +3587,7 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, b[0] = XY_MONO_SRC_COPY | 3 << 20; b[0] |= ((box->x1 - x) & 7) << 17; b[1] = bo->pitch; - if (sna->kgem.gen >= 40 && bo->tiling) { + if (sna->kgem.gen >= 040 && bo->tiling) { b[0] |= BLT_DST_TILED; b[1] >>= 2; } @@ -3912,12 +3659,12 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, x += dx + drawable->x; y += dy + drawable->y; - kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_set_mode(&sna->kgem, KGEM_BLT, bo); skip = h * BitmapBytePad(w + left); for (i = 1 << (gc->depth-1); i; i >>= 1, bits += skip) { - const BoxRec *box = REGION_RECTS(region); - int n = REGION_NUM_RECTS(region); + const BoxRec *box = RegionRects(region); + int n = RegionNumRects(region); if ((gc->planemask & i) == 0) continue; @@ -3938,7 +3685,9 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, if (!kgem_check_batch(&sna->kgem, 12) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc_and_exec(&sna->kgem, 2)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, bo)) + return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } @@ -3968,7 +3717,7 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, b[0] = 
XY_FULL_MONO_PATTERN_MONO_SRC_BLT | 3 << 20; b[0] |= ((box->x1 - x) & 7) << 17; b[1] = bo->pitch; - if (sna->kgem.gen >= 40 && bo->tiling) { + if (sna->kgem.gen >= 040 && bo->tiling) { b[0] |= BLT_DST_TILED; b[1] >>= 2; } @@ -4037,7 +3786,7 @@ sna_put_image(DrawablePtr drawable, GCPtr gc, int depth, gc->pCompositeClip->extents.x2 < region.extents.x2 || gc->pCompositeClip->extents.y2 < region.extents.y2) { RegionIntersect(&region, &region, gc->pCompositeClip); - if (!RegionNotEmpty(&region)) + if (RegionNil(&region)) return; } @@ -4120,8 +3869,10 @@ move_to_gpu(PixmapPtr pixmap, struct sna_pixmap *priv, int h = box->y2 - box->y1; int count; - if (DAMAGE_IS_ALL(priv->gpu_damage)) + if (DAMAGE_IS_ALL(priv->gpu_damage)) { + assert(priv->gpu_bo); return true; + } if (priv->gpu_bo) { if (alu != GXcopy) @@ -4248,15 +3999,17 @@ sna_self_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, if (dst != src) get_drawable_deltas(dst, pixmap, &tx, &ty); - if (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage)) + if (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage) || priv->shm) goto fallback; if (priv->gpu_damage) { + assert(priv->gpu_bo); + if (alu == GXcopy && priv->clear) goto out; assert(priv->gpu_bo->proxy == NULL); - if (!sna_pixmap_move_to_gpu(pixmap, MOVE_WRITE | MOVE_READ)) { + if (!sna_pixmap_move_to_gpu(pixmap, MOVE_WRITE | MOVE_READ | MOVE_ASYNC_HINT)) { DBG(("%s: fallback - not a pure copy and failed to move dst to GPU\n", __FUNCTION__)); goto fallback; @@ -4334,7 +4087,7 @@ sna_pixmap_is_gpu(PixmapPtr pixmap) } static int -source_prefer_gpu(struct sna_pixmap *priv) +source_prefer_gpu(struct sna *sna, struct sna_pixmap *priv) { if (priv == NULL) { DBG(("%s: source unattached, use cpu\n", __FUNCTION__)); @@ -4348,6 +4101,7 @@ source_prefer_gpu(struct sna_pixmap *priv) if (priv->gpu_damage) { DBG(("%s: source has gpu damage, force gpu\n", __FUNCTION__)); + assert(priv->gpu_bo); return PREFER_GPU | FORCE_GPU; } @@ -4357,13 +4111,50 @@ source_prefer_gpu(struct sna_pixmap *priv) } if (DAMAGE_IS_ALL(priv->cpu_damage)) - return 0; + return priv->cpu_bo && kgem_is_idle(&sna->kgem); DBG(("%s: source has GPU bo? %d\n", __FUNCTION__, priv->gpu_bo != NULL)); return priv->gpu_bo != NULL; } +static bool use_shm_bo(struct sna *sna, + struct kgem_bo *bo, + struct sna_pixmap *priv, + int alu) +{ + if (priv == NULL || priv->cpu_bo == NULL) { + DBG(("%s: no, not attached\n", __FUNCTION__)); + return false; + } + + if (!priv->shm) { + DBG(("%s: yes, ordinary CPU bo\n", __FUNCTION__)); + return true; + } + + if (alu != GXcopy) { + DBG(("%s: yes, complex alu=%d\n", __FUNCTION__, alu)); + return true; + } + if (bo->tiling) { + DBG(("%s:, yes, dst tiled=%d\n", __FUNCTION__, bo->tiling)); + return true; + } + + if (__kgem_bo_is_busy(&sna->kgem, bo)) { + DBG(("%s: yes, dst is busy\n", __FUNCTION__)); + return true; + } + + if (__kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) { + DBG(("%s: yes, src is busy\n", __FUNCTION__)); + return true; + } + + return false; +} + static void sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, RegionPtr region, int dx, int dy, @@ -4435,7 +4226,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, if (dst_priv == NULL) goto fallback; - hint = source_prefer_gpu(src_priv) ?: + hint = source_prefer_gpu(sna, src_priv) ?: region_inplace(sna, dst_pixmap, region, dst_priv, alu_overwrites(alu)); if (dst_priv->cpu_damage && alu_overwrites(alu)) { @@ -4453,6 +4244,8 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, if (region->data == NULL) hint |= IGNORE_CPU; } + if (replaces) + hint |= IGNORE_CPU; bo = sna_drawable_use_bo(&dst_pixmap->drawable, hint, &region->extents, &damage); @@ -4493,7 +4286,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, if (src_priv && move_to_gpu(src_pixmap, src_priv, &region->extents, alu) && - sna_pixmap_move_to_gpu(src_pixmap, MOVE_READ)) { + sna_pixmap_move_to_gpu(src_pixmap, MOVE_READ | MOVE_ASYNC_HINT)) { DBG(("%s: move whole src_pixmap to GPU and copy\n", __FUNCTION__)); if (!sna->render.copy_boxes(sna, alu, @@ -4525,7 +4318,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, area.y2 += src_dy; if (!sna_pixmap_move_area_to_gpu(src_pixmap, &area, - MOVE_READ)) + MOVE_READ | MOVE_ASYNC_HINT)) goto fallback; if (!sna->render.copy_boxes(sna, alu, @@ -4545,11 +4338,11 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, if (bo != dst_priv->gpu_bo) goto fallback; - if (src_priv && src_priv->cpu_bo) { + if (use_shm_bo(sna, bo, src_priv, alu)) { bool ret; - DBG(("%s: region overlaps CPU damage, copy from CPU bo\n", - __FUNCTION__)); + DBG(("%s: region overlaps CPU damage, copy from CPU bo (shm? %d)\n", + __FUNCTION__, src_priv->shm)); assert(bo != dst_priv->cpu_bo); @@ -4561,29 +4354,31 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, if (!ret) goto fallback; + if (src_priv->shm) { + assert(!src_priv->flush); + sna_add_flush_pixmap(sna, src_priv, src_priv->cpu_bo); + } + if (!sna->render.copy_boxes(sna, alu, src_pixmap, src_priv->cpu_bo, src_dx, src_dy, dst_pixmap, bo, 0, 0, - box, n, 0)) { + box, n, src_priv->shm ? COPY_LAST : 0)) { DBG(("%s: fallback - accelerated copy boxes failed\n", __FUNCTION__)); goto fallback; } - if (src_priv->shm) { - assert(!src_priv->flush); - sna_add_flush_pixmap(sna, src_priv, src_priv->cpu_bo); - } - if (damage) sna_damage_add(damage, region); return; } - if (src_priv == NULL && + if (USE_USERPTR_UPLOADS && + src_priv == NULL && sna->kgem.has_userptr && - __kgem_bo_is_busy(&sna->kgem, bo) && - box_inplace(src_pixmap, &region->extents)) { + box_inplace(src_pixmap, &region->extents) && + ((sna->kgem.has_llc && bo->tiling && !bo->scanout) || + __kgem_bo_is_busy(&sna->kgem, bo))) { struct kgem_bo *src_bo; bool ok = false; @@ -4664,8 +4459,13 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, if (n == 1 && tmp->drawable.width == src_pixmap->drawable.width && - tmp->drawable.height == src_pixmap->drawable.height) + tmp->drawable.height == src_pixmap->drawable.height) { + DBG(("%s: caching upload for src bo\n", + __FUNCTION__)); + assert(src_priv->gpu_damage == NULL); + assert(src_priv->gpu_bo == NULL); kgem_proxy_bo_attach(src_bo, &src_priv->gpu_bo); + } if (!sna->render.copy_boxes(sna, alu, tmp, src_bo, dx, dy, @@ -4695,8 +4495,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, if (src_pixmap->devPrivate.ptr == NULL) { if (!src_priv->ptr) /* uninitialised!*/ return; - assert(src_priv->stride); - src_pixmap->devPrivate.ptr = src_priv->ptr; + src_pixmap->devPrivate.ptr = PTR(src_priv->ptr); src_pixmap->devKind = src_priv->stride; } } @@ -4721,15 +4520,16 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, goto fallback; } + assert(dst_priv->clear == false); dst_priv->cpu = false; if (damage) { + assert(dst_priv->gpu_bo->proxy == NULL); if (replaces) { sna_damage_destroy(&dst_priv->cpu_damage); sna_damage_all(&dst_priv->gpu_damage, dst_pixmap->drawable.width, dst_pixmap->drawable.height); list_del(&dst_priv->list); - dst_priv->undamaged = false; } else sna_damage_add(&dst_priv->gpu_damage, region); @@ -4752,6 +4552,7 @@ fallback: return; } + assert(dst_pixmap->devPrivate.ptr); do { pixman_fill(dst_pixmap->devPrivate.ptr, dst_pixmap->devKind/sizeof(uint32_t), @@ -4891,10 +4692,7 @@ sna_do_copy(DrawablePtr src, DrawablePtr dst, GCPtr gc, return NULL; } - if (src->pScreen->SourceValidate) - src->pScreen->SourceValidate(src, sx, sy, - width, height, - gc->subWindowMode); + SourceValidate(src, sx, sy, width, height, gc->subWindowMode); sx += src->x; sy += src->y; @@ -4938,7 +4736,7 @@ sna_do_copy(DrawablePtr src, DrawablePtr dst, GCPtr gc, * VT is inactive, make sure the region isn't empty */ if (((WindowPtr)src)->parent || - !RegionNotEmpty(&((WindowPtr)src)->borderClip)) { + RegionNil(&((WindowPtr)src)->borderClip)) { DBG(("%s: include inferiors\n", __FUNCTION__)); free_clip = clip = NotClippedByChildren((WindowPtr)src); } @@ -4973,17 +4771,17 @@ sna_do_copy(DrawablePtr src, DrawablePtr dst, GCPtr gc, if (free_clip) RegionDestroy(free_clip); } - DBG(("%s: src extents (%d, %d), (%d, %d) x %d\n", __FUNCTION__, + DBG(("%s: src extents (%d, %d), (%d, %d) x %ld\n", __FUNCTION__, region.extents.x1, region.extents.y1, region.extents.x2, region.extents.y2, - RegionNumRects(&region))); + (long)RegionNumRects(&region))); RegionTranslate(&region, dx-sx, dy-sy); if (gc->pCompositeClip->data) RegionIntersect(&region, &region, gc->pCompositeClip); - DBG(("%s: copy region (%d, %d), (%d, %d) x %d\n", __FUNCTION__, + DBG(("%s: copy region (%d, %d), (%d, %d) x %ld\n", __FUNCTION__, region.extents.x1, region.extents.y1, region.extents.x2, region.extents.y2, - RegionNumRects(&region))); + (long)RegionNumRects(&region))); if (RegionNotEmpty(&region)) copy(src, dst, gc, &region, sx-dx, sy-dy, bitPlane, closure); @@ -5005,8 +4803,8 @@ sna_fallback_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, RegionPtr region, int dx, int dy, Pixel bitplane, void *closure) { - DBG(("%s (boxes=%dx[(%d, %d), (%d, %d)...], src=+(%d, %d), alu=%d\n", - __FUNCTION__, RegionNumRects(region), + DBG(("%s (boxes=%ldx[(%d, %d), (%d, %d)...], src=+(%d, %d), alu=%d\n", + __FUNCTION__, (long)RegionNumRects(region), region->extents.x1, region->extents.y1, region->extents.x2, region->extents.y2, dx, dy, gc->alu)); @@ -5014,16 +4812,17 @@ sna_fallback_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, if (!sna_gc_move_to_cpu(gc, dst, region)) return; + RegionTranslate(region, dx, dy); + if (!sna_drawable_move_region_to_cpu(src, region, MOVE_READ)) + goto out_gc; + RegionTranslate(region, -dx, -dy); + if (src == dst || get_drawable_pixmap(src) == get_drawable_pixmap(dst)) { + DBG(("%s: self-copy\n", __FUNCTION__)); if (!sna_drawable_move_to_cpu(dst, MOVE_WRITE | MOVE_READ)) goto out_gc; } else { - RegionTranslate(region, dx, dy); - if (!sna_drawable_move_region_to_cpu(src, region, MOVE_READ)) - goto out_gc; - RegionTranslate(region, -dx, -dy); - if (!sna_drawable_move_region_to_cpu(dst, region, drawable_gc_flags(dst, gc, false))) goto out_gc; @@ -5049,10 +4848,11 @@ sna_copy_area(DrawablePtr src, DrawablePtr dst, GCPtr gc, if (gc->planemask == 0) return NULL; - DBG(("%s: src=(%d, %d)x(%d, %d)+(%d, %d) -> dst=(%d, %d)+(%d, %d)\n", + DBG(("%s: src=(%d, %d)x(%d, %d)+(%d, %d) -> dst=(%d, %d)+(%d, %d); alu=%d, pm=%lx\n", __FUNCTION__, src_x, src_y, width, height, src->x, src->y, - dst_x, dst_y, dst->x, dst->y)); + dst_x, dst_y, dst->x, dst->y, + gc->alu, gc->planemask)); if (FORCE_FALLBACK || !ACCEL_COPY_AREA || wedged(sna) || !PM_IS_SOLID(dst, gc->planemask)) @@ -5599,7 +5399,7 @@ no_damage_clipped: region_set(&clip, extents); region_maybe_clip(&clip, gc->pCompositeClip); - if (!RegionNotEmpty(&clip)) + if (RegionNil(&clip)) return true; assert(dx + clip.extents.x1 >= 0); @@ -5607,9 +5407,9 @@ no_damage_clipped: assert(dx + clip.extents.x2 <= pixmap->drawable.width); assert(dy + clip.extents.y2 <= pixmap->drawable.height); - DBG(("%s: clip %d x [(%d, %d), (%d, %d)] x %d [(%d, %d)...]\n", + DBG(("%s: clip %ld x [(%d, %d), (%d, %d)] x %d [(%d, %d)...]\n", __FUNCTION__, - REGION_NUM_RECTS(&clip), + (long)RegionNumRects(&clip), clip.extents.x1, clip.extents.y1, clip.extents.x2, clip.extents.y2, n, pt->x, pt->y)); @@ -5700,7 +5500,7 @@ damage_clipped: region_set(&clip, extents); region_maybe_clip(&clip, gc->pCompositeClip); - if (!RegionNotEmpty(&clip)) + if (RegionNil(&clip)) return true; assert(dx + clip.extents.x1 >= 0); @@ -5708,9 +5508,9 @@ damage_clipped: assert(dx + clip.extents.x2 <= pixmap->drawable.width); assert(dy + clip.extents.y2 <= pixmap->drawable.height); - DBG(("%s: clip %d x [(%d, %d), (%d, %d)] x %d [(%d, %d)...]\n", + DBG(("%s: clip %ld x [(%d, %d), (%d, %d)] x %d [(%d, %d)...]\n", __FUNCTION__, - REGION_NUM_RECTS(&clip), + RegionNumRects(&clip), clip.extents.x1, clip.extents.y1, clip.extents.x2, clip.extents.y2, n, pt->x, pt->y)); @@ -6021,7 +5821,7 @@ fallback: DBG(("%s: fallback\n", __FUNCTION__)); region.data = NULL; region_maybe_clip(&region, gc->pCompositeClip); - if (!RegionNotEmpty(&region)) + if (RegionNil(&region)) return; if (!sna_gc_move_to_cpu(gc, drawable, &region)) @@ -6061,7 +5861,7 @@ sna_set_spans(DrawablePtr drawable, GCPtr gc, char *src, fallback: region.data = NULL; 
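/* [editor's note -- illustration] A recurring change in this import is
 * replacing "!RegionNotEmpty(...)" with "RegionNil(...)" in early-out
 * paths like the fallback below. RegionNotEmpty() reports whether any
 * rectangles remain; RegionNil() (an xserver macro) is the cheaper test
 * for the degenerate empty region that the clipping helpers hand back,
 * so the usual pattern after clipping becomes:
 *
 *     region_maybe_clip(&region, gc->pCompositeClip);
 *     if (RegionNil(&region))
 *         return;   // nothing survived the clip
 */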
region_maybe_clip(&region, gc->pCompositeClip); - if (!RegionNotEmpty(&region)) + if (RegionNil(&region)) return; if (!sna_gc_move_to_cpu(gc, drawable, &region)) @@ -6098,10 +5898,11 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, BoxPtr box; int n; - DBG(("%s: plane=%x (%d,%d),(%d,%d)x%d\n", - __FUNCTION__, (unsigned)bitplane, RegionNumRects(region), + DBG(("%s: plane=%x (%d,%d),(%d,%d)x%ld\n", + __FUNCTION__, (unsigned)bitplane, region->extents.x1, region->extents.y1, - region->extents.x2, region->extents.y2)); + region->extents.x2, region->extents.y2, + (long)RegionNumRects(region))); box = RegionRects(region); n = RegionNumRects(region); @@ -6112,14 +5913,14 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, br00 = 3 << 20; br13 = arg->bo->pitch; - if (sna->kgem.gen >= 40 && arg->bo->tiling) { + if (sna->kgem.gen >= 040 && arg->bo->tiling) { br00 |= BLT_DST_TILED; br13 >>= 2; } br13 |= blt_depth(drawable->depth) << 24; br13 |= copy_ROP[gc->alu] << 16; - kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_set_mode(&sna->kgem, KGEM_BLT, arg->bo); do { int bx1 = (box->x1 + sx) & ~7; int bx2 = (box->x2 + sx + 7) & ~7; @@ -6142,7 +5943,9 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, if (!kgem_check_batch(&sna->kgem, 7+src_stride) || !kgem_check_bo_fenced(&sna->kgem, arg->bo) || !kgem_check_reloc(&sna->kgem, 1)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, arg->bo)) + return; /* XXX fallback? */ _kgem_set_mode(&sna->kgem, KGEM_BLT); } @@ -6184,7 +5987,9 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, if (!kgem_check_batch(&sna->kgem, 8) || !kgem_check_bo_fenced(&sna->kgem, arg->bo) || !kgem_check_reloc_and_exec(&sna->kgem, 2)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, arg->bo)) + return; /* XXX fallback? */ _kgem_set_mode(&sna->kgem, KGEM_BLT); } @@ -6276,14 +6081,14 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc, br00 = XY_MONO_SRC_COPY | 3 << 20; br13 = arg->bo->pitch; - if (sna->kgem.gen >= 40 && arg->bo->tiling) { + if (sna->kgem.gen >= 040 && arg->bo->tiling) { br00 |= BLT_DST_TILED; br13 >>= 2; } br13 |= blt_depth(drawable->depth) << 24; br13 |= copy_ROP[gc->alu] << 16; - kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_set_mode(&sna->kgem, KGEM_BLT, arg->bo); do { int bx1 = (box->x1 + sx) & ~7; int bx2 = (box->x2 + sx + 7) & ~7; @@ -6303,7 +6108,9 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc, if (!kgem_check_batch(&sna->kgem, 8) || !kgem_check_bo_fenced(&sna->kgem, arg->bo) || !kgem_check_reloc_and_exec(&sna->kgem, 2)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, arg->bo)) + return; /* XXX fallback? */ _kgem_set_mode(&sna->kgem, KGEM_BLT); } @@ -6504,7 +6311,7 @@ sna_copy_plane(DrawablePtr src, DrawablePtr dst, GCPtr gc, __FUNCTION__, region.extents.x1, region.extents.y1, region.extents.x2, region.extents.y2)); - if (!RegionNotEmpty(&region)) + if (RegionNil(&region)) goto empty; RegionTranslate(&region, @@ -6750,7 +6557,7 @@ sna_poly_point(DrawablePtr drawable, GCPtr gc, DBG(("%s: trying solid fill [%08lx] blt paths\n", __FUNCTION__, gc->fgPixel)); - if ((bo = sna_drawable_use_bo(drawable, 0, + if ((bo = sna_drawable_use_bo(drawable, PREFER_GPU, &region.extents, &damage)) && sna_poly_point_blt(drawable, bo, damage, gc, mode, n, pt, flags & 2)) @@ -6761,7 +6568,7 @@ fallback: DBG(("%s: fallback\n", __FUNCTION__)); region.data = NULL; region_maybe_clip(&region, gc->pCompositeClip); - if (!RegionNotEmpty(&region)) + if (RegionNil(&region)) return; if (!sna_gc_move_to_cpu(gc, drawable, &region)) @@ -6816,7 +6623,7 @@ sna_poly_zero_line_blt(DrawablePtr drawable, region_set(&clip, extents); if (clipped) { region_maybe_clip(&clip, gc->pCompositeClip); - if (!RegionNotEmpty(&clip)) + if (RegionNil(&clip)) return true; } @@ -6827,8 +6634,8 @@ sna_poly_zero_line_blt(DrawablePtr drawable, clip.extents.x2, clip.extents.y2, dx, dy, damage)); - extents = REGION_RECTS(&clip); - last_extents = extents + REGION_NUM_RECTS(&clip); + extents = RegionRects(&clip); + last_extents = extents + RegionNumRects(&clip); b = box; do { @@ -7223,6 +7030,8 @@ sna_poly_line_blt(DrawablePtr drawable, b->y1 = p.y; b->y2 = last.y; } + b->y2 += last.x == p.x; + b->x2 += last.y == p.y; DBG(("%s: blt (%d, %d), (%d, %d)\n", __FUNCTION__, b->x1, b->y1, b->x2, b->y2)); @@ -7241,7 +7050,7 @@ sna_poly_line_blt(DrawablePtr drawable, region_set(&clip, extents); region_maybe_clip(&clip, gc->pCompositeClip); - if (!RegionNotEmpty(&clip)) + if (RegionNil(&clip)) return true; last.x = pt->x + drawable->x; @@ -7280,6 +7089,8 @@ sna_poly_line_blt(DrawablePtr drawable, b->y1 = p.y; b->y2 = last.y; } + b->y2 += last.x == p.x; + b->x2 += last.y == p.y; DBG(("%s: blt (%d, %d), (%d, %d)\n", __FUNCTION__, b->x1, b->y1, b->x2, b->y2)); @@ -7336,6 +7147,8 @@ sna_poly_line_blt(DrawablePtr drawable, box.y1 = p.y; box.y2 = last.y; } + b->y2 += last.x == p.x; + b->x2 += last.y == p.y; DBG(("%s: blt (%d, %d), (%d, %d)\n", __FUNCTION__, box.x1, box.y1, box.x2, box.y2)); @@ -7683,7 +7496,7 @@ spans_fallback: } else { region_maybe_clip(&data.region, gc->pCompositeClip); - if (!RegionNotEmpty(&data.region)) + if (RegionNil(&data.region)) return; if (region_is_singular(&data.region)) @@ -7708,7 +7521,7 @@ spans_fallback: } else { region_maybe_clip(&data.region, gc->pCompositeClip); - if (!RegionNotEmpty(&data.region)) + if (RegionNil(&data.region)) return; if (region_is_singular(&data.region)) @@ -7785,7 +7598,7 @@ spans_fallback: fallback: DBG(("%s: fallback\n", __FUNCTION__)); region_maybe_clip(&data.region, gc->pCompositeClip); - if (!RegionNotEmpty(&data.region)) + if (RegionNil(&data.region)) return; if (!sna_gc_move_to_cpu(gc, drawable, &data.region)) @@ -7915,7 +7728,7 @@ sna_poly_segment_blt(DrawablePtr drawable, region_set(&clip, extents); region_maybe_clip(&clip, gc->pCompositeClip); - if (!RegionNotEmpty(&clip)) + if (RegionNil(&clip)) goto done; if (clip.data) { @@ -8022,7 +7835,7 @@ sna_poly_zero_segment_blt(DrawablePtr drawable, region_set(&clip, extents); if (clipped) { region_maybe_clip(&clip, gc->pCompositeClip); - if (!RegionNotEmpty(&clip)) + if (RegionNil(&clip)) return true; } DBG(("%s: [clipped] extents=(%d, %d), (%d, %d), delta=(%d, %d)\n", @@ -8034,8 
+7847,8 @@ sna_poly_zero_segment_blt(DrawablePtr drawable, jump = _jump[(damage != NULL) | !!(dx|dy) << 1]; b = box; - extents = REGION_RECTS(&clip); - last_extents = extents + REGION_NUM_RECTS(&clip); + extents = RegionRects(&clip); + last_extents = extents + RegionNumRects(&clip); do { int n = _n; const xSegment *s = _s; @@ -8590,7 +8403,7 @@ spans_fallback: } else { region_maybe_clip(&data.region, gc->pCompositeClip); - if (!RegionNotEmpty(&data.region)) + if (RegionNil(&data.region)) return; if (region_is_singular(&data.region)) @@ -8629,7 +8442,7 @@ spans_fallback: fallback: DBG(("%s: fallback\n", __FUNCTION__)); region_maybe_clip(&data.region, gc->pCompositeClip); - if (!RegionNotEmpty(&data.region)) + if (RegionNil(&data.region)) return; if (!sna_gc_move_to_cpu(gc, drawable, &data.region)) @@ -8657,6 +8470,7 @@ sna_poly_rectangle_extents(DrawablePtr drawable, GCPtr gc, Box32Rec box; int extra = gc->lineWidth >> 1; bool clipped; + bool zero = false; if (n == 0) return 0; @@ -8665,9 +8479,13 @@ sna_poly_rectangle_extents(DrawablePtr drawable, GCPtr gc, box.y1 = r->y; box.x2 = box.x1 + r->width; box.y2 = box.y1 + r->height; + zero |= (r->width | r->height) == 0; - while (--n) - box32_add_rect(&box, ++r); + while (--n) { + r++; + zero |= (r->width | r->height) == 0; + box32_add_rect(&box, r); + } box.x2++; box.y2++; @@ -8677,13 +8495,15 @@ sna_poly_rectangle_extents(DrawablePtr drawable, GCPtr gc, box.x2 += extra; box.y1 -= extra; box.y2 += extra; - } + zero = !zero; + } else + zero = true; clipped = box32_trim_and_translate(&box, drawable, gc); if (!box32_to_box16(&box, out)) return 0; - return 1 | clipped << 1; + return 1 | clipped << 1 | zero << 2; } static bool @@ -8722,7 +8542,7 @@ zero: xRectangle rr = *r++; if ((rr.width | rr.height) == 0) - continue; + continue; /* XXX -> PolyLine */ DBG(("%s - zero : r[%d] = (%d, %d) x (%d, %d)\n", __FUNCTION__, n, rr.x, rr.y, rr.width, rr.height)); @@ -8777,7 +8597,7 @@ zero_clipped: region_set(&clip, extents); region_maybe_clip(&clip, gc->pCompositeClip); - if (!RegionNotEmpty(&clip)) + if (RegionNil(&clip)) goto done; if (clip.data) { @@ -8791,7 +8611,7 @@ zero_clipped: n, rr.x, rr.y, rr.width, rr.height)); if ((rr.width | rr.height) == 0) - continue; + continue; /* XXX -> PolyLine */ rr.x += drawable->x; rr.y += drawable->y; @@ -8855,7 +8675,7 @@ zero_clipped: n, rr.x, rr.y, rr.width, rr.height)); if ((rr.width | rr.height) == 0) - continue; + continue; /* XXX -> PolyLine */ rr.x += drawable->x; rr.y += drawable->y; @@ -8923,7 +8743,7 @@ wide_clipped: __FUNCTION__, clip.extents.x1, clip.extents.y1, clip.extents.x2, clip.extents.y2)); - if (!RegionNotEmpty(&clip)) + if (RegionNil(&clip)) goto done; if (clip.data) { @@ -8935,7 +8755,7 @@ wide_clipped: int count; if ((rr.width | rr.height) == 0) - continue; + continue; /* XXX -> PolyLine */ rr.x += drawable->x; rr.y += drawable->y; @@ -9100,7 +8920,7 @@ wide: xRectangle rr = *r++; if ((rr.width | rr.height) == 0) - continue; + continue; /* XXX -> PolyLine */ rr.x += dx; rr.y += dy; @@ -9198,8 +9018,9 @@ sna_poly_rectangle(DrawablePtr drawable, GCPtr gc, int n, xRectangle *r) goto fallback; } - DBG(("%s: line=%d [%d], join=%d [%d], mask=%lu [%d]\n", + DBG(("%s: fill=_%d [%d], line=%d [%d], join=%d [%d], mask=%lu [%d]\n", __FUNCTION__, + gc->fillStyle, gc->fillStyle == FillSolid, gc->lineStyle, gc->lineStyle == LineSolid, gc->joinStyle, gc->joinStyle == JoinMiter, gc->planemask, PM_IS_SOLID(drawable, gc->planemask))); @@ -9207,7 +9028,7 @@ sna_poly_rectangle(DrawablePtr drawable, GCPtr gc, int 
n, xRectangle *r) if (!PM_IS_SOLID(drawable, gc->planemask)) goto fallback; - if (gc->lineStyle == LineSolid && gc->joinStyle == JoinMiter) { + if (flags & 4 && gc->fillStyle == FillSolid && gc->lineStyle == LineSolid && gc->joinStyle == JoinMiter) { DBG(("%s: trying blt solid fill [%08lx] paths\n", __FUNCTION__, gc->fgPixel)); if ((bo = sna_drawable_use_bo(drawable, PREFER_GPU, @@ -9231,7 +9052,7 @@ fallback: region.data = NULL; region_maybe_clip(&region, gc->pCompositeClip); - if (!RegionNotEmpty(&region)) + if (RegionNil(&region)) return; if (!sna_gc_move_to_cpu(gc, drawable, &region)) @@ -9370,7 +9191,7 @@ sna_poly_arc(DrawablePtr drawable, GCPtr gc, int n, xArc *arc) } else { region_maybe_clip(&data.region, gc->pCompositeClip); - if (!RegionNotEmpty(&data.region)) + if (RegionNil(&data.region)) return; if (region_is_singular(&data.region)) { @@ -9394,7 +9215,7 @@ sna_poly_arc(DrawablePtr drawable, GCPtr gc, int n, xArc *arc) } else { region_maybe_clip(&data.region, gc->pCompositeClip); - if (!RegionNotEmpty(&data.region)) + if (RegionNil(&data.region)) return; sna_gc_ops__tmp.FillSpans = sna_fill_spans__gpu; @@ -9430,7 +9251,7 @@ sna_poly_arc(DrawablePtr drawable, GCPtr gc, int n, xArc *arc) fallback: DBG(("%s -- fallback\n", __FUNCTION__)); region_maybe_clip(&data.region, gc->pCompositeClip); - if (!RegionNotEmpty(&data.region)) + if (RegionNil(&data.region)) return; if (!sna_gc_move_to_cpu(gc, drawable, &data.region)) @@ -9502,12 +9323,12 @@ sna_poly_fill_rect_blt(DrawablePtr drawable, r.y2 - r.y1 == pixmap->drawable.height) { struct sna_pixmap *priv = sna_pixmap(pixmap); if (bo == priv->gpu_bo) { + assert(priv->gpu_bo->proxy == NULL); sna_damage_all(&priv->gpu_damage, pixmap->drawable.width, pixmap->drawable.height); sna_damage_destroy(&priv->cpu_damage); list_del(&priv->list); - priv->undamaged = false; priv->clear = true; priv->clear_color = gc->alu == GXcopy ?
pixel : 0; @@ -9573,7 +9394,7 @@ sna_poly_fill_rect_blt(DrawablePtr drawable, region_set(&clip, extents); region_maybe_clip(&clip, gc->pCompositeClip); - if (!RegionNotEmpty(&clip)) + if (RegionNil(&clip)) goto done; if (clip.data == NULL) { @@ -9744,7 +9565,7 @@ sna_poly_fill_polygon(DrawablePtr draw, GCPtr gc, } else { region_maybe_clip(&data.region, gc->pCompositeClip); - if (!RegionNotEmpty(&data.region)) + if (RegionNil(&data.region)) return; if (region_is_singular(&data.region)) @@ -9781,7 +9602,7 @@ fallback: data.region.extents.x1, data.region.extents.y1, data.region.extents.x2, data.region.extents.y2)); region_maybe_clip(&data.region, gc->pCompositeClip); - if (!RegionNotEmpty(&data.region)) { + if (RegionNil(&data.region)) { DBG(("%s: nothing to do, all clipped\n", __FUNCTION__)); return; } @@ -9831,7 +9652,8 @@ sna_pixmap_get_source_bo(PixmapPtr pixmap) return upload; } - if (priv->gpu_damage && !sna_pixmap_move_to_gpu(pixmap, MOVE_READ)) + if (priv->gpu_damage && + !sna_pixmap_move_to_gpu(pixmap, MOVE_READ | MOVE_ASYNC_HINT)) return NULL; if (priv->cpu_damage && priv->cpu_bo) @@ -9873,17 +9695,19 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, DBG(("%s x %d [(%d, %d)x(%d, %d)...], clipped=%x\n", __FUNCTION__, n, r->x, r->y, r->width, r->height, clipped)); - kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_set_mode(&sna->kgem, KGEM_BLT, bo); if (!kgem_check_batch(&sna->kgem, 8+2*3) || !kgem_check_reloc(&sna->kgem, 2) || !kgem_check_bo_fenced(&sna->kgem, bo)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, bo)) + return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } br00 = XY_SCANLINE_BLT; br13 = bo->pitch; - if (sna->kgem.gen >= 40 && bo->tiling) { + if (sna->kgem.gen >= 040 && bo->tiling) { br00 |= BLT_DST_TILED; br13 >>= 2; } @@ -9988,7 +9812,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, region_set(&clip, extents); region_maybe_clip(&clip, gc->pCompositeClip); - if (!RegionNotEmpty(&clip)) + if (RegionNil(&clip)) goto done; b = sna->kgem.batch + sna->kgem.nbatch; @@ -10315,7 +10139,7 @@ sna_poly_fill_rect_tiled_blt(DrawablePtr drawable, region_set(&clip, extents); region_maybe_clip(&clip, gc->pCompositeClip); - if (!RegionNotEmpty(&clip)) + if (RegionNil(&clip)) goto done; if (clip.data == NULL) { @@ -10393,8 +10217,8 @@ sna_poly_fill_rect_tiled_blt(DrawablePtr drawable, region.data = NULL; RegionIntersect(&region, &region, &clip); - nbox = REGION_NUM_RECTS(&region); - box = REGION_RECTS(&region); + nbox = RegionNumRects(&region); + box = RegionRects(&region); while (nbox--) { int height = box->y2 - box->y1; int dst_y = box->y1; @@ -10487,7 +10311,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, DBG(("%s: pat offset (%d, %d)\n", __FUNCTION__ ,px, py)); br00 = XY_SCANLINE_BLT | px << 12 | py << 8 | 3 << 20; br13 = bo->pitch; - if (sna->kgem.gen >= 40 && bo->tiling) { + if (sna->kgem.gen >= 040 && bo->tiling) { br00 |= BLT_DST_TILED; br13 >>= 2; } @@ -10507,11 +10331,13 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, } while (--j); } - kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_set_mode(&sna->kgem, KGEM_BLT, bo); if (!kgem_check_batch(&sna->kgem, 9 + 2*3) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc(&sna->kgem, 1)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, bo)) + return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } @@ -10592,7 +10418,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, region_set(&clip, extents);
region_maybe_clip(&clip, gc->pCompositeClip); - if (!RegionNotEmpty(&clip)) + if (RegionNil(&clip)) return true; b = sna->kgem.batch + sna->kgem.nbatch; @@ -10783,11 +10609,11 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, origin->x, origin->y)); get_drawable_deltas(drawable, pixmap, &dx, &dy); - kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_set_mode(&sna->kgem, KGEM_BLT, bo); br00 = 3 << 20; br13 = bo->pitch; - if (sna->kgem.gen >= 40 && bo->tiling) { + if (sna->kgem.gen >= 040 && bo->tiling) { br00 |= BLT_DST_TILED; br13 >>= 2; } @@ -10821,7 +10647,9 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, if (!kgem_check_batch(&sna->kgem, 7+src_stride) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc(&sna->kgem, 1)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, bo)) + return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } @@ -10863,7 +10691,9 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, if (!kgem_check_batch(&sna->kgem, 8) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc_and_exec(&sna->kgem, 2)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, bo)) + return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } @@ -10920,7 +10750,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, region_set(&clip, extents); region_maybe_clip(&clip, gc->pCompositeClip); - if (!RegionNotEmpty(&clip)) + if (RegionNil(&clip)) return true; pat.x = origin->x + drawable->x; @@ -10963,7 +10793,9 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, if (!kgem_check_batch(&sna->kgem, 7+src_stride) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc(&sna->kgem, 1)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, bo)) + return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } @@ -11002,7 +10834,9 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, if (!kgem_check_batch(&sna->kgem, 8) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc_and_exec(&sna->kgem, 2)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, bo)) + return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } @@ -11103,7 +10937,9 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, if (!kgem_check_batch(&sna->kgem, 7+src_stride) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc(&sna->kgem, 1)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, bo)) + return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } @@ -11142,7 +10978,9 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, if (!kgem_check_batch(&sna->kgem, 8) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc_and_exec(&sna->kgem, 2)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, bo)) + return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } @@ -11252,7 +11090,9 @@ sna_poly_fill_rect_stippled_n_box__imm(struct sna *sna, if (!kgem_check_batch(&sna->kgem, 7+len) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc(&sna->kgem, 1)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, bo)) + return; /* XXX fallback? 
*/ _kgem_set_mode(&sna->kgem, KGEM_BLT); } @@ -11356,7 +11196,9 @@ sna_poly_fill_rect_stippled_n_box(struct sna *sna, if (!kgem_check_batch(&sna->kgem, 7+len) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc(&sna->kgem, 2)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, bo)) + return; /* XXX fallback? */ _kgem_set_mode(&sna->kgem, KGEM_BLT); } @@ -11479,11 +11321,11 @@ sna_poly_fill_rect_stippled_n_blt__imm(DrawablePtr drawable, clipped, gc->alu, gc->fillStyle == FillOpaqueStippled)); get_drawable_deltas(drawable, pixmap, &dx, &dy); - kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_set_mode(&sna->kgem, KGEM_BLT, bo); br00 = XY_MONO_SRC_COPY_IMM | 3 << 20; br13 = bo->pitch; - if (sna->kgem.gen >= 40 && bo->tiling) { + if (sna->kgem.gen >= 040 && bo->tiling) { br00 |= BLT_DST_TILED; br13 >>= 2; } @@ -11517,7 +11359,7 @@ sna_poly_fill_rect_stippled_n_blt__imm(DrawablePtr drawable, region_set(&clip, extents); region_maybe_clip(&clip, gc->pCompositeClip); - if (!RegionNotEmpty(&clip)) { + if (RegionNil(&clip)) { DBG(("%s: all clipped\n", __FUNCTION__)); return true; } @@ -11624,11 +11466,11 @@ sna_poly_fill_rect_stippled_n_blt(DrawablePtr drawable, extents, clipped); get_drawable_deltas(drawable, pixmap, &dx, &dy); - kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_set_mode(&sna->kgem, KGEM_BLT, bo); br00 = XY_MONO_SRC_COPY | 3 << 20; br13 = bo->pitch; - if (sna->kgem.gen >= 40 && bo->tiling) { + if (sna->kgem.gen >= 040 && bo->tiling) { br00 |= BLT_DST_TILED; br13 >>= 2; } @@ -11662,7 +11504,7 @@ sna_poly_fill_rect_stippled_n_blt(DrawablePtr drawable, region_set(&clip, extents); region_maybe_clip(&clip, gc->pCompositeClip); - if (!RegionNotEmpty(&clip)) { + if (RegionNil(&clip)) { DBG(("%s: all clipped\n", __FUNCTION__)); return true; } @@ -11943,10 +11785,10 @@ sna_poly_fill_rect(DrawablePtr draw, GCPtr gc, int n, xRectangle *rect) box_inplace(pixmap, &region.extents))) { DBG(("%s: promoting to full GPU\n", __FUNCTION__)); if (priv->gpu_bo) { + assert(priv->gpu_bo->proxy == NULL); sna_damage_all(&priv->gpu_damage, pixmap->drawable.width, pixmap->drawable.height); - priv->undamaged = false; } } if (priv->cpu_damage == NULL) { @@ -12000,7 +11842,7 @@ fallback: region.extents.x2, region.extents.y2)); region.data = NULL; region_maybe_clip(&region, gc->pCompositeClip); - if (!RegionNotEmpty(&region)) { + if (RegionNil(&region)) { DBG(("%s: nothing to do, all clipped\n", __FUNCTION__)); return; } @@ -12093,7 +11935,7 @@ sna_poly_fill_arc(DrawablePtr draw, GCPtr gc, int n, xArc *arc) } else { region_maybe_clip(&data.region, gc->pCompositeClip); - if (!RegionNotEmpty(&data.region)) + if (RegionNil(&data.region)) return; if (region_is_singular(&data.region)) @@ -12130,7 +11972,7 @@ fallback: data.region.extents.x1, data.region.extents.y1, data.region.extents.x2, data.region.extents.y2)); region_maybe_clip(&data.region, gc->pCompositeClip); - if (!RegionNotEmpty(&data.region)) { + if (RegionNil(&data.region)) { DBG(("%s: nothing to do, all clipped\n", __FUNCTION__)); return; } @@ -12256,19 +12098,21 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc, _y += drawable->y + dy; RegionTranslate(clip, dx, dy); - extents = REGION_RECTS(clip); - last_extents = extents + REGION_NUM_RECTS(clip); + extents = RegionRects(clip); + last_extents = extents + RegionNumRects(clip); if (!transparent) /* emulate miImageGlyphBlt */ sna_blt_fill_boxes(sna, GXcopy, bo, drawable->bitsPerPixel, - bg, extents, REGION_NUM_RECTS(clip)); + bg, extents, RegionNumRects(clip)); -
kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_set_mode(&sna->kgem, KGEM_BLT, bo); if (!kgem_check_batch(&sna->kgem, 16) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc(&sna->kgem, 1)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, bo)) + return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } @@ -12280,7 +12124,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc, b = sna->kgem.batch + sna->kgem.nbatch; b[0] = XY_SETUP_BLT | 3 << 20; b[1] = bo->pitch; - if (sna->kgem.gen >= 40 && bo->tiling) { + if (sna->kgem.gen >= 040 && bo->tiling) { b[0] |= BLT_DST_TILED; b[1] >>= 2; } @@ -12298,7 +12142,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc, sna->kgem.nbatch += 8; br00 = XY_TEXT_IMMEDIATE_BLT; - if (bo->tiling && sna->kgem.gen >= 40) + if (bo->tiling && sna->kgem.gen >= 040) br00 |= BLT_DST_TILED; do { @@ -12343,7 +12187,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc, b = sna->kgem.batch + sna->kgem.nbatch; b[0] = XY_SETUP_BLT | 3 << 20; b[1] = bo->pitch; - if (sna->kgem.gen >= 40 && bo->tiling) { + if (sna->kgem.gen >= 040 && bo->tiling) { b[0] |= BLT_DST_TILED; b[1] >>= 2; } @@ -12585,7 +12429,7 @@ sna_poly_text8(DrawablePtr drawable, GCPtr gc, region.data = NULL; region_maybe_clip(&region, gc->pCompositeClip); - if (!RegionNotEmpty(&region)) + if (RegionNil(&region)) return x + extents.overallRight; if (FORCE_FALLBACK) @@ -12659,7 +12503,7 @@ sna_poly_text16(DrawablePtr drawable, GCPtr gc, region.data = NULL; region_maybe_clip(&region, gc->pCompositeClip); - if (!RegionNotEmpty(&region)) + if (RegionNil(&region)) return x + extents.overallRight; if (FORCE_FALLBACK) @@ -12740,7 +12584,7 @@ sna_image_text8(DrawablePtr drawable, GCPtr gc, region.data = NULL; region_maybe_clip(&region, gc->pCompositeClip); - if (!RegionNotEmpty(&region)) + if (RegionNil(&region)) return; DBG(("%s: clipped extents (%d, %d), (%d, %d)\n", @@ -12822,7 +12666,7 @@ sna_image_text16(DrawablePtr drawable, GCPtr gc, region.data = NULL; region_maybe_clip(&region, gc->pCompositeClip); - if (!RegionNotEmpty(&region)) + if (RegionNil(&region)) return; DBG(("%s: clipped extents (%d, %d), (%d, %d)\n", @@ -12901,19 +12745,21 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc, _y += drawable->y + dy; RegionTranslate(clip, dx, dy); - extents = REGION_RECTS(clip); - last_extents = extents + REGION_NUM_RECTS(clip); + extents = RegionRects(clip); + last_extents = extents + RegionNumRects(clip); if (!transparent) /* emulate miImageGlyphBlt */ sna_blt_fill_boxes(sna, GXcopy, bo, drawable->bitsPerPixel, - bg, extents, REGION_NUM_RECTS(clip)); + bg, extents, RegionNumRects(clip)); - kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_set_mode(&sna->kgem, KGEM_BLT, bo); if (!kgem_check_batch(&sna->kgem, 16) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc(&sna->kgem, 1)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, bo)) + return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } @@ -12924,7 +12770,7 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc, b = sna->kgem.batch + sna->kgem.nbatch; b[0] = XY_SETUP_BLT | 1 << 20; b[1] = bo->pitch; - if (sna->kgem.gen >= 40 && bo->tiling) { + if (sna->kgem.gen >= 040 && bo->tiling) { b[0] |= BLT_DST_TILED; b[1] >>= 2; } @@ -13005,7 +12851,7 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc, b = sna->kgem.batch + sna->kgem.nbatch; b[0] = XY_SETUP_BLT | 1 << 20; b[1] = bo->pitch; - if (sna->kgem.gen >= 40 && bo->tiling) { + if (sna->kgem.gen >= 040 && bo->tiling) { b[0] |= BLT_DST_TILED; b[1] >>= 2; }
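A pattern that recurs throughout the hunks above and below: `_kgem_submit()` followed (implicitly) by an `assert()` that the buffer now fits is replaced by `kgem_submit()` plus an explicit re-check of `kgem_check_bo_fenced()`, returning early (sometimes with a `/* XXX fallback? */` note) when even a freshly flushed batch cannot accommodate the fenced object. That turns a hard assertion into a recoverable failure, letting the caller drop to a software path. The sketch below distills just that control flow; `struct batch`, `batch_check()`, `batch_submit()`, `emit_blt()` and `CAP` are illustrative stand-ins, not the driver's kgem API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CAP 8	/* toy batch capacity, far smaller than a real ring */

struct batch { uint32_t buf[CAP]; int n; };

/* Would this command sequence still fit in the open batch? */
static bool batch_check(const struct batch *b, int ndwords)
{
	return b->n + ndwords <= CAP;
}

/* Flush the accumulated commands and start a fresh batch. */
static void batch_submit(struct batch *b)
{
	printf("submit %d dwords\n", b->n);
	b->n = 0;
}

/* The pattern from the patch: try to make room by flushing, and if
 * even an empty batch cannot satisfy the request, report failure so
 * the caller can fall back instead of tripping an assert(). */
static bool emit_blt(struct batch *b, int ndwords)
{
	if (!batch_check(b, ndwords)) {
		batch_submit(b);
		if (!batch_check(b, ndwords))
			return false;	/* caller falls back to software */
	}
	while (ndwords--)
		b->buf[b->n++] = 0;	/* stand-in for real command dwords */
	return true;
}

int main(void)
{
	struct batch b = { {0}, 0 };
	printf("%d\n", emit_blt(&b, 6));	/* fits directly */
	printf("%d\n", emit_blt(&b, 6));	/* forces a submit, then fits */
	printf("%d\n", emit_blt(&b, 99));	/* can never fit: reports failure */
	return 0;
}

The same shape explains why `kgem_set_mode()` now takes the target bo as an extra argument in these hunks: the mode switch is the natural point to reserve space for the destination before any commands referencing it are emitted.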
@@ -13028,7 +12874,7 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc, sna->kgem.nbatch += 3 + len; b[0] = XY_TEXT_IMMEDIATE_BLT | (1 + len); - if (bo->tiling && sna->kgem.gen >= 40) + if (bo->tiling && sna->kgem.gen >= 040) b[0] |= BLT_DST_TILED; b[1] = (uint16_t)y1 << 16 | (uint16_t)x1; b[2] = (uint16_t)(y1+h) << 16 | (uint16_t)(x1+w); @@ -13122,7 +12968,7 @@ sna_image_glyph(DrawablePtr drawable, GCPtr gc, region.data = NULL; region_maybe_clip(&region, gc->pCompositeClip); - if (!RegionNotEmpty(&region)) + if (RegionNil(&region)) return; if (FORCE_FALLBACK) @@ -13200,7 +13046,7 @@ sna_poly_glyph(DrawablePtr drawable, GCPtr gc, region.data = NULL; region_maybe_clip(&region, gc->pCompositeClip); - if (!RegionNotEmpty(&region)) + if (RegionNil(&region)) return; if (FORCE_FALLBACK) @@ -13289,11 +13135,11 @@ sna_push_pixels_solid_blt(GCPtr gc, region->extents.x1, region->extents.y1, region->extents.x2, region->extents.y2)); - kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_set_mode(&sna->kgem, KGEM_BLT, bo); /* Region is pre-clipped and translated into pixmap space */ - box = REGION_RECTS(region); - n = REGION_NUM_RECTS(region); + box = RegionRects(region); + n = RegionNumRects(region); do { int bx1 = (box->x1 - region->extents.x1) & ~7; int bx2 = (box->x2 - region->extents.x1 + 7) & ~7; @@ -13309,7 +13155,9 @@ sna_push_pixels_solid_blt(GCPtr gc, if (!kgem_check_batch(&sna->kgem, 8) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc_and_exec(&sna->kgem, 2)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, bo)) + return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } @@ -13340,7 +13188,7 @@ sna_push_pixels_solid_blt(GCPtr gc, b[0] = XY_MONO_SRC_COPY | 3 << 20; b[0] |= ((box->x1 - region->extents.x1) & 7) << 17; b[1] = bo->pitch; - if (sna->kgem.gen >= 40 && bo->tiling) { + if (sna->kgem.gen >= 040 && bo->tiling) { b[0] |= BLT_DST_TILED; b[1] >>= 2; } @@ -13399,7 +13247,7 @@ sna_push_pixels(GCPtr gc, PixmapPtr bitmap, DrawablePtr drawable, region.data = NULL; region_maybe_clip(&region, gc->pCompositeClip); - if (!RegionNotEmpty(&region)) + if (RegionNil(&region)) return; switch (gc->fillStyle) { @@ -13537,6 +13385,10 @@ static int sna_create_gc(GCPtr gc) gc->miTranslate = 1; gc->fExpose = 1; + gc->freeCompClip = 0; + gc->pCompositeClip = 0; + gc->pRotatedPixmap = 0; + fb_gc(gc)->bpp = bits_per_pixel(gc->depth); gc->funcs = (GCFuncs *)&sna_gc_funcs; @@ -13544,6 +13396,82 @@ static int sna_create_gc(GCPtr gc) return true; } +static bool +sna_get_image_blt(DrawablePtr drawable, + RegionPtr region, + char *dst) +{ + PixmapPtr pixmap = get_drawable_pixmap(drawable); + struct sna_pixmap *priv = sna_pixmap(pixmap); + struct sna *sna = to_sna_from_pixmap(pixmap); + struct kgem_bo *dst_bo; + bool ok = false; + int pitch; + + if (!USE_USERPTR_DOWNLOADS) + return false; + + if (priv == NULL) + return false; + + if (priv->clear) { + int w = region->extents.x2 - region->extents.x1; + int h = region->extents.y2 - region->extents.y1; + + pitch = PixmapBytePad(w, pixmap->drawable.depth); + if (priv->clear_color == 0 || + pixmap->drawable.bitsPerPixel == 8) { + memset(dst, priv->clear_color, pitch * h); + } else { + pixman_fill((uint32_t *)dst, + pitch/sizeof(uint32_t), + pixmap->drawable.bitsPerPixel, + 0, 0, + w, h, + priv->clear_color); + } + + return true; + } + + if (!sna->kgem.has_userptr) + return false; + + if (!DAMAGE_IS_ALL(priv->gpu_damage) || + !__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo)) + return false; + + DBG(("%s: download through a temporary map\n",
__FUNCTION__)); + + pitch = PixmapBytePad(region->extents.x2 - region->extents.x1, + drawable->depth); + dst_bo = kgem_create_map(&sna->kgem, dst, + pitch * (region->extents.y2 - region->extents.y1), + false); + if (dst_bo) { + int16_t dx, dy; + + dst_bo->flush = true; + dst_bo->pitch = pitch; + dst_bo->reusable = false; + + get_drawable_deltas(drawable, pixmap, &dx, &dy); + + ok = sna->render.copy_boxes(sna, GXcopy, + pixmap, priv->gpu_bo, dx, dy, + pixmap, dst_bo, + -region->extents.x1, + -region->extents.y1, + &region->extents, 1, + COPY_LAST); + + kgem_bo_sync__cpu(&sna->kgem, dst_bo); + kgem_bo_destroy(&sna->kgem, dst_bo); + } + + return ok; +} + static void sna_get_image(DrawablePtr drawable, int x, int y, int w, int h, @@ -13552,6 +13480,7 @@ sna_get_image(DrawablePtr drawable, { RegionRec region; unsigned int flags; + bool can_blt; if (!fbDrawableEnabled(drawable)) return; @@ -13564,6 +13493,13 @@ sna_get_image(DrawablePtr drawable, region.extents.y2 = region.extents.y1 + h; region.data = NULL; + can_blt = format == ZPixmap && + drawable->bitsPerPixel >= 8 && + PM_IS_SOLID(drawable, mask); + + if (can_blt && sna_get_image_blt(drawable, &region, dst)) + return; + flags = MOVE_READ; if ((w | h) == 1) flags |= MOVE_INPLACE_HINT; @@ -13572,9 +13508,7 @@ sna_get_image(DrawablePtr drawable, if (!sna_drawable_move_region_to_cpu(drawable, &region, flags)) return; - if (format == ZPixmap && - drawable->bitsPerPixel >= 8 && - PM_IS_SOLID(drawable, mask)) { + if (can_blt) { PixmapPtr pixmap = get_drawable_pixmap(drawable); int16_t dx, dy; @@ -13629,7 +13563,7 @@ sna_copy_window(WindowPtr win, DDXPointRec origin, RegionPtr src) RegionNull(&dst); RegionIntersect(&dst, &win->borderClip, src); - if (!RegionNotEmpty(&dst)) + if (RegionNil(&dst)) return; #ifdef COMPOSITE @@ -13697,8 +13631,10 @@ sna_accel_flush_callback(CallbackListPtr *list, list_del(&priv->list); if (priv->shm) { - DBG(("%s: syncing SHM pixmap=%ld\n", __FUNCTION__, - priv->pixmap->drawable.serialNumber)); + DBG(("%s: syncing SHM pixmap=%ld (refcnt=%d)\n", + __FUNCTION__, + priv->pixmap->drawable.serialNumber, + priv->pixmap->refcnt)); ret = sna_pixmap_move_to_cpu(priv->pixmap, MOVE_READ | MOVE_WRITE); assert(!ret || priv->gpu_bo == NULL); @@ -13707,8 +13643,9 @@ sna_accel_flush_callback(CallbackListPtr *list, } else { DBG(("%s: flushing DRI pixmap=%ld\n", __FUNCTION__, priv->pixmap->drawable.serialNumber)); - ret = sna_pixmap_move_to_gpu(priv->pixmap, - MOVE_READ | __MOVE_FORCE); + if (sna_pixmap_move_to_gpu(priv->pixmap, + MOVE_READ | __MOVE_FORCE)) + kgem_bo_unclean(&sna->kgem, priv->gpu_bo); } (void)ret; } @@ -13810,6 +13747,15 @@ static bool stop_flush(struct sna *sna, struct sna_pixmap *scanout) return scanout->cpu_damage || scanout->gpu_bo->needs_flush; } +static void timer_enable(struct sna *sna, int whom, int interval) +{ + if (!sna->timer_active) + UpdateCurrentTimeIf(); + sna->timer_active |= 1 << whom; + sna->timer_expire[whom] = TIME + interval; + DBG(("%s (time=%ld), starting timer %d\n", __FUNCTION__, (long)TIME, whom)); +} + static bool sna_accel_do_flush(struct sna *sna) { struct sna_pixmap *priv; @@ -13822,9 +13768,6 @@ static bool sna_accel_do_flush(struct sna *sna) return false; } - if (sna->flags & SNA_NO_DELAYED_FLUSH) - return true; - interval = sna->vblank_interval ?: 20; if (sna->timer_active & (1<<(FLUSH_TIMER))) { int32_t delta = sna->timer_expire[FLUSH_TIMER] - TIME; @@ -13835,26 +13778,18 @@ static bool sna_accel_do_flush(struct sna *sna) sna->timer_expire[FLUSH_TIMER] = TIME + interval; return true; } - } else
{ - if (!start_flush(sna, priv)) { - DBG(("%s -- no pending write to scanout\n", __FUNCTION__)); - if (priv) - kgem_bo_flush(&sna->kgem, priv->gpu_bo); - } else { - sna->timer_active |= 1 << FLUSH_TIMER; - sna->timer_expire[FLUSH_TIMER] = TIME + interval / 2; - DBG(("%s (time=%ld), starting\n", __FUNCTION__, (long)TIME)); - } - } + } else if (!start_flush(sna, priv)) { + DBG(("%s -- no pending write to scanout\n", __FUNCTION__)); + if (priv) + kgem_bo_flush(&sna->kgem, priv->gpu_bo); + } else + timer_enable(sna, FLUSH_TIMER, interval/2); return false; } static bool sna_accel_do_throttle(struct sna *sna) { - if (sna->flags & SNA_NO_THROTTLE) - return false; - if (sna->timer_active & (1<<(THROTTLE_TIMER))) { int32_t delta = sna->timer_expire[THROTTLE_TIMER] - TIME; if (delta <= 3) { @@ -13862,15 +13797,10 @@ static bool sna_accel_do_throttle(struct sna *sna) sna->timer_expire[THROTTLE_TIMER] = TIME + 20; return true; } - } else { - if (!sna->kgem.need_retire) { - DBG(("%s -- no pending activity\n", __FUNCTION__)); - } else { - DBG(("%s (time=%ld), starting\n", __FUNCTION__, (long)TIME)); - sna->timer_active |= 1 << THROTTLE_TIMER; - sna->timer_expire[THROTTLE_TIMER] = TIME + 20; - } - } + } else if (!sna->kgem.need_retire) { + DBG(("%s -- no pending activity\n", __FUNCTION__)); + } else + timer_enable(sna, THROTTLE_TIMER, 20); return false; } @@ -13885,65 +13815,12 @@ static bool sna_accel_do_expire(struct sna *sna) TIME + MAX_INACTIVE_TIME * 1000; return true; } - } else { - if (sna->kgem.need_expire) { - sna->timer_active |= 1 << EXPIRE_TIMER; - sna->timer_expire[EXPIRE_TIMER] = - TIME + MAX_INACTIVE_TIME * 1000; - DBG(("%s (time=%ld), starting\n", __FUNCTION__, (long)TIME)); - } - } + } else if (sna->kgem.need_expire) + timer_enable(sna, EXPIRE_TIMER, MAX_INACTIVE_TIME * 1000); return false; } -static bool sna_accel_do_inactive(struct sna *sna) -{ - if (!USE_INACTIVE) - return false; - - if (sna->timer_active & (1<<(INACTIVE_TIMER))) { - int32_t delta = sna->timer_expire[INACTIVE_TIMER] - TIME; - if (delta <= 3) { - sna->timer_expire[INACTIVE_TIMER] = - TIME + 120 * 1000; - DBG(("%s (time=%ld), triggered\n", __FUNCTION__, (long)TIME)); - return true; - } - } else { - if (!list_is_empty(&sna->active_pixmaps)) { - sna->timer_active |= 1 << INACTIVE_TIMER; - sna->timer_expire[INACTIVE_TIMER] = - TIME + 120 * 1000; - DBG(("%s (time=%ld), starting\n", __FUNCTION__, (long)TIME)); - } - } - - return false; -} - -static int32_t sna_timeout(struct sna *sna) -{ - int32_t now = TIME, next = 0; - int i; - - DBG(("%s: now=%d, active=%08x\n", - __FUNCTION__, (int)now, sna->timer_active)); - for (i = 0; i < NUM_TIMERS; i++) { - if (sna->timer_active & (1 << i)) { - int32_t delta = sna->timer_expire[i] - now; - DBG(("%s: timer[%d] expires in %d [%d]\n", - __FUNCTION__, i, delta, sna->timer_expire[i])); - if (next == 0 || delta < next) - next = delta; - } - } - - DBG(("%s: active=%08x, next=+%d\n", - __FUNCTION__, sna->timer_active, next)); - return next; -} - static void sna_accel_post_damage(struct sna *sna) { #if HAS_PIXMAP_SHARING @@ -13958,7 +13835,7 @@ static void sna_accel_post_damage(struct sna *sna) int n; damage = DamageRegion(dirty->damage); - if (!RegionNotEmpty(damage)) + if (RegionNil(damage)) continue; src = dirty->src; @@ -13979,9 +13856,19 @@ static void sna_accel_post_damage(struct sna *sna) region.extents.x2, region.extents.y2)); RegionIntersect(&region, &region, damage); + if (RegionNil(&region)) + goto skip; + + RegionTranslate(&region, -dirty->x, -dirty->y); +
DamageRegionAppend(&dirty->slave_dst->drawable, &region); + + DBG(("%s: slave: ((%d, %d), (%d, %d))x%d\n", __FUNCTION__, region.extents.x1, region.extents.y1, region.extents.x2, region.extents.y2, RegionNumRects(&region.extents))); - box = REGION_RECTS(&region); - n = REGION_NUM_RECTS(&region); + box = RegionRects(&region); + n = RegionNumRects(&region); if (wedged(sna)) { fallback: if (!sna_pixmap_move_to_cpu(src, MOVE_READ)) @@ -13994,53 +13881,52 @@ fallback: do { DBG(("%s: copy box (%d, %d)->(%d, %d)x(%d, %d)\n", __FUNCTION__, + box->x1 + dirty->x, box->y1 + dirty->y, box->x1, box->y1, - box->x1 - dirty->x, box->y1 - dirty->y, box->x2 - box->x1, box->y2 - box->y1)); assert(box->x2 > box->x1); assert(box->y2 > box->y1); + assert(box->x1 + dirty->x >= 0); + assert(box->y1 + dirty->y >= 0); + assert(box->x2 + dirty->x <= src->drawable.width); + assert(box->y2 + dirty->y <= src->drawable.height); + assert(box->x1 >= 0); assert(box->y1 >= 0); assert(box->x2 <= src->drawable.width); assert(box->y2 <= src->drawable.height); - assert(box->x1 - dirty->x >= 0); - assert(box->y1 - dirty->y >= 0); - assert(box->x2 - dirty->x <= src->drawable.width); - assert(box->y2 - dirty->y <= src->drawable.height); - memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr, src->drawable.bitsPerPixel, src->devKind, dst->devKind, - box->x1, box->y1, - box->x1 - dirty->x, - box->y1 - dirty->y, + box->x1 + dirty->x, + box->y1 + dirty->y, + box->x1, + box->y1, box->x2 - box->x1, box->y2 - box->y1); box++; } while (--n); } else { - if (!sna_pixmap_move_to_gpu(src, MOVE_READ | __MOVE_FORCE)) + if (!sna_pixmap_move_to_gpu(src, MOVE_READ | MOVE_ASYNC_HINT | __MOVE_FORCE)) goto fallback; - if (!sna_pixmap_move_to_gpu(dst, MOVE_READ | MOVE_WRITE | __MOVE_FORCE)) + if (!sna_pixmap_move_to_gpu(dst, MOVE_READ | MOVE_WRITE | MOVE_ASYNC_HINT | __MOVE_FORCE)) goto fallback; if (!sna->render.copy_boxes(sna, GXcopy, - src, sna_pixmap_get_bo(src), 0, 0, - dst, sna_pixmap_get_bo(dst), -dirty->x, -dirty->y, + src, sna_pixmap_get_bo(src), dirty->x, dirty->y, + dst, sna_pixmap_get_bo(dst),0, 0, box, n, COPY_LAST)) goto fallback; flush = true; } - RegionTranslate(&region, -dirty->x, -dirty->y); - DamageRegionAppend(&dirty->slave_dst->drawable, &region); - + DamageRegionProcessPending(&dirty->slave_dst->drawable); skip: RegionUninit(&region); DamageEmpty(dirty->damage); @@ -14099,105 +13985,6 @@ static void sna_accel_expire(struct sna *sna) sna_accel_disarm_timer(sna, EXPIRE_TIMER); } -static void sna_accel_inactive(struct sna *sna) -{ - struct sna_pixmap *priv; - struct list preserve; - - DBG(("%s (time=%ld)\n", __FUNCTION__, (long)TIME)); - -#if HAS_FULL_DEBUG - { - unsigned count, bytes; - - count = bytes = 0; - list_for_each_entry(priv, &sna->inactive_clock[1], inactive) - if (!priv->pinned) - count++, bytes += kgem_bo_size(priv->gpu_bo); - - DBG(("%s: trimming %d inactive GPU buffers, %d bytes\n", - __FUNCTION__, count, bytes)); - - count = bytes = 0; - list_for_each_entry(priv, &sna->active_pixmaps, inactive) { - if (priv->ptr && - sna_damage_is_all(&priv->gpu_damage, - priv->pixmap->drawable.width, - priv->pixmap->drawable.height)) { - count++, bytes += priv->pixmap->devKind * priv->pixmap->drawable.height; - } - } - - DBG(("%s: trimming %d inactive CPU buffers, %d bytes\n", - __FUNCTION__, count, bytes)); - } -#endif - - /* clear out the oldest inactive pixmaps */ - list_init(&preserve); - while (!list_is_empty(&sna->inactive_clock[1])) { - priv = list_first_entry(&sna->inactive_clock[1], - struct sna_pixmap, - inactive); - assert((priv->create &
KGEM_CAN_CREATE_LARGE) == 0); - assert(priv->gpu_bo); - assert(!priv->gpu_bo->proxy); - - /* XXX Rather than discarding the GPU buffer here, we - * could mark it purgeable and allow the shrinker to - * reap its storage only under memory pressure. - */ - list_del(&priv->inactive); - if (priv->pinned) - continue; - - if (priv->ptr && - sna_damage_is_all(&priv->gpu_damage, - priv->pixmap->drawable.width, - priv->pixmap->drawable.height)) { - DBG(("%s: discarding inactive CPU shadow\n", - __FUNCTION__)); - sna_damage_destroy(&priv->cpu_damage); - list_del(&priv->list); - - assert(priv->cpu_bo == NULL || !priv->cpu_bo->flush); - assert(!priv->shm); - sna_pixmap_free_cpu(sna, priv); - priv->undamaged = false; - priv->cpu = false; - - list_add(&priv->inactive, &preserve); - } else { - DBG(("%s: discarding inactive GPU bo handle=%d\n", - __FUNCTION__, priv->gpu_bo->handle)); - if (!sna_pixmap_move_to_cpu(priv->pixmap, - MOVE_READ | MOVE_WRITE | MOVE_ASYNC_HINT)) - list_add(&priv->inactive, &preserve); - } - } - - /* Age the current inactive pixmaps */ - sna->inactive_clock[1].next = sna->inactive_clock[0].next; - sna->inactive_clock[0].next->prev = &sna->inactive_clock[1]; - sna->inactive_clock[0].prev->next = &sna->inactive_clock[1]; - sna->inactive_clock[1].prev = sna->inactive_clock[0].prev; - - sna->inactive_clock[0].next = sna->active_pixmaps.next; - sna->active_pixmaps.next->prev = &sna->inactive_clock[0]; - sna->active_pixmaps.prev->next = &sna->inactive_clock[0]; - sna->inactive_clock[0].prev = sna->active_pixmaps.prev; - - sna->active_pixmaps.next = preserve.next; - preserve.next->prev = &sna->active_pixmaps; - preserve.prev->next = &sna->active_pixmaps; - sna->active_pixmaps.prev = preserve.prev; - - if (list_is_empty(&sna->inactive_clock[1]) && - list_is_empty(&sna->inactive_clock[0]) && - list_is_empty(&sna->active_pixmaps)) - sna_accel_disarm_timer(sna, INACTIVE_TIMER); -} - #ifdef DEBUG_MEMORY static bool sna_accel_do_debug_memory(struct sna *sna) { @@ -14236,7 +14023,7 @@ sna_get_window_pixmap(WindowPtr window) static void sna_set_window_pixmap(WindowPtr window, PixmapPtr pixmap) { - *(PixmapPtr *)dixGetPrivateAddr(&window->devPrivates, &sna_window_key) = pixmap; + *(PixmapPtr *)__get_private(window, sna_window_key) = pixmap; } static Bool @@ -14306,11 +14093,15 @@ static bool sna_picture_init(ScreenPtr screen) { PictureScreenPtr ps; + DBG(("%s\n", __FUNCTION__)); + if (!miPictureInit(screen, NULL, 0)) return false; ps = GetPictureScreen(screen); assert(ps != NULL); + assert(ps->CreatePicture != NULL); + assert(ps->DestroyPicture != NULL); ps->Composite = sna_composite; ps->CompositeRects = sna_composite_rectangles; @@ -14320,25 +14111,38 @@ static bool sna_picture_init(ScreenPtr screen) ps->UnrealizeGlyph = sna_glyph_unrealize; ps->AddTraps = sna_add_traps; ps->Trapezoids = sna_composite_trapezoids; +#if HAS_PIXMAN_TRIANGLES ps->Triangles = sna_composite_triangles; #if PICTURE_SCREEN_VERSION >= 2 ps->TriStrip = sna_composite_tristrip; ps->TriFan = sna_composite_trifan; #endif +#endif return true; } +static bool sna_option_accel_blt(struct sna *sna) +{ + const char *s; + + s = xf86GetOptValString(sna->Options, OPTION_ACCEL_METHOD); + if (s == NULL) + return false; + + return strcasecmp(s, "blt") == 0; +} + bool sna_accel_init(ScreenPtr screen, struct sna *sna) { const char *backend; + DBG(("%s\n", __FUNCTION__)); + sna_font_key = AllocateFontPrivateIndex(); list_init(&sna->flush_pixmaps); list_init(&sna->active_pixmaps); - list_init(&sna->inactive_clock[0]); - 
list_init(&sna->inactive_clock[1]); AddGeneralSocket(sna->kgem.fd); @@ -14404,33 +14208,30 @@ bool sna_accel_init(ScreenPtr screen, struct sna *sna) return false; backend = "no"; - sna->have_render = false; no_render_init(sna); -#if !DEBUG_NO_RENDER - if (sna->info->gen >= 80) { - } else if (sna->info->gen >= 70) { - if ((sna->have_render = gen7_render_init(sna))) + if (sna_option_accel_blt(sna) || sna->info->gen >= 0100) { + } else if (sna->info->gen >= 070) { + if (gen7_render_init(sna)) backend = "IvyBridge"; - } else if (sna->info->gen >= 60) { - if ((sna->have_render = gen6_render_init(sna))) + } else if (sna->info->gen >= 060) { + if (gen6_render_init(sna)) backend = "SandyBridge"; - } else if (sna->info->gen >= 50) { - if ((sna->have_render = gen5_render_init(sna))) + } else if (sna->info->gen >= 050) { + if (gen5_render_init(sna)) backend = "Ironlake"; - } else if (sna->info->gen >= 40) { - if ((sna->have_render = gen4_render_init(sna))) - backend = "Broadwater"; - } else if (sna->info->gen >= 30) { - if ((sna->have_render = gen3_render_init(sna))) + } else if (sna->info->gen >= 040) { + if (gen4_render_init(sna)) + backend = "Broadwater/Crestline"; + } else if (sna->info->gen >= 030) { + if (gen3_render_init(sna)) backend = "gen3"; - } else if (sna->info->gen >= 20) { - if ((sna->have_render = gen2_render_init(sna))) + } else if (sna->info->gen >= 020) { + if (gen2_render_init(sna)) backend = "gen2"; } -#endif - DBG(("%s(backend=%s, have_render=%d)\n", - __FUNCTION__, backend, sna->have_render)); + DBG(("%s(backend=%s, prefer_gpu=%x)\n", + __FUNCTION__, backend, sna->render.prefer_gpu)); kgem_reset(&sna->kgem); @@ -14443,6 +14244,8 @@ bool sna_accel_init(ScreenPtr screen, struct sna *sna) void sna_accel_create(struct sna *sna) { + DBG(("%s\n", __FUNCTION__)); + if (!sna_glyphs_create(sna)) goto fail; @@ -14457,7 +14260,6 @@ void sna_accel_create(struct sna *sna) fail: xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, "Failed to allocate caches, disabling RENDER acceleration\n"); - sna->have_render = false; no_render_init(sna); } @@ -14481,6 +14283,8 @@ void sna_accel_watch_flush(struct sna *sna, int enable) void sna_accel_close(struct sna *sna) { + DBG(("%s\n", __FUNCTION__)); + sna_composite_close(sna); sna_gradients_close(sna); sna_glyphs_close(sna); @@ -14500,24 +14304,25 @@ void sna_accel_close(struct sna *sna) void sna_accel_block_handler(struct sna *sna, struct timeval **tv) { - UpdateCurrentTimeIf(); + if (sna->timer_active) + UpdateCurrentTimeIf(); - if (sna->kgem.nbatch && kgem_is_idle(&sna->kgem)) { + if (sna->kgem.nbatch && + (sna->kgem.scanout_busy || + kgem_ring_is_idle(&sna->kgem, sna->kgem.ring))) { DBG(("%s: GPU idle, flushing\n", __FUNCTION__)); _kgem_submit(&sna->kgem); } if (sna_accel_do_flush(sna)) sna_accel_flush(sna); - assert(sna->flags & SNA_NO_DELAYED_FLUSH || - sna_accel_scanout(sna) == NULL || + assert(sna_accel_scanout(sna) == NULL || sna_accel_scanout(sna)->gpu_bo->exec == NULL || sna->timer_active & (1<<(FLUSH_TIMER))); if (sna_accel_do_throttle(sna)) sna_accel_throttle(sna); - assert(sna->flags & SNA_NO_THROTTLE || - !sna->kgem.need_retire || + assert(!sna->kgem.need_retire || sna->timer_active & (1<<(THROTTLE_TIMER))); if (sna_accel_do_expire(sna)) @@ -14525,9 +14330,6 @@ void sna_accel_block_handler(struct sna *sna, struct timeval **tv) assert(!sna->kgem.need_expire || sna->timer_active & (1<<(EXPIRE_TIMER))); - if (sna_accel_do_inactive(sna)) - sna_accel_inactive(sna); - if (sna_accel_do_debug_memory(sna)) sna_accel_debug_memory(sna); @@ -14537,24 
+14339,28 @@ void sna_accel_block_handler(struct sna *sna, struct timeval **tv) sna->watch_flush = 0; } - if (sna->timer_active) { + if (sna->timer_active & 1) { int32_t timeout; DBG(("%s: evaluating timers, active=%x\n", __FUNCTION__, sna->timer_active)); - timeout = sna_timeout(sna); - if (timeout) { - if (*tv == NULL) { - *tv = &sna->timer_tv; - goto set_tv; - } - if ((*tv)->tv_sec * 1000 + (*tv)->tv_usec / 1000 > timeout) { + + timeout = sna->timer_expire[0] - TIME; + DBG(("%s: flush timer expires in %d [%d]\n", + __FUNCTION__, timeout, sna->timer_expire[0])); + + if (*tv == NULL) { + *tv = &sna->timer_tv; + goto set_tv; + } + if ((*tv)->tv_sec * 1000 + (*tv)->tv_usec / 1000 > timeout) { set_tv: - (*tv)->tv_sec = timeout / 1000; - (*tv)->tv_usec = timeout % 1000 * 1000; - } + (*tv)->tv_sec = timeout / 1000; + (*tv)->tv_usec = timeout % 1000 * 1000; } } + + sna->kgem.scanout_busy = false; } void sna_accel_wakeup_handler(struct sna *sna) @@ -14563,14 +14369,22 @@ void sna_accel_wakeup_handler(struct sna *sna) if (sna->kgem.need_retire) kgem_retire(&sna->kgem); - if (!sna->mode.shadow_active && !sna->kgem.need_retire) { + if (sna->kgem.nbatch && !sna->kgem.need_retire) { DBG(("%s: GPU idle, flushing\n", __FUNCTION__)); - kgem_submit(&sna->kgem); + _kgem_submit(&sna->kgem); } if (sna->kgem.need_purge) kgem_purge_cache(&sna->kgem); + + if (FAULT_INJECTION && (rand() % FAULT_INJECTION) == 0) { + ErrorF("%s hardware acceleration\n", + sna->kgem.wedged ? "Re-enabling" : "Disabling"); + kgem_submit(&sna->kgem); + sna->kgem.wedged = !sna->kgem.wedged; + } } void sna_accel_free(struct sna *sna) { + DBG(("%s\n", __FUNCTION__)); } diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c index 7410eb116..edfcb9ea4 100644 --- a/src/sna/sna_blt.c +++ b/src/sna/sna_blt.c @@ -119,7 +119,7 @@ static bool sna_blt_fill_init(struct sna *sna, blt->br13 = bo->pitch; blt->cmd = XY_SCANLINE_BLT; - if (kgem->gen >= 40 && bo->tiling) { + if (kgem->gen >= 040 && bo->tiling) { blt->cmd |= BLT_DST_TILED; blt->br13 >>= 2; } @@ -145,11 +145,12 @@ static bool sna_blt_fill_init(struct sna *sna, blt->pixel = pixel; blt->bpp = bpp; - kgem_set_mode(kgem, KGEM_BLT); + kgem_set_mode(kgem, KGEM_BLT, bo); if (!kgem_check_batch(kgem, 12) || !kgem_check_bo_fenced(kgem, bo)) { - _kgem_submit(kgem); - assert(kgem_check_bo_fenced(kgem, bo)); + kgem_submit(kgem); + if (!kgem_check_bo_fenced(kgem, bo)) + return false; _kgem_set_mode(kgem, KGEM_BLT); } @@ -267,14 +268,14 @@ static bool sna_blt_copy_init(struct sna *sna, blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; blt->pitch[0] = src->pitch; - if (kgem->gen >= 40 && src->tiling) { + if (kgem->gen >= 040 && src->tiling) { blt->cmd |= BLT_SRC_TILED; blt->pitch[0] >>= 2; } assert(blt->pitch[0] <= MAXSHORT); blt->pitch[1] = dst->pitch; - if (kgem->gen >= 40 && dst->tiling) { + if (kgem->gen >= 040 && dst->tiling) { blt->cmd |= BLT_DST_TILED; blt->pitch[1] >>= 2; } @@ -289,9 +290,9 @@ static bool sna_blt_copy_init(struct sna *sna, case 8: break; } - kgem_set_mode(kgem, KGEM_BLT); + kgem_set_mode(kgem, KGEM_BLT, dst); if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) { - _kgem_submit(kgem); + kgem_submit(kgem); if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) return false; _kgem_set_mode(kgem, KGEM_BLT); @@ -317,14 +318,14 @@ static bool sna_blt_alpha_fixup_init(struct sna *sna, blt->cmd = XY_FULL_MONO_PATTERN_BLT; blt->pitch[0] = src->pitch; - if (kgem->gen >= 40 && src->tiling) { + if (kgem->gen >= 040 && src->tiling) { blt->cmd |= BLT_SRC_TILED; blt->pitch[0] >>= 2; } 
assert(blt->pitch[0] <= MAXSHORT); blt->pitch[1] = dst->pitch; - if (kgem->gen >= 40 && dst->tiling) { + if (kgem->gen >= 040 && dst->tiling) { blt->cmd |= BLT_DST_TILED; blt->pitch[1] >>= 2; } @@ -341,9 +342,9 @@ static bool sna_blt_alpha_fixup_init(struct sna *sna, } blt->pixel = alpha; - kgem_set_mode(kgem, KGEM_BLT); + kgem_set_mode(kgem, KGEM_BLT, dst); if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) { - _kgem_submit(kgem); + kgem_submit(kgem); if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) return false; _kgem_set_mode(kgem, KGEM_BLT); @@ -433,7 +434,7 @@ static void sna_blt_copy_one(struct sna *sna, kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (blt->cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB))) && kgem->batch[kgem->nbatch-4] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) && kgem->batch[kgem->nbatch-3] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width)) && - kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->handle) { + kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->target_handle) { DBG(("%s: replacing last fill\n", __FUNCTION__)); if (kgem_check_batch(kgem, 8-6)) { b = kgem->batch + kgem->nbatch - 6; @@ -933,6 +934,76 @@ static void blt_composite_fill_boxes_no_offset(struct sna *sna, _sna_blt_fill_boxes(sna, &op->u.blt, box, n); } +static void blt_composite_fill_boxes_no_offset__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + struct kgem *kgem = &sna->kgem; + const struct sna_blt_state *blt = &op->u.blt; + uint32_t cmd = blt->cmd; + + DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox)); + + sna_vertex_lock(&sna->render); + if (!kgem_check_batch(kgem, 3)) { + sna_vertex_wait__locked(&sna->render); + sna_blt_fill_begin(sna, blt); + } + + do { + uint32_t *b = kgem->batch + kgem->nbatch; + int nbox_this_time; + + nbox_this_time = nbox; + if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3; + assert(nbox_this_time); + nbox -= nbox_this_time; + + kgem->nbatch += 3 * nbox_this_time; + assert(kgem->nbatch < kgem->surface); + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + while (nbox_this_time >= 8) { + b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; + b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++; + b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++; + b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++; + b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++; + b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++; + b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++; + b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++; + b += 24; + nbox_this_time -= 8; + } + if (nbox_this_time & 4) { + b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; + b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++; + b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++; + b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++; + b += 12; + } + if (nbox_this_time & 2) { + b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; + b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++; + b += 6; + } + if (nbox_this_time & 1) { + b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; + } + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + if (!nbox) + break; + + sna_vertex_wait__locked(&sna->render); + sna_blt_fill_begin(sna, blt); + } while (1); + 
sna_vertex_unlock(&sna->render); +} + fastcall static void blt_composite_fill_box(struct sna *sna, const struct sna_composite_op *op, const BoxRec *box) @@ -956,6 +1027,92 @@ static void blt_composite_fill_boxes(struct sna *sna, } while (--n); } +static inline uint64_t add4(const BoxRec *b, int16_t x, int16_t y) +{ + union { + uint64_t v; + int16_t i[4]; + } vi; + vi.v = *(uint64_t *)b; + vi.i[0] += x; + vi.i[1] += y; + vi.i[2] += x; + vi.i[3] += y; + return vi.v; +} + +static void blt_composite_fill_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + struct kgem *kgem = &sna->kgem; + const struct sna_blt_state *blt = &op->u.blt; + uint32_t cmd = blt->cmd; + int16_t dx = op->dst.x; + int16_t dy = op->dst.y; + + DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox)); + + sna_vertex_lock(&sna->render); + if (!kgem_check_batch(kgem, 3)) { + sna_vertex_wait__locked(&sna->render); + sna_blt_fill_begin(sna, blt); + } + + do { + uint32_t *b = kgem->batch + kgem->nbatch; + int nbox_this_time; + + nbox_this_time = nbox; + if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3; + assert(nbox_this_time); + nbox -= nbox_this_time; + + kgem->nbatch += 3 * nbox_this_time; + assert(kgem->nbatch < kgem->surface); + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + while (nbox_this_time >= 8) { + b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy); + b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy); + b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy); + b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy); + b[12] = cmd; *(uint64_t *)(b+13) = add4(box++, dx, dy); + b[15] = cmd; *(uint64_t *)(b+16) = add4(box++, dx, dy); + b[18] = cmd; *(uint64_t *)(b+19) = add4(box++, dx, dy); + b[21] = cmd; *(uint64_t *)(b+22) = add4(box++, dx, dy); + b += 24; + nbox_this_time -= 8; + } + if (nbox_this_time & 4) { + b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy); + b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy); + b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy); + b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy); + b += 12; + } + if (nbox_this_time & 2) { + b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy); + b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy); + b += 6; + } + if (nbox_this_time & 1) { + b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy); + } + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + if (!nbox) + break; + + sna_vertex_wait__locked(&sna->render); + sna_blt_fill_begin(sna, blt); + } while (1); + sna_vertex_unlock(&sna->render); +} + fastcall static void blt_composite_nop(struct sna *sna, const struct sna_composite_op *op, @@ -980,8 +1137,10 @@ begin_blt(struct sna *sna, struct sna_composite_op *op) { if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo)) { - _kgem_submit(&sna->kgem); - assert(kgem_check_bo_fenced(&sna->kgem, op->dst.bo)); + kgem_submit(&sna->kgem); + if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo)) + return false; + _kgem_set_mode(&sna->kgem, KGEM_BLT); } @@ -1011,6 +1170,7 @@ prepare_blt_clear(struct sna *sna, op->blt = blt_composite_fill__cpu; op->box = blt_composite_fill_box__cpu; op->boxes = blt_composite_fill_boxes__cpu; + op->thread_boxes = blt_composite_fill_boxes__cpu; op->done = nop_done; op->u.blt.pixel = 0; return true; @@ -1020,9 +1180,11 @@ prepare_blt_clear(struct sna *sna, if (op->dst.x|op->dst.y) { op->box = 
blt_composite_fill_box; op->boxes = blt_composite_fill_boxes; + op->thread_boxes = blt_composite_fill_boxes__thread; } else { op->box = blt_composite_fill_box_no_offset; op->boxes = blt_composite_fill_boxes_no_offset; + op->thread_boxes = blt_composite_fill_boxes_no_offset__thread; } op->done = nop_done; @@ -1047,6 +1209,7 @@ prepare_blt_fill(struct sna *sna, op->blt = blt_composite_fill__cpu; op->box = blt_composite_fill_box__cpu; op->boxes = blt_composite_fill_boxes__cpu; + op->thread_boxes = blt_composite_fill_boxes__cpu; op->done = nop_done; return true; } @@ -1055,9 +1218,11 @@ prepare_blt_fill(struct sna *sna, if (op->dst.x|op->dst.y) { op->box = blt_composite_fill_box; op->boxes = blt_composite_fill_boxes; + op->thread_boxes = blt_composite_fill_boxes__thread; } else { op->box = blt_composite_fill_box_no_offset; op->boxes = blt_composite_fill_boxes_no_offset; + op->thread_boxes = blt_composite_fill_boxes_no_offset__thread; } op->done = nop_done; @@ -1148,6 +1313,141 @@ static void blt_composite_copy_boxes(struct sna *sna, } while(--nbox); } +static inline uint32_t add2(uint32_t v, int16_t x, int16_t y) +{ + x += v & 0xffff; + y += v >> 16; + return (uint16_t)y << 16 | x; +} + +static void blt_composite_copy_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + struct kgem *kgem = &sna->kgem; + int dst_dx = op->dst.x; + int dst_dy = op->dst.y; + int src_dx = op->src.offset[0]; + int src_dy = op->src.offset[1]; + uint32_t cmd = op->u.blt.cmd; + uint32_t br13 = op->u.blt.br13; + struct kgem_bo *src_bo = op->u.blt.bo[0]; + struct kgem_bo *dst_bo = op->u.blt.bo[1]; + int src_pitch = op->u.blt.pitch[0]; + + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + + if ((dst_dx | dst_dy) == 0) { + uint64_t hdr = (uint64_t)br13 << 32 | cmd; + do { + int nbox_this_time; + + nbox_this_time = nbox; + if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; + assert(nbox_this_time); + nbox -= nbox_this_time; + + do { + uint32_t *b = kgem->batch + kgem->nbatch; + + DBG((" %s: box=(%d, %d)x(%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, box->y2 - box->y1)); + + assert(box->x1 + src_dx >= 0); + assert(box->y1 + src_dy >= 0); + assert(box->x1 + src_dx <= INT16_MAX); + assert(box->y1 + src_dy <= INT16_MAX); + + assert(box->x1 >= 0); + assert(box->y1 >= 0); + + *(uint64_t *)&b[0] = hdr; + *(uint64_t *)&b[2] = *(const uint64_t *)box; + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = add2(b[2], src_dx, src_dy); + b[6] = src_pitch; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 8; + assert(kgem->nbatch < kgem->surface); + box++; + } while (--nbox_this_time); + + if (!nbox) + break; + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } while (1); + } else { + do { + int nbox_this_time; + + nbox_this_time = nbox; + if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; + 
assert(nbox_this_time); + nbox -= nbox_this_time; + + do { + uint32_t *b = kgem->batch + kgem->nbatch; + + DBG((" %s: box=(%d, %d)x(%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, box->y2 - box->y1)); + + assert(box->x1 + src_dx >= 0); + assert(box->y1 + src_dy >= 0); + + assert(box->x1 + dst_dx >= 0); + assert(box->y1 + dst_dy >= 0); + + b[0] = cmd; + b[1] = br13; + b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx); + b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx); + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx); + b[6] = src_pitch; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 8; + assert(kgem->nbatch < kgem->surface); + box++; + } while (--nbox_this_time); + + if (!nbox) + break; + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } while (1); + } + sna_vertex_unlock(&sna->render); +} + fastcall static void blt_composite_copy_with_alpha(struct sna *sna, const struct sna_composite_op *op, @@ -1245,7 +1545,7 @@ prepare_blt_copy(struct sna *sna, } if (!kgem_check_many_bo_fenced(&sna->kgem, op->dst.bo, bo, NULL)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); if (!kgem_check_many_bo_fenced(&sna->kgem, op->dst.bo, bo, NULL)) { DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__)); @@ -1256,7 +1556,7 @@ prepare_blt_copy(struct sna *sna, DBG(("%s\n", __FUNCTION__)); - if (sna->kgem.gen >= 60) + if (sna->kgem.gen >= 060 && op->dst.bo == bo) op->done = gen6_blt_copy_done; else op->done = nop_done; @@ -1274,6 +1574,7 @@ prepare_blt_copy(struct sna *sna, op->blt = blt_composite_copy; op->box = blt_composite_copy_box; op->boxes = blt_composite_copy_boxes; + op->thread_boxes = blt_composite_copy_boxes__thread; if (!sna_blt_copy_init(sna, &op->u.blt, bo, op->dst.bo, src->drawable.bitsPerPixel, @@ -1715,8 +2016,14 @@ sna_blt_composite(struct sna *sna, was_clear = sna_drawable_is_clear(dst->pDrawable); tmp->dst.pixmap = get_drawable_pixmap(dst->pDrawable); - dst_box.x1 = dst_x; dst_box.x2 = dst_x + width; - dst_box.y1 = dst_y; dst_box.y2 = dst_y + height; + if (width | height) { + dst_box.x1 = dst_x; + dst_box.x2 = bound(dst_x, width); + dst_box.y1 = dst_y; + dst_box.y2 = bound(dst_y, height); + } else + sna_render_picture_extents(dst, &dst_box); + bo = sna_drawable_use_bo(dst->pDrawable, PREFER_GPU, &dst_box, &tmp->damage); if (bo && !kgem_bo_can_blt(&sna->kgem, bo)) { DBG(("%s: can not blit to dst, tiling? %d, pitch? 
%d\n", @@ -1834,10 +2141,21 @@ clear: if (x < 0 || y < 0 || x + width > src->pDrawable->width || y + height > src->pDrawable->height) { - DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d\n", + DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d, repeat=%d\n", __FUNCTION__, - x, y, x+width, y+width, src->pDrawable->width, src->pDrawable->height)); - return false; + x, y, x+width, y+width, src->pDrawable->width, src->pDrawable->height, src->repeatType)); + if (src->repeat && src->repeatType == RepeatNormal) { + x = x % src->pDrawable->width; + y = y % src->pDrawable->height; + if (x < 0) + x += src->pDrawable->width; + if (y < 0) + y += src->pDrawable->height; + if (x + width > src->pDrawable->width || + y + height > src->pDrawable->height) + return false; + } else + return false; } src_pixmap = get_drawable_pixmap(src->pDrawable); @@ -1899,6 +2217,7 @@ put: if (tmp->dst.bo == priv->cpu_bo) { DBG(("%s: forcing the stall to overwrite a busy CPU bo\n", __FUNCTION__)); tmp->dst.bo = NULL; + tmp->damage = NULL; } } } @@ -1924,7 +2243,7 @@ static void convert_done(struct sna *sna, const struct sna_composite_op *op) { struct kgem *kgem = &sna->kgem; - if (kgem->gen >= 60 && kgem_check_batch(kgem, 3)) { + if (kgem->gen >= 060 && op->src.bo == op->dst.bo && kgem_check_batch(kgem, 3)) { uint32_t *b = kgem->batch + kgem->nbatch; b[0] = XY_SETUP_CLIP; b[1] = b[2] = 0; @@ -1943,6 +2262,7 @@ sna_blt_composite__convert(struct sna *sna, struct sna_composite_op *tmp) { uint32_t alpha_fixup; + int sx, sy; uint8_t op; #if DEBUG_NO_BLT || NO_BLT_COMPOSITE @@ -1993,19 +2313,36 @@ sna_blt_composite__convert(struct sna *sna, return false; } - x += tmp->src.offset[0]; - y += tmp->src.offset[1]; + sx = tmp->src.offset[0]; + sy = tmp->src.offset[1]; + + x += sx; + y += sy; if (x < 0 || y < 0 || x + width > tmp->src.width || y + height > tmp->src.height) { DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d\n", __FUNCTION__, x, y, x+width, y+width, tmp->src.width, tmp->src.height)); - return false; + if (tmp->src.repeat == RepeatNormal) { + int xx = x % tmp->src.width; + int yy = y % tmp->src.height; + if (xx < 0) + xx += tmp->src.width; + if (yy < 0) + yy += tmp->src.height; + if (xx + width > tmp->src.width || + yy + height > tmp->src.height) + return false; + + sx += xx - x; + sy += yy - y; + } else + return false; } if (!kgem_check_many_bo_fenced(&sna->kgem, tmp->dst.bo, tmp->src.bo, NULL)) { - _kgem_submit(&sna->kgem); + kgem_submit(&sna->kgem); if (!kgem_check_many_bo_fenced(&sna->kgem, tmp->dst.bo, tmp->src.bo, NULL)) { DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__)); @@ -2014,13 +2351,14 @@ sna_blt_composite__convert(struct sna *sna, _kgem_set_mode(&sna->kgem, KGEM_BLT); } - tmp->u.blt.src_pixmap = NULL; - tmp->u.blt.sx = tmp->src.offset[0]; - tmp->u.blt.sy = tmp->src.offset[1]; DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? 
%x\n", __FUNCTION__, tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy, alpha_fixup)); + tmp->u.blt.src_pixmap = NULL; + tmp->u.blt.sx = sx; + tmp->u.blt.sy = sy; + if (alpha_fixup) { tmp->blt = blt_composite_copy_with_alpha; tmp->box = blt_composite_copy_box_with_alpha; @@ -2035,6 +2373,7 @@ sna_blt_composite__convert(struct sna *sna, tmp->blt = blt_composite_copy; tmp->box = blt_composite_copy_box; tmp->boxes = blt_composite_copy_boxes; + tmp->thread_boxes = blt_composite_copy_boxes__thread; if (!sna_blt_copy_init(sna, &tmp->u.blt, tmp->src.bo, tmp->dst.bo, @@ -2148,7 +2487,7 @@ bool sna_blt_copy(struct sna *sna, uint8_t alu, return false; op->blt = sna_blt_copy_op_blt; - if (sna->kgem.gen >= 60) + if (sna->kgem.gen >= 060 && src == dst) op->done = gen6_blt_copy_op_done; else op->done = sna_blt_copy_op_done; @@ -2174,7 +2513,7 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu, cmd = XY_COLOR_BLT; br13 = bo->pitch; - if (kgem->gen >= 40 && bo->tiling) { + if (kgem->gen >= 040 && bo->tiling) { cmd |= BLT_DST_TILED; br13 >>= 2; } @@ -2194,7 +2533,7 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu, if (overwrites && kgem->nbatch >= 6 && kgem->batch[kgem->nbatch-6] == cmd && *(uint64_t *)&kgem->batch[kgem->nbatch-4] == *(const uint64_t *)box && - kgem->reloc[kgem->nreloc-1].target_handle == bo->handle) { + kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) { DBG(("%s: replacing last fill\n", __FUNCTION__)); kgem->batch[kgem->nbatch-5] = br13; kgem->batch[kgem->nbatch-1] = color; @@ -2203,7 +2542,7 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu, if (overwrites && kgem->nbatch >= 8 && (kgem->batch[kgem->nbatch-8] & 0xffc0000f) == XY_SRC_COPY_BLT_CMD && *(uint64_t *)&kgem->batch[kgem->nbatch-6] == *(const uint64_t *)box && - kgem->reloc[kgem->nreloc-2].target_handle == bo->handle) { + kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) { DBG(("%s: replacing last copy\n", __FUNCTION__)); kgem->batch[kgem->nbatch-8] = cmd; kgem->batch[kgem->nbatch-7] = br13; @@ -2226,11 +2565,11 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu, return false; } - kgem_set_mode(kgem, KGEM_BLT); + kgem_set_mode(kgem, KGEM_BLT, bo); if (!kgem_check_batch(kgem, 6) || !kgem_check_reloc(kgem, 1) || !kgem_check_bo_fenced(kgem, bo)) { - _kgem_submit(kgem); + kgem_submit(kgem); assert(kgem_check_bo_fenced(&sna->kgem, bo)); _kgem_set_mode(kgem, KGEM_BLT); } @@ -2288,7 +2627,7 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, br13 = bo->pitch; cmd = XY_SCANLINE_BLT; - if (kgem->gen >= 40 && bo->tiling) { + if (kgem->gen >= 040 && bo->tiling) { cmd |= 1 << 11; br13 >>= 2; } @@ -2302,11 +2641,12 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, case 8: break; } - kgem_set_mode(kgem, KGEM_BLT); + kgem_set_mode(kgem, KGEM_BLT, bo); if (!kgem_check_batch(kgem, 12) || !kgem_check_bo_fenced(kgem, bo)) { - _kgem_submit(kgem); - assert(kgem_check_bo_fenced(&sna->kgem, bo)); + kgem_submit(kgem); + if (!kgem_check_bo_fenced(&sna->kgem, bo)) + return false; _kgem_set_mode(kgem, KGEM_BLT); } @@ -2405,13 +2745,6 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, return true; } -static inline uint32_t add2(uint32_t v, int16_t x, int16_t y) -{ - x += v & 0xffff; - y += v >> 16; - return (uint16_t)y << 16 | x; -} - bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, @@ -2442,14 +2775,14 @@ bool sna_blt_copy_boxes(struct sna *sna, 
uint8_t alu, cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; src_pitch = src_bo->pitch; - if (kgem->gen >= 40 && src_bo->tiling) { + if (kgem->gen >= 040 && src_bo->tiling) { cmd |= BLT_SRC_TILED; src_pitch >>= 2; } assert(src_pitch <= MAXSHORT); br13 = dst_bo->pitch; - if (kgem->gen >= 40 && dst_bo->tiling) { + if (kgem->gen >= 040 && dst_bo->tiling) { cmd |= BLT_DST_TILED; br13 >>= 2; } @@ -2466,7 +2799,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, /* Compare first box against a previous fill */ if (kgem->nbatch >= 6 && (alu == GXcopy || alu == GXclear || alu == GXset) && - kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->handle && + kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle && kgem->batch[kgem->nbatch-6] == ((cmd & ~XY_SRC_COPY_BLT_CMD) | XY_COLOR_BLT) && kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) && kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) { @@ -2475,11 +2808,11 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, kgem->nreloc--; } - kgem_set_mode(kgem, KGEM_BLT); + kgem_set_mode(kgem, KGEM_BLT, dst_bo); if (!kgem_check_batch(kgem, 8) || !kgem_check_reloc(kgem, 2) || !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) { - _kgem_submit(kgem); + kgem_submit(kgem); if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) return sna_tiling_blt_copy_boxes(sna, alu, src_bo, src_dx, src_dy, @@ -2595,7 +2928,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, } while (1); } - if (kgem->gen >= 60 && kgem_check_batch(kgem, 3)) { + if (kgem->gen >= 060 && kgem_check_batch(kgem, 3)) { uint32_t *b = kgem->batch + kgem->nbatch; b[0] = XY_SETUP_CLIP; b[1] = b[2] = 0; diff --git a/src/sna/sna_composite.c b/src/sna/sna_composite.c index 60d39cdea..a4b85fe58 100644 --- a/src/sna/sna_composite.c +++ b/src/sna/sna_composite.c @@ -399,8 +399,8 @@ static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char static void apply_damage(struct sna_composite_op *op, RegionPtr region) { - DBG(("%s: damage=%p, region=%d [(%d, %d), (%d, %d) + (%d, %d)]\n", - __FUNCTION__, op->damage, REGION_NUM_RECTS(region), + DBG(("%s: damage=%p, region=%ld [(%d, %d), (%d, %d) + (%d, %d)]\n", + __FUNCTION__, op->damage, RegionNumRects(region), region->extents.x1, region->extents.y1, region->extents.x2, region->extents.y2, op->dst.x, op->dst.y)); @@ -438,6 +438,92 @@ static inline bool use_cpu(PixmapPtr pixmap, struct sna_pixmap *priv, return (priv->create & KGEM_CAN_CREATE_GPU) == 0; } +static void validate_source(PicturePtr picture) +{ +#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,10,99,901,0) + miCompositeSourceValidate(picture); +#else + miCompositeSourceValidate(picture, + 0, 0, + picture->pDrawable ? picture->pDrawable->width : 0, + picture->pDrawable ? 
picture->pDrawable->height : 0); +#endif +} + +void +sna_composite_fb(CARD8 op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + RegionPtr region, + INT16 src_x, INT16 src_y, + INT16 mask_x, INT16 mask_y, + INT16 dst_x, INT16 dst_y, + CARD16 width, CARD16 height) +{ + pixman_image_t *src_image, *mask_image, *dest_image; + int src_xoff, src_yoff; + int msk_xoff, msk_yoff; + int dst_xoff, dst_yoff; + unsigned flags; + + DBG(("%s: fallback -- move dst to cpu\n", __FUNCTION__)); + if (op <= PictOpSrc && !dst->alphaMap) + flags = MOVE_WRITE | MOVE_INPLACE_HINT; + else + flags = MOVE_WRITE | MOVE_READ; + if (!sna_drawable_move_region_to_cpu(dst->pDrawable, region, flags)) + return; + if (dst->alphaMap && + !sna_drawable_move_to_cpu(dst->alphaMap->pDrawable, flags)) + return; + + if (src->pDrawable) { + DBG(("%s: fallback -- move src to cpu\n", __FUNCTION__)); + if (!sna_drawable_move_to_cpu(src->pDrawable, + MOVE_READ)) + return; + + if (src->alphaMap && + !sna_drawable_move_to_cpu(src->alphaMap->pDrawable, + MOVE_READ)) + return; + } + + if (mask && mask->pDrawable) { + DBG(("%s: fallback -- move mask to cpu\n", __FUNCTION__)); + if (!sna_drawable_move_to_cpu(mask->pDrawable, + MOVE_READ)) + return; + + if (mask->alphaMap && + !sna_drawable_move_to_cpu(mask->alphaMap->pDrawable, + MOVE_READ)) + return; + } + + DBG(("%s: fallback -- fbComposite\n", __FUNCTION__)); + + validate_source(src); + if (mask) + validate_source(mask); + + src_image = image_from_pict(src, FALSE, &src_xoff, &src_yoff); + mask_image = image_from_pict(mask, FALSE, &msk_xoff, &msk_yoff); + dest_image = image_from_pict(dst, TRUE, &dst_xoff, &dst_yoff); + + if (src_image && dest_image && !(mask && !mask_image)) + sna_image_composite(op, src_image, mask_image, dest_image, + src_x + src_xoff, src_y + src_yoff, + mask_x + msk_xoff, mask_y + msk_yoff, + dst_x + dst_xoff, dst_y + dst_yoff, + width, height); + + free_pixman_pict(src, src_image); + free_pixman_pict(mask, mask_image); + free_pixman_pict(dst, dest_image); +} + void sna_composite(CARD8 op, PicturePtr src, @@ -510,7 +596,7 @@ sna_composite(CARD8 op, } if (use_cpu(pixmap, priv, op, width, height) && - !picture_is_gpu(src) && !picture_is_gpu(mask)) { + !picture_is_gpu(sna, src) && !picture_is_gpu(sna, mask)) { DBG(("%s: fallback, dst pixmap=%ld is too small (or completely damaged)\n", __FUNCTION__, pixmap->drawable.serialNumber)); goto fallback; @@ -561,8 +647,8 @@ sna_composite(CARD8 op, tmp.box(sna, &tmp, &region.extents); else tmp.boxes(sna, &tmp, - REGION_BOXPTR(&region), - REGION_NUM_RECTS(&region)); + RegionBoxptr(&region), + RegionNumRects(&region)); apply_damage(&tmp, &region); tmp.done(sna, &tmp); @@ -610,23 +696,15 @@ fallback: } DBG(("%s: fallback -- fbComposite\n", __FUNCTION__)); - fbComposite(op, src, mask, dst, - src_x, src_y, - mask_x, mask_y, - dst_x, dst_y, - width, height); + sna_composite_fb(op, src, mask, dst, &region, + src_x, src_y, + mask_x, mask_y, + dst_x, dst_y, + width, height); out: REGION_UNINIT(NULL, &region); } -static int16_t bound(int16_t a, uint16_t b) -{ - int v = (int)a + (int)b; - if (v > MAXSHORT) - return MAXSHORT; - return v; -} - static bool _pixman_region_init_clipped_rectangles(pixman_region16_t *region, unsigned int num_rects, @@ -778,11 +856,11 @@ sna_composite_rectangles(CARD8 op, return; } - DBG(("%s: drawable extents (%d, %d),(%d, %d) x %d\n", + DBG(("%s: drawable extents (%d, %d),(%d, %d) x %ld\n", __FUNCTION__, RegionExtents(&region)->x1, RegionExtents(&region)->y1, RegionExtents(&region)->x2, RegionExtents(&region)->y2, - RegionNumRects(&region))); + 
(long)RegionNumRects(&region))); if (dst->pCompositeClip->data && (!pixman_region_intersect(&region, &region, dst->pCompositeClip) || @@ -793,11 +871,11 @@ sna_composite_rectangles(CARD8 op, return; } - DBG(("%s: clipped extents (%d, %d),(%d, %d) x %d\n", + DBG(("%s: clipped extents (%d, %d),(%d, %d) x %ld\n", __FUNCTION__, RegionExtents(&region)->x1, RegionExtents(&region)->y1, RegionExtents(&region)->x2, RegionExtents(&region)->y2, - RegionNumRects(&region))); + (long)RegionNumRects(&region))); pixmap = get_drawable_pixmap(dst->pDrawable); get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y); @@ -827,6 +905,10 @@ sna_composite_rectangles(CARD8 op, goto fallback; } + /* XXX xserver-1.8: CompositeRects is not tracked by Damage, so we must + * manually append the damaged regions ourselves. + */ + DamageRegionAppend(&pixmap->drawable, &region); boxes = pixman_region_rectangles(&region, &num_boxes); /* If we going to be overwriting any CPU damage with a subsequent @@ -849,10 +931,10 @@ sna_composite_rectangles(CARD8 op, box_inplace(pixmap, &region.extents)) { DBG(("%s: promoting to full GPU\n", __FUNCTION__)); if (priv->gpu_bo && priv->cpu_damage == NULL) { + assert(priv->gpu_bo->proxy == NULL); sna_damage_all(&priv->gpu_damage, pixmap->drawable.width, pixmap->drawable.height); - priv->undamaged = false; } } if (priv->cpu_damage == NULL) { @@ -890,7 +972,6 @@ sna_composite_rectangles(CARD8 op, pixmap->drawable.height); sna_damage_destroy(damage == &priv->gpu_damage ? &priv->cpu_damage : &priv->gpu_damage); - priv->undamaged = false; } if (op <= PictOpSrc && bo == priv->gpu_bo) { @@ -927,9 +1008,11 @@ fallback: !sna_drawable_move_to_cpu(dst->alphaMap->pDrawable, error)) goto done; + assert(pixmap->devPrivate.ptr); + if (op <= PictOpSrc) { - int nbox = REGION_NUM_RECTS(&region); - BoxPtr box = REGION_RECTS(&region); + int nbox = RegionNumRects(&region); + BoxPtr box = RegionRects(&region); uint32_t pixel; if (op == PictOpClear) @@ -984,10 +1067,6 @@ fallback_composite: } done: - /* XXX xserver-1.8: CompositeRects is not tracked by Damage, so we must - * manually append the damaged regions ourselves. 
- */ - DamageRegionAppend(&pixmap->drawable, &region); DamageRegionProcessPending(&pixmap->drawable); pixman_region_fini(&region); diff --git a/src/sna/sna_damage.c b/src/sna/sna_damage.c index a870cbf5c..ab693af3a 100644 --- a/src/sna/sna_damage.c +++ b/src/sna/sna_damage.c @@ -507,6 +507,7 @@ static void __sna_damage_reduce(struct sna_damage *damage) } else { pixman_region16_t tmp; + assert(n == nboxes); pixman_region_init_rects(&tmp, boxes, nboxes); pixman_region_subtract(region, region, &tmp); pixman_region_fini(&tmp); @@ -1033,7 +1034,7 @@ static struct sna_damage *__sna_damage_subtract(struct sna_damage *damage, if (damage == NULL) return NULL; - if (!RegionNotEmpty(&damage->region)) { + if (RegionNil(&damage->region)) { no_damage: __sna_damage_destroy(damage); return NULL; @@ -1126,7 +1127,7 @@ inline static struct sna_damage *__sna_damage_subtract_box(struct sna_damage *da if (damage == NULL) return NULL; - if (!RegionNotEmpty(&damage->region)) { + if (RegionNil(&damage->region)) { __sna_damage_destroy(damage); return NULL; } @@ -1198,7 +1199,7 @@ static struct sna_damage *__sna_damage_subtract_boxes(struct sna_damage *damage, if (damage == NULL) return NULL; - if (!RegionNotEmpty(&damage->region)) { + if (RegionNil(&damage->region)) { __sna_damage_destroy(damage); return NULL; } diff --git a/src/sna/sna_damage.h b/src/sna/sna_damage.h index 5e800b7dc..03a54a3d0 100644 --- a/src/sna/sna_damage.h +++ b/src/sna/sna_damage.h @@ -2,7 +2,6 @@ #define SNA_DAMAGE_H #include <regionstr.h> -#include <list.h> #include "compiler.h" @@ -259,20 +258,22 @@ static inline void sna_damage_reduce_all(struct sna_damage **_damage, if (damage == NULL || DAMAGE_IS_ALL(damage)) return; - if (damage->mode == DAMAGE_ADD && - damage->extents.x1 <= 0 && - damage->extents.y1 <= 0 && - damage->extents.x2 >= width && - damage->extents.y2 >= height) { - if (damage->dirty) { - damage = *_damage = _sna_damage_reduce(damage); - if (damage == NULL) - return; + if (damage->mode == DAMAGE_ADD) { + if (damage->extents.x1 <= 0 && + damage->extents.y1 <= 0 && + damage->extents.x2 >= width && + damage->extents.y2 >= height) { + if (damage->dirty) { + damage = *_damage = _sna_damage_reduce(damage); + if (damage == NULL) + return; + } + + if (damage->region.data == NULL) + *_damage = _sna_damage_all(damage, width, height); } - - if (damage->region.data == NULL) - *_damage = _sna_damage_all(damage, width, height); - } + } else + *_damage = _sna_damage_reduce(damage); } void __sna_damage_destroy(struct sna_damage *damage); diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c index ed3237558..558d70626 100644 --- a/src/sna/sna_display.c +++ b/src/sna/sna_display.c @@ -113,7 +113,7 @@ static inline uint32_t fb_id(struct kgem_bo *bo) return bo->delta; } -int sna_crtc_id(xf86CrtcPtr crtc) +uint32_t sna_crtc_id(xf86CrtcPtr crtc) { return to_sna_crtc(crtc)->id; } @@ -123,7 +123,7 @@ int sna_crtc_to_pipe(xf86CrtcPtr crtc) return to_sna_crtc(crtc)->pipe; } -int sna_crtc_to_plane(xf86CrtcPtr crtc) +uint32_t sna_crtc_to_plane(xf86CrtcPtr crtc) { return to_sna_crtc(crtc)->plane; } @@ -164,6 +164,7 @@ static unsigned get_fb(struct sna *sna, struct kgem_bo *bo, scrn->depth, scrn->bitsPerPixel, bo->pitch, errno); return 0; } + assert(arg.fb_id != 0); bo->scanout = true; return bo->delta = arg.fb_id; @@ -197,13 +198,15 @@ sna_output_backlight_set(xf86OutputPtr output, int level) char path[1024], val[BACKLIGHT_VALUE_LEN]; int fd, len, ret; - DBG(("%s: level=%d\n", __FUNCTION__, level)); + DBG(("%s: level=%d, max=%d\n", __FUNCTION__, + level, 
sna_output->backlight_max)); - if (level > sna_output->backlight_max) - level = sna_output->backlight_max; - if (!sna_output->backlight_iface || level < 0) + if (!sna_output->backlight_iface) return; + if ((unsigned)level > sna_output->backlight_max) + level = sna_output->backlight_max; + len = snprintf(val, BACKLIGHT_VALUE_LEN, "%d\n", level); sprintf(path, "%s/%s/brightness", BACKLIGHT_CLASS, sna_output->backlight_iface); @@ -886,6 +889,9 @@ void sna_copy_fbcon(struct sna *sna) DBG(("%s\n", __FUNCTION__)); + priv = sna_pixmap(sna->front); + assert(priv && priv->gpu_bo); + /* Scan the connectors for a framebuffer and assume that is the fbcon */ VG_CLEAR(fbcon); fbcon.fb_id = 0; @@ -912,6 +918,11 @@ void sna_copy_fbcon(struct sna *sna) return; } + if (fbcon.fb_id == priv->gpu_bo->delta) { + DBG(("%s: fb already installed as scanout\n", __FUNCTION__)); + return; + } + /* Wrap the fbcon in a pixmap so that we select the right formats * in the render copy in case we need to preserve the fbcon * across a depth change upon starting X. @@ -933,9 +944,6 @@ void sna_copy_fbcon(struct sna *sna) DBG(("%s: fbcon handle=%d\n", __FUNCTION__, bo->handle)); - priv = sna_pixmap(sna->front); - assert(priv && priv->gpu_bo); - sx = dx = 0; if (box.x2 < (uint16_t)fbcon.width) sx = (fbcon.width - box.x2) / 2; @@ -957,7 +965,9 @@ void sna_copy_fbcon(struct sna *sna) kgem_bo_destroy(&sna->kgem, bo); +#if ABI_VIDEODRV_VERSION >= SET_ABI_VERSION(10, 0) sna->scrn->pScreen->canDoBGNoneRoot = ok; +#endif cleanup_scratch: FreeScratchPixmapHeader(scratch); @@ -1223,6 +1233,9 @@ sna_crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, struct drm_mode_modeinfo saved_kmode; bool saved_transform; + if (mode->HDisplay == 0 || mode->VDisplay == 0) + return FALSE; + xf86DrvMsg(crtc->scrn->scrnIndex, X_INFO, "switch to mode %dx%d on crtc %d (pipe %d)\n", mode->HDisplay, mode->VDisplay, @@ -1251,6 +1264,8 @@ retry: /* Attach per-crtc pixmap or direct */ if (bo == NULL) return FALSE; + kgem_bo_submit(&sna->kgem, bo); + sna_crtc->bo = bo; mode_to_kmode(&sna_crtc->kmode, mode); if (!sna_crtc_apply(crtc)) { @@ -1307,9 +1322,12 @@ sna_crtc_dpms(xf86CrtcPtr crtc, int mode) void sna_mode_adjust_frame(struct sna *sna, int x, int y) { xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); - xf86OutputPtr output = config->output[config->compat_output]; - xf86CrtcPtr crtc = output->crtc; + xf86CrtcPtr crtc; + if ((unsigned)config->compat_output >= config->num_output) + return; + + crtc = config->output[config->compat_output]->crtc; if (crtc && crtc->enabled) { int saved_x = crtc->x; int saved_y = crtc->y; @@ -1456,9 +1474,10 @@ static const xf86CrtcFuncsRec sna_crtc_funcs = { #endif }; -static uint32_t +static int sna_crtc_find_plane(struct sna *sna, int pipe) { +#ifdef DRM_IOCTL_MODE_GETPLANERESOURCES struct drm_mode_get_plane_res r; uint32_t *planes, id = 0; int i; @@ -1494,7 +1513,11 @@ sna_crtc_find_plane(struct sna *sna, int pipe) } free(planes); + assert(id); return id; +#else + return 0; +#endif } static void @@ -1691,41 +1714,40 @@ static DisplayModePtr sna_output_panel_edid(xf86OutputPtr output, DisplayModePtr modes) { xf86MonPtr mon = output->MonInfo; + DisplayModePtr i, m, preferred = NULL; + int max_x = 0, max_y = 0; + float max_vrefresh = 0.0; - if (!mon || !GTF_SUPPORTED(mon->features.msc)) { - DisplayModePtr i, m, p = NULL; - int max_x = 0, max_y = 0; - float max_vrefresh = 0.0; - - for (m = modes; m; m = m->next) { - if (m->type & M_T_PREFERRED) - p = m; - max_x = max(max_x, m->HDisplay); - max_y = max(max_y, 
m->VDisplay); - max_vrefresh = max(max_vrefresh, xf86ModeVRefresh(m)); - } + if (mon && GTF_SUPPORTED(mon->features.msc)) + return modes; - max_vrefresh = max(max_vrefresh, 60.0); - max_vrefresh *= (1 + SYNC_TOLERANCE); - - m = xf86GetDefaultModes(); - xf86ValidateModesSize(output->scrn, m, max_x, max_y, 0); + for (m = modes; m; m = m->next) { + if (m->type & M_T_PREFERRED) + preferred = m; + max_x = max(max_x, m->HDisplay); + max_y = max(max_y, m->VDisplay); + max_vrefresh = max(max_vrefresh, xf86ModeVRefresh(m)); + } - for (i = m; i; i = i->next) { - if (xf86ModeVRefresh(i) > max_vrefresh) - i->status = MODE_VSYNC; - if (p && i->HDisplay >= p->HDisplay && - i->VDisplay >= p->VDisplay && - xf86ModeVRefresh(i) >= xf86ModeVRefresh(p)) - i->status = MODE_VSYNC; - } + max_vrefresh = max(max_vrefresh, 60.0); + max_vrefresh *= (1 + SYNC_TOLERANCE); - xf86PruneInvalidModes(output->scrn, &m, FALSE); + m = xf86GetDefaultModes(); + xf86ValidateModesSize(output->scrn, m, max_x, max_y, 0); - modes = xf86ModesAdd(modes, m); + for (i = m; i; i = i->next) { + if (xf86ModeVRefresh(i) > max_vrefresh) + i->status = MODE_VSYNC; + if (preferred && + i->HDisplay >= preferred->HDisplay && + i->VDisplay >= preferred->VDisplay && + xf86ModeVRefresh(i) >= xf86ModeVRefresh(preferred)) + i->status = MODE_PANEL; } - return modes; + xf86PruneInvalidModes(output->scrn, &m, FALSE); + + return xf86ModesAdd(modes, m); } static DisplayModePtr @@ -1759,6 +1781,7 @@ sna_output_get_modes(xf86OutputPtr output) */ sna_output->has_panel_limits = false; if (is_panel(koutput->connector_type)) { + sna_output->panel_hdisplay = sna_output->panel_vdisplay = 0; for (i = 0; i < koutput->count_modes; i++) { drmModeModeInfo *mode_ptr; @@ -1768,7 +1791,6 @@ sna_output_get_modes(xf86OutputPtr output) if (mode_ptr->vdisplay > sna_output->panel_vdisplay) sna_output->panel_vdisplay = mode_ptr->vdisplay; } - sna_output->has_panel_limits = sna_output->panel_hdisplay && sna_output->panel_vdisplay; @@ -2459,13 +2481,13 @@ sna_crtc_resize(ScrnInfoPtr scrn, int width, int height) sna_crtc_disable(crtc); } - if (screen->root) { + if (root(screen)) { struct sna_visit_set_pixmap_window visit; visit.old = old_front; visit.new = sna->front; - TraverseTree(screen->root, sna_visit_set_window_pixmap, &visit); - assert(screen->GetWindowPixmap(screen->root) == sna->front); + TraverseTree(root(screen), sna_visit_set_window_pixmap, &visit); + assert(screen->GetWindowPixmap(root(screen)) == sna->front); } screen->SetScreenPixmap(sna->front); assert(screen->GetScreenPixmap(screen) == sna->front); @@ -2522,6 +2544,12 @@ static int do_page_flip(struct sna *sna, struct kgem_bo *bo, DBG(("%s: flip [fb=%d] on crtc %d [%d] failed - %d\n", __FUNCTION__, arg.fb_id, i, crtc->id, errno)); disable: + if (count == 0) + return 0; + + xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, + "%s: page flipping failed, disabling CRTC:%d (pipe=%d)\n", + __FUNCTION__, crtc->id, crtc->pipe); sna_crtc_disable(config->crtc[i]); continue; } @@ -2614,6 +2642,31 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna) return true; } +static Bool sna_mode_has_pending_events(struct sna *sna) +{ + struct pollfd pfd; + pfd.fd = sna->kgem.fd; + pfd.events = POLLIN; + return poll(&pfd, 1, 0) == 1; +} + +void +sna_mode_close(struct sna *sna) +{ + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(sna->scrn); + int i; + + /* In order to workaround a kernel bug in not honouring O_NONBLOCK, + * check that the fd is readable before attempting to read the next + * event from drm. 
+ */ + if (sna_mode_has_pending_events(sna)) + sna_mode_wakeup(sna); + + for (i = 0; i < xf86_config->num_crtc; i++) + sna_crtc_disable_shadow(sna, to_sna_crtc(xf86_config->crtc[i])); +} + void sna_mode_fini(struct sna *sna) { @@ -2689,6 +2742,11 @@ sna_covering_crtc(ScrnInfoPtr scrn, __FUNCTION__, c, crtc->bounds.x1, crtc->bounds.y1, crtc->bounds.x2, crtc->bounds.y2)); + if (*(const uint64_t *)box == *(uint64_t *)&crtc->bounds) { + DBG(("%s: box exactly matches crtc [%d]\n", + __FUNCTION__, c)); + return crtc; + } if (!sna_box_intersect(&cover_box, &crtc->bounds, box)) continue; @@ -2717,41 +2775,123 @@ sna_covering_crtc(ScrnInfoPtr scrn, return best_crtc; } -/* Gen6 wait for scan line support */ #define MI_LOAD_REGISTER_IMM (0x22<<23) -/* gen6: Scan lines register */ -#define GEN6_PIPEA_SLC (0x70004) -#define GEN6_PIPEB_SLC (0x71004) - -static void sna_emit_wait_for_scanline_gen6(struct sna *sna, +static bool sna_emit_wait_for_scanline_gen7(struct sna *sna, + xf86CrtcPtr crtc, int pipe, int y1, int y2, bool full_height) { - uint32_t event; uint32_t *b; + uint32_t event; - assert (y2 > 0); + if (!sna->kgem.has_secure_batches) + return false; - /* We just wait until the trace passes the roi */ - if (pipe == 0) { - pipe = GEN6_PIPEA_SLC; - event = MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW; - } else { - pipe = GEN6_PIPEB_SLC; - event = MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW; + assert(y1 >= 0); + assert(y2 > y1); + assert(sna->kgem.mode); + + /* Always program one less than the desired value */ + if (--y1 < 0) + y1 = crtc->bounds.y2; + y2--; + + switch (pipe) { + default: + assert(0); + case 0: + event = 1 << (full_height ? 3 : 0); + break; + case 1: + event = 1 << (full_height ? 11 : 8); + break; + case 2: + event = 1 << (full_height ? 21 : 14); + break; + } + + b = kgem_get_batch(&sna->kgem); + + /* Both the LRI and WAIT_FOR_EVENT must be in the same cacheline */ + if (((sna->kgem.nbatch + 6) >> 4) != (sna->kgem.nbatch + 10) >> 4) { + int dw = sna->kgem.nbatch + 6; + dw = ALIGN(dw, 16) - dw; + while (dw--) + *b++ = MI_NOOP; } - kgem_set_mode(&sna->kgem, KGEM_RENDER); - b = kgem_get_batch(&sna->kgem, 4); b[0] = MI_LOAD_REGISTER_IMM | 1; - b[1] = pipe; - b[2] = y2 - 1; - b[3] = MI_WAIT_FOR_EVENT | event; - kgem_advance_batch(&sna->kgem, 4); + b[1] = 0x44050; /* DERRMR */ + b[2] = ~event; + b[3] = MI_LOAD_REGISTER_IMM | 1; + b[4] = 0xa188; /* FORCEWAKE_MT */ + b[5] = 2 << 16 | 2; + b[6] = MI_LOAD_REGISTER_IMM | 1; + b[7] = 0x70068 + 0x1000 * pipe; + b[8] = (1 << 31) | (1 << 30) | (y1 << 16) | y2; + b[9] = MI_WAIT_FOR_EVENT | event; + b[10] = MI_LOAD_REGISTER_IMM | 1; + b[11] = 0xa188; /* FORCEWAKE_MT */ + b[12] = 2 << 16; + b[13] = MI_LOAD_REGISTER_IMM | 1; + b[14] = 0x44050; /* DERRMR */ + b[15] = ~0; + + sna->kgem.nbatch = b - sna->kgem.batch + 16; + + sna->kgem.batch_flags |= I915_EXEC_SECURE; + return true; +} + +static bool sna_emit_wait_for_scanline_gen6(struct sna *sna, + xf86CrtcPtr crtc, + int pipe, int y1, int y2, + bool full_height) +{ + uint32_t *b; + uint32_t event; + + if (!sna->kgem.has_secure_batches) + return false; + + assert(y1 >= 0); + assert(y2 > y1); + assert(sna->kgem.mode == KGEM_RENDER); + + /* Always program one less than the desired value */ + if (--y1 < 0) + y1 = crtc->bounds.y2; + y2--; + + /* The scanline granularity is 3 bits */ + y1 &= ~7; + y2 &= ~7; + if (y2 == y1) + return false; + + event = 1 << (3*full_height + pipe*8); + + b = kgem_get_batch(&sna->kgem); + sna->kgem.nbatch += 10; + + b[0] = MI_LOAD_REGISTER_IMM | 1; + b[1] = 0x44050; /* DERRMR */ + b[2] = ~event; + 
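/* Editorial note, not part of the upstream patch: writing ~event into
 * DERRMR (0x44050) unmasks only the selected scanline-window event so
 * that the MI_WAIT_FOR_EVENT below can fire; the closing dwords of
 * this sequence write ~0 back to DERRMR, masking everything again once
 * the wait has been queued.
 */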
b[3] = MI_LOAD_REGISTER_IMM | 1; + b[4] = 0x4f100; /* magic */ + b[5] = (1 << 31) | (1 << 30) | pipe << 29 | (y1 << 16) | y2; + b[6] = MI_WAIT_FOR_EVENT | event; + b[7] = MI_LOAD_REGISTER_IMM | 1; + b[8] = 0x44050; /* DERRMR */ + b[9] = ~0; + + sna->kgem.batch_flags |= I915_EXEC_SECURE; + return true; } -static void sna_emit_wait_for_scanline_gen4(struct sna *sna, +static bool sna_emit_wait_for_scanline_gen4(struct sna *sna, + xf86CrtcPtr crtc, int pipe, int y1, int y2, bool full_height) { @@ -2770,17 +2910,20 @@ static void sna_emit_wait_for_scanline_gen4(struct sna *sna, event = MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW; } - kgem_set_mode(&sna->kgem, KGEM_BLT); - b = kgem_get_batch(&sna->kgem, 5); + b = kgem_get_batch(&sna->kgem); + sna->kgem.nbatch += 5; + /* The documentation says that the LOAD_SCAN_LINES command * always comes in pairs. Don't ask me why. */ b[2] = b[0] = MI_LOAD_SCAN_LINES_INCL | pipe << 20; b[3] = b[1] = (y1 << 16) | (y2-1); b[4] = MI_WAIT_FOR_EVENT | event; - kgem_advance_batch(&sna->kgem, 5); + + return true; } -static void sna_emit_wait_for_scanline_gen2(struct sna *sna, +static bool sna_emit_wait_for_scanline_gen2(struct sna *sna, + xf86CrtcPtr crtc, int pipe, int y1, int y2, bool full_height) { @@ -2794,17 +2937,16 @@ static void sna_emit_wait_for_scanline_gen2(struct sna *sna, if (full_height) y2 -= 2; - kgem_set_mode(&sna->kgem, KGEM_BLT); - b = kgem_get_batch(&sna->kgem, 5); + b = kgem_get_batch(&sna->kgem); + sna->kgem.nbatch += 5; + /* The documentation says that the LOAD_SCAN_LINES command * always comes in pairs. Don't ask me why. */ b[2] = b[0] = MI_LOAD_SCAN_LINES_INCL | pipe << 20; b[3] = b[1] = (y1 << 16) | (y2-1); - if (pipe == 0) - b[4] = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW; - else - b[4] = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW; - kgem_advance_batch(&sna->kgem, 5); + b[4] = MI_WAIT_FOR_EVENT | 1 << (1 + 4*pipe); + + return true; } bool @@ -2815,15 +2957,12 @@ sna_wait_for_scanline(struct sna *sna, { bool full_height; int y1, y2, pipe; + bool ret; assert(crtc); assert(to_sna_crtc(crtc)->bo != NULL); assert(pixmap == sna->front); - /* XXX WAIT_EVENT is still causing hangs on SNB */ - if (sna->kgem.gen >= 60) - return false; - /* * Make sure we don't wait for a scanline that will * never occur @@ -2850,14 +2989,20 @@ sna_wait_for_scanline(struct sna *sna, DBG(("%s: pipe=%d, y1=%d, y2=%d, full_height?=%d\n", __FUNCTION__, pipe, y1, y2, full_height)); - if (sna->kgem.gen >= 60) - sna_emit_wait_for_scanline_gen6(sna, pipe, y1, y2, full_height); - else if (sna->kgem.gen >= 40) - sna_emit_wait_for_scanline_gen4(sna, pipe, y1, y2, full_height); + if (sna->kgem.gen >= 0100) + ret = false; + else if (sna->kgem.gen == 071) + ret =sna_emit_wait_for_scanline_gen6(sna, crtc, pipe, y1, y2, full_height); + else if (sna->kgem.gen >= 070) + ret = sna_emit_wait_for_scanline_gen7(sna, crtc, pipe, y1, y2, full_height); + else if (sna->kgem.gen >= 060) + ret =sna_emit_wait_for_scanline_gen6(sna, crtc, pipe, y1, y2, full_height); + else if (sna->kgem.gen >= 040) + ret = sna_emit_wait_for_scanline_gen4(sna, crtc, pipe, y1, y2, full_height); else - sna_emit_wait_for_scanline_gen2(sna, pipe, y1, y2, full_height); + ret = sna_emit_wait_for_scanline_gen2(sna, crtc, pipe, y1, y2, full_height); - return true; + return ret; } void sna_mode_update(struct sna *sna) @@ -3028,7 +3173,7 @@ sna_crtc_redisplay__composite(xf86CrtcPtr crtc, RegionPtr region) 0, 0, 0, 0, 0, 0, - 0, 0, + crtc->mode.HDisplay, crtc->mode.VDisplay, memset(&tmp, 0, sizeof(tmp)))) { 
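/* Editorial note, not part of the upstream patch: memset() returns its
 * first argument, so the call above zeroes the composite op and passes
 * &tmp as the final argument of render.composite() in one expression.
 */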
DBG(("%s: unsupported operation!\n", __FUNCTION__)); sna_crtc_redisplay__fallback(crtc, region); @@ -3118,10 +3263,11 @@ void sna_mode_redisplay(struct sna *sna) assert(sna->mode.shadow_active); region = DamageRegion(sna->mode.shadow_damage); - if (!RegionNotEmpty(region)) + if (RegionNil(region)) return; - if (!sna_pixmap_move_to_gpu(sna->front, MOVE_READ)) { + if (!can_render(sna) || + !sna_pixmap_move_to_gpu(sna->front, MOVE_READ)) { if (!sna_pixmap_move_to_cpu(sna->front, MOVE_READ)) return; @@ -3164,7 +3310,7 @@ void sna_mode_redisplay(struct sna *sna) RegionIntersect(&damage, &damage, region); if (RegionNotEmpty(&damage)) { sna_crtc_redisplay(crtc, &damage); - __kgem_flush(&sna->kgem, sna_crtc->bo); + kgem_bo_flush(&sna->kgem, sna_crtc->bo); } RegionUninit(&damage); } @@ -3184,6 +3330,7 @@ void sna_mode_redisplay(struct sna *sna) for (i = 0; i < config->num_crtc; i++) { struct sna_crtc *crtc = config->crtc[i]->driver_private; + struct drm_mode_crtc_page_flip arg; DBG(("%s: crtc %d [%d, pipe=%d] active? %d\n", __FUNCTION__, i, crtc->id, crtc->pipe, crtc->bo != NULL)); @@ -3191,41 +3338,36 @@ void sna_mode_redisplay(struct sna *sna) continue; assert(config->crtc[i]->enabled); - - if (crtc->dpms_mode == DPMSModeOn) { - struct drm_mode_crtc_page_flip arg; - arg.crtc_id = crtc->id; - arg.fb_id = get_fb(sna, new, - sna->scrn->virtualX, - sna->scrn->virtualY); - if (arg.fb_id == 0) - goto disable; - - /* Only the reference crtc will finally deliver its page flip - * completion event. All other crtc's events will be discarded. - */ - arg.user_data = 0; - arg.flags = DRM_MODE_PAGE_FLIP_EVENT; - arg.reserved = 0; - - if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_PAGE_FLIP, &arg)) { - DBG(("%s: flip [fb=%d] on crtc %d [%d, pipe=%d] failed - %d\n", - __FUNCTION__, arg.fb_id, i, crtc->id, crtc->pipe, errno)); + assert(crtc->dpms_mode == DPMSModeOn); + + arg.crtc_id = crtc->id; + arg.fb_id = get_fb(sna, new, + sna->scrn->virtualX, + sna->scrn->virtualY); + if (arg.fb_id == 0) + goto disable; + + arg.user_data = 0; + arg.flags = DRM_MODE_PAGE_FLIP_EVENT; + arg.reserved = 0; + + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_PAGE_FLIP, &arg)) { + DBG(("%s: flip [fb=%d] on crtc %d [%d, pipe=%d] failed - %d\n", + __FUNCTION__, arg.fb_id, i, crtc->id, crtc->pipe, errno)); disable: - sna_crtc_disable(config->crtc[i]); - continue; - } - sna->mode.shadow_flip++; + xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, + "%s: page flipping failed, disabling CRTC:%d (pipe=%d)\n", + __FUNCTION__, crtc->id, crtc->pipe); + sna_crtc_disable(config->crtc[i]); + continue; } + sna->mode.shadow_flip++; kgem_bo_destroy(&sna->kgem, old); crtc->bo = kgem_bo_reference(new); } if (sna->mode.shadow) { - /* XXX only works if the kernel stalls fwrites to the current - * scanout whilst the flip is pending - */ while (sna->mode.shadow_flip) sna_mode_wakeup(sna); (void)sna->render.copy_boxes(sna, GXcopy, @@ -3237,8 +3379,9 @@ disable: kgem_submit(&sna->kgem); sna_pixmap(sna->front)->gpu_bo = old; - sna->mode.shadow = new; + sna_dri_pixmap_update_bo(sna, sna->front); + sna->mode.shadow = new; new->flush = old->flush; } diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c index 15ac46a3e..f04f1afeb 100644 --- a/src/sna/sna_dri.c +++ b/src/sna/sna_dri.c @@ -41,6 +41,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "sna.h" #include "sna_reg.h" +#include "intel_options.h" #include <xf86drm.h> #include <i915_drm.h> @@ -51,18 +52,17 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#endif #if DRI2INFOREC_VERSION < 10 +#undef USE_ASYNC_SWAP #define USE_ASYNC_SWAP 0 #endif #define COLOR_PREFER_TILING_Y 0 -#define FLIP_OFF_DELAY 5 enum frame_event_type { DRI2_SWAP, DRI2_SWAP_WAIT, DRI2_SWAP_THROTTLE, DRI2_XCHG_THROTTLE, - DRI2_ASYNC_FLIP, DRI2_FLIP, DRI2_FLIP_THROTTLE, DRI2_WAITMSC, @@ -91,15 +91,17 @@ struct sna_dri_frame_event { struct dri_bo { struct kgem_bo *bo; uint32_t name; - } old_front, next_front, cache; + } scanout[2], cache; - int off_delay; + int mode; }; struct sna_dri_private { - int refcnt; PixmapPtr pixmap; struct kgem_bo *bo; + bool scanout; + uint32_t size; + int refcnt; }; static inline struct sna_dri_frame_event * @@ -144,7 +146,8 @@ static uint32_t color_tiling(struct sna *sna, DrawablePtr draw) static uint32_t other_tiling(struct sna *sna, DrawablePtr draw) { /* XXX Can mix color X / depth Y? */ - return kgem_choose_tiling(&sna->kgem, -I915_TILING_Y, + return kgem_choose_tiling(&sna->kgem, + sna->kgem.gen >=40 ? -I915_TILING_Y : -I915_TILING_X, draw->width, draw->height, draw->bitsPerPixel); @@ -173,6 +176,7 @@ static struct kgem_bo *sna_pixmap_set_dri(struct sna *sna, } assert(priv->cpu_damage == NULL); + assert(priv->gpu_bo->proxy == NULL); if (priv->flush++) return priv->gpu_bo; @@ -198,13 +202,38 @@ static struct kgem_bo *sna_pixmap_set_dri(struct sna *sna, constant static inline void *sna_pixmap_get_buffer(PixmapPtr pixmap) { assert(pixmap->refcnt); - return ((void **)dixGetPrivateAddr(&pixmap->devPrivates, &sna_pixmap_key))[2]; + return ((void **)__get_private(pixmap, sna_pixmap_key))[2]; } static inline void sna_pixmap_set_buffer(PixmapPtr pixmap, void *ptr) { assert(pixmap->refcnt); - ((void **)dixGetPrivateAddr(&pixmap->devPrivates, &sna_pixmap_key))[2] = ptr; + ((void **)__get_private(pixmap, sna_pixmap_key))[2] = ptr; +} + +void +sna_dri_pixmap_update_bo(struct sna *sna, PixmapPtr pixmap) +{ + DRI2Buffer2Ptr buffer; + struct sna_dri_private *private; + struct kgem_bo *bo; + + buffer = sna_pixmap_get_buffer(pixmap); + if (buffer == NULL) + return; + + private = get_private(buffer); + assert(private->pixmap == pixmap); + + bo = sna_pixmap(pixmap)->gpu_bo; + if (private->bo == bo) + return; + + kgem_bo_destroy(&sna->kgem, private->bo); + buffer->name = kgem_bo_flink(&sna->kgem, bo); + private->bo = ref(bo); + + /* XXX DRI2InvalidateDrawable(&pixmap->drawable); */ } static DRI2Buffer2Ptr @@ -217,12 +246,15 @@ sna_dri_create_buffer(DrawablePtr draw, struct sna_dri_private *private; PixmapPtr pixmap; struct kgem_bo *bo; + unsigned flags = CREATE_EXACT; + uint32_t size; int bpp; DBG(("%s(attachment=%d, format=%d, drawable=%dx%d)\n", __FUNCTION__, attachment, format, draw->width, draw->height)); pixmap = NULL; + size = (uint32_t)draw->height << 16 | draw->width; switch (attachment) { case DRI2BufferFrontLeft: pixmap = get_drawable_pixmap(draw); @@ -236,7 +268,10 @@ sna_dri_create_buffer(DrawablePtr draw, assert(private->pixmap == pixmap); assert(sna_pixmap(pixmap)->gpu_bo == private->bo); + assert(sna_pixmap(pixmap)->pinned & PIN_DRI); assert(kgem_bo_flink(&sna->kgem, private->bo) == buffer->name); + assert(8*private->bo->pitch >= pixmap->drawable.width * pixmap->drawable.bitsPerPixel); + assert(private->bo->pitch * pixmap->drawable.height <= kgem_bo_size(private->bo)); private->refcnt++; return buffer; @@ -252,6 +287,9 @@ sna_dri_create_buffer(DrawablePtr draw, __FUNCTION__, pixmap->drawable.width, pixmap->drawable.height, pixmap, pixmap->refcnt)); + if (pixmap == sna->front) + flags |= CREATE_SCANOUT; + size = 
(uint32_t)pixmap->drawable.height << 16 | pixmap->drawable.width; break; case DRI2BufferBackLeft: @@ -260,12 +298,15 @@ sna_dri_create_buffer(DrawablePtr draw, case DRI2BufferFakeFrontLeft: case DRI2BufferFakeFrontRight: bpp = draw->bitsPerPixel; + if (draw->width == sna->front->drawable.width && + draw->height == sna->front->drawable.height) + flags |= CREATE_SCANOUT; bo = kgem_create_2d(&sna->kgem, draw->width, draw->height, draw->bitsPerPixel, color_tiling(sna, draw), - CREATE_SCANOUT | CREATE_EXACT); + flags); break; case DRI2BufferStencil: @@ -296,7 +337,7 @@ sna_dri_create_buffer(DrawablePtr draw, bo = kgem_create_2d(&sna->kgem, ALIGN(draw->width, 64), ALIGN((draw->height + 1) / 2, 64), - bpp, I915_TILING_NONE, CREATE_EXACT); + bpp, I915_TILING_NONE, flags); break; case DRI2BufferDepth: @@ -307,7 +348,7 @@ sna_dri_create_buffer(DrawablePtr draw, bo = kgem_create_2d(&sna->kgem, draw->width, draw->height, bpp, other_tiling(sna, draw), - CREATE_EXACT); + flags); break; default: @@ -331,6 +372,8 @@ sna_dri_create_buffer(DrawablePtr draw, private->refcnt = 1; private->bo = bo; private->pixmap = pixmap; + private->scanout = !!(flags & CREATE_SCANOUT); + private->size = size; if (buffer->name == 0) goto err; @@ -415,13 +458,13 @@ damage_all: pixmap->drawable.width, pixmap->drawable.height); sna_damage_destroy(&priv->cpu_damage); - priv->undamaged = false; } else { sna_damage_subtract(&priv->cpu_damage, region); if (priv->cpu_damage == NULL) goto damage_all; sna_damage_add(&priv->gpu_damage, region); } + priv->cpu = false; } static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo) @@ -430,6 +473,10 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo) struct sna_pixmap *priv = sna_pixmap(pixmap); RegionRec region; + assert(pixmap->drawable.width * pixmap->drawable.bitsPerPixel <= 8*bo->pitch); + assert(pixmap->drawable.height * bo->pitch <= kgem_bo_size(bo)); + assert(bo->proxy == NULL); + /* Post damage on the new front buffer so that listeners, such * as DisplayLink know take a copy and shove it over the USB, * also for software cursors and the like. @@ -446,12 +493,17 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo) sna_damage_destroy(&priv->cpu_damage); list_del(&priv->list); priv->cpu = false; - priv->undamaged = false; assert(bo->refcnt); if (priv->gpu_bo != bo) { kgem_bo_destroy(&sna->kgem, priv->gpu_bo); priv->gpu_bo = ref(bo); + if (priv->mapped) { + assert(!priv->shm && priv->stride); + pixmap->devPrivate.ptr = PTR(priv->ptr); + pixmap->devKind = priv->stride; + priv->mapped = false; + } } if (bo->domain != DOMAIN_GPU) bo->domain = DOMAIN_NONE; @@ -459,17 +511,20 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo) DamageRegionProcessPending(&pixmap->drawable); } -static void sna_dri_select_mode(struct sna *sna, struct kgem_bo *src, bool sync) +static void sna_dri_select_mode(struct sna *sna, struct kgem_bo *dst, struct kgem_bo *src, bool sync) { struct drm_i915_gem_busy busy; int mode; - if (sna->kgem.gen < 60) + if (sna->kgem.gen < 060) return; if (sync) { - DBG(("%s: sync, force RENDER ring\n", __FUNCTION__)); - kgem_set_mode(&sna->kgem, KGEM_RENDER); + DBG(("%s: sync, force %s ring\n", __FUNCTION__, + sna->kgem.gen >= 070 ? "BLT" : "RENDER")); + kgem_set_mode(&sna->kgem, + sna->kgem.gen >= 070 ? 
KGEM_BLT : KGEM_RENDER, + dst); return; } @@ -478,21 +533,21 @@ static void sna_dri_select_mode(struct sna *sna, struct kgem_bo *src, bool sync) return; } - if (sna->kgem.has_semaphores) { - DBG(("%s: have sempahores, prefering RENDER\n", __FUNCTION__)); - kgem_set_mode(&sna->kgem, KGEM_RENDER); - return; - } - VG_CLEAR(busy); - busy.handle = src->handle; + busy.handle = dst->handle; if (drmIoctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_BUSY, &busy)) return; - DBG(("%s: src busy?=%x\n", __FUNCTION__, busy.busy)); + DBG(("%s: dst busy?=%x\n", __FUNCTION__, busy.busy)); if (busy.busy == 0) { - DBG(("%s: src is idle, using defaults\n", __FUNCTION__)); - return; + busy.handle = src->handle; + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_BUSY, &busy)) + return; + DBG(("%s: src busy?=%x\n", __FUNCTION__, busy.busy)); + if (busy.busy == 0) { + DBG(("%s: src/dst is idle, using defaults\n", __FUNCTION__)); + return; + } } /* Sandybridge introduced a separate ring which it uses to @@ -513,6 +568,7 @@ static void sna_dri_select_mode(struct sna *sna, struct kgem_bo *src, bool sync) mode = KGEM_RENDER; if (busy.busy & (1 << 17)) mode = KGEM_BLT; + kgem_bo_mark_busy(dst, mode); _kgem_set_mode(&sna->kgem, mode); } @@ -525,6 +581,9 @@ sna_dri_copy_fallback(struct sna *sna, int bpp, void *dst = kgem_bo_map__gtt(&sna->kgem, dst_bo); void *src = kgem_bo_map__gtt(&sna->kgem, src_bo); + if (dst == NULL || src == NULL) + return; + DBG(("%s: src(%d, %d), dst(%d, %d) x %d\n", __FUNCTION__, sx, sy, dx, dy, n)); @@ -573,7 +632,7 @@ sna_dri_copy_to_front(struct sna *sna, DrawablePtr draw, RegionPtr region, if (sync) sync = sna_pixmap_is_scanout(sna, pixmap); - sna_dri_select_mode(sna, src_bo, sync); + sna_dri_select_mode(sna, dst_bo, src_bo, sync); } else sync = false; @@ -628,10 +687,15 @@ sna_dri_copy_to_front(struct sna *sna, DrawablePtr draw, RegionPtr region, dst_bo, 0, 0, boxes, n); } else { + unsigned flags; + + flags = COPY_LAST; + if (flush) + flags |= COPY_SYNC; sna->render.copy_boxes(sna, GXcopy, (PixmapPtr)draw, src_bo, -draw->x-dx, -draw->y-dy, pixmap, dst_bo, 0, 0, - boxes, n, COPY_LAST); + boxes, n, flags); DBG(("%s: flushing? %d\n", __FUNCTION__, flush)); if (flush) { /* STAT! 
*/ @@ -717,7 +781,7 @@ sna_dri_copy_from_front(struct sna *sna, DrawablePtr draw, RegionPtr region, dst_bo, -draw->x, -draw->y, boxes, n); } else { - sna_dri_select_mode(sna, src_bo, false); + sna_dri_select_mode(sna, dst_bo, src_bo, false); sna->render.copy_boxes(sna, GXcopy, pixmap, src_bo, dx, dy, (PixmapPtr)draw, dst_bo, -draw->x, -draw->y, @@ -766,7 +830,7 @@ sna_dri_copy(struct sna *sna, DrawablePtr draw, RegionPtr region, dst_bo, 0, 0, boxes, n); } else { - sna_dri_select_mode(sna, src_bo, false); + sna_dri_select_mode(sna, dst_bo, src_bo, false); sna->render.copy_boxes(sna, GXcopy, (PixmapPtr)draw, src_bo, 0, 0, (PixmapPtr)draw, dst_bo, 0, 0, @@ -777,6 +841,42 @@ sna_dri_copy(struct sna *sna, DrawablePtr draw, RegionPtr region, pixman_region_fini(&clip); } +static bool +can_blit(struct sna * sna, + DrawablePtr draw, + DRI2BufferPtr front, + DRI2BufferPtr back) +{ + RegionPtr clip; + int w, h; + uint32_t s; + + if (draw->type == DRAWABLE_PIXMAP) + return true; + + clip = &((WindowPtr)draw)->clipList; + w = clip->extents.x2 - draw->x; + h = clip->extents.y2 - draw->y; + if ((w|h) < 0) + return false; + + s = get_private(front)->size; + if ((s>>16) < h || (s&0xffff) < w) { + DBG(("%s: reject front size (%dx%d) < (%dx%d)\n", __func__, + s&0xffff, s>>16, w, h)); + return false; + } + + s = get_private(back)->size; + if ((s>>16) < h || (s&0xffff) < w) { + DBG(("%s:reject back size (%dx%d) < (%dx%d)\n", __func__, + s&0xffff, s>>16, w, h)); + return false; + } + + return true; +} + static void sna_dri_copy_region(DrawablePtr draw, RegionPtr region, @@ -789,6 +889,9 @@ sna_dri_copy_region(DrawablePtr draw, void (*copy)(struct sna *, DrawablePtr, RegionPtr, struct kgem_bo *, struct kgem_bo *, bool) = sna_dri_copy; + if (!can_blit(sna, draw, dst_buffer, src_buffer)) + return; + if (dst_buffer->attachment == DRI2BufferFrontLeft) { dst = sna_pixmap_get_bo(pixmap); copy = (void *)sna_dri_copy_to_front; @@ -860,7 +963,7 @@ sna_dri_get_pipe(DrawablePtr pDraw) static struct sna_dri_frame_event * sna_dri_window_get_chain(WindowPtr win) { - return ((void **)dixGetPrivateAddr(&win->devPrivates, &sna_window_key))[1]; + return ((void **)__get_private(win, sna_window_key))[1]; } static void @@ -869,7 +972,7 @@ sna_dri_window_set_chain(WindowPtr win, { DBG(("%s: head now %p\n", __FUNCTION__, chain)); assert(win->drawable.type == DRAWABLE_WINDOW); - ((void **)dixGetPrivateAddr(&win->devPrivates, &sna_window_key))[1] = chain; + ((void **)__get_private(win, sna_window_key))[1] = chain; } static void @@ -947,8 +1050,10 @@ sna_dri_frame_event_info_free(struct sna *sna, _sna_dri_destroy_buffer(sna, info->front); _sna_dri_destroy_buffer(sna, info->back); - if (info->old_front.bo) - kgem_bo_destroy(&sna->kgem, info->old_front.bo); + assert(info->scanout[1].bo == NULL); + + if (info->scanout[0].bo) + kgem_bo_destroy(&sna->kgem, info->scanout[0].bo); if (info->cache.bo) kgem_bo_destroy(&sna->kgem, info->cache.bo); @@ -959,25 +1064,39 @@ sna_dri_frame_event_info_free(struct sna *sna, free(info); } -static void +static bool sna_dri_page_flip(struct sna *sna, struct sna_dri_frame_event *info) { struct kgem_bo *bo = get_private(info->back)->bo; + struct dri_bo tmp; DBG(("%s()\n", __FUNCTION__)); assert(sna_pixmap_get_buffer(sna->front) == info->front); assert(get_drawable_pixmap(info->draw)->drawable.height * bo->pitch <= kgem_bo_size(bo)); + assert(info->scanout[0].bo); info->count = sna_page_flip(sna, bo, info, info->pipe); + if (!info->count) + return false; + + info->scanout[1] = info->scanout[0]; + 
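/* Editorial note, not part of the upstream patch: scanout[0] now
 * tracks the buffer being flipped onto the display and scanout[1] its
 * predecessor; the old scanout must stay referenced until the flip
 * event signals completion, at which point it is recycled into the
 * cache slot or destroyed (see sna_dri_flip_event below).
 */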
info->scanout[0].bo = ref(bo); + info->scanout[0].name = info->back->name; - info->old_front.name = info->front->name; - info->old_front.bo = get_private(info->front)->bo; + tmp.bo = get_private(info->front)->bo; + tmp.name = info->front->name; set_bo(sna->front, bo); info->front->name = info->back->name; get_private(info->front)->bo = bo; + + info->back->name = tmp.name; + get_private(info->back)->bo = tmp.bo; + + sna->dri.flip_pending = info; + return true; } static bool @@ -1031,12 +1150,25 @@ can_flip(struct sna * sna, if (sna_pixmap_get_buffer(pixmap) != front) { DBG(("%s: no, DRI2 drawable is no longer attached (old name=%d, new name=%d) to pixmap=%ld\n", __FUNCTION__, front->name, - ((DRI2BufferPtr)sna_pixmap_get_buffer(pixmap))->name, + sna_pixmap_get_buffer(pixmap) ? ((DRI2BufferPtr)sna_pixmap_get_buffer(pixmap))->name : 0, pixmap->drawable.serialNumber)); return false; } + if (!get_private(front)->scanout) { + DBG(("%s: no, DRI2 drawable not attached at time of creation)\n", + __FUNCTION__)); + return false; + } assert(get_private(front)->pixmap == sna->front); + assert(sna_pixmap(sna->front)->gpu_bo == get_private(front)->bo); + + if (!get_private(back)->scanout) { + DBG(("%s: no, DRI2 drawable was too small at time of creation)\n", + __FUNCTION__)); + return false; + } + assert(get_private(back)->size == get_private(front)->size); DBG(("%s: window size: %dx%d, clip=(%d, %d), (%d, %d) x %d\n", __FUNCTION__, @@ -1094,6 +1226,12 @@ can_exchange(struct sna * sna, WindowPtr win = (WindowPtr)draw; PixmapPtr pixmap; + /* XXX There is an inherent race between the DRI2 client and the DRI2 + * compositor which is only masked if we force a blit and serialise + * the operations through the kernel command queue. Hopeless. + */ + return false; + if (front->format != back->format) { DBG(("%s: no, format mismatch, front = %d, back = %d\n", __FUNCTION__, front->format, back->format)); @@ -1127,6 +1265,20 @@ can_exchange(struct sna * sna, return false; } + if (!get_private(front)->scanout) { + DBG(("%s: no, DRI2 drawable not attached at time of creation)\n", + __FUNCTION__)); + return false; + } + assert(get_private(front)->pixmap == sna->front); + + if (!get_private(back)->scanout) { + DBG(("%s: no, DRI2 drawable was too small at time of creation)\n", + __FUNCTION__)); + return false; + } + assert(get_private(back)->size == get_private(front)->size); + return true; } @@ -1167,10 +1319,12 @@ sna_dri_exchange_buffers(DrawablePtr draw, pixmap->drawable.width, pixmap->drawable.height)); - DBG(("%s: back_bo pitch=%d, size=%d\n", - __FUNCTION__, back_bo->pitch, kgem_bo_size(back_bo))); - DBG(("%s: front_bo pitch=%d, size=%d\n", - __FUNCTION__, front_bo->pitch, kgem_bo_size(front_bo))); + DBG(("%s: back_bo pitch=%d, size=%d, ref=%d\n", + __FUNCTION__, back_bo->pitch, kgem_bo_size(back_bo), back_bo->refcnt)); + DBG(("%s: front_bo pitch=%d, size=%d, ref=%d\n", + __FUNCTION__, front_bo->pitch, kgem_bo_size(front_bo), front_bo->refcnt)); + assert(front_bo->refcnt); + assert(back_bo->refcnt); assert(sna_pixmap_get_buffer(pixmap) == front); assert(pixmap->drawable.height * back_bo->pitch <= kgem_bo_size(back_bo)); @@ -1188,7 +1342,7 @@ sna_dri_exchange_buffers(DrawablePtr draw, static void chain_swap(struct sna *sna, DrawablePtr draw, - struct drm_event_vblank *event, + int frame, unsigned int tv_sec, unsigned int tv_usec, struct sna_dri_frame_event *chain) { drmVBlank vbl; @@ -1209,7 +1363,7 @@ static void chain_swap(struct sna *sna, DBG(("%s: performing chained exchange\n", __FUNCTION__)); 
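/* Editorial sketch, assuming the (bo, name) pairs held in
 * sna_dri_private and not part of the upstream patch: the exchange is
 * a pure identity swap with no pixel copy, roughly
 *
 *	struct kgem_bo *tmp = get_private(front)->bo;
 *	get_private(front)->bo = get_private(back)->bo;
 *	get_private(back)->bo = tmp;
 *
 * with the DRI2 buffer names traded in the same way.
 */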
sna_dri_exchange_buffers(draw, chain->front, chain->back); type = DRI2_EXCHANGE_COMPLETE; - } else { + } else if (can_blit(sna, draw, chain->front, chain->back)) { DBG(("%s: emitting chained vsync'ed blit\n", __FUNCTION__)); chain->bo = sna_dri_copy_to_front(sna, draw, NULL, @@ -1218,10 +1372,16 @@ static void chain_swap(struct sna *sna, true); type = DRI2_BLIT_COMPLETE; + } else { + DRI2SwapComplete(chain->client, draw, + 0, 0, 0, DRI2_BLIT_COMPLETE, + chain->client ? chain->event_complete : NULL, chain->event_data); + sna_dri_frame_event_info_free(sna, draw, chain); + return; } DRI2SwapComplete(chain->client, draw, - event->sequence, event->tv_sec, event->tv_usec, + frame, tv_sec, tv_usec, type, chain->client ? chain->event_complete : NULL, chain->event_data); VG_CLEAR(vbl); @@ -1273,19 +1433,17 @@ void sna_dri_vblank_handler(struct sna *sna, struct drm_event_vblank *event) switch (info->type) { case DRI2_FLIP: /* If we can still flip... */ - if (can_flip(sna, draw, info->front, info->back)) { - sna_dri_page_flip(sna, info); - info->back->name = info->old_front.name; - get_private(info->back)->bo = info->old_front.bo; - info->old_front.bo = NULL; + if (can_flip(sna, draw, info->front, info->back) && + sna_dri_page_flip(sna, info)) return; - } + /* else fall through to blit */ case DRI2_SWAP: - info->bo = sna_dri_copy_to_front(sna, draw, NULL, - get_private(info->front)->bo, - get_private(info->back)->bo, - true); + if (can_blit(sna, draw, info->front, info->back)) + info->bo = sna_dri_copy_to_front(sna, draw, NULL, + get_private(info->front)->bo, + get_private(info->back)->bo, + true); info->type = DRI2_SWAP_WAIT; /* fall through to SwapComplete */ case DRI2_SWAP_WAIT: @@ -1325,7 +1483,9 @@ void sna_dri_vblank_handler(struct sna *sna, struct drm_event_vblank *event) if (info->chain) { sna_dri_remove_frame_event((WindowPtr)draw, info); - chain_swap(sna, draw, event, info->chain); + chain_swap(sna, draw, + event->sequence, event->tv_sec, event->tv_usec, + info->chain); draw = NULL; } @@ -1334,22 +1494,91 @@ done: } static void +sna_dri_flip_get_back(struct sna *sna, struct sna_dri_frame_event *info) +{ + struct kgem_bo *bo; + uint32_t name; + + DBG(("%s: scanout=(%d, %d), back=%d, cache=%d\n", + __FUNCTION__, + info->scanout[0].bo ? info->scanout[0].bo->handle : 0, + info->scanout[1].bo ? info->scanout[1].bo->handle : 0, + get_private(info->back)->bo->handle, + info->cache.bo ? 
info->cache.bo->handle : 0)); + + bo = get_private(info->back)->bo; + if (!(bo == info->scanout[0].bo || bo == info->scanout[1].bo)) + return; + + bo = info->cache.bo; + name = info->cache.name; + if (bo == NULL || + bo == info->scanout[0].bo || + bo == info->scanout[1].bo) { + if (bo) { + DBG(("%s: discarding old backbuffer\n", __FUNCTION__)); + kgem_bo_destroy(&sna->kgem, bo); + } + DBG(("%s: allocating new backbuffer\n", __FUNCTION__)); + bo = kgem_create_2d(&sna->kgem, + info->draw->width, + info->draw->height, + info->draw->bitsPerPixel, + get_private(info->front)->bo->tiling, + CREATE_SCANOUT | CREATE_EXACT); + name = kgem_bo_flink(&sna->kgem, bo); + } + + info->cache.bo = get_private(info->back)->bo; + info->cache.name = info->back->name; + + get_private(info->back)->bo = bo; + info->back->name = name; + + assert(get_private(info->back)->bo != info->scanout[0].bo); + assert(get_private(info->back)->bo != info->scanout[1].bo); +} + +static bool sna_dri_flip_continue(struct sna *sna, struct sna_dri_frame_event *info) { - struct dri_bo tmp; + DBG(("%s(mode=%d)\n", __FUNCTION__, info->mode)); - DBG(("%s()\n", __FUNCTION__)); + if (info->mode > 1){ + if (get_private(info->front)->bo != sna_pixmap(sna->front)->gpu_bo) + return false; - assert(sna_pixmap_get_buffer(get_drawable_pixmap(info->draw)) == info->front); + info->count = sna_page_flip(sna, + get_private(info->front)->bo, + info, info->pipe); + if (!info->count) + return false; - tmp = info->old_front; + info->scanout[1] = info->scanout[0]; + info->scanout[0].bo = ref(get_private(info->front)->bo); + info->scanout[0].name = info->front->name; + sna->dri.flip_pending = info; + } else { + if (!info->draw) + return false; - sna_dri_page_flip(sna, info); + assert(sna_pixmap_get_buffer(get_drawable_pixmap(info->draw)) == info->front); + if (!can_flip(sna, info->draw, info->front, info->back)) + return false; - get_private(info->back)->bo = tmp.bo; - info->back->name = tmp.name; + if (!sna_dri_page_flip(sna, info)) + return false; + + sna_dri_flip_get_back(sna, info); + DRI2SwapComplete(info->client, info->draw, + 0, 0, 0, + DRI2_FLIP_COMPLETE, + info->client ? 
info->event_complete : NULL, + info->event_data); + } - info->next_front.name = 0; + info->mode = 0; + return true; } static void chain_flip(struct sna *sna) @@ -1367,29 +1596,17 @@ static void chain_flip(struct sna *sna) } if (chain->type == DRI2_FLIP && - can_flip(sna, chain->draw, chain->front, chain->back)) { + can_flip(sna, chain->draw, chain->front, chain->back) && + sna_dri_page_flip(sna, chain)) { DBG(("%s: performing chained flip\n", __FUNCTION__)); - sna_dri_page_flip(sna, chain); - - chain->back->name = chain->old_front.name; - get_private(chain->back)->bo = chain->old_front.bo; - chain->old_front.bo = NULL; - - if (chain->count == 0) { - DRI2SwapComplete(chain->client, chain->draw, 0, 0, 0, - DRI2_EXCHANGE_COMPLETE, - chain->event_complete, - chain->event_data); - sna_dri_frame_event_info_free(sna, chain->draw, chain); - } else - sna->dri.flip_pending = chain; } else { - DBG(("%s: emitting chained vsync'ed blit\n", __FUNCTION__)); - - chain->bo = sna_dri_copy_to_front(sna, chain->draw, NULL, - get_private(chain->front)->bo, - get_private(chain->back)->bo, - true); + if (can_blit(sna, chain->draw, chain->front, chain->back)) { + DBG(("%s: emitting chained vsync'ed blit\n", __FUNCTION__)); + chain->bo = sna_dri_copy_to_front(sna, chain->draw, NULL, + get_private(chain->front)->bo, + get_private(chain->back)->bo, + true); + } DRI2SwapComplete(chain->client, chain->draw, 0, 0, 0, DRI2_BLIT_COMPLETE, chain->client ? chain->event_complete : NULL, chain->event_data); sna_dri_frame_event_info_free(sna, chain->draw, chain); @@ -1406,6 +1623,14 @@ static void sna_dri_flip_event(struct sna *sna, flip->fe_tv_usec, flip->type)); + if (flip->cache.bo == NULL) { + flip->cache = flip->scanout[1]; + flip->scanout[1].bo = NULL; + } + if (flip->scanout[1].bo) { + kgem_bo_destroy(&sna->kgem, flip->scanout[1].bo); + flip->scanout[1].bo = NULL; + } if (sna->dri.flip_pending == flip) sna->dri.flip_pending = NULL; @@ -1433,44 +1658,31 @@ static void sna_dri_flip_event(struct sna *sna, if (sna->dri.flip_pending) { sna_dri_frame_event_info_free(sna, flip->draw, flip); chain_flip(sna); - } else if (!flip->next_front.name) { - /* Keep the pageflipping running for a couple of frames - * so we keep the uncached scanouts alive. - */ - DBG(("%s: flip chain complete, off-delay=%d\n", - __FUNCTION__, flip->off_delay)); - if (flip->off_delay-- && flip->draw && - can_flip(sna, flip->draw, flip->front, flip->front) && - (flip->count = sna_page_flip(sna, - get_private(flip->front)->bo, - flip, flip->pipe))) { - assert(flip == sna_dri_window_get_chain((WindowPtr)flip->draw)); - sna->dri.flip_pending = flip; - } else { - DBG(("%s: flip chain complete, off\n", __FUNCTION__)); - sna_dri_frame_event_info_free(sna, flip->draw, flip); + } else if (!flip->mode) { + DBG(("%s: flip chain complete\n", __FUNCTION__)); + + if (flip->chain) { + sna_dri_remove_frame_event((WindowPtr)flip->draw, + flip); + chain_swap(sna, flip->draw, + flip->fe_frame, + flip->fe_tv_sec, + flip->fe_tv_usec, + flip->chain); + flip->draw = NULL; } - } else if (flip->draw && - can_flip(sna, flip->draw, flip->front, flip->back)) { - sna_dri_flip_continue(sna, flip); - DRI2SwapComplete(flip->client, flip->draw, - 0, 0, 0, - DRI2_FLIP_COMPLETE, - flip->client ? 
flip->event_complete : NULL, - flip->event_data); - if (flip->count) - sna->dri.flip_pending = flip; - else - sna_dri_frame_event_info_free(sna, flip->draw, flip); - flip->off_delay = FLIP_OFF_DELAY; - } else { + + sna_dri_frame_event_info_free(sna, flip->draw, flip); + } else if (!sna_dri_flip_continue(sna, flip)) { DBG(("%s: no longer able to flip\n", __FUNCTION__)); if (flip->draw) { - flip->bo = sna_dri_copy_to_front(sna, flip->draw, NULL, - get_private(flip->front)->bo, - get_private(flip->back)->bo, - false); + if (can_blit(sna, flip->draw, flip->front, flip->back)) { + flip->bo = sna_dri_copy_to_front(sna, flip->draw, NULL, + get_private(flip->front)->bo, + get_private(flip->back)->bo, + false); + } DRI2SwapComplete(flip->client, flip->draw, 0, 0, 0, DRI2_BLIT_COMPLETE, @@ -1482,59 +1694,6 @@ static void sna_dri_flip_event(struct sna *sna, } break; -#if USE_ASYNC_SWAP - case DRI2_ASYNC_FLIP: - DBG(("%s: async swap flip completed on pipe %d, pending? %d, new? %d\n", - __FUNCTION__, flip->pipe, - sna->dri.flip_pending != NULL, - flip->front->name != flip->next_front.name)); - - if (sna->dri.flip_pending) { - chain_flip(sna); - goto finish_async_flip; - } else if (flip->front->name != flip->next_front.name) { - DBG(("%s: async flip continuing\n", __FUNCTION__)); - - flip->cache = flip->old_front; - flip->old_front = flip->next_front; - - flip->count = sna_page_flip(sna, - get_private(flip->front)->bo, - flip, flip->pipe); - if (flip->count == 0) - goto finish_async_flip; - - flip->next_front.bo = get_private(flip->front)->bo; - flip->next_front.name = flip->front->name; - flip->off_delay = FLIP_OFF_DELAY; - - sna->dri.flip_pending = flip; - } else if (flip->draw && - can_flip(sna, flip->draw, flip->front, flip->back) && - flip->off_delay--) { - assert(flip == sna_dri_window_get_chain((WindowPtr)flip->draw)); - DBG(("%s: queuing no-flip [delay=%d]\n", - __FUNCTION__, flip->off_delay)); - /* Just queue a no-op flip to trigger another event */ - flip->count = sna_page_flip(sna, - get_private(flip->front)->bo, - flip, flip->pipe); - if (flip->count == 0) - goto finish_async_flip; - - assert(flip->next_front.bo == get_private(flip->front)->bo); - assert(flip->next_front.name == flip->front->name); - - sna->dri.flip_pending = flip; - } else { -finish_async_flip: - DBG(("%s: async flip completed (drawable gone? %d)\n", - __FUNCTION__, flip->draw == NULL)); - sna_dri_frame_event_info_free(sna, flip->draw, flip); - } - break; -#endif - default: xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING, "%s: unknown vblank event received\n", __func__); @@ -1564,213 +1723,22 @@ sna_dri_page_flip_handler(struct sna *sna, sna_dri_flip_event(sna, info); } -static bool -sna_dri_schedule_flip(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, - DRI2BufferPtr back, CARD64 *target_msc, CARD64 divisor, - CARD64 remainder, DRI2SwapEventPtr func, void *data) -{ - struct sna *sna = to_sna_from_drawable(draw); - struct sna_dri_frame_event *info; - drmVBlank vbl; - int pipe; - CARD64 current_msc; - - DBG(("%s(target_msc=%llu, divisor=%llu, remainder=%llu)\n", - __FUNCTION__, - (long long)*target_msc, - (long long)divisor, - (long long)remainder)); - - VG_CLEAR(vbl); - - pipe = sna_dri_get_pipe(draw); - if (pipe == -1) { - /* XXX WARN_ON(sna->dri.flip_pending) ? 
*/ - if (sna->dri.flip_pending == NULL) { - sna_dri_exchange_buffers(draw, front, back); - DRI2SwapComplete(client, draw, 0, 0, 0, - DRI2_EXCHANGE_COMPLETE, func, data); - return true; - } else - return false; - } - - /* Truncate to match kernel interfaces; means occasional overflow - * misses, but that's generally not a big deal */ - divisor &= 0xffffffff; - if (divisor == 0) { - DBG(("%s: performing immediate swap on pipe %d, pending? %d\n", - __FUNCTION__, pipe, sna->dri.flip_pending != NULL)); - - info = sna->dri.flip_pending; - if (info && info->draw == draw && info->type == DRI2_FLIP_THROTTLE) { - DBG(("%s: chaining flip\n", __FUNCTION__)); - info->next_front.name = 1; - return true; - } - - info = calloc(1, sizeof(struct sna_dri_frame_event)); - if (info == NULL) - return false; - - info->type = DRI2_FLIP_THROTTLE; - - info->draw = draw; - info->client = client; - info->event_complete = func; - info->event_data = data; - info->front = front; - info->back = back; - info->pipe = pipe; - - sna_dri_add_frame_event(draw, info); - sna_dri_reference_buffer(front); - sna_dri_reference_buffer(back); - - if (sna->dri.flip_pending) { - /* We need to first wait (one vblank) for the - * async flips to complete before this client - * can take over. - */ - DBG(("%s: queueing flip after pending completion\n", - __FUNCTION__)); - info->type = DRI2_FLIP; - sna->dri.flip_pending = info; - return true; - } - - sna_dri_page_flip(sna, info); - - if (info->count == 0) { - info->back->name = info->old_front.name; - get_private(info->back)->bo = info->old_front.bo; - info->old_front.bo = NULL; - - DRI2SwapComplete(info->client, draw, 0, 0, 0, - DRI2_EXCHANGE_COMPLETE, - info->event_complete, - info->event_data); - sna_dri_frame_event_info_free(sna, draw, info); - } else if (info->type != DRI2_FLIP) { - get_private(info->back)->bo = - kgem_create_2d(&sna->kgem, - draw->width, - draw->height, - draw->bitsPerPixel, - get_private(info->front)->bo->tiling, - CREATE_SCANOUT | CREATE_EXACT); - info->back->name = kgem_bo_flink(&sna->kgem, - get_private(info->back)->bo); - info->off_delay = FLIP_OFF_DELAY; - sna->dri.flip_pending = info; - - DRI2SwapComplete(info->client, draw, 0, 0, 0, - DRI2_EXCHANGE_COMPLETE, - info->event_complete, - info->event_data); - } else { - info->back->name = info->old_front.name; - get_private(info->back)->bo = info->old_front.bo; - info->old_front.bo = NULL; - } - } else { - info = calloc(1, sizeof(struct sna_dri_frame_event)); - if (info == NULL) - return false; - - info->draw = draw; - info->client = client; - info->event_complete = func; - info->event_data = data; - info->front = front; - info->back = back; - info->pipe = pipe; - info->type = DRI2_FLIP; - - sna_dri_add_frame_event(draw, info); - sna_dri_reference_buffer(front); - sna_dri_reference_buffer(back); - - /* Get current count */ - vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe); - vbl.request.sequence = 0; - if (sna_wait_vblank(sna, &vbl)) { - sna_dri_frame_event_info_free(sna, draw, info); - return false; - } - - current_msc = vbl.reply.sequence; - *target_msc &= 0xffffffff; - remainder &= 0xffffffff; - - vbl.request.type = - DRM_VBLANK_ABSOLUTE | - DRM_VBLANK_EVENT | - pipe_select(pipe); - - /* - * If divisor is zero, or current_msc is smaller than target_msc - * we just need to make sure target_msc passes before initiating - * the swap. 
- */ - if (current_msc < *target_msc) { - DBG(("%s: waiting for swap: current=%d, target=%d, divisor=%d\n", - __FUNCTION__, - (int)current_msc, - (int)*target_msc, - (int)divisor)); - vbl.request.sequence = *target_msc; - } else { - DBG(("%s: missed target, queueing event for next: current=%d, target=%d, divisor=%d\n", - __FUNCTION__, - (int)current_msc, - (int)*target_msc, - (int)divisor)); - - vbl.request.sequence = current_msc - current_msc % divisor + remainder; - - /* - * If the calculated deadline vbl.request.sequence is - * smaller than or equal to current_msc, it means - * we've passed the last point when effective onset - * frame seq could satisfy *seq % divisor == remainder, - * so we need to wait for the next time this will - * happen. - * - * This comparison takes the 1 frame swap delay - * in pageflipping mode into account. - */ - if (vbl.request.sequence <= current_msc) - vbl.request.sequence += divisor; - - /* Adjust returned value for 1 frame pageflip offset */ - *target_msc = vbl.reply.sequence + 1; - } - - /* Account for 1 frame extra pageflip delay */ - vbl.request.sequence -= 1; - vbl.request.signal = (unsigned long)info; - if (sna_wait_vblank(sna, &vbl)) { - sna_dri_frame_event_info_free(sna, draw, info); - return false; - } - } - - return true; -} - static void sna_dri_immediate_xchg(struct sna *sna, DrawablePtr draw, - struct sna_dri_frame_event *info) + struct sna_dri_frame_event *info, + bool sync) { drmVBlank vbl; - DBG(("%s: emitting immediate exchange, throttling client\n", - __FUNCTION__)); + if (sna->flags & SNA_NO_WAIT) + sync = false; + + DBG(("%s: emitting immediate exchange, throttling client, synced? %d\n", + __FUNCTION__, sync)); VG_CLEAR(vbl); - if ((sna->flags & SNA_NO_WAIT) == 0) { + if (sync) { info->type = DRI2_XCHG_THROTTLE; if (sna_dri_window_get_chain((WindowPtr)draw) == info) { DBG(("%s: no pending xchg, starting chain\n", @@ -1804,16 +1772,20 @@ sna_dri_immediate_xchg(struct sna *sna, static void sna_dri_immediate_blit(struct sna *sna, DrawablePtr draw, - struct sna_dri_frame_event *info) + struct sna_dri_frame_event *info, + bool sync) { - drmVBlank vbl; + if (sna->flags & SNA_NO_WAIT) + sync = false; - DBG(("%s: emitting immediate blit, throttling client\n", __FUNCTION__)); - VG_CLEAR(vbl); + DBG(("%s: emitting immediate blit, throttling client, synced? 
%d\n", + __FUNCTION__, sync)); - if ((sna->flags & SNA_NO_WAIT) == 0) { + if (sync) { info->type = DRI2_SWAP_THROTTLE; if (sna_dri_window_get_chain((WindowPtr)draw) == info) { + drmVBlank vbl; + DBG(("%s: no pending blit, starting chain\n", __FUNCTION__)); @@ -1826,6 +1798,7 @@ sna_dri_immediate_blit(struct sna *sna, info->event_complete, info->event_data); + VG_CLEAR(vbl); vbl.request.type = DRM_VBLANK_RELATIVE | DRM_VBLANK_NEXTONMISS | @@ -1849,6 +1822,204 @@ sna_dri_immediate_blit(struct sna *sna, } } +static CARD64 +get_current_msc_for_target(struct sna *sna, CARD64 target_msc, int pipe) +{ + CARD64 ret = -1; + + if (target_msc && (sna->flags & SNA_NO_WAIT) == 0) { + drmVBlank vbl; + + VG_CLEAR(vbl); + vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe); + vbl.request.sequence = 0; + if (sna_wait_vblank(sna, &vbl) == 0) + ret = vbl.reply.sequence; + } + + return ret; +} + +static bool +sna_dri_schedule_flip(ClientPtr client, DrawablePtr draw, + DRI2BufferPtr front, DRI2BufferPtr back, int pipe, + CARD64 *target_msc, CARD64 divisor, CARD64 remainder, + DRI2SwapEventPtr func, void *data) +{ + struct sna *sna = to_sna_from_drawable(draw); + struct sna_dri_frame_event *info; + drmVBlank vbl; + CARD64 current_msc; + + current_msc = get_current_msc_for_target(sna, *target_msc, pipe); + + DBG(("%s: target_msc=%u, current_msc=%u, divisor=%u\n", __FUNCTION__, + (uint32_t)*target_msc, (uint32_t)current_msc, (uint32_t)divisor)); + + if (divisor == 0 && current_msc >= *target_msc - 1) { + info = sna->dri.flip_pending; + + DBG(("%s: performing immediate swap on pipe %d, pending? %d, mode: %d\n", + __FUNCTION__, pipe, info != NULL, info ? info->mode : 0)); + + if (info && + info->draw == draw) { + assert(info->type == DRI2_FLIP_THROTTLE); + assert(info->front == front); + if (info->back != back) { + _sna_dri_destroy_buffer(sna, info->back); + info->back = back; + sna_dri_reference_buffer(back); + } + if (current_msc >= *target_msc) { + DBG(("%s: executing xchg of pending flip\n", + __FUNCTION__)); + sna_dri_exchange_buffers(draw, front, back); + info->mode = 2; + goto new_back; + } else { + DBG(("%s: chaining flip\n", __FUNCTION__)); + info->mode = 1; + current_msc++; + goto out; + } + } + + info = calloc(1, sizeof(struct sna_dri_frame_event)); + if (info == NULL) + return false; + + info->type = sna->flags & SNA_TRIPLE_BUFFER ? DRI2_FLIP_THROTTLE: DRI2_FLIP; + + info->draw = draw; + info->client = client; + info->event_complete = func; + info->event_data = data; + info->front = front; + info->back = back; + info->pipe = pipe; + + info->scanout[0].bo = ref(get_private(front)->bo); + info->scanout[0].name = info->front->name; + + sna_dri_add_frame_event(draw, info); + sna_dri_reference_buffer(front); + sna_dri_reference_buffer(back); + + if (sna->dri.flip_pending) { + /* We need to first wait (one vblank) for the + * async flips to complete before this client + * can take over. 
+ */ + DBG(("%s: queueing flip after pending completion\n", + __FUNCTION__)); + info->type = DRI2_FLIP; + sna->dri.flip_pending = info; + *target_msc = current_msc + 1; + return true; + } + + if (!sna_dri_page_flip(sna, info)) { + sna_dri_frame_event_info_free(sna, draw, info); + return false; + } + + if (info->type != DRI2_FLIP) { + current_msc++; +new_back: + sna_dri_flip_get_back(sna, info); + DRI2SwapComplete(client, draw, 0, 0, 0, + DRI2_EXCHANGE_COMPLETE, + func, data); + } +out: + *target_msc = current_msc; + return true; + } + + info = calloc(1, sizeof(struct sna_dri_frame_event)); + if (info == NULL) + return false; + + info->draw = draw; + info->client = client; + info->event_complete = func; + info->event_data = data; + info->front = front; + info->back = back; + info->pipe = pipe; + info->type = DRI2_FLIP; + + info->scanout[0].bo = ref(get_private(front)->bo); + info->scanout[0].name = info->front->name; + + sna_dri_add_frame_event(draw, info); + sna_dri_reference_buffer(front); + sna_dri_reference_buffer(back); + + *target_msc &= 0xffffffff; + remainder &= 0xffffffff; + + VG_CLEAR(vbl); + + vbl.request.type = + DRM_VBLANK_ABSOLUTE | + DRM_VBLANK_EVENT | + pipe_select(pipe); + + /* + * If divisor is zero, or current_msc is smaller than target_msc + * we just need to make sure target_msc passes before initiating + * the swap. + */ + if (current_msc <= *target_msc - 1) { + DBG(("%s: waiting for swap: current=%d, target=%d, divisor=%d\n", + __FUNCTION__, + (int)current_msc, + (int)*target_msc, + (int)divisor)); + vbl.request.sequence = *target_msc; + } else { + DBG(("%s: missed target, queueing event for next: current=%d, target=%d, divisor=%d\n", + __FUNCTION__, + (int)current_msc, + (int)*target_msc, + (int)divisor)); + + if (divisor == 0) + divisor = 1; + + vbl.request.sequence = current_msc - current_msc % divisor + remainder; + + /* + * If the calculated deadline vbl.request.sequence is + * smaller than or equal to current_msc, it means + * we've passed the last point when effective onset + * frame seq could satisfy *seq % divisor == remainder, + * so we need to wait for the next time this will + * happen. + * + * This comparison takes the 1 frame swap delay + * in pageflipping mode into account. + */ + if (vbl.request.sequence <= current_msc) + vbl.request.sequence += divisor; + + /* Adjust returned value for 1 frame pageflip offset */ + *target_msc = vbl.reply.sequence; + } + + /* Account for 1 frame extra pageflip delay */ + vbl.request.sequence -= 1; + vbl.request.signal = (unsigned long)info; + if (sna_wait_vblank(sna, &vbl)) { + sna_dri_frame_event_info_free(sna, draw, info); + return false; + } + + return true; +} + /* * ScheduleSwap is responsible for requesting a DRM vblank event for the * appropriate frame. @@ -1889,37 +2060,26 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, (long long)divisor, (long long)remainder)); - if (can_flip(sna, draw, front, back)) { - DBG(("%s: try flip\n", __FUNCTION__)); - if (sna_dri_schedule_flip(client, draw, front, back, - target_msc, divisor, remainder, - func, data)) - return TRUE; - } + /* Truncate to match kernel interfaces; means occasional overflow + * misses, but that's generally not a big deal */ + *target_msc &= 0xffffffff; + divisor &= 0xffffffff; + remainder &= 0xffffffff; /* Drawable not displayed... 
just complete the swap */ pipe = sna_dri_get_pipe(draw); if (pipe == -1) { - if (can_exchange(sna, draw, front, back)) { - DBG(("%s: unattached, exchange pixmaps\n", __FUNCTION__)); - sna_dri_exchange_buffers(draw, front, back); - - DRI2SwapComplete(client, draw, 0, 0, 0, - DRI2_EXCHANGE_COMPLETE, func, data); - return TRUE; - } - DBG(("%s: off-screen, immediate update\n", __FUNCTION__)); goto blit_fallback; } - VG_CLEAR(vbl); + if (can_flip(sna, draw, front, back) && + sna_dri_schedule_flip(client, draw, front, back, pipe, + target_msc, divisor, remainder, + func, data)) + return TRUE; - /* Truncate to match kernel interfaces; means occasional overflow - * misses, but that's generally not a big deal */ - *target_msc &= 0xffffffff; - divisor &= 0xffffffff; - remainder &= 0xffffffff; + VG_CLEAR(vbl); info = calloc(1, sizeof(struct sna_dri_frame_event)); if (!info) @@ -1938,21 +2098,25 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, sna_dri_reference_buffer(back); info->type = swap_type; - if (divisor == 0) { - if (can_exchange(sna, draw, front, back)) - sna_dri_immediate_xchg(sna, draw, info); - else - sna_dri_immediate_blit(sna, draw, info); - return TRUE; - } - /* Get current count */ - vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe); - vbl.request.sequence = 0; - if (sna_wait_vblank(sna, &vbl)) - goto blit_fallback; + current_msc = get_current_msc_for_target(sna, *target_msc, pipe); + DBG(("%s: target_msc=%u, current_msc=%u, divisor=%u\n", __FUNCTION__, + (uint32_t)*target_msc, (uint32_t)current_msc, (uint32_t)divisor)); - current_msc = vbl.reply.sequence; + if (divisor == 0 && current_msc >= *target_msc - 1) { + bool sync = current_msc < *target_msc; + if (can_exchange(sna, draw, front, back)) { + sna_dri_immediate_xchg(sna, draw, info, sync); + } else if (can_blit(sna, draw, front, back)) { + sna_dri_immediate_blit(sna, draw, info, sync); + } else { + DRI2SwapComplete(client, draw, 0, 0, 0, + DRI2_BLIT_COMPLETE, func, data); + sna_dri_frame_event_info_free(sna, draw, info); + } + *target_msc = current_msc + sync; + return TRUE; + } /* * If divisor is zero, or current_msc is smaller than target_msc @@ -1991,6 +2155,9 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, (int)*target_msc, (int)divisor)); + if (divisor == 0) + divisor = 1; + vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT | @@ -2007,27 +2174,27 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, */ if (vbl.request.sequence < current_msc) vbl.request.sequence += divisor; - vbl.request.sequence -= 1; + *target_msc = vbl.reply.sequence; + vbl.request.sequence -= 1; vbl.request.signal = (unsigned long)info; if (sna_wait_vblank(sna, &vbl)) goto blit_fallback; - *target_msc = vbl.reply.sequence; return TRUE; blit_fallback: + pipe = DRI2_BLIT_COMPLETE; if (can_exchange(sna, draw, front, back)) { DBG(("%s -- xchg\n", __FUNCTION__)); sna_dri_exchange_buffers(draw, front, back); pipe = DRI2_EXCHANGE_COMPLETE; - } else { + } else if (can_blit(sna, draw, front, back)) { DBG(("%s -- blit\n", __FUNCTION__)); sna_dri_copy_to_front(sna, draw, NULL, get_private(front)->bo, get_private(back)->bo, false); - pipe = DRI2_BLIT_COMPLETE; } if (info) sna_dri_frame_event_info_free(sna, draw, info); @@ -2043,100 +2210,31 @@ sna_dri_async_swap(ClientPtr client, DrawablePtr draw, DRI2SwapEventPtr func, void *data) { struct sna *sna = to_sna_from_drawable(draw); - struct sna_dri_frame_event *info; - struct kgem_bo *bo; - int name; + CARD64 
target_msc = 0; + int pipe; DBG(("%s()\n", __FUNCTION__)); - if (!can_flip(sna, draw, front, back)) { -blit: + if (!can_flip(sna, draw, front, back) || + (pipe = sna_dri_get_pipe(draw)) < 0 || + !sna_dri_schedule_flip(client, draw, front, back, pipe, + &target_msc, 0, 0, func, data)) { + pipe = DRI2_BLIT_COMPLETE; if (can_exchange(sna, draw, front, back)) { DBG(("%s: unable to flip, so xchg\n", __FUNCTION__)); sna_dri_exchange_buffers(draw, front, back); - name = DRI2_EXCHANGE_COMPLETE; - } else { + pipe = DRI2_EXCHANGE_COMPLETE; + } else if (can_blit(sna, draw, front, back)) { DBG(("%s: unable to flip, so blit\n", __FUNCTION__)); sna_dri_copy_to_front(sna, draw, NULL, get_private(front)->bo, get_private(back)->bo, false); - name = DRI2_BLIT_COMPLETE; - } - - DRI2SwapComplete(client, draw, 0, 0, 0, name, func, data); - return name == DRI2_EXCHANGE_COMPLETE; - } - - bo = NULL; - name = 0; - - info = sna->dri.flip_pending; - if (info == NULL) { - int pipe = sna_dri_get_pipe(draw); - if (pipe == -1) - goto blit; - - DBG(("%s: no pending flip, so updating scanout\n", - __FUNCTION__)); - - info = calloc(1, sizeof(struct sna_dri_frame_event)); - if (!info) - goto blit; - - info->client = client; - info->draw = draw; - info->type = DRI2_ASYNC_FLIP; - info->pipe = pipe; - info->front = front; - info->back = back; - - sna_dri_add_frame_event(draw, info); - sna_dri_reference_buffer(front); - sna_dri_reference_buffer(back); - - sna_dri_page_flip(sna, info); - - info->next_front.name = info->front->name; - info->next_front.bo = get_private(info->front)->bo; - info->off_delay = FLIP_OFF_DELAY; - } else if (info->type != DRI2_ASYNC_FLIP) { - /* A normal vsync'ed client is finishing, wait for it - * to unpin the old framebuffer before taking over. - */ - goto blit; - } else { - DBG(("%s: pending flip, chaining next\n", __FUNCTION__)); - if (info->next_front.name == info->front->name) { - name = info->cache.name; - bo = info->cache.bo; - } else { - name = info->front->name; - bo = get_private(info->front)->bo; } - info->front->name = info->back->name; - get_private(info->front)->bo = get_private(info->back)->bo; - } - if (bo == NULL) { - DBG(("%s: creating new back buffer\n", __FUNCTION__)); - bo = kgem_create_2d(&sna->kgem, - draw->width, - draw->height, - draw->bitsPerPixel, - get_private(info->front)->bo->tiling, - CREATE_SCANOUT | CREATE_EXACT); - name = kgem_bo_flink(&sna->kgem, bo); + DRI2SwapComplete(client, draw, 0, 0, 0, pipe, func, data); + return pipe == DRI2_EXCHANGE_COMPLETE; } - assert(bo->refcnt); - get_private(info->back)->bo = bo; - info->back->name = name; - - set_bo(sna->front, get_private(info->front)->bo); - sna->dri.flip_pending = info; - - DRI2SwapComplete(client, draw, 0, 0, 0, - DRI2_EXCHANGE_COMPLETE, func, data); return TRUE; } #endif @@ -2291,6 +2389,17 @@ out_complete: } #endif +static const char *dri_driver_name(struct sna *sna) +{ + const char *s = xf86GetOptValString(sna->Options, OPTION_DRI); + Bool dummy; + + if (s == NULL || xf86getBoolValue(&dummy, s)) + return sna->kgem.gen < 040 ? "i915" : "i965"; + + return s; +} + bool sna_dri_open(struct sna *sna, ScreenPtr screen) { DRI2InfoRec info; @@ -2318,8 +2427,7 @@ bool sna_dri_open(struct sna *sna, ScreenPtr screen) sna->deviceName = drmGetDeviceNameFromFd(sna->kgem.fd); memset(&info, '\0', sizeof(info)); info.fd = sna->kgem.fd; - info.driverName = - (sna->kgem.gen && sna->kgem.gen < 40) ? 
"i915" : "i965"; + info.driverName = dri_driver_name(sna); info.deviceName = sna->deviceName; DBG(("%s: loading dri driver '%s' [gen=%d] for device '%s'\n", diff --git a/src/sna/sna_driver.c b/src/sna/sna_driver.c index 1b7e817f5..ffeaead58 100644 --- a/src/sna/sna_driver.c +++ b/src/sna/sna_driver.c @@ -62,7 +62,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include <sys/ioctl.h> #include <sys/fcntl.h> -#include <sys/poll.h> #include "i915_drm.h" #ifdef HAVE_VALGRIND @@ -79,11 +78,6 @@ DevPrivateKeyRec sna_gc_key; DevPrivateKeyRec sna_window_key; DevPrivateKeyRec sna_glyph_key; -static Bool sna_enter_vt(VT_FUNC_ARGS_DECL); - -/* temporary */ -extern void xf86SetCursor(ScreenPtr screen, CursorPtr pCurs, int x, int y); - static void sna_load_palette(ScrnInfoPtr scrn, int numColors, int *indices, LOCO * colors, VisualPtr pVisual) @@ -150,6 +144,79 @@ sna_load_palette(ScrnInfoPtr scrn, int numColors, int *indices, } } +static void +sna_set_fallback_mode(ScrnInfoPtr scrn) +{ + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); + xf86OutputPtr output = NULL; + xf86CrtcPtr crtc = NULL; + int n; + + if ((unsigned)config->compat_output < config->num_output) { + output = config->output[config->compat_output]; + crtc = output->crtc; + } + + for (n = 0; n < config->num_output; n++) + config->output[n]->crtc = NULL; + for (n = 0; n < config->num_crtc; n++) + config->crtc[n]->enabled = FALSE; + + if (output && crtc) { + DisplayModePtr mode; + + output->crtc = crtc; + + mode = xf86OutputFindClosestMode(output, scrn->currentMode); + if (mode && + xf86CrtcSetModeTransform(crtc, mode, RR_Rotate_0, NULL, 0, 0)) { + crtc->desiredMode = *mode; + crtc->desiredMode.prev = crtc->desiredMode.next = NULL; + crtc->desiredMode.name = NULL; + crtc->desiredMode.PrivSize = 0; + crtc->desiredMode.PrivFlags = 0; + crtc->desiredMode.Private = NULL; + crtc->desiredRotation = RR_Rotate_0; + crtc->desiredTransformPresent = FALSE; + crtc->desiredX = 0; + crtc->desiredY = 0; + crtc->enabled = TRUE; + } + } + + xf86DisableUnusedFunctions(scrn); +#ifdef RANDR_12_INTERFACE + if (root(scrn->pScreen)) + xf86RandR12TellChanged(scrn->pScreen); +#endif +} + +static Bool sna_become_master(struct sna *sna) +{ + ScrnInfoPtr scrn = sna->scrn; + + DBG(("%s\n", __FUNCTION__)); + + if (drmSetMaster(sna->kgem.fd)) { + sleep(2); /* XXX wait for the current master to decease */ + if (drmSetMaster(sna->kgem.fd)) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "drmSetMaster failed: %s\n", + strerror(errno)); + return FALSE; + } + } + + if (!xf86SetDesiredModes(scrn)) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "failed to restore desired modes on VT switch\n"); + sna_set_fallback_mode(scrn); + } + + sna_mode_disable_unused(sna); + return TRUE; +} + /** * Adjust the screen pixmap for the current location of the front buffer. 
* This is done at EnterVT when buffers are bound as long as the resources @@ -158,7 +225,6 @@ sna_load_palette(ScrnInfoPtr scrn, int numColors, int *indices, */ static Bool sna_create_screen_resources(ScreenPtr screen) { - ScrnInfoPtr scrn = xf86ScreenToScrn(screen); struct sna *sna = to_sna_from_screen(screen); DBG(("%s(%dx%d@%d)\n", __FUNCTION__, @@ -197,7 +263,7 @@ static Bool sna_create_screen_resources(ScreenPtr screen) sna_copy_fbcon(sna); - if (!sna_enter_vt(VT_FUNC_ARGS(0))) { + if (!sna_become_master(sna)) { xf86DrvMsg(screen->myNum, X_ERROR, "[intel] Failed to become DRM master\n"); goto cleanup_front; @@ -363,6 +429,12 @@ static void sna_setup_capabilities(ScrnInfoPtr scrn, int fd) #endif } +static Bool sna_option_cast_to_bool(struct sna *sna, int id, Bool val) +{ + xf86getBoolValue(&val, xf86GetOptValString(sna->Options, id)); + return val; +} + /** * This is called before ScreenInit to do any require probing of screen * configuration. @@ -468,7 +540,8 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int flags) intel_detect_chipset(scrn, sna->pEnt, sna->PciInfo); kgem_init(&sna->kgem, fd, sna->PciInfo, sna->info->gen); - if (xf86ReturnOptValBool(sna->Options, OPTION_ACCEL_DISABLE, FALSE)) { + if (xf86ReturnOptValBool(sna->Options, OPTION_ACCEL_DISABLE, FALSE) || + !sna_option_cast_to_bool(sna, OPTION_ACCEL_METHOD, TRUE)) { xf86DrvMsg(sna->scrn->scrnIndex, X_CONFIG, "Disabling hardware acceleration.\n"); sna->kgem.wedged = true; @@ -493,12 +566,10 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int flags) sna->tiling &= ~SNA_TILING_FB; sna->flags = 0; - if (!xf86ReturnOptValBool(sna->Options, OPTION_THROTTLE, TRUE)) - sna->flags |= SNA_NO_THROTTLE; - if (!xf86ReturnOptValBool(sna->Options, OPTION_DELAYED_FLUSH, TRUE)) - sna->flags |= SNA_NO_DELAYED_FLUSH; if (!xf86ReturnOptValBool(sna->Options, OPTION_SWAPBUFFERS_WAIT, TRUE)) sna->flags |= SNA_NO_WAIT; + if (xf86ReturnOptValBool(sna->Options, OPTION_TRIPLE_BUFFER, TRUE)) + sna->flags |= SNA_TRIPLE_BUFFER; if (has_pageflipping(sna)) { if (xf86ReturnOptValBool(sna->Options, OPTION_TEAR_FREE, FALSE)) sna->flags |= SNA_TEAR_FREE; @@ -511,12 +582,6 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int flags) sna->tiling & SNA_TILING_FB ? "tiled" : "linear"); xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Pixmaps %s\n", sna->tiling & SNA_TILING_2D ? "tiled" : "linear"); - xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "3D buffers %s\n", - sna->tiling & SNA_TILING_3D ? "tiled" : "linear"); - xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Throttling %sabled\n", - sna->flags & SNA_NO_THROTTLE ? "dis" : "en"); - xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Delayed flush %sabled\n", - sna->flags & SNA_NO_DELAYED_FLUSH ? "dis" : "en"); xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "\"Tear free\" %sabled\n", sna->flags & SNA_TEAR_FREE ? "en" : "dis"); xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Forcing per-crtc-pixmaps? 
%s\n", @@ -543,7 +608,7 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int flags) xf86SetDpi(scrn, 0, 0); sna->dri_available = false; - if (xf86ReturnOptValBool(sna->Options, OPTION_DRI, TRUE)) + if (sna_option_cast_to_bool(sna, OPTION_DRI, TRUE)) sna->dri_available = !!xf86LoadSubModule(scrn, "dri2"); return TRUE; @@ -552,9 +617,11 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int flags) static void sna_block_handler(BLOCKHANDLER_ARGS_DECL) { - SCREEN_PTR(arg); - ScrnInfoPtr scrn = xf86ScreenToScrn(screen); - struct sna *sna = to_sna(scrn); +#ifndef XF86_SCRN_INTERFACE + struct sna *sna = to_sna(xf86Screens[arg]); +#else + struct sna *sna = to_sna_from_screen(arg); +#endif struct timeval **tv = timeout; DBG(("%s (tv=%ld.%06ld)\n", __FUNCTION__, @@ -569,9 +636,11 @@ sna_block_handler(BLOCKHANDLER_ARGS_DECL) static void sna_wakeup_handler(WAKEUPHANDLER_ARGS_DECL) { - SCREEN_PTR(arg); - ScrnInfoPtr scrn = xf86ScreenToScrn(screen); - struct sna *sna = to_sna(scrn); +#ifndef XF86_SCRN_INTERFACE + struct sna *sna = to_sna(xf86Screens[arg]); +#else + struct sna *sna = to_sna_from_screen(arg); +#endif DBG(("%s\n", __FUNCTION__)); @@ -639,11 +708,14 @@ sna_uevent_init(ScrnInfoPtr scrn) DBG(("%s\n", __FUNCTION__)); - if (!xf86GetOptValBool(sna->Options, OPTION_HOTPLUG, &hotplug)) { - from = X_DEFAULT; - hotplug = TRUE; - } + /* RandR will be disabled if Xinerama is active, and so generating + * RR hotplug events is then verboten. + */ + if (!dixPrivateKeyRegistered(rrPrivKey)) + return; + if (!xf86GetOptValBool(sna->Options, OPTION_HOTPLUG, &hotplug)) + from = X_DEFAULT, hotplug = TRUE; xf86DrvMsg(scrn->scrnIndex, from, "hotplug detection: \"%s\"\n", hotplug ? "enabled" : "disabled"); if (!hotplug) @@ -654,16 +726,14 @@ sna_uevent_init(ScrnInfoPtr scrn) return; mon = udev_monitor_new_from_netlink(u, "udev"); - if (!mon) { udev_unref(u); return; } if (udev_monitor_filter_add_match_subsystem_devtype(mon, - "drm", - "drm_minor") < 0 || - udev_monitor_enable_receiving(mon) < 0) + "drm", "drm_minor") < 0 || + udev_monitor_enable_receiving(mon) < 0) { udev_monitor_unref(mon); udev_unref(u); @@ -681,23 +751,29 @@ sna_uevent_init(ScrnInfoPtr scrn) } sna->uevent_monitor = mon; + + DBG(("%s: installed uvent handler\n", __FUNCTION__)); } static void sna_uevent_fini(ScrnInfoPtr scrn) { struct sna *sna = to_sna(scrn); + struct udev *u; - if (sna->uevent_handler) { - struct udev *u = udev_monitor_get_udev(sna->uevent_monitor); + if (sna->uevent_handler == NULL) + return; - xf86RemoveGeneralHandler(sna->uevent_handler); + xf86RemoveGeneralHandler(sna->uevent_handler); - udev_monitor_unref(sna->uevent_monitor); - udev_unref(u); - sna->uevent_handler = NULL; - sna->uevent_monitor = NULL; - } + u = udev_monitor_get_udev(sna->uevent_monitor); + udev_monitor_unref(sna->uevent_monitor); + udev_unref(u); + + sna->uevent_handler = NULL; + sna->uevent_monitor = NULL; + + DBG(("%s: removed uvent handler\n", __FUNCTION__)); } #else static void sna_uevent_fini(ScrnInfoPtr scrn) { } @@ -717,18 +793,6 @@ static void sna_leave_vt(VT_FUNC_ARGS_DECL) "drmDropMaster failed: %s\n", strerror(errno)); } -/* In order to workaround a kernel bug in not honouring O_NONBLOCK, - * check that the fd is readable before attempting to read the next - * event from drm. 
- */ -static Bool sna_mode_has_pending_events(struct sna *sna) -{ - struct pollfd pfd; - pfd.fd = sna->kgem.fd; - pfd.events = POLLIN; - return poll(&pfd, 1, 0) == 1; -} - static Bool sna_early_close_screen(CLOSE_SCREEN_ARGS_DECL) { ScrnInfoPtr scrn = xf86ScreenToScrn(screen); @@ -739,9 +803,7 @@ static Bool sna_early_close_screen(CLOSE_SCREEN_ARGS_DECL) xf86_hide_cursors(scrn); sna_uevent_fini(scrn); - /* drain the event queues */ - if (sna_mode_has_pending_events(sna)) - sna_mode_wakeup(sna); + sna_mode_close(sna); if (sna->dri_open) { sna_dri_close(sna, screen); @@ -793,6 +855,7 @@ static void sna_mode_set(ScrnInfoPtr scrn) static Bool sna_register_all_privates(void) { +#if HAS_DIXREGISTERPRIVATEKEY if (!dixRegisterPrivateKey(&sna_pixmap_key, PRIVATE_PIXMAP, 3*sizeof(void *))) return FALSE; @@ -808,6 +871,19 @@ sna_register_all_privates(void) if (!dixRegisterPrivateKey(&sna_window_key, PRIVATE_WINDOW, 2*sizeof(void *))) return FALSE; +#else + if (!dixRequestPrivate(&sna_pixmap_key, 3*sizeof(void *))) + return FALSE; + + if (!dixRequestPrivate(&sna_gc_key, sizeof(FbGCPrivate))) + return FALSE; + + if (!dixRequestPrivate(&sna_glyph_key, sizeof(struct sna_glyph))) + return FALSE; + + if (!dixRequestPrivate(&sna_window_key, 2*sizeof(void *))) + return FALSE; +#endif return TRUE; } @@ -815,7 +891,7 @@ sna_register_all_privates(void) static size_t agp_aperture_size(struct pci_device *dev, int gen) { - return dev->regions[gen < 30 ? 0 : 2].size; + return dev->regions[gen < 030 ? 0 : 2].size; } static Bool @@ -983,24 +1059,9 @@ static void sna_free_screen(FREE_SCREEN_ARGS_DECL) static Bool sna_enter_vt(VT_FUNC_ARGS_DECL) { SCRN_INFO_PTR(arg); - struct sna *sna = to_sna(scrn); DBG(("%s\n", __FUNCTION__)); - - if (drmSetMaster(sna->kgem.fd)) { - xf86DrvMsg(scrn->scrnIndex, X_ERROR, - "drmSetMaster failed: %s\n", - strerror(errno)); - return FALSE; - } - - if (!xf86SetDesiredModes(scrn)) - xf86DrvMsg(scrn->scrnIndex, X_WARNING, - "failed to restore desired modes on VT switch\n"); - - sna_mode_disable_unused(sna); - - return TRUE; + return sna_become_master(to_sna(scrn)); } static Bool sna_switch_mode(SWITCH_MODE_ARGS_DECL) @@ -1094,6 +1155,10 @@ Bool sna_init_scrn(ScrnInfoPtr scrn, int entity_num) xf86DrvMsg(scrn->scrnIndex, X_INFO, "SNA compiled with assertions enabled\n"); #endif +#if DEBUG_SYNC + xf86DrvMsg(scrn->scrnIndex, X_INFO, + "SNA compiled with synchronous rendering\n"); +#endif #if DEBUG_MEMORY xf86DrvMsg(scrn->scrnIndex, X_INFO, "SNA compiled with memory allocation reporting enabled\n"); @@ -1117,11 +1182,15 @@ Bool sna_init_scrn(ScrnInfoPtr scrn, int entity_num) scrn->ValidMode = sna_valid_mode; scrn->PMEvent = sna_pm_event; +#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,9,99,901,0) scrn->ModeSet = sna_mode_set; +#endif xf86SetEntitySharable(entity_num); xf86SetEntityInstanceForScreen(scrn, entity_num, xf86GetNumEntityInstances(entity_num)-1); + sna_threads_init(); + return TRUE; } diff --git a/src/sna/sna_glyphs.c b/src/sna/sna_glyphs.c index 9a6ad4b52..5fed8b419 100644 --- a/src/sna/sna_glyphs.c +++ b/src/sna/sna_glyphs.c @@ -84,6 +84,8 @@ #define N_STACK_GLYPHS 512 +#define glyph_valid(g) *((uint32_t *)&(g)->info.width) + #if HAS_DEBUG_FULL static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function) { @@ -108,7 +110,7 @@ extern DevPrivateKeyRec sna_glyph_key; static inline struct sna_glyph *sna_glyph(GlyphPtr glyph) { - return dixGetPrivateAddr(&glyph->devPrivates, &sna_glyph_key); + return __get_private(glyph, sna_glyph_key); } #define 
NeedsComponent(f) (PICT_FORMAT_A(f) != 0 && PICT_FORMAT_RGB(f) != 0) @@ -191,11 +193,17 @@ bool sna_glyphs_create(struct sna *sna) if (sna->render.white_image == NULL) goto bail; - if (!can_render(sna)) + if (!can_render(sna)) { + DBG(("%s: no render acceleration, no render glyph caches\n", + __FUNCTION__)); return true; + } - if (xf86IsEntityShared(sna->scrn->entityList[0])) + if (xf86IsEntityShared(sna->scrn->entityList[0])) { + DBG(("%s: shared GlyphPictures, no render glyph caches\n", + __FUNCTION__)); return true; + } for (i = 0; i < ARRAY_SIZE(formats); i++) { struct sna_glyph_cache *cache = &sna->render.glyph[i]; @@ -215,9 +223,12 @@ bool sna_glyphs_create(struct sna *sna) CACHE_PICTURE_SIZE, CACHE_PICTURE_SIZE, depth, - SNA_CREATE_SCRATCH); - if (!pixmap) + SNA_CREATE_GLYPHS); + if (!pixmap) { + DBG(("%s: failed to allocate pixmap for Glyph cache\n", + __FUNCTION__)); goto bail; + } priv = sna_pixmap(pixmap); if (priv != NULL) { @@ -235,6 +246,7 @@ bool sna_glyphs_create(struct sna *sna) goto bail; ValidatePicture(picture); + assert(picture->pDrawable == &pixmap->drawable); cache->count = cache->evict = 0; cache->picture = picture; @@ -297,7 +309,7 @@ glyph_extents(int nlist, while (n--) { GlyphPtr glyph = *glyphs++; - if (glyph->info.width && glyph->info.height) { + if (glyph_valid(glyph)) { int v; v = x - glyph->info.x; @@ -350,14 +362,20 @@ glyph_cache(ScreenPtr screen, struct sna_render *render, GlyphPtr glyph) { - PicturePtr glyph_picture = GetGlyphPicture(glyph, screen); - struct sna_glyph_cache *cache = &render->glyph[PICT_FORMAT_RGB(glyph_picture->format) != 0]; + PicturePtr glyph_picture; + struct sna_glyph_cache *cache; struct sna_glyph *priv; int size, mask, pos, s; if (NO_GLYPH_CACHE) return false; + glyph_picture = GetGlyphPicture(glyph, screen); + if (unlikely(glyph_picture == NULL)) { + glyph->info.width = glyph->info.height = 0; + return false; + } + if (glyph->info.width > GLYPH_MAX_SIZE || glyph->info.height > GLYPH_MAX_SIZE) { PixmapPtr pixmap = (PixmapPtr)glyph_picture->pDrawable; @@ -373,6 +391,7 @@ glyph_cache(ScreenPtr screen, if (glyph->info.width <= size && glyph->info.height <= size) break; + cache = &render->glyph[PICT_FORMAT_RGB(glyph_picture->format) != 0]; s = glyph_size_to_count(size); mask = glyph_count_to_mask(s); pos = (cache->count + s - 1) & mask; @@ -528,7 +547,7 @@ glyphs_to_dst(struct sna *sna, struct sna_glyph priv; int i; - if (glyph->info.width == 0 || glyph->info.height == 0) + if (!glyph_valid(glyph)) goto next_glyph; priv = *sna_glyph(glyph); @@ -540,6 +559,10 @@ glyphs_to_dst(struct sna *sna, if (!glyph_cache(screen, &sna->render, glyph)) { /* no cache for this glyph */ priv.atlas = GetGlyphPicture(glyph, screen); + if (unlikely(priv.atlas == NULL)) { + glyph->info.width = glyph->info.height = 0; + goto next_glyph; + } priv.coordinate.x = priv.coordinate.y = 0; } else priv = *sna_glyph(glyph); @@ -671,7 +694,7 @@ glyphs_slow(struct sna *sna, BoxPtr rects; int nrect; - if (glyph->info.width == 0 || glyph->info.height == 0) + if (!glyph_valid(glyph)) goto next_glyph; priv = *sna_glyph(glyph); @@ -679,6 +702,10 @@ glyphs_slow(struct sna *sna, if (!glyph_cache(screen, &sna->render, glyph)) { /* no cache for this glyph */ priv.atlas = GetGlyphPicture(glyph, screen); + if (unlikely(priv.atlas == NULL)) { + glyph->info.width = glyph->info.height = 0; + goto next_glyph; + } priv.coordinate.x = priv.coordinate.y = 0; } else priv = *sna_glyph(glyph); @@ -780,7 +807,7 @@ __sna_glyph_get_image(GlyphPtr g, ScreenPtr s) int dx, dy; p = 
GetGlyphPicture(g, s); - if (p == NULL) + if (unlikely(p == NULL)) return NULL; image = image_from_pict(p, FALSE, &dx, &dy); @@ -917,7 +944,7 @@ glyphs_via_mask(struct sna *sna, GlyphPtr g = *glyphs++; const void *ptr; - if (g->info.width == 0 || g->info.height == 0) + if (!glyph_valid(g)) goto next_pglyph; ptr = pixman_glyph_cache_lookup(cache, g, NULL); @@ -968,7 +995,7 @@ next_pglyph: pixman_image_t *glyph_image; int16_t xi, yi; - if (g->info.width == 0 || g->info.height == 0) + if (!glyph_valid(g)) goto next_image; /* If the mask has been cropped, it is likely @@ -984,6 +1011,8 @@ next_pglyph: glyph_image = sna_glyph_get_image(g, dst->pDrawable->pScreen); + if (glyph_image == NULL) + goto next_image; DBG(("%s: glyph to mask (%d, %d)x(%d, %d)\n", __FUNCTION__, @@ -1058,7 +1087,7 @@ next_image: PicturePtr this_atlas; struct sna_composite_rectangles r; - if (glyph->info.width == 0 || glyph->info.height == 0) + if (!glyph_valid(glyph)) goto next_glyph; priv = sna_glyph(glyph); @@ -1076,6 +1105,10 @@ next_image: } else { /* no cache for this glyph */ this_atlas = GetGlyphPicture(glyph, screen); + if (unlikely(this_atlas == NULL)) { + glyph->info.width = glyph->info.height = 0; + goto next_glyph; + } r.src.x = r.src.y = 0; } } @@ -1090,7 +1123,8 @@ next_image: __FUNCTION__, (int)this_atlas->format, (int)(format->depth << 24 | format->format))); - if (this_atlas->format == (format->depth << 24 | format->format)) { + if (this_atlas->format == (format->depth << 24 | format->format) && + (sna->kgem.gen >> 3) != 4) { /* XXX cache corruption? how? */ ok = sna->render.composite(sna, PictOpAdd, this_atlas, NULL, mask, 0, 0, 0, 0, 0, 0, @@ -1194,7 +1228,7 @@ glyphs_format(int nlist, GlyphListPtr list, GlyphPtr * glyphs) while (n--) { GlyphPtr glyph = *glyphs++; - if (glyph->info.width == 0 || glyph->info.height == 0) { + if (!glyph_valid(glyph)) { x += glyph->info.xOff; y += glyph->info.yOff; continue; @@ -1333,7 +1367,7 @@ glyphs_fallback(CARD8 op, __FUNCTION__, RegionExtents(®ion)->x1, RegionExtents(®ion)->y1, RegionExtents(®ion)->x2, RegionExtents(®ion)->y2)); - if (!RegionNotEmpty(®ion)) + if (RegionNil(®ion)) return; if (!sna_drawable_move_region_to_cpu(dst->pDrawable, ®ion, @@ -1391,7 +1425,7 @@ glyphs_fallback(CARD8 op, GlyphPtr g = *glyphs++; const void *ptr; - if (g->info.width == 0 || g->info.height == 0) + if (!glyph_valid(g)) goto next; ptr = pixman_glyph_cache_lookup(cache, g, NULL); @@ -1517,7 +1551,7 @@ out: GlyphPtr g = *glyphs++; pixman_image_t *glyph_image; - if (g->info.width == 0 || g->info.height == 0) + if (!glyph_valid(g)) goto next_glyph; glyph_image = sna_glyph_get_image(g, screen); @@ -1654,7 +1688,7 @@ sna_glyphs(CARD8 op, } if ((too_small(priv) || DAMAGE_IS_ALL(priv->cpu_damage)) && - !picture_is_gpu(src)) { + !picture_is_gpu(sna, src)) { DBG(("%s: fallback -- too small (%dx%d)\n", __FUNCTION__, dst->pDrawable->width, dst->pDrawable->height)); goto fallback; @@ -1810,7 +1844,7 @@ glyphs_via_image(struct sna *sna, GlyphPtr g = *glyphs++; const void *ptr; - if (g->info.width == 0 || g->info.height == 0) + if (!glyph_valid(g)) goto next_pglyph; ptr = pixman_glyph_cache_lookup(cache, g, NULL); @@ -1861,7 +1895,7 @@ next_pglyph: pixman_image_t *glyph_image; int16_t xi, yi; - if (g->info.width == 0 || g->info.height == 0) + if (!glyph_valid(g)) goto next_image; /* If the mask has been cropped, it is likely @@ -1877,6 +1911,8 @@ next_pglyph: glyph_image = sna_glyph_get_image(g, dst->pDrawable->pScreen); + if (glyph_image == NULL) + goto next_image; DBG(("%s: glyph to mask (%d, 
%d)x(%d, %d)\n", __FUNCTION__, @@ -1976,7 +2012,7 @@ sna_glyphs__shared(CARD8 op, } if ((too_small(priv) || DAMAGE_IS_ALL(priv->cpu_damage)) && - !picture_is_gpu(src)) { + !picture_is_gpu(sna, src)) { DBG(("%s: fallback -- too small (%dx%d)\n", __FUNCTION__, dst->pDrawable->width, dst->pDrawable->height)); goto fallback; diff --git a/src/sna/sna_gradient.c b/src/sna/sna_gradient.c index 5f06fbc8d..db09e72db 100644 --- a/src/sna/sna_gradient.c +++ b/src/sna/sna_gradient.c @@ -219,11 +219,11 @@ sna_render_flush_solid(struct sna *sna) DBG(("sna_render_flush_solid(size=%d)\n", cache->size)); assert(cache->dirty); assert(cache->size); + assert(cache->size <= 1024); kgem_bo_write(&sna->kgem, cache->cache_bo, cache->color, cache->size*sizeof(uint32_t)); cache->dirty = 0; - cache->last = 0; } static void @@ -250,21 +250,24 @@ sna_render_finish_solid(struct sna *sna, bool force) cache->bo[i] = NULL; } - old = cache->cache_bo; - DBG(("sna_render_finish_solid reset\n")); - + old = cache->cache_bo; cache->cache_bo = kgem_create_linear(&sna->kgem, sizeof(cache->color), 0); if (cache->cache_bo == NULL) { cache->cache_bo = old; old = NULL; } - cache->bo[0] = kgem_create_proxy(&sna->kgem, cache->cache_bo, - 0, sizeof(uint32_t)); - cache->bo[0]->pitch = 4; if (force) - cache->size = 1; + cache->size = 0; + if (cache->last < cache->size) { + cache->bo[cache->last] = kgem_create_proxy(&sna->kgem, cache->cache_bo, + cache->last*sizeof(uint32_t), sizeof(uint32_t)); + if (cache->bo[cache->last]) + cache->bo[cache->last]->pitch = 4; + else + cache->last = 1024; + } if (old) kgem_bo_destroy(&sna->kgem, old); @@ -283,7 +286,38 @@ sna_render_get_solid(struct sna *sna, uint32_t color) if (color == 0xffffffff) { DBG(("%s(white)\n", __FUNCTION__)); - return kgem_bo_reference(cache->bo[0]); + return kgem_bo_reference(sna->render.alpha_cache.bo[255+7]); + } + + if ((color >> 24) == 0xff) { + int v = 0; + + if (((color >> 16) & 0xff) == 0) + v |= 0; + else if (((color >> 16) & 0xff) == 0xff) + v |= 1 << 2; + else + v = -1; + + if (((color >> 8) & 0xff) == 0) + v |= 0; + else if (((color >> 8) & 0xff) == 0xff) + v |= 1 << 1; + else + v = -1; + + if (((color >> 0) & 0xff) == 0) + v |= 0; + else if (((color >> 0) & 0xff) == 0xff) + v |= 1 << 0; + else + v = -1; + + if (v >= 0) { + DBG(("%s(primary (%d,%d,%d): %d)\n", + __FUNCTION__, v & 4, v & 2, v & 1, v)); + return kgem_bo_reference(sna->render.alpha_cache.bo[255+v]); + } } if (cache->color[cache->last] == color) { @@ -292,7 +326,7 @@ sna_render_get_solid(struct sna *sna, uint32_t color) return kgem_bo_reference(cache->bo[cache->last]); } - for (i = 1; i < cache->size; i++) { + for (i = 0; i < cache->size; i++) { if (cache->color[i] == color) { if (cache->bo[i] == NULL) { DBG(("sna_render_get_solid(%d) = %x (recreate)\n", @@ -306,7 +340,7 @@ sna_render_get_solid(struct sna *sna, uint32_t color) } } - sna_render_finish_solid(sna, i == ARRAY_SIZE(cache->color)); + sna_render_finish_solid(sna, i == 1024); i = cache->size++; cache->color[i] = color; @@ -326,7 +360,7 @@ done: static bool sna_alpha_cache_init(struct sna *sna) { struct sna_alpha_cache *cache = &sna->render.alpha_cache; - uint32_t color[256]; + uint32_t color[256 + 7]; int i; DBG(("%s\n", __FUNCTION__)); @@ -346,6 +380,28 @@ static bool sna_alpha_cache_init(struct sna *sna) cache->bo[i]->pitch = 4; } + + /* primary */ + for (i = 1; i < 8; i++) { + int j = 255+i; + + color[j] = 0xff << 24; + if (i & 1) + color[j] |= 0xff << 0; + if (i & 2) + color[j] |= 0xff << 8; + if (i & 4) + color[j] |= 0xff << 16; + 
cache->bo[j] = kgem_create_proxy(&sna->kgem, + cache->cache_bo, + sizeof(uint32_t)*j, + sizeof(uint32_t)); + if (cache->bo[j] == NULL) + return false; + + cache->bo[j]->pitch = 4; + } + return kgem_bo_write(&sna->kgem, cache->cache_bo, color, sizeof(color)); } @@ -356,24 +412,14 @@ static bool sna_solid_cache_init(struct sna *sna) DBG(("%s\n", __FUNCTION__)); cache->cache_bo = - kgem_create_linear(&sna->kgem, sizeof(cache->color), 0); + kgem_create_linear(&sna->kgem, 4096, 0); if (!cache->cache_bo) return false; - /* - * Initialise [0] with white since it is very common and filling the - * zeroth slot simplifies some of the checks. - */ - cache->color[0] = 0xffffffff; - cache->bo[0] = kgem_create_proxy(&sna->kgem, cache->cache_bo, - 0, sizeof(uint32_t)); - if (cache->bo[0] == NULL) - return false; - - cache->bo[0]->pitch = 4; - cache->dirty = 1; - cache->size = 1; - cache->last = 0; + cache->last = 1024; + cache->color[cache->last] = 0; + cache->dirty = 0; + cache->size = 0; return true; } diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c index 69d920c7d..f89cd89ec 100644 --- a/src/sna/sna_io.c +++ b/src/sna/sna_io.c @@ -117,12 +117,18 @@ static void read_boxes_inplace(struct kgem *kgem, static bool download_inplace(struct kgem *kgem, struct kgem_bo *bo) { + if (unlikely(kgem->wedged)) + return true; + if (!kgem_bo_can_map(kgem, bo)) return false; if (FORCE_INPLACE) return FORCE_INPLACE > 0; + if (kgem->can_blt_cpu && kgem->max_cpu_size) + return false; + return !__kgem_bo_is_busy(kgem, bo) || bo->tiling == I915_TILING_NONE; } @@ -364,7 +370,7 @@ fallback: cmd = XY_SRC_COPY_BLT_CMD; src_pitch = src_bo->pitch; - if (kgem->gen >= 40 && src_bo->tiling) { + if (kgem->gen >= 040 && src_bo->tiling) { cmd |= BLT_SRC_TILED; src_pitch >>= 2; } @@ -378,11 +384,13 @@ fallback: case 1: break; } - kgem_set_mode(kgem, KGEM_BLT); - if (!kgem_check_reloc_and_exec(kgem, 2) || - !kgem_check_batch(kgem, 8) || + kgem_set_mode(kgem, KGEM_BLT, dst_bo); + if (!kgem_check_batch(kgem, 8) || + !kgem_check_reloc_and_exec(kgem, 2) || !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) { - _kgem_submit(kgem); + kgem_submit(kgem); + if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) + goto fallback; _kgem_set_mode(kgem, KGEM_BLT); } @@ -483,7 +491,16 @@ fallback: static bool upload_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo) { - if (kgem->gen < 50) /* bit17 swizzling :( */ +#ifndef __x86_64__ + /* Between a register starved compiler emitting attrocious code + * and the extra overhead in the kernel for managing the tight + * 32-bit address space, unless we have a 64-bit system, + * using memcpy_to_tiled_x() is extremely slow. 
+ */ + return false; +#endif + + if (kgem->gen < 050) /* bit17 swizzling :( */ return false; if (bo->tiling != I915_TILING_X) @@ -579,19 +596,13 @@ static bool write_boxes_inplace(struct kgem *kgem, return true; } -static bool upload_inplace(struct kgem *kgem, - struct kgem_bo *bo, - const BoxRec *box, - int n, int bpp) +static bool __upload_inplace(struct kgem *kgem, + struct kgem_bo *bo, + const BoxRec *box, + int n, int bpp) { unsigned int bytes; - if (kgem->wedged) - return true; - - if (!kgem_bo_can_map(kgem, bo) && !upload_inplace__tiled(kgem, bo)) - return false; - if (FORCE_INPLACE) return FORCE_INPLACE > 0; @@ -610,6 +621,20 @@ static bool upload_inplace(struct kgem *kgem, return bytes * bpp >> 12; } +static bool upload_inplace(struct kgem *kgem, + struct kgem_bo *bo, + const BoxRec *box, + int n, int bpp) +{ + if (unlikely(kgem->wedged)) + return true; + + if (!kgem_bo_can_map(kgem, bo) && !upload_inplace__tiled(kgem, bo)) + return false; + + return __upload_inplace(kgem, bo, box, n,bpp); +} + bool sna_write_boxes(struct sna *sna, PixmapPtr dst, struct kgem_bo * const dst_bo, int16_t const dst_dx, int16_t const dst_dy, const void * const src, int const stride, int16_t const src_dx, int16_t const src_dy, @@ -672,14 +697,18 @@ fallback: sna->render.max_3d_size, sna->render.max_3d_size)); if (must_tile(sna, tmp.drawable.width, tmp.drawable.height)) { BoxRec tile, stack[64], *clipped, *c; - int step; + int cpp, step; tile: - step = MIN(sna->render.max_3d_size - 4096 / dst->drawable.bitsPerPixel, - 8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel); - while (step * step * 4 > sna->kgem.max_upload_tile_size) + cpp = dst->drawable.bitsPerPixel / 8; + step = MIN(sna->render.max_3d_size, + (MAXSHORT&~63) / cpp); + while (step * step * cpp > sna->kgem.max_upload_tile_size) step /= 2; + if (step * cpp > 4096) + step = 4096 / cpp; + DBG(("%s: tiling upload, using %dx%d tiles\n", __FUNCTION__, step, step)); @@ -803,7 +832,7 @@ tile: cmd = XY_SRC_COPY_BLT_CMD; br13 = dst_bo->pitch; - if (kgem->gen >= 40 && dst_bo->tiling) { + if (kgem->gen >= 040 && dst_bo->tiling) { cmd |= BLT_DST_TILED; br13 >>= 2; } @@ -816,11 +845,13 @@ tile: case 8: break; } - kgem_set_mode(kgem, KGEM_BLT); + kgem_set_mode(kgem, KGEM_BLT, dst_bo); if (!kgem_check_batch(kgem, 8) || !kgem_check_reloc_and_exec(kgem, 2) || !kgem_check_bo_fenced(kgem, dst_bo)) { - _kgem_submit(kgem); + kgem_submit(kgem); + if (!kgem_check_bo_fenced(kgem, dst_bo)) + goto fallback; _kgem_set_mode(kgem, KGEM_BLT); } @@ -960,6 +991,20 @@ write_boxes_inplace__xor(struct kgem *kgem, } while (--n); } +static bool upload_inplace__xor(struct kgem *kgem, + struct kgem_bo *bo, + const BoxRec *box, + int n, int bpp) +{ + if (unlikely(kgem->wedged)) + return true; + + if (!kgem_bo_can_map(kgem, bo)) + return false; + + return __upload_inplace(kgem, bo, box, n, bpp); +} + void sna_write_boxes__xor(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, const void *src, int stride, int16_t src_dx, int16_t src_dy, @@ -976,7 +1021,7 @@ void sna_write_boxes__xor(struct sna *sna, PixmapPtr dst, DBG(("%s x %d\n", __FUNCTION__, nbox)); - if (upload_inplace(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel)) { + if (upload_inplace__xor(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel)) { fallback: write_boxes_inplace__xor(kgem, src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy, @@ -1158,7 +1203,7 @@ tile: cmd = XY_SRC_COPY_BLT_CMD; br13 = dst_bo->pitch; - if (kgem->gen >= 40 && dst_bo->tiling) { + if (kgem->gen >= 040 && 
dst_bo->tiling) { cmd |= BLT_DST_TILED; br13 >>= 2; } @@ -1171,11 +1216,13 @@ tile: case 8: break; } - kgem_set_mode(kgem, KGEM_BLT); - if (!kgem_check_reloc_and_exec(kgem, 2) || - !kgem_check_batch(kgem, 8) || + kgem_set_mode(kgem, KGEM_BLT, dst_bo); + if (!kgem_check_batch(kgem, 8) || + !kgem_check_reloc_and_exec(kgem, 2) || !kgem_check_bo_fenced(kgem, dst_bo)) { - _kgem_submit(kgem); + kgem_submit(kgem); + if (!kgem_check_bo_fenced(kgem, dst_bo)) + goto fallback; _kgem_set_mode(kgem, KGEM_BLT); } diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c index 58449228d..69ac21c3b 100644 --- a/src/sna/sna_render.c +++ b/src/sna/sna_render.c @@ -87,8 +87,8 @@ no_render_composite(struct sna *sna, if (mask) return false; - if (!is_gpu(dst->pDrawable) && - (src->pDrawable == NULL || !is_gpu(src->pDrawable))) + if (!is_gpu(sna, dst->pDrawable, PREFER_GPU_BLT) && + (src->pDrawable == NULL || !is_gpu(sna, src->pDrawable, PREFER_GPU_BLT))) return false; return sna_blt_composite(sna, @@ -246,15 +246,14 @@ static void no_render_context_switch(struct kgem *kgem, int new_mode) { - if (!kgem->mode) + if (!kgem->nbatch) return; - if (kgem_is_idle(kgem)) { + if (kgem_ring_is_idle(kgem, kgem->ring)) { DBG(("%s: GPU idle, flushing\n", __FUNCTION__)); _kgem_submit(kgem); } - (void)kgem; (void)new_mode; } @@ -280,7 +279,9 @@ void no_render_init(struct sna *sna) { struct sna_render *render = &sna->render; - memset (render,0, sizeof (*render)); + memset (render, 0, sizeof (*render)); + + render->prefer_gpu = PREFER_GPU_BLT; render->vertices = render->vertex_data; render->vertex_size = ARRAY_SIZE(render->vertex_data); @@ -305,6 +306,8 @@ void no_render_init(struct sna *sna) sna->kgem.expire = no_render_expire; if (sna->kgem.has_blt) sna->kgem.ring = KGEM_BLT; + + sna_vertex_init(sna); } static struct kgem_bo * @@ -321,6 +324,14 @@ use_cpu_bo(struct sna *sna, PixmapPtr pixmap, const BoxRec *box, bool blt) return NULL; } + if (priv->shm) { + DBG(("%s: shm CPU bo, avoiding promotion to GPU\n", + __FUNCTION__)); + assert(!priv->flush); + sna_add_flush_pixmap(sna, priv, priv->cpu_bo); + return priv->cpu_bo; + } + if (priv->cpu_bo->snoop && priv->source_count > SOURCE_BIAS) { DBG(("%s: promoting snooped CPU bo due to reuse\n", __FUNCTION__)); @@ -339,6 +350,11 @@ use_cpu_bo(struct sna *sna, PixmapPtr pixmap, const BoxRec *box, bool blt) __FUNCTION__)); break; default: + if (kgem_bo_is_busy(priv->gpu_bo)){ + DBG(("%s: box is partially damaged on the CPU, and the GPU is busy\n", + __FUNCTION__)); + return NULL; + } if (sna_damage_contains_box(priv->gpu_damage, box) != PIXMAN_REGION_OUT) { DBG(("%s: box is damaged on the GPU\n", @@ -384,11 +400,6 @@ use_cpu_bo(struct sna *sna, PixmapPtr pixmap, const BoxRec *box, bool blt) } } - if (priv->shm) { - assert(!priv->flush); - sna_add_flush_pixmap(sna, priv, priv->cpu_bo); - } - DBG(("%s for box=(%d, %d), (%d, %d)\n", __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); ++priv->source_count; @@ -423,12 +434,21 @@ move_to_gpu(PixmapPtr pixmap, const BoxRec *box, bool blt) return priv->gpu_bo; } + if (priv->cpu_damage == NULL) { + DBG(("%s: not migrating uninitialised pixmap\n", + __FUNCTION__)); + return NULL; + } + if (pixmap->usage_hint) { DBG(("%s: not migrating pixmap due to usage_hint=%d\n", __FUNCTION__, pixmap->usage_hint)); return NULL; } + if (priv->shm) + blt = true; + if (DBG_FORCE_UPLOAD < 0) { if (!sna_pixmap_force_to_gpu(pixmap, blt ? 
MOVE_READ : MOVE_SOURCE_HINT | MOVE_READ)) @@ -439,7 +459,9 @@ move_to_gpu(PixmapPtr pixmap, const BoxRec *box, bool blt) w = box->x2 - box->x1; h = box->y2 - box->y1; - if (w == pixmap->drawable.width && h == pixmap->drawable.height) { + if (priv->cpu_bo && !priv->cpu_bo->flush) { + migrate = true; + } else if (w == pixmap->drawable.width && h == pixmap->drawable.height) { migrate = priv->source_count++ > SOURCE_BIAS; DBG(("%s: migrating whole pixmap (%dx%d) for source (%d,%d),(%d,%d), count %d? %d\n", @@ -464,9 +486,15 @@ move_to_gpu(PixmapPtr pixmap, const BoxRec *box, bool blt) migrate = count*w*h > pixmap->drawable.width * pixmap->drawable.height; } - if (migrate && !sna_pixmap_force_to_gpu(pixmap, - blt ? MOVE_READ : MOVE_SOURCE_HINT | MOVE_READ)) - return NULL; + if (migrate) { + if (blt) { + if (!sna_pixmap_move_area_to_gpu(pixmap, box, MOVE_READ)) + return NULL; + } else { + if (!sna_pixmap_force_to_gpu(pixmap, MOVE_SOURCE_HINT | MOVE_READ)) + return NULL; + } + } return priv->gpu_bo; } @@ -474,7 +502,7 @@ move_to_gpu(PixmapPtr pixmap, const BoxRec *box, bool blt) static struct kgem_bo *upload(struct sna *sna, struct sna_composite_channel *channel, PixmapPtr pixmap, - BoxPtr box) + const BoxRec *box) { struct sna_pixmap *priv; struct kgem_bo *bo; @@ -488,6 +516,9 @@ static struct kgem_bo *upload(struct sna *sna, priv = sna_pixmap(pixmap); if (priv) { + if (priv->cpu_damage == NULL) + return NULL; + /* As we know this box is on the CPU just fixup the shadow */ if (priv->mapped) { pixmap->devPrivate.ptr = NULL; @@ -497,7 +528,7 @@ static struct kgem_bo *upload(struct sna *sna, if (priv->ptr == NULL) /* uninitialised */ return NULL; assert(priv->stride); - pixmap->devPrivate.ptr = priv->ptr; + pixmap->devPrivate.ptr = PTR(priv->ptr); pixmap->devKind = priv->stride; } } @@ -515,8 +546,11 @@ static struct kgem_bo *upload(struct sna *sna, if (priv && pixmap->usage_hint == 0 && channel->width == pixmap->drawable.width && - channel->height == pixmap->drawable.height) + channel->height == pixmap->drawable.height) { + assert(priv->gpu_damage == NULL); + assert(priv->gpu_bo == NULL); kgem_proxy_bo_attach(bo, &priv->gpu_bo); + } } return bo; @@ -575,6 +609,10 @@ sna_render_pixmap_bo(struct sna *sna, !priv->cpu_bo->snoop && priv->cpu_bo->pitch < 4096) { DBG(("%s: CPU all damaged\n", __FUNCTION__)); channel->bo = priv->cpu_bo; + if (priv->shm) { + assert(!priv->flush); + sna_add_flush_pixmap(sna, priv, priv->cpu_bo); + } goto done; } } @@ -587,8 +625,8 @@ sna_render_pixmap_bo(struct sna *sna, } else { box.x1 = x; box.y1 = y; - box.x2 = x + w; - box.y2 = y + h; + box.x2 = bound(x, w); + box.y2 = bound(y, h); if (channel->repeat == RepeatNone || channel->repeat == RepeatPad) { if (box.x1 < 0) @@ -661,8 +699,8 @@ static int sna_render_picture_downsample(struct sna *sna, box.x1 = x; box.y1 = y; - box.x2 = x + w; - box.y2 = y + h; + box.x2 = bound(x, w); + box.y2 = bound(y, h); if (channel->transform) { pixman_vector_t v; @@ -843,8 +881,8 @@ sna_render_pixmap_partial(struct sna *sna, box.x1 = x; box.y1 = y; - box.x2 = x + w; - box.y2 = y + h; + box.x2 = bound(x, w); + box.y2 = bound(y, h); DBG(("%s: unaligned box (%d, %d), (%d, %d)\n", __FUNCTION__, box.x1, box.y1, box.x2, box.y2)); @@ -861,6 +899,9 @@ sna_render_pixmap_partial(struct sna *sna, DBG(("%s: tile size for tiling %d: %dx%d, size=%d\n", __FUNCTION__, bo->tiling, tile_width, tile_height, tile_size)); + if (sna->kgem.gen < 033) + tile_width = bo->pitch; + /* Ensure we align to an even tile row */ box.y1 = box.y1 & ~(2*tile_height - 1); 
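	/* Both row bounds rely on tile_height being a power of two:
	 * y & ~(2*h - 1) rounds y down to a multiple of 2*h, while
	 * ALIGN(y, 2*h) rounds up; e.g. with tile_height=8, y1=37 -> 32
	 * and y2=70 -> 80. */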
box.y2 = ALIGN(box.y2, 2*tile_height); @@ -934,8 +975,8 @@ sna_render_picture_partial(struct sna *sna, box.x1 = x; box.y1 = y; - box.x2 = x + w; - box.y2 = y + h; + box.x2 = bound(x, w); + box.y2 = bound(y, h); if (channel->transform) pixman_transform_bounds(channel->transform, &box); @@ -1077,8 +1118,8 @@ sna_render_picture_extract(struct sna *sna, ox = box.x1 = x; oy = box.y1 = y; - box.x2 = x + w; - box.y2 = y + h; + box.x2 = bound(x, w); + box.y2 = bound(y, h); if (channel->transform) { pixman_vector_t v; @@ -1147,8 +1188,11 @@ sna_render_picture_extract(struct sna *sna, box.x2 - box.x1 == pixmap->drawable.width && box.y2 - box.y1 == pixmap->drawable.height) { struct sna_pixmap *priv = sna_pixmap(pixmap); - if (priv) + if (priv) { + assert(priv->gpu_damage == NULL); + assert(priv->gpu_bo == NULL); kgem_proxy_bo_attach(bo, &priv->gpu_bo); + } } } } @@ -1334,6 +1378,7 @@ sna_render_picture_flatten(struct sna *sna, assert(w <= sna->render.max_3d_size && h <= sna->render.max_3d_size); /* XXX shortcut a8? */ + DBG(("%s: %dx%d\n", __FUNCTION__, w, h)); pixmap = screen->CreatePixmap(screen, w, h, 32, SNA_CREATE_SCRATCH); if (pixmap == NullPixmap) @@ -1346,6 +1391,8 @@ sna_render_picture_flatten(struct sna *sna, if (tmp == NULL) return 0; + ValidatePicture(tmp); + old_format = picture->format; picture->format = PICT_FORMAT(PICT_FORMAT_BPP(picture->format), PICT_FORMAT_TYPE(picture->format), @@ -1445,11 +1492,11 @@ sna_render_picture_approximate_gradient(struct sna *sna, pixman_transform_multiply(&t, picture->transform, &t); pixman_image_set_transform(src, &t); - pixman_image_composite(PictOpSrc, src, NULL, dst, - x + dx, y + dy, - 0, 0, - 0, 0, - w2, h2); + sna_image_composite(PictOpSrc, src, NULL, dst, + x+dx, y+dy, + 0, 0, + 0, 0, + w2, h2); free_pixman_pict(picture, src); pixman_image_unref(dst); @@ -1498,7 +1545,8 @@ sna_render_picture_fixup(struct sna *sna, if (picture->alphaMap) { DBG(("%s: alphamap\n", __FUNCTION__)); - if (is_gpu(picture->pDrawable) || is_gpu(picture->alphaMap->pDrawable)) { + if (is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER) || + is_gpu(sna, picture->alphaMap->pDrawable, PREFER_GPU_RENDER)) { return sna_render_picture_flatten(sna, picture, channel, x, y, w, h, dst_x, dst_y); } @@ -1508,7 +1556,7 @@ sna_render_picture_fixup(struct sna *sna, if (picture->filter == PictFilterConvolution) { DBG(("%s: convolution\n", __FUNCTION__)); - if (is_gpu(picture->pDrawable)) { + if (is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER)) { return sna_render_picture_convolve(sna, picture, channel, x, y, w, h, dst_x, dst_y); } @@ -1541,8 +1589,10 @@ do_fixup: } /* Composite in the original format to preserve idiosyncracies */ - if (picture->format == channel->pict_format) - dst = pixman_image_create_bits(picture->format, + if (!kgem_buffer_is_inplace(channel->bo) && + (picture->pDrawable == NULL || + picture->format == channel->pict_format)) + dst = pixman_image_create_bits(channel->pict_format, w, h, ptr, channel->bo->pitch); else dst = pixman_image_create_bits(picture->format, w, h, NULL, 0); @@ -1560,15 +1610,15 @@ do_fixup: DBG(("%s: compositing tmp=(%d+%d, %d+%d)x(%d, %d)\n", __FUNCTION__, x, dx, y, dy, w, h)); - pixman_image_composite(PictOpSrc, src, NULL, dst, - x + dx, y + dy, - 0, 0, - 0, 0, - w, h); + sna_image_composite(PictOpSrc, src, NULL, dst, + x + dx, y + dy, + 0, 0, + 0, 0, + w, h); free_pixman_pict(picture, src); /* Then convert to card format */ - if (picture->format != channel->pict_format) { + if (pixman_image_get_data(dst) != ptr) { DBG(("%s: performing 
post-conversion %08x->%08x (%d, %d)\n", __FUNCTION__, picture->format, channel->pict_format, @@ -1614,11 +1664,10 @@ sna_render_picture_convert(struct sna *sna, PixmapPtr pixmap, int16_t x, int16_t y, int16_t w, int16_t h, - int16_t dst_x, int16_t dst_y) + int16_t dst_x, int16_t dst_y, + bool fixup_alpha) { - pixman_image_t *src, *dst; BoxRec box; - void *ptr; #if NO_CONVERT return -1; @@ -1627,8 +1676,8 @@ sna_render_picture_convert(struct sna *sna, if (w != 0 && h != 0) { box.x1 = x; box.y1 = y; - box.x2 = x + w; - box.y2 = y + h; + box.x2 = bound(x, w); + box.y2 = bound(y, h); if (channel->transform) { DBG(("%s: has transform, converting whole surface\n", @@ -1668,52 +1717,113 @@ sna_render_picture_convert(struct sna *sna, return 0; } - if (!sna_pixmap_move_to_cpu(pixmap, MOVE_READ)) - return 0; + if (fixup_alpha && is_gpu(sna, &pixmap->drawable, PREFER_GPU_RENDER)) { + ScreenPtr screen = pixmap->drawable.pScreen; + PixmapPtr tmp; + PicturePtr src, dst; + int error; - src = pixman_image_create_bits(picture->format, - pixmap->drawable.width, - pixmap->drawable.height, - pixmap->devPrivate.ptr, - pixmap->devKind); - if (!src) - return 0; + assert(PICT_FORMAT_BPP(picture->format) == pixmap->drawable.bitsPerPixel); + channel->pict_format = PICT_FORMAT(PICT_FORMAT_BPP(picture->format), + PICT_FORMAT_TYPE(picture->format), + PICT_FORMAT_BPP(picture->format) - PIXMAN_FORMAT_DEPTH(picture->format), + PICT_FORMAT_R(picture->format), + PICT_FORMAT_G(picture->format), + PICT_FORMAT_B(picture->format)); - if (PICT_FORMAT_RGB(picture->format) == 0) { - channel->pict_format = PIXMAN_a8; - DBG(("%s: converting to a8 from %08x\n", - __FUNCTION__, picture->format)); + DBG(("%s: converting to %08x from %08x using composite alpha-fixup\n", + __FUNCTION__, (unsigned)picture->format)); + + tmp = screen->CreatePixmap(screen, w, h, pixmap->drawable.bitsPerPixel, 0); + if (tmp == NULL) + return 0; + + dst = CreatePicture(0, &tmp->drawable, + PictureMatchFormat(screen, + pixmap->drawable.bitsPerPixel, + channel->pict_format), + 0, NULL, serverClient, &error); + if (dst == NULL) { + screen->DestroyPixmap(tmp); + return 0; + } + + src = CreatePicture(0, &pixmap->drawable, + PictureMatchFormat(screen, + pixmap->drawable.depth, + picture->format), + 0, NULL, serverClient, &error); + if (src == NULL) { + FreePicture(dst, 0); + screen->DestroyPixmap(tmp); + return 0; + } + + ValidatePicture(src); + ValidatePicture(dst); + + sna_composite(PictOpSrc, src, NULL, dst, + box.x1, box.y1, + 0, 0, + 0, 0, + w, h); + FreePicture(dst, 0); + FreePicture(src, 0); + + channel->bo = sna_pixmap_get_bo(tmp); + kgem_bo_reference(channel->bo); + screen->DestroyPixmap(tmp); } else { - channel->pict_format = PIXMAN_a8r8g8b8; - DBG(("%s: converting to a8r8g8b8 from %08x\n", - __FUNCTION__, picture->format)); - } + pixman_image_t *src, *dst; + void *ptr; - channel->bo = kgem_create_buffer_2d(&sna->kgem, - w, h, PIXMAN_FORMAT_BPP(channel->pict_format), - KGEM_BUFFER_WRITE_INPLACE, - &ptr); - if (!channel->bo) { - pixman_image_unref(src); - return 0; - } + if (!sna_pixmap_move_to_cpu(pixmap, MOVE_READ)) + return 0; - dst = pixman_image_create_bits(channel->pict_format, - w, h, ptr, channel->bo->pitch); - if (!dst) { - kgem_bo_destroy(&sna->kgem, channel->bo); + src = pixman_image_create_bits(picture->format, + pixmap->drawable.width, + pixmap->drawable.height, + pixmap->devPrivate.ptr, + pixmap->devKind); + if (!src) + return 0; + + if (PICT_FORMAT_RGB(picture->format) == 0) { + channel->pict_format = PIXMAN_a8; + DBG(("%s: converting to a8 
from %08x\n", + __FUNCTION__, picture->format)); + } else { + channel->pict_format = PIXMAN_a8r8g8b8; + DBG(("%s: converting to a8r8g8b8 from %08x\n", + __FUNCTION__, picture->format)); + } + + channel->bo = kgem_create_buffer_2d(&sna->kgem, + w, h, PIXMAN_FORMAT_BPP(channel->pict_format), + KGEM_BUFFER_WRITE_INPLACE, + &ptr); + if (!channel->bo) { + pixman_image_unref(src); + return 0; + } + + dst = pixman_image_create_bits(channel->pict_format, + w, h, ptr, channel->bo->pitch); + if (!dst) { + kgem_bo_destroy(&sna->kgem, channel->bo); + pixman_image_unref(src); + return 0; + } + + pixman_image_composite(PictOpSrc, src, NULL, dst, + box.x1, box.y1, + 0, 0, + 0, 0, + w, h); + pixman_image_unref(dst); pixman_image_unref(src); - return 0; } - pixman_image_composite(PictOpSrc, src, NULL, dst, - box.x1, box.y1, - 0, 0, - 0, 0, - w, h); - pixman_image_unref(dst); - pixman_image_unref(src); - channel->width = w; channel->height = h; @@ -1722,11 +1832,10 @@ sna_render_picture_convert(struct sna *sna, channel->offset[0] = x - dst_x - box.x1; channel->offset[1] = y - dst_y - box.y1; - DBG(("%s: offset=(%d, %d), size=(%d, %d) ptr[0]=%08x\n", + DBG(("%s: offset=(%d, %d), size=(%d, %d)\n", __FUNCTION__, channel->offset[0], channel->offset[1], - channel->width, channel->height, - *(uint32_t*)ptr)); + channel->width, channel->height)); return 1; } @@ -1764,9 +1873,9 @@ sna_render_composite_redirect(struct sna *sna, __FUNCTION__, op->dst.bo->pitch, sna->render.max_3d_pitch)); box.x1 = x; - box.x2 = x + width; + box.x2 = bound(x, width); box.y1 = y; - box.y2 = y + height; + box.y2 = bound(y, height); /* Ensure we align to an even tile row */ if (op->dst.bo->tiling) { @@ -1783,7 +1892,7 @@ sna_render_composite_redirect(struct sna *sna, offset = box.x1 * op->dst.pixmap->drawable.bitsPerPixel / 8 / tile_width * tile_size; } else { - if (sna->kgem.gen < 40) { + if (sna->kgem.gen < 040) { box.y1 = box.y1 & ~3; box.y2 = ALIGN(box.y2, 4); @@ -1860,8 +1969,8 @@ sna_render_composite_redirect(struct sna *sna, t->box.x1 = x + op->dst.x; t->box.y1 = y + op->dst.y; - t->box.x2 = t->box.x1 + width; - t->box.y2 = t->box.y1 + height; + t->box.x2 = bound(t->box.x1, width); + t->box.y2 = bound(t->box.y1, height); DBG(("%s: original box (%d, %d), (%d, %d)\n", __FUNCTION__, t->box.x1, t->box.y1, t->box.x2, t->box.y2)); @@ -1911,11 +2020,13 @@ sna_render_composite_redirect_done(struct sna *sna, assert(ok); } if (t->damage) { - DBG(("%s: combining damage, offset=(%d, %d)\n", - __FUNCTION__, t->box.x1, t->box.y1)); - sna_damage_combine(t->real_damage, t->damage, + DBG(("%s: combining damage (all? 
%d), offset=(%d, %d)\n", + __FUNCTION__, (int)DAMAGE_IS_ALL(t->damage), + t->box.x1, t->box.y1)); + sna_damage_combine(t->real_damage, + DAMAGE_PTR(t->damage), t->box.x1, t->box.y1); - __sna_damage_destroy(t->damage); + __sna_damage_destroy(DAMAGE_PTR(t->damage)); } kgem_bo_destroy(&sna->kgem, op->dst.bo); diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h index 03a700571..01176c6aa 100644 --- a/src/sna/sna_render.h +++ b/src/sna/sna_render.h @@ -5,6 +5,11 @@ #include <picturestr.h> +#include <stdbool.h> +#include <stdint.h> +#include <pthread.h> +#include "atomic.h" + #define GRADIENT_CACHE_SIZE 16 #define GXinvalid 0xff @@ -30,6 +35,8 @@ struct sna_composite_op { const BoxRec *box); void (*boxes)(struct sna *sna, const struct sna_composite_op *op, const BoxRec *box, int nbox); + void (*thread_boxes)(struct sna *sna, const struct sna_composite_op *op, + const BoxRec *box, int nbox); void (*done)(struct sna *sna, const struct sna_composite_op *op); struct sna_damage **damage; @@ -66,10 +73,10 @@ struct sna_composite_op { union { struct { + float dx, dy, offset; + } linear; + struct { uint32_t pixel; - float linear_dx; - float linear_dy; - float linear_offset; } gen2; struct gen3_shader_channel { int type; @@ -88,6 +95,9 @@ struct sna_composite_op { fastcall void (*prim_emit)(struct sna *sna, const struct sna_composite_op *op, const struct sna_composite_rectangles *r); + fastcall void (*emit_boxes)(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v); struct sna_composite_redirect { struct kgem_bo *real_bo; @@ -122,8 +132,8 @@ struct sna_composite_op { } gen4; struct { - int wm_kernel; - int ve_id; + int16_t wm_kernel; + int16_t ve_id; } gen5; struct { @@ -138,6 +148,11 @@ struct sna_composite_op { void *priv; }; +struct sna_opacity_box { + BoxRec box; + float alpha; +} __packed__; + struct sna_composite_spans_op { struct sna_composite_op base; @@ -149,6 +164,12 @@ struct sna_composite_spans_op { const struct sna_composite_spans_op *op, const BoxRec *box, int nbox, float opacity); + + fastcall void (*thread_boxes)(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox); + fastcall void (*done)(struct sna *sna, const struct sna_composite_spans_op *op); @@ -156,6 +177,9 @@ struct sna_composite_spans_op { const struct sna_composite_spans_op *op, const BoxRec *box, float opacity); + fastcall void (*emit_boxes)(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, int nbox, + float *v); }; struct sna_fill_op { @@ -184,9 +208,18 @@ struct sna_copy_op { }; struct sna_render { + pthread_mutex_t lock; + pthread_cond_t wait; + int active; + int max_3d_size; int max_3d_pitch; + unsigned prefer_gpu; +#define PREFER_GPU_BLT 0x1 +#define PREFER_GPU_RENDER 0x2 +#define PREFER_GPU_SPANS 0x4 + bool (*composite)(struct sna *sna, uint8_t op, PicturePtr dst, PicturePtr src, PicturePtr mask, int16_t src_x, int16_t src_y, @@ -214,6 +247,7 @@ struct sna_render { RegionPtr dstRegion, short src_w, short src_h, short drw_w, short drw_h, + short dx, short dy, PixmapPtr pixmap); bool (*fill_boxes)(struct sna *sna, @@ -237,6 +271,7 @@ struct sna_render { PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, const BoxRec *box, int n, unsigned flags); #define COPY_LAST 0x1 +#define COPY_SYNC 0x2 bool (*copy)(struct sna *sna, uint8_t alu, PixmapPtr src, struct kgem_bo *src_bo, @@ -249,13 +284,13 @@ struct sna_render { struct sna_alpha_cache { struct kgem_bo *cache_bo; - struct kgem_bo *bo[256]; + 
struct kgem_bo *bo[256+7]; } alpha_cache; struct sna_solid_cache { struct kgem_bo *cache_bo; - uint32_t color[1024]; struct kgem_bo *bo[1024]; + uint32_t color[1025]; int last; int size; int dirty; @@ -282,6 +317,8 @@ struct sna_render { pixman_glyph_cache_t *glyph_cache; #endif + uint16_t vb_id; + uint16_t vertex_offset; uint16_t vertex_start; uint16_t vertex_index; uint16_t vertex_used; @@ -302,7 +339,6 @@ struct gen2_render_state { uint32_t ls1, ls2, vft; uint32_t diffuse; uint32_t specular; - uint16_t vertex_offset; }; struct gen3_render_state { @@ -318,7 +354,6 @@ struct gen3_render_state { uint32_t last_diffuse; uint32_t last_specular; - uint16_t vertex_offset; uint16_t last_vertex_offset; uint16_t floats_per_vertex; uint16_t last_floats_per_vertex; @@ -332,16 +367,14 @@ struct gen4_render_state { struct kgem_bo *general_bo; uint32_t vs; - uint32_t sf[2]; + uint32_t sf; uint32_t wm; uint32_t cc; int ve_id; uint32_t drawrect_offset; uint32_t drawrect_limit; - uint32_t vb_id; uint32_t last_pipelined_pointers; - uint16_t vertex_offset; uint16_t last_primitive; int16_t floats_per_vertex; uint16_t surface_table; @@ -361,8 +394,6 @@ struct gen5_render_state { int ve_id; uint32_t drawrect_offset; uint32_t drawrect_limit; - uint32_t vb_id; - uint16_t vertex_offset; uint16_t last_primitive; int16_t floats_per_vertex; uint16_t surface_table; @@ -402,7 +433,6 @@ struct gen6_render_state { uint32_t wm_state; uint32_t wm_kernel[GEN6_KERNEL_COUNT][3]; - uint32_t cc_vp; uint32_t cc_blend; uint32_t drawrect_offset; @@ -412,9 +442,7 @@ struct gen6_render_state { uint32_t kernel; uint16_t num_sf_outputs; - uint16_t vb_id; uint16_t ve_id; - uint16_t vertex_offset; uint16_t last_primitive; int16_t floats_per_vertex; uint16_t surface_table; @@ -454,7 +482,6 @@ struct gen7_render_state { uint32_t wm_state; uint32_t wm_kernel[GEN7_WM_KERNEL_COUNT][3]; - uint32_t cc_vp; uint32_t cc_blend; uint32_t drawrect_offset; @@ -464,9 +491,7 @@ struct gen7_render_state { uint32_t kernel; uint16_t num_sf_outputs; - uint16_t vb_id; uint16_t ve_id; - uint16_t vertex_offset; uint16_t last_primitive; int16_t floats_per_vertex; uint16_t surface_table; @@ -690,7 +715,8 @@ sna_render_picture_convert(struct sna *sna, PixmapPtr pixmap, int16_t x, int16_t y, int16_t w, int16_t h, - int16_t dst_x, int16_t dst_y); + int16_t dst_x, int16_t dst_y, + bool fixup_alpha); inline static void sna_render_composite_redirect_init(struct sna_composite_op *op) { @@ -717,4 +743,36 @@ sna_render_copy_boxes__overlap(struct sna *sna, uint8_t alu, bool sna_composite_mask_is_opaque(PicturePtr mask); +void sna_vertex_init(struct sna *sna); + +static inline void sna_vertex_lock(struct sna_render *r) +{ + pthread_mutex_lock(&r->lock); +} + +static inline void sna_vertex_acquire__locked(struct sna_render *r) +{ + r->active++; +} + +static inline void sna_vertex_unlock(struct sna_render *r) +{ + pthread_mutex_unlock(&r->lock); +} + +static inline void sna_vertex_release__locked(struct sna_render *r) +{ + assert(r->active > 0); + if (--r->active == 0) + pthread_cond_signal(&r->wait); +} + +static inline bool sna_vertex_wait__locked(struct sna_render *r) +{ + bool was_active = r->active; + while (r->active) + pthread_cond_wait(&r->wait, &r->lock); + return was_active; +} + #endif /* SNA_RENDER_H */ diff --git a/src/sna/sna_render_inline.h b/src/sna/sna_render_inline.h index a796903fb..7d9f2cacf 100644 --- a/src/sna/sna_render_inline.h +++ b/src/sna/sna_render_inline.h @@ -17,6 +17,17 @@ static inline bool need_redirect(struct sna *sna, PixmapPtr dst) 
dst->drawable.height > sna->render.max_3d_size); } +static inline float pack_2s(int16_t x, int16_t y) +{ + union { + struct sna_coordinate p; + float f; + } u; + u.p.x = x; + u.p.y = y; + return u.f; +} + static inline int vertex_space(struct sna *sna) { return sna->render.vertex_size - sna->render.vertex_used; @@ -28,21 +39,7 @@ static inline void vertex_emit(struct sna *sna, float v) } static inline void vertex_emit_2s(struct sna *sna, int16_t x, int16_t y) { - int16_t *v = (int16_t *)&sna->render.vertices[sna->render.vertex_used++]; - assert(sna->render.vertex_used <= sna->render.vertex_size); - v[0] = x; - v[1] = y; -} - -static inline float pack_2s(int16_t x, int16_t y) -{ - union { - struct sna_coordinate p; - float f; - } u; - u.p.x = x; - u.p.y = y; - return u.f; + vertex_emit(sna, pack_2s(x, y)); } static inline int batch_space(struct sna *sna) @@ -70,17 +67,18 @@ static inline void batch_emit_float(struct sna *sna, float f) } static inline bool -is_gpu(DrawablePtr drawable) +is_gpu(struct sna *sna, DrawablePtr drawable, unsigned prefer) { struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable); - if (priv == NULL || priv->clear) + if (priv == NULL || priv->clear || priv->cpu) return false; if (priv->cpu_damage == NULL) return true; - if (priv->gpu_damage && !priv->gpu_bo->proxy) + if (priv->gpu_damage && !priv->gpu_bo->proxy && + (sna->render.prefer_gpu & prefer)) return true; if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) @@ -111,11 +109,20 @@ unattached(DrawablePtr drawable) } static inline bool -picture_is_gpu(PicturePtr picture) +picture_is_gpu(struct sna *sna, PicturePtr picture) { if (!picture || !picture->pDrawable) return false; - return is_gpu(picture->pDrawable); + return is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER); +} + +static inline bool +picture_is_cpu(struct sna *sna, PicturePtr picture) +{ + if (!picture->pDrawable) + return false; + + return !is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER); } static inline bool sna_blt_compare_depth(DrawablePtr src, DrawablePtr dst) @@ -146,8 +153,8 @@ sna_render_picture_extents(PicturePtr p, BoxRec *box) { box->x1 = p->pDrawable->x; box->y1 = p->pDrawable->y; - box->x2 = p->pDrawable->x + p->pDrawable->width; - box->y2 = p->pDrawable->y + p->pDrawable->height; + box->x2 = bound(box->x1, p->pDrawable->width); + box->y2 = bound(box->y1, p->pDrawable->height); if (box->x1 < p->pCompositeClip->extents.x1) box->x1 = p->pCompositeClip->extents.x1; @@ -158,6 +165,8 @@ sna_render_picture_extents(PicturePtr p, BoxRec *box) box->x2 = p->pCompositeClip->extents.x2; if (box->y2 > p->pCompositeClip->extents.y2) box->y2 = p->pCompositeClip->extents.y2; + + assert(box->x2 > box->x1 && box->y2 > box->y1); } static inline void @@ -218,4 +227,44 @@ color_convert(uint32_t pixel, return pixel; } +inline static bool dst_use_gpu(PixmapPtr pixmap) +{ + struct sna_pixmap *priv = sna_pixmap(pixmap); + if (priv == NULL) + return false; + + if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) + return true; + + if (priv->clear) + return false; + + if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) + return true; + + return priv->gpu_damage && (!priv->cpu || !priv->cpu_damage); +} + +inline static bool dst_use_cpu(PixmapPtr pixmap) +{ + struct sna_pixmap *priv = sna_pixmap(pixmap); + if (priv == NULL || priv->shm) + return true; + + return priv->cpu_damage && priv->cpu; +} + +inline static bool dst_is_cpu(PixmapPtr pixmap) +{ + struct sna_pixmap *priv = sna_pixmap(pixmap); + return priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage); +} + 
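+/* Illustrative use of the prefer_gpu hints (a sketch, not part of this
+ * patch): a backend advertises which pipelines it wants on the GPU,
+ * e.g. sna->render.prefer_gpu = PREFER_GPU_BLT | PREFER_GPU_RENDER;
+ * callers then combine that hint with the per-pixmap damage state
+ * through is_gpu() and the dst_use_*() predicates above:
+ *
+ *	if (is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS))
+ *		use the composite_spans backend;
+ *	else
+ *		fall back to pixman on the CPU;
+ */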
+inline static bool +untransformed(PicturePtr p) +{ + return !p->transform || pixman_transform_is_int_translate(p->transform); +} + + #endif /* SNA_RENDER_INLINE_H */ diff --git a/src/sna/sna_threads.c b/src/sna/sna_threads.c new file mode 100644 index 000000000..f77ddbfe8 --- /dev/null +++ b/src/sna/sna_threads.c @@ -0,0 +1,306 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#include "sna.h" + +#include <unistd.h> +#include <pthread.h> +#include <signal.h> + +static int max_threads = -1; + +static struct thread { + pthread_t thread; + pthread_mutex_t mutex; + pthread_cond_t cond; + + void (*func)(void *arg); + void *arg; +} *threads; + +static void *__run__(void *arg) +{ + struct thread *t = arg; + sigset_t signals; + + /* Disable all signals in the slave threads as X uses them for IO */ + sigfillset(&signals); + pthread_sigmask(SIG_BLOCK, &signals, NULL); + + pthread_mutex_lock(&t->mutex); + while (1) { + while (t->func == NULL) + pthread_cond_wait(&t->cond, &t->mutex); + pthread_mutex_unlock(&t->mutex); + + assert(t->func); + t->func(t->arg); + + pthread_mutex_lock(&t->mutex); + t->func = NULL; + pthread_cond_signal(&t->cond); + } + pthread_mutex_unlock(&t->mutex); + + return NULL; +} + +#if defined(__GNUC__) +#define popcount(x) __builtin_popcount(x) +#else +static int popcount(unsigned int x) +{ + int count = 0; + + while (x) { + count += x&1; + x >>= 1; + } + + return count; +} +#endif + +static int +num_cores(void) +{ + FILE *file = fopen("/proc/cpuinfo", "r"); + int count = 0; + if (file) { + size_t len = 0; + char *line = NULL; + uint32_t processors = 0, cores = 0; + while (getline(&line, &len, file) != -1) { + int id; + if (sscanf(line, "physical id : %d", &id) == 1) { + if (id >= 32) + return 0; + processors |= 1 << id; + } else if (sscanf(line, "core id : %d", &id) == 1) { + if (id >= 32) + return 0; + cores |= 1 << id; + } + } + free(line); + fclose(file); + + DBG(("%s: processors=0x%08x, cores=0x%08x\n", + __FUNCTION__, processors, cores)); + + count = popcount(processors) * popcount(cores); + } + return count; +} + +void sna_threads_init(void) +{ + int n; + + if (max_threads != -1) + return; + + max_threads = num_cores(); + if (max_threads == 0) + max_threads = sysconf(_SC_NPROCESSORS_ONLN) / 2; + if (max_threads <= 1) + goto bail; + + DBG(("%s: creating a thread pool of %d threads\n", + __func__, 
max_threads)); + + threads = malloc (sizeof(threads[0])*max_threads); + if (threads == NULL) + goto bail; + + for (n = 0; n < max_threads; n++) { + pthread_mutex_init(&threads[n].mutex, NULL); + pthread_cond_init(&threads[n].cond, NULL); + + threads[n].func = NULL; + if (pthread_create(&threads[n].thread, NULL, + __run__, &threads[n])) + goto bail; + } + + return; + +bail: + max_threads = 0; +} + +void sna_threads_run(void (*func)(void *arg), void *arg) +{ + int n; + + assert(max_threads > 0); + + for (n = 0; n < max_threads; n++) { + if (threads[n].func) + continue; + + pthread_mutex_lock(&threads[n].mutex); + if (threads[n].func) { + pthread_mutex_unlock(&threads[n].mutex); + continue; + } + + goto execute; + } + + n = rand() % max_threads; + pthread_mutex_lock(&threads[n].mutex); + while (threads[n].func) + pthread_cond_wait(&threads[n].cond, &threads[n].mutex); + +execute: + threads[n].func = func; + threads[n].arg = arg; + pthread_cond_signal(&threads[n].cond); + pthread_mutex_unlock(&threads[n].mutex); +} + +void sna_threads_wait(void) +{ + int n; + + assert(max_threads > 0); + + for (n = 0; n < max_threads; n++) { + if (threads[n].func == NULL) + continue; + + pthread_mutex_lock(&threads[n].mutex); + while (threads[n].func) + pthread_cond_wait(&threads[n].cond, &threads[n].mutex); + pthread_mutex_unlock(&threads[n].mutex); + } +} + +int sna_use_threads(int width, int height, int threshold) +{ + int num_threads; + + if (max_threads <= 0) + return 1; + + num_threads = height / (128/width + 1) / threshold-1; + if (num_threads <= 0) + return 1; + + if (num_threads > max_threads) + num_threads = max_threads; + return num_threads; +} + +struct thread_composite { + pixman_image_t *src, *mask, *dst; + pixman_op_t op; + int16_t src_x, src_y; + int16_t mask_x, mask_y; + int16_t dst_x, dst_y; + uint16_t width, height; +}; + +static void thread_composite(void *arg) +{ + struct thread_composite *t = arg; + pixman_image_composite(t->op, t->src, t->mask, t->dst, + t->src_x, t->src_y, + t->mask_x, t->mask_y, + t->dst_x, t->dst_y, + t->width, t->height); +} + +void sna_image_composite(pixman_op_t op, + pixman_image_t *src, + pixman_image_t *mask, + pixman_image_t *dst, + int16_t src_x, + int16_t src_y, + int16_t mask_x, + int16_t mask_y, + int16_t dst_x, + int16_t dst_y, + uint16_t width, + uint16_t height) +{ + int num_threads; + + num_threads = sna_use_threads(width, height, 16); + if (num_threads <= 1) { + pixman_image_composite(op, src, mask, dst, + src_x, src_y, + mask_x, mask_y, + dst_x, dst_y, + width, height); + } else { + struct thread_composite data[num_threads]; + int y, dy, n; + + DBG(("%s: using %d threads for compositing %dx%d\n", + __FUNCTION__, num_threads, width, height)); + + y = dst_y; + dy = (height + num_threads - 1) / num_threads; + + data[0].op = op; + data[0].src = src; + data[0].mask = mask; + data[0].dst = dst; + data[0].src_x = src_x; + data[0].src_y = src_y; + data[0].mask_x = mask_x; + data[0].mask_y = mask_y; + data[0].dst_x = dst_x; + data[0].dst_y = y; + data[0].width = width; + data[0].height = dy; + + for (n = 1; n < num_threads; n++) { + data[n] = data[0]; + data[n].src_y += y - dst_y; + data[n].mask_y += y - dst_y; + data[n].dst_y = y; + y += dy; + + sna_threads_run(thread_composite, &data[n]); + } + + if (y + dy > dst_y + height) + dy = dst_y + height - y; + + data[0].src_y += y - dst_y; + data[0].mask_y += y - dst_y; + data[0].dst_y = y; + data[0].height = dy; + + thread_composite(&data[0]); + + sna_threads_wait(); + } +} diff --git a/src/sna/sna_tiling.c 
b/src/sna/sna_tiling.c index 9e70833c6..5bebf0020 100644 --- a/src/sna/sna_tiling.c +++ b/src/sna/sna_tiling.c @@ -624,7 +624,7 @@ sna_tiling_fill_boxes(struct sna *sna, RegionNull(&this); RegionIntersect(&this, ®ion, &tile); - if (!RegionNotEmpty(&this)) + if (RegionNil(&this)) continue; tmp.drawable.width = this.extents.x2 - this.extents.x1; @@ -737,7 +737,7 @@ bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu, RegionNull(&this); RegionIntersect(&this, ®ion, &tile); - if (!RegionNotEmpty(&this)) + if (RegionNil(&this)) continue; w = this.extents.x2 - this.extents.x1; diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c index 482abd369..c547fb5aa 100644 --- a/src/sna/sna_trapezoids.c +++ b/src/sna/sna_trapezoids.c @@ -49,6 +49,7 @@ #define NO_ALIGNED_BOXES 0 #define NO_UNALIGNED_BOXES 0 #define NO_SCAN_CONVERTER 0 +#define NO_GPU_THREADS 0 /* TODO: Emit unantialiased and MSAA triangles. */ @@ -68,6 +69,9 @@ #define FAST_SAMPLES_Y (1<<FAST_SAMPLES_shift) #define FAST_SAMPLES_mask ((1<<FAST_SAMPLES_shift)-1) +#define region_count(r) ((r)->data ? (r)->data->numRects : 1) +#define region_boxes(r) ((r)->data ? (BoxPtr)((r)->data + 1) : &(r)->extents) + typedef void (*span_func_t)(struct sna *sna, struct sna_composite_spans_op *op, pixman_region16_t *clip, @@ -328,10 +332,10 @@ floored_divrem(int a, int b) /* Compute the floored division (x*a)/b. Assumes / and % perform symmetric * division. */ static struct quorem -floored_muldivrem(int x, int a, int b) +floored_muldivrem(int32_t x, int32_t a, int32_t b) { struct quorem qr; - long long xa = (long long)x*a; + int64_t xa = (int64_t)x*a; qr.quo = xa/b; qr.rem = xa%b; if (qr.rem && (xa>=0) != (b>=0)) { @@ -471,7 +475,7 @@ cell_list_reset(struct cell_list *cells) pool_reset(cells->cell_pool.base); } -static struct cell * +inline static struct cell * cell_list_alloc(struct cell_list *cells, struct cell *tail, int x) @@ -532,6 +536,9 @@ cell_list_add_subspan(struct cell_list *cells, int ix1, fx1; int ix2, fx2; + if (x1 == x2) + return; + FAST_SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1); FAST_SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2); @@ -671,6 +678,8 @@ polygon_add_edge(struct polygon *polygon, ybot = bottom <= ymax ? 
bottom : ymax; e->ytop = ytop; e->height_left = ybot - ytop; + if (e->height_left <= 0) + return; if (dx == 0) { e->x.quo = x1; @@ -733,6 +742,8 @@ polygon_add_line(struct polygon *polygon, e->ytop = top; e->height_left = bot - top; + if (e->height_left <= 0) + return; if (dx == 0) { e->x.quo = p1->x; @@ -797,6 +808,9 @@ merge_sorted_edges(struct edge *head_a, struct edge *head_b) struct edge *head, **next, *prev; int32_t x; + if (head_b == NULL) + return head_a; + prev = head_a->prev; next = &head; if (head_a->x.quo <= head_b->x.quo) { @@ -869,11 +883,39 @@ sort_edges(struct edge *list, return remaining; } +static struct edge *filter(struct edge *edges) +{ + struct edge *e; + + e = edges; + do { + struct edge *n = e->next; + if (e->dir == -n->dir && + e->height_left == n->height_left && + *(uint64_t *)&e->x == *(uint64_t *)&n->x && + *(uint64_t *)&e->dxdy == *(uint64_t *)&n->dxdy) { + if (e->prev) + e->prev->next = n->next; + else + edges = n->next; + if (n->next) + n->next->prev = e->prev; + else + break; + + e = n->next; + } else + e = e->next; + } while (e->next); + + return edges; +} + static struct edge * merge_unsorted_edges (struct edge *head, struct edge *unsorted) { sort_edges (unsorted, UINT_MAX, &unsorted); - return merge_sorted_edges (head, unsorted); + return merge_sorted_edges (head, filter(unsorted)); } /* Test if the edges on the active list can be safely advanced by a @@ -881,18 +923,18 @@ merge_unsorted_edges (struct edge *head, struct edge *unsorted) inline static bool can_full_step(struct active_list *active) { - const struct edge *e; - /* Recomputes the minimum height of all edges on the active * list if we have been dropping edges. */ if (active->min_height <= 0) { + const struct edge *e; int min_height = INT_MAX; int is_vertical = 1; for (e = active->head.next; &active->tail != e; e = e->next) { if (e->height_left < min_height) min_height = e->height_left; - is_vertical &= e->dy == 0; + if (is_vertical) + is_vertical = e->dy == 0; } active->is_vertical = is_vertical; @@ -929,7 +971,8 @@ fill_buckets(struct active_list *active, *b = edge; if (edge->height_left < min_height) min_height = edge->height_left; - is_vertical &= edge->dy == 0; + if (is_vertical) + is_vertical = edge->dy == 0; edge = next; } @@ -1836,7 +1879,7 @@ static void mono_add_line(struct mono *mono, int dst_x, int dst_y, xFixed top, xFixed bottom, - xPointFixed *p1, xPointFixed *p2, + const xPointFixed *p1, const xPointFixed *p2, int dir) { struct mono_polygon *polygon = &mono->polygon; @@ -1853,7 +1896,7 @@ mono_add_line(struct mono *mono, dir)); if (top > bottom) { - xPointFixed *t; + const xPointFixed *t; y = top; top = bottom; @@ -1917,6 +1960,9 @@ mono_merge_sorted_edges(struct mono_edge *head_a, struct mono_edge *head_b) struct mono_edge *head, **next, *prev; int32_t x; + if (head_b == NULL) + return head_a; + prev = head_a->prev; next = &head; if (head_a->x.quo <= head_b->x.quo) { @@ -1990,11 +2036,39 @@ mono_sort_edges(struct mono_edge *list, return remaining; } +static struct mono_edge *mono_filter(struct mono_edge *edges) +{ + struct mono_edge *e; + + e = edges; + do { + struct mono_edge *n = e->next; + if (e->dir == -n->dir && + e->height_left == n->height_left && + *(uint64_t *)&e->x == *(uint64_t *)&n->x && + *(uint64_t *)&e->dxdy == *(uint64_t *)&n->dxdy) { + if (e->prev) + e->prev->next = n->next; + else + edges = n->next; + if (n->next) + n->next->prev = e->prev; + else + break; + + e = n->next; + } else + e = e->next; + } while (e->next); + + return edges; +} + static struct 
mono_edge * mono_merge_unsorted_edges(struct mono_edge *head, struct mono_edge *unsorted) { mono_sort_edges(unsorted, UINT_MAX, &unsorted); - return mono_merge_sorted_edges(head, unsorted); + return mono_merge_sorted_edges(head, mono_filter(unsorted)); } #if 0 @@ -2079,6 +2153,60 @@ mono_span__fast(struct mono *c, int x1, int x2, BoxPtr box) c->op.box(c->sna, &c->op, box); } +struct mono_span_thread_boxes { + const struct sna_composite_op *op; +#define MONO_SPAN_MAX_BOXES (8192/sizeof(BoxRec)) + BoxRec boxes[MONO_SPAN_MAX_BOXES]; + int num_boxes; +}; + +inline static void +thread_mono_span_add_boxes(struct mono *c, const BoxRec *box, int count) +{ + struct mono_span_thread_boxes *b = c->op.priv; + + assert(count > 0 && count <= MONO_SPAN_MAX_BOXES); + if (b->num_boxes + count > MONO_SPAN_MAX_BOXES) { + b->op->thread_boxes(c->sna, b->op, b->boxes, b->num_boxes); + b->num_boxes = 0; + } + + memcpy(b->boxes + b->num_boxes, box, count*sizeof(BoxRec)); + b->num_boxes += count; + assert(b->num_boxes <= MONO_SPAN_MAX_BOXES); +} + +fastcall static void +thread_mono_span_clipped(struct mono *c, int x1, int x2, BoxPtr box) +{ + pixman_region16_t region; + + __DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2)); + + box->x1 = x1; + box->x2 = x2; + + assert(c->clip.data); + + pixman_region_init_rects(®ion, box, 1); + RegionIntersect(®ion, ®ion, &c->clip); + if (REGION_NUM_RECTS(®ion)) + thread_mono_span_add_boxes(c, + REGION_RECTS(®ion), + REGION_NUM_RECTS(®ion)); + pixman_region_fini(®ion); +} + +fastcall static void +thread_mono_span(struct mono *c, int x1, int x2, BoxPtr box) +{ + __DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2)); + + box->x1 = x1; + box->x2 = x2; + thread_mono_span_add_boxes(c, box, 1); +} + inline static void mono_row(struct mono *c, int16_t y, int16_t h) { @@ -2196,10 +2324,7 @@ mono_render(struct mono *mono) struct mono_polygon *polygon = &mono->polygon; int i, j, h = mono->clip.extents.y2 - mono->clip.extents.y1; - if (mono->clip.data == NULL && mono->op.damage == NULL) - mono->span = mono_span__fast; - else - mono->span = mono_span; + assert(mono->span); for (i = 0; i < h; i = j) { j = i + 1; @@ -2332,7 +2457,8 @@ is_mono(PicturePtr dst, PictFormatPtr mask) } static bool -trapezoids_inplace_fallback(CARD8 op, +trapezoids_inplace_fallback(struct sna *sna, + CARD8 op, PicturePtr src, PicturePtr dst, PictFormatPtr mask, int ntrap, xTrapezoid *traps) { @@ -2372,7 +2498,7 @@ trapezoids_inplace_fallback(CARD8 op, return false; } - if (is_gpu(dst->pDrawable)) { + if (is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { DBG(("%s: not performing inplace as dst is already on the GPU\n", __FUNCTION__)); return false; @@ -2398,8 +2524,66 @@ trapezoids_inplace_fallback(CARD8 op, return true; } +struct rasterize_traps_thread { + xTrapezoid *traps; + char *ptr; + int stride; + BoxRec bounds; + pixman_format_code_t format; + int ntrap; +}; + +static void rasterize_traps_thread(void *arg) +{ + struct rasterize_traps_thread *thread = arg; + pixman_image_t *image; + int width, height, n; + + width = thread->bounds.x2 - thread->bounds.x1; + height = thread->bounds.y2 - thread->bounds.y1; + + memset(thread->ptr, 0, thread->stride*height); + if (PIXMAN_FORMAT_DEPTH(thread->format) < 8) + image = pixman_image_create_bits(thread->format, + width, height, + NULL, 0); + else + image = pixman_image_create_bits(thread->format, + width, height, + (uint32_t *)thread->ptr, + thread->stride); + if (image == NULL) + return; + + for (n = 0; n < thread->ntrap; n++) + pixman_rasterize_trapezoid(image, + (pixman_trapezoid_t 
*)&thread->traps[n], + -thread->bounds.x1, -thread->bounds.y1); + + if (PIXMAN_FORMAT_DEPTH(thread->format) < 8) { + pixman_image_t *a8; + + a8 = pixman_image_create_bits(PIXMAN_a8, + width, height, + (uint32_t *)thread->ptr, + thread->stride); + if (a8) { + pixman_image_composite(PIXMAN_OP_SRC, + image, NULL, a8, + 0, 0, + 0, 0, + 0, 0, + width, height); + pixman_image_unref(a8); + } + } + + pixman_image_unref(image); +} + static void -trapezoids_fallback(CARD8 op, PicturePtr src, PicturePtr dst, +trapezoids_fallback(struct sna *sna, + CARD8 op, PicturePtr src, PicturePtr dst, PictFormatPtr maskFormat, INT16 xSrc, INT16 ySrc, int ntrap, xTrapezoid * traps) { @@ -2441,6 +2625,8 @@ trapezoids_fallback(CARD8 op, PicturePtr src, PicturePtr dst, height = bounds.y2 - bounds.y1; bounds.x1 -= dst->pDrawable->x; bounds.y1 -= dst->pDrawable->y; + bounds.x2 -= dst->pDrawable->x; + bounds.y2 -= dst->pDrawable->y; depth = maskFormat->depth; if (depth == 1) { format = PIXMAN_a1; @@ -2452,51 +2638,90 @@ trapezoids_fallback(CARD8 op, PicturePtr src, PicturePtr dst, DBG(("%s: mask (%dx%d) depth=%d, format=%08x\n", __FUNCTION__, width, height, depth, format)); - if (is_gpu(dst->pDrawable) || picture_is_gpu(src)) { + if (is_gpu(sna, dst->pDrawable, PREFER_GPU_RENDER) || + picture_is_gpu(sna, src)) { + int num_threads; + scratch = sna_pixmap_create_upload(screen, width, height, 8, KGEM_BUFFER_WRITE); if (!scratch) return; - if (depth < 8) { - image = pixman_image_create_bits(format, width, height, - NULL, 0); - } else { - memset(scratch->devPrivate.ptr, 0, scratch->devKind*height); - image = pixman_image_create_bits(format, width, height, - scratch->devPrivate.ptr, - scratch->devKind); - } - if (image) { - for (; ntrap; ntrap--, traps++) - pixman_rasterize_trapezoid(image, - (pixman_trapezoid_t *)traps, - -bounds.x1, -bounds.y1); + num_threads = sna_use_threads(width, height, 4); + if (num_threads == 1) { if (depth < 8) { - pixman_image_t *a8; - - a8 = pixman_image_create_bits(PIXMAN_a8, width, height, - scratch->devPrivate.ptr, - scratch->devKind); - if (a8) { - pixman_image_composite(PIXMAN_OP_SRC, - image, NULL, a8, - 0, 0, - 0, 0, - 0, 0, - width, height); - format = PIXMAN_a8; - depth = 8; - pixman_image_unref (a8); + image = pixman_image_create_bits(format, width, height, + NULL, 0); + } else { + memset(scratch->devPrivate.ptr, 0, scratch->devKind*height); + + image = pixman_image_create_bits(format, width, height, + scratch->devPrivate.ptr, + scratch->devKind); + } + if (image) { + for (; ntrap; ntrap--, traps++) + pixman_rasterize_trapezoid(image, + (pixman_trapezoid_t *)traps, + -bounds.x1, -bounds.y1); + if (depth < 8) { + pixman_image_t *a8; + + a8 = pixman_image_create_bits(PIXMAN_a8, width, height, + scratch->devPrivate.ptr, + scratch->devKind); + if (a8) { + pixman_image_composite(PIXMAN_OP_SRC, + image, NULL, a8, + 0, 0, + 0, 0, + 0, 0, + width, height); + format = PIXMAN_a8; + depth = 8; + pixman_image_unref(a8); + } } + + pixman_image_unref(image); + } + if (format != PIXMAN_a8) { + sna_pixmap_destroy(scratch); + return; + } + } else { + struct rasterize_traps_thread threads[num_threads]; + int y, dy, n; + + threads[0].ptr = scratch->devPrivate.ptr; + threads[0].stride = scratch->devKind; + threads[0].traps = traps; + threads[0].ntrap = ntrap; + threads[0].bounds = bounds; + threads[0].format = format; + + y = bounds.y1; + dy = (height + num_threads - 1) / num_threads; + + for (n = 1; n < num_threads; n++) { + threads[n] = threads[0]; + threads[n].ptr += (y - bounds.y1) * threads[n].stride; 
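+			/* Band decomposition: worker n rasterizes dy rows
+			 * starting at y into its own slice of the scratch
+			 * buffer; thread 0 (set up below) takes the final,
+			 * possibly shorter, band on the calling thread and
+			 * then sna_threads_wait() drains the pool. */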
+ threads[n].bounds.y1 = y; + threads[n].bounds.y2 = y += dy; + + sna_threads_run(rasterize_traps_thread, &threads[n]); } - pixman_image_unref(image); - } - if (format != PIXMAN_a8) { - sna_pixmap_destroy(scratch); - return; + threads[0].ptr += (y - bounds.y1) * threads[0].stride; + threads[0].bounds.y1 = y; + threads[0].bounds.y2 = bounds.y2; + rasterize_traps_thread(&threads[0]); + + sna_threads_wait(); + + format = PIXMAN_a8; + depth = 8; } } else { scratch = sna_pixmap_create_unattached(screen, @@ -2538,7 +2763,7 @@ trapezoids_fallback(CARD8 op, PicturePtr src, PicturePtr dst, maskFormat = PictureMatchFormat(screen, 8, PICT_a8); for (; ntrap; ntrap--, traps++) - trapezoids_fallback(op, + trapezoids_fallback(sna, op, src, dst, maskFormat, xSrc, ySrc, 1, traps); } @@ -3073,13 +3298,13 @@ lerp32_unaligned_box_row(PixmapPtr scratch, uint32_t color, uint8_t covered) { int16_t x1 = pixman_fixed_to_int(trap->left.p1.x) + dx; - int16_t fx1 = grid_coverage(SAMPLES_X, trap->left.p1.x); + uint16_t fx1 = grid_coverage(SAMPLES_X, trap->left.p1.x); int16_t x2 = pixman_fixed_to_int(trap->right.p2.x) + dx; - int16_t fx2 = grid_coverage(SAMPLES_X, trap->right.p2.x); + uint16_t fx2 = grid_coverage(SAMPLES_X, trap->right.p2.x); if (x1 < extents->x1) x1 = extents->x1, fx1 = 0; - if (x2 > extents->x2) + if (x2 >= extents->x2) x2 = extents->x2, fx2 = 0; DBG(("%s: x=(%d.%d, %d.%d), y=%dx%d, covered=%d\n", __FUNCTION__, @@ -3171,13 +3396,13 @@ pixsolid_unaligned_box_row(struct pixman_inplace *pi, uint8_t covered) { int16_t x1 = pixman_fixed_to_int(trap->left.p1.x); - int16_t fx1 = grid_coverage(SAMPLES_X, trap->left.p1.x); + uint16_t fx1 = grid_coverage(SAMPLES_X, trap->left.p1.x); int16_t x2 = pixman_fixed_to_int(trap->right.p1.x); - int16_t fx2 = grid_coverage(SAMPLES_X, trap->right.p1.x); + uint16_t fx2 = grid_coverage(SAMPLES_X, trap->right.p1.x); if (x1 < extents->x1) x1 = extents->x1, fx1 = 0; - if (x2 > extents->x2) + if (x2 >= extents->x2) x2 = extents->x2, fx2 = 0; if (x1 < x2) { @@ -3198,7 +3423,8 @@ pixsolid_unaligned_box_row(struct pixman_inplace *pi, } static bool -composite_unaligned_boxes_inplace__solid(CARD8 op, uint32_t color, +composite_unaligned_boxes_inplace__solid(struct sna *sna, + CARD8 op, uint32_t color, PicturePtr dst, int n, xTrapezoid *t, bool force_fallback) { @@ -3206,9 +3432,9 @@ composite_unaligned_boxes_inplace__solid(CARD8 op, uint32_t color, int16_t dx, dy; DBG(("%s: force=%d, is_gpu=%d, op=%d, color=%x\n", __FUNCTION__, - force_fallback, is_gpu(dst->pDrawable), op, color)); + force_fallback, is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS), op, color)); - if (!force_fallback && is_gpu(dst->pDrawable)) { + if (!force_fallback && is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { DBG(("%s: fallback -- can not perform operation in place, destination busy\n", __FUNCTION__)); @@ -3276,9 +3502,9 @@ composite_unaligned_boxes_inplace__solid(CARD8 op, uint32_t color, extents = REGION_RECTS(&clip); while (count--) { int16_t y1 = dy + pixman_fixed_to_int(t->top); - int16_t fy1 = pixman_fixed_frac(t->top); + uint16_t fy1 = pixman_fixed_frac(t->top); int16_t y2 = dy + pixman_fixed_to_int(t->bottom); - int16_t fy2 = pixman_fixed_frac(t->bottom); + uint16_t fy2 = pixman_fixed_frac(t->bottom); DBG(("%s: t=(%d, %d), (%d, %d), extents (%d, %d), (%d, %d)\n", __FUNCTION__, @@ -3291,7 +3517,7 @@ composite_unaligned_boxes_inplace__solid(CARD8 op, uint32_t color, if (y1 < extents->y1) y1 = extents->y1, fy1 = 0; - if (y2 > extents->y2) + if (y2 >= extents->y2) y2 = extents->y2, fy2 = 0; if (y1 < y2) { 
@@ -3363,13 +3589,13 @@ pixman: extents = REGION_RECTS(&clip); while (count--) { int16_t y1 = pixman_fixed_to_int(t->top); - int16_t fy1 = pixman_fixed_frac(t->top); + uint16_t fy1 = pixman_fixed_frac(t->top); int16_t y2 = pixman_fixed_to_int(t->bottom); - int16_t fy2 = pixman_fixed_frac(t->bottom); + uint16_t fy2 = pixman_fixed_frac(t->bottom); if (y1 < extents->y1) y1 = extents->y1, fy1 = 0; - if (y2 > extents->y2) + if (y2 >= extents->y2) y2 = extents->y2, fy2 = 0; if (y1 < y2) { if (fy1) { @@ -3424,18 +3650,18 @@ pixmask_opacity(struct pixman_inplace *pi, static void pixmask_unaligned_box_row(struct pixman_inplace *pi, const BoxRec *extents, - xTrapezoid *trap, + const xTrapezoid *trap, int16_t y, int16_t h, uint8_t covered) { int16_t x1 = pixman_fixed_to_int(trap->left.p1.x); - int16_t fx1 = grid_coverage(SAMPLES_X, trap->left.p1.x); + uint16_t fx1 = grid_coverage(SAMPLES_X, trap->left.p1.x); int16_t x2 = pixman_fixed_to_int(trap->right.p1.x); - int16_t fx2 = grid_coverage(SAMPLES_X, trap->right.p1.x); + uint16_t fx2 = grid_coverage(SAMPLES_X, trap->right.p1.x); if (x1 < extents->x1) x1 = extents->x1, fx1 = 0; - if (x2 > extents->x2) + if (x2 >= extents->x2) x2 = extents->x2, fx2 = 0; if (x1 < x2) { @@ -3455,13 +3681,82 @@ pixmask_unaligned_box_row(struct pixman_inplace *pi, } } +struct rectilinear_inplace_thread { + pixman_image_t *dst, *src; + const RegionRec *clip; + const xTrapezoid *trap; + int dx, dy, sx, sy; + int y1, y2; + CARD8 op; +}; + +static void rectilinear_inplace_thread(void *arg) +{ + struct rectilinear_inplace_thread *thread = arg; + const xTrapezoid *t = thread->trap; + struct pixman_inplace pi; + const BoxRec *extents; + int count; + + pi.image = thread->dst; + pi.dx = thread->dx; + pi.dy = thread->dy; + + pi.source = thread->src; + pi.sx = thread->sx; + pi.sy = thread->sy; + + pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, &pi.color, 4); + pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL); + pi.bits = pixman_image_get_data(pi.mask); + pi.op = thread->op; + + count = region_count(thread->clip); + extents = region_boxes(thread->clip); + while (count--) { + int16_t y1 = pixman_fixed_to_int(t->top); + uint16_t fy1 = pixman_fixed_frac(t->top); + int16_t y2 = pixman_fixed_to_int(t->bottom); + uint16_t fy2 = pixman_fixed_frac(t->bottom); + + if (y1 < MAX(thread->y1, extents->y1)) + y1 = MAX(thread->y1, extents->y1), fy1 = 0; + if (y2 > MIN(thread->y2, extents->y2)) + y2 = MIN(thread->y2, extents->y2), fy2 = 0; + if (y1 < y2) { + if (fy1) { + pixmask_unaligned_box_row(&pi, extents, t, y1, 1, + SAMPLES_Y - grid_coverage(SAMPLES_Y, fy1)); + y1++; + } + + if (y2 > y1) + pixmask_unaligned_box_row(&pi, extents, t, y1, y2 - y1, + SAMPLES_Y); + + if (fy2) + pixmask_unaligned_box_row(&pi, extents, t, y2, 1, + grid_coverage(SAMPLES_Y, fy2)); + } else if (y1 == y2 && fy2 > fy1) { + pixmask_unaligned_box_row(&pi, extents, t, y1, 1, + grid_coverage(SAMPLES_Y, fy2) - grid_coverage(SAMPLES_Y, fy1)); + } + extents++; + } + + pixman_image_unref(pi.mask); +} + static bool -composite_unaligned_boxes_inplace(CARD8 op, +composite_unaligned_boxes_inplace(struct sna *sna, + CARD8 op, PicturePtr src, int16_t src_x, int16_t src_y, PicturePtr dst, int n, xTrapezoid *t, bool force_fallback) { - if (!force_fallback) { + if (!force_fallback && + (is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS) || + picture_is_gpu(sna, src))) { DBG(("%s: fallback -- not forcing\n", __FUNCTION__)); return false; } @@ -3471,10 +3766,10 @@ composite_unaligned_boxes_inplace(CARD8 op, src_x -= 
pixman_fixed_to_int(t[0].left.p1.x); src_y -= pixman_fixed_to_int(t[0].left.p1.y); do { - struct pixman_inplace pi; RegionRec clip; BoxPtr extents; int count; + int num_threads; clip.extents.x1 = pixman_fixed_to_int(t->left.p1.x); clip.extents.x2 = pixman_fixed_to_int(t->right.p1.x + pixman_fixed_1_minus_e); @@ -3513,59 +3808,100 @@ composite_unaligned_boxes_inplace(CARD8 op, } } - pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy); - pi.source = image_from_pict(src, false, &pi.sx, &pi.sy); - pi.sx += src_x; - pi.sy += src_y; - pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, NULL, 0); - pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL); - pi.bits = pixman_image_get_data(pi.mask); - pi.op = op; + num_threads = sna_use_threads(clip.extents.x2 - clip.extents.x1, + clip.extents.y2 - clip.extents.y1, + 32); + if (num_threads == 1) { + struct pixman_inplace pi; - count = REGION_NUM_RECTS(&clip); - extents = REGION_RECTS(&clip); - while (count--) { - int16_t y1 = pixman_fixed_to_int(t->top); - int16_t fy1 = pixman_fixed_frac(t->top); - int16_t y2 = pixman_fixed_to_int(t->bottom); - int16_t fy2 = pixman_fixed_frac(t->bottom); + pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy); + pi.source = image_from_pict(src, false, &pi.sx, &pi.sy); + pi.sx += src_x; + pi.sy += src_y; + pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, &pi.color, 4); + pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL); + pi.bits = pixman_image_get_data(pi.mask); + pi.op = op; - if (y1 < extents->y1) - y1 = extents->y1, fy1 = 0; - if (y2 > extents->y2) - y2 = extents->y2, fy2 = 0; - if (y1 < y2) { - if (fy1) { + count = REGION_NUM_RECTS(&clip); + extents = REGION_RECTS(&clip); + while (count--) { + int16_t y1 = pixman_fixed_to_int(t->top); + uint16_t fy1 = pixman_fixed_frac(t->top); + int16_t y2 = pixman_fixed_to_int(t->bottom); + uint16_t fy2 = pixman_fixed_frac(t->bottom); + + if (y1 < extents->y1) + y1 = extents->y1, fy1 = 0; + if (y2 > extents->y2) + y2 = extents->y2, fy2 = 0; + if (y1 < y2) { + if (fy1) { + pixmask_unaligned_box_row(&pi, extents, t, y1, 1, + SAMPLES_Y - grid_coverage(SAMPLES_Y, fy1)); + y1++; + } + + if (y2 > y1) + pixmask_unaligned_box_row(&pi, extents, t, y1, y2 - y1, + SAMPLES_Y); + + if (fy2) + pixmask_unaligned_box_row(&pi, extents, t, y2, 1, + grid_coverage(SAMPLES_Y, fy2)); + } else if (y1 == y2 && fy2 > fy1) { pixmask_unaligned_box_row(&pi, extents, t, y1, 1, - SAMPLES_Y - grid_coverage(SAMPLES_Y, fy1)); - y1++; + grid_coverage(SAMPLES_Y, fy2) - grid_coverage(SAMPLES_Y, fy1)); } + extents++; + } - if (y2 > y1) - pixmask_unaligned_box_row(&pi, extents, t, y1, y2 - y1, - SAMPLES_Y); + pixman_image_unref(pi.image); + pixman_image_unref(pi.source); + pixman_image_unref(pi.mask); + } else { + struct rectilinear_inplace_thread thread[num_threads]; + int i, y, dy; - if (fy2) - pixmask_unaligned_box_row(&pi, extents, t, y2, 1, - grid_coverage(SAMPLES_Y, fy2)); - } else if (y1 == y2 && fy2 > fy1) { - pixmask_unaligned_box_row(&pi, extents, t, y1, 1, - grid_coverage(SAMPLES_Y, fy2) - grid_coverage(SAMPLES_Y, fy1)); + + thread[0].trap = t; + thread[0].dst = image_from_pict(dst, false, &thread[0].dx, &thread[0].dy); + thread[0].src = image_from_pict(src, false, &thread[0].sx, &thread[0].sy); + thread[0].sx += src_x; + thread[0].sy += src_y; + + thread[0].clip = &clip; + thread[0].op = op; + + y = clip.extents.y1; + dy = (clip.extents.y2 - clip.extents.y1 + num_threads - 1) / num_threads; + + for (i = 1; i < num_threads; i++) { + thread[i] = thread[0]; + thread[i].y1 = y; + thread[i].y2 
= y += dy; + sna_threads_run(rectilinear_inplace_thread, &thread[i]); } - extents++; + + thread[0].y1 = y; + thread[0].y2 = clip.extents.y2; + rectilinear_inplace_thread(&thread[0]); + + sna_threads_wait(); + + pixman_image_unref(thread[0].dst); + pixman_image_unref(thread[0].src); } RegionUninit(&clip); - pixman_image_unref(pi.image); - pixman_image_unref(pi.source); - pixman_image_unref(pi.mask); } while (--n && t++); return true; } static bool -composite_unaligned_boxes_fallback(CARD8 op, +composite_unaligned_boxes_fallback(struct sna *sna, + CARD8 op, PicturePtr src, PicturePtr dst, INT16 src_x, INT16 src_y, @@ -3579,12 +3915,12 @@ composite_unaligned_boxes_fallback(CARD8 op, int n; if (sna_picture_is_solid(src, &color) && - composite_unaligned_boxes_inplace__solid(op, color, dst, + composite_unaligned_boxes_inplace__solid(sna, op, color, dst, ntrap, traps, force_fallback)) return true; - if (composite_unaligned_boxes_inplace(op, src, src_x, src_y, + if (composite_unaligned_boxes_inplace(sna, op, src, src_x, src_y, dst, ntrap, traps, force_fallback)) return true; @@ -3708,7 +4044,7 @@ composite_unaligned_boxes(struct sna *sna, !sna->render.check_composite_spans(sna, op, src, dst, 0, 0, COMPOSITE_SPANS_RECTILINEAR)) { fallback: - return composite_unaligned_boxes_fallback(op, src, dst, + return composite_unaligned_boxes_fallback(sna, op, src, dst, src_x, src_y, ntrap, traps, force_fallback); @@ -3860,14 +4196,13 @@ static span_func_t choose_span(struct sna_composite_spans_op *tmp, PicturePtr dst, PictFormatPtr maskFormat, - uint8_t op, RegionPtr clip) { span_func_t span; if (is_mono(dst, maskFormat)) { /* XXX An imprecise approximation */ - if (maskFormat && !operator_is_bounded(op)) { + if (maskFormat && !operator_is_bounded(tmp->base.op)) { span = tor_blt_span_mono_unbounded; if (REGION_NUM_RECTS(clip) > 1) span = tor_blt_span_mono_unbounded_clipped; @@ -3888,8 +4223,77 @@ choose_span(struct sna_composite_spans_op *tmp, return span; } +struct mono_span_thread { + struct sna *sna; + const xTrapezoid *traps; + const struct sna_composite_op *op; + RegionPtr clip; + int ntrap; + BoxRec extents; + int dx, dy; +}; + +static void +mono_span_thread(void *arg) +{ + struct mono_span_thread *thread = arg; + struct mono mono; + struct mono_span_thread_boxes boxes; + const xTrapezoid *t; + int n; + + mono.sna = thread->sna; + + mono.clip.extents = thread->extents; + mono.clip.data = NULL; + if (thread->clip->data) { + RegionIntersect(&mono.clip, &mono.clip, thread->clip); + if (RegionNil(&mono.clip)) + return; + } + + boxes.op = thread->op; + boxes.num_boxes = 0; + mono.op.priv = &boxes; + + if (!mono_init(&mono, 2*thread->ntrap)) { + RegionUninit(&mono.clip); + return; + } + + for (n = thread->ntrap, t = thread->traps; n--; t++) { + if (!xTrapezoidValid(t)) + continue; + + if (pixman_fixed_to_int(t->top) + thread->dy >= thread->extents.y2 || + pixman_fixed_to_int(t->bottom) + thread->dy <= thread->extents.y1) + continue; + + mono_add_line(&mono, thread->dx, thread->dy, + t->top, t->bottom, + &t->left.p1, &t->left.p2, 1); + mono_add_line(&mono, thread->dx, thread->dy, + t->top, t->bottom, + &t->right.p1, &t->right.p2, -1); + } + + if (mono.clip.data == NULL) + mono.span = thread_mono_span; + else + mono.span = thread_mono_span_clipped; + + mono_render(&mono); + mono_fini(&mono); + + if (boxes.num_boxes) + thread->op->thread_boxes(thread->sna, thread->op, + boxes.boxes, boxes.num_boxes); + RegionUninit(&mono.clip); +} + static bool -mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr 
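
mono_span_thread above narrows the shared clip to its own band before rasterising and returns immediately when the intersection is empty (the RegionNil check), so two workers can never touch the same rows. The same guard in isolation, using the pixman region API that backs the server's RegionRec; this is a sketch, not the driver's exact helper:

#include <pixman.h>
#include <stdbool.h>

static bool clip_band(pixman_region16_t *out,
		      const pixman_box16_t *band,
		      pixman_region16_t *shared_clip)
{
	pixman_region_init_rects(out, band, 1);
	if (!pixman_region_intersect(out, out, shared_clip))
		return false;			/* allocation failure */
	return pixman_region_not_empty(out);	/* false: nothing to render */
}
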
dst, +mono_trapezoids_span_converter(struct sna *sna, + CARD8 op, PicturePtr src, PicturePtr dst, INT16 src_x, INT16 src_y, int ntrap, xTrapezoid *traps) { @@ -3897,8 +4301,8 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, BoxRec extents; int16_t dst_x, dst_y; int16_t dx, dy; - bool was_clear; - int n; + bool unbounded; + int num_threads, n; if (NO_SCAN_CONVERTER) return false; @@ -3937,11 +4341,69 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, src_x + mono.clip.extents.x1 - dst_x - dx, src_y + mono.clip.extents.y1 - dst_y - dy)); - mono.sna = to_sna_from_drawable(dst->pDrawable); - if (!mono_init(&mono, 2*ntrap)) + unbounded = (!sna_drawable_is_clear(dst->pDrawable) && + !operator_is_bounded(op)); + + mono.sna = sna; + if (!mono.sna->render.composite(mono.sna, op, src, NULL, dst, + src_x + mono.clip.extents.x1 - dst_x - dx, + src_y + mono.clip.extents.y1 - dst_y - dy, + 0, 0, + mono.clip.extents.x1, mono.clip.extents.y1, + mono.clip.extents.x2 - mono.clip.extents.x1, + mono.clip.extents.y2 - mono.clip.extents.y1, + memset(&mono.op, 0, sizeof(mono.op)))) return false; - was_clear = sna_drawable_is_clear(dst->pDrawable); + num_threads = 1; + if (!NO_GPU_THREADS && + mono.op.thread_boxes && + mono.op.damage == NULL && + !unbounded) + num_threads = sna_use_threads(mono.clip.extents.x2 - mono.clip.extents.x1, + mono.clip.extents.y2 - mono.clip.extents.y1, + 16); + if (num_threads > 1) { + struct mono_span_thread threads[num_threads]; + int y, h; + + DBG(("%s: using %d threads for mono span compositing %dx%d\n", + __FUNCTION__, num_threads, + mono.clip.extents.x2 - mono.clip.extents.x1, + mono.clip.extents.y2 - mono.clip.extents.y1)); + + threads[0].sna = mono.sna; + threads[0].op = &mono.op; + threads[0].traps = traps; + threads[0].ntrap = ntrap; + threads[0].extents = mono.clip.extents; + threads[0].clip = &mono.clip; + threads[0].dx = dx; + threads[0].dy = dy; + + y = extents.y1; + h = extents.y2 - extents.y1; + h = (h + num_threads - 1) / num_threads; + + for (n = 1; n < num_threads; n++) { + threads[n] = threads[0]; + threads[n].extents.y1 = y; + threads[n].extents.y2 = y += h; + + sna_threads_run(mono_span_thread, &threads[n]); + } + + threads[0].extents.y1 = y; + threads[0].extents.y2 = extents.y2; + mono_span_thread(&threads[0]); + + sna_threads_wait(); + mono.op.done(mono.sna, &mono.op); + return true; + } + + if (!mono_init(&mono, 2*ntrap)) + return false; for (n = 0; n < ntrap; n++) { if (!xTrapezoidValid(&traps[n])) @@ -3959,23 +4421,16 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, &traps[n].right.p1, &traps[n].right.p2, -1); } - memset(&mono.op, 0, sizeof(mono.op)); - if (!mono.sna->render.composite(mono.sna, op, src, NULL, dst, - src_x + mono.clip.extents.x1 - dst_x - dx, - src_y + mono.clip.extents.y1 - dst_y - dy, - 0, 0, - mono.clip.extents.x1, mono.clip.extents.y1, - mono.clip.extents.x2 - mono.clip.extents.x1, - mono.clip.extents.y2 - mono.clip.extents.y1, - &mono.op)) { - mono_fini(&mono); - return false; - } + if (mono.clip.data == NULL && mono.op.damage == NULL) + mono.span = mono_span__fast; + else + mono.span = mono_span; + mono_render(&mono); mono.op.done(mono.sna, &mono.op); mono_fini(&mono); - if (!was_clear && !operator_is_bounded(op)) { + if (unbounded) { xPointFixed p1, p2; if (!mono_init(&mono, 2+2*ntrap)) @@ -4027,26 +4482,171 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, return true; } +struct span_thread { + struct sna *sna; + const struct 
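
Each converter gates its threaded path on sna_use_threads(width, height, threshold), where the threshold (32 for the unaligned-box path, 16 here, 8 for the inplace paths below) expresses how much area a band must contain before a thread pays for itself. The policy itself is defined elsewhere in the driver; a plausible sketch, assuming it counts threshold-sized chunks of work and clamps to the online CPU count:

#include <unistd.h>

static int use_threads(int width, int height, int threshold)
{
	long ncpu = sysconf(_SC_NPROCESSORS_ONLN);
	long chunks = ((long)width * height) / (threshold * threshold);

	if (ncpu < 2 || chunks < 2)
		return 1;	/* too little work: stay single-threaded */
	return chunks < ncpu ? (int)chunks : (int)ncpu;
}
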
sna_composite_spans_op *op; + const xTrapezoid *traps; + RegionPtr clip; + span_func_t span; + BoxRec extents; + int dx, dy, draw_y; + int ntrap; + bool unbounded; +}; + +#define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box)) +struct span_thread_boxes { + const struct sna_composite_spans_op *op; + struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES]; + int num_boxes; +}; + +static void span_thread_add_boxes(struct sna *sna, void *data, + const BoxRec *box, int count, float alpha) +{ + struct span_thread_boxes *b = data; + + __DBG(("%s: adding %d boxes with alpha=%f\n", + __FUNCTION__, count, alpha)); + + assert(count > 0 && count <= SPAN_THREAD_MAX_BOXES); + if (b->num_boxes + count > SPAN_THREAD_MAX_BOXES) { + DBG(("%s: flushing %d boxes, adding %d\n", __FUNCTION__, b->num_boxes, count)); + assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES); + b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes); + b->num_boxes = 0; + } + + do { + b->boxes[b->num_boxes].box = *box++; + b->boxes[b->num_boxes].alpha = alpha; + b->num_boxes++; + } while (--count); + assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES); +} + +static void +span_thread_box(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + __DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage)); + span_thread_add_boxes(sna, op, box, 1, AREA_TO_ALPHA(coverage)); +} + +static void +span_thread_clipped_box(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + pixman_region16_t region; + + __DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2, + AREA_TO_ALPHA(coverage))); + + pixman_region_init_rects(®ion, box, 1); + RegionIntersect(®ion, ®ion, clip); + if (REGION_NUM_RECTS(®ion)) { + span_thread_add_boxes(sna, op, + REGION_RECTS(®ion), + REGION_NUM_RECTS(®ion), + AREA_TO_ALPHA(coverage)); + } + pixman_region_fini(®ion); +} + +static span_func_t +thread_choose_span(struct sna_composite_spans_op *tmp, + PicturePtr dst, + PictFormatPtr maskFormat, + RegionPtr clip) +{ + span_func_t span; + + if (tmp->base.damage) + return NULL; + + if (is_mono(dst, maskFormat)) { + return NULL; + } else { + if (REGION_NUM_RECTS(clip) > 1) + span = span_thread_clipped_box; + else + span = span_thread_box; + } + + return span; +} + +static void +span_thread(void *arg) +{ + struct span_thread *thread = arg; + struct span_thread_boxes boxes; + struct tor tor; + const xTrapezoid *t; + int n, y1, y2; + + if (tor_init(&tor, &thread->extents, 2*thread->ntrap)) + return; + + boxes.op = thread->op; + boxes.num_boxes = 0; + + y1 = thread->extents.y1 - thread->draw_y; + y2 = thread->extents.y2 - thread->draw_y; + for (n = thread->ntrap, t = thread->traps; n--; t++) { + xTrapezoid tt; + + if (pixman_fixed_to_int(t->top) >= y2 || + pixman_fixed_to_int(t->bottom) < y1) + continue; + + if (!project_trapezoid_onto_grid(t, thread->dx, thread->dy, &tt)) + continue; + + tor_add_edge(&tor, &tt, &tt.left, 1); + tor_add_edge(&tor, &tt, &tt.right, -1); + } + + tor_render(thread->sna, &tor, + (struct sna_composite_spans_op *)&boxes, thread->clip, + thread->span, thread->unbounded); + + tor_fini(&tor); + + if (boxes.num_boxes) { + DBG(("%s: flushing %d boxes\n", __FUNCTION__, boxes.num_boxes)); + assert(boxes.num_boxes <= SPAN_THREAD_MAX_BOXES); + thread->op->thread_boxes(thread->sna, thread->op, + boxes.boxes, boxes.num_boxes); + } +} + static bool -trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, 
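
Worker threads must not emit GPU commands themselves, so span_thread_add_boxes above accumulates spans into a fixed per-thread array (SPAN_THREAD_MAX_BOXES, sized to fit 8KiB) and flushes through op->thread_boxes, which is expected to serialise against the other workers, whenever the next add would overflow; span_thread performs the final flush. The same pattern reduced to its essentials (names here are illustrative):

#define MAX_BOXES 128

struct opacity_box { int x1, y1, x2, y2; float alpha; };

struct batch {
	struct opacity_box boxes[MAX_BOXES];
	int num_boxes;
	void (*flush)(const struct opacity_box *boxes, int n); /* thread_boxes */
};

static void batch_add(struct batch *b, const struct opacity_box *box)
{
	if (b->num_boxes == MAX_BOXES) {	/* flush before overflowing */
		b->flush(b->boxes, b->num_boxes);
		b->num_boxes = 0;
	}
	b->boxes[b->num_boxes++] = *box;
}

static void batch_fini(struct batch *b)
{
	if (b->num_boxes)			/* emit the tail */
		b->flush(b->boxes, b->num_boxes);
}
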
+trapezoid_span_converter(struct sna *sna, + CARD8 op, PicturePtr src, PicturePtr dst, PictFormatPtr maskFormat, unsigned int flags, INT16 src_x, INT16 src_y, int ntrap, xTrapezoid *traps) { - struct sna *sna; struct sna_composite_spans_op tmp; - struct tor tor; BoxRec extents; pixman_region16_t clip; int16_t dst_x, dst_y; bool was_clear; int dx, dy, n; + int num_threads; if (NO_SCAN_CONVERTER) return false; if (is_mono(dst, maskFormat)) - return mono_trapezoids_span_converter(op, src, dst, + return mono_trapezoids_span_converter(sna, op, src, dst, src_x, src_y, ntrap, traps); @@ -4057,7 +4657,6 @@ trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, return false; } - sna = to_sna_from_drawable(dst->pDrawable); if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, flags)) { DBG(("%s: fallback -- composite spans not supported\n", __FUNCTION__)); @@ -4144,29 +4743,78 @@ trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, dx *= FAST_SAMPLES_X; dy *= FAST_SAMPLES_Y; - if (tor_init(&tor, &extents, 2*ntrap)) - goto skip; - for (n = 0; n < ntrap; n++) { - xTrapezoid t; + num_threads = 1; + if (!NO_GPU_THREADS && tmp.thread_boxes && + thread_choose_span(&tmp, dst, maskFormat, &clip)) + num_threads = sna_use_threads(extents.x2-extents.x1, + extents.y2-extents.y1, + 16); + if (num_threads == 1) { + struct tor tor; - if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t)) - continue; + if (tor_init(&tor, &extents, 2*ntrap)) + goto skip; - if (pixman_fixed_to_int(traps[n].top) + dst->pDrawable->y >= extents.y2 || - pixman_fixed_to_int(traps[n].bottom) + dst->pDrawable->y < extents.y1) - continue; + for (n = 0; n < ntrap; n++) { + xTrapezoid t; - tor_add_edge(&tor, &t, &t.left, 1); - tor_add_edge(&tor, &t, &t.right, -1); - } + if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t)) + continue; - tor_render(sna, &tor, &tmp, &clip, - choose_span(&tmp, dst, maskFormat, op, &clip), - !was_clear && maskFormat && !operator_is_bounded(op)); + if (pixman_fixed_to_int(traps[n].top) + dst->pDrawable->y >= extents.y2 || + pixman_fixed_to_int(traps[n].bottom) + dst->pDrawable->y < extents.y1) + continue; + + tor_add_edge(&tor, &t, &t.left, 1); + tor_add_edge(&tor, &t, &t.right, -1); + } + + tor_render(sna, &tor, &tmp, &clip, + choose_span(&tmp, dst, maskFormat, &clip), + !was_clear && maskFormat && !operator_is_bounded(op)); skip: - tor_fini(&tor); + tor_fini(&tor); + } else { + struct span_thread threads[num_threads]; + int y, h; + + DBG(("%s: using %d threads for span compositing %dx%d\n", + __FUNCTION__, num_threads, + extents.x2 - extents.x1, + extents.y2 - extents.y1)); + + threads[0].sna = sna; + threads[0].op = &tmp; + threads[0].traps = traps; + threads[0].ntrap = ntrap; + threads[0].extents = extents; + threads[0].clip = &clip; + threads[0].dx = dx; + threads[0].dy = dy; + threads[0].draw_y = dst->pDrawable->y; + threads[0].unbounded = !was_clear && maskFormat && !operator_is_bounded(op); + threads[0].span = thread_choose_span(&tmp, dst, maskFormat, &clip); + + y = extents.y1; + h = extents.y2 - extents.y1; + h = (h + num_threads - 1) / num_threads; + + for (n = 1; n < num_threads; n++) { + threads[n] = threads[0]; + threads[n].extents.y1 = y; + threads[n].extents.y2 = y += h; + + sna_threads_run(span_thread, &threads[n]); + } + + threads[0].extents.y1 = y; + threads[0].extents.y2 = extents.y2; + span_thread(&threads[0]); + + sna_threads_wait(); + } tmp.done(sna, &tmp); REGION_UNINIT(NULL, &clip); @@ -4351,7 +4999,8 @@ struct inplace { static force_inline uint8_t 
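
Every threaded path in this patch divides the extents the same way: h = (h + num_threads - 1) / num_threads rounds the band height up, so the n-1 spawned bands plus the caller's tail always cover every row, and only the tail can come up short. Worked through with concrete numbers:

int y1 = 0, y2 = 100, num_threads = 3;
int h = (y2 - y1 + num_threads - 1) / num_threads;  /* (100 + 2) / 3 = 34 */
/* workers render [0,34) and [34,68); the calling thread finishes [68,100) */
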
coverage_opacity(int coverage, uint8_t opacity) { coverage = coverage * 256 / FAST_SAMPLES_XY; - return mul_8_8(coverage - (coverage >> 8), opacity); + coverage -= coverage >> 8; + return opacity == 255 ? coverage : mul_8_8(coverage, opacity); } static void @@ -4673,7 +5322,8 @@ mono_inplace_composite_boxes(struct sna *sna, } static bool -trapezoid_spans_maybe_inplace(CARD8 op, PicturePtr src, PicturePtr dst, +trapezoid_spans_maybe_inplace(struct sna *sna, + CARD8 op, PicturePtr src, PicturePtr dst, PictFormatPtr maskFormat) { struct sna_pixmap *priv; @@ -4706,7 +5356,7 @@ trapezoid_spans_maybe_inplace(CARD8 op, PicturePtr src, PicturePtr dst, case PICT_x8r8g8b8: case PICT_a8r8g8b8: - if (picture_is_gpu(src)) + if (picture_is_gpu(sna, src)) return false; switch (op) { @@ -4753,7 +5403,8 @@ out: } static bool -trapezoid_span_mono_inplace(CARD8 op, +trapezoid_span_mono_inplace(struct sna *sna, + CARD8 op, PicturePtr src, PicturePtr dst, INT16 src_x, INT16 src_y, @@ -4799,7 +5450,7 @@ trapezoid_span_mono_inplace(CARD8 op, MOVE_WRITE | MOVE_READ)) return true; - mono.sna = to_sna_from_drawable(dst->pDrawable); + mono.sna = sna; if (!mono_init(&mono, 2*ntrap)) return false; @@ -4855,6 +5506,20 @@ unbounded_pass: op = 0; } else { + if (src->pDrawable) { + if (!sna_drawable_move_to_cpu(src->pDrawable, + MOVE_READ)) { + mono_fini(&mono); + return false; + } + if (src->alphaMap && + !sna_drawable_move_to_cpu(src->alphaMap->pDrawable, + MOVE_READ)) { + mono_fini(&mono); + return false; + } + } + inplace.composite.dst = image_from_pict(dst, false, &inplace.composite.dx, &inplace.composite.dy); @@ -4871,6 +5536,11 @@ unbounded_pass: mono.op.box = mono_inplace_composite_box; mono.op.boxes = mono_inplace_composite_boxes; } + + if (mono.clip.data == NULL && mono.op.damage == NULL) + mono.span = mono_span__fast; + else + mono.span = mono_span; mono_render(&mono); mono_fini(&mono); @@ -4922,6 +5592,45 @@ unbounded_pass: } static void +pixmask_span_solid(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + struct pixman_inplace *pi = (struct pixman_inplace *)op; + if (coverage != FAST_SAMPLES_XY) { + coverage = coverage * 256 / FAST_SAMPLES_XY; + coverage -= coverage >> 8; + *pi->bits = mul_4x8_8(pi->color, coverage); + } else + *pi->bits = pi->color; + pixman_image_composite(pi->op, pi->source, NULL, pi->image, + box->x1, box->y1, + 0, 0, + pi->dx + box->x1, pi->dy + box->y1, + box->x2 - box->x1, box->y2 - box->y1); +} +static void +pixmask_span_solid__clipped(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + pixman_region16_t region; + int n; + + pixman_region_init_rects(®ion, box, 1); + RegionIntersect(®ion, ®ion, clip); + n = REGION_NUM_RECTS(®ion); + box = REGION_RECTS(®ion); + while (n--) + pixmask_span_solid(sna, op, NULL, box++, coverage); + pixman_region_fini(®ion); +} + +static void pixmask_span(struct sna *sna, struct sna_composite_spans_op *op, pixman_region16_t *clip, @@ -4961,6 +5670,113 @@ pixmask_span__clipped(struct sna *sna, pixman_region_fini(®ion); } +struct inplace_x8r8g8b8_thread { + xTrapezoid *traps; + PicturePtr dst, src; + BoxRec extents; + int dx, dy; + int ntrap; + bool lerp, is_solid; + uint32_t color; + int16_t src_x, src_y; + uint8_t op; +}; + +static void inplace_x8r8g8b8_thread(void *arg) +{ + struct inplace_x8r8g8b8_thread *thread = arg; + struct tor tor; + span_func_t span; + RegionPtr clip; + int y1, y2, n; + + if (tor_init(&tor, 
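
The reworked coverage_opacity above is worth unpacking: coverage is first scaled from 0..FAST_SAMPLES_XY to 0..256, then coverage -= coverage >> 8 folds 256 down to 255 while leaving every smaller value unchanged, and finally the multiply is skipped outright for fully opaque spans, which is the new fast path. A sketch with mul_8_8 spelled out as the usual rounded multiply-by-alpha idiom (an assumption; the driver's own definition is outside this hunk):

#include <stdint.h>

static uint8_t mul_8_8(uint8_t a, uint8_t b)
{
	uint16_t t = a * b + 0x80;
	return (t + (t >> 8)) >> 8;	/* rounded (a * b) / 255, no divide */
}

static uint8_t coverage_opacity(int coverage, uint8_t opacity, int samples)
{
	coverage = coverage * 256 / samples;	/* 0..256 */
	coverage -= coverage >> 8;		/* fold 256 down to 255 */
	return opacity == 255 ? coverage : mul_8_8(coverage, opacity);
}
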
&thread->extents, 2*thread->ntrap)) + return; + + y1 = thread->extents.y1 - thread->dst->pDrawable->y; + y2 = thread->extents.y2 - thread->dst->pDrawable->y; + for (n = 0; n < thread->ntrap; n++) { + xTrapezoid t; + + if (!project_trapezoid_onto_grid(&thread->traps[n], thread->dx, thread->dy, &t)) + continue; + + if (pixman_fixed_to_int(thread->traps[n].top) >= y2 || + pixman_fixed_to_int(thread->traps[n].bottom) < y1) + continue; + + tor_add_edge(&tor, &t, &t.left, 1); + tor_add_edge(&tor, &t, &t.right, -1); + } + + clip = thread->dst->pCompositeClip; + if (thread->lerp) { + struct inplace inplace; + int16_t dst_x, dst_y; + PixmapPtr pixmap; + + pixmap = get_drawable_pixmap(thread->dst->pDrawable); + get_drawable_deltas(thread->dst->pDrawable, pixmap, &dst_x, &dst_y); + + inplace.ptr = pixmap->devPrivate.ptr; + inplace.ptr += dst_y * pixmap->devKind + dst_x * 4; + inplace.stride = pixmap->devKind; + inplace.color = thread->color; + + if (clip->data) + span = tor_blt_lerp32_clipped; + else + span = tor_blt_lerp32; + + tor_render(NULL, &tor, (void*)&inplace, clip, span, false); + } else if (thread->is_solid) { + struct pixman_inplace pi; + + pi.image = image_from_pict(thread->dst, false, &pi.dx, &pi.dy); + pi.op = thread->op; + pi.color = thread->color; + + pi.bits = (uint32_t *)&pi.sx; + pi.source = pixman_image_create_bits(PIXMAN_a8r8g8b8, + 1, 1, pi.bits, 0); + pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL); + + if (clip->data) + span = pixmask_span_solid__clipped; + else + span = pixmask_span_solid; + + tor_render(NULL, &tor, (void*)&pi, clip, span, false); + + pixman_image_unref(pi.source); + pixman_image_unref(pi.image); + } else { + struct pixman_inplace pi; + + pi.image = image_from_pict(thread->dst, false, &pi.dx, &pi.dy); + pi.source = image_from_pict(thread->src, false, &pi.sx, &pi.sy); + pi.sx += thread->src_x - pixman_fixed_to_int(thread->traps[0].left.p1.x); + pi.sy += thread->src_y - pixman_fixed_to_int(thread->traps[0].left.p1.y); + pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, NULL, 0); + pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL); + pi.bits = pixman_image_get_data(pi.mask); + pi.op = thread->op; + + if (clip->data) + span = pixmask_span__clipped; + else + span = pixmask_span; + + tor_render(NULL, &tor, (void*)&pi, clip, span, false); + + pixman_image_unref(pi.mask); + pixman_image_unref(pi.source); + pixman_image_unref(pi.image); + } + + tor_fini(&tor); +} + static bool trapezoid_span_inplace__x8r8g8b8(CARD8 op, PicturePtr dst, @@ -4968,17 +5784,15 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, PictFormatPtr maskFormat, int ntrap, xTrapezoid *traps) { - struct tor tor; - span_func_t span; uint32_t color; - bool lerp; + bool lerp, is_solid; RegionRec region; - int16_t dst_x, dst_y; int dx, dy; - int n; + int num_threads, n; lerp = false; - if (sna_picture_is_solid(src, &color)) { + is_solid = sna_picture_is_solid(src, &color); + if (is_solid) { if (op == PictOpOver && (color >> 24) == 0xff) op = PictOpSrc; if (op == PictOpOver && sna_drawable_is_clear(dst->pDrawable)) @@ -5037,43 +5851,66 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, region.extents.x1, region.extents.y1, region.extents.x2, region.extents.y2)); - if (tor_init(&tor, ®ion.extents, 2*ntrap)) + region.data = NULL; + if (!sna_drawable_move_region_to_cpu(dst->pDrawable, ®ion, + MOVE_WRITE | MOVE_READ)) return true; + if (!is_solid && src->pDrawable) { + if (!sna_drawable_move_to_cpu(src->pDrawable, + MOVE_READ)) + return true; + + if (src->alphaMap && + 
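
pixmask_span_solid above relies on a pixman trick: a 1x1 a8r8g8b8 image with PIXMAN_REPEAT_NORMAL behaves as an unbounded solid source, so recolouring an entire span costs a single store through pi->bits. mul_4x8_8 is the packed four-channel analogue of mul_8_8, scaling all four bytes by the coverage at once; the version below is the standard formulation and is assumed, not known, to match the driver's:

#include <pixman.h>
#include <stdint.h>

static uint32_t mul_4x8_8(uint32_t p, uint8_t a)
{
	uint32_t rb = (p & 0x00ff00ff) * a + 0x00800080;
	uint32_t ag = ((p >> 8) & 0x00ff00ff) * a + 0x00800080;

	rb = ((rb + ((rb >> 8) & 0x00ff00ff)) >> 8) & 0x00ff00ff;
	ag = ((ag + ((ag >> 8) & 0x00ff00ff)) >> 8) & 0x00ff00ff;
	return (ag << 8) | rb;
}

int main(void)
{
	uint32_t pixel = 0xffff0000;	/* opaque red */
	pixman_image_t *solid =
		pixman_image_create_bits(PIXMAN_a8r8g8b8, 1, 1, &pixel, 4);

	pixman_image_set_repeat(solid, PIXMAN_REPEAT_NORMAL);

	/* per span: rewrite the single pixel, then composite as usual */
	pixel = mul_4x8_8(0xffff0000, 128);	/* red at half coverage */

	pixman_image_unref(solid);
	return 0;
}
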
!sna_drawable_move_to_cpu(src->alphaMap->pDrawable, + MOVE_READ)) + return true; + } + dx = dst->pDrawable->x * FAST_SAMPLES_X; dy = dst->pDrawable->y * FAST_SAMPLES_Y; - for (n = 0; n < ntrap; n++) { - xTrapezoid t; + num_threads = sna_use_threads(4*(region.extents.x2 - region.extents.x1), + region.extents.y2 - region.extents.y1, + 8); - if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t)) - continue; + DBG(("%s: %dx%d, format=%x, op=%d, lerp?=%d, num_threads=%d\n", + __FUNCTION__, + region.extents.x2 - region.extents.x1, + region.extents.y2 - region.extents.y1, + dst->format, op, lerp, num_threads)); - if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y || - pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y) - continue; + if (num_threads == 1) { + struct tor tor; + span_func_t span; - tor_add_edge(&tor, &t, &t.left, 1); - tor_add_edge(&tor, &t, &t.right, -1); - } + if (tor_init(&tor, ®ion.extents, 2*ntrap)) + return true; - DBG(("%s: move-to-cpu\n", __FUNCTION__)); - region.data = NULL; - if (sna_drawable_move_region_to_cpu(dst->pDrawable, ®ion, - MOVE_WRITE | MOVE_READ)) { - PixmapPtr pixmap; + for (n = 0; n < ntrap; n++) { + xTrapezoid t; - pixmap = get_drawable_pixmap(dst->pDrawable); - get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y); + if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t)) + continue; - DBG(("%s: format=%x, op=%d, color=%x\n", - __FUNCTION__, dst->format, op, color)); + if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y || + pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y) + continue; + + tor_add_edge(&tor, &t, &t.left, 1); + tor_add_edge(&tor, &t, &t.right, -1); + } if (lerp) { struct inplace inplace; + PixmapPtr pixmap; + int16_t dst_x, dst_y; + + pixmap = get_drawable_pixmap(dst->pDrawable); + get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y); inplace.ptr = pixmap->devPrivate.ptr; - inplace.ptr += dst_y * pixmap->devKind + dst_x; + inplace.ptr += dst_y * pixmap->devKind + dst_x * 4; inplace.stride = pixmap->devKind; inplace.color = color; @@ -5087,7 +5924,29 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, tor_render(NULL, &tor, (void*)&inplace, dst->pCompositeClip, span, false); - tor_fini(&tor); + } else if (is_solid) { + struct pixman_inplace pi; + + pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy); + pi.op = op; + pi.color = color; + + pi.bits = (uint32_t *)&pi.sx; + pi.source = pixman_image_create_bits(PIXMAN_a8r8g8b8, + 1, 1, pi.bits, 0); + pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL); + + if (dst->pCompositeClip->data) + span = pixmask_span_solid__clipped; + else + span = pixmask_span_solid; + + tor_render(NULL, &tor, (void*)&pi, + dst->pCompositeClip, span, + false); + + pixman_image_unref(pi.source); + pixman_image_unref(pi.image); } else { struct pixman_inplace pi; @@ -5108,24 +5967,106 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, tor_render(NULL, &tor, (void*)&pi, dst->pCompositeClip, span, false); - tor_fini(&tor); pixman_image_unref(pi.mask); pixman_image_unref(pi.source); pixman_image_unref(pi.image); } + + tor_fini(&tor); + } else { + struct inplace_x8r8g8b8_thread threads[num_threads]; + int y, h; + + DBG(("%s: using %d threads for inplace compositing %dx%d\n", + __FUNCTION__, num_threads, + region.extents.x2 - region.extents.x1, + region.extents.y2 - region.extents.y1)); + + threads[0].traps = traps; + threads[0].ntrap = ntrap; + threads[0].extents = region.extents; + threads[0].lerp = lerp; + 
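
Note the quiet fix above: the lerp path used to advance the base pointer by dst_x bytes, but an x8r8g8b8 pixel is four bytes wide, so the x offset must be scaled while devKind (the stride) is already in bytes. The addressing rule on its own:

#include <stdint.h>

/* address of pixel (x, y) in a 32bpp pixmap: stride in bytes, x in pixels */
static inline uint32_t *pixel_addr(void *base, int stride, int x, int y)
{
	return (uint32_t *)((uint8_t *)base + y * stride + x * 4);
}
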
threads[0].is_solid = is_solid; + threads[0].color = color; + threads[0].dx = dx; + threads[0].dy = dy; + threads[0].dst = dst; + threads[0].src = src; + threads[0].op = op; + threads[0].src_x = src_x; + threads[0].src_y = src_y; + + y = region.extents.y1; + h = region.extents.y2 - region.extents.y1; + h = (h + num_threads - 1) / num_threads; + + for (n = 1; n < num_threads; n++) { + threads[n] = threads[0]; + threads[n].extents.y1 = y; + threads[n].extents.y2 = y += h; + + sna_threads_run(inplace_x8r8g8b8_thread, &threads[n]); + } + + threads[0].extents.y1 = y; + threads[0].extents.y2 = region.extents.y2; + inplace_x8r8g8b8_thread(&threads[0]); + + sna_threads_wait(); } return true; } +struct inplace_thread { + xTrapezoid *traps; + RegionPtr clip; + span_func_t span; + struct inplace inplace; + BoxRec extents; + int dx, dy; + int draw_x, draw_y; + bool unbounded; + int ntrap; +}; + +static void inplace_thread(void *arg) +{ + struct inplace_thread *thread = arg; + struct tor tor; + int n; + + if (tor_init(&tor, &thread->extents, 2*thread->ntrap)) + return; + + for (n = 0; n < thread->ntrap; n++) { + xTrapezoid t; + + if (!project_trapezoid_onto_grid(&thread->traps[n], thread->dx, thread->dy, &t)) + continue; + + if (pixman_fixed_to_int(thread->traps[n].top) >= thread->extents.y2 - thread->draw_y || + pixman_fixed_to_int(thread->traps[n].bottom) < thread->extents.y1 - thread->draw_y) + continue; + + tor_add_edge(&tor, &t, &t.left, 1); + tor_add_edge(&tor, &t, &t.right, -1); + } + + tor_render(NULL, &tor, (void*)&thread->inplace, + thread->clip, thread->span, thread->unbounded); + + tor_fini(&tor); +} + static bool -trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst, +trapezoid_span_inplace(struct sna *sna, + CARD8 op, PicturePtr src, PicturePtr dst, PictFormatPtr maskFormat, INT16 src_x, INT16 src_y, int ntrap, xTrapezoid *traps, bool fallback) { - struct tor tor; struct inplace inplace; span_func_t span; PixmapPtr pixmap; @@ -5135,7 +6076,7 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst, bool unbounded; int16_t dst_x, dst_y; int dx, dy; - int n; + int num_threads, n; if (NO_SCAN_CONVERTER) return false; @@ -5151,7 +6092,7 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst, return false; } - if (!fallback && is_gpu(dst->pDrawable)) { + if (!fallback && is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { DBG(("%s: fallback -- can not perform operation in place, destination busy\n", __FUNCTION__)); @@ -5159,7 +6100,7 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst, } if (is_mono(dst, maskFormat)) - return trapezoid_span_mono_inplace(op, src, dst, + return trapezoid_span_mono_inplace(sna, op, src, dst, src_x, src_y, ntrap, traps); if (dst->format == PICT_a8r8g8b8 || dst->format == PICT_x8r8g8b8) @@ -5234,7 +6175,7 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst, __FUNCTION__)); do { /* XXX unwind errors? 
*/ - if (!trapezoid_span_inplace(op, src, dst, NULL, + if (!trapezoid_span_inplace(sna, op, src, dst, NULL, src_x, src_y, 1, traps++, fallback)) return false; @@ -5266,26 +6207,6 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst, region.extents.x1, region.extents.y1, region.extents.x2, region.extents.y2)); - if (tor_init(&tor, ®ion.extents, 2*ntrap)) - return true; - - dx = dst->pDrawable->x * FAST_SAMPLES_X; - dy = dst->pDrawable->y * FAST_SAMPLES_Y; - - for (n = 0; n < ntrap; n++) { - xTrapezoid t; - - if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t)) - continue; - - if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y || - pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y) - continue; - - tor_add_edge(&tor, &t, &t.left, 1); - tor_add_edge(&tor, &t, &t.right, -1); - } - if (op == PictOpSrc) { if (dst->pCompositeClip->data) span = tor_blt_src_clipped; @@ -5310,6 +6231,9 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst, op == PictOpSrc ? MOVE_WRITE | MOVE_INPLACE_HINT : MOVE_WRITE | MOVE_READ)) return true; + dx = dst->pDrawable->x * FAST_SAMPLES_X; + dy = dst->pDrawable->y * FAST_SAMPLES_Y; + get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y); inplace.ptr = pixmap->devPrivate.ptr; @@ -5317,10 +6241,72 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst, inplace.stride = pixmap->devKind; inplace.opacity = color >> 24; - tor_render(NULL, &tor, (void*)&inplace, - dst->pCompositeClip, span, unbounded); + num_threads = sna_use_threads(region.extents.x2 - region.extents.x1, + region.extents.y2 - region.extents.y1, + 8); + if (num_threads == 1) { + struct tor tor; - tor_fini(&tor); + if (tor_init(&tor, ®ion.extents, 2*ntrap)) + return true; + + for (n = 0; n < ntrap; n++) { + xTrapezoid t; + + if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t)) + continue; + + if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y || + pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y) + continue; + + tor_add_edge(&tor, &t, &t.left, 1); + tor_add_edge(&tor, &t, &t.right, -1); + } + + tor_render(NULL, &tor, (void*)&inplace, + dst->pCompositeClip, span, unbounded); + + tor_fini(&tor); + } else { + struct inplace_thread threads[num_threads]; + int y, h; + + DBG(("%s: using %d threads for inplace compositing %dx%d\n", + __FUNCTION__, num_threads, + region.extents.x2 - region.extents.x1, + region.extents.y2 - region.extents.y1)); + + threads[0].traps = traps; + threads[0].ntrap = ntrap; + threads[0].inplace = inplace; + threads[0].extents = region.extents; + threads[0].clip = dst->pCompositeClip; + threads[0].span = span; + threads[0].unbounded = unbounded; + threads[0].dx = dx; + threads[0].dy = dy; + threads[0].draw_x = dst->pDrawable->x; + threads[0].draw_y = dst->pDrawable->y; + + y = region.extents.y1; + h = region.extents.y2 - region.extents.y1; + h = (h + num_threads - 1) / num_threads; + + for (n = 1; n < num_threads; n++) { + threads[n] = threads[0]; + threads[n].extents.y1 = y; + threads[n].extents.y2 = y += h; + + sna_threads_run(inplace_thread, &threads[n]); + } + + threads[0].extents.y1 = y; + threads[0].extents.y2 = region.extents.y2; + inplace_thread(&threads[0]); + + sna_threads_wait(); + } return true; } @@ -5441,32 +6427,14 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst, region.extents.y2 = region.extents.y1 + extents.y2; region.data = NULL; - DBG(("%s: move-to-cpu\n", __FUNCTION__)); - if 
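
The migration flags in the hunk above encode what the operator needs from the destination: PictOpSrc overwrites every pixel it touches, so MOVE_WRITE | MOVE_INPLACE_HINT suffices and the existing contents need not be fetched back, while any blending operator must also pass MOVE_READ. In isolation:

unsigned flags = op == PictOpSrc
	? MOVE_WRITE | MOVE_INPLACE_HINT  /* destination fully replaced */
	: MOVE_WRITE | MOVE_READ;         /* blending reads the old pixels */
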
(!sna_drawable_move_region_to_cpu(dst->pDrawable, ®ion, - MOVE_READ | MOVE_WRITE)) - goto done; - if (dst->alphaMap && - !sna_drawable_move_to_cpu(dst->alphaMap->pDrawable, - MOVE_READ | MOVE_WRITE)) - goto done; - if (src->pDrawable) { - if (!sna_drawable_move_to_cpu(src->pDrawable, - MOVE_READ)) - goto done; - if (src->alphaMap && - !sna_drawable_move_to_cpu(src->alphaMap->pDrawable, - MOVE_READ)) - goto done; - } - DBG(("%s: fbComposite()\n", __FUNCTION__)); - fbComposite(op, src, mask, dst, - src_x + dst_x - pixman_fixed_to_int(traps[0].left.p1.x), - src_y + dst_y - pixman_fixed_to_int(traps[0].left.p1.y), - 0, 0, - dst_x, dst_y, - extents.x2, extents.y2); -done: + sna_composite_fb(op, src, mask, dst, ®ion, + src_x + dst_x - pixman_fixed_to_int(traps[0].left.p1.x), + src_y + dst_y - pixman_fixed_to_int(traps[0].left.p1.y), + 0, 0, + dst_x, dst_y, + extents.x2, extents.y2); + FreePicture(mask, 0); } sna_pixmap_destroy(scratch); @@ -5518,11 +6486,14 @@ sna_composite_trapezoids(CARD8 op, force_fallback = FORCE_FALLBACK > 0; if ((too_small(priv) || DAMAGE_IS_ALL(priv->cpu_damage)) && - !picture_is_gpu(src)) { - DBG(("%s: force fallbacks -- dst is too small, %dx%d\n", + !picture_is_gpu(sna, src) && untransformed(src)) { + DBG(("%s: force fallbacks --too small, %dx%d? %d, all-cpu? %d, src-is-cpu? %d\n", __FUNCTION__, dst->pDrawable->width, - dst->pDrawable->height)); + dst->pDrawable->height, + too_small(priv), + (int)DAMAGE_IS_ALL(priv->cpu_damage), + !picture_is_gpu(sna, src))); force_fallback = true; } if (FORCE_FALLBACK < 0) @@ -5589,24 +6560,24 @@ sna_composite_trapezoids(CARD8 op, goto fallback; if (is_mono(dst, maskFormat) && - mono_trapezoids_span_converter(op, src, dst, + mono_trapezoids_span_converter(sna, op, src, dst, xSrc, ySrc, ntrap, traps)) return; - if (trapezoid_spans_maybe_inplace(op, src, dst, maskFormat)) { + if (trapezoid_spans_maybe_inplace(sna, op, src, dst, maskFormat)) { flags |= COMPOSITE_SPANS_INPLACE_HINT; - if (trapezoid_span_inplace(op, src, dst, maskFormat, + if (trapezoid_span_inplace(sna, op, src, dst, maskFormat, xSrc, ySrc, ntrap, traps, false)) return; } - if (trapezoid_span_converter(op, src, dst, maskFormat, flags, + if (trapezoid_span_converter(sna, op, src, dst, maskFormat, flags, xSrc, ySrc, ntrap, traps)) return; - if (trapezoid_span_inplace(op, src, dst, maskFormat, + if (trapezoid_span_inplace(sna, op, src, dst, maskFormat, xSrc, ySrc, ntrap, traps, false)) return; @@ -5616,7 +6587,7 @@ sna_composite_trapezoids(CARD8 op, return; fallback: - if (trapezoid_span_inplace(op, src, dst, maskFormat, + if (trapezoid_span_inplace(sna, op, src, dst, maskFormat, xSrc, ySrc, ntrap, traps, true)) return; @@ -5625,12 +6596,13 @@ fallback: xSrc, ySrc, ntrap, traps)) return; - if (trapezoids_inplace_fallback(op, src, dst, maskFormat, ntrap, traps)) + if (trapezoids_inplace_fallback(sna, op, src, dst, maskFormat, + ntrap, traps)) return; DBG(("%s: fallback mask=%08x, ntrap=%d\n", __FUNCTION__, maskFormat ? 
(unsigned)maskFormat->format : 0, ntrap)); - trapezoids_fallback(op, src, dst, maskFormat, + trapezoids_fallback(sna, op, src, dst, maskFormat, xSrc, ySrc, ntrap, traps); } @@ -5652,7 +6624,8 @@ project_trap_onto_grid(const xTrap *in, } static bool -mono_trap_span_converter(PicturePtr dst, +mono_trap_span_converter(struct sna *sna, + PicturePtr dst, INT16 x, INT16 y, int ntrap, xTrap *traps) { @@ -5677,7 +6650,7 @@ mono_trap_span_converter(PicturePtr dst, mono.clip.extents.x2, mono.clip.extents.y2, x, y)); - mono.sna = to_sna_from_drawable(dst->pDrawable); + mono.sna = sna; if (!mono_init(&mono, 2*ntrap)) return false; @@ -5722,11 +6695,11 @@ mono_trap_span_converter(PicturePtr dst, } static bool -trap_span_converter(PicturePtr dst, +trap_span_converter(struct sna *sna, + PicturePtr dst, INT16 src_x, INT16 src_y, int ntrap, xTrap *trap) { - struct sna *sna; struct sna_composite_spans_op tmp; struct tor tor; BoxRec extents; @@ -5740,9 +6713,8 @@ trap_span_converter(PicturePtr dst, return false; if (dst->polyEdge == PolyEdgeSharp) - return mono_trap_span_converter(dst, src_x, src_y, ntrap, trap); + return mono_trap_span_converter(sna, dst, src_x, src_y, ntrap, trap); - sna = to_sna_from_drawable(dst->pDrawable); if (!sna->render.check_composite_spans(sna, PictOpAdd, sna->render.white_picture, dst, dst->pCompositeClip->extents.x2 - dst->pCompositeClip->extents.x1, dst->pCompositeClip->extents.y2 - dst->pCompositeClip->extents.y1, @@ -5806,7 +6778,7 @@ trap_span_converter(PicturePtr dst, } tor_render(sna, &tor, &tmp, clip, - choose_span(&tmp, dst, NULL, PictOpAdd, clip), false); + choose_span(&tmp, dst, NULL, clip), false); skip: tor_fini(&tor); @@ -5827,7 +6799,6 @@ static void mark_damaged(PixmapPtr pixmap, struct sna_pixmap *priv, pixmap->drawable.width, pixmap->drawable.height); list_del(&priv->list); - priv->undamaged = false; } else { sna_damage_add_box(&priv->gpu_damage, box); sna_damage_subtract_box(&priv->cpu_damage, box); @@ -5835,11 +6806,11 @@ static void mark_damaged(PixmapPtr pixmap, struct sna_pixmap *priv, } static bool -trap_mask_converter(PicturePtr picture, +trap_mask_converter(struct sna *sna, + PicturePtr picture, INT16 x, INT16 y, int ntrap, xTrap *trap) { - struct sna *sna; struct tor tor; ScreenPtr screen = picture->pDrawable->pScreen; PixmapPtr scratch, pixmap; @@ -6033,13 +7004,18 @@ trap_upload(PicturePtr picture, void sna_add_traps(PicturePtr picture, INT16 x, INT16 y, int n, xTrap *t) { + struct sna *sna; + DBG(("%s (%d, %d) x %d\n", __FUNCTION__, x, y, n)); - if (is_gpu(picture->pDrawable)) { - if (trap_span_converter(picture, x, y, n, t)) + sna = to_sna_from_drawable(picture->pDrawable); + if (is_gpu(sna, picture->pDrawable, PREFER_GPU_SPANS)) { + if (trap_span_converter(sna, picture, x, y, n, t)) return; + } - if (trap_mask_converter(picture, x, y, n, t)) + if (is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER)) { + if (trap_mask_converter(sna, picture, x, y, n, t)) return; if (trap_upload(picture, x, y, n, t)) @@ -6070,6 +7046,7 @@ project_point_onto_grid(const xPointFixed *in, out->y = dy + pixman_fixed_to_grid(in->y); } +#if HAS_PIXMAN_TRIANGLES static inline bool xTriangleValid(const xTriangle *t) { @@ -6104,7 +7081,8 @@ project_triangle_onto_grid(const xTriangle *in, } static bool -mono_triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, +mono_triangles_span_converter(struct sna *sna, + CARD8 op, PicturePtr src, PicturePtr dst, INT16 src_x, INT16 src_y, int count, xTriangle *tri) { @@ -6115,7 +7093,7 @@ mono_triangles_span_converter(CARD8 op, 
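
xTriangleValid above arrives without its body in this hunk; rejecting degenerate triangles only needs the usual collinearity test, so a plausible reconstruction (an assumption, not the driver's code) is a cross product of two edges, widened to 64 bits to survive the 16.16 fixed-point coordinate range:

#include <stdbool.h>
#include <stdint.h>

static bool triangle_valid(int32_t x1, int32_t y1,
			   int32_t x2, int32_t y2,
			   int32_t x3, int32_t y3)
{
	/* zero cross product: collinear points, nothing to rasterise */
	return (int64_t)(x2 - x1) * (y3 - y1) !=
	       (int64_t)(x3 - x1) * (y2 - y1);
}
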
PicturePtr src, PicturePtr dst, bool was_clear; int n; - mono.sna = to_sna_from_drawable(dst->pDrawable); + mono.sna = sna; dst_x = pixman_fixed_to_int(tri[0].p1.x); dst_y = pixman_fixed_to_int(tri[0].p1.y); @@ -6177,6 +7155,10 @@ mono_triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, mono.clip.extents.x2 - mono.clip.extents.x1, mono.clip.extents.y2 - mono.clip.extents.y1, &mono.op)) { + if (mono.clip.data == NULL && mono.op.damage == NULL) + mono.span = mono_span__fast; + else + mono.span = mono_span; mono_render(&mono); mono.op.done(mono.sna, &mono.op); } @@ -6220,6 +7202,10 @@ mono_triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, mono.clip.extents.x2 - mono.clip.extents.x1, mono.clip.extents.y2 - mono.clip.extents.y1, &mono.op)) { + if (mono.clip.data == NULL && mono.op.damage == NULL) + mono.span = mono_span__fast; + else + mono.span = mono_span; mono_render(&mono); mono.op.done(mono.sna, &mono.op); } @@ -6232,11 +7218,11 @@ mono_triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, } static bool -triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, +triangles_span_converter(struct sna *sna, + CARD8 op, PicturePtr src, PicturePtr dst, PictFormatPtr maskFormat, INT16 src_x, INT16 src_y, int count, xTriangle *tri) { - struct sna *sna; struct sna_composite_spans_op tmp; struct tor tor; BoxRec extents; @@ -6249,7 +7235,7 @@ triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, return false; if (is_mono(dst, maskFormat)) - return mono_triangles_span_converter(op, src, dst, + return mono_triangles_span_converter(sna, op, src, dst, src_x, src_y, count, tri); @@ -6260,7 +7246,6 @@ triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, return false; } - sna = to_sna_from_drawable(dst->pDrawable); if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, 0)) { DBG(("%s: fallback -- composite spans not supported\n", __FUNCTION__)); @@ -6352,7 +7337,7 @@ triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, } tor_render(sna, &tor, &tmp, &clip, - choose_span(&tmp, dst, maskFormat, op, &clip), + choose_span(&tmp, dst, maskFormat, &clip), !was_clear && maskFormat && !operator_is_bounded(op)); skip: @@ -6585,7 +7570,9 @@ sna_composite_triangles(CARD8 op, INT16 xSrc, INT16 ySrc, int n, xTriangle *tri) { - if (triangles_span_converter(op, src, dst, maskFormat, + struct sna *sna = to_sna_from_drawable(dst->pDrawable); + + if (triangles_span_converter(sna, op, src, dst, maskFormat, xSrc, ySrc, n, tri)) return; @@ -6599,11 +7586,11 @@ sna_composite_triangles(CARD8 op, } static bool -tristrip_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, +tristrip_span_converter(struct sna *sna, + CARD8 op, PicturePtr src, PicturePtr dst, PictFormatPtr maskFormat, INT16 src_x, INT16 src_y, int count, xPointFixed *points) { - struct sna *sna; struct sna_composite_spans_op tmp; struct tor tor; BoxRec extents; @@ -6624,7 +7611,6 @@ tristrip_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, return false; } - sna = to_sna_from_drawable(dst->pDrawable); if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, 0)) { DBG(("%s: fallback -- composite spans not supported\n", __FUNCTION__)); @@ -6726,7 +7712,7 @@ tristrip_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, assert(tor.polygon->num_edges <= 2*count); tor_render(sna, &tor, &tmp, &clip, - choose_span(&tmp, dst, maskFormat, op, &clip), + choose_span(&tmp, dst, maskFormat, &clip), !was_clear && maskFormat && !operator_is_bounded(op)); skip: 
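
The new sna_vertex.c below initialises a mutex, a condition variable and an active counter for the render state. Only the initialisation is shown, but that triple conventionally implements the join the threaded paths need: workers mark themselves active, and the producer sleeps until the count returns to zero. A minimal sketch of that protocol (its binding to the vertex buffer is an assumption):

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t idle = PTHREAD_COND_INITIALIZER;
static int active;

static void worker_begin(void)
{
	pthread_mutex_lock(&lock);
	active++;
	pthread_mutex_unlock(&lock);
}

static void worker_end(void)
{
	pthread_mutex_lock(&lock);
	if (--active == 0)
		pthread_cond_signal(&idle);
	pthread_mutex_unlock(&lock);
}

static void wait_until_idle(void)
{
	pthread_mutex_lock(&lock);
	while (active)
		pthread_cond_wait(&idle, &lock);
	pthread_mutex_unlock(&lock);
}
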
@@ -6865,7 +7851,9 @@ sna_composite_tristrip(CARD8 op, INT16 xSrc, INT16 ySrc, int n, xPointFixed *points) { - if (tristrip_span_converter(op, src, dst, maskFormat, xSrc, ySrc, n, points)) + struct sna *sna = to_sna_from_drawable(dst->pDrawable); + + if (tristrip_span_converter(sna, op, src, dst, maskFormat, xSrc, ySrc, n, points)) return; tristrip_fallback(op, src, dst, maskFormat, xSrc, ySrc, n, points); @@ -7001,3 +7989,4 @@ sna_composite_trifan(CARD8 op, { trifan_fallback(op, src, dst, maskFormat, xSrc, ySrc, n, points); } +#endif diff --git a/src/sna/sna_vertex.c b/src/sna/sna_vertex.c new file mode 100644 index 000000000..6755d9aad --- /dev/null +++ b/src/sna/sna_vertex.c @@ -0,0 +1,37 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#include "sna.h" + +#include <unistd.h> + +void sna_vertex_init(struct sna *sna) +{ + pthread_mutex_init(&sna->render.lock, NULL); + pthread_cond_init(&sna->render.wait, NULL); + sna->render.active = 0; +} diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c index 7bf20e96b..07fa829fa 100644 --- a/src/sna/sna_video.c +++ b/src/sna/sna_video.c @@ -100,9 +100,16 @@ sna_video_buffer(struct sna *sna, if (video->buf && __kgem_bo_size(video->buf) < frame->size) sna_video_free_buffers(sna, video); - if (video->buf == NULL) - video->buf = kgem_create_linear(&sna->kgem, frame->size, - CREATE_GTT_MAP); + if (video->buf == NULL) { + if (video->tiled) { + video->buf = kgem_create_2d(&sna->kgem, + frame->width, frame->height, 32, + I915_TILING_X, CREATE_EXACT); + } else { + video->buf = kgem_create_linear(&sna->kgem, frame->size, + CREATE_GTT_MAP); + } + } return video->buf; } @@ -166,14 +173,20 @@ sna_video_clip_helper(ScrnInfoPtr scrn, if (crtc_region != reg) RegionUninit(crtc_region); - frame->top = y1 >> 16; - frame->left = (x1 >> 16) & ~1; - frame->npixels = ALIGN(((x2 + 0xffff) >> 16), 2) - frame->left; + frame->src.x1 = x1 >> 16; + frame->src.y1 = y1 >> 16; + frame->src.x2 = (x2 + 0xffff) >> 16; + frame->src.y2 = (y2 + 0xffff) >> 16; + + frame->image.x1 = frame->src.x1 & ~1; + frame->image.x2 = ALIGN(frame->src.x2, 2); if (is_planar_fourcc(frame->id)) { - frame->top &= ~1; - frame->nlines = ALIGN(((y2 + 0xffff) >> 16), 2) - frame->top; - } else - frame->nlines = ((y2 + 0xffff) >> 16) - frame->top; + frame->image.y1 = frame->src.y1 & ~1; + frame->image.y2 = ALIGN(frame->src.y2, 2); + } else { + frame->image.y1 = frame->src.y1; + frame->image.y2 = frame->src.y2; + } return ret; } @@ -186,51 +199,38 @@ sna_video_frame_init(struct sna *sna, { int align; + DBG(("%s: id=%d [planar? %d], width=%d, height=%d, align=%d\n", + __FUNCTION__, id, is_planar_fourcc(id), width, height, video->alignment)); + assert(width && height); + frame->bo = NULL; frame->id = id; frame->width = width; frame->height = height; - /* Only needs to be DWORD-aligned for textured on i915, but overlay has - * stricter requirements. - */ - if (video->textured) { - align = 4; - } else { - if (sna->kgem.gen >= 40) - /* Actually the alignment is 64 bytes, too. But the - * stride must be at least 512 bytes. Take the easy fix - * and align on 512 bytes unconditionally. */ - align = 512; - else if (sna->kgem.gen < 21) - /* Harsh, errata on these chipsets limit the stride - * to be a multiple of 256 bytes. - */ - align = 256; - else - align = 64; - } - + align = video->alignment; #if SNA_XVMC /* for i915 xvmc, hw requires 1kb aligned surfaces */ - if (id == FOURCC_XVMC && sna->kgem.gen < 40) + if (id == FOURCC_XVMC && sna->kgem.gen < 040 && align < 1024) align = 1024; #endif - - /* Determine the desired destination pitch (representing the chroma's pitch, - * in the planar case. + /* Determine the desired destination pitch (representing the + * chroma's pitch in the planar case). 
*/ if (is_planar_fourcc(id)) { + assert((width & 1) == 0); + assert((height & 1) == 0); if (video->rotation & (RR_Rotate_90 | RR_Rotate_270)) { frame->pitch[0] = ALIGN((height / 2), align); frame->pitch[1] = ALIGN(height, align); - frame->size = 3U * frame->pitch[0] * width; + frame->size = width; } else { frame->pitch[0] = ALIGN((width / 2), align); frame->pitch[1] = ALIGN(width, align); - frame->size = 3U * frame->pitch[0] * height; + frame->size = height; } + frame->size *= frame->pitch[0] + frame->pitch[1]; } else { if (video->rotation & (RR_Rotate_90 | RR_Rotate_270)) { frame->pitch[0] = ALIGN((height << 1), align); @@ -251,51 +251,68 @@ sna_video_frame_init(struct sna *sna, frame->VBufOffset = frame->UBufOffset + (int)frame->pitch[0] * height / 2; } + + assert(frame->size); } -static void sna_memcpy_plane(uint8_t *dst, const uint8_t *src, - int height, int width, - int dstPitch, int srcPitch, - Rotation rotation) +static void sna_memcpy_plane(struct sna_video *video, + uint8_t *dst, const uint8_t *src, + const struct sna_video_frame *frame, int sub) { + int dstPitch = frame->pitch[!sub], srcPitch; const uint8_t *s; int i, j = 0; + int x, y, w, h; + + x = frame->image.x1; + y = frame->image.y1; + w = frame->image.x2 - frame->image.x1; + h = frame->image.y2 - frame->image.y1; + if (sub) { + x >>= 1; w >>= 1; + y >>= 1; h >>= 1; + srcPitch = ALIGN((frame->width >> 1), 4); + } else + srcPitch = ALIGN(frame->width, 4); + + src += y * srcPitch + x; + if (!video->textured) + x = y = 0; - switch (rotation) { + switch (video->rotation) { case RR_Rotate_0: - /* optimise for the case of no clipping */ - if (srcPitch == dstPitch && srcPitch == width) - memcpy(dst, src, srcPitch * height); - else while (height--) { - memcpy(dst, src, width); + dst += y * dstPitch + x; + if (srcPitch == dstPitch && srcPitch == w) + memcpy(dst, src, srcPitch * h); + else while (h--) { + memcpy(dst, src, w); src += srcPitch; dst += dstPitch; } break; case RR_Rotate_90: - for (i = 0; i < height; i++) { + for (i = 0; i < h; i++) { s = src; - for (j = 0; j < width; j++) { - dst[(i) + ((width - j - 1) * dstPitch)] = *s++; - } + for (j = 0; j < w; j++) + dst[i + ((x + w - j - 1) * dstPitch)] = *s++; src += srcPitch; } break; case RR_Rotate_180: - for (i = 0; i < height; i++) { + for (i = 0; i < h; i++) { s = src; - for (j = 0; j < width; j++) { - dst[(width - j - 1) + - ((height - i - 1) * dstPitch)] = *s++; + for (j = 0; j < w; j++) { + dst[(x + w - j - 1) + + ((h - i - 1) * dstPitch)] = *s++; } src += srcPitch; } break; case RR_Rotate_270: - for (i = 0; i < height; i++) { + for (i = 0; i < h; i++) { s = src; - for (j = 0; j < width; j++) { - dst[(height - i - 1) + (j * dstPitch)] = *s++; + for (j = 0; j < w; j++) { + dst[(h - i - 1) + (x + j * dstPitch)] = *s++; } src += srcPitch; } @@ -309,36 +326,22 @@ sna_copy_planar_data(struct sna_video *video, const uint8_t *src, uint8_t *dst) { uint8_t *d; - int w = frame->npixels; - int h = frame->nlines; - int pitch; - pitch = ALIGN(frame->width, 4); - sna_memcpy_plane(dst, src + frame->top * pitch + frame->left, - h, w, frame->pitch[1], pitch, video->rotation); - - src += frame->height * pitch; /* move over Luma plane */ - - /* align to beginning of chroma planes */ - pitch = ALIGN((frame->width >> 1), 0x4); - src += (frame->top >> 1) * pitch + (frame->left >> 1); - w >>= 1; - h >>= 1; + sna_memcpy_plane(video, dst, src, frame, 0); + src += frame->height * ALIGN(frame->width, 4); if (frame->id == FOURCC_I420) d = dst + frame->UBufOffset; else d = dst + frame->VBufOffset; - - 
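
The planar sizing above collapses to size = (pitch[0] + pitch[1]) * height because the two half-height chroma planes at pitch[0] together cost exactly one full-height plane. Worked through for an unrotated YV12 frame (helper names are illustrative):

#include <stdint.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

/* 640x480, 4-byte alignment: luma pitch 640, chroma pitch 320,
 * size = (640 + 320) * 480 = 460800 = width * height * 3 / 2 */
static uint32_t yv12_size(int width, int height, int align)
{
	uint32_t pitch_c = ALIGN(width / 2, align);	/* pitch[0]: U and V */
	uint32_t pitch_y = ALIGN(width, align);		/* pitch[1]: Y */
	return (pitch_y + pitch_c) * height;
}
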
sna_memcpy_plane(d, src, h, w, frame->pitch[0], pitch, video->rotation); - src += (frame->height >> 1) * pitch; /* move over Chroma plane */ + sna_memcpy_plane(video, d, src, frame, 1); + src += (frame->height >> 1) * ALIGN(frame->width >> 1, 4); if (frame->id == FOURCC_I420) d = dst + frame->VBufOffset; else d = dst + frame->UBufOffset; - - sna_memcpy_plane(d, src, h, w, frame->pitch[0], pitch, video->rotation); + sna_memcpy_plane(video, d, src, frame, 1); } static void @@ -349,11 +352,22 @@ sna_copy_packed_data(struct sna_video *video, { int pitch = frame->width << 1; const uint8_t *src, *s; - int w = frame->npixels; - int h = frame->nlines; + int x, y, w, h; int i, j; - src = buf + (frame->top * pitch) + (frame->left << 1); + if (video->textured) { + /* XXX support copying cropped extents */ + x = y = 0; + w = frame->width; + h = frame->height; + } else { + x = frame->image.x1; + y = frame->image.y1; + w = frame->image.x2 - frame->image.x1; + h = frame->image.y2 - frame->image.y1; + } + + src = buf + (y * pitch) + (x << 1); switch (video->rotation) { case RR_Rotate_0: @@ -376,7 +390,7 @@ sna_copy_packed_data(struct sna_video *video, src += pitch; } h >>= 1; - src = buf + (frame->top * pitch) + (frame->left << 1); + src = buf + (y * pitch) + (x << 1); for (i = 0; i < h; i += 2) { for (j = 0; j < w; j += 2) { /* Copy U */ @@ -412,7 +426,7 @@ sna_copy_packed_data(struct sna_video *video, src += pitch; } h >>= 1; - src = buf + (frame->top * pitch) + (frame->left << 1); + src = buf + (y * pitch) + (x << 1); for (i = 0; i < h; i += 2) { for (j = 0; j < w; j += 2) { /* Copy U */ @@ -435,27 +449,28 @@ sna_video_copy_data(struct sna *sna, { uint8_t *dst; - DBG(("%s: handle=%d, size=%dx%d, rotation=%d\n", + DBG(("%s: handle=%d, size=%dx%d [%d], rotation=%d, is-texture=%d\n", __FUNCTION__, frame->bo ? 
frame->bo->handle : 0, - frame->width, frame->height, video->rotation)); - DBG(("%s: top=%d, left=%d\n", __FUNCTION__, frame->top, frame->left)); + frame->width, frame->height, frame->size, + video->rotation, video->textured)); + DBG(("%s: image=(%d, %d), (%d, %d), source=(%d, %d), (%d, %d)\n", + __FUNCTION__, + frame->image.x1, frame->image.y1, frame->image.x2, frame->image.y2, + frame->src.x1, frame->src.y1, frame->src.x2, frame->src.y2)); + assert(frame->width && frame->height); + assert(frame->size); /* In the common case, we can simply the upload in a single pwrite */ - if (video->rotation == RR_Rotate_0) { + if (video->rotation == RR_Rotate_0 && !video->tiled) { if (is_planar_fourcc(frame->id)) { - uint16_t pitch[2] = { - ALIGN((frame->width >> 1), 0x4), - ALIGN(frame->width, 0x4), - }; - if (pitch[0] == frame->pitch[0] && - pitch[1] == frame->pitch[1] && - frame->top == 0 && frame->left == 0) { - uint32_t len = - (uint32_t)pitch[1]*frame->height + - (uint32_t)pitch[0]*frame->height; + int w = frame->image.x2 - frame->image.x1; + int h = frame->image.y2 - frame->image.y1; + if (ALIGN(h, 2) == frame->height && + ALIGN(w >> 1, 4) == frame->pitch[0] && + ALIGN(w, 4) == frame->pitch[1]) { if (frame->bo) { kgem_bo_write(&sna->kgem, frame->bo, - buf, len); + buf, frame->size); } else { frame->bo = kgem_create_buffer(&sna->kgem, frame->size, KGEM_BUFFER_WRITE | KGEM_BUFFER_WRITE_INPLACE, @@ -463,7 +478,7 @@ sna_video_copy_data(struct sna *sna, if (frame->bo == NULL) return false; - memcpy(dst, buf, len); + memcpy(dst, buf, frame->size); } if (frame->id != FOURCC_I420) { uint32_t tmp; @@ -477,8 +492,8 @@ sna_video_copy_data(struct sna *sna, if (frame->width*2 == frame->pitch[0]) { if (frame->bo) { kgem_bo_write(&sna->kgem, frame->bo, - buf + (2U*frame->top * frame->width) + (frame->left << 1), - 2U*frame->nlines*frame->width); + buf + (2U*frame->image.y1 * frame->width) + (frame->image.x1 << 1), + 2U*(frame->image.y2-frame->image.y1)*frame->width); } else { frame->bo = kgem_create_buffer(&sna->kgem, frame->size, KGEM_BUFFER_WRITE | KGEM_BUFFER_WRITE_INPLACE, @@ -487,8 +502,8 @@ sna_video_copy_data(struct sna *sna, return false; memcpy(dst, - buf + (frame->top * frame->width*2) + (frame->left << 1), - 2U*frame->nlines*frame->width); + buf + (frame->image.y1 * frame->width*2) + (frame->image.x1 << 1), + 2U*(frame->image.y2-frame->image.y1)*frame->width); } return true; } diff --git a/src/sna/sna_video.h b/src/sna/sna_video.h index 3ce72c009..c0c023cf9 100644 --- a/src/sna/sna_video.h +++ b/src/sna/sna_video.h @@ -57,6 +57,8 @@ struct sna_video { struct kgem_bo *old_buf[2]; struct kgem_bo *buf; + int alignment; + bool tiled; bool textured; Rotation rotation; int plane; @@ -75,8 +77,8 @@ struct sna_video_frame { uint16_t pitch[2]; /* extents */ - uint16_t top, left; - uint16_t npixels, nlines; + BoxRec image; + BoxRec src; }; void sna_video_init(struct sna *sna, ScreenPtr screen); diff --git a/src/sna/sna_video_hwmc.c b/src/sna/sna_video_hwmc.c index b0e8d25d2..b3e065d95 100644 --- a/src/sna/sna_video_hwmc.c +++ b/src/sna/sna_video_hwmc.c @@ -36,63 +36,72 @@ #include <X11/extensions/XvMC.h> #include <fourcc.h> -static int create_subpicture(ScrnInfoPtr scrn, XvMCSubpicturePtr subpicture, - int *num_priv, CARD32 ** priv) +extern DevPrivateKey XF86XvScreenKey; + +static int create_subpicture(XvMCSubpicturePtr sub, int *size, CARD32 **priv) { return Success; } -static void destroy_subpicture(ScrnInfoPtr scrn, XvMCSubpicturePtr subpicture) +static void destroy_subpicture(XvMCSubpicturePtr sub) { } 
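
The fast path above uploads the whole client buffer with one kgem_bo_write (a single pwrite) whenever nothing was cropped and the computed pitches already match the packed client layout; any mismatch falls through to the per-plane, possibly rotating, copy routines. The planar test restated on its own:

#include <stdbool.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

static bool planar_fits_pwrite(int img_w, int img_h, int frame_h,
			       int pitch_chroma, int pitch_luma)
{
	return ALIGN(img_h, 2) == frame_h &&	/* full height, even rows */
	       ALIGN(img_w / 2, 4) == pitch_chroma &&
	       ALIGN(img_w, 4) == pitch_luma;
}
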
-static int create_surface(ScrnInfoPtr scrn, XvMCSurfacePtr surface, - int *num_priv, CARD32 ** priv) +static int create_surface(XvMCSurfacePtr surface, int *size, CARD32 **priv) { return Success; } -static void destroy_surface(ScrnInfoPtr scrn, XvMCSurfacePtr surface) +static void destroy_surface(XvMCSurfacePtr surface) { } -static int create_context(ScrnInfoPtr scrn, XvMCContextPtr pContext, - int *num_priv, CARD32 **priv) +static int create_context(XvPortPtr port, XvMCContextPtr ctx, + int *size, CARD32 **out) { - struct sna *sna = to_sna(scrn); - struct sna_xvmc_hw_context *contextRec; + struct sna *sna = to_sna_from_screen(ctx->pScreen); + struct intel_xvmc_hw_context { + unsigned int type; + union { + struct { + unsigned int use_phys_addr : 1; + } i915; + struct { + unsigned int is_g4x:1; + unsigned int is_965_q:1; + unsigned int is_igdng:1; + } i965; + }; + } *priv; - *priv = calloc(1, sizeof(struct sna_xvmc_hw_context)); - contextRec = (struct sna_xvmc_hw_context *) *priv; - if (!contextRec) { - *num_priv = 0; - return BadAlloc; - } + ctx->port_priv = port->devPriv.ptr; - *num_priv = sizeof(struct sna_xvmc_hw_context) >> 2; + priv = calloc(1, sizeof(*priv)); + if (priv == NULL) + return BadAlloc; - if (sna->kgem.gen >= 40) { - if (sna->kgem.gen >= 45) - contextRec->type = XVMC_I965_MPEG2_VLD; + if (sna->kgem.gen >= 040) { + if (sna->kgem.gen >= 045) + priv->type = XVMC_I965_MPEG2_VLD; else - contextRec->type = XVMC_I965_MPEG2_MC; - contextRec->i965.is_g4x = sna->kgem.gen == 45; - contextRec->i965.is_965_q = IS_965_Q(sna); - contextRec->i965.is_igdng = sna->kgem.gen == 50; - } else { - contextRec->type = XVMC_I915_MPEG2_MC; - contextRec->i915.use_phys_addr = 0; - } + priv->type = XVMC_I965_MPEG2_MC; + priv->i965.is_g4x = sna->kgem.gen == 045; + priv->i965.is_965_q = IS_965_Q(sna); + priv->i965.is_igdng = sna->kgem.gen == 050; + } else + priv->type = XVMC_I915_MPEG2_MC; + *size = sizeof(*priv) >> 2; + *out = priv; return Success; } -static void destroy_context(ScrnInfoPtr scrn, XvMCContextPtr context) +static void destroy_context(XvMCContextPtr ctx) { } /* i915 hwmc support */ -static XF86MCSurfaceInfoRec i915_YV12_mpg2_surface = { +static XvMCSurfaceInfoRec i915_YV12_mpg2_surface = { FOURCC_YV12, XVMC_CHROMA_FORMAT_420, 0, @@ -107,7 +116,7 @@ static XF86MCSurfaceInfoRec i915_YV12_mpg2_surface = { NULL, }; -static XF86MCSurfaceInfoRec i915_YV12_mpg1_surface = { +static XvMCSurfaceInfoRec i915_YV12_mpg1_surface = { FOURCC_YV12, XVMC_CHROMA_FORMAT_420, 0, @@ -121,9 +130,9 @@ static XF86MCSurfaceInfoRec i915_YV12_mpg1_surface = { NULL, }; -static XF86MCSurfaceInfoPtr surface_info_i915[2] = { - (XF86MCSurfaceInfoPtr) & i915_YV12_mpg2_surface, - (XF86MCSurfaceInfoPtr) & i915_YV12_mpg1_surface +static XvMCSurfaceInfoPtr surface_info_i915[2] = { + &i915_YV12_mpg2_surface, + &i915_YV12_mpg1_surface }; /* i965 and later hwmc support */ @@ -131,7 +140,7 @@ static XF86MCSurfaceInfoPtr surface_info_i915[2] = { #define XVMC_VLD 0x00020000 #endif -static XF86MCSurfaceInfoRec yv12_mpeg2_vld_surface = { +static XvMCSurfaceInfoRec yv12_mpeg2_vld_surface = { FOURCC_YV12, XVMC_CHROMA_FORMAT_420, 0, @@ -144,7 +153,7 @@ static XF86MCSurfaceInfoRec yv12_mpeg2_vld_surface = { NULL }; -static XF86MCSurfaceInfoRec yv12_mpeg2_i965_surface = { +static XvMCSurfaceInfoRec yv12_mpeg2_i965_surface = { FOURCC_YV12, XVMC_CHROMA_FORMAT_420, 0, @@ -159,7 +168,7 @@ static XF86MCSurfaceInfoRec yv12_mpeg2_i965_surface = { NULL }; -static XF86MCSurfaceInfoRec yv12_mpeg1_i965_surface = { +static XvMCSurfaceInfoRec 
yv12_mpeg1_i965_surface = { FOURCC_YV12, XVMC_CHROMA_FORMAT_420, 0, @@ -176,12 +185,12 @@ static XF86MCSurfaceInfoRec yv12_mpeg1_i965_surface = { NULL }; -static XF86MCSurfaceInfoPtr surface_info_i965[] = { +static XvMCSurfaceInfoPtr surface_info_i965[] = { &yv12_mpeg2_i965_surface, &yv12_mpeg1_i965_surface }; -static XF86MCSurfaceInfoPtr surface_info_vld[] = { +static XvMCSurfaceInfoPtr surface_info_vld[] = { &yv12_mpeg2_vld_surface, &yv12_mpeg2_i965_surface, }; @@ -191,63 +200,76 @@ Bool sna_video_xvmc_setup(struct sna *sna, ScreenPtr screen, XF86VideoAdaptorPtr target) { - XF86MCAdaptorRec *pAdapt; + XvMCAdaptorRec *adaptors; + XvScreenPtr xv; const char *name; - char buf[64]; + char bus[64]; + int i; + + if (!xf86LoaderCheckSymbol("XvMCScreenInit")) + return FALSE; /* Needs KMS support. */ - if (sna->kgem.gen < 31) + if (sna->kgem.gen < 031) return FALSE; /* Not implemented */ - if (sna->kgem.gen >= 60) + if (sna->kgem.gen >= 060) return FALSE; - pAdapt = calloc(1, sizeof(XF86MCAdaptorRec)); - if (!pAdapt) + adaptors = calloc(1, sizeof(XvMCAdaptorRec)); + if (adaptors == NULL) return FALSE; - pAdapt->name = target->name; - pAdapt->num_subpictures = 0; - pAdapt->subpictures = NULL; - pAdapt->CreateContext = create_context; - pAdapt->DestroyContext = destroy_context; - pAdapt->CreateSurface = create_surface; - pAdapt->DestroySurface = destroy_surface; - pAdapt->CreateSubpicture = create_subpicture; - pAdapt->DestroySubpicture = destroy_subpicture; - - if (sna->kgem.gen >= 45) { + xv = dixLookupPrivate(&screen->devPrivates, XF86XvScreenKey); + for (i = 0; i < xv->nAdaptors; i++) { + if (strcmp(xv->pAdaptors[i].name, target->name) == 0) { + adaptors->xv_adaptor = &xv->pAdaptors[i]; + break; + } + } + assert(adaptors->xv_adaptor); + + adaptors->num_subpictures = 0; + adaptors->subpictures = NULL; + adaptors->CreateContext = create_context; + adaptors->DestroyContext = destroy_context; + adaptors->CreateSurface = create_surface; + adaptors->DestroySurface = destroy_surface; + adaptors->CreateSubpicture = create_subpicture; + adaptors->DestroySubpicture = destroy_subpicture; + + if (sna->kgem.gen >= 045) { name = "xvmc_vld", - pAdapt->num_surfaces = ARRAY_SIZE(surface_info_vld); - pAdapt->surfaces = surface_info_vld; - } else if (sna->kgem.gen >= 40) { + adaptors->num_surfaces = ARRAY_SIZE(surface_info_vld); + adaptors->surfaces = surface_info_vld; + } else if (sna->kgem.gen >= 040) { name = "i965_xvmc", - pAdapt->num_surfaces = ARRAY_SIZE(surface_info_i965); - pAdapt->surfaces = surface_info_i965; + adaptors->num_surfaces = ARRAY_SIZE(surface_info_i965); + adaptors->surfaces = surface_info_i965; } else { name = "i915_xvmc", - pAdapt->num_surfaces = ARRAY_SIZE(surface_info_i915); - pAdapt->surfaces = surface_info_i915; + adaptors->num_surfaces = ARRAY_SIZE(surface_info_i915); + adaptors->surfaces = surface_info_i915; } - if (xf86XvMCScreenInit(screen, 1, &pAdapt)) { - xf86DrvMsg(sna->scrn->scrnIndex, X_INFO, - "[XvMC] %s driver initialized.\n", - name); - } else { + if (XvMCScreenInit(screen, 1, adaptors) != Success) { xf86DrvMsg(sna->scrn->scrnIndex, X_INFO, "[XvMC] Failed to initialize XvMC.\n"); + free(adaptors); return FALSE; } - sprintf(buf, "pci:%04x:%02x:%02x.%d", + sprintf(bus, "pci:%04x:%02x:%02x.%d", sna->PciInfo->domain, sna->PciInfo->bus, sna->PciInfo->dev, sna->PciInfo->func); - xf86XvMCRegisterDRInfo(screen, SNA_XVMC_LIBNAME, - buf, + xf86XvMCRegisterDRInfo(screen, SNA_XVMC_LIBNAME, bus, SNA_XVMC_MAJOR, SNA_XVMC_MINOR, SNA_XVMC_PATCHLEVEL); + + xf86DrvMsg(sna->scrn->scrnIndex,
X_INFO, + "[XvMC] %s driver initialized.\n", + name); return TRUE; } diff --git a/src/sna/sna_video_hwmc.h b/src/sna/sna_video_hwmc.h index 2494d44bd..44de456e9 100644 --- a/src/sna/sna_video_hwmc.h +++ b/src/sna/sna_video_hwmc.h @@ -32,38 +32,12 @@ #define SNA_XVMC_MINOR 1 #define SNA_XVMC_PATCHLEVEL 0 -/* - * Commands that client submits through XvPutImage: - */ - -#define SNA_XVMC_COMMAND_DISPLAY 0x00 -#define SNA_XVMC_COMMAND_UNDISPLAY 0x01 - /* hw xvmc support type */ #define XVMC_I915_MPEG2_MC 0x01 #define XVMC_I965_MPEG2_MC 0x02 #define XVMC_I945_MPEG2_VLD 0x04 #define XVMC_I965_MPEG2_VLD 0x08 -struct sna_xvmc_hw_context { - unsigned int type; - union { - struct { - unsigned int use_phys_addr : 1; - } i915; - struct { - unsigned int is_g4x:1; - unsigned int is_965_q:1; - unsigned int is_igdng:1; - } i965; - }; -}; - -/* Intel private XvMC command to DDX driver */ -struct sna_xvmc_command { - uint32_t handle; -}; - #ifdef _SNA_XVMC_SERVER_ #include <xf86xvmc.h> Bool sna_video_xvmc_setup(struct sna *sna, diff --git a/src/sna/sna_video_overlay.c b/src/sna/sna_video_overlay.c index b73e9ddf8..3655b8763 100644 --- a/src/sna/sna_video_overlay.c +++ b/src/sna/sna_video_overlay.c @@ -41,7 +41,7 @@ #define MAKE_ATOM(a) MakeAtom(a, sizeof(a) - 1, TRUE) -#define HAS_GAMMA(sna) ((sna)->kgem.gen >= 30) +#define HAS_GAMMA(sna) ((sna)->kgem.gen >= 030) static Atom xvBrightness, xvContrast, xvSaturation, xvColorKey, xvPipe; static Atom xvGamma0, xvGamma1, xvGamma2, xvGamma3, xvGamma4, xvGamma5; @@ -296,7 +296,7 @@ sna_video_overlay_query_best_size(ScrnInfoPtr scrn, drw_h = vid_h >> 1; } - if (sna->kgem.gen < 21) { + if (sna->kgem.gen < 021) { max_w = IMAGE_MAX_WIDTH_LEGACY; max_h = IMAGE_MAX_HEIGHT_LEGACY; } else { @@ -532,6 +532,7 @@ sna_video_overlay_put_image(ScrnInfoPtr scrn, return BadAlloc; } + frame.bo->domain = DOMAIN_NONE; sna_video_buffer_fini(sna, video); /* update cliplist */ @@ -554,7 +555,7 @@ sna_video_overlay_query_video_attributes(ScrnInfoPtr scrn, DBG(("%s: w is %d, h is %d\n", __FUNCTION__, *w, *h)); - if (sna->kgem.gen < 21) { + if (sna->kgem.gen < 021) { if (*w > IMAGE_MAX_WIDTH_LEGACY) *w = IMAGE_MAX_WIDTH_LEGACY; if (*h > IMAGE_MAX_HEIGHT_LEGACY) @@ -664,7 +665,7 @@ XF86VideoAdaptorPtr sna_video_overlay_setup(struct sna *sna, adaptor->nEncodings = 1; adaptor->pEncodings = xnfalloc(sizeof(DummyEncoding)); memcpy(adaptor->pEncodings, DummyEncoding, sizeof(DummyEncoding)); - if (sna->kgem.gen < 21) { + if (sna->kgem.gen < 021) { adaptor->pEncodings->width = IMAGE_MAX_WIDTH_LEGACY; adaptor->pEncodings->height = IMAGE_MAX_HEIGHT_LEGACY; } @@ -701,6 +702,18 @@ XF86VideoAdaptorPtr sna_video_overlay_setup(struct sna *sna, adaptor->PutImage = sna_video_overlay_put_image; adaptor->QueryImageAttributes = sna_video_overlay_query_video_attributes; + if (sna->kgem.gen >= 040) + /* Actually the alignment is 64 bytes, too. But the + * stride must be at least 512 bytes. Take the easy fix + * and align on 512 bytes unconditionally. */ + video->alignment = 512; + else if (sna->kgem.gen < 021) + /* Harsh, errata on these chipsets limit the stride + * to be a multiple of 256 bytes. 
+ */ + video->alignment = 256; + else + video->alignment = 64; video->textured = false; video->color_key = sna_video_overlay_color_key(sna); video->brightness = -19; /* (255/219) * -16 */ diff --git a/src/sna/sna_video_sprite.c b/src/sna/sna_video_sprite.c index a912590fe..7737460b5 100644 --- a/src/sna/sna_video_sprite.c +++ b/src/sna/sna_video_sprite.c @@ -37,8 +37,11 @@ #include <xf86xv.h> #include <X11/extensions/Xv.h> #include <fourcc.h> -#include <drm_fourcc.h> #include <i915_drm.h> +#include <errno.h> + +#ifdef DRM_IOCTL_MODE_GETPLANERESOURCES +#include <drm_fourcc.h> #define IMAGE_MAX_WIDTH 2048 #define IMAGE_MAX_HEIGHT 2048 @@ -60,13 +63,14 @@ static XF86AttributeRec attribs[] = { static void sna_video_sprite_off(struct sna *sna, struct sna_video *video) { + struct drm_mode_set_plane s; + if (video->plane == 0) return; - if (drmModeSetPlane(sna->kgem.fd, - video->plane, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0)) + memset(&s, 0, sizeof(s)); + s.plane_id = video->plane; + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETPLANE, &s)) xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, "failed to disable plane\n"); @@ -114,8 +118,15 @@ static void sna_video_sprite_best_size(ScrnInfoPtr scrn, Bool motion, unsigned int *p_w, unsigned int *p_h, pointer data) { - *p_w = vid_w; - *p_h = vid_h; + struct sna *sna = to_sna(scrn); + + if (sna->kgem.gen == 075) { + *p_w = vid_w; + *p_h = vid_h; + } else { + *p_w = drw_w; + *p_h = drw_h; + } } static void @@ -174,7 +185,10 @@ sna_video_sprite_show(struct sna *sna, xf86CrtcPtr crtc, BoxPtr dstBox) { - int plane = sna_crtc_to_plane(crtc); + struct drm_mode_set_plane s; + + VG_CLEAR(s); + s.plane_id = sna_crtc_to_plane(crtc); update_dst_box_to_crtc_coords(sna, crtc, dstBox); if (crtc->rotation & (RR_Rotate_90 | RR_Rotate_270)) { @@ -184,13 +198,13 @@ sna_video_sprite_show(struct sna *sna, } #if defined(DRM_I915_SET_SPRITE_DESTKEY) - if (video->color_key_changed || video->plane != plane) { + if (video->color_key_changed || video->plane != s.plane_id) { struct drm_intel_set_sprite_destkey set; DBG(("%s: updating color key: %x\n", __FUNCTION__, video->color_key)); - set.plane_id = plane; + set.plane_id = s.plane_id; set.value = video->color_key; if (drmIoctl(sna->kgem.fd, @@ -221,8 +235,9 @@ sna_video_sprite_show(struct sna *sna, pitches[0] = frame->pitch[0]; offsets[0] = 0; - DBG(("%s: creating new fb for handle=%d\n", - __FUNCTION__, frame->bo->handle)); + DBG(("%s: creating new fb for handle=%d, width=%d, height=%d, stride=%d\n", + __FUNCTION__, frame->bo->handle, + frame->width, frame->height, frame->pitch[0])); if (drmModeAddFB2(sna->kgem.fd, frame->width, frame->height, pixel_format, @@ -236,21 +251,33 @@ sna_video_sprite_show(struct sna *sna, frame->bo->scanout = true; } - DBG(("%s: updating plane=%d, handle=%d [fb %d], dst=(%d,%d)x(%d,%d)\n", - __FUNCTION__, plane, frame->bo->handle, frame->bo->delta, - dstBox->x1, dstBox->y1, - dstBox->x2 - dstBox->x1, dstBox->y2 - dstBox->y1)); assert(frame->bo->scanout); assert(frame->bo->delta); - if (drmModeSetPlane(sna->kgem.fd, - plane, sna_crtc_id(crtc), frame->bo->delta, 0, - dstBox->x1, dstBox->y1, - dstBox->x2 - dstBox->x1, dstBox->y2 - dstBox->y1, - 0, 0, frame->width << 16, frame->height << 16)) + s.crtc_id = sna_crtc_id(crtc); + s.fb_id = frame->bo->delta; + s.flags = 0; + s.crtc_x = dstBox->x1; + s.crtc_y = dstBox->y1; + s.crtc_w = dstBox->x2 - dstBox->x1; + s.crtc_h = dstBox->y2 - dstBox->y1; + s.src_x = 0; + s.src_y = 0; + s.src_w = (frame->image.x2 - frame->image.x1) << 16; + s.src_h = (frame->image.y2 - 
frame->image.y1) << 16; + + DBG(("%s: updating crtc=%d, plane=%d, handle=%d [fb %d], dst=(%d,%d)x(%d,%d), src=(%d,%d)x(%d,%d)\n", + __FUNCTION__, s.crtc_id, s.plane_id, frame->bo->handle, s.fb_id, + s.crtc_x, s.crtc_y, s.crtc_w, s.crtc_h, + s.src_x >> 16, s.src_y >> 16, s.src_w >> 16, s.src_h >> 16)); + + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETPLANE, &s)) { + DBG(("SET_PLANE failed: ret=%d\n", errno)); return false; + } - video->plane = plane; + frame->bo->domain = DOMAIN_NONE; + video->plane = s.plane_id; return true; } @@ -278,7 +305,7 @@ static int sna_video_sprite_put_image(ScrnInfoPtr scrn, clip)) return Success; - if (!crtc || !sna_crtc_to_plane(crtc)) { + if (!crtc || sna_crtc_to_plane(crtc) == 0) { /* If the video isn't visible on any CRTC, turn it off */ sna_video_sprite_off(sna, video); return Success; @@ -370,6 +397,7 @@ XF86VideoAdaptorPtr sna_video_sprite_setup(struct sna *sna, memset(&r, 0, sizeof(struct drm_mode_get_plane_res)); if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPLANERESOURCES, &r)) return NULL; + DBG(("%s: %d sprite planes\n", __FUNCTION__, r.count_planes)); if (r.count_planes == 0) return NULL; @@ -411,7 +439,7 @@ XF86VideoAdaptorPtr sna_video_sprite_setup(struct sna *sna, adaptor->PutImage = sna_video_sprite_put_image; adaptor->QueryImageAttributes = sna_video_sprite_query_attrs; - video->textured = false; + video->alignment = 64; video->color_key = sna_video_sprite_color_key(sna); video->color_key_changed = true; video->brightness = -19; /* (255/219) * -16 */ @@ -433,3 +461,9 @@ XF86VideoAdaptorPtr sna_video_sprite_setup(struct sna *sna, return adaptor; } +#else +XF86VideoAdaptorPtr sna_video_sprite_setup(struct sna *sna, ScreenPtr screen) +{ + return NULL; +} +#endif diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c index 27fc09f47..e5cae859e 100644 --- a/src/sna/sna_video_textured.c +++ b/src/sna/sna_video_textured.c @@ -254,7 +254,7 @@ sna_video_textured_put_image(ScrnInfoPtr scrn, DBG(("%s: using passthrough, name=%d\n", __FUNCTION__, *(uint32_t *)buf)); - if (sna->kgem.gen < 31) { + if (sna->kgem.gen < 031) { /* XXX: i915 is not supported and needs some * serious care.
grep for KMS in i915_hwmc.c */ return BadAlloc; @@ -267,6 +267,10 @@ sna_video_textured_put_image(ScrnInfoPtr scrn, } assert(kgem_bo_size(frame.bo) >= frame.size); + frame.image.x1 = 0; + frame.image.y1 = 0; + frame.image.x2 = frame.width; + frame.image.y2 = frame.height; } else { if (!sna_video_copy_data(sna, video, &frame, buf)) { DBG(("%s: failed to copy frame\n", __FUNCTION__)); @@ -276,15 +280,17 @@ sna_video_textured_put_image(ScrnInfoPtr scrn, } if (crtc && video->SyncToVblank != 0 && - sna_pixmap_is_scanout(sna, pixmap)) + sna_pixmap_is_scanout(sna, pixmap)) { + kgem_set_mode(&sna->kgem, KGEM_RENDER, sna_pixmap(pixmap)->gpu_bo); flush = sna_wait_for_scanline(sna, pixmap, crtc, &clip->extents); + } ret = Success; if (!sna->render.video(sna, video, &frame, clip, - src_w, src_h, - drw_w, drw_h, - pixmap)) { + src_w, src_h, drw_w, drw_h, + drw_x - src_x, drw_y - src_y, + pixmap)) { DBG(("%s: failed to render video\n", __FUNCTION__)); ret = BadAlloc; } else @@ -355,7 +361,7 @@ sna_video_textured_query(ScrnInfoPtr scrn, #ifdef SNA_XVMC case FOURCC_XVMC: *h = (*h + 1) & ~1; - size = sizeof(struct sna_xvmc_command); + size = sizeof(uint32_t); if (pitches) pitches[0] = size; break; @@ -447,6 +453,7 @@ XF86VideoAdaptorPtr sna_video_textured_setup(struct sna *sna, struct sna_video *v = &video[i]; v->textured = true; + v->alignment = 4; v->rotation = RR_Rotate_0; v->SyncToVblank = 1; diff --git a/src/xvmc/Makefile.am b/src/xvmc/Makefile.am index d3ed4499a..51c98b0c8 100644 --- a/src/xvmc/Makefile.am +++ b/src/xvmc/Makefile.am @@ -17,7 +17,8 @@ libIntelXvMC_la_SOURCES = intel_xvmc.c \ intel_batchbuffer.h AM_CFLAGS = @XORG_CFLAGS@ @DRM_CFLAGS@ @DRI_CFLAGS@ \ - @XVMCLIB_CFLAGS@ -I$(top_srcdir)/src -DTRUE=1 -DFALSE=0 + @XVMCLIB_CFLAGS@ @XCB_CFLAGS@ \ + -I$(top_srcdir)/src -DTRUE=1 -DFALSE=0 libIntelXvMC_la_LDFLAGS = -version-number 1:0:0 -libIntelXvMC_la_LIBADD = @DRI_LIBS@ @DRM_LIBS@ @XVMCLIB_LIBS@ -lpthread -ldrm_intel +libIntelXvMC_la_LIBADD = @DRI_LIBS@ @DRM_LIBS@ @XVMCLIB_LIBS@ @XCB_LIBS@ @DRMINTEL_LIBS@ -lpthread diff --git a/src/xvmc/Makefile.in b/src/xvmc/Makefile.in index 028ea3729..36f565528 100644 --- a/src/xvmc/Makefile.in +++ b/src/xvmc/Makefile.in @@ -245,7 +245,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MAN_SUBSTS = @MAN_SUBSTS@ @@ -284,6 +283,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@ VERSION = @VERSION@ X11_CFLAGS = @X11_CFLAGS@ X11_LIBS = @X11_LIBS@ +XCB_CFLAGS = @XCB_CFLAGS@ +XCB_LIBS = @XCB_LIBS@ XORG_CFLAGS = @XORG_CFLAGS@ XORG_LIBS = @XORG_LIBS@ XORG_MAN_PAGE = @XORG_MAN_PAGE@ @@ -357,15 +358,16 @@ libIntelXvMC_la_SOURCES = intel_xvmc.c \ intel_batchbuffer.h AM_CFLAGS = @XORG_CFLAGS@ @DRM_CFLAGS@ @DRI_CFLAGS@ \ - @XVMCLIB_CFLAGS@ -I$(top_srcdir)/src -DTRUE=1 -DFALSE=0 + @XVMCLIB_CFLAGS@ @XCB_CFLAGS@ \ + -I$(top_srcdir)/src -DTRUE=1 -DFALSE=0 libIntelXvMC_la_LDFLAGS = -version-number 1:0:0 -libIntelXvMC_la_LIBADD = @DRI_LIBS@ @DRM_LIBS@ @XVMCLIB_LIBS@ -lpthread -ldrm_intel +libIntelXvMC_la_LIBADD = @DRI_LIBS@ @DRM_LIBS@ @XVMCLIB_LIBS@ @XCB_LIBS@ @DRMINTEL_LIBS@ -lpthread all: all-recursive .SUFFIXES: .SUFFIXES: .c .lo .o .obj -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -390,9 +392,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: 
$(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) +$(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): install-libLTLIBRARIES: $(lib_LTLIBRARIES) diff --git a/src/xvmc/shader/Makefile.in b/src/xvmc/shader/Makefile.in index 04fe1979c..1910c289b 100644 --- a/src/xvmc/shader/Makefile.in +++ b/src/xvmc/shader/Makefile.in @@ -183,7 +183,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MAN_SUBSTS = @MAN_SUBSTS@ @@ -222,6 +221,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@ VERSION = @VERSION@ X11_CFLAGS = @X11_CFLAGS@ X11_LIBS = @X11_LIBS@ +XCB_CFLAGS = @XCB_CFLAGS@ +XCB_LIBS = @XCB_LIBS@ XORG_CFLAGS = @XORG_CFLAGS@ XORG_LIBS = @XORG_LIBS@ XORG_MAN_PAGE = @XORG_MAN_PAGE@ @@ -284,7 +285,7 @@ SUBDIRS = mc vld all: all-recursive .SUFFIXES: -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -309,9 +310,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) +$(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): diff --git a/src/xvmc/shader/mc/Makefile.am b/src/xvmc/shader/mc/Makefile.am index c1bff77dd..8d6576917 100644 --- a/src/xvmc/shader/mc/Makefile.am +++ b/src/xvmc/shader/mc/Makefile.am @@ -109,9 +109,9 @@ if HAVE_GEN4ASM SUFFIXES = .g4a .g4b .g4a.g4b: - m4 -I$(srcdir) $(srcdir)/$*.g4a > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m + $(AM_V_GEN)m4 -I$(srcdir) $(srcdir)/$*.g4a > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m -$(INTEL_G4B): $(INTEL_G4I) +$(INTEL_G4B): $(INTEL_GEN4ASM) $(INTEL_G4I) BUILT_SOURCES= $(INTEL_G4B) diff --git a/src/xvmc/shader/mc/Makefile.in b/src/xvmc/shader/mc/Makefile.in index 165ebc807..1a196be3e 100644 --- a/src/xvmc/shader/mc/Makefile.in +++ b/src/xvmc/shader/mc/Makefile.in @@ -143,7 +143,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MAN_SUBSTS = @MAN_SUBSTS@ @@ -182,6 +181,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@ VERSION = @VERSION@ X11_CFLAGS = @X11_CFLAGS@ X11_LIBS = @X11_LIBS@ +XCB_CFLAGS = @XCB_CFLAGS@ +XCB_LIBS = @XCB_LIBS@ XORG_CFLAGS = @XORG_CFLAGS@ XORG_LIBS = @XORG_LIBS@ XORG_MAN_PAGE = @XORG_MAN_PAGE@ @@ -353,7 +354,7 @@ all: $(BUILT_SOURCES) .SUFFIXES: .SUFFIXES: .g4a .g4b -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case 
'$(am__configure_deps)' in \ *$$dep*) \ @@ -378,9 +379,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) +$(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): @@ -545,9 +546,9 @@ uninstall-am: uninstall uninstall-am @HAVE_GEN4ASM_TRUE@.g4a.g4b: -@HAVE_GEN4ASM_TRUE@ m4 -I$(srcdir) $(srcdir)/$*.g4a > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m +@HAVE_GEN4ASM_TRUE@ $(AM_V_GEN)m4 -I$(srcdir) $(srcdir)/$*.g4a > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m -@HAVE_GEN4ASM_TRUE@$(INTEL_G4B): $(INTEL_G4I) +@HAVE_GEN4ASM_TRUE@$(INTEL_G4B): $(INTEL_GEN4ASM) $(INTEL_G4I) @HAVE_GEN4ASM_TRUE@clean-local: @HAVE_GEN4ASM_TRUE@ -rm -f $(INTEL_G4B) diff --git a/src/xvmc/shader/vld/Makefile.am b/src/xvmc/shader/vld/Makefile.am index 9280f1513..8f1047e02 100644 --- a/src/xvmc/shader/vld/Makefile.am +++ b/src/xvmc/shader/vld/Makefile.am @@ -62,9 +62,9 @@ if HAVE_GEN4ASM SUFFIXES = .g4a .g4b .g4a.g4b: - m4 $*.g4a > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m + $(AM_V_GEN)m4 $*.g4a > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m -$(INTEL_G4B): $(INTEL_G4I) +$(INTEL_G4B): $(INTEL_GEN4ASM) $(INTEL_G4I) BUILT_SOURCES= $(INTEL_G4B) diff --git a/src/xvmc/shader/vld/Makefile.in b/src/xvmc/shader/vld/Makefile.in index 5cf44c18f..595948ebc 100644 --- a/src/xvmc/shader/vld/Makefile.in +++ b/src/xvmc/shader/vld/Makefile.in @@ -143,7 +143,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MAN_SUBSTS = @MAN_SUBSTS@ @@ -182,6 +181,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@ VERSION = @VERSION@ X11_CFLAGS = @X11_CFLAGS@ X11_LIBS = @X11_LIBS@ +XCB_CFLAGS = @XCB_CFLAGS@ +XCB_LIBS = @XCB_LIBS@ XORG_CFLAGS = @XORG_CFLAGS@ XORG_LIBS = @XORG_LIBS@ XORG_MAN_PAGE = @XORG_MAN_PAGE@ @@ -306,7 +307,7 @@ all: $(BUILT_SOURCES) .SUFFIXES: .SUFFIXES: .g4a .g4b -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -331,9 +332,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) +$(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): @@ -498,9 +499,9 @@ uninstall-am: uninstall uninstall-am @HAVE_GEN4ASM_TRUE@.g4a.g4b: -@HAVE_GEN4ASM_TRUE@ m4 $*.g4a > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m +@HAVE_GEN4ASM_TRUE@ 
$(AM_V_GEN)m4 $*.g4a > $*.g4m && @INTEL_GEN4ASM@ -o $@ $*.g4m && @INTEL_GEN4ASM@ -g 5 -o $@.gen5 $*.g4m && rm $*.g4m -@HAVE_GEN4ASM_TRUE@$(INTEL_G4B): $(INTEL_G4I) +@HAVE_GEN4ASM_TRUE@$(INTEL_G4B): $(INTEL_GEN4ASM) $(INTEL_G4I) @HAVE_GEN4ASM_TRUE@clean-local: @HAVE_GEN4ASM_TRUE@ -rm -f $(INTEL_G4B) diff --git a/test/Makefile.am b/test/Makefile.am index 96c87f824..0f9bd7d09 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -21,6 +21,8 @@ stress_TESTS = \ check_PROGRAMS = $(stress_TESTS) +noinst_PROGRAMS = lowlevel-blt-bench + AM_CFLAGS = @CWARNFLAGS@ @X11_CFLAGS@ @DRM_CFLAGS@ LDADD = libtest.la @X11_LIBS@ -lXfixes @DRM_LIBS@ -lrt @@ -35,4 +37,11 @@ libtest_la_SOURCES = \ dri2.h \ $(NULL) -EXTRA_DIST = README +vsync.avi: mkvsync.sh + ./mkvsync.sh $@ + +clean-vsync-avi: + rm -rf vsync.avi .build.tmp + +EXTRA_DIST = README mkvsync.sh +clean-local: clean-vsync-avi diff --git a/test/Makefile.in b/test/Makefile.in index 315802172..b462d6f4b 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -15,6 +15,7 @@ @SET_MAKE@ + VPATH = @srcdir@ am__make_dryrun = \ { \ @@ -52,6 +53,7 @@ POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ check_PROGRAMS = $(am__EXEEXT_1) +noinst_PROGRAMS = lowlevel-blt-bench$(EXEEXT) subdir = test DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 @@ -79,6 +81,7 @@ am__EXEEXT_1 = basic-fillrect$(EXEEXT) basic-rectangle$(EXEEXT) \ render-composite-solid$(EXEEXT) render-copyarea$(EXEEXT) \ render-copyarea-size$(EXEEXT) render-copy-alphaless$(EXEEXT) \ mixed-stress$(EXEEXT) dri2-swap$(EXEEXT) +PROGRAMS = $(noinst_PROGRAMS) basic_copyarea_SOURCES = basic-copyarea.c basic_copyarea_OBJECTS = basic-copyarea.$(OBJEXT) basic_copyarea_LDADD = $(LDADD) @@ -115,6 +118,10 @@ dri2_swap_SOURCES = dri2-swap.c dri2_swap_OBJECTS = dri2-swap.$(OBJEXT) dri2_swap_LDADD = $(LDADD) dri2_swap_DEPENDENCIES = libtest.la +lowlevel_blt_bench_SOURCES = lowlevel-blt-bench.c +lowlevel_blt_bench_OBJECTS = lowlevel-blt-bench.$(OBJEXT) +lowlevel_blt_bench_LDADD = $(LDADD) +lowlevel_blt_bench_DEPENDENCIES = libtest.la mixed_stress_SOURCES = mixed-stress.c mixed_stress_OBJECTS = mixed-stress.$(OBJEXT) mixed_stress_LDADD = $(LDADD) @@ -180,14 +187,14 @@ am__v_GEN_0 = @echo " GEN " $@; SOURCES = $(libtest_la_SOURCES) basic-copyarea.c basic-copyarea-size.c \ basic-fillrect.c basic-lines.c basic-putimage.c \ basic-rectangle.c basic-stress.c basic-string.c dri2-swap.c \ - mixed-stress.c render-composite-solid.c \ + lowlevel-blt-bench.c mixed-stress.c render-composite-solid.c \ render-copy-alphaless.c render-copyarea.c \ render-copyarea-size.c render-fill.c render-fill-copy.c \ render-trapezoid.c render-trapezoid-image.c DIST_SOURCES = $(libtest_la_SOURCES) basic-copyarea.c \ basic-copyarea-size.c basic-fillrect.c basic-lines.c \ basic-putimage.c basic-rectangle.c basic-stress.c \ - basic-string.c dri2-swap.c mixed-stress.c \ + basic-string.c dri2-swap.c lowlevel-blt-bench.c mixed-stress.c \ render-composite-solid.c render-copy-alphaless.c \ render-copyarea.c render-copyarea-size.c render-fill.c \ render-fill-copy.c render-trapezoid.c render-trapezoid-image.c @@ -268,7 +275,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MAN_SUBSTS = @MAN_SUBSTS@ @@ -307,6 +313,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@ VERSION = @VERSION@ X11_CFLAGS = @X11_CFLAGS@ X11_LIBS = @X11_LIBS@ +XCB_CFLAGS = @XCB_CFLAGS@ +XCB_LIBS = @XCB_LIBS@ XORG_CFLAGS = 
@XORG_CFLAGS@ XORG_LIBS = @XORG_LIBS@ XORG_MAN_PAGE = @XORG_MAN_PAGE@ @@ -399,12 +407,12 @@ libtest_la_SOURCES = \ dri2.h \ $(NULL) -EXTRA_DIST = README +EXTRA_DIST = README mkvsync.sh all: all-am .SUFFIXES: .SUFFIXES: .c .lo .o .obj -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -429,9 +437,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) +$(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): @@ -454,6 +462,15 @@ clean-checkPROGRAMS: list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list basic-copyarea$(EXEEXT): $(basic_copyarea_OBJECTS) $(basic_copyarea_DEPENDENCIES) $(EXTRA_basic_copyarea_DEPENDENCIES) @rm -f basic-copyarea$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_copyarea_OBJECTS) $(basic_copyarea_LDADD) $(LIBS) @@ -481,6 +498,9 @@ basic-string$(EXEEXT): $(basic_string_OBJECTS) $(basic_string_DEPENDENCIES) $(EX dri2-swap$(EXEEXT): $(dri2_swap_OBJECTS) $(dri2_swap_DEPENDENCIES) $(EXTRA_dri2_swap_DEPENDENCIES) @rm -f dri2-swap$(EXEEXT) $(AM_V_CCLD)$(LINK) $(dri2_swap_OBJECTS) $(dri2_swap_LDADD) $(LIBS) +lowlevel-blt-bench$(EXEEXT): $(lowlevel_blt_bench_OBJECTS) $(lowlevel_blt_bench_DEPENDENCIES) $(EXTRA_lowlevel_blt_bench_DEPENDENCIES) + @rm -f lowlevel-blt-bench$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(lowlevel_blt_bench_OBJECTS) $(lowlevel_blt_bench_LDADD) $(LIBS) mixed-stress$(EXEEXT): $(mixed_stress_OBJECTS) $(mixed_stress_DEPENDENCIES) $(EXTRA_mixed_stress_DEPENDENCIES) @rm -f mixed-stress$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mixed_stress_OBJECTS) $(mixed_stress_LDADD) $(LIBS) @@ -525,6 +545,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/basic-string.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dri2-swap.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dri2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lowlevel-blt-bench.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mixed-stress.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/render-composite-solid.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/render-copy-alphaless.Po@am__quote@ @@ -651,7 +672,7 @@ distdir: $(DISTFILES) check-am: all-am $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) check: check-am -all-am: Makefile $(LTLIBRARIES) +all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) installdirs: install: install-am install-exec: install-exec-am @@ -685,8 +706,8 @@ maintainer-clean-generic: @echo "it deletes files that may require special tools to rebuild." 
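The new lowlevel-blt-bench test below times asynchronous Render requests, which is only meaningful if the clock is stopped after the X server has actually finished the work; the timer helpers added to test_display.c further down therefore end each measurement with a tiny XGetImage to force a server round trip. A sketch of that idiom in isolation (the function name is invented; this simplifies the helpers shown below):

	#include <time.h>
	#include <X11/Xlib.h>

	/* The XGetImage reply cannot arrive until the server has processed
	 * every request queued before it, so the second clock_gettime()
	 * observes the true completion time of the submitted rendering. */
	static double measure_since(Display *dpy, Window root,
				    const struct timespec *start)
	{
		struct timespec now;
		XImage *sync;

		sync = XGetImage(dpy, root, 0, 0, 1, 1, AllPlanes, ZPixmap);
		clock_gettime(CLOCK_MONOTONIC, &now);
		XDestroyImage(sync);

		return (now.tv_sec - start->tv_sec) +
		       1e-9 * (now.tv_nsec - start->tv_nsec);
	}

This is essentially what test_timer_stop() in the diff does; the sketch only spells out why the XGetImage sits before the final clock read.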
clean: clean-am -clean-am: clean-checkPROGRAMS clean-generic clean-libtool \ - clean-noinstLTLIBRARIES mostlyclean-am +clean-am: clean-checkPROGRAMS clean-generic clean-libtool clean-local \ + clean-noinstLTLIBRARIES clean-noinstPROGRAMS mostlyclean-am distclean: distclean-am -rm -rf ./$(DEPDIR) @@ -757,19 +778,26 @@ uninstall-am: .MAKE: check-am install-am install-strip .PHONY: CTAGS GTAGS all all-am check check-am clean \ - clean-checkPROGRAMS clean-generic clean-libtool \ - clean-noinstLTLIBRARIES ctags distclean distclean-compile \ - distclean-generic distclean-libtool distclean-tags distdir dvi \ - dvi-am html html-am info info-am install install-am \ - install-data install-data-am install-dvi install-dvi-am \ - install-exec install-exec-am install-html install-html-am \ - install-info install-info-am install-man install-pdf \ - install-pdf-am install-ps install-ps-am install-strip \ - installcheck installcheck-am installdirs maintainer-clean \ - maintainer-clean-generic mostlyclean mostlyclean-compile \ - mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ - tags uninstall uninstall-am - + clean-checkPROGRAMS clean-generic clean-libtool clean-local \ + clean-noinstLTLIBRARIES clean-noinstPROGRAMS ctags distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags uninstall uninstall-am + + +vsync.avi: mkvsync.sh + ./mkvsync.sh $@ + +clean-vsync-avi: + rm -rf vsync.avi .build.tmp +clean-local: clean-vsync-avi # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. diff --git a/test/lowlevel-blt-bench.c b/test/lowlevel-blt-bench.c new file mode 100644 index 000000000..0cea0a81a --- /dev/null +++ b/test/lowlevel-blt-bench.c @@ -0,0 +1,135 @@ +/* + * Copyright © 2009 Nokia Corporation + * Copyright © 2010 Movial Creative Technologies Oy + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <stdbool.h> + +#include <X11/X.h> +#include <X11/Xutil.h> /* for XDestroyImage */ +#include <pixman.h> /* for pixman blt functions */ + +#include "test.h" + +static const struct format { + const char *name; + pixman_format_code_t pixman_format; +} formats[] = { + { "a8r8g8b8", PIXMAN_a8r8g8b8 }, + { "x8r8g8b8", PIXMAN_x8r8g8b8 }, + { "a8", PIXMAN_a8 }, + { "a4", PIXMAN_a4 }, + { "a1", PIXMAN_a1 }, +}; + +static const struct op { + const char *name; +} ops[] = { + [PictOpClear] = { "Clear" }, + [PictOpSrc] = { "Src" }, + [PictOpDst] = { "Dst" }, + [PictOpOver] = { "Over" }, + [PictOpOverReverse] = { "OverReverse" }, + [PictOpIn] = { "In" }, + [PictOpInReverse] = { "InReverse" }, + [PictOpOut] = { "Out" }, + [PictOpOutReverse] = { "OutReverse" }, + [PictOpAtop] = { "Atop" }, + [PictOpAtopReverse] = { "AtopReverse" }, + [PictOpXor] = { "Xor" }, + [PictOpAdd] = { "Add" }, + [PictOpSaturate] = { "Saturate" }, +}; + +static double _bench(struct test_display *t, enum target target_type, + int op, int src_format, + int loops) +{ + XRenderColor render_color = { 0x8000, 0x8000, 0x8000, 0x8000 }; + struct test_target target; + Pixmap pixmap; + Picture picture; + struct timespec tv; + double elapsed; + + test_target_create_render(t, target_type, &target); + XRenderFillRectangle(t->dpy, PictOpClear, target.picture, &render_color, + 0, 0, target.width, target.height); + + pixmap = XCreatePixmap(t->dpy, t->root, + target.width, target.height, + PIXMAN_FORMAT_DEPTH(formats[src_format].pixman_format)); + + picture = XRenderCreatePicture(t->dpy, pixmap, + XRenderFindStandardFormat(t->dpy, src_format), + 0, NULL); + XRenderFillRectangle(t->dpy, PictOpSrc, picture, &render_color, + 0, 0, target.width, target.height); + + test_timer_start(t, &tv); + while (loops--) + XRenderComposite(t->dpy, op, + picture, 0, target.picture, + 0, 0, + 0, 0, + 0, 0, + target.width, target.height); + elapsed = test_timer_stop(t, &tv); + + XRenderFreePicture(t->dpy, picture); + XFreePixmap(t->dpy, pixmap); + test_target_destroy_render(t, &target); + + return elapsed; +} + +static void bench(struct test *t, enum target target, int op, int sf) +{ + double real, ref; + + ref = _bench(&t->ref, target, op, sf, 1000); + real = _bench(&t->real, target, op, sf, 1000); + + fprintf (stdout, "Testing %s with %s: ref=%f, real=%f\n", + formats[sf].name, ops[op].name, ref, real); +} + +int main(int argc, char **argv) +{ + struct test test; + int op, sf; + + test_init(&test, argc, argv); + + for (op = 0; op < sizeof(ops)/sizeof(ops[0]); op++) { + for (sf = 0; sf < sizeof(formats)/sizeof(formats[0]); sf++) + bench(&test, ROOT, op, sf); + fprintf (stdout, "\n"); + } + + return 0; +} diff --git a/test/mkvsync.sh b/test/mkvsync.sh new file mode 100755 index 000000000..dd96ad8df --- /dev/null +++ b/test/mkvsync.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +OUT="$1" +[ -n "$OUT" ] || OUT="vsync.avi" + +TMP=".build.tmp" + +rm -rf ${TMP} +mkdir ${TMP} +convert -size 640x480 -depth 24 canvas:black png24:${TMP}/black.png +convert -size 640x480 -depth 24 canvas:white png24:${TMP}/white.png + +mkdir ${TMP}/anim + +for ((a=0; $a < 1000; a=$a+2)); do + ln -s ../black.png ${TMP}/anim/$a.png +done + +for ((a=1; $a < 1000; a=$a+2)); do + ln -s ../white.png ${TMP}/anim/$a.png +done + +mencoder "mf://${TMP}/anim/*.png" -v -vf-clr -mf fps=60 -o "${OUT}" -ovc lavc +exitcode=$? 
+rm -rf ${TMP} + +exit ${exitcode} diff --git a/test/test.h b/test/test.h index 1e3995bbf..7ef4dca4c 100644 --- a/test/test.h +++ b/test/test.h @@ -2,6 +2,8 @@ #define TEST_H #include <stdint.h> +#include <time.h> + #include <X11/Xlib.h> #include <X11/extensions/XShm.h> #include <X11/extensions/Xrender.h> @@ -107,6 +109,9 @@ static inline uint32_t color(uint8_t red, uint8_t green, uint8_t blue, uint8_t a return alpha << 24 | ra >> 8 << 16 | ga >> 8 << 8 | ba >> 8; } +void test_timer_start(struct test_display *t, struct timespec *tv); +double test_timer_stop(struct test_display *t, struct timespec *tv); + #ifndef MAX #define MAX(a,b) ((a) > (b) ? (a) : (b)) #endif diff --git a/test/test_display.c b/test/test_display.c index ad3e40bc7..b5e7e06ed 100644 --- a/test/test_display.c +++ b/test/test_display.c @@ -148,3 +148,20 @@ void test_init(struct test *test, int argc, char **argv) memset(test, 0, sizeof(*test)); test_get_displays(argc, argv, &test->real, &test->ref); } + +void test_timer_start(struct test_display *t, struct timespec *tv) +{ + clock_gettime(CLOCK_MONOTONIC, tv); +} + +double test_timer_stop(struct test_display *t, struct timespec *tv) +{ + XImage *image; + struct timespec now; + + image = XGetImage(t->dpy, t->root, 0, 0, 1, 1, AllPlanes, ZPixmap); + clock_gettime(CLOCK_MONOTONIC, &now); + XDestroyImage(image); + + return (now.tv_sec - tv->tv_sec) + 1e-9*(now.tv_nsec - tv->tv_nsec); +} diff --git a/uxa/Makefile.in b/uxa/Makefile.in index 417eb9cfc..6a01effe9 100644 --- a/uxa/Makefile.in +++ b/uxa/Makefile.in @@ -175,7 +175,6 @@ LIB_MAN_SUFFIX = @LIB_MAN_SUFFIX@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MAN_SUBSTS = @MAN_SUBSTS@ @@ -214,6 +213,8 @@ VALGRIND_LIBS = @VALGRIND_LIBS@ VERSION = @VERSION@ X11_CFLAGS = @X11_CFLAGS@ X11_LIBS = @X11_LIBS@ +XCB_CFLAGS = @XCB_CFLAGS@ +XCB_LIBS = @XCB_LIBS@ XORG_CFLAGS = @XORG_CFLAGS@ XORG_LIBS = @XORG_LIBS@ XORG_MAN_PAGE = @XORG_MAN_PAGE@ @@ -293,7 +294,7 @@ all: all-am .SUFFIXES: .SUFFIXES: .c .lo .o .obj -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -318,9 +319,9 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) +$(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): diff --git a/uxa/uxa-accel.c b/uxa/uxa-accel.c index 76425fe9f..27215ddf6 100644 --- a/uxa/uxa-accel.c +++ b/uxa/uxa-accel.c @@ -1027,7 +1027,7 @@ uxa_push_pixels(GCPtr pGC, PixmapPtr pBitmap, ok = glamor_push_pixels_nf(pGC, pBitmap, pDrawable, w, h, x, y); uxa_finish_access(&pBitmap->drawable, UXA_GLAMOR_ACCESS_RO); } - uxa_prepare_access(pDrawable, UXA_GLAMOR_ACCESS_RW); + uxa_finish_access(pDrawable, UXA_GLAMOR_ACCESS_RW); } if (!ok) goto fallback; diff --git a/uxa/uxa-render.c b/uxa/uxa-render.c index 4463dc2f0..d783ea26c 100644 --- a/uxa/uxa-render.c +++ b/uxa/uxa-render.c @@ -962,7 +962,7 @@ uxa_try_driver_composite(CARD8 op, RegionRec region; BoxPtr pbox; 
int nbox; - int xDst_copy, yDst_copy; + int xDst_copy = 0, yDst_copy = 0; int src_off_x, src_off_y, mask_off_x, mask_off_y, dst_off_x, dst_off_y; PixmapPtr pSrcPix, pMaskPix = NULL, pDstPix; PicturePtr localSrc, localMask = NULL;
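The final uxa-render.c hunk zero-initializes xDst_copy and yDst_copy: as far as the hunk shows, they are assigned only on the path that substitutes a temporary copy of the source picture, yet are read later regardless. A reduced sketch of the hazard the initializers close (names and logic are simplified stand-ins, not the actual uxa code):

	#include <stdbool.h>

	static void translate_back(int *x, int *y, bool used_copy,
				   int copy_dx, int copy_dy)
	{
		int dx = 0, dy = 0;	/* the fix: defined on every path */

		if (used_copy) {
			/* offsets of the temporary copy vs. the original */
			dx = copy_dx;
			dy = copy_dy;
		}

		/* read unconditionally afterwards; without the '= 0' above
		 * this reads indeterminate values whenever used_copy is
		 * false */
		*x -= dx;
		*y -= dy;
	}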