Initial commitHEAD submit/2.0alpha/20130131.232756 accepted/2.0alpha/20130131.230204 accepted/tizen_generic accepted/tizen_common 2.0alpha

author: Graydon, Tracy <tracy.graydon@intel.com> 2013-01-31 15:14:28 -0800
committer: Graydon, Tracy <tracy.graydon@intel.com> 2013-01-31 15:14:28 -0800
commit: 3c7b03f2f7b4ecfb18fb30f2e43b6321cb1aa4de (patch)
tree: bb8c57f401c0087a3ce4e96dc733abff854c3a43 /tests
download: intel-gpu-tools-3c7b03f2f7b4ecfb18fb30f2e43b6321cb1aa4de.tar.gz
intel-gpu-tools-3c7b03f2f7b4ecfb18fb30f2e43b6321cb1aa4de.tar.bz2
intel-gpu-tools-3c7b03f2f7b4ecfb18fb30f2e43b6321cb1aa4de.zip
92 files changed, 17172 insertions, 0 deletions
diff --git a/tests/.gitignore b/tests/.gitignore
new file mode 100644
index 00000000..1a6795bc
--- /dev/null
+++ b/tests/.gitignore
@@ -0,0 +1,78 @@
+flip_test
+drm_vma_limiter
+drm_vma_limiter_cached
+drm_vma_limiter_cpu
+drm_vma_limiter_gtt
+gem_bad_address
+gem_bad_batch
+gem_bad_blit
+gem_bad_length
+gem_basic
+gem_cs_prefetch
+gem_cpu_concurrent_blit
+gem_ctx_bad_destroy
+gem_ctx_bad_exec
+gem_ctx_basic
+gem_ctx_create
+gem_ctx_exec
+gem_double_irq_loop
+gem_dummy_reloc_loop
+gem_exec_bad_domains
+gem_exec_blt
+gem_exec_faulting_reloc
+gem_exec_nop
+gem_fenced_exec_thrash
+gem_fence_thrash
+gem_flink
+gem_gtt_speed
+gem_gtt_concurrent_blit
+gem_gtt_cpu_tlb
+gem_hang
+gem_hangcheck_forcewake
+gem_largeobject
+gem_linear_blits
+gem_mmap
+gem_mmap_gtt
+gem_mmap_offset_exhaustion
+gem_partial_pwrite_pread
+gem_pipe_control_store_loop
+gem_pread_after_blit
+gem_pwrite
+gem_readwrite
+gem_reloc_vs_gpu
+gem_reg_read
+gem_ringfill
+gem_ring_sync_loop
+gem_set_tiling_vs_blt
+gem_set_tiling_vs_gtt
+gem_set_tiling_vs_pwrite
+gem_storedw_batches_loop
+gem_storedw_loop_blt
+gem_storedw_loop_bsd
+gem_storedw_loop_render
+gem_stress
+gem_tiled_blits
+gem_tiled_fence_blits
+gem_tiled_partial_pwrite_pread
+gem_tiled_pread
+gem_tiled_pread_pwrite
+gem_tiled_swapping
+gem_unfence_active_buffers
+gem_unref_active_buffers
+gem_vmap_blits
+gem_wait_render_timeout
+gen3_mixed_blits
+gen3_render_linear_blits
+gen3_render_mixed_blits
+gen3_render_tiledx_blits
+gen3_render_tiledy_blits
+getclient
+getstats
+getversion
+prime_nv_api
+prime_nv_pcopy
+prime_nv_test
+prime_self_import
+testdisplay
+sysfs_rc6_residency
+# Please keep sorted alphabetically
diff --git a/tests/Makefile.am b/tests/Makefile.am
new file mode 100644
index 00000000..e29a383a
--- /dev/null
+++ b/tests/Makefile.am
@@ -0,0 +1,148 @@
+noinst_PROGRAMS = \
+	gem_stress \
+	$(TESTS_progs) \
+	$(HANG) \
+	$(NULL)
+
+if HAVE_NOUVEAU
+NOUVEAU_TESTS = \
+	prime_nv_api  \
+	prime_nv_pcopy \
+	prime_nv_test
+endif
+
+TESTS_progs = \
+	getversion \
+	getclient \
+	getstats \
+	gem_basic \
+	gem_cpu_concurrent_blit \
+	gem_gtt_concurrent_blit \
+	gem_exec_nop \
+	gem_exec_blt \
+	gem_exec_bad_domains \
+	gem_exec_faulting_reloc \
+	gem_flink \
+	gem_readwrite \
+	gem_ringfill \
+	gem_mmap \
+	gem_mmap_gtt \
+	gem_mmap_offset_exhaustion \
+	gem_pwrite \
+	gem_pread_after_blit \
+	gem_set_tiling_vs_blt \
+	gem_set_tiling_vs_gtt \
+	gem_set_tiling_vs_pwrite \
+	gem_tiled_pread \
+	gem_tiled_pread_pwrite \
+	gem_tiled_partial_pwrite_pread \
+	gem_tiled_swapping \
+	gem_partial_pwrite_pread \
+	gem_linear_blits \
+	gem_vmap_blits \
+	gem_tiled_blits \
+	gem_tiled_fence_blits \
+	gem_largeobject \
+	gem_bad_length \
+	gem_fence_thrash \
+	gem_fenced_exec_thrash \
+	gem_gtt_speed \
+	gem_gtt_cpu_tlb \
+	gem_cs_prefetch \
+	gen3_render_linear_blits \
+	gen3_render_tiledx_blits \
+	gen3_render_tiledy_blits \
+	gen3_render_mixed_blits \
+	gen3_mixed_blits \
+	gem_storedw_loop_render \
+	gem_storedw_loop_blt \
+	gem_storedw_loop_bsd \
+	gem_storedw_batches_loop \
+	gem_dummy_reloc_loop \
+	gem_double_irq_loop \
+	gem_ring_sync_loop \
+	gem_pipe_control_store_loop \
+	gem_unfence_active_buffers \
+	gem_unref_active_buffers \
+	gem_reloc_vs_gpu \
+	drm_vma_limiter \
+	drm_vma_limiter_cpu \
+	drm_vma_limiter_gtt \
+	drm_vma_limiter_cached \
+	sysfs_rc6_residency \
+	flip_test \
+	gem_wait_render_timeout \
+	gem_ctx_create \
+	gem_ctx_bad_destroy \
+	gem_ctx_exec \
+	gem_ctx_bad_exec \
+	gem_ctx_basic \
+	gem_reg_read \
+	$(NOUVEAU_TESTS) \
+	prime_self_import \
+	$(NULL)
+
+# IMPORTANT: The ZZ_ tests need to be run last!
+# ... and make can't deal with inlined comments ...
+TESTS_scripts = \
+	debugfs_reader \
+	debugfs_emon_crash \
+	sysfs_l3_parity \
+	sysfs_edid_timing \
+	module_reload \
+	ZZ_check_dmesg \
+	ZZ_hangman \
+	$(NULL)
+
+kernel_tests = \
+	$(TESTS_progs) \
+	$(TESTS_scripts) \
+	$(NULL)
+
+TESTS = \
+	$(NULL)
+
+test:
+	whoami | grep root || ( echo ERROR: not running as root; exit 1 )
+	./check_drm_clients
+	$(MAKE) TESTS="${kernel_tests}" check
+
+HANG = \
+	gem_bad_batch \
+	gem_hang \
+	gem_bad_blit \
+	gem_bad_address \
+	$(NULL)
+
+EXTRA_PROGRAMS = $(TESTS_progs) $(HANG)
+EXTRA_DIST = $(TESTS_scripts) drm_lib.sh check_drm_clients debugfs_wedged
+CLEANFILES = $(EXTRA_PROGRAMS)
+
+AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) \
+	-I$(srcdir)/.. \
+	-I$(srcdir)/../lib
+LDADD = ../lib/libintel_tools.la $(PCIACCESS_LIBS) $(DRM_LIBS) 
+
+testdisplay_SOURCES = \
+	testdisplay.c \
+	testdisplay.h \
+	testdisplay_hotplug.c \
+	$(NULL)
+
+TESTS_progs += testdisplay
+LDADD += $(CAIRO_LIBS) $(LIBUDEV_LIBS) $(GLIB_LIBS)
+AM_CFLAGS += $(CAIRO_CFLAGS) $(LIBUDEV_CFLAGS) $(GLIB_CFLAGS)
+
+gem_fence_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
+gem_fence_thrash_LDADD = $(LDADD) -lpthread
+
+gem_wait_render_timeout_LDADD = $(LDADD) -lrt
+
+gem_ctx_basic_LDADD = $(LDADD) -lpthread
+
+prime_nv_test_CFLAGS = $(AM_CFLAGS) $(DRM_NOUVEAU_CFLAGS)
+prime_nv_test_LDADD = $(LDADD) $(DRM_NOUVEAU_LIBS)
+prime_nv_api_CFLAGS = $(AM_CFLAGS) $(DRM_NOUVEAU_CFLAGS)
+prime_nv_api_LDADD = $(LDADD) $(DRM_NOUVEAU_LIBS)
+prime_nv_pcopy_CFLAGS = $(AM_CFLAGS) $(DRM_NOUVEAU_CFLAGS)
+prime_nv_pcopy_LDADD = $(LDADD) $(DRM_NOUVEAU_LIBS)
diff --git a/tests/ZZ_check_dmesg b/tests/ZZ_check_dmesg
new file mode 100755
index 00000000..e28ba35f
--- /dev/null
+++ b/tests/ZZ_check_dmesg
@@ -0,0 +1,11 @@
+#!/bin/sh
+# Scan the kernel log: exit 1 on DRM *ERROR* lines or a "cut here" backtrace.
+
+if dmesg | grep -q '\*ERROR\*' ; then
+	echo "DRM_ERROR dirt in dmesg"
+	exit 1
+fi
+if dmesg | grep -q -- '------\[ cut here \]----' ; then
+	echo "found a backtrace in dmesg"
+	exit 1
+fi
diff --git a/tests/ZZ_hangman b/tests/ZZ_hangman
new file mode 100755
index 00000000..b8f4a682
--- /dev/null
+++ b/tests/ZZ_hangman
@@ -0,0 +1,42 @@
+#!/bin/bash
+#
+# Testcase: Simulate gpu hang
+#
+# This check uses the stop_rings facility to exercise the gpu hang code:
+# the rings are stopped, gem_exec_nop is run and an error state must appear.
+#
+
+SOURCE_DIR="$( dirname "${BASH_SOURCE[0]}" )"
+. "$SOURCE_DIR/drm_lib.sh"
+
+oldpath=`pwd`
+
+cd "$i915_path"
+
+if [ ! -f i915_ring_stop ] ; then
+	echo "kernel doesn't support ring stopping"
+	exit 77
+fi
+
+if cat i915_error_state | grep -v "no error state collected" > /dev/null ; then
+	echo "gpu hang detected"
+	exit 1
+fi
+
+# stop rings
+echo 0xf > i915_ring_stop
+echo "rings stopped"
+
+(cd "$oldpath"; "$SOURCE_DIR/gem_exec_nop") > /dev/null
+
+if cat i915_error_state | grep -v "no error state collected" > /dev/null ; then
+	echo "gpu hang correctly detected"
+else
+	echo "gpu hang not detected"
+	exit 2
+fi
+
+# clear error state
+echo > i915_error_state
+
+exit 0
diff --git a/tests/check_drm_clients b/tests/check_drm_clients
new file mode 100755
index 00000000..eb12416a
--- /dev/null
+++ b/tests/check_drm_clients
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Sourcing drm_lib.sh aborts with an error if other drm clients are running.
+SOURCE_DIR="$( dirname "${BASH_SOURCE[0]}" )"
+. "$SOURCE_DIR/drm_lib.sh"
+
+exit 0
diff --git a/tests/debugfs_emon_crash b/tests/debugfs_emon_crash
new file mode 100755
index 00000000..6e139a41
--- /dev/null
+++ b/tests/debugfs_emon_crash
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# This checks whether we can crash the kernel with a segmentation fault
+# by reading /sys/kernel/debug/dri/0/i915_emon_status too quickly
+#
+
+SOURCE_DIR="$( dirname "${BASH_SOURCE[0]}" )"
+. "$SOURCE_DIR/drm_lib.sh"
+
+for z in $(seq 1 1000); do
+	cat "$i915_path/i915_emon_status" > /dev/null 2>&1
+done
+
+# If we got here, we haven't crashed
+
+exit 0
diff --git a/tests/debugfs_reader b/tests/debugfs_reader
new file mode 100755
index 00000000..80d59988
--- /dev/null
+++ b/tests/debugfs_reader
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+SOURCE_DIR="$( dirname "${BASH_SOURCE[0]}" )"
+. "$SOURCE_DIR/drm_lib.sh"
+
+# read everything we can
+cat "$i915_path"/* > /dev/null 2>&1
+
+exit 0
diff --git a/tests/debugfs_wedged b/tests/debugfs_wedged
new file mode 100755
index 00000000..80a32f61
--- /dev/null
+++ b/tests/debugfs_wedged
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+SOURCE_DIR="$( dirname "${BASH_SOURCE[0]}" )"
+. "$SOURCE_DIR/drm_lib.sh"
+
+# Testcase: wedge the hw to check the error_state reading
+#
+# Unfortunately wedged is permanent, so this test is not run by default
+echo 1 > "${i915_path}/i915_wedged"
+cat "$i915_path/i915_error_state" > /dev/null 2>&1
diff --git a/tests/drm_lib.sh b/tests/drm_lib.sh
new file mode 100755
index 00000000..a76fd474
--- /dev/null
+++ b/tests/drm_lib.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+# Print the given message and abort the calling test with a failure status.
+die() {
+	echo "$@"
+	exit 1
+}
+
+# Locate the debugfs dri directory; the later candidate wins if both exist.
+debugfs_path=
+for candidate in /debug/dri /sys/kernel/debug/dri ; do
+	if [ -d "$candidate" ] ; then
+		debugfs_path=$candidate
+	fi
+done
+[ -n "$debugfs_path" ] || die " i915 debugfs path not found."
+
+# Use the first dri entry that exposes i915_error_state.
+i915_path=x
+for dir in "$debugfs_path"/* ; do
+	if [ -f "$dir/i915_error_state" ] ; then
+		i915_path=$dir
+		break
+	fi
+done
+[ "$i915_path" != "x" ] || die " i915 debugfs path not found."
+
+# Refuse to run when the clients file has more than two lines (presumably
+# only header lines are present when no other client is open - confirm).
+if [ `cat "$i915_path/clients" | wc -l` -gt "2" ] ; then
+	die "ERROR: other drm clients running"
+fi
+
diff --git a/tests/drm_vma_limiter.c b/tests/drm_vma_limiter.c
new file mode 100644
index 00000000..1971e2dc
--- /dev/null
+++ b/tests/drm_vma_limiter.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+/* Testcase: check whether the libdrm vma limiter works
+ *
+ * We've had reports of the X server exhausting the default rlimit of 64k vma's
+ * in the kernel. libdrm has grown facilities to limit the vma caching since,
+ * this checks whether they actually work.
+ */
+
+/* we do both cpu and gtt maps, so only need half of 64k to exhaust */
+#define BO_ARRAY_SIZE 35000
+drm_intel_bo *bos[BO_ARRAY_SIZE];
+
+/* Write each bo through a cpu and a gtt map, then re-map and verify it. */
+int main(int argc, char **argv)
+{
+	int fd;
+	int i;
+	char *ptr;
+
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+	drm_intel_bufmgr_gem_set_vma_cache_size(bufmgr, 500);
+
+	for (i = 0; i < BO_ARRAY_SIZE; i++) {
+		bos[i] = drm_intel_bo_alloc(bufmgr, "mmap bo", 4096, 4096);
+		assert(bos[i]);
+
+		drm_intel_bo_map(bos[i], 1);
+		ptr = bos[i]->virtual;
+		assert(ptr);
+		*ptr = 'c';
+		drm_intel_bo_unmap(bos[i]);
+
+		drm_intel_gem_bo_map_gtt(bos[i]);
+		ptr = bos[i]->virtual;
+		assert(ptr);
+		*ptr = 'c';
+		drm_intel_gem_bo_unmap_gtt(bos[i]);
+	}
+
+	/* and recheck whether a second map of the same still works: the bos
+	 * from the first pass are mapped again instead of being replaced by
+	 * fresh allocations, so the byte stored above must read back */
+	for (i = 0; i < BO_ARRAY_SIZE; i++) {
+		drm_intel_bo_map(bos[i], 1);
+		ptr = bos[i]->virtual;
+		assert(ptr && *ptr == 'c');
+		drm_intel_bo_unmap(bos[i]);
+
+		drm_intel_gem_bo_map_gtt(bos[i]);
+		ptr = bos[i]->virtual;
+		assert(ptr && *ptr == 'c');
+		drm_intel_gem_bo_unmap_gtt(bos[i]);
+	}
+
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/drm_vma_limiter_cached.c b/tests/drm_vma_limiter_cached.c
new file mode 100644
index 00000000..37976185
--- /dev/null
+++ b/tests/drm_vma_limiter_cached.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+/* Testcase: check whether the libdrm vma limiter works
+ *
+ * We've had reports of the X server exhausting the default rlimit of 64k vma's
+ * in the kernel. libdrm has grown facilities to limit the vma caching since,
+ * this checks whether they actually work.
+ *
+ * This one checks whether mmaps of unused cached bos are also properly reaped.
+ */
+
+/* only gtt maps are done here, so the bo loop in main() runs past 64k */
+
+int main(int argc, char **argv)
+{
+	int fd;
+	int i;
+	char *ptr;
+	drm_intel_bo *load_bo;
+
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+	/* 1024*4096 byte bo, both source and destination of the load blits */
+	load_bo = drm_intel_bo_alloc(bufmgr, "target bo", 1024*4096, 4096);
+	assert(load_bo);
+
+	drm_intel_bufmgr_gem_set_vma_cache_size(bufmgr, 500);
+
+	/* IMPORTANT: we need to enable buffer reuse, otherwise we won't test
+	 * the libdrm bo cache! */
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+	/* put some load onto the gpu to keep the light buffers active for long
+	 * enough */
+	for (i = 0; i < 10000; i++) {
+		BEGIN_BATCH(8);
+		OUT_BATCH(XY_SRC_COPY_BLT_CMD |
+			  XY_SRC_COPY_BLT_WRITE_ALPHA |
+			  XY_SRC_COPY_BLT_WRITE_RGB);
+		OUT_BATCH((3 << 24) | /* 32 bits */
+			  (0xcc << 16) | /* copy ROP */
+			  4096);
+		OUT_BATCH(0); /* dst x1,y1 */
+		OUT_BATCH((1024 << 16) | 512);
+		OUT_RELOC(load_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+		OUT_BATCH((0 << 16) | 512); /* src x1, y1 */
+		OUT_BATCH(4096);
+		OUT_RELOC(load_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+		ADVANCE_BATCH();
+	}
+	/* i is advanced by the inner loop, one group of GROUP_SZ bos per pass */
+#define GROUP_SZ 100
+	for (i = 0; i < 68000; ) {
+		int j;
+		drm_intel_bo *bo[GROUP_SZ];
+
+		for (j = 0; j < GROUP_SZ; j++, i++) {
+			bo[j] = drm_intel_bo_alloc(bufmgr, "mmap bo", 4096, 4096);
+			assert(bo[j]);
+
+			drm_intel_gem_bo_map_gtt(bo[j]);
+			ptr = bo[j]->virtual;
+			assert(ptr);
+			*ptr = 'c';
+			drm_intel_gem_bo_unmap_gtt(bo[j]);
+
+			/* put it onto the active list ... */
+			BEGIN_BATCH(6);
+			OUT_BATCH(XY_COLOR_BLT_CMD |
+				  XY_COLOR_BLT_WRITE_ALPHA |
+				  XY_COLOR_BLT_WRITE_RGB);
+			OUT_BATCH((3 << 24) | /* 32 bits */
+				  128);
+			OUT_BATCH(0); /* dst x1,y1 */
+			OUT_BATCH((1 << 16) | 1);
+			OUT_RELOC(bo[j], I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+			OUT_BATCH(0xffffffff); /* color */
+			ADVANCE_BATCH();
+		}
+		intel_batchbuffer_flush(batch);
+		/* drop our references once the whole group has been flushed */
+		for (j = 0; j < GROUP_SZ; j++)
+			drm_intel_bo_unreference(bo[j]);
+	}
+
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/drm_vma_limiter_cpu.c b/tests/drm_vma_limiter_cpu.c
new file mode 100644
index 00000000..24ce188e
--- /dev/null
+++ b/tests/drm_vma_limiter_cpu.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+/* Testcase: check whether the libdrm vma limiter works
+ *
+ * We've had reports of the X server exhausting the default rlimit of 64k vma's
+ * in the kernel. libdrm has grown facilities to limit the vma caching since,
+ * this checks whether they actually work.
+ *
+ * This one checks cpu mmaps only.
+ */
+
+#define BO_ARRAY_SIZE 68000
+drm_intel_bo *bos[BO_ARRAY_SIZE];
+
+/* Write each bo through a cpu map, then re-map it and verify the contents. */
+int main(int argc, char **argv)
+{
+	int fd;
+	int i;
+	char *ptr;
+
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+	drm_intel_bufmgr_gem_set_vma_cache_size(bufmgr, 500);
+
+	for (i = 0; i < BO_ARRAY_SIZE; i++) {
+		bos[i] = drm_intel_bo_alloc(bufmgr, "mmap bo", 4096, 4096);
+		assert(bos[i]);
+
+		drm_intel_bo_map(bos[i], 1);
+		ptr = bos[i]->virtual;
+		assert(ptr);
+		*ptr = 'c';
+		drm_intel_bo_unmap(bos[i]);
+	}
+
+	/* and recheck whether a second map of the same still works: the bos
+	 * from the first pass are mapped again instead of being replaced by
+	 * fresh allocations, so the byte stored above must read back */
+	for (i = 0; i < BO_ARRAY_SIZE; i++) {
+		drm_intel_bo_map(bos[i], 1);
+		ptr = bos[i]->virtual;
+		assert(ptr && *ptr == 'c');
+		drm_intel_bo_unmap(bos[i]);
+	}
+
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/drm_vma_limiter_gtt.c b/tests/drm_vma_limiter_gtt.c
new file mode 100644
index 00000000..540ea917
--- /dev/null
+++ b/tests/drm_vma_limiter_gtt.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+/* Testcase: check whether the libdrm vma limiter works
+ *
+ * We've had reports of the X server exhausting the default rlimit of 64k vma's
+ * in the kernel. libdrm has grown facilities to limit the vma caching since,
+ * this checks whether they actually work.
+ *
+ * This one checks gtt mmaps only.
+ */
+
+/* only gtt maps are done here, so more than 64k bos are needed to exhaust */
+#define BO_ARRAY_SIZE 68000
+drm_intel_bo *bos[BO_ARRAY_SIZE];
+
+/* Write each bo through a gtt map, then re-map it and verify the contents. */
+int main(int argc, char **argv)
+{
+	int fd;
+	int i;
+	char *ptr;
+
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+	drm_intel_bufmgr_gem_set_vma_cache_size(bufmgr, 500);
+
+	for (i = 0; i < BO_ARRAY_SIZE; i++) {
+		bos[i] = drm_intel_bo_alloc(bufmgr, "mmap bo", 4096, 4096);
+		assert(bos[i]);
+
+		drm_intel_gem_bo_map_gtt(bos[i]);
+		ptr = bos[i]->virtual;
+		assert(ptr);
+		*ptr = 'c';
+		drm_intel_gem_bo_unmap_gtt(bos[i]);
+	}
+
+	/* and recheck whether a second map of the same still works: the bos
+	 * from the first pass are mapped again instead of being replaced by
+	 * fresh allocations, so the byte stored above must read back */
+	for (i = 0; i < BO_ARRAY_SIZE; i++) {
+		drm_intel_gem_bo_map_gtt(bos[i]);
+		ptr = bos[i]->virtual;
+		assert(ptr && *ptr == 'c');
+		drm_intel_gem_bo_unmap_gtt(bos[i]);
+	}
+
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/flip_test.c b/tests/flip_test.c
new file mode 100644
index 00000000..67105905
--- /dev/null
+++ b/tests/flip_test.c
@@ -0,0 +1,317 @@
+/*
+ * Copyright 2012 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <cairo.h>
+#include <errno.h>
+#include <math.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/poll.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "testdisplay.h"
+
+drmModeRes *resources;
+int drm_fd;
+int test_time = 3;
+
+uint32_t *fb_ptr;
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+struct type_name {
+	int type;
+	const char *name;
+};
+
+struct test_output {
+	uint32_t id;
+	int mode_valid;
+	drmModeModeInfo mode;
+	drmModeEncoder *encoder;
+	drmModeConnector *connector;
+	int crtc;
+	int pipe;
+	unsigned int current_fb_id;
+	unsigned int fb_ids[2];
+};
+
+/* Flip-complete callback: immediately queue a flip to the other buffer. */
+static void page_flip_handler(int fd, unsigned int frame, unsigned int sec,
+			      unsigned int usec, void *data)
+{
+	struct test_output *out = data;
+	unsigned int next_fb;
+
+	/* fb_ids[] holds the two buffers; choose the one not current */
+	next_fb = (out->current_fb_id == out->fb_ids[0]) ?
+		out->fb_ids[1] : out->fb_ids[0];
+
+	drmModePageFlip(drm_fd, out->crtc, next_fb,
+			DRM_MODE_PAGE_FLIP_EVENT, out);
+	out->current_fb_id = next_fb;
+}
+
+/*
+ * Fill in @o for the connector o->id.
+ *
+ * @o: output to set up; o->id must hold the connector id on entry
+ * @crtc_id: id of the crtc the output has to be driven by
+ *
+ * Picks a mode (the preferred one, else the first), the first encoder that
+ * can be fetched and the crtc matching @crtc_id. On success o->mode_valid is
+ * 1 and o->connector / o->encoder are handed to the caller; on any failure
+ * o->mode_valid is 0 and nothing is left allocated.
+ */
+static void connector_find_preferred_mode(struct test_output *o, int crtc_id)
+{
+	drmModeConnector *connector;
+	drmModeEncoder *encoder = NULL;
+	int i, j;
+
+	/* First, find the connector & mode */
+	o->mode_valid = 0;
+	o->crtc = 0;
+	o->encoder = NULL;
+	o->connector = NULL;
+	connector = drmModeGetConnector(drm_fd, o->id);
+	assert(connector);
+
+	if (connector->connection != DRM_MODE_CONNECTED)
+		goto fail;
+
+	if (!connector->count_modes) {
+		fprintf(stderr, "connector %d has no modes\n", o->id);
+		goto fail;
+	}
+
+	if (connector->connector_id != o->id) {
+		fprintf(stderr, "connector id doesn't match (%d != %d)\n",
+			connector->connector_id, o->id);
+		goto fail;
+	}
+
+	/* look for the mode flagged as preferred ... */
+	for (j = 0; j < connector->count_modes; j++) {
+		if (connector->modes[j].type & DRM_MODE_TYPE_PREFERRED)
+			break;
+	}
+
+	/* ... else use the first mode as test mode (count_modes > 0 here) */
+	if (j == connector->count_modes)
+		j = 0;
+	o->mode = connector->modes[j];
+	o->mode_valid = 1;
+
+	/* Now get the encoder */
+	for (i = 0; i < connector->count_encoders; i++) {
+		encoder = drmModeGetEncoder(drm_fd, connector->encoders[i]);
+		if (encoder)
+			break;
+
+		fprintf(stderr, "could not get encoder %i: %s\n",
+			connector->encoders[i], strerror(errno));
+	}
+
+	if (!encoder) {
+		fprintf(stderr, "failed to find encoder\n");
+		goto fail;
+	}
+
+	/* Find the requested CRTC, provided this encoder can drive it */
+	for (i = 0; i < resources->count_crtcs; i++) {
+		if (resources->crtcs[i] != crtc_id)
+			continue;
+		if (resources->crtcs[i] &&
+		    (encoder->possible_crtcs & (1<<i))) {
+			o->crtc = resources->crtcs[i];
+			break;
+		}
+	}
+
+	if (!o->crtc) {
+		fprintf(stderr, "could not find requested crtc %d\n", crtc_id);
+		goto fail;
+	}
+
+	o->pipe = i;
+	o->encoder = encoder;
+	o->connector = connector;
+	return;
+
+fail:
+	/* drop everything acquired so far; the output stays unusable */
+	o->mode_valid = 0;
+	if (encoder)
+		drmModeFreeEncoder(encoder);
+	drmModeFreeConnector(connector);
+}
+
+/* Draw a white bar whose x position tells the two flip buffers apart. */
+static void
+paint_flip_mode(cairo_t *cr, int width, int height, void *priv)
+{
+	/* priv carries a bool passed through the pointer argument */
+	bool is_odd = (bool) priv;
+	int bar_x = is_odd ? width/4 : width/2;
+
+	cairo_rectangle(cr, bar_x, height/2, width/4, height/8);
+
+	cairo_set_source_rgb(cr, 1, 1, 1);
+	cairo_fill(cr);
+}
+
+/*
+ * Flip-test connector o->id on @crtc for test_time seconds. Returns early
+ * if the output cannot be set up; other failures end the process via exit().
+ */
+static void set_mode(struct test_output *o, int crtc)
+{
+	int ret, width, height;
+	int bpp = 32, depth = 24;
+	drmEventContext evctx;
+	struct timeval end;
+	struct kmstest_fb fb_info[2];
+
+	connector_find_preferred_mode(o, crtc);
+	if (!o->mode_valid)
+		return;
+
+	width = o->mode.hdisplay;
+	height = o->mode.vdisplay;
+
+	o->fb_ids[0] = kmstest_create_fb(drm_fd, width, height, bpp, depth,
+					 false, &fb_info[0],
+					 paint_flip_mode, (void *)false);
+	o->fb_ids[1] = kmstest_create_fb(drm_fd, width, height, bpp, depth,
+					 false, &fb_info[1],
+					 paint_flip_mode, (void *)true);
+	if (!o->fb_ids[0] || !o->fb_ids[1]) {
+		fprintf(stderr, "failed to create fbs\n");
+		exit(3);
+	}
+	/* our gem handles are closed right away; only the fb ids are used */
+	gem_close(drm_fd, fb_info[0].gem_handle);
+	gem_close(drm_fd, fb_info[1].gem_handle);
+
+	kmstest_dump_mode(&o->mode);
+	if (drmModeSetCrtc(drm_fd, o->crtc, o->fb_ids[0], 0, 0,
+			   &o->id, 1, &o->mode)) {
+		fprintf(stderr, "failed to set mode (%dx%d@%dHz): %s\n",
+			width, height, o->mode.vrefresh,
+			strerror(errno));
+		exit(3);
+	}
+
+	ret = drmModePageFlip(drm_fd, o->crtc, o->fb_ids[1],
+			      DRM_MODE_PAGE_FLIP_EVENT, o);
+	if (ret) {
+		fprintf(stderr, "failed to page flip: %s\n", strerror(errno));
+		exit(4);
+	}
+	o->current_fb_id = o->fb_ids[1];
+	/* further flips are queued from page_flip_handler() */
+	memset(&evctx, 0, sizeof evctx);
+	evctx.version = DRM_EVENT_CONTEXT_VERSION;
+	evctx.vblank_handler = NULL;
+	evctx.page_flip_handler = page_flip_handler;
+	/* stop once test_time seconds have elapsed */
+	gettimeofday(&end, NULL);
+	end.tv_sec += test_time;
+
+	while (1) {
+		struct timeval now, timeout = { .tv_sec = 3, .tv_usec = 0 };
+		fd_set fds;
+
+		FD_ZERO(&fds);
+		FD_SET(0, &fds);
+		FD_SET(drm_fd, &fds);
+		ret = select(drm_fd + 1, &fds, NULL, NULL, &timeout);
+		if (ret <= 0) {
+			fprintf(stderr, "select timed out or error (ret %d)\n",
+				ret);
+			exit(1);
+		} else if (FD_ISSET(0, &fds)) {
+			fprintf(stderr, "no fds active, breaking\n");
+			exit(2);
+		}
+
+		gettimeofday(&now, NULL);
+		if (now.tv_sec > end.tv_sec ||
+		    (now.tv_sec == end.tv_sec && now.tv_usec >= end.tv_usec))
+			break;
+
+		drmHandleEvent(drm_fd, &evctx);
+	}
+
+	fprintf(stdout, "page flipping on crtc %d, connector %d: PASSED\n",
+		crtc, o->id);
+
+	drmModeFreeEncoder(o->encoder);
+	drmModeFreeConnector(o->connector);
+}
+
+/* Try the flip test on every connector/crtc pair; always returns 1. */
+static int run_test(void)
+{
+	struct test_output *connectors;
+	int c, i;
+
+	resources = drmModeGetResources(drm_fd);
+	if (!resources) {
+		fprintf(stderr, "drmModeGetResources failed: %s\n",
+			strerror(errno));
+		exit(5);
+	}
+
+	connectors = calloc(resources->count_connectors, sizeof(*connectors));
+	assert(connectors);
+	/* Find any connected displays */
+	for (c = 0; c < resources->count_connectors; c++) {
+		connectors[c].id = resources->connectors[c];
+		for (i = 0; i < resources->count_crtcs; i++)
+			set_mode(&connectors[c], resources->crtcs[i]);
+	}
+
+	free(connectors);
+	drmModeFreeResources(resources);
+	return 1;
+}
+
+int main(int argc, char **argv)
+{
+	/* drm_fd is a global shared with the helper functions of this file */
+	drm_fd = drm_open_any();
+	/* run_test() always returns 1, so its result is not inspected */
+	run_test();
+	close(drm_fd);
+
+	return 0;
+}
diff --git a/tests/gem_bad_address.c b/tests/gem_bad_address.c
new file mode 100644
index 00000000..fbb96497
--- /dev/null
+++ b/tests/gem_bad_address.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *    Jesse Barnes <jbarnes@virtuousgeek.org> (based on gem_bad_blit.c)
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+#define BAD_GTT_DEST ((512*1024*1024)) /* past end of aperture */
+
+static void
+bad_store(void)	/* queue one MI_STORE_DWORD_IMM naming BAD_GTT_DEST, then submit */
+{
+	BEGIN_BATCH(4);	/* four dwords are emitted below */
+	OUT_BATCH(MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL | 1 << 21);
+	OUT_BATCH(0);
+	OUT_BATCH(BAD_GTT_DEST);	/* presumably the store address; raw value, no relocation */
+	OUT_BATCH(0xdeadbeef);	/* presumably the value to store -- TODO confirm dword order */
+	ADVANCE_BATCH();
+
+	intel_batchbuffer_flush(batch);	/* uses the file-scope batch set up by main() */
+}
+
+int main(int argc, char **argv)
+{
+	int fd;
+
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+	bad_store();
+
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_bad_batch.c b/tests/gem_bad_batch.c
new file mode 100644
index 00000000..db6636ae
--- /dev/null
+++ b/tests/gem_bad_batch.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *    Jesse Barnes <jbarnes@virtuousgeek.org> (based on gem_bad_blit.c)
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+static void
+bad_batch(void)	/* queue MI_BATCH_BUFFER_START followed by a 0 dword, then submit */
+{
+	BEGIN_BATCH(2);	/* two dwords are emitted below */
+	OUT_BATCH(MI_BATCH_BUFFER_START);
+	OUT_BATCH(0);	/* presumably the start address operand -- TODO confirm */
+	ADVANCE_BATCH();
+
+	intel_batchbuffer_flush(batch);	/* uses the file-scope batch set up by main() */
+}
+
+int main(int argc, char **argv)
+{
+	int fd;
+
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+	bad_batch();
+
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_bad_blit.c b/tests/gem_bad_blit.c
new file mode 100644
index 00000000..22165270
--- /dev/null
+++ b/tests/gem_bad_blit.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file gem_bad_blit.c
+ *
+ * This is a test of submitting a blit whose destination address is
+ * bogus.
+ *
+ * NOTE: the description that used to sit here was copied verbatim from
+ * gem_tiled_blits.c and did not describe this file.
+ *
+ * The model is to allocate a single source object, then emit one
+ * XY_SRC_COPY_BLT (64x64, 32 bits per pixel, copy ROP) that reads from
+ * that object and writes to the hard-coded BAD_GTT_DEST offset, which
+ * the #define below labels as past the end of the aperture.  The batch
+ * is flushed and the test returns 0; nothing is read back or asserted
+ * on, so the pass/fail criterion is not visible in this file.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+#define BAD_GTT_DEST ((256*1024*1024)) /* past end of aperture */
+
+static void
+bad_blit(drm_intel_bo *src_bo, uint32_t devid)	/* blit from src_bo to BAD_GTT_DEST */
+{
+	uint32_t src_pitch = 512, dst_pitch = 512;
+	uint32_t cmd_bits = 0;
+
+	if (IS_965(devid)) {	/* when IS_965() matches: pitch / 4 and SRC_TILED bit */
+		src_pitch /= 4;
+		cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
+	}
+
+	if (IS_965(devid)) {	/* same condition again, applied to the destination */
+		dst_pitch /= 4;
+		cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
+	}
+
+	BEGIN_BATCH(8);	/* eight dwords are emitted below */
+	OUT_BATCH(XY_SRC_COPY_BLT_CMD |
+		  XY_SRC_COPY_BLT_WRITE_ALPHA |
+		  XY_SRC_COPY_BLT_WRITE_RGB |
+		  cmd_bits);
+	OUT_BATCH((3 << 24) | /* 32 bits */
+		  (0xcc << 16) | /* copy ROP */
+		  dst_pitch);
+	OUT_BATCH(0); /* dst x1,y1 */
+	OUT_BATCH((64 << 16) | 64); /* 64x64 blit */
+	OUT_BATCH(BAD_GTT_DEST);	/* raw offset, not a relocation like the source below */
+	OUT_BATCH(0); /* src x1,y1 */
+	OUT_BATCH(src_pitch);
+	OUT_RELOC(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+	ADVANCE_BATCH();
+
+	intel_batchbuffer_flush(batch);	/* uses the file-scope batch set up by main() */
+}
+
+int main(int argc, char **argv)	/* set up bufmgr + batch, fire one bad blit, tear down */
+{
+	drm_intel_bo *src;
+	int fd;
+
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+	src = drm_intel_bo_alloc(bufmgr, "src", 128 * 128, 4096);
+	bad_blit(src, batch->devid);
+	drm_intel_bo_unreference(src);	/* drop our reference; it was never released */
+
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_bad_length.c b/tests/gem_bad_length.c
new file mode 100644
index 00000000..41f44d7f
--- /dev/null
+++ b/tests/gem_bad_length.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+
+#define MI_BATCH_BUFFER_END	(0xA<<23)
+
+/*
+ * Testcase: Minimal bo_create and batchbuffer exec
+ *
+ * Originally this caught a kernel oops due to the unchecked assumption that
+ * objects have size > 0.
+ */
+
+static uint32_t do_gem_create(int fd, int size, int *retval)	/* returns create.handle */
+{
+	struct drm_i915_gem_create create;
+	int ret;
+
+	create.handle = 0;
+	create.size = (size + 4095) & -4096;	/* round up to a multiple of 4096 */
+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
+	assert(retval || ret == 0);	/* failure is fatal unless the caller asked for errno */
+	if (retval)
+		*retval = ret ? errno : 0;	/* errno is stale when the ioctl succeeded */
+
+	return create.handle;
+}
+
+static int gem_exec(int fd, struct drm_i915_gem_execbuffer2 *execbuf)
+{
+	return drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
+}
+
+static void create0(int fd)	/* a size-0 create must report EINVAL */
+{
+	int retval = 0;
+	printf("trying to create a zero-length gem object\n");
+	do_gem_create(fd, 0, &retval);	/* returned handle is ignored */
+	assert(retval == EINVAL);
+}
+
+static void exec0(int fd)
+{
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct drm_i915_gem_exec_object2 exec[1];
+	uint32_t buf[2] = { MI_BATCH_BUFFER_END, 0 };
+
+	/* Just try executing with a zero-length bo.
+	 * We expect the kernel to either accept the nop batch, or reject it
+	 * for the zero-length buffer, but never crash.
+	 */
+
+	exec[0].handle = gem_create(fd, 4096);
+	gem_write(fd, exec[0].handle, 0, buf, sizeof(buf));
+	exec[0].relocation_count = 0;
+	exec[0].relocs_ptr = 0;
+	exec[0].alignment = 0;
+	exec[0].offset = 0;
+	exec[0].flags = 0;
+	exec[0].rsvd1 = 0;
+	exec[0].rsvd2 = 0;
+
+	execbuf.buffers_ptr = (uintptr_t)exec;
+	execbuf.buffer_count = 1;
+	execbuf.batch_start_offset = 0;
+	execbuf.batch_len = sizeof(buf);
+	execbuf.cliprects_ptr = 0;
+	execbuf.num_cliprects = 0;
+	execbuf.DR1 = 0;
+	execbuf.DR4 = 0;
+	execbuf.flags = 0;
+	i915_execbuffer2_set_context_id(execbuf, 0);
+	execbuf.rsvd2 = 0;
+
+	printf("trying to run an empty batchbuffer\n");
+	gem_exec(fd, &execbuf);
+
+	gem_close(fd, exec[0].handle);
+}
+
+int main(int argc, char **argv)
+{
+	int fd;
+
+	fd = drm_open_any();
+
+	create0(fd);
+
+	//exec0(fd);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_basic.c b/tests/gem_basic.c
new file mode 100644
index 00000000..24ad445f
--- /dev/null
+++ b/tests/gem_basic.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+
+static void
+test_bad_close(int fd)	/* GEM_CLOSE on a made-up handle must fail with EINVAL */
+{
+	struct drm_gem_close close_bo;
+	int ret;
+
+	printf("Testing error return on bad close ioctl.\n");
+
+	close_bo.handle = 0x10101010;	/* arbitrary value, presumably not a live handle */
+	ret = ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
+
+	assert(ret == -1 && errno == EINVAL);
+}
+
+static void
+test_create_close(int fd)
+{
+	uint32_t handle;
+
+	printf("Testing creating and closing an object.\n");
+
+	handle = gem_create(fd, 16*1024);
+
+	gem_close(fd, handle);
+}
+
+static void
+test_create_fd_close(int fd)
+{
+	printf("Testing closing with an object allocated.\n");
+
+	gem_create(fd, 16*1024);
+	/* leak it */
+
+	close(fd);
+}
+
+int main(int argc, char **argv)
+{
+	int fd;
+
+	fd = drm_open_any();
+
+	test_bad_close(fd);
+	test_create_close(fd);
+	test_create_fd_close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_cpu_concurrent_blit.c b/tests/gem_cpu_concurrent_blit.c
new file mode 100644
index 00000000..fd517d00
--- /dev/null
+++ b/tests/gem_cpu_concurrent_blit.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright © 2009,2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/** @file gem_cpu_concurrent_blit.c
+ *
+ * This is a test of CPU read/write behavior when writing to active
+ * buffers.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static void
+set_bo(drm_intel_bo *bo, uint32_t val, int width, int height)
+{
+	uint32_t *word;	/* cursor into the CPU mapping */
+	int left;	/* 32-bit words still to write */
+
+	drm_intel_bo_map(bo, true);
+	word = bo->virtual;
+	for (left = width * height; left--; word++)
+		*word = val;	/* every one of the width*height words gets val */
+	drm_intel_bo_unmap(bo);
+}
+
+static void
+cmp_bo(drm_intel_bo *bo, uint32_t val, int width, int height)
+{
+	int size = width * height;	/* number of 32-bit words to check */
+	uint32_t *vaddr;
+
+	drm_intel_bo_map(bo, false);
+	vaddr = bo->virtual;
+	for (; size--; vaddr++)	/* advance outside assert() so NDEBUG cannot drop it */
+		assert(*vaddr == val);	/* every word must equal val */
+	drm_intel_bo_unmap(bo);
+}
+
+static drm_intel_bo *
+create_bo(drm_intel_bufmgr *bufmgr, uint32_t val, int width, int height)
+{
+	drm_intel_bo *bo;
+
+	bo = drm_intel_bo_alloc(bufmgr, "bo", 4*width*height, 0);	/* 4 bytes per element */
+	assert(bo);
+
+	set_bo(bo, val, width, height);	/* fill every element with val */
+
+	return bo;
+}
+
+int
+main(int argc, char **argv)
+{
+	drm_intel_bufmgr *bufmgr;
+	struct intel_batchbuffer *batch;
+	int num_buffers = 128, max;
+	drm_intel_bo *src[128], *dst[128], *dummy;
+	int width = 512, height = 512;
+	int fd;
+	int i;
+
+	fd = drm_open_any();
+
+	max = gem_aperture_size (fd) / (1024 * 1024) / 2;
+	if (num_buffers > max)
+		num_buffers = max;
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+	for (i = 0; i < num_buffers; i++) {
+		src[i] = create_bo(bufmgr, i, width, height);
+		dst[i] = create_bo(bufmgr, ~i, width, height);
+	}
+	dummy = create_bo(bufmgr, 0, width, height);
+
+	/* try to overwrite the source values */
+	for (i = 0; i < num_buffers; i++)
+		intel_copy_bo(batch, dst[i], src[i], width, height);
+	for (i = num_buffers; i--; )
+		set_bo(src[i], 0xdeadbeef, width, height);
+	for (i = 0; i < num_buffers; i++)
+		cmp_bo(dst[i], i, width, height);
+
+	/* try to read the results before the copy completes */
+	for (i = 0; i < num_buffers; i++)
+		intel_copy_bo(batch, dst[i], src[i], width, height);
+	for (i = num_buffers; i--; )
+		cmp_bo(dst[i], 0xdeadbeef, width, height);
+
+	/* and finally try to trick the kernel into losing the pending write */
+	for (i = num_buffers; i--; )
+		set_bo(src[i], 0xabcdabcd, width, height);
+	for (i = 0; i < num_buffers; i++)
+		intel_copy_bo(batch, dst[i], src[i], width, height);
+	for (i = num_buffers; i--; )
+		intel_copy_bo(batch, dummy, dst[i], width, height);
+	for (i = num_buffers; i--; )
+		cmp_bo(dst[i], 0xabcdabcd, width, height);
+
+	return 0;
+}
diff --git a/tests/gem_cs_prefetch.c b/tests/gem_cs_prefetch.c
new file mode 100644
index 00000000..4fb2fc4e
--- /dev/null
+++ b/tests/gem_cs_prefetch.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+/*
+ * Testcase: Test the CS prefetch behaviour on batches
+ *
+ * Historically the batch prefetcher doesn't check whether it's crossing page
+ * boundaries and likes to throw up when it gets a pagefault in return for his
+ * over-eager behaviour. Check for this.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+static void exec(int fd, uint32_t handle)
+{
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct drm_i915_gem_exec_object2 gem_exec[1];
+	int ret = 0;
+
+	gem_exec[0].handle = handle;
+	gem_exec[0].relocation_count = 0;
+	gem_exec[0].relocs_ptr = 0;
+	gem_exec[0].alignment = 0;
+	gem_exec[0].offset = 0;
+	gem_exec[0].flags = 0;
+	gem_exec[0].rsvd1 = 0;
+	gem_exec[0].rsvd2 = 0;
+
+	execbuf.buffers_ptr = (uintptr_t)gem_exec;
+	execbuf.buffer_count = 1;
+	execbuf.batch_start_offset = 0;
+	execbuf.batch_len = 4096;
+	execbuf.cliprects_ptr = 0;
+	execbuf.num_cliprects = 0;
+	execbuf.DR1 = 0;
+	execbuf.DR4 = 0;
+	execbuf.flags = 0;
+	execbuf.rsvd1 = 0;
+	execbuf.rsvd2 = 0;
+
+	ret = drmIoctl(fd,
+		       DRM_IOCTL_I915_GEM_EXECBUFFER2,
+		       &execbuf);
+	gem_sync(fd, handle);
+	assert(ret == 0);
+}
+
+int main(int argc, char **argv)
+{
+	uint32_t batch_end[4] = {MI_BATCH_BUFFER_END, 0, 0, 0};
+	int fd, i, ret;
+	uint64_t aper_size;
+	int count;
+	drm_intel_bo *sample_batch_bo;
+
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	if (!bufmgr) {
+		fprintf(stderr, "failed to init libdrm\n");
+		exit(-1);
+	}
+
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	
+	aper_size = gem_aperture_size(fd);
+
+	/* presume a big per-bo overhead */
+	if (intel_get_total_ram_mb() < (aper_size / (1024*1024)) * 3 / 2) {
+		fprintf(stderr, "not enough mem to run test\n");
+		return 77;
+	}
+
+	count = aper_size / 4096;
+
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+	assert(batch);
+
+	sample_batch_bo = drm_intel_bo_alloc(bufmgr, "", 4096, 4096);
+	assert(sample_batch_bo);
+	ret = drm_intel_bo_subdata(sample_batch_bo, 4096-sizeof(batch_end),
+				   sizeof(batch_end), batch_end);
+	assert(ret == 0);
+
+	/* fill the entire gart with batches and run them */
+	for (i = 0; i < count; i++) {
+		drm_intel_bo *batch_bo;
+
+		batch_bo = drm_intel_bo_alloc(bufmgr, "", 4096, 4096);
+		assert(batch_bo);
+
+		/* copy the sample batch with the gpu to the new one, so that we
+		 * also test the unmappable part of the gtt. */
+		BEGIN_BATCH(8);
+		OUT_BATCH(XY_SRC_COPY_BLT_CMD |
+			  XY_SRC_COPY_BLT_WRITE_ALPHA |
+			  XY_SRC_COPY_BLT_WRITE_RGB);
+		OUT_BATCH((3 << 24) | /* 32 bits */
+			  (0xcc << 16) | /* copy ROP */
+			  4096);
+		OUT_BATCH(0); /* dst y1,x1 */
+		OUT_BATCH((1 << 16) | 1024);
+		OUT_RELOC(batch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+		OUT_BATCH((0 << 16) | 0); /* src x1, y1 */
+		OUT_BATCH(4096);
+		OUT_RELOC(sample_batch_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+		ADVANCE_BATCH();
+
+		intel_batchbuffer_flush(batch);
+		if (i % 100 == 0)
+			gem_sync(fd, batch_bo->handle);
+
+		drm_intel_bo_disable_reuse(batch_bo);
+
+		/* launch the newly created batch */
+		exec(fd, batch_bo->handle);
+
+		// leak buffers
+		//drm_intel_bo_unreference(batch_bo);
+		drmtest_progress("gem_cs_prefetch: ", i, count);
+	}
+
+	fprintf(stderr, "Test suceeded, cleanup up - this might take a while.\n");
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_ctx_bad_destroy.c b/tests/gem_ctx_bad_destroy.c
new file mode 100644
index 00000000..02d24eb3
--- /dev/null
+++ b/tests/gem_ctx_bad_destroy.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+
+/*
+ * Negative test cases for destroy contexts
+  */
+
+#include <stdio.h>
+#include <string.h>
+#include "i915_drm.h"
+#include "drmtest.h"
+
+struct local_drm_i915_context_create {
+	__u32 ctx_id;
+	__u32 pad;
+};
+
+struct local_drm_i915_context_destroy {
+	__u32 ctx_id;
+	__u32 pad;
+};
+
+#define CONTEXT_CREATE_IOCTL DRM_IOWR(DRM_COMMAND_BASE + 0x2d, struct local_drm_i915_context_create)
+#define CONTEXT_DESTROY_IOCTL DRM_IOWR(DRM_COMMAND_BASE + 0x2e, struct local_drm_i915_context_destroy)
+
+static uint32_t context_create(int fd)	/* returns the new ctx_id, or exits */
+{
+	struct local_drm_i915_context_create create = { 0 };	/* no indeterminate pad */
+	int ret;
+
+	ret = drmIoctl(fd, CONTEXT_CREATE_IOCTL, &create);
+	if (ret == -1 && (errno == ENODEV || errno == EINVAL))
+		exit(77);	/* presumably the harness's "skipped" code -- TODO confirm */
+	else if (ret)
+		abort();	/* any other failure is fatal */
+
+	return create.ctx_id;
+}
+
+static void handle_bad(int ret, int lerrno, int expected, const char *desc)
+{
+	if (ret == 0) {	/* the call was supposed to fail */
+		fprintf(stderr, "%s - Command succeeded, but should have failed\n",
+			desc);
+		exit(EXIT_FAILURE);
+	} else if (lerrno != expected) {	/* failed, but with the wrong errno */
+		fprintf(stderr, "%s - errno was %d, but should have been %d\n",
+				desc, lerrno, expected);
+		exit(EXIT_FAILURE);
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	struct local_drm_i915_context_destroy destroy = { 0 };	/* no indeterminate pad */
+	uint32_t ctx_id;
+	int ret, fd;
+
+	fd = drm_open_any();
+
+	ctx_id = context_create(fd);
+
+	destroy.ctx_id = ctx_id;
+	/* Make sure a proper destroy works first */
+	ret = drmIoctl(fd, CONTEXT_DESTROY_IOCTL, &destroy);
+	assert(ret == 0);
+
+	/* try double destroy */
+	ret = drmIoctl(fd, CONTEXT_DESTROY_IOCTL, &destroy);
+	handle_bad(ret, errno, ENOENT, "double destroy");
+
+	/* destroy something random */
+	destroy.ctx_id = 2;
+	ret = drmIoctl(fd, CONTEXT_DESTROY_IOCTL, &destroy);
+	handle_bad(ret, errno, ENOENT, "random destroy");
+
+	/* Try to destroy the default context */
+	destroy.ctx_id = 0;
+	ret = drmIoctl(fd, CONTEXT_DESTROY_IOCTL, &destroy);
+	handle_bad(ret, errno, ENOENT, "default destroy");
+
+	close(fd);
+
+	exit(EXIT_SUCCESS);
+}
diff --git a/tests/gem_ctx_bad_exec.c b/tests/gem_ctx_bad_exec.c
new file mode 100644
index 00000000..8a57fd41
--- /dev/null
+++ b/tests/gem_ctx_bad_exec.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+
+/*
+ * Negative test cases:
+ *  test we can't submit contexts to unsupported rings
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+
+struct local_drm_i915_gem_context_create {
+	__u32 ctx_id;
+	__u32 pad;
+};
+
+#define CONTEXT_CREATE_IOCTL DRM_IOWR(DRM_COMMAND_BASE + 0x2d, struct local_drm_i915_gem_context_create)
+
+static uint32_t context_create(int fd)	/* returns the new ctx_id, or exits */
+{
+	struct local_drm_i915_gem_context_create create = { 0 };	/* no indeterminate pad */
+	int ret;
+
+	ret = drmIoctl(fd, CONTEXT_CREATE_IOCTL, &create);
+	if (ret == -1 && (errno == ENODEV || errno == EINVAL)) {
+		exit(77);	/* presumably the harness's "skipped" code -- TODO confirm */
+	} else if (ret) {
+		abort();	/* any other failure is fatal */
+	}
+
+	return create.ctx_id;
+}
+
+/* Copied from gem_exec_nop.c */
+static int exec(int fd, uint32_t handle, int ring, int ctx_id)
+{
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct drm_i915_gem_exec_object2 gem_exec;
+	int ret = 0;
+
+	gem_exec.handle = handle;
+	gem_exec.relocation_count = 0;
+	gem_exec.relocs_ptr = 0;
+	gem_exec.alignment = 0;
+	gem_exec.offset = 0;
+	gem_exec.flags = 0;
+	gem_exec.rsvd1 = 0;
+	gem_exec.rsvd2 = 0;
+
+	execbuf.buffers_ptr = (uintptr_t)&gem_exec;
+	execbuf.buffer_count = 1;
+	execbuf.batch_start_offset = 0;
+	execbuf.batch_len = 8;
+	execbuf.cliprects_ptr = 0;
+	execbuf.num_cliprects = 0;
+	execbuf.DR1 = 0;
+	execbuf.DR4 = 0;
+	execbuf.flags = ring;
+	i915_execbuffer2_set_context_id(execbuf, ctx_id);
+	execbuf.rsvd2 = 0;
+
+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
+			&execbuf);
+	gem_sync(fd, handle);
+
+	return ret;
+}
+
+#define MI_BATCH_BUFFER_END	(0xA<<23)
+int main(int argc, char *argv[])
+{
+	uint32_t handle;
+	uint32_t batch[2] = {MI_BATCH_BUFFER_END};
+	uint32_t ctx_id;
+	int fd;
+	fd = drm_open_any();
+
+	ctx_id = context_create(fd);
+
+	handle = gem_create(fd, 4096);
+	gem_write(fd, handle, 0, batch, sizeof(batch));
+	assert(exec(fd, handle, I915_EXEC_RENDER, ctx_id) == 0);
+	assert(exec(fd, handle, I915_EXEC_BSD, ctx_id) != 0);
+	assert(exec(fd, handle, I915_EXEC_BLT, ctx_id) != 0);
+
+	exit(EXIT_SUCCESS);
+}
diff --git a/tests/gem_ctx_basic.c b/tests/gem_ctx_basic.c
new file mode 100644
index 00000000..632651ae
--- /dev/null
+++ b/tests/gem_ctx_basic.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+
+/*
+ * This test is useful for finding memory and refcount leaks.
+ */
+
+#include <pthread.h>
+#include "rendercopy.h"
+
+/* options */
+int num_contexts = 10; /* number of worker threads main() starts (-c) */
+int uncontexted = 0; /* -u: workers call gen6_render_copyfunc() instead of flushing with a context */
+int multiple_fds = 1; /* workers open their own drm fd (-m sets it to 1 as well) */
+int iter = 10000; /* loop iterations per worker (-i) */
+
+/* globals */
+pthread_t *threads; /* num_contexts entries, allocated in main() */
+int *returns; /* per-worker status, indexed by the worker's thread id */
+int devid; /* result of intel_get_drm_devid() on the fd opened in main() */
+int fd; /* drm fd opened in main(); work() reads it when !multiple_fds */
+
+/*
+ * Allocate an untiled bo of @size bytes from @bufmgr and describe it in
+ * @buf. Asserts that the allocation succeeded.
+ */
+static void init_buffer(drm_intel_bufmgr *bufmgr,
+			struct scratch_buf *buf,
+			uint32_t size)
+{
+	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "", size, 4096);
+
+	assert(bo);
+
+	buf->bo = bo;
+	buf->size = size;
+	buf->stride = 4096;
+	buf->tiling = I915_TILING_NONE;
+}
+
+/*
+ * Worker thread body.
+ *
+ * @arg points to an int holding this worker's index into returns[]. The
+ * worker sets up its own bufmgr, batchbuffer and context, runs `iter`
+ * iterations and terminates through pthread_exit() with a pointer to its
+ * returns[] slot. The slot is set to 77 when no context could be created.
+ */
+static void *work(void *arg)
+{
+	struct intel_batchbuffer *batch;
+	drm_intel_context *context;
+	drm_intel_bufmgr *bufmgr;
+	int thread_id = *(int *)arg;
+	int td_fd;
+	int i;
+
+	/*
+	 * Open a private fd without storing it in the global `fd`: every
+	 * worker writing that shared variable is a data race and makes
+	 * main() lose the descriptor it opened itself.
+	 */
+	if (multiple_fds)
+		td_fd = drm_open_any();
+	else
+		td_fd = fd;
+
+	assert(td_fd >= 0);
+
+	bufmgr = drm_intel_bufmgr_gem_init(td_fd, 4096);
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+	context = drm_intel_gem_context_create(bufmgr);
+
+	if (!context) {
+		returns[thread_id] = 77;
+		goto out;
+	}
+
+	for (i = 0; i < iter; i++) {
+		struct scratch_buf src, dst;
+
+		/* NOTE(review): src.bo/dst.bo are never unreferenced here - confirm this is intended. */
+		init_buffer(bufmgr, &src, 4096);
+		init_buffer(bufmgr, &dst, 4096);
+
+
+		if (uncontexted) {
+			gen6_render_copyfunc(batch, &src, 0, 0, 0, 0, &dst, 0, 0);
+		} else {
+			int ret;
+			ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
+			assert(ret == 0);
+			intel_batchbuffer_flush_with_context(batch, context);
+		}
+	}
+
+out:
+	/* Reached with context == NULL when context creation failed above. */
+	drm_intel_gem_context_destroy(context);
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	if (multiple_fds)
+		close(td_fd);
+
+	pthread_exit(&returns[thread_id]);
+}
+
+/*
+ * Apply command line options to the option globals:
+ *   -i N  sets iter, -c N sets num_contexts, -m sets multiple_fds,
+ *   -u    sets uncontexted.
+ * Any other option getopt() returns (including -h, -? and -n, which has no
+ * handler) ends the program with EXIT_SUCCESS.
+ */
+static void parse(int argc, char *argv[])
+{
+	int c;
+
+	for (;;) {
+		c = getopt(argc, argv, "i:c:n:muh?");
+		if (c == -1)
+			return;
+
+		if (c == 'i')
+			iter = atoi(optarg);
+		else if (c == 'c')
+			num_contexts = atoi(optarg);
+		else if (c == 'm')
+			multiple_fds = 1;
+		else if (c == 'u')
+			uncontexted = 1;
+		else
+			exit(EXIT_SUCCESS);
+	}
+}
+
+/*
+ * Start num_contexts worker threads, wait for all of them and exit with
+ * the first non-zero status a successfully joined worker reported.
+ */
+int main(int argc, char *argv[])
+{
+	int *thread_ids;
+	int i, ret;
+
+	fd = drm_open_any();
+	devid = intel_get_drm_devid(fd);
+
+	parse(argc, argv);
+
+	threads = calloc(num_contexts, sizeof(*threads));
+	returns = calloc(num_contexts, sizeof(*returns));
+	thread_ids = calloc(num_contexts, sizeof(*thread_ids));
+	/* With no workers requested the arrays are never touched. */
+	assert(num_contexts <= 0 || (threads && returns && thread_ids));
+
+	for (i = 0; i < num_contexts; i++) {
+		/*
+		 * Hand every worker its own stable slot. Passing &i let a
+		 * worker read the counter after the loop had advanced it,
+		 * up to and including the out-of-range value num_contexts.
+		 */
+		thread_ids[i] = i;
+		ret = pthread_create(&threads[i], NULL, work, &thread_ids[i]);
+		assert(ret == 0);
+	}
+
+	for (i = 0; i < num_contexts; i++) {
+		void *retval = NULL;
+		int thread_status;
+
+		ret = pthread_join(threads[i], &retval);
+		/* Only look at the status when the join produced one. */
+		if (ret || !retval)
+			continue;
+
+		thread_status = *(int *)retval;
+		if (thread_status)
+			exit(thread_status);
+	}
+
+	free(thread_ids);
+	free(returns);
+	free(threads);
+	close(fd);
+
+	exit(EXIT_SUCCESS);
+}
diff --git a/tests/gem_ctx_create.c b/tests/gem_ctx_create.c
new file mode 100644
index 00000000..def76d34
--- /dev/null
+++ b/tests/gem_ctx_create.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include "i915_drm.h"
+#include "drmtest.h"
+
+/*
+ * Locally declared argument block for the context-create ioctl.
+ * NOTE(review): presumably mirrors struct drm_i915_gem_context_create from
+ * the kernel uapi header - confirm.
+ */
+struct local_drm_i915_gem_context_create {
+	__u32 ctx_id;	/* main() asserts this is non-zero after a successful ioctl */
+	__u32 pad;
+};
+
+/* Read/write driver ioctl at DRM_COMMAND_BASE + 0x2d taking the struct above. */
+#define CONTEXT_CREATE_IOCTL DRM_IOWR(DRM_COMMAND_BASE + 0x2d, struct local_drm_i915_gem_context_create)
+
+/*
+ * Issue one context-create ioctl with arbitrary input values and check
+ * that a non-zero context id comes back. Exits with 77 when the ioctl
+ * fails with ENODEV or EINVAL and with EXIT_FAILURE on any other error.
+ */
+int main(int argc, char *argv[])
+{
+	struct local_drm_i915_gem_context_create create;
+	int fd, err;
+
+	/* Both fields are filled from rand() before the call. */
+	create.ctx_id = rand();
+	create.pad = rand();
+
+	fd = drm_open_any();
+
+	err = drmIoctl(fd, CONTEXT_CREATE_IOCTL, &create);
+	if (err != 0) {
+		if (errno == ENODEV || errno == EINVAL) {
+			fprintf(stderr, "Kernel is too old, or contexts not supported: %s\n",
+				strerror(errno));
+			exit(77);
+		}
+
+		fprintf(stderr, "%s\n", strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	assert(create.ctx_id != 0);
+
+	close(fd);
+
+	exit(EXIT_SUCCESS);
+}
diff --git a/tests/gem_ctx_exec.c b/tests/gem_ctx_exec.c
new file mode 100644
index 00000000..423f1eec
--- /dev/null
+++ b/tests/gem_ctx_exec.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+
+/*
+ * This test covers basic context switch functionality
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+
+/*
+ * Locally declared argument blocks for the context create/destroy ioctls.
+ * NOTE(review): presumably mirror struct drm_i915_gem_context_create and
+ * struct drm_i915_gem_context_destroy from the kernel uapi header - confirm.
+ */
+struct local_drm_i915_gem_context_create {
+	__u32 ctx_id;
+	__u32 pad;
+};
+
+struct local_drm_i915_gem_context_destroy {
+	__u32 ctx_id;
+	__u32 pad;
+};
+
+#define CONTEXT_CREATE_IOCTL DRM_IOWR(DRM_COMMAND_BASE + 0x2d, struct local_drm_i915_gem_context_create)
+/*
+ * Context destroy is driver command 0x2e, directly after create (0x2d).
+ * The previous value 0x23 is DRM_I915_GEM_GET_APERTURE, so the "destroy"
+ * call never reached the context-destroy handler.
+ */
+#define CONTEXT_DESTROY_IOCTL DRM_IOWR(DRM_COMMAND_BASE + 0x2e, struct local_drm_i915_gem_context_destroy)
+
+/*
+ * Create a context on @fd and return the id the kernel wrote back.
+ *
+ * Exits with status 77 when the ioctl fails with ENODEV or EINVAL and
+ * aborts on any other failure, so it only returns on success.
+ */
+static uint32_t context_create(int fd)
+{
+	/* Zero the argument block so no stack garbage is handed to the kernel. */
+	struct local_drm_i915_gem_context_create create = { 0 };
+	int ret;
+
+	ret = drmIoctl(fd, CONTEXT_CREATE_IOCTL, &create);
+	if (ret == -1 && (errno == ENODEV || errno == EINVAL))
+		exit(77);
+	else if (ret)
+		abort();
+
+	return create.ctx_id;
+}
+
+/*
+ * Destroy context @ctx_id on @fd through do_ioctl().
+ */
+static void context_destroy(int fd, uint32_t ctx_id)
+{
+	/* Zero-initialise so the pad field is not passed down as stack garbage. */
+	struct local_drm_i915_gem_context_destroy destroy = { 0 };
+
+	destroy.ctx_id = ctx_id;
+	do_ioctl(fd, CONTEXT_DESTROY_IOCTL, &destroy);
+}
+
+/*
+ * Submit @handle as an 8 byte batch on @ring using context @ctx_id, then
+ * gem_sync() on it. Returns the drmIoctl() result of the execbuffer2 call.
+ * (Copied from gem_exec_nop.c.)
+ */
+static int exec(int fd, uint32_t handle, int ring, int ctx_id)
+{
+	/* The only buffer object is the batch itself; it has no relocations. */
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = handle,
+	};
+	/* Every field not named here is zero, as in an explicit field-by-field setup. */
+	struct drm_i915_gem_execbuffer2 eb = {
+		.buffers_ptr = (uintptr_t)&obj,
+		.buffer_count = 1,
+		.batch_len = 8,
+		.flags = ring,
+	};
+	int rc;
+
+	i915_execbuffer2_set_context_id(eb, ctx_id);
+
+	rc = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &eb);
+	gem_sync(fd, handle);
+
+	return rc;
+}
+
+#define MI_BATCH_BUFFER_END	(0xA<<23)
+/*
+ * Create a context, run a trivial batch with it on the render ring and
+ * destroy it again; then repeat with a second, freshly created context.
+ * Both submissions are expected to succeed.
+ */
+int main(int argc, char *argv[])
+{
+	uint32_t handle;
+	uint32_t batch[2] = {0, MI_BATCH_BUFFER_END};
+	uint32_t ctx_id;
+	int fd;
+	int ret;
+
+	fd = drm_open_any();
+
+	ctx_id = context_create(fd);
+	handle = gem_create(fd, 4096);
+
+	gem_write(fd, handle, 0, batch, sizeof(batch));
+
+	/*
+	 * Keep the submissions outside assert(): with NDEBUG the assert
+	 * argument is not evaluated and no batch would be submitted at all.
+	 */
+	ret = exec(fd, handle, I915_EXEC_RENDER, ctx_id);
+	assert(ret == 0);
+	context_destroy(fd, ctx_id);
+
+	ctx_id = context_create(fd);
+	ret = exec(fd, handle, I915_EXEC_RENDER, ctx_id);
+	assert(ret == 0);
+	(void)ret; /* silences set-but-unused warnings when NDEBUG is defined */
+	context_destroy(fd, ctx_id);
+
+	exit(EXIT_SUCCESS);
+}
diff --git a/tests/gem_double_irq_loop.c b/tests/gem_double_irq_loop.c
new file mode 100644
index 00000000..f2f8b1a8
--- /dev/null
+++ b/tests/gem_double_irq_loop.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch> (based on gem_storedw_*.c)
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+#include "i830_reg.h"
+
+/* Buffer manager created in main() and used for every allocation below. */
+static drm_intel_bufmgr *bufmgr;
+/* Not static; NOTE(review): presumably the BEGIN_BATCH/OUT_BATCH macros expect a variable named `batch` - confirm. */
+struct intel_batchbuffer *batch;
+/* target_buffer: 4 KiB bo mapped to wait for completion; blt_bo: large bo the blit copies within. */
+static drm_intel_bo *target_buffer, *blt_bo;
+
+/*
+ * Testcase: Basic check for missed irqs on blt
+ *
+ * Execs one large and then immediately a tiny batch on the blt ring. Then waits
+ * on the second batch. This hopefully catches races in our irq acknowledgement.
+ */
+
+
+/* NOTE(review): neither macro is referenced by the code of this test that is visible here. */
+#define MI_COND_BATCH_BUFFER_END	(0x36<<23 | 1)
+#define MI_DO_COMPARE			(1<<21)
+/*
+ * Run 0x800 iterations of: flush one 8-dword XY_SRC_COPY_BLT batch whose
+ * source and destination are both blt_bo, flush a 4-dword MI_FLUSH_DW batch
+ * that relocates against target_buffer, then map and unmap target_buffer.
+ */
+static void
+dummy_reloc_loop(void)
+{
+	int i;
+
+	for (i = 0; i < 0x800; i++) {
+		/* First batch: the large blit. */
+		BEGIN_BATCH(8);
+		OUT_BATCH(XY_SRC_COPY_BLT_CMD |
+			  XY_SRC_COPY_BLT_WRITE_ALPHA |
+			  XY_SRC_COPY_BLT_WRITE_RGB);
+		OUT_BATCH((3 << 24) | /* 32 bits */
+			  (0xcc << 16) | /* copy ROP */
+			  4*4096);
+		/* NOTE(review): presumably destination rectangle corners packed as y << 16 | x - confirm against the blitter docs. */
+		OUT_BATCH(2048 << 16 | 0);
+		OUT_BATCH((4096) << 16 | (2048));
+		OUT_RELOC_FENCED(blt_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+		OUT_BATCH(0 << 16 | 0);
+		OUT_BATCH(4*4096);
+		OUT_RELOC_FENCED(blt_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+		ADVANCE_BATCH();
+		intel_batchbuffer_flush(batch);
+
+		/* Second batch: the tiny one, referencing target_buffer. */
+		BEGIN_BATCH(4);
+		OUT_BATCH(MI_FLUSH_DW | 1);
+		OUT_BATCH(0); /* reserved */
+		OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
+				I915_GEM_DOMAIN_RENDER, 0);
+		OUT_BATCH(MI_NOOP | (1<<22) | (0xf));
+		ADVANCE_BATCH();
+		intel_batchbuffer_flush(batch);
+
+		drm_intel_bo_map(target_buffer, 0);
+		// map to force completion
+		drm_intel_bo_unmap(target_buffer);
+	}
+}
+
+/*
+ * Set up the bufmgr, batchbuffer and both bos, run dummy_reloc_loop() and
+ * tear everything down again. Returns 77 when HAS_BLT_RING() is false for
+ * the device and exits with -1 on usage or setup errors.
+ */
+int main(int argc, char **argv)
+{
+	int fd;
+	int devid;
+
+	if (argc != 1) {
+		fprintf(stderr, "usage: %s\n", argv[0]);
+		exit(-1);
+	}
+
+	fd = drm_open_any();
+	devid = intel_get_drm_devid(fd);
+	if (!HAS_BLT_RING(devid)) {
+		fprintf(stderr, "not (yet) implemented for pre-snb\n");
+		return 77;
+	}
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	if (!bufmgr) {
+		fprintf(stderr, "failed to init libdrm\n");
+		exit(-1);
+	}
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+	if (!batch) {
+		fprintf(stderr, "failed to create batch buffer\n");
+		exit(-1);
+	}
+
+	target_buffer = drm_intel_bo_alloc(bufmgr, "target bo", 4096, 4096);
+	if (!target_buffer) {
+		fprintf(stderr, "failed to alloc target buffer\n");
+		exit(-1);
+	}
+
+	blt_bo = drm_intel_bo_alloc(bufmgr, "target bo", 4*4096*4096, 4096);
+	if (!blt_bo) {
+		fprintf(stderr, "failed to alloc blt buffer\n");
+		exit(-1);
+	}
+
+	dummy_reloc_loop();
+
+	/* Drop both bos; blt_bo used to be left referenced at teardown. */
+	drm_intel_bo_unreference(blt_bo);
+	drm_intel_bo_unreference(target_buffer);
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_dummy_reloc_loop.c b/tests/gem_dummy_reloc_loop.c
new file mode 100644
index 00000000..82d8f77e
--- /dev/null
+++ b/tests/gem_dummy_reloc_loop.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch> (based on gem_storedw_*.c)
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+#include "i830_reg.h"
+
+/* Buffer manager created in main() and used for every allocation below. */
+static drm_intel_bufmgr *bufmgr;
+/* Not static; NOTE(review): presumably the BEGIN_BATCH/OUT_BATCH macros expect a variable named `batch` - confirm. */
+struct intel_batchbuffer *batch;
+/* 4 KiB bo every batch relocates against; mapped afterwards to wait for the GPU. */
+static drm_intel_bo *target_buffer;
+
+/*
+ * Testcase: Basic check of ring<->cpu sync using a dummy reloc
+ *
+ * The last test (that randomly switches the ring) seems to be pretty effective
+ * at hitting the missed irq bug that's worked around with the HWSTAM irq write.
+ */
+
+
+/* Command dword and flag emitted first in the render-ring batches below. */
+#define MI_COND_BATCH_BUFFER_END	(0x36<<23 | 1)
+#define MI_DO_COMPARE			(1<<21)
+/*
+ * Run 0x100000 iterations on @ring of: emit a 4-dword batch containing one
+ * relocation against target_buffer, flush it on @ring, then map and unmap
+ * target_buffer.
+ *
+ * For I915_EXEC_RENDER the batch starts with MI_COND_BATCH_BUFFER_END; for
+ * any other ring value it starts with MI_FLUSH_DW.
+ */
+static void
+dummy_reloc_loop(int ring)
+{
+	int i;
+
+	for (i = 0; i < 0x100000; i++) {
+		if (ring == I915_EXEC_RENDER) {
+			BEGIN_BATCH(4);
+			OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
+			OUT_BATCH(0xffffffff); /* compare dword */
+			OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
+					I915_GEM_DOMAIN_RENDER, 0);
+			OUT_BATCH(MI_NOOP);
+			ADVANCE_BATCH();
+		} else {
+			BEGIN_BATCH(4);
+			OUT_BATCH(MI_FLUSH_DW | 1);
+			OUT_BATCH(0); /* reserved */
+			OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
+					I915_GEM_DOMAIN_RENDER, 0);
+			OUT_BATCH(MI_NOOP | (1<<22) | (0xf));
+			ADVANCE_BATCH();
+		}
+		intel_batchbuffer_flush_on_ring(batch, ring);
+
+		drm_intel_bo_map(target_buffer, 0);
+		// map to force completion
+		drm_intel_bo_unmap(target_buffer);
+	}
+}
+
+/*
+ * Same per-iteration work as dummy_reloc_loop(), but the ring is picked
+ * anew for each of the 0x100000 iterations. The generator is seeded with a
+ * fixed value, so the ring sequence is the same on every run.
+ */
+static void
+dummy_reloc_loop_random_ring(void)
+{
+	int i;
+
+	srandom(0xdeadbeef);
+
+	for (i = 0; i < 0x100000; i++) {
+		/* Ring value in the range 1..3. */
+		int ring = random() % 3 + 1;
+
+		if (ring == I915_EXEC_RENDER) {
+			BEGIN_BATCH(4);
+			OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
+			OUT_BATCH(0xffffffff); /* compare dword */
+			OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
+					I915_GEM_DOMAIN_RENDER, 0);
+			OUT_BATCH(MI_NOOP);
+			ADVANCE_BATCH();
+		} else {
+			BEGIN_BATCH(4);
+			OUT_BATCH(MI_FLUSH_DW | 1);
+			OUT_BATCH(0); /* reserved */
+			OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
+					I915_GEM_DOMAIN_RENDER, 0);
+			OUT_BATCH(MI_NOOP | (1<<22) | (0xf));
+			ADVANCE_BATCH();
+		}
+		intel_batchbuffer_flush_on_ring(batch, ring);
+
+		drm_intel_bo_map(target_buffer, 0);
+		// map to force waiting on rendering
+		drm_intel_bo_unmap(target_buffer);
+	}
+}
+
+/*
+ * Run the dummy reloc loop on the render ring, then - as far as the device
+ * has them - on the bsd ring, the blt ring and finally on randomly chosen
+ * rings. Returns 77 when HAS_BLT_RING() is false for the device and exits
+ * with -1 on usage or setup errors.
+ */
+int main(int argc, char **argv)
+{
+	int fd;
+	int devid;
+
+	if (argc != 1) {
+		fprintf(stderr, "usage: %s\n", argv[0]);
+		exit(-1);
+	}
+
+	fd = drm_open_any();
+	devid = intel_get_drm_devid(fd);
+	if (!HAS_BLT_RING(devid)) {
+		fprintf(stderr, "not (yet) implemented for pre-snb\n");
+		return 77;
+	}
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	if (!bufmgr) {
+		fprintf(stderr, "failed to init libdrm\n");
+		exit(-1);
+	}
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+	if (!batch) {
+		fprintf(stderr, "failed to create batch buffer\n");
+		exit(-1);
+	}
+
+	target_buffer = drm_intel_bo_alloc(bufmgr, "target bo", 4096, 4096);
+	if (!target_buffer) {
+		fprintf(stderr, "failed to alloc target buffer\n");
+		exit(-1);
+	}
+
+	fprintf(stderr, "running dummy loop on render\n");
+	dummy_reloc_loop(I915_EXEC_RENDER);
+	fprintf(stderr, "dummy loop run on render completed\n");
+
+	/* The bsd stage gates everything after it; blt and random need blt too. */
+	if (HAS_BSD_RING(devid)) {
+		sleep(2);
+		fprintf(stderr, "running dummy loop on bsd\n");
+		dummy_reloc_loop(I915_EXEC_BSD);
+		fprintf(stderr, "dummy loop run on bsd completed\n");
+
+		if (HAS_BLT_RING(devid)) {
+			sleep(2);
+			fprintf(stderr, "running dummy loop on blt\n");
+			dummy_reloc_loop(I915_EXEC_BLT);
+			fprintf(stderr, "dummy loop run on blt completed\n");
+
+			sleep(2);
+			fprintf(stderr, "running dummy loop on random rings\n");
+			dummy_reloc_loop_random_ring();
+			fprintf(stderr, "dummy loop run on random rings completed\n");
+		}
+	}
+
+	drm_intel_bo_unreference(target_buffer);
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_exec_bad_domains.c b/tests/gem_exec_bad_domains.c
new file mode 100644
index 00000000..f3ee08b7
--- /dev/null
+++ b/tests/gem_exec_bad_domains.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+/* Testcase: Test whether the kernel rejects relocations with non-gpu domains
+ *
+ * If it does not, it'll oops sometime later on because we don't expect that.
+ */
+
+/* Buffer manager created in main(). */
+static drm_intel_bufmgr *bufmgr;
+/* Batch that main() fills and run_batch() submits and resets. */
+struct intel_batchbuffer *batch;
+
+/* NOTE(review): not referenced by the code of this test that is visible here. */
+#define BAD_GTT_DEST ((512*1024*1024)) /* past end of aperture */
+
+/*
+ * Terminate the global batch, upload it and execute it, then reset the
+ * batch for the next use.
+ *
+ * Returns 0 without submitting anything when the batch is empty, otherwise
+ * the result of drm_intel_bo_mrb_exec().
+ */
+static int
+run_batch(void)
+{
+	unsigned int nbytes = batch->ptr - batch->buffer;
+	int err;
+
+	if (!nbytes)
+		return 0;
+
+	/*
+	 * Emit a zero (noop) dword when bit 2 of the length is clear, before
+	 * the terminator below is appended.
+	 */
+	if (!(nbytes & 4)) {
+		*(uint32_t *)(batch->ptr) = 0;
+		batch->ptr += 4;
+	}
+
+	/* Terminate the batch and recompute its length. */
+	*(uint32_t *)(batch->ptr) = MI_BATCH_BUFFER_END;
+	batch->ptr += 4;
+	nbytes = batch->ptr - batch->buffer;
+
+	err = drm_intel_bo_subdata(batch->bo, 0, nbytes, batch->buffer);
+	assert(err == 0);
+
+	batch->ptr = NULL;
+
+	err = drm_intel_bo_mrb_exec(batch->bo, nbytes, NULL, 0, 0, 0);
+
+	intel_batchbuffer_reset(batch);
+
+	return err;
+}
+
+/*
+ * Submit four batches, each carrying one relocation with a cpu or gtt
+ * read/write domain, and require run_batch() to return -EINVAL every
+ * time. Exits with 1 as soon as one of them is not rejected.
+ */
+int main(int argc, char **argv)
+{
+	int fd, ret;
+	drm_intel_bo *tmp;
+
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+	/* Relocation target shared by all four batches. */
+	tmp = drm_intel_bo_alloc(bufmgr, "tmp", 128 * 128, 4096);
+
+	BEGIN_BATCH(2);
+	OUT_BATCH(0);
+	OUT_RELOC(tmp, I915_GEM_DOMAIN_CPU, 0, 0);
+	ADVANCE_BATCH();
+	ret = run_batch();
+	if (ret != -EINVAL) {
+		fprintf(stderr, "(cpu, 0) reloc not rejected\n");
+		exit(1);
+	}
+
+	BEGIN_BATCH(2);
+	OUT_BATCH(0);
+	OUT_RELOC(tmp, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU, 0);
+	ADVANCE_BATCH();
+	ret = run_batch();
+	if (ret != -EINVAL) {
+		fprintf(stderr, "(cpu, cpu) reloc not rejected\n");
+		exit(1);
+	}
+
+	BEGIN_BATCH(2);
+	OUT_BATCH(0);
+	OUT_RELOC(tmp, I915_GEM_DOMAIN_GTT, 0, 0);
+	ADVANCE_BATCH();
+	ret = run_batch();
+	if (ret != -EINVAL) {
+		fprintf(stderr, "(gtt, 0) reloc not rejected\n");
+		exit(1);
+	}
+
+	BEGIN_BATCH(2);
+	OUT_BATCH(0);
+	OUT_RELOC(tmp, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT, 0);
+	ADVANCE_BATCH();
+	ret = run_batch();
+	if (ret != -EINVAL) {
+		fprintf(stderr, "(gtt, gtt) reloc not rejected\n");
+		exit(1);
+	}
+
+	/* Release the relocation target; it used to be left referenced. */
+	drm_intel_bo_unreference(tmp);
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_exec_blt.c b/tests/gem_exec_blt.c
new file mode 100644
index 00000000..eb5ae668
--- /dev/null
+++ b/tests/gem_exec_blt.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_chipset.h"
+#include "intel_gpu_tools.h"
+
+/* Object size in bytes that main() passes to run() when no argument is given. */
+#define OBJECT_SIZE 16384
+
+/* Command dword and flag bits used by gem_linear_blt() to build the blit. */
+#define COPY_BLT_CMD		(2<<29|0x53<<22|0x6)
+#define BLT_WRITE_ALPHA		(1<<21)
+#define BLT_WRITE_RGB		(1<<20)
+/* NOTE(review): the two tiling flags are not referenced by the code of this test that is visible here. */
+#define BLT_SRC_TILED		(1<<15)
+#define BLT_DST_TILED		(1<<11)
+
+/*
+ * Write blit commands copying @length bytes from handle @src to handle
+ * @dst into @batch, followed by MI_BATCH_BUFFER_END and a zero dword.
+ *
+ * @length is split into `height` whole 16 KiB units, emitted as one blit,
+ * plus a second blit for whatever remains. Each emitted blit takes 8 dwords
+ * and fills two entries of @reloc (destination first, then source), so
+ * @batch must hold up to 18 dwords and @reloc up to 4 entries.
+ *
+ * Returns the number of bytes written to @batch.
+ */
+static int gem_linear_blt(uint32_t *batch,
+			  uint32_t src,
+			  uint32_t dst,
+			  uint32_t length,
+			  struct drm_i915_gem_relocation_entry *reloc)
+{
+	uint32_t *b = batch;
+	int height = length / (16 * 1024);
+
+	assert(height <= 1<<16);
+
+	/* Blit for the whole 16 KiB units, if there are any. */
+	if (height) {
+		b[0] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+		b[1] = 0xcc << 16 | 1 << 25 | 1 << 24 | (16*1024);
+		b[2] = 0;
+		b[3] = height << 16 | (4*1024);
+		b[4] = 0;
+		/* Byte offset of dword 4 within the batch: patched with dst's address. */
+		reloc->offset = (b-batch+4) * sizeof(uint32_t);
+		reloc->delta = 0;
+		reloc->target_handle = dst;
+		reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+		reloc->write_domain = I915_GEM_DOMAIN_RENDER;
+		reloc->presumed_offset = 0;
+		reloc++;
+
+		b[5] = 0;
+		b[6] = 16*1024;
+		b[7] = 0;
+		/* Byte offset of dword 7 within the batch: patched with src's address. */
+		reloc->offset = (b-batch+7) * sizeof(uint32_t);
+		reloc->delta = 0;
+		reloc->target_handle = src;
+		reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+		reloc->write_domain = 0;
+		reloc->presumed_offset = 0;
+		reloc++;
+
+		b += 8;
+		length -= height * 16*1024;
+	}
+	
+	/* Blit for the remainder, placed at row `height`. */
+	if (length) {
+		b[0] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+		b[1] = 0xcc << 16 | 1 << 25 | 1 << 24 | (16*1024);
+		b[2] = height << 16;
+		b[3] = (1+height) << 16 | (length / 4);
+		b[4] = 0;
+		reloc->offset = (b-batch+4) * sizeof(uint32_t);
+		reloc->delta = 0;
+		reloc->target_handle = dst;
+		reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+		reloc->write_domain = I915_GEM_DOMAIN_RENDER;
+		reloc->presumed_offset = 0;
+		reloc++;
+
+		b[5] = height << 16;
+		b[6] = 16*1024;
+		b[7] = 0;
+		reloc->offset = (b-batch+7) * sizeof(uint32_t);
+		reloc->delta = 0;
+		reloc->target_handle = src;
+		reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+		reloc->write_domain = 0;
+		reloc->presumed_offset = 0;
+		reloc++;
+
+		b += 8;
+	}
+
+	b[0] = MI_BATCH_BUFFER_END;
+	b[1] = 0;
+
+	/* Length includes the terminator and the trailing zero dword. */
+	return (b+2 - batch) * sizeof(uint32_t);
+}
+
+/*
+ * Submit @execbuf @loops times, stopping at the first ioctl that does not
+ * return 0. Returns the result of the last ioctl issued, or 0 when none was.
+ */
+static int gem_exec(int fd, struct drm_i915_gem_execbuffer2 *execbuf, int loops)
+{
+	int rc = 0;
+
+	for (; loops != 0 && rc == 0; loops--)
+		rc = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
+
+	return rc;
+}
+
+/*
+ * Time between @start and @end in microseconds, divided by @loop.
+ */
+static double elapsed(const struct timeval *start,
+		      const struct timeval *end,
+		      int loop)
+{
+	double total_us = 1e6*(end->tv_sec - start->tv_sec);
+
+	total_us += end->tv_usec - start->tv_usec;
+
+	return total_us/loop;
+}
+
+/*
+ * Format the rate @v (bytes per second) into @buf with one decimal and a
+ * binary unit: the value is divided by 1024 while it is greater than 1024
+ * and a larger unit (up to PiB) is available. Returns @buf.
+ *
+ * The caller must provide a buffer large enough for the result; no bound is
+ * applied here.
+ */
+static const char *bytes_per_sec(char *buf, double v)
+{
+	static const char *const units[] = {
+		"", "KiB", "MiB", "GiB", "TiB", "PiB",
+	};
+	const size_t last = sizeof(units)/sizeof(units[0]) - 1;
+	size_t idx = 0;
+
+	while (v > 1024 && idx < last) {
+		v /= 1024;
+		idx++;
+	}
+
+	sprintf(buf, "%.1f%s/s", v, units[idx]);
+	return buf;
+}
+
+/*
+ * Time blits of @object_size bytes: build one blit batch, then submit it
+ * 1, 2, 4, ... 1<<17 times, printing the average time per submission and
+ * the resulting throughput for every round. Exits with 1 when a submission
+ * fails.
+ */
+static void run(int object_size)
+{
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct drm_i915_gem_exec_object2 exec[3];
+	struct drm_i915_gem_relocation_entry reloc[4];
+	uint32_t buf[20];
+	uint32_t handle, src, dst;
+	int fd, len, count;
+	int ring;
+
+	fd = drm_open_any();
+	handle = gem_create(fd, 4096);
+	src = gem_create(fd, object_size);
+	dst = gem_create(fd, object_size);
+
+	len = gem_linear_blt(buf, src, dst, object_size, reloc);
+	gem_write(fd, handle, 0, buf, len);
+
+	/* Three objects: source, destination and the batch carrying the relocs. */
+	memset(exec, 0, sizeof(exec));
+	exec[0].handle = src;
+	exec[1].handle = dst;
+	exec[2].handle = handle;
+	/* A batch longer than 40 bytes holds two blits, i.e. four relocations. */
+	exec[2].relocation_count = len > 40 ? 4 : 2;
+	exec[2].relocs_ptr = (uintptr_t)reloc;
+
+	ring = 0;
+	if (HAS_BLT_RING(intel_get_drm_devid(fd)))
+		ring = I915_EXEC_BLT;
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = (uintptr_t)exec;
+	execbuf.buffer_count = 3;
+	execbuf.batch_len = len;
+	execbuf.flags = ring;
+	i915_execbuffer2_set_context_id(execbuf, 0);
+
+	for (count = 1; count <= 1<<17; count <<= 1) {
+		struct timeval start, end;
+		double usecs;
+
+		gettimeofday(&start, NULL);
+		if (gem_exec(fd, &execbuf, count))
+			exit(1);
+		gem_sync(fd, handle);
+		gettimeofday(&end, NULL);
+
+		usecs = elapsed(&start, &end, count);
+		/* buf is no longer needed for the batch and doubles as text scratch space. */
+		printf("Time to blt %d bytes x %6d:	%7.3fµs, %s\n",
+		       object_size, count,
+		       usecs,
+		       bytes_per_sec((char *)buf, object_size/usecs*1e6));
+		fflush(stdout);
+	}
+	gem_close(fd, handle);
+
+	close(fd);
+}
+
+/*
+ * Without arguments run once with OBJECT_SIZE. Otherwise treat every
+ * argument as an object size, round it up to a multiple of 4 and run with
+ * it; arguments for which atoi() yields 0 are skipped.
+ */
+int main(int argc, char **argv)
+{
+	int i;
+
+	if (argc <= 1) {
+		run(OBJECT_SIZE);
+		return 0;
+	}
+
+	for (i = 1; i < argc; i++) {
+		int object_size = atoi(argv[i]);
+
+		if (object_size == 0)
+			continue;
+
+		run((object_size + 3) & -4);
+	}
+
+	return 0;
+}
diff --git a/tests/gem_exec_faulting_reloc.c b/tests/gem_exec_faulting_reloc.c
new file mode 100644
index 00000000..863a1b03
--- /dev/null
+++ b/tests/gem_exec_faulting_reloc.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_chipset.h"
+#include "intel_gpu_tools.h"
+
+/* Testcase: Submit batches with relocations in memory that will fault
+ *
+ * To be really evil, use a gtt mmap for them.
+ */
+
+#define OBJECT_SIZE 16384
+
+#define COPY_BLT_CMD		(2<<29|0x53<<22|0x6)
+#define BLT_WRITE_ALPHA		(1<<21)
+#define BLT_WRITE_RGB		(1<<20)
+#define BLT_SRC_TILED		(1<<15)
+#define BLT_DST_TILED		(1<<11)
+/* Write a linear src->dst copy of length bytes into batch, filling reloc[]; returns the batch size in bytes. */
+static int gem_linear_blt(uint32_t *batch,
+			  uint32_t src,
+			  uint32_t dst,
+			  uint32_t length,
+			  struct drm_i915_gem_relocation_entry *reloc)
+{
+	uint32_t *b = batch;
+	int height = length / (16 * 1024);
+	/* Row numbers are shifted into the upper 16 bits; a tail blit ends at row height+1. */
+	assert(height + (length % (16*1024) != 0) < 1<<16);
+
+	if (height) {
+		b[0] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+		b[1] = 0xcc << 16 | 1 << 25 | 1 << 24 | (16*1024);
+		b[2] = 0;
+		b[3] = height << 16 | (4*1024);
+		b[4] = 0;
+		reloc->offset = (b-batch+4) * sizeof(uint32_t); /* dword 4 refers to dst */
+		reloc->delta = 0;
+		reloc->target_handle = dst;
+		reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+		reloc->write_domain = I915_GEM_DOMAIN_RENDER;
+		reloc->presumed_offset = 0;
+		reloc++;
+
+		b[5] = 0;
+		b[6] = 16*1024;
+		b[7] = 0;
+		reloc->offset = (b-batch+7) * sizeof(uint32_t); /* dword 7 refers to src */
+		reloc->delta = 0;
+		reloc->target_handle = src;
+		reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+		reloc->write_domain = 0;
+		reloc->presumed_offset = 0;
+		reloc++;
+
+		b += 8;
+		length -= height * 16*1024;
+	}
+	/* Whatever is left is shorter than one 16 KiB row: emit a second blit for it. */
+	if (length) {
+		b[0] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+		b[1] = 0xcc << 16 | 1 << 25 | 1 << 24 | (16*1024);
+		b[2] = height << 16;
+		b[3] = (1+height) << 16 | (length / 4);
+		b[4] = 0;
+		reloc->offset = (b-batch+4) * sizeof(uint32_t);
+		reloc->delta = 0;
+		reloc->target_handle = dst;
+		reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+		reloc->write_domain = I915_GEM_DOMAIN_RENDER;
+		reloc->presumed_offset = 0;
+		reloc++;
+
+		b[5] = height << 16;
+		b[6] = 16*1024;
+		b[7] = 0;
+		reloc->offset = (b-batch+7) * sizeof(uint32_t);
+		reloc->delta = 0;
+		reloc->target_handle = src;
+		reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+		reloc->write_domain = 0;
+		reloc->presumed_offset = 0;
+		reloc++;
+
+		b += 8;
+	}
+
+	b[0] = MI_BATCH_BUFFER_END;
+	b[1] = 0;
+
+	return (b+2 - batch) * sizeof(uint32_t); /* includes the two terminator dwords */
+}
+/* Submit execbuf on fd through the EXECBUFFER2 ioctl; asserts that the call returned 0. */
+static void gem_exec(int fd, struct drm_i915_gem_execbuffer2 *execbuf)
+{
+	int ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
+
+	/*
+	 * Anything other than 0 from the submission ioctl is fatal here.
+	 */
+	assert(ret == 0);
+}
+/* Blit object_size bytes from src to dst, handing the kernel relocation entries that live in a bo mapping. */
+static void run(int object_size)
+{
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct drm_i915_gem_exec_object2 exec[3];
+	struct drm_i915_gem_relocation_entry reloc[4] = {{0}}; /* all 4 are copied out below */
+	uint32_t buf[20];
+	uint32_t handle, handle_relocs, src, dst;
+	void *gtt_relocs;
+	int fd, len;
+	int ring;
+
+	fd = drm_open_any();
+	handle = gem_create(fd, 4096);
+	src = gem_create(fd, object_size);
+	dst = gem_create(fd, object_size);
+
+	len = gem_linear_blt(buf, src, dst, object_size, reloc);
+	gem_write(fd, handle, 0, buf, len);
+
+	exec[0].handle = src;
+	exec[0].relocation_count = 0;
+	exec[0].relocs_ptr = 0;
+	exec[0].alignment = 0;
+	exec[0].offset = 0;
+	exec[0].flags = 0;
+	exec[0].rsvd1 = 0;
+	exec[0].rsvd2 = 0;
+
+	exec[1].handle = dst;
+	exec[1].relocation_count = 0;
+	exec[1].relocs_ptr = 0;
+	exec[1].alignment = 0;
+	exec[1].offset = 0;
+	exec[1].flags = 0;
+	exec[1].rsvd1 = 0;
+	exec[1].rsvd2 = 0;
+
+	handle_relocs = gem_create(fd, 4096);
+	gem_write(fd, handle_relocs, 0, reloc, sizeof(reloc));
+	gtt_relocs = gem_mmap(fd, handle_relocs, 4096,
+			      PROT_READ | PROT_WRITE);
+	assert(gtt_relocs);
+
+	exec[2].handle = handle;
+	exec[2].relocation_count = len > 40 ? 4 : 2; /* 40 bytes == a single blit plus terminator */
+	/* A newly mmap gtt bo will fault on first access. */
+	exec[2].relocs_ptr = (uintptr_t)gtt_relocs;
+	exec[2].alignment = 0;
+	exec[2].offset = 0;
+	exec[2].flags = 0;
+	exec[2].rsvd1 = 0;
+	exec[2].rsvd2 = 0;
+
+	ring = 0;
+	if (HAS_BLT_RING(intel_get_drm_devid(fd)))
+		ring = I915_EXEC_BLT;
+
+	execbuf.buffers_ptr = (uintptr_t)exec;
+	execbuf.buffer_count = 3;
+	execbuf.batch_start_offset = 0;
+	execbuf.batch_len = len;
+	execbuf.cliprects_ptr = 0;
+	execbuf.num_cliprects = 0;
+	execbuf.DR1 = 0;
+	execbuf.DR4 = 0;
+	execbuf.flags = ring;
+	i915_execbuffer2_set_context_id(execbuf, 0);
+	execbuf.rsvd2 = 0;
+
+	gem_exec(fd, &execbuf);
+	gem_sync(fd, handle);
+
+	gem_close(fd, handle);
+
+	close(fd);
+}
+/* Run the faulting-relocation blit once with OBJECT_SIZE bytes. */
+int main(int argc, char **argv)
+{
+	(void)argc, (void)argv; /* no command-line options are read */
+	run(OBJECT_SIZE);
+	return 0;
+}
diff --git a/tests/gem_exec_nop.c b/tests/gem_exec_nop.c
new file mode 100644
index 00000000..9dd055cf
--- /dev/null
+++ b/tests/gem_exec_nop.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+
+#define MI_BATCH_BUFFER_END	(0xA<<23)
+/* Mean number of microseconds per iteration for loop iterations run between start and end. */
+static double elapsed(const struct timeval *start, const struct timeval *end, int loop)
+{
+	double whole_secs_us = 1e6*(end->tv_sec - start->tv_sec);
+	double total_us = whole_secs_us + (end->tv_usec - start->tv_usec);
+	return total_us/loop;
+}
+/* Submit handle as a one-object batch up to loops times, stopping at the first non-zero ioctl result, which is returned. */
+static int exec(int fd, uint32_t handle, int loops)
+{
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct drm_i915_gem_exec_object2 gem_exec[1];
+	int ret = 0;
+
+	gem_exec[0].handle = handle;
+	gem_exec[0].relocation_count = 0;	/* no relocations are supplied */
+	gem_exec[0].relocs_ptr = 0;
+	gem_exec[0].alignment = 0;
+	gem_exec[0].offset = 0;
+	gem_exec[0].flags = 0;
+	gem_exec[0].rsvd1 = 0;
+	gem_exec[0].rsvd2 = 0;
+
+	execbuf.buffers_ptr = (uintptr_t)gem_exec;
+	execbuf.buffer_count = 1;
+	execbuf.batch_start_offset = 0;
+	execbuf.batch_len = 8;	/* matches the two-dword batch written by main() */
+	execbuf.cliprects_ptr = 0;
+	execbuf.num_cliprects = 0;
+	execbuf.DR1 = 0;
+	execbuf.DR4 = 0;
+	execbuf.flags = 0;
+	i915_execbuffer2_set_context_id(execbuf, 0);
+	execbuf.rsvd2 = 0;
+
+	while (loops-- && ret == 0) {	/* the same execbuf is resubmitted unchanged */
+		ret = drmIoctl(fd,
+			       DRM_IOCTL_I915_GEM_EXECBUFFER2,
+			       &execbuf);
+	}
+	gem_sync(fd, handle);	/* called on success and on failure alike */
+
+	return ret;
+}
+/* Time runs of 1, 2, 4, ... 1<<17 submissions of a two-dword batch and print the mean time per exec. */
+int main(int argc, char **argv)
+{
+	uint32_t batch[2] = {MI_BATCH_BUFFER_END};	/* second dword is implicitly 0 */
+	uint32_t handle;
+	int count;
+	int fd;
+
+	fd = drm_open_any();
+
+	handle = gem_create(fd, 4096);
+	gem_write(fd, handle, 0, batch, sizeof(batch));
+
+	for (count = 1; count <= 1<<17; count <<= 1) {	/* run length doubles each pass */
+		struct timeval start, end;
+
+		gettimeofday(&start, NULL);
+		if (exec(fd, handle, count))
+			exit(1);	/* a failed submission ends the program */
+		gettimeofday(&end, NULL);
+		printf("Time to exec x %d:		%7.3fµs\n",
+		       count, elapsed(&start, &end, count));
+		fflush(stdout);
+	}
+	gem_close(fd, handle);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_fence_thrash.c b/tests/gem_fence_thrash.c
new file mode 100644
index 00000000..3d50e334
--- /dev/null
+++ b/tests/gem_fence_thrash.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright © 2008-9 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "config.h"
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+
+#define OBJECT_SIZE (128*1024) /* restricted to 1MiB alignment on i915 fences */
+
+/* Before introduction of the LRU list for fences, allocation of a fence for a page
+ * fault would use the first inactive fence (i.e. in preference one with no outstanding
+ * GPU activity, or it would wait on the first to finish). Given the choice, it would simply
+ * reuse the fence that had just been allocated for the previous page-fault - the worst choice
+ * when copying between two buffers and thus constantly swapping fences.
+ */
+/* Create an OBJECT_SIZE bo, set it to I915_TILING_X and return the pointer obtained from gem_mmap(). */
+static void *
+bo_create (int fd)
+{
+	void *map;
+	int bo;
+
+	bo = gem_create(fd, OBJECT_SIZE);
+	gem_set_tiling(fd, bo, I915_TILING_X, 1024);
+
+	map = gem_mmap(fd, bo, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+
+	/* XXX: mmap_gtt pulls the bo into the GTT read domain. */
+	gem_sync(fd, bo);
+
+	/* The handle is not closed here; only the mapping is handed back. */
+	return map;
+}
+/* Thread body: copy OBJECT_SIZE bytes between two new tiled mappings 1000 times; always returns NULL. */
+static void *
+bo_copy (void *_arg)
+{
+	int fd = *(int *)_arg;
+	char *dst = bo_create (fd);
+	char *src = bo_create (fd);
+	int pass = 0;
+
+	/* Yield after every copy so the other copier threads get to run. */
+	while (pass < 1000) {
+		memcpy (dst, src, OBJECT_SIZE);
+		sched_yield ();
+		pass++;
+	}
+
+	return NULL;
+}
+/* Start one bo_copy thread per fence reported by I915_PARAM_NUM_FENCES_AVAIL and wait for all of them. */
+int
+main(int argc, char **argv)
+{
+	drm_i915_getparam_t gp;
+	pthread_t threads[32];
+	int n, num_fences;
+	int fd, ret;
+
+	fd = drm_open_any();
+
+	gp.param = I915_PARAM_NUM_FENCES_AVAIL;
+	gp.value = &num_fences;
+	ret = ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+	assert (ret == 0);
+
+	printf ("creating %d threads\n", num_fences);
+	assert (num_fences <= sizeof (threads) / sizeof (threads[0])); /* 32 still fits threads[] */
+	for (n = 0; n < num_fences; n++) {
+		ret = pthread_create (&threads[n], NULL, bo_copy, &fd);
+		assert (ret == 0); /* never join a thread that failed to start */
+	}
+	for (n = 0; n < num_fences; n++)
+		pthread_join (threads[n], NULL);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_fenced_exec_thrash.c b/tests/gem_fenced_exec_thrash.c
new file mode 100644
index 00000000..8281449e
--- /dev/null
+++ b/tests/gem_fenced_exec_thrash.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <drm.h>
+#include <i915_drm.h>
+
+#include "drmtest.h"
+
+#define WIDTH 1024
+#define HEIGHT 1024
+#define OBJECT_SIZE (4*WIDTH*HEIGHT)
+
+#define BATCH_SIZE 4096
+
+#define MAX_FENCES 16
+
+#define MI_BATCH_BUFFER_END	(0xA<<23)
+
+/*
+ * Testcase: execbuf fence accounting
+ *
+ * We had a bug where we were falsely accounting upon reservation already
+ * fenced buffers as occupying a fence register even if they did not require
+ * one for the batch.
+ *
+ * We aim to exercise this by performing a sequence of fenced BLT
+ * with 2*num_avail_fence buffers, but alternating which half are fenced in
+ * each command.
+ */
+/* Allocate an OBJECT_SIZE bo, set it to I915_TILING_X (WIDTH*4 passed alongside) and return its handle. */
+static uint32_t
+tiled_bo_create (int fd)
+{
+	const uint32_t bo = gem_create(fd, OBJECT_SIZE);
+
+	/*
+	 * Tiling is applied before the handle is handed to the caller.
+	 */
+	gem_set_tiling(fd, bo, I915_TILING_X, WIDTH*4);
+	return bo;
+}
+/* Create a BATCH_SIZE bo whose first two dwords are MI_BATCH_BUFFER_END and 0; returns its handle. */
+static uint32_t
+batch_create (int fd)
+{
+	/* The whole batch: a terminator followed by one zero dword. */
+	uint32_t cmds[2];
+	uint32_t bo = gem_create(fd, BATCH_SIZE);
+
+	cmds[0] = MI_BATCH_BUFFER_END;
+	cmds[1] = 0;
+	gem_write(fd, bo, 0, cmds, sizeof(cmds));
+	return bo;
+}
+/* Query I915_PARAM_NUM_FENCES_AVAIL, print the value, and return that value minus two. */
+static int get_num_fences(int fd)
+{
+	drm_i915_getparam_t query;
+	int val, ret;
+
+	query.value = &val;
+	query.param = I915_PARAM_NUM_FENCES_AVAIL;
+
+	ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &query);
+	assert (ret == 0);
+	printf ("total %d fences\n", val);
+
+	assert(val > 4); /* the test refuses to run with four or fewer fences */
+	return val - 2;
+}
+/* Aim reloc at handle: byte offset 8 in the batch, render read domain, no write domain. */
+static void fill_reloc(struct drm_i915_gem_relocation_entry *reloc, uint32_t handle)
+{
+	reloc->target_handle = handle;
+	reloc->write_domain = 0;
+	reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+	reloc->offset = sizeof(uint32_t) * 2;
+}
+/* Build two execbufs over the same 2*num_fences tiled bos in opposite order and submit them alternately 1000 times. */
+int
+main(int argc, char **argv)
+{
+	struct drm_i915_gem_execbuffer2 execbuf[2];
+	struct drm_i915_gem_exec_object2 exec[2][2*MAX_FENCES+1];	/* +1 slot for the batch bo */
+	struct drm_i915_gem_relocation_entry reloc[2*MAX_FENCES];
+
+	int fd = drm_open_any();
+	int i, n, num_fences;
+	int loop = 1000;
+
+	memset(execbuf, 0, sizeof(execbuf));
+	memset(exec, 0, sizeof(exec));
+	memset(reloc, 0, sizeof(reloc));
+
+	num_fences = get_num_fences(fd) & ~1;	/* clear bit 0: round down to an even count */
+	assert(num_fences <= MAX_FENCES);	/* the arrays above are sized for MAX_FENCES */
+	for (n = 0; n < 2*num_fences; n++) {
+		uint32_t handle = tiled_bo_create(fd);
+		exec[1][2*num_fences - n-1].handle = exec[0][n].handle = handle;	/* list 1 is list 0 reversed */
+		fill_reloc(&reloc[n], handle);
+	}
+
+	for (i = 0; i < 2; i++) {
+		for (n = 0; n < num_fences; n++)	/* only the first half of each list asks for a fence */
+			exec[i][n].flags = EXEC_OBJECT_NEEDS_FENCE;
+
+		exec[i][2*num_fences].handle = batch_create(fd);	/* batch bo goes in the last slot */
+		exec[i][2*num_fences].relocs_ptr = (uintptr_t)reloc;
+		exec[i][2*num_fences].relocation_count = 2*num_fences;
+
+		execbuf[i].buffers_ptr = (uintptr_t)exec[i];
+		execbuf[i].buffer_count = 2*num_fences+1;
+		execbuf[i].batch_len = 2*sizeof(uint32_t);
+	}
+
+	do {
+		int ret;
+
+		ret = drmIoctl(fd,
+			       DRM_IOCTL_I915_GEM_EXECBUFFER2,
+			       &execbuf[0]);
+		assert(ret == 0);
+
+		ret = drmIoctl(fd,
+			       DRM_IOCTL_I915_GEM_EXECBUFFER2,
+			       &execbuf[1]);
+		assert(ret == 0);
+	} while (--loop);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_flink.c b/tests/gem_flink.c
new file mode 100644
index 00000000..c8694808
--- /dev/null
+++ b/tests/gem_flink.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+/* Create a 16 KiB object, flink it, then open the returned name on the same fd. */
+static void
+test_flink(int fd)
+{
+	struct drm_i915_gem_create create = { .size = 16 * 1024 };
+	struct drm_gem_flink flink;
+	struct drm_gem_open gem_open;
+	int ret;
+
+	printf("Testing flink and open.\n");
+
+	ret = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
+	assert(ret == 0);
+
+	/* Ask for a name for the handle that was just created. */
+	flink.handle = create.handle;
+	ret = ioctl(fd, DRM_IOCTL_GEM_FLINK, &flink);
+	assert(ret == 0);
+
+	/* Opening that name must succeed and produce a non-zero handle. */
+	gem_open.name = flink.name;
+	ret = ioctl(fd, DRM_IOCTL_GEM_OPEN, &gem_open);
+	assert(ret == 0);
+	assert(gem_open.handle != 0);
+}
+/* Flink the same 16 KiB object twice and require both calls to report the same name. */
+static void
+test_double_flink(int fd)
+{
+	struct drm_i915_gem_create create = { .size = 16 * 1024 };
+	struct drm_gem_flink flink, flink2;
+	int ret;
+
+	printf("Testing repeated flink.\n");
+
+	ret = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
+	assert(ret == 0);
+
+	/* Both requests refer to the handle that was just created. */
+	flink.handle = create.handle;
+	flink2.handle = create.handle;
+
+	ret = ioctl(fd, DRM_IOCTL_GEM_FLINK, &flink);
+	assert(ret == 0);
+
+	/* Second flink of the same handle. */
+	ret = ioctl(fd, DRM_IOCTL_GEM_FLINK, &flink2);
+	assert(ret == 0);
+	assert(flink2.name == flink.name);
+}
+/* Flinking the bogus handle 0x10101010 must fail with -1 and errno set to ENOENT. */
+static void
+test_bad_flink(int fd)
+{
+	/* Presumed not to refer to any object on this fd. */
+	struct drm_gem_flink flink = { .handle = 0x10101010 };
+	int ret;
+
+	printf("Testing error return on bad flink ioctl.\n");
+
+	ret = ioctl(fd, DRM_IOCTL_GEM_FLINK, &flink);
+	assert(ret == -1 && errno == ENOENT);
+}
+/* Opening the bogus name 0x10101010 must fail with -1 and errno set to ENOENT. */
+static void
+test_bad_open(int fd)
+{
+	/* Presumed not to match any existing flink name. */
+	struct drm_gem_open gem_open = { .name = 0x10101010 };
+	int ret;
+
+	printf("Testing error return on bad open ioctl.\n");
+
+	ret = ioctl(fd, DRM_IOCTL_GEM_OPEN, &gem_open);
+	/* errno is examined right after the ioctl, with no call in between. */
+	assert(ret == -1 && errno == ENOENT);
+}
+/* Run the four flink/open checks in sequence on a single DRM fd. */
+int main(int argc, char **argv)
+{
+	void (*const checks[])(int) = {
+		test_flink, test_double_flink, test_bad_flink, test_bad_open,
+	};
+	int fd = drm_open_any();
+	unsigned int i;
+
+	for (i = 0; i < sizeof(checks) / sizeof(checks[0]); i++)
+		checks[i](fd);
+
+	return 0;
+}
diff --git a/tests/gem_gtt_concurrent_blit.c b/tests/gem_gtt_concurrent_blit.c
new file mode 100644
index 00000000..c68af7b5
--- /dev/null
+++ b/tests/gem_gtt_concurrent_blit.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright © 2009,2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/** @file gem_gtt_concurrent_blit.c
+ *
+ * This is a test of GTT mmap read/write behavior when writing to active
+ * buffers.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+/* Store val into each of the first width*height dwords reachable through bo->virtual. */
+static void
+set_bo(drm_intel_bo *bo, uint32_t val, int width, int height)
+{
+	int remaining = width * height;
+	uint32_t *cursor;
+
+	drm_intel_gem_bo_start_gtt_access(bo, true);
+	cursor = bo->virtual;
+	for (; remaining--; cursor++)
+		*cursor = val;
+}
+/* Assert that each of the first width*height dwords reachable through bo->virtual equals val. */
+static void
+cmp_bo(drm_intel_bo *bo, uint32_t val, int width, int height)
+{
+	int size = width * height;
+	uint32_t *vaddr;
+
+	drm_intel_gem_bo_start_gtt_access(bo, false);
+	/* Advance in the loop header, not inside assert(), so NDEBUG builds still step the pointer. */
+	for (vaddr = bo->virtual; size--; vaddr++)
+		assert(*vaddr == val);
+}
+/* Allocate a width x height dword bo, GTT-map it (the mapping is kept) and fill it with val. */
+static drm_intel_bo *
+create_bo(drm_intel_bufmgr *bufmgr, uint32_t val, int width, int height)
+{
+	drm_intel_bo *bo;
+	int ret;
+
+	bo = drm_intel_bo_alloc(bufmgr, "bo", 4*width*height, 0);
+	assert(bo);
+
+	/* gtt map doesn't have a write parameter, so just keep the mapping
+	 * around (to avoid the set_domain with the gtt write domain set) and
+	 * manually tell the kernel when we start accessing the gtt. */
+	ret = drm_intel_gem_bo_map_gtt(bo);
+	assert(ret == 0); /* set_bo() writes through bo->virtual, so the map must have worked */
+	set_bo(bo, val, width, height);
+	return bo;
+}
+/* Interleave blitter copies with CPU writes and reads through the bo mappings and check the results. */
+int
+main(int argc, char **argv)
+{
+	drm_intel_bufmgr *bufmgr;
+	struct intel_batchbuffer *batch;
+	int num_buffers = 128, max;
+	drm_intel_bo *src[128], *dst[128], *dummy;
+	int width = 512, height = 512;	/* 512*512 dwords = 1 MiB per bo */
+	int fd;
+	int i;
+
+	fd = drm_open_any();
+
+	max = gem_aperture_size (fd) / (1024 * 1024) / 2;	/* aperture size in MiB, halved */
+	if (num_buffers > max)
+		num_buffers = max;
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+	for (i = 0; i < num_buffers; i++) {
+		src[i] = create_bo(bufmgr, i, width, height);	/* src[i] holds i ... */
+		dst[i] = create_bo(bufmgr, ~i, width, height);	/* ... dst[i] holds ~i */
+	}
+	dummy = create_bo(bufmgr, 0, width, height);
+
+	/* try to overwrite the source values */
+	for (i = 0; i < num_buffers; i++)
+		intel_copy_bo(batch, dst[i], src[i], width, height);
+	for (i = num_buffers; i--; )
+		set_bo(src[i], 0xdeadbeef, width, height);
+	for (i = 0; i < num_buffers; i++)
+		cmp_bo(dst[i], i, width, height);	/* dst must hold the value src had before the overwrite */
+
+	/* try to read the results before the copy completes */
+	for (i = 0; i < num_buffers; i++)
+		intel_copy_bo(batch, dst[i], src[i], width, height);
+	for (i = num_buffers; i--; )
+		cmp_bo(dst[i], 0xdeadbeef, width, height);
+
+	/* and finally try to trick the kernel into losing the pending write */
+	for (i = num_buffers; i--; )
+		set_bo(src[i], 0xabcdabcd, width, height);
+	for (i = 0; i < num_buffers; i++)
+		intel_copy_bo(batch, dst[i], src[i], width, height);
+	for (i = num_buffers; i--; )
+		intel_copy_bo(batch, dummy, dst[i], width, height);	/* dst[i] is only a copy source here */
+	for (i = num_buffers; i--; )
+		cmp_bo(dst[i], 0xabcdabcd, width, height);
+
+	return 0;
+}
diff --git a/tests/gem_gtt_cpu_tlb.c b/tests/gem_gtt_cpu_tlb.c
new file mode 100644
index 00000000..68533709
--- /dev/null
+++ b/tests/gem_gtt_cpu_tlb.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+/** @file gem_gtt_cpu_tlb.c
+ *
+ * This test checks whether gtt tlbs for cpu access are correctly invalidated.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_gpu_tools.h"
+
+#define OBJ_SIZE (1024*1024)
+
+#define PAGE_SIZE 4096
+/* Create an OBJ_SIZE bo in which dword i holds the value i, and return its handle. */
+static uint32_t
+create_bo(int fd)
+{
+	const uint32_t bo = gem_create(fd, OBJ_SIZE);
+	uint32_t *map, *cursor;
+	uint32_t value = 0;
+
+	/* Write an ascending dword pattern (0, 1, 2, ...) through the mapping. */
+	map = gem_mmap(fd, bo, OBJ_SIZE, PROT_READ | PROT_WRITE);
+	for (cursor = map; cursor < map + OBJ_SIZE/4; cursor++)
+		*cursor = value++;
+
+	/* The mapping is only needed for the fill. */
+	munmap(map, OBJ_SIZE);
+
+	return bo;
+}
+/* Touch and close one bo, dirty a malloc'ed buffer, then check that a freshly filled bo reads back intact. */
+int
+main(int argc, char **argv)
+{
+	int fd;
+	int i;
+	uint32_t handle;
+
+	uint32_t *ptr;
+
+	fd = drm_open_any();
+
+	handle = gem_create(fd, OBJ_SIZE);
+
+	/* touch one page */
+	ptr = gem_mmap(fd, handle, OBJ_SIZE, PROT_READ | PROT_WRITE);
+	*ptr = 0xdeadbeef;
+	munmap(ptr, OBJ_SIZE);
+
+	gem_close(fd, handle);
+
+	/* stir up the page allocator a bit. */
+	ptr = malloc(OBJ_SIZE);
+	assert(ptr);
+	memset(ptr, 0x1, OBJ_SIZE);
+
+	handle = create_bo(fd);
+
+	/* Read the whole object back with gem_read() and check that every
+	 * dword still holds its own index, as written by create_bo().
+	 */
+	gem_read(fd, handle, 0, ptr, OBJ_SIZE);
+	for (i = 0; i < OBJ_SIZE/4; i++)
+		assert(ptr[i] == i);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_gtt_speed.c b/tests/gem_gtt_speed.c
new file mode 100644
index 00000000..73a3c6dc
--- /dev/null
+++ b/tests/gem_gtt_speed.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+
+#define OBJECT_SIZE 16384
+
+static double elapsed(const struct timeval *start,	/* beginning of the interval */
+		      const struct timeval *end,	/* end of the interval */
+		      int loop)				/* divisor: iterations timed */
+{	/* Returns the microseconds from *start to *end, divided by loop. */
+	return (1e6*(end->tv_sec - start->tv_sec) + (end->tv_usec - start->tv_usec))/loop;
+}
+
+int main(int argc, char **argv)
+{
+	struct timeval start, end;
+	uint8_t *buf;
+	uint32_t handle;
+	int size = OBJECT_SIZE;
+	int loop, i, tiling;
+	int fd;
+	/* Times 1000 iterations of each access path and prints the per-iteration average. */
+	if (argc > 1)
+		size = atoi(argv[1]);	/* optional object size in bytes */
+	if (size <= 0) {	/* atoi() gives 0 for garbage; negative sizes are rejected too */
+		fprintf(stderr, "Invalid object size specified\n");
+		return 1;
+	}
+
+	buf = calloc(1, size);	/* zeroed scratch buffer used by the pwrite/pread passes */
+	assert(buf);
+	fd = drm_open_any();
+
+	handle = gem_create(fd, size);
+	assert(handle);
+	/* One pass per tiling mode; CPU-map and pwrite/pread timings run only untiled. */
+	for (tiling = I915_TILING_NONE; tiling <= I915_TILING_Y; tiling++) {
+		if (tiling != I915_TILING_NONE) {
+			printf("\nSetting tiling mode to %s\n",
+			       tiling == I915_TILING_X ? "X" : "Y");
+			gem_set_tiling(fd, handle, tiling, 512);
+		}
+
+		if (tiling == I915_TILING_NONE) {
+			gem_set_domain(fd, handle,
+				       I915_GEM_DOMAIN_CPU,
+				       I915_GEM_DOMAIN_CPU);
+			/* untimed warm-up pass through a CPU map */
+			{
+				uint32_t *base = gem_mmap__cpu(fd, handle, size, PROT_READ | PROT_WRITE);
+				volatile uint32_t *ptr = base;
+				int x = 0;
+
+				for (i = 0; i < size/sizeof(*ptr); i++)
+					x += ptr[i];
+
+				/* force overtly clever gcc to actually compute x */
+				ptr[0] = x;
+
+				munmap(base, size);
+
+				/* mmap read */
+				gettimeofday(&start, NULL);
+				for (loop = 0; loop < 1000; loop++) {
+					base = gem_mmap__cpu(fd, handle, size, PROT_READ | PROT_WRITE);
+					ptr = base;
+					x = 0;
+
+					for (i = 0; i < size/sizeof(*ptr); i++)
+						x += ptr[i];
+
+					/* force overtly clever gcc to actually compute x */
+					ptr[0] = x;
+
+					munmap(base, size);
+				}
+				gettimeofday(&end, NULL);
+				printf("Time to read %dk through a CPU map:		%7.3fµs\n",
+				       size/1024, elapsed(&start, &end, loop));
+
+				/* mmap write */
+				gettimeofday(&start, NULL);
+				for (loop = 0; loop < 1000; loop++) {
+					base = gem_mmap__cpu(fd, handle, size, PROT_READ | PROT_WRITE);
+					ptr = base;
+
+					for (i = 0; i < size/sizeof(*ptr); i++)
+						ptr[i] = i;
+
+					munmap(base, size);
+				}
+				gettimeofday(&end, NULL);
+				printf("Time to write %dk through a CPU map:		%7.3fµs\n",
+				       size/1024, elapsed(&start, &end, loop));
+			}
+
+			/* CPU pwrite */
+			gettimeofday(&start, NULL);
+			for (loop = 0; loop < 1000; loop++)
+				gem_write(fd, handle, 0, buf, size);
+			gettimeofday(&end, NULL);
+			printf("Time to pwrite %dk through the CPU:		%7.3fµs\n",
+			       size/1024, elapsed(&start, &end, loop));
+
+			/* CPU pread */
+			gettimeofday(&start, NULL);
+			for (loop = 0; loop < 1000; loop++)
+				gem_read(fd, handle, 0, buf, size);
+			gettimeofday(&end, NULL);
+			printf("Time to pread %dk through the CPU:		%7.3fµs\n",
+			       size/1024, elapsed(&start, &end, loop));
+		}
+
+		/* prefault into gtt (untimed) */
+		{
+			uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE);
+			volatile uint32_t *ptr = base;
+			int x = 0;
+
+			for (i = 0; i < size/sizeof(*ptr); i++)
+				x += ptr[i];
+
+			/* force overtly clever gcc to actually compute x */
+			ptr[0] = x;
+
+			munmap(base, size);
+		}
+		/* mmap read */
+		gettimeofday(&start, NULL);
+		for (loop = 0; loop < 1000; loop++) {
+			uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE);
+			volatile uint32_t *ptr = base;
+			int x = 0;
+
+			for (i = 0; i < size/sizeof(*ptr); i++)
+				x += ptr[i];
+
+			/* force overtly clever gcc to actually compute x */
+			ptr[0] = x;
+
+			munmap(base, size);
+		}
+		gettimeofday(&end, NULL);
+		printf("Time to read %dk through a GTT map:		%7.3fµs\n",
+		       size/1024, elapsed(&start, &end, loop));
+
+		/* mmap write */
+		gettimeofday(&start, NULL);
+		for (loop = 0; loop < 1000; loop++) {
+			uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE);
+			volatile uint32_t *ptr = base;
+
+			for (i = 0; i < size/sizeof(*ptr); i++)
+				ptr[i] = i;
+
+			munmap(base, size);
+		}
+		gettimeofday(&end, NULL);
+		printf("Time to write %dk through a GTT map:		%7.3fµs\n",
+		       size/1024, elapsed(&start, &end, loop));
+
+		/* mmap read, repeated after the write pass */
+		gettimeofday(&start, NULL);
+		for (loop = 0; loop < 1000; loop++) {
+			uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE);
+			volatile uint32_t *ptr = base;
+			int x = 0;
+
+			for (i = 0; i < size/sizeof(*ptr); i++)
+				x += ptr[i];
+
+			/* force overtly clever gcc to actually compute x */
+			ptr[0] = x;
+
+			munmap(base, size);
+		}
+		gettimeofday(&end, NULL);
+		printf("Time to read %dk (again) through a GTT map:	%7.3fµs\n",
+		       size/1024, elapsed(&start, &end, loop));
+
+		if (tiling == I915_TILING_NONE) {
+			/* GTT pwrite */
+			gettimeofday(&start, NULL);
+			for (loop = 0; loop < 1000; loop++)
+				gem_write(fd, handle, 0, buf, size);
+			gettimeofday(&end, NULL);
+			printf("Time to pwrite %dk through the GTT:		%7.3fµs\n",
+			       size/1024, elapsed(&start, &end, loop));
+
+			/* GTT pread */
+			gettimeofday(&start, NULL);
+			for (loop = 0; loop < 1000; loop++)
+				gem_read(fd, handle, 0, buf, size);
+			gettimeofday(&end, NULL);
+			printf("Time to pread %dk through the GTT:		%7.3fµs\n",
+			       size/1024, elapsed(&start, &end, loop));
+
+			/* GTT pwrite, including clflush */
+			gettimeofday(&start, NULL);
+			for (loop = 0; loop < 1000; loop++) {
+				gem_write(fd, handle, 0, buf, size);
+				gem_sync(fd, handle);
+			}
+			gettimeofday(&end, NULL);
+			printf("Time to pwrite %dk through the GTT (clflush):	%7.3fµs\n",
+			       size/1024, elapsed(&start, &end, loop));
+
+			/* GTT pread, including clflush */
+			gettimeofday(&start, NULL);
+			for (loop = 0; loop < 1000; loop++) {
+				gem_sync(fd, handle);
+				gem_read(fd, handle, 0, buf, size);
+			}
+			gettimeofday(&end, NULL);
+			printf("Time to pread %dk through the GTT (clflush):	%7.3fµs\n",
+			       size/1024, elapsed(&start, &end, loop));
+
+			/* partial writes: only the first quarter of the object is transferred */
+			printf("Now partial writes.\n");
+			size /= 4;
+
+			/* partial GTT pwrite, including clflush */
+			gettimeofday(&start, NULL);
+			for (loop = 0; loop < 1000; loop++) {
+				gem_write(fd, handle, 0, buf, size);
+				gem_sync(fd, handle);
+			}
+			gettimeofday(&end, NULL);
+			printf("Time to pwrite %dk through the GTT (clflush):	%7.3fµs\n",
+			       size/1024, elapsed(&start, &end, loop));
+
+			/* partial GTT pread, including clflush */
+			gettimeofday(&start, NULL);
+			for (loop = 0; loop < 1000; loop++) {
+				gem_sync(fd, handle);
+				gem_read(fd, handle, 0, buf, size);
+			}
+			gettimeofday(&end, NULL);
+			printf("Time to pread %dk through the GTT (clflush):	%7.3fµs\n",
+			       size/1024, elapsed(&start, &end, loop));
+
+			size *= 4;	/* undo the quartering before the next tiling pass */
+		}
+
+	}
+
+	gem_close(fd, handle);
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_hang.c b/tests/gem_hang.c
new file mode 100644
index 00000000..f9db3400
--- /dev/null
+++ b/tests/gem_hang.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *    Jesse Barnes <jbarnes@virtuousgeek.org> (based on gem_bad_blit.c)
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+static int bad_pipe;
+
+static void
+gpu_hang(void)
+{
+	int cmd;
+	/* Emit a scan-line-window wait for the pipe selected by bad_pipe and flush it. */
+	cmd = bad_pipe ? MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW :
+		MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW;
+
+	BEGIN_BATCH(6);	/* six dwords follow */
+	/* The documentation says that the LOAD_SCAN_LINES command
+	 * always comes in pairs. Don't ask me why. */
+	OUT_BATCH(MI_LOAD_SCAN_LINES_INCL | (bad_pipe << 20));
+	OUT_BATCH((0 << 16) | 2048);
+	OUT_BATCH(MI_LOAD_SCAN_LINES_INCL | (bad_pipe << 20));
+	OUT_BATCH((0 << 16) | 2048);
+	OUT_BATCH(MI_WAIT_FOR_EVENT | cmd);	/* NOTE(review): presumably never completes on a disabled pipe - confirm */
+	OUT_BATCH(MI_NOOP);
+	ADVANCE_BATCH();
+
+	intel_batchbuffer_flush(batch);
+}
+
+int main(int argc, char **argv)
+{
+	int fd;
+	/* Takes exactly one argument, the pipe number, and calls gpu_hang() once. */
+	if (argc != 2) {
+		fprintf(stderr, "usage: %s <disabled pipe number>\n",
+			argv[0]);
+		exit(-1);
+	}
+
+	bad_pipe = atoi(argv[1]);	/* non-numeric input becomes 0 */
+
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+	gpu_hang();
+	/* release in reverse order of creation */
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_hangcheck_forcewake.c b/tests/gem_hangcheck_forcewake.c
new file mode 100644
index 00000000..96a30fef
--- /dev/null
+++ b/tests/gem_hangcheck_forcewake.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+/*
+ * Testcase: Provoke the hangcheck timer on an otherwise idle system
+ *
+ * This tries to hit forcewake locking bugs when the hangcheck runs. Somehow we
+ * often luck out and the hangcheck runs while someone else is already holding
+ * the dev->struct_mutex.
+ *
+ * It's imperative that nothing else runs while this test runs, i.e. kill your X
+ * session, please.
+ */
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+uint32_t blob[2048*2048];
+
+#define MAX_BLT_SIZE 128
+int main(int argc, char **argv)
+{
+	drm_intel_bo *bo = NULL;
+	uint32_t tiling_mode = I915_TILING_X;
+	unsigned long pitch, act_size;
+	int fd, i, devid;
+	/* Queue 10000 blits on one X-tiled bo, then sleep 10 seconds (see the testcase note above). */
+	memset(blob, 'A', sizeof(blob));
+
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	devid = intel_get_drm_devid(fd);
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+	act_size = 2048;
+	printf("filling ring\n");
+	drm_intel_bo_unreference(bo);	/* bo is still NULL at this point */
+	bo = drm_intel_bo_alloc_tiled(bufmgr, "tiled bo", act_size, act_size,
+				      4, &tiling_mode, &pitch, 0);
+
+	drm_intel_bo_subdata(bo, 0, act_size*act_size*4, blob);	/* upload all of blob */
+
+	if (IS_965(devid))
+		pitch /= 4;	/* NOTE(review): presumably tiled blit pitch is in dwords on these chips - confirm */
+
+	for (i = 0; i < 10000; i++) {
+		BEGIN_BATCH(8);
+		OUT_BATCH(XY_SRC_COPY_BLT_CMD |
+			  XY_SRC_COPY_BLT_WRITE_ALPHA |
+			  XY_SRC_COPY_BLT_WRITE_RGB |
+			  XY_SRC_COPY_BLT_SRC_TILED |
+			  XY_SRC_COPY_BLT_DST_TILED);
+		OUT_BATCH((3 << 24) | /* 32 bits */
+			  (0xcc << 16) | /* copy ROP */
+			  pitch);
+		OUT_BATCH(0 << 16 | 1024);
+		OUT_BATCH((2048) << 16 | (2048));
+		OUT_RELOC_FENCED(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+		OUT_BATCH(0 << 16 | 0);
+		OUT_BATCH(pitch);
+		OUT_RELOC_FENCED(bo, I915_GEM_DOMAIN_RENDER, 0, 0);	/* same bo is source and destination */
+		ADVANCE_BATCH();
+
+		if (IS_GEN6(devid) || IS_GEN7(devid)) {
+			BEGIN_BATCH(3);
+			OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+			ADVANCE_BATCH();
+		}
+	}
+
+	printf("waiting\n");
+	sleep(10);
+
+	printf("done waiting, check dmesg\n");	/* the result is judged from dmesg, not from the exit code */
+	drm_intel_bo_unreference(bo);
+
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_largeobject.c b/tests/gem_largeobject.c
new file mode 100644
index 00000000..163bf101
--- /dev/null
+++ b/tests/gem_largeobject.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jesse Barnes <jbarnes@virtuousgeek.org>
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+
+/* Should take 64 pages to store the page pointers on 64 bit */
+#define OBJ_SIZE (128 * 1024 * 1024)
+
+unsigned char data[OBJ_SIZE];
+
+static void
+test_large_object(int fd)
+{
+	struct drm_i915_gem_create create;
+	struct drm_i915_gem_pin pin;
+	uint32_t obj_size;
+	int ret;
+	/* Create, pin and pwrite one large object; any ioctl failure exits the process. */
+	memset(&create, 0, sizeof(create));
+	memset(&pin, 0, sizeof(pin));
+	/* Shrink the object to a half or a quarter when 3/4 of the aperture is too small. */
+	if (gem_aperture_size(fd)*3/4 < OBJ_SIZE/2)
+		obj_size = OBJ_SIZE / 4;
+	else if (gem_aperture_size(fd)*3/4 < OBJ_SIZE)
+		obj_size = OBJ_SIZE / 2;
+	else
+		obj_size = OBJ_SIZE;
+	create.size = obj_size;
+	printf("obj size %i\n", obj_size);
+
+	ret = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
+	if (ret) {
+		fprintf(stderr, "object creation failed: %s\n",
+			strerror(errno));
+		exit(ret);	/* exit status is the nonzero ioctl return value */
+	}
+
+	pin.handle = create.handle;
+	ret = ioctl(fd, DRM_IOCTL_I915_GEM_PIN, &pin);
+	if (ret) {
+		fprintf(stderr, "pin failed: %s\n",
+			strerror(errno));
+		exit(ret);
+	}
+
+	gem_write(fd, create.handle, 0, data, obj_size);	/* data is the file-scope array */
+
+	/* kernel should clean this up for us */
+}
+
+int main(int argc, char **argv)
+{
+	int fd;
+	/* Arguments are unused; fd is not closed before returning. */
+	fd = drm_open_any();
+
+	test_large_object(fd);
+
+	return 0;
+}
diff --git a/tests/gem_linear_blits.c b/tests/gem_linear_blits.c
new file mode 100644
index 00000000..fe15f1d1
--- /dev/null
+++ b/tests/gem_linear_blits.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file gem_linear_blits.c
+ *
+ * This is a test of doing many blits, with a working set
+ * larger than the aperture size.
+ *
+ * The goal is to simply ensure the basics work.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+#define WIDTH 512
+#define HEIGHT 512
+
+static uint32_t linear[WIDTH*HEIGHT];
+
+static void
+copy(int fd, uint32_t dst, uint32_t src)
+{
+	uint32_t batch[10];
+	struct drm_i915_gem_relocation_entry reloc[2];
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_execbuffer2 exec;
+	uint32_t handle;
+	int ret;
+	/* Submit one WIDTH x HEIGHT XY_SRC_COPY blit from bo src to bo dst via execbuffer2. */
+	batch[0] = XY_SRC_COPY_BLT_CMD |
+		  XY_SRC_COPY_BLT_WRITE_ALPHA |
+		  XY_SRC_COPY_BLT_WRITE_RGB;
+	batch[1] = (3 << 24) | /* 32 bits */
+		  (0xcc << 16) | /* copy ROP */
+		  WIDTH*4;
+	batch[2] = 0; /* dst x1,y1 */
+	batch[3] = (HEIGHT << 16) | WIDTH; /* dst x2,y2 */
+	batch[4] = 0; /* dst reloc */
+	batch[5] = 0; /* src x1,y1 */
+	batch[6] = WIDTH*4; /* same value as the pitch in batch[1] */
+	batch[7] = 0; /* src reloc */
+	batch[8] = MI_BATCH_BUFFER_END;
+	batch[9] = MI_NOOP;
+
+	handle = gem_create(fd, 4096);	/* temporary bo that holds the batch */
+	gem_write(fd, handle, 0, batch, sizeof(batch));
+
+	reloc[0].target_handle = dst;
+	reloc[0].delta = 0;
+	reloc[0].offset = 4 * sizeof(batch[0]);	/* byte offset of batch[4] */
+	reloc[0].presumed_offset = 0;
+	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;;
+	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+
+	reloc[1].target_handle = src;
+	reloc[1].delta = 0;
+	reloc[1].offset = 7 * sizeof(batch[0]);	/* byte offset of batch[7] */
+	reloc[1].presumed_offset = 0;
+	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;;
+	reloc[1].write_domain = 0;	/* src is only read */
+
+	obj[0].handle = dst;
+	obj[0].relocation_count = 0;
+	obj[0].relocs_ptr = 0;
+	obj[0].alignment = 0;
+	obj[0].offset = 0;
+	obj[0].flags = 0;
+	obj[0].rsvd1 = 0;
+	obj[0].rsvd2 = 0;
+
+	obj[1].handle = src;
+	obj[1].relocation_count = 0;
+	obj[1].relocs_ptr = 0;
+	obj[1].alignment = 0;
+	obj[1].offset = 0;
+	obj[1].flags = 0;
+	obj[1].rsvd1 = 0;
+	obj[1].rsvd2 = 0;
+
+	obj[2].handle = handle;	/* the batch bo is listed last and carries both relocations */
+	obj[2].relocation_count = 2;
+	obj[2].relocs_ptr = (uintptr_t)reloc;
+	obj[2].alignment = 0;
+	obj[2].offset = 0;
+	obj[2].flags = 0;
+	obj[2].rsvd1 = obj[2].rsvd2 = 0;
+
+	exec.buffers_ptr = (uintptr_t)obj;
+	exec.buffer_count = 3;
+	exec.batch_start_offset = 0;
+	exec.batch_len = sizeof(batch);
+	exec.DR1 = exec.DR4 = 0;
+	exec.num_cliprects = 0;
+	exec.cliprects_ptr = 0;
+	exec.flags = HAS_BLT_RING(intel_get_drm_devid(fd)) ? I915_EXEC_BLT : 0;
+	i915_execbuffer2_set_context_id(exec, 0);
+	exec.rsvd2 = 0;
+
+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
+	while (ret && errno == EBUSY) {	/* on EBUSY: throttle, then resubmit */
+		drmCommandNone(fd, DRM_I915_GEM_THROTTLE);
+		ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
+	}
+	assert(ret == 0);	/* any failure other than EBUSY aborts the test */
+
+	gem_close(fd, handle);
+}
+
+static uint32_t
+create_bo(int fd, uint32_t val)
+{
+	uint32_t handle;
+	int i;
+	/* Create a bo of sizeof(linear) bytes holding val, val+1, ... and return its handle. */
+	handle = gem_create(fd, sizeof(linear));
+
+	/* Fill the BO with dwords starting at val */
+	for (i = 0; i < WIDTH*HEIGHT; i++)
+		linear[i] = val++;
+	gem_write(fd, handle, 0, linear, sizeof(linear));	/* staged in the file-scope linear[] array */
+
+	return handle;
+}
+
+static void
+check_bo(int fd, uint32_t handle, uint32_t val)
+{
+	int i;
+	/* Read the bo into linear[] and abort() unless it holds val, val+1, ... */
+	gem_read(fd, handle, 0, linear, sizeof(linear));
+	for (i = 0; i < WIDTH*HEIGHT; i++) {
+		if (linear[i] != val) {
+			fprintf(stderr, "Expected 0x%08x, found 0x%08x "
+				"at offset 0x%08x\n",
+				val, linear[i], i * 4);	/* i * 4 is the byte offset of dword i */
+			abort();
+		}
+		val++;
+	}
+}
+
+int main(int argc, char **argv)
+{
+	uint32_t *handle, *start_val;
+	uint32_t start = 0;
+	int i, fd, count;
+	/* Optional argv[1]: number of 1MiB buffers; otherwise derived from the aperture size. */
+	fd = drm_open_any();
+
+	count = 0;
+	if (argc > 1)
+		count = atoi(argv[1]);
+	if (count <= 0)	/* missing, unparsable or negative argument: use 1.5x the aperture in MiB */
+		count = 3 * gem_aperture_size(fd) / (1024*1024) / 2;
+
+	if (count > intel_get_total_ram_mb() * 9 / 10) {
+		count = intel_get_total_ram_mb() * 9 / 10;
+		printf("not enough RAM to run test, reducing buffer count\n");
+	}
+
+	printf("Using %d 1MiB buffers\n", count);
+
+	handle = malloc(sizeof(uint32_t)*count*2);	/* handles, then expected start values */
+	assert(handle);
+	start_val = handle + count;
+	for (i = 0; i < count; i++) {
+		handle[i] = create_bo(fd, start);
+		start_val[i] = start;
+		start += 1024 * 1024 / 4;	/* one buffer's worth of dwords */
+	}
+
+	printf("Verifying initialisation...\n");
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+
+	printf("Cyclic blits, forward...\n");
+	for (i = 0; i < count * 4; i++) {
+		int src = i % count;
+		int dst = (i + 1) % count;
+
+		copy(fd, handle[dst], handle[src]);
+		start_val[dst] = start_val[src];	/* track what dst should now contain */
+	}
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+
+	printf("Cyclic blits, backward...\n");
+	for (i = 0; i < count * 4; i++) {
+		int src = (i + 1) % count;
+		int dst = i % count;
+
+		copy(fd, handle[dst], handle[src]);
+		start_val[dst] = start_val[src];
+	}
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+
+	printf("Random blits...\n");
+	for (i = 0; i < count * 4; i++) {
+		int src = random() % count;
+		int dst = random() % count;
+
+		if (src == dst)	/* skip self-copies */
+			continue;
+
+		copy(fd, handle[dst], handle[src]);
+		start_val[dst] = start_val[src];
+	}
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+
+	return 0;
+}
diff --git a/tests/gem_mmap.c b/tests/gem_mmap.c
new file mode 100644
index 00000000..f9369f43
--- /dev/null
+++ b/tests/gem_mmap.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+
+#define OBJECT_SIZE 16384
+
+int main(int argc, char **argv)
+{
+	int fd;
+	struct drm_i915_gem_mmap arg;
+	uint8_t expected[OBJECT_SIZE];
+	uint8_t buf[OBJECT_SIZE];
+	uint8_t *addr;
+	int ret;
+	int handle;
+	/* Exercises DRM_IOCTL_I915_GEM_MMAP: bad handle, fresh object, pwrite coherency, close, unmap. */
+	fd = drm_open_any();
+
+	memset(&arg, 0, sizeof(arg));
+	arg.handle = 0x10101010;	/* a handle that was never created */
+	arg.offset = 0;
+	arg.size = 4096;
+	printf("Testing mmaping of bad object.\n");
+	ret = ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg);
+	assert(ret == -1 && errno == ENOENT);
+
+	handle = gem_create(fd, OBJECT_SIZE);
+
+	printf("Testing mmaping of newly created object.\n");
+	arg.handle = handle;
+	arg.offset = 0;
+	arg.size = OBJECT_SIZE;
+	ret = ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg);
+	assert(ret == 0);
+	addr = (uint8_t *)(uintptr_t)arg.addr_ptr;	/* the ioctl reports the mapping address in addr_ptr */
+
+	printf("Testing contents of newly created object.\n");
+	memset(expected, 0, sizeof(expected));
+	assert(memcmp(addr, expected, sizeof(expected)) == 0);	/* a new object must read back as zeroes */
+
+	printf("Testing coherency of writes and mmap reads.\n");
+	memset(buf, 0, sizeof(buf));
+	memset(buf + 1024, 0x01, 1024);
+	memset(expected + 1024, 0x01, 1024);	/* NOTE(review): expected is not compared again below; buf is */
+	gem_write(fd, handle, 0, buf, OBJECT_SIZE);
+	assert(memcmp(buf, addr, sizeof(buf)) == 0);
+
+	printf("Testing that mapping stays after close\n");
+	gem_close(fd, handle);
+	assert(memcmp(buf, addr, sizeof(buf)) == 0);
+
+	printf("Testing unmapping\n");
+	munmap(addr, OBJECT_SIZE);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_mmap_gtt.c b/tests/gem_mmap_gtt.c
new file mode 100644
index 00000000..e7a48679
--- /dev/null
+++ b/tests/gem_mmap_gtt.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+
+#define OBJECT_SIZE (16*1024*1024)
+
+static void set_domain(int fd, uint32_t handle)
+{	/* Passes I915_GEM_DOMAIN_GTT for both domain arguments (presumably read and write - confirm). */
+	gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+}
+
+static void *
+mmap_bo(int fd, uint32_t handle)
+{
+	void *ptr;
+	/* Map OBJECT_SIZE bytes of the bo read/write with gem_mmap(); asserts on MAP_FAILED. */
+	ptr = gem_mmap(fd, handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+	assert(ptr != MAP_FAILED);
+
+	return ptr;	/* the caller unmaps it */
+}
+
+static void *
+create_pointer(int fd)
+{
+	uint32_t handle;
+	void *ptr;
+	/* Returns a mapping of a new OBJECT_SIZE bo; nothing here reads or writes through it. */
+	handle = gem_create(fd, OBJECT_SIZE);
+
+	ptr = mmap_bo(fd, handle);
+
+	gem_close(fd, handle);	/* the handle is closed first; the mapping is what gets returned */
+
+	return ptr;
+}
+
+static void
+test_copy(int fd)
+{
+	void *src, *dst;
+
+	/* copy from a fresh src to fresh dst to force pagefault on both */
+	src = create_pointer(fd);
+	dst = create_pointer(fd);
+
+	memcpy(dst, src, OBJECT_SIZE);
+	memcpy(src, dst, OBJECT_SIZE);	/* and once more in the opposite direction */
+
+	munmap(dst, OBJECT_SIZE);
+	munmap(src, OBJECT_SIZE);
+}
+
+static void
+test_write(int fd)
+{
+	void *src;
+	uint32_t dst;
+
+	/* pwrite into a fresh bo, using a fresh, not yet touched mapping as the source */
+	src = create_pointer(fd);
+	dst = gem_create(fd, OBJECT_SIZE);
+
+	gem_write(fd, dst, 0, src, OBJECT_SIZE);
+
+	gem_close(fd, dst);
+	munmap(src, OBJECT_SIZE);
+}
+
+static void
+test_write_gtt(int fd)
+{
+	uint32_t dst;
+	char *dst_gtt;
+	void *src;
+	/* Like test_write(), but dst is first written through its own mapping. */
+	dst = gem_create(fd, OBJECT_SIZE);
+
+	/* prefault object into gtt */
+	dst_gtt = mmap_bo(fd, dst);
+	set_domain(fd, dst);
+	memset(dst_gtt, 0, OBJECT_SIZE);
+	munmap(dst_gtt, OBJECT_SIZE);
+
+	src = create_pointer(fd);	/* fresh, not yet touched source mapping */
+
+	gem_write(fd, dst, 0, src, OBJECT_SIZE);
+
+	gem_close(fd, dst);
+	munmap(src, OBJECT_SIZE);
+}
+
+static void
+test_read(int fd)
+{
+	void *dst;
+	uint32_t src;
+
+	/* pread from a fresh bo, using a fresh, not yet touched mapping as the destination */
+	dst = create_pointer(fd);
+	src = gem_create(fd, OBJECT_SIZE);
+
+	gem_read(fd, src, 0, dst, OBJECT_SIZE);
+
+	gem_close(fd, src);
+	munmap(dst, OBJECT_SIZE);
+}
+
+int main(int argc, char **argv)
+{
+	int fd;
+	/* Runs the four sub-tests in sequence on one DRM fd; arguments are unused. */
+	fd = drm_open_any();
+
+	test_copy(fd);
+	test_read(fd);
+	test_write(fd);
+	test_write_gtt(fd);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_mmap_offset_exhaustion.c b/tests/gem_mmap_offset_exhaustion.c
new file mode 100644
index 00000000..51ae5990
--- /dev/null
+++ b/tests/gem_mmap_offset_exhaustion.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+
+#define OBJECT_SIZE (1024*1024)
+
+/* Testcase: checks whether the kernel handles mmap offset exhaustion correctly
+ *
+ * Currently the kernel doesn't reap the mmap offset of purged objects, albeit
+ * there's nothing that prevents it ABI-wise and it helps to get out of corners
+ * (because drm_mm is only 32bit on 32bit archs unfortunately).
+ *
+ * Note that on 64bit machines we have plenty of address space (because drm_mm
+ * uses unsigned long).
+ */
+
+/*
+ * Create one OBJECT_SIZE bo, map it, write its first byte, unmap it and
+ * mark it I915_MADV_DONTNEED. The handle is intentionally never closed.
+ */
+static void
+create_and_map_bo(int fd)
+{
+	uint32_t bo = gem_create(fd, OBJECT_SIZE);
+	char *ptr = gem_mmap(fd, bo, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+
+	if (ptr == NULL) {
+		fprintf(stderr, "mmap failed\n");
+		assert(ptr);
+	}
+
+	/* touch it to force it into the gtt */
+	ptr[0] = 0;
+
+	/* unmap again at once, because address space is limited on
+	 * 32 bit */
+	munmap(ptr, OBJECT_SIZE);
+
+	/* the object is leaked on purpose to exhaust mmap offset space;
+	 * the kernel will reap the backing storage. */
+	gem_madvise(fd, bo, I915_MADV_DONTNEED);
+}
+
+/* Call create_and_map_bo() 4097 times on one fd, then exit with 0. */
+int main(int argc, char **argv)
+{
+	int fd = drm_open_any();
+	int remaining;
+
+	/* 4096 objects of 1 MB span a 32bit space, so create one more
+	 * than that. */
+	for (remaining = 4096 + 1; remaining > 0; remaining--)
+		create_and_map_bo(fd);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_partial_pwrite_pread.c b/tests/gem_partial_pwrite_pread.c
new file mode 100644
index 00000000..5c8f6f5f
--- /dev/null
+++ b/tests/gem_partial_pwrite_pread.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+/*
+ * Testcase: pwrite/pread consistency when touching partial cachelines
+ *
+ * Some fancy new pwrite/pread optimizations clflush in-line while
+ * reading/writing. Check whether all required clflushes happen.
+ *
+ */
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+drm_intel_bo *scratch_bo;
+drm_intel_bo *staging_bo;
+#define BO_SIZE (4*4096)
+uint32_t devid;
+uint64_t mappable_gtt_limit;
+int fd;
+
+static void
+copy_bo(drm_intel_bo *src, drm_intel_bo *dst)
+{
+	BEGIN_BATCH(8); /* 6 OUT_BATCH + 2 OUT_RELOC_FENCED entries follow */
+	OUT_BATCH(XY_SRC_COPY_BLT_CMD |
+		  XY_SRC_COPY_BLT_WRITE_ALPHA |
+		  XY_SRC_COPY_BLT_WRITE_RGB);
+	OUT_BATCH((3 << 24) | /* 32 bits */
+		  (0xcc << 16) | /* copy ROP */
+		  4096); /* NOTE(review): looks like the dst pitch in bytes -- confirm */
+	OUT_BATCH(0 << 16 | 0); /* presumably dst top-left, y << 16 | x -- confirm */
+	OUT_BATCH((BO_SIZE/4096) << 16 | 1024); /* presumably dst bottom-right -- confirm */
+	OUT_RELOC_FENCED(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+	OUT_BATCH(0 << 16 | 0); /* presumably src top-left -- confirm */
+	OUT_BATCH(4096); /* NOTE(review): looks like the src pitch in bytes -- confirm */
+	OUT_RELOC_FENCED(src, I915_GEM_DOMAIN_RENDER, 0, 0);
+	ADVANCE_BATCH();
+
+	intel_batchbuffer_flush(batch);
+}
+
+/*
+ * Fill tmp_bo with the low byte of val through its gtt mapping, trash the
+ * aperture when the condition below holds, then blit tmp_bo into bo.
+ */
+static void
+blt_bo_fill(drm_intel_bo *tmp_bo, drm_intel_bo *bo, int val)
+{
+	int trash;
+
+	drm_intel_gem_bo_map_gtt(tmp_bo);
+	memset(tmp_bo->virtual, val, BO_SIZE);
+	drm_intel_gem_bo_unmap_gtt(tmp_bo);
+
+	trash = bo->offset < mappable_gtt_limit &&
+		(IS_G33(devid) || intel_gen(devid) >= 4);
+	if (trash)
+		drmtest_trash_aperture();
+
+	copy_bo(tmp_bo, bo);
+}
+
+#define MAX_BLT_SIZE 128
+#define ROUNDS 1000
+int main(int argc, char **argv)
+{
+	int i, j;
+	uint8_t tmp[BO_SIZE];
+	uint8_t *gtt_ptr;
+	/* three phases of ROUNDS iterations each; exit(1) on first mismatch */
+	srandom(0xdeadbeef);
+
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	//drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	devid = intel_get_drm_devid(fd);
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+	/* scratch_bo is the bo under test; staging_bo fills and reads it back */
+	scratch_bo = drm_intel_bo_alloc(bufmgr, "scratch bo", BO_SIZE, 4096);
+	staging_bo = drm_intel_bo_alloc(bufmgr, "staging bo", BO_SIZE, 4096);
+
+	drmtest_init_aperture_trashers(bufmgr);
+	mappable_gtt_limit = gem_mappable_aperture_size();
+
+	printf("checking partial reads\n");
+	for (i = 0; i < ROUNDS; i++) {
+		int start, len;
+		int val = i % 256; /* byte value blt_bo_fill() stores for i */
+
+		blt_bo_fill(staging_bo, scratch_bo, i);
+
+		start = random() % BO_SIZE;
+		len = random() % (BO_SIZE-start) + 1;
+
+		drm_intel_bo_get_subdata(scratch_bo, start, len, tmp);
+		for (j = 0; j < len; j++) {
+			if (tmp[j] != val) {
+				printf("mismatch at %i, got: %i, expected: %i\n",
+				       j, tmp[j], val);
+				exit(1);
+			}
+		}
+
+		drmtest_progress("partial reads test: ", i, ROUNDS);
+	}
+
+	printf("checking partial writes\n");
+	for (i = 0; i < ROUNDS; i++) {
+		int start, len;
+		int val = i % 256;
+
+		blt_bo_fill(staging_bo, scratch_bo, i);
+
+		start = random() % BO_SIZE;
+		len = random() % (BO_SIZE-start) + 1;
+
+		memset(tmp, i + 63, BO_SIZE); /* payload byte, differs from val */
+
+		drm_intel_bo_subdata(scratch_bo, start, len, tmp);
+
+		copy_bo(scratch_bo, staging_bo);
+		drm_intel_gem_bo_map_gtt(staging_bo);
+		gtt_ptr = staging_bo->virtual;
+
+		for (j = 0; j < start; j++) { /* bytes before the write */
+			if (gtt_ptr[j] != val) {
+				printf("mismatch at %i, got: %i, expected: %i\n",
+				       j, gtt_ptr[j], val);
+				exit(1);
+			}
+		}
+		for (; j < start + len; j++) { /* the written range */
+			if (gtt_ptr[j] != tmp[0]) {
+				printf("mismatch at %i, got: %i, expected: %i\n",
+				       j, gtt_ptr[j], tmp[0]);
+				exit(1);
+			}
+		}
+		for (; j < BO_SIZE; j++) { /* bytes after the write */
+			if (gtt_ptr[j] != val) {
+				printf("mismatch at %i, got: %i, expected: %i\n",
+				       j, gtt_ptr[j], val);
+				exit(1);
+			}
+		}
+		drm_intel_gem_bo_unmap_gtt(staging_bo);
+
+		drmtest_progress("partial writes test: ", i, ROUNDS);
+	}
+
+	printf("checking partial writes after partial reads\n");
+	for (i = 0; i < ROUNDS; i++) {
+		int start, len;
+		int val = i % 256;
+
+		blt_bo_fill(staging_bo, scratch_bo, i);
+
+		/* partial read */
+		start = random() % BO_SIZE;
+		len = random() % (BO_SIZE-start) + 1;
+
+		drm_intel_bo_get_subdata(scratch_bo, start, len, tmp);
+		for (j = 0; j < len; j++) {
+			if (tmp[j] != val) {
+				printf("mismatch in read at %i, got: %i, expected: %i\n",
+				       j, tmp[j], val);
+				exit(1);
+			}
+		}
+
+		/* Change contents through gtt to make the pread cachelines
+		 * stale. */
+		val = (i + 17) % 256;
+		blt_bo_fill(staging_bo, scratch_bo, val);
+
+		/* partial write */
+		start = random() % BO_SIZE;
+		len = random() % (BO_SIZE-start) + 1;
+
+		memset(tmp, i + 63, BO_SIZE); /* payload byte, differs from val */
+
+		drm_intel_bo_subdata(scratch_bo, start, len, tmp);
+
+		copy_bo(scratch_bo, staging_bo);
+		drm_intel_gem_bo_map_gtt(staging_bo);
+		gtt_ptr = staging_bo->virtual;
+
+		for (j = 0; j < start; j++) { /* bytes before the write */
+			if (gtt_ptr[j] != val) {
+				printf("mismatch at %i, got: %i, expected: %i\n",
+				       j, gtt_ptr[j], val);
+				exit(1);
+			}
+		}
+		for (; j < start + len; j++) { /* the written range */
+			if (gtt_ptr[j] != tmp[0]) {
+				printf("mismatch at %i, got: %i, expected: %i\n",
+				       j, gtt_ptr[j], tmp[0]);
+				exit(1);
+			}
+		}
+		for (; j < BO_SIZE; j++) { /* bytes after the write */
+			if (gtt_ptr[j] != val) {
+				printf("mismatch at %i, got: %i, expected: %i\n",
+				       j, gtt_ptr[j], val);
+				exit(1);
+			}
+		}
+		drm_intel_gem_bo_unmap_gtt(staging_bo);
+
+		drmtest_progress("partial read/writes test: ", i, ROUNDS);
+	}
+
+	drmtest_cleanup_aperture_trashers();
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_pipe_control_store_loop.c b/tests/gem_pipe_control_store_loop.c
new file mode 100644
index 00000000..e03cddd7
--- /dev/null
+++ b/tests/gem_pipe_control_store_loop.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch> (based on gem_storedw_*.c)
+ *
+ */
+
+/*
+ * Testcase: (TLB-)Coherency of pipe_control QW writes
+ *
+ * Writes a counter-value into an always newly allocated target bo (by disabling
+ * buffer reuse). Decently trashes on tlb inconsistencies, too.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+uint32_t devid;
+
+#define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24)|2)
+#define   PIPE_CONTROL_WRITE_IMMEDIATE	(1<<14)
+#define   PIPE_CONTROL_WRITE_TIMESTAMP	(3<<14)
+#define   PIPE_CONTROL_DEPTH_STALL (1<<13)
+#define   PIPE_CONTROL_WC_FLUSH	(1<<12)
+#define   PIPE_CONTROL_IS_FLUSH	(1<<11) /* MBZ on Ironlake */
+#define   PIPE_CONTROL_TC_FLUSH (1<<10) /* GM45+ only */
+#define   PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1)
+#define   PIPE_CONTROL_CS_STALL	(1<<20)
+#define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
+
+/* Like the store dword test, but we create new command buffers each time */
+static void
+store_pipe_control_loop(void)
+{
+	int i, val = 0; /* val: counter the gpu is asked to store each round */
+	uint32_t *buf;
+	drm_intel_bo *target_bo;
+
+	for (i = 0; i < 0x10000; i++) {
+		/* we want to check tlb consistency of the pipe_control target,
+		 * so get a new buffer every time around */
+		target_bo = drm_intel_bo_alloc(bufmgr, "target bo", 4096, 4096);
+		if (!target_bo) {
+			fprintf(stderr, "failed to alloc target buffer\n");
+			exit(-1);
+		}
+
+		/* gem_storedw_batches_loop.c is a bit overenthusiastic with
+		 * creating new batchbuffers - with buffer reuse disabled, the
+		 * support code will do that for us. */
+		if (intel_gen(devid) >= 6) {
+			/* work-around hw issue, see intel_emit_post_sync_nonzero_flush
+			 * in mesa sources. */
+			BEGIN_BATCH(4);
+			OUT_BATCH(GFX_OP_PIPE_CONTROL);
+			OUT_BATCH(PIPE_CONTROL_CS_STALL |
+			     PIPE_CONTROL_STALL_AT_SCOREBOARD);
+			OUT_BATCH(0); /* address */
+			OUT_BATCH(0); /* write data */
+			ADVANCE_BATCH();
+
+			BEGIN_BATCH(4);
+			OUT_BATCH(GFX_OP_PIPE_CONTROL);
+			OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
+			OUT_RELOC(target_bo,
+			     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 
+			     PIPE_CONTROL_GLOBAL_GTT);
+			OUT_BATCH(val); /* write data */
+			ADVANCE_BATCH();
+		} else if (intel_gen(devid) >= 4) { /* main() bails out on gen2/3 before this runs */
+			BEGIN_BATCH(4);
+			OUT_BATCH(GFX_OP_PIPE_CONTROL | PIPE_CONTROL_WC_FLUSH |
+					PIPE_CONTROL_TC_FLUSH |
+					PIPE_CONTROL_WRITE_IMMEDIATE | 2);
+			OUT_RELOC(target_bo,
+				I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+				PIPE_CONTROL_GLOBAL_GTT);
+			OUT_BATCH(val);
+			OUT_BATCH(0xdeadbeef); /* NOTE(review): looks like a filler dword; only buf[0] is checked below */
+			ADVANCE_BATCH();
+		}
+
+		intel_batchbuffer_flush_on_ring(batch, 0);
+
+		drm_intel_bo_map(target_bo, 1); /* map to read back what was stored */
+
+		buf = target_bo->virtual;
+		if (buf[0] != val) {
+			fprintf(stderr,
+				"value mismatch: cur 0x%08x, stored 0x%08x\n",
+				buf[0], val);
+			exit(-1);
+		}
+		buf[0] = 0; /* let batch write it again */
+		drm_intel_bo_unmap(target_bo);
+
+		drm_intel_bo_unreference(target_bo);
+
+		val++; /* next round stores a different value */
+	}
+
+	printf("completed %d writes successfully\n", i);
+}
+
+int main(int argc, char **argv)
+{
+	int fd;
+
+	/* the test takes no arguments at all */
+	if (argc != 1) {
+		fprintf(stderr, "usage: %s\n", argv[0]);
+		exit(-1);
+	}
+
+	fd = drm_open_any();
+	devid = intel_get_drm_devid(fd);
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	if (bufmgr == NULL) {
+		fprintf(stderr, "failed to init libdrm\n");
+		exit(-1);
+	}
+
+	/* 77: presumably the harness' "skipped" exit status -- confirm */
+	if (IS_GEN2(devid) || IS_GEN3(devid)) {
+		fprintf(stderr, "no pipe_control on gen2/3\n");
+		return 77;
+	}
+	if (devid == PCI_CHIP_I965_G) {
+		fprintf(stderr, "pipe_control totally broken on i965\n");
+		return 77;
+	}
+
+	/* IMPORTANT: drm_intel_bufmgr_gem_enable_reuse(bufmgr) is
+	 * deliberately not called here, because we want to have fresh
+	 * buffers (to trash the tlb) every time! */
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+	if (batch == NULL) {
+		fprintf(stderr, "failed to create batch buffer\n");
+		exit(-1);
+	}
+
+	store_pipe_control_loop();
+
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_pread_after_blit.c b/tests/gem_pread_after_blit.c
new file mode 100644
index 00000000..c9c8b02c
--- /dev/null
+++ b/tests/gem_pread_after_blit.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file gem_pread_after_blit.c
+ *
+ * This is a test of pread's behavior when getting values out of just-drawn-to
+ * buffers.
+ *
+ * The goal is to catch failure in the whole-buffer-flush or
+ * ranged-buffer-flush paths in the kernel.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+static const int width = 512, height = 512;
+static const int size = 1024 * 1024;
+
+#define PAGE_SIZE 4096
+
+/* Allocate a bo of `size` bytes and fill it with consecutive dwords, the
+ * first one being val. */
+static drm_intel_bo *
+create_bo(uint32_t val)
+{
+	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "src bo", size, 4096);
+	uint32_t *dword, *end;
+
+	drm_intel_bo_map(bo, 1);
+	dword = bo->virtual;
+	end = dword + 1024 * 1024 / 4;
+
+	/* val advances by one for every dword stored */
+	while (dword < end)
+		*dword++ = val++;
+
+	drm_intel_bo_unmap(bo);
+
+	return bo;
+}
+
+/* Read the whole bo back in one go and check that it holds consecutive
+ * dwords starting at val; abort() on the first difference. */
+static void
+verify_large_read(drm_intel_bo *bo, uint32_t val)
+{
+	uint32_t buf[size / 4];
+	int i;
+
+	drm_intel_bo_get_subdata(bo, 0, size, buf);
+	for (i = 0; i < size / 4; i++, val++) {
+		if (buf[i] == val)
+			continue;
+		fprintf(stderr,
+			"Unexpected value 0x%08x instead of "
+			"0x%08x at offset 0x%08x (%p)\n",
+			buf[i], val, i * 4, buf);
+		abort();
+	}
+}
+
+/** Page-sized readback check; this is the size that Mesa uses for software fallbacks. */
+static void
+verify_small_read(drm_intel_bo *bo, uint32_t val)
+{
+	uint32_t buf[4096 / 4];
+	int offset, i;
+	/* pre-fill buf with a marker pattern before the first read */
+	for (i = 0; i < 4096 / 4; i++)
+		buf[i] = 0x00c0ffee;
+
+	for (offset = 0; offset < size; offset += PAGE_SIZE) {
+		drm_intel_bo_get_subdata(bo, offset, PAGE_SIZE, buf);
+		/* i is a byte offset within the page just read */
+		for (i = 0; i < PAGE_SIZE; i += 4) {
+			if (buf[i / 4] != val) {
+				fprintf(stderr,
+					"Unexpected value 0x%08x instead of "
+					"0x%08x at offset 0x%08x\n",
+					buf[i / 4], val, offset + i);
+				abort();
+			}
+			val++;
+		}
+	}
+}
+
+int
+main(int argc, char **argv)
+{
+	int fd;
+	drm_intel_bo *src1, *src2, *bo;
+	uint32_t start1 = 0; /* value of the first dword in src1 */
+	uint32_t start2 = 1024 * 1024 / 4; /* src2 continues where src1's sequence ends */
+
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+	src1 = create_bo(start1);
+	src2 = create_bo(start2);
+
+	bo = drm_intel_bo_alloc(bufmgr, "dst bo", size, 4096);
+
+	/* First, do a full-buffer read after blitting */
+	printf("Large read after blit 1\n");
+	intel_copy_bo(batch, bo, src1, width, height);
+	verify_large_read(bo, start1);
+	printf("Large read after blit 2\n");
+	intel_copy_bo(batch, bo, src2, width, height);
+	verify_large_read(bo, start2);
+	/* Next, the same two blits checked with page-sized reads */
+	printf("Small reads after blit 1\n");
+	intel_copy_bo(batch, bo, src1, width, height);
+	verify_small_read(bo, start1);
+	printf("Small reads after blit 2\n");
+	intel_copy_bo(batch, bo, src2, width, height);
+	verify_small_read(bo, start2);
+	/* Finally, one more full-buffer read after switching back to src1 */
+	printf("Large read after blit 3\n");
+	intel_copy_bo(batch, bo, src1, width, height);
+	verify_large_read(bo, start1);
+
+	drm_intel_bo_unreference(src1);
+	drm_intel_bo_unreference(src2);
+	drm_intel_bo_unreference(bo);
+
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_pwrite.c b/tests/gem_pwrite.c
new file mode 100644
index 00000000..051ed3b5
--- /dev/null
+++ b/tests/gem_pwrite.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+
+#define OBJECT_SIZE 16384
+
+#define COPY_BLT_CMD		(2<<29|0x53<<22|0x6)
+#define BLT_WRITE_ALPHA		(1<<21)
+#define BLT_WRITE_RGB		(1<<20)
+#define BLT_SRC_TILED		(1<<15)
+#define BLT_DST_TILED		(1<<11)
+#define MI_BATCH_BUFFER_END	(0xA<<23)
+
+static void do_gem_write(int fd, uint32_t handle, void *buf, int len, int loops)
+{	/* issue `loops` identical gem_write() calls of len bytes at offset 0 */
+	for (; loops != 0; loops--)
+		gem_write(fd, handle, 0, buf, len);
+}
+
+/* Mean microseconds per loop between two gettimeofday() samples. */
+static double elapsed(const struct timeval *start, const struct timeval *end,
+		      int loop)
+{
+	return (1e6 * (end->tv_sec - start->tv_sec) + (end->tv_usec - start->tv_usec)) / loop;
+}
+
+/*
+ * Format v (bytes per second) into buf as "<value><unit>/s". The units are
+ * binary (KiB, MiB, ...), so scale by 1024 per step rather than by 1000.
+ * buf must be large enough for the result; returns buf.
+ */
+static const char *bytes_per_sec(char *buf, double v)
+{
+	static const char *const order[] = { "", "KiB", "MiB", "GiB", "TiB" };
+	const int last = sizeof(order) / sizeof(order[0]) - 1;
+	int o = 0;
+
+	while (v > 1024 && o < last) {
+		v /= 1024;
+		o++;
+	}
+	sprintf(buf, "%.1f%s/s", v, order[o]);
+	return buf;
+}
+
+
+int main(int argc, char **argv)
+{
+	int object_size = 0, fd, count;
+	char rate[80];
+	uint32_t *src, dst;
+
+	/* optional argv[1]: object size in bytes, rounded up to a dword */
+	if (argc > 1)
+		object_size = atoi(argv[1]);
+	if (object_size <= 0)
+		object_size = OBJECT_SIZE;
+	object_size = (object_size + 3) & -4;
+
+	fd = drm_open_any();
+	dst = gem_create(fd, object_size);
+	src = calloc(1, object_size);
+	if (src == NULL) {
+		fprintf(stderr, "failed to allocate %d byte source buffer\n", object_size);
+		exit(1);
+	}
+	for (count = 1; count <= 1<<17; count <<= 1) {
+		struct timeval start, end;
+		gettimeofday(&start, NULL);
+		do_gem_write(fd, dst, src, object_size, count);
+		gettimeofday(&end, NULL);
+		printf("Time to pwrite %d bytes x %6d:	%7.3fµs, %s\n",
+		       object_size, count,
+		       elapsed(&start, &end, count),
+		       bytes_per_sec(rate, object_size/elapsed(&start, &end, count)*1e6));
+		fflush(stdout);
+	}
+	free(src);
+	gem_close(fd, dst);
+	close(fd);
+	return 0;
+}
diff --git a/tests/gem_readwrite.c b/tests/gem_readwrite.c
new file mode 100644
index 00000000..68c3ff28
--- /dev/null
+++ b/tests/gem_readwrite.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+
+#define OBJECT_SIZE 16384
+
+/* Poison buf, then issue a pread ioctl for size bytes at offset of handle
+ * into it. Returns the raw ioctl() result. */
+static int
+do_read(int fd, int handle, void *buf, int offset, int size)
+{
+	struct drm_i915_gem_pread arg;
+
+	/* 0xd0 filler: should the ioctl fail, buf must not hold any
+	 * convenient data left over from before. */
+	memset(buf, 0xd0, size);
+
+	memset(&arg, 0, sizeof(arg));
+	arg.handle = handle;
+	arg.offset = offset;
+	arg.size = size;
+	arg.data_ptr = (uintptr_t)buf;
+	return ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &arg);
+}
+
+/* Issue a pwrite ioctl copying size bytes from buf to offset within handle;
+ * returns the raw ioctl() result. */
+static int
+do_write(int fd, int handle, void *buf, int offset, int size)
+{
+	struct drm_i915_gem_pwrite arg;
+	memset(&arg, 0, sizeof(arg));
+	arg.handle = handle;
+	arg.offset = offset;
+	arg.size = size;
+	arg.data_ptr = (uintptr_t)buf;
+	return ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &arg);
+}
+
+int main(int argc, char **argv)
+{
+	int fd;
+	uint8_t expected[OBJECT_SIZE]; /* cpu-side model of the object's contents */
+	uint8_t buf[OBJECT_SIZE]; /* scratch buffer for every read and write */
+	int ret;
+	int handle;
+
+	fd = drm_open_any();
+
+	handle = gem_create(fd, OBJECT_SIZE);
+
+	printf("Testing contents of newly created object.\n");
+	ret = do_read(fd, handle, buf, 0, OBJECT_SIZE);
+	assert(ret == 0);
+	memset(&expected, 0, sizeof(expected)); /* a new object must read back as zeroes */
+	assert(memcmp(expected, buf, sizeof(expected)) == 0);
+
+	printf("Testing read beyond end of buffer.\n");
+	ret = do_read(fd, handle, buf, OBJECT_SIZE / 2, OBJECT_SIZE); /* range ends past the object */
+	assert(ret == -1 && errno == EINVAL);
+
+	printf("Testing full write of buffer\n");
+	memset(buf, 0, sizeof(buf));
+	memset(buf + 1024, 0x01, 1024);
+	memset(expected + 1024, 0x01, 1024); /* keep the model in step with buf */
+	ret = do_write(fd, handle, buf, 0, OBJECT_SIZE);
+	assert(ret == 0);
+	ret = do_read(fd, handle, buf, 0, OBJECT_SIZE);
+	assert(ret == 0);
+	assert(memcmp(buf, expected, sizeof(buf)) == 0);
+
+	printf("Testing partial write of buffer\n");
+	memset(buf + 4096, 0x02, 1024);
+	memset(expected + 4096, 0x02, 1024);
+	ret = do_write(fd, handle, buf + 4096, 4096, 1024); /* only the 1024 changed bytes */
+	assert(ret == 0);
+	ret = do_read(fd, handle, buf, 0, OBJECT_SIZE);
+	assert(ret == 0);
+	assert(memcmp(buf, expected, sizeof(buf)) == 0);
+
+	printf("Testing partial read of buffer\n");
+	ret = do_read(fd, handle, buf, 512, 1024); /* lands at buf[0], not buf[512] */
+	assert(ret == 0);
+	assert(memcmp(buf, expected + 512, 1024) == 0);
+
+	printf("Testing read of bad buffer handle\n");
+	ret = do_read(fd, 1234, buf, 0, 1024); /* 1234: presumably no such handle on this fd */
+	assert(ret == -1 && errno == ENOENT);
+
+	printf("Testing write of bad buffer handle\n");
+	ret = do_write(fd, 1234, buf, 0, 1024);
+	assert(ret == -1 && errno == ENOENT);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_reg_read.c b/tests/gem_reg_read.c
new file mode 100644
index 00000000..1d6db1f1
--- /dev/null
+++ b/tests/gem_reg_read.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include "i915_drm.h"
+#include "drmtest.h"
+
+struct local_drm_i915_reg_read {
+	__u64 offset; /* register offset to read, filled in by the caller */
+	__u64 val; /* Return value */
+};
+
+#define REG_READ_IOCTL DRM_IOWR(DRM_COMMAND_BASE + 0x31, struct local_drm_i915_reg_read)
+
+/* Exit with failure unless a call meant to fail did so with errno == expected. */
+static void handle_bad(int ret, int lerrno, int expected, const char *desc)
+{
+	if (ret == 0) {
+		fprintf(stderr, "%s - Command succeeded, but should have failed\n", desc);
+		exit(EXIT_FAILURE);
+	}
+	if (lerrno != expected) {
+		fprintf(stderr, "%s - errno was %d, but should have been %d\n", desc, lerrno, expected);
+		exit(EXIT_FAILURE);
+	}
+}
+
+/* Read register offset 0x2358 through REG_READ_IOCTL and return its value;
+ * exits with EXIT_FAILURE if the ioctl reports an error. */
+static uint64_t timer_query(int fd)
+{
+	struct local_drm_i915_reg_read reg;
+
+	reg.offset = 0x2358;
+	if (drmIoctl(fd, REG_READ_IOCTL, &reg) != 0) {
+		perror("positive test case failed: ");
+		exit(EXIT_FAILURE);
+	}
+
+	return reg.val;
+}
+
+int main(int argc, char *argv[])
+{
+	struct local_drm_i915_reg_read read;
+	int ret, fd;
+	uint64_t val;
+
+	fd = drm_open_any();
+	/* errno only describes a failure, so it is consulted only when ret != 0 */
+	read.offset = 0x2358;
+	ret = drmIoctl(fd, REG_READ_IOCTL, &read);
+	if (ret && errno == EINVAL)
+		exit(77);
+	else if (ret)
+		exit(EXIT_FAILURE);
+
+	val = timer_query(fd);
+	sleep(1);
+	if (timer_query(fd) == val) {
+		fprintf(stderr, "Timer isn't moving, probably busted\n");
+		exit(EXIT_FAILURE);
+	}
+
+	/* bad reg */
+	read.offset = 0x12345678;
+	ret = drmIoctl(fd, REG_READ_IOCTL, &read);
+	handle_bad(ret, errno, EINVAL, "bad register");
+
+	close(fd);
+
+	exit(EXIT_SUCCESS);
+}
diff --git a/tests/gem_reloc_vs_gpu.c b/tests/gem_reloc_vs_gpu.c
new file mode 100644
index 00000000..47681d51
--- /dev/null
+++ b/tests/gem_reloc_vs_gpu.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+/*
+ * Testcase: Kernel relocations vs. gpu races
+ *
+ */
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch; /* non-static; presumably the BEGIN_BATCH/OUT_BATCH macros find it by name - confirm in intel_batchbuffer.h */
+
+uint32_t blob[2048*2048]; /* fill data that main() uploads into dummy_bo */
+#define NUM_TARGET_BOS 16
+drm_intel_bo *pc_target_bo[NUM_TARGET_BOS]; /* one relocation target per special-batch submission */
+drm_intel_bo *dummy_bo; /* buffer the dummy-load blits copy within */
+drm_intel_bo *special_bo; /* hand-built batch from create_special_bo() */
+uint32_t devid;
+int special_reloc_ofs; /* byte offset inside special_bo of the dword to relocate */
+int special_batch_len; /* length of the special batch in bytes */
+
+#define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24)|2) /* this macro and the flags below are not referenced elsewhere in this file */
+#define   PIPE_CONTROL_WRITE_IMMEDIATE	(1<<14)
+#define   PIPE_CONTROL_WRITE_TIMESTAMP	(3<<14)
+#define   PIPE_CONTROL_DEPTH_STALL (1<<13)
+#define   PIPE_CONTROL_WC_FLUSH	(1<<12)
+#define   PIPE_CONTROL_IS_FLUSH	(1<<11) /* MBZ on Ironlake */
+#define   PIPE_CONTROL_TC_FLUSH (1<<10) /* GM45+ only */
+#define   PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1)
+#define   PIPE_CONTROL_CS_STALL	(1<<20)
+#define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
+
+static void create_special_bo(void)
+{
+	uint32_t data[1024]; /* CPU-side image of the batch, uploaded in one go below */
+	int len = 0; /* number of dwords written so far */
+	int small_pitch = 64;
+#define BATCH(dw) data[len++] = (dw); /* append one dword to data[] */
+
+	memset(data, 0, 4096);
+	special_bo = drm_intel_bo_alloc(bufmgr, "special batch", 4096, 4096);
+
+	BATCH(XY_COLOR_BLT_CMD | COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB);
+	BATCH((3 << 24) | (0xf0 << 16) | small_pitch); /* same field layout as the copy blits in this file: depth, ROP, pitch */
+	BATCH(0); /* presumably dst x1,y1 = 0,0 - confirm against the blitter docs */
+	BATCH(1 << 16 | 1); /* presumably dst x2,y2 = 1,1, i.e. a single pixel */
+	special_reloc_ofs = 4*len; /* byte offset of the next dword, which main() relocates to each target bo */
+	BATCH(0); /* placeholder that the relocation overwrites */
+	BATCH(0xdeadbeef); /* the value main() later expects to read back from every target */
+
+#define CMD_POLY_STIPPLE_OFFSET       0x7906
+	/* batchbuffer end */
+	if (IS_GEN5(batch->devid)) { /* NOTE(review): why gen5 needs these two extra dwords is not visible here */
+		BATCH(CMD_POLY_STIPPLE_OFFSET << 16);
+		BATCH(0);
+	}
+	assert(len % 2 == 0); /* with the two dwords below the batch stays an even number of dwords */
+	BATCH(MI_NOOP);
+	BATCH(MI_BATCH_BUFFER_END);
+
+	drm_intel_bo_subdata(special_bo, 0, 4096, data);
+	special_batch_len = len*4; /* bytes, as passed to drm_intel_bo_mrb_exec() by main() */
+}
+
+static void emit_dummy_load(int pitch) /* queues ten blits within dummy_bo, then flushes the batch */
+{
+	int i;
+	uint32_t tile_flags = 0;
+
+	if (IS_965(devid)) { /* presumably these chips take tiled pitches in dwords and need explicit tiling bits - confirm */
+		pitch /= 4;
+		tile_flags = XY_SRC_COPY_BLT_SRC_TILED |
+			XY_SRC_COPY_BLT_DST_TILED;
+	}
+
+	for (i = 0; i < 10; i++) {
+		BEGIN_BATCH(8);
+		OUT_BATCH(XY_SRC_COPY_BLT_CMD |
+			  XY_SRC_COPY_BLT_WRITE_ALPHA |
+			  XY_SRC_COPY_BLT_WRITE_RGB |
+			  tile_flags);
+		OUT_BATCH((3 << 24) | /* 32 bits */
+			  (0xcc << 16) | /* copy ROP */
+			  pitch);
+		OUT_BATCH(0 << 16 | 1024); /* dst x1,y1 = 1024,0 */
+		OUT_BATCH((2048) << 16 | (2048)); /* dst x2,y2 = 2048,2048 */
+		OUT_RELOC_FENCED(dummy_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+		OUT_BATCH(0 << 16 | 0); /* src x1,y1 = 0,0 */
+		OUT_BATCH(pitch);
+		OUT_RELOC_FENCED(dummy_bo, I915_GEM_DOMAIN_RENDER, 0, 0); /* source and destination are the same bo */
+		ADVANCE_BATCH();
+
+		if (IS_GEN6(devid) || IS_GEN7(devid)) { /* NOTE(review): purpose of this extra 3-dword command on gen6/7 is not stated here */
+			BEGIN_BATCH(3);
+			OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+			ADVANCE_BATCH();
+		}
+	}
+	intel_batchbuffer_flush(batch);
+}
+
+#define MAX_BLT_SIZE 128 /* not referenced anywhere in this file */
+int main(int argc, char **argv)
+{
+	uint32_t tiling_mode = I915_TILING_X;
+	unsigned long pitch, act_size;
+	int fd, i, ring;
+	uint32_t test;
+
+	memset(blob, 'A', sizeof(blob));
+
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	/* disable reuse, otherwise the test fails */
+	//drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	devid = intel_get_drm_devid(fd);
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+	act_size = 2048;
+	dummy_bo = drm_intel_bo_alloc_tiled(bufmgr, "tiled dummy_bo", act_size, act_size,
+				      4, &tiling_mode, &pitch, 0);
+
+	drm_intel_bo_subdata(dummy_bo, 0, act_size*act_size*4, blob);
+
+	create_special_bo();
+
+	if (intel_gen(devid) >= 6) /* gen6+ gets I915_EXEC_BLT; older parts use ring 0 */
+		ring = I915_EXEC_BLT;
+	else
+		ring = 0;
+
+	for (i = 0; i < NUM_TARGET_BOS; i++) {
+		pc_target_bo[i] = drm_intel_bo_alloc(bufmgr, "special batch", 4096, 4096);
+		emit_dummy_load(pitch); /* queue the dummy blits before submitting the special batch */
+		assert(pc_target_bo[i]->offset == 0); /* presumably: the fresh target has no GTT offset yet, so a relocation is required */
+		drm_intel_bo_emit_reloc(special_bo, special_reloc_ofs,
+					pc_target_bo[i],
+					0,
+					I915_GEM_DOMAIN_RENDER,
+					I915_GEM_DOMAIN_RENDER);
+		drm_intel_bo_mrb_exec(special_bo, special_batch_len, NULL,
+				      0, 0, ring);
+	}
+
+	/* Only check at the end to avoid unnecessary synchronous behaviour. */
+	for (i = 0; i < NUM_TARGET_BOS; i++) {
+		drm_intel_bo_get_subdata(pc_target_bo[i], 0, 4, &test); /* first dword of each target */
+		if (test != 0xdeadbeef) { /* the value create_special_bo() put in the batch */
+			fprintf(stderr, "mismatch in buffer %i: 0x%08x instead of 0xdeadbeef\n", i, test);
+			exit(1);
+		}
+		drm_intel_bo_unreference(pc_target_bo[i]);
+	}
+
+	drm_intel_gem_bo_map_gtt(dummy_bo); /* presumably maps only to wait for the dummy blits - confirm */
+	drm_intel_gem_bo_unmap_gtt(dummy_bo);
+
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_ring_sync_loop.c b/tests/gem_ring_sync_loop.c
new file mode 100644
index 00000000..b689bcde
--- /dev/null
+++ b/tests/gem_ring_sync_loop.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch> (based on gem_storedw_*.c)
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+#include "i830_reg.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch; /* non-static; presumably the BEGIN_BATCH/OUT_BATCH macros find it by name - confirm in intel_batchbuffer.h */
+static drm_intel_bo *target_buffer; /* the single bo every batch in store_dword_loop() relocates against */
+
+/*
+ * Testcase: Basic check of ring<->ring sync using a dummy reloc
+ *
+ * Extremely efficient at catching missed irqs with semaphores=0 ...
+ */
+
+#define MI_COND_BATCH_BUFFER_END	(0x36<<23 | 1) /* emitted on the render ring in store_dword_loop() */
+#define MI_DO_COMPARE			(1<<21) /* OR-ed into MI_COND_BATCH_BUFFER_END */
+
+static void
+store_dword_loop(void)
+{
+	int i;
+
+	srandom(0xdeadbeef); /* fixed seed: the ring sequence is the same on every run */
+
+	for (i = 0; i < 0x100000; i++) {
+		int ring = random() % 3 + 1; /* 1..3, used directly as the ring selector below */
+
+		if (ring == I915_EXEC_RENDER) {
+			BEGIN_BATCH(4);
+			OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
+			OUT_BATCH(0xffffffff); /* compare dword */
+			OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
+					I915_GEM_DOMAIN_RENDER, 0);
+			OUT_BATCH(MI_NOOP);
+			ADVANCE_BATCH();
+		} else { /* either of the other two rings */
+			BEGIN_BATCH(4);
+			OUT_BATCH(MI_FLUSH_DW | 1);
+			OUT_BATCH(0); /* reserved */
+			OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
+					I915_GEM_DOMAIN_RENDER, 0);
+			OUT_BATCH(MI_NOOP | (1<<22) | (0xf));
+			ADVANCE_BATCH();
+		}
+		intel_batchbuffer_flush_on_ring(batch, ring); /* one tiny batch per iteration, on the randomly chosen ring */
+	}
+
+	drm_intel_bo_map(target_buffer, 0);
+	// map to force waiting on rendering
+	drm_intel_bo_unmap(target_buffer);
+}
+
+int main(int argc, char **argv)
+{
+	int fd;
+	int devid;
+
+	if (argc != 1) { /* the test takes no arguments */
+		fprintf(stderr, "usage: %s\n", argv[0]);
+		exit(-1);
+	}
+
+	fd = drm_open_any();
+	devid = intel_get_drm_devid(fd);
+	if (!HAS_BLT_RING(devid)) {
+		fprintf(stderr, "inter ring check needs gen6+\n");
+		return 77; /* reported as a skip rather than a failure */
+	}
+
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	if (!bufmgr) {
+		fprintf(stderr, "failed to init libdrm\n");
+		exit(-1);
+	}
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+	if (!batch) {
+		fprintf(stderr, "failed to create batch buffer\n");
+		exit(-1);
+	}
+
+	target_buffer = drm_intel_bo_alloc(bufmgr, "target bo", 4096, 4096);
+	if (!target_buffer) {
+		fprintf(stderr, "failed to alloc target buffer\n");
+		exit(-1);
+	}
+
+	store_dword_loop(); /* the actual test; it returns after mapping target_buffer */
+
+	drm_intel_bo_unreference(target_buffer);
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_ringfill.c b/tests/gem_ringfill.c
new file mode 100644
index 00000000..5bae8f11
--- /dev/null
+++ b/tests/gem_ringfill.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file gem_ringfill.c
+ *
+ * This is a test of doing many tiny batchbuffer operations, in the hope of
+ * catching failure to manage the ring properly near full.
+ */
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include "drm.h"
+#include "i915_drm.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+#include "rendercopy.h"
+
+struct bo { /* the three buffers used by one check_ring() run */
+	const char *ring; /* label used in failure messages */
+	drm_intel_bo *src, *dst, *tmp; /* index pattern, verified copy target, scratch target of the dummy load */
+};
+
+static const int width = 512, height = 512; /* surface size in pixels; each pixel is one 32-bit dword */
+
+static void create_bo(drm_intel_bufmgr *bufmgr,
+		      struct bo *b,
+		      const char *ring)
+{
+	const int npixels = width * height;
+	const int bytes = 4 * npixels; /* one 32-bit dword per pixel */
+	uint32_t *px;
+	int n;
+
+	b->ring = ring;
+	b->src = drm_intel_bo_alloc(bufmgr, "src", bytes, 4096);
+	b->dst = drm_intel_bo_alloc(bufmgr, "dst", bytes, 4096);
+	b->tmp = drm_intel_bo_alloc(bufmgr, "tmp", bytes, 4096);
+
+	/* src: every pixel stores its own linear index */
+	drm_intel_bo_map(b->src, true);
+	for (px = b->src->virtual, n = 0; n < npixels; n++)
+		px[n] = n;
+	drm_intel_bo_unmap(b->src);
+
+	/* dst: poison pattern 0xd0d0d0d0 (tmp is left unfilled) */
+	drm_intel_bo_map(b->dst, true);
+	for (px = b->dst->virtual, n = 0; n < npixels; n++)
+		px[n] = 0xd0d0d0d0;
+	drm_intel_bo_unmap(b->dst);
+}
+
+static int check_bo(struct bo *b)
+{
+	const uint32_t *pixels;
+	int idx, mismatches = 0;
+
+	/* every dword of dst is expected to equal its own index */
+	drm_intel_bo_map(b->dst, false);
+	pixels = b->dst->virtual;
+	for (idx = 0; idx < width*height; idx++) {
+		if (pixels[idx] == idx)
+			continue;
+		if (++mismatches > 9) /* count them all, report only the first nine */
+			continue;
+		printf("%s: copy #%d at %d,%d failed: read 0x%08x\n",
+		       b->ring, idx, idx % width, idx / width, pixels[idx]);
+	}
+	drm_intel_bo_unmap(b->dst);
+
+	return mismatches;
+}
+
+static void destroy_bo(struct bo *b) /* drops the three references taken in create_bo() */
+{
+	drm_intel_bo_unreference(b->src);
+	drm_intel_bo_unreference(b->tmp);
+	drm_intel_bo_unreference(b->dst);
+}
+
+static int check_ring(drm_intel_bufmgr *bufmgr,
+		      struct intel_batchbuffer *batch,
+		      const char *ring,
+		      render_copyfunc_t copy)
+{
+	struct scratch_buf src, tmp, dst;
+	struct bo bo;
+	char output[100];
+	int i;
+
+	snprintf(output, 100, "filling %s ring: ", ring); /* prefix handed to drmtest_progress() */
+
+	create_bo(bufmgr, &bo, ring);
+
+	src.stride = 4 * width;
+	src.tiling = 0;
+	src.data = src.cpu_mapping = NULL;
+	src.size = 4 * width * height;
+	src.num_tiles = 4 * width * height; /* NOTE(review): same value as size; confirm what copy() expects here */
+	dst = tmp = src; /* all three descriptors share the geometry; only .bo differs */
+
+	src.bo = bo.src;
+	tmp.bo = bo.tmp;
+	dst.bo = bo.dst;
+
+	/* The ring we've been using is 128k, and each rendering op
+	 * will use at least 8 dwords:
+	 *
+	 * BATCH_START
+	 * BATCH_START offset
+	 * MI_FLUSH
+	 * STORE_DATA_INDEX
+	 * STORE_DATA_INDEX offset
+	 * STORE_DATA_INDEX value
+	 * MI_USER_INTERRUPT
+	 * (padding)
+	 *
+	 * So iterate just a little more than that -- if we don't fill the ring
+	 * doing this, we aren't likely to with this test.
+	 */
+	for (i = 0; i < width * height; i++) { /* one iteration per pixel, two copy() calls each */
+		int x = i % width;
+		int y = i / width;
+
+		drmtest_progress(output, i, width*height);
+
+		assert(y < height);
+
+		/* Dummy load to fill the ring */
+		copy(batch, &src, 0, 0, width, height, &tmp, 0, 0);
+		/* And copy the src into dst, pixel by pixel */
+		copy(batch, &src, x, y, 1, 1, &dst, x, y);
+	}
+
+	/* verify */
+	printf("verifying\n");
+	i = check_bo(&bo); /* i now holds the mismatch count */
+	destroy_bo(&bo);
+
+	return i; /* 0 when every pixel of dst matched */
+}
+
+static void blt_copy(struct intel_batchbuffer *batch, /* copy callback for check_ring(): one XY_SRC_COPY blit, flushed at once */
+		     struct scratch_buf *src, unsigned src_x, unsigned src_y,
+		     unsigned w, unsigned h,
+		     struct scratch_buf *dst, unsigned dst_x, unsigned dst_y)
+{
+	BEGIN_BATCH(8);
+	OUT_BATCH(XY_SRC_COPY_BLT_CMD |
+		  XY_SRC_COPY_BLT_WRITE_ALPHA |
+		  XY_SRC_COPY_BLT_WRITE_RGB);
+	OUT_BATCH((3 << 24) | /* 32 bits */
+		  (0xcc << 16) | /* copy ROP */
+		  dst->stride);
+	OUT_BATCH((dst_y << 16) | dst_x); /* dst x1,y1 */
+	OUT_BATCH(((dst_y + h) << 16) | (dst_x + w)); /* dst x2,y2 */
+	OUT_RELOC(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+	OUT_BATCH((src_y << 16) | src_x); /* src x1,y1 */
+	OUT_BATCH(src->stride);
+	OUT_RELOC(src->bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+	ADVANCE_BATCH();
+
+	intel_batchbuffer_flush(batch); /* submit after every single copy */
+}
+
+int main(int argc, char **argv)
+{
+	drm_intel_bufmgr *bufmgr;
+	struct intel_batchbuffer *batch;
+	render_copyfunc_t copy;
+	int fd, fails = 0;
+
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+	fails += check_ring(bufmgr, batch, "blt", blt_copy); /* always run the blitter variant */
+
+	/* Strictly only required on architectures with a separate BLT ring,
+	 * but lets stress everybody.
+	 */
+	copy = NULL;
+	if (IS_GEN2(batch->devid))
+		copy = gen2_render_copyfunc;
+	else if (IS_GEN3(batch->devid))
+		copy = gen3_render_copyfunc;
+	else if (IS_GEN6(batch->devid))
+		copy = gen6_render_copyfunc;
+	if (copy) /* other generations have no render copy selected here, so that pass is skipped */
+		fails += check_ring(bufmgr, batch, "render", copy);
+
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return fails != 0; /* exit status 1 if any pixel mismatched, else 0 */
+}
diff --git a/tests/gem_set_tiling_vs_blt.c b/tests/gem_set_tiling_vs_blt.c
new file mode 100644
index 00000000..5fa90d49
--- /dev/null
+++ b/tests/gem_set_tiling_vs_blt.c
@@ -0,0 +1,269 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+/** @file gem_set_tiling_vs_blt.c
+ *
+ * Testcase: Check for proper synchronization of tiling changes vs. tiled gpu
+ * access
+ *
+ * The blitter on gen3 and earlier needs properly set up fences. Which also
+ * means that for untiled blits we may not set up a fence before that blt has
+ * finished.
+ *
+ * Current kernels have a bug there, but it's pretty hard to hit because you
+ * need:
+ * - a blt on an untiled object which is aligned correctly for tiling.
+ * - a set_tiling to switch that object to tiling
+ * - another blt without any intervening cpu access that uses this object.
+ *
+ * Testcase has been extended to also check tiled->untiled and tiled->tiled
+ * transitions (i.e. changing stride).
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdbool.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch; /* non-static; presumably the BEGIN_BATCH/OUT_BATCH macros find it by name - confirm in intel_batchbuffer.h */
+uint32_t devid;
+
+#define TEST_SIZE (1024*1024) /* bytes per test/target bo */
+#define TEST_STRIDE (4*1024) /* bytes per row */
+#define TEST_HEIGHT(stride)	(TEST_SIZE/(stride)) /* rows that fit in TEST_SIZE at this stride */
+#define TEST_WIDTH(stride)	((stride)/4) /* 32-bit pixels per row */
+
+uint32_t data[TEST_SIZE/4]; /* main() fills it with data[i] = i; do_test() reuses it as the readback buffer */
+
+static void do_test(uint32_t tiling, unsigned stride, /* layout of test_bo for the first blit */
+		    uint32_t tiling_after, unsigned stride_after) /* layout it is switched to before the second blit */
+{
+	drm_intel_bo *busy_bo, *test_bo, *target_bo;
+	int i, ret;
+	uint32_t *ptr;
+	uint32_t test_bo_handle;
+	uint32_t blt_stride, blt_bits;
+	bool tiling_changed = false;
+
+	printf("filling ring .. ");
+	busy_bo = drm_intel_bo_alloc(bufmgr, "busy bo bo", 16*1024*1024, 4096);
+
+	for (i = 0; i < 250; i++) { /* queue 250 large copies within busy_bo */
+		BEGIN_BATCH(8);
+		OUT_BATCH(XY_SRC_COPY_BLT_CMD |
+			  XY_SRC_COPY_BLT_WRITE_ALPHA |
+			  XY_SRC_COPY_BLT_WRITE_RGB);
+		OUT_BATCH((3 << 24) | /* 32 bits */
+			  (0xcc << 16) | /* copy ROP */
+			  2*1024*4);
+		OUT_BATCH(0 << 16 | 1024);
+		OUT_BATCH((2048) << 16 | (2048));
+		OUT_RELOC_FENCED(busy_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+		OUT_BATCH(0 << 16 | 0);
+		OUT_BATCH(2*1024*4);
+		OUT_RELOC_FENCED(busy_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+		ADVANCE_BATCH();
+
+		if (IS_GEN6(devid) || IS_GEN7(devid)) {
+			BEGIN_BATCH(3);
+			OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+			ADVANCE_BATCH();
+		}
+	}
+	intel_batchbuffer_flush(batch);
+
+	printf("playing tricks .. ");
+	/* first allocate the target so it gets out of the way of playing funky
+	 * tricks */
+	target_bo = drm_intel_bo_alloc(bufmgr, "target bo", TEST_SIZE, 4096);
+
+	/* allocate buffer with parameters _after_ transition we want to check
+	 * and touch it, so that it's properly aligned in the gtt. */
+	test_bo = drm_intel_bo_alloc(bufmgr, "tiled busy bo", TEST_SIZE, 4096);
+	test_bo_handle = test_bo->handle; /* remembered to detect whether the next alloc reuses this bo */
+	ret = drm_intel_bo_set_tiling(test_bo, &tiling_after, stride_after);
+	assert(ret == 0);
+	drm_intel_gem_bo_map_gtt(test_bo);
+	ptr = test_bo->virtual;
+	*ptr = 0;
+	ptr = NULL;
+	drm_intel_gem_bo_unmap_gtt(test_bo);
+
+	drm_intel_bo_unreference(test_bo);
+
+	test_bo = NULL;
+
+	/* note we need a bo bigger than batches, otherwise the buffer reuse
+	 * trick will fail. */
+	test_bo = drm_intel_bo_alloc(bufmgr, "busy bo", TEST_SIZE, 4096);
+	if (test_bo_handle != test_bo->handle) /* only a warning: the test carries on with a different bo */
+		fprintf(stderr, "libdrm reuse trick failed\n");
+	test_bo_handle = test_bo->handle;
+	/* ensure we have the right tiling before we start. */
+	ret = drm_intel_bo_set_tiling(test_bo, &tiling, stride);
+	assert(ret == 0);
+
+	if (tiling == I915_TILING_NONE) { /* upload the index pattern: subdata when untiled, GTT map when tiled */
+		drm_intel_bo_subdata(test_bo, 0, TEST_SIZE, data);
+	} else {
+		drm_intel_gem_bo_map_gtt(test_bo);
+		ptr = test_bo->virtual;
+		memcpy(ptr, data, TEST_SIZE);
+		ptr = NULL;
+		drm_intel_gem_bo_unmap_gtt(test_bo);
+	}
+
+	blt_stride = stride;
+	blt_bits = 0;
+	if (intel_gen(devid) >= 4 && tiling != I915_TILING_NONE) { /* gen4+ with a tiled source: pitch / 4 plus the SRC_TILED bit */
+		blt_stride /= 4;
+		blt_bits = XY_SRC_COPY_BLT_SRC_TILED;
+	}
+
+	BEGIN_BATCH(8); /* first blit: whole test_bo -> target_bo, using the initial layout */
+	OUT_BATCH(XY_SRC_COPY_BLT_CMD |
+		  XY_SRC_COPY_BLT_WRITE_ALPHA |
+		  blt_bits |
+		  XY_SRC_COPY_BLT_WRITE_RGB);
+	OUT_BATCH((3 << 24) | /* 32 bits */
+		  (0xcc << 16) | /* copy ROP */
+		  stride);
+	OUT_BATCH(0 << 16 | 0);
+	OUT_BATCH((TEST_HEIGHT(stride)) << 16 | (TEST_WIDTH(stride)));
+	OUT_RELOC_FENCED(target_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+	OUT_BATCH(0 << 16 | 0);
+	OUT_BATCH(blt_stride);
+	OUT_RELOC_FENCED(test_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+	ADVANCE_BATCH();
+	intel_batchbuffer_flush(batch);
+
+	drm_intel_bo_unreference(test_bo);
+
+	test_bo = drm_intel_bo_alloc_for_render(bufmgr, "tiled busy bo", TEST_SIZE, 4096);
+	if (test_bo_handle != test_bo->handle)
+		fprintf(stderr, "libdrm reuse trick failed\n");
+	ret = drm_intel_bo_set_tiling(test_bo, &tiling_after, stride_after); /* the tiling change under test */
+	assert(ret == 0);
+
+	/* Note: We don't care about gen4+ here because the blitter doesn't use
+	 * fences there. So not setting tiling flags on the tiled buffer is ok.
+	 */
+	BEGIN_BATCH(8); /* second blit: a 1x1 copy within test_bo, using the new stride */
+	OUT_BATCH(XY_SRC_COPY_BLT_CMD |
+		  XY_SRC_COPY_BLT_WRITE_ALPHA |
+		  XY_SRC_COPY_BLT_WRITE_RGB);
+	OUT_BATCH((3 << 24) | /* 32 bits */
+		  (0xcc << 16) | /* copy ROP */
+		  stride_after);
+	OUT_BATCH(0 << 16 | 0);
+	OUT_BATCH((1) << 16 | (1));
+	OUT_RELOC_FENCED(test_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+	OUT_BATCH(0 << 16 | 0);
+	OUT_BATCH(stride_after);
+	OUT_RELOC_FENCED(test_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+	ADVANCE_BATCH();
+	intel_batchbuffer_flush(batch);
+
+	/* Now try to trick the kernel into changing up the fencing
+	 * too early. */
+
+	printf("checking .. ");
+	memset(data, 0, TEST_SIZE);
+	drm_intel_bo_get_subdata(target_bo, 0, TEST_SIZE, data);
+	for (i = 0; i < TEST_SIZE/4; i++)
+		assert(data[i] == i); /* target_bo must hold the untouched index pattern */
+
+	/* check whether tiling on the test_bo actually changed. */
+	drm_intel_gem_bo_map_gtt(test_bo);
+	ptr = test_bo->virtual;
+	for (i = 0; i < TEST_SIZE/4; i++)
+		if (ptr[i] != data[i])
+			tiling_changed = true;
+	ptr = NULL;
+	drm_intel_gem_bo_unmap_gtt(test_bo);
+	assert(tiling_changed);
+
+	drm_intel_bo_unreference(test_bo);
+	drm_intel_bo_unreference(target_bo);
+	drm_intel_bo_unreference(busy_bo);
+	printf("done\n");
+}
+
+int main(int argc, char **argv)
+{
+	int i, fd;
+	uint32_t tiling, tiling_after;
+
+	for (i = 0; i < 1024*256; i++) /* 1024*256 == TEST_SIZE/4, i.e. the whole data[] array */
+		data[i] = i;
+
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr); /* do_test() depends on libdrm handing back the same bo */
+	devid = intel_get_drm_devid(fd);
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+
+	printf("testing untiled->tiled transisition:\n");
+	tiling = I915_TILING_NONE;
+	tiling_after = I915_TILING_X;
+	do_test(tiling, TEST_STRIDE, tiling_after, TEST_STRIDE);
+	assert(tiling == I915_TILING_NONE); /* do_test() takes both values by copy, so these two asserts cannot fire */
+	assert(tiling_after == I915_TILING_X);
+
+	printf("testing tiled->untiled transisition:\n");
+	tiling = I915_TILING_X;
+	tiling_after = I915_TILING_NONE;
+	do_test(tiling, TEST_STRIDE, tiling_after, TEST_STRIDE);
+	assert(tiling == I915_TILING_X);
+	assert(tiling_after == I915_TILING_NONE);
+
+	printf("testing tiled->tiled transisition:\n");
+	tiling = I915_TILING_X;
+	tiling_after = I915_TILING_X;
+	do_test(tiling, TEST_STRIDE/2, tiling_after, TEST_STRIDE); /* same tiling mode, stride doubled */
+	assert(tiling == I915_TILING_X);
+	assert(tiling_after == I915_TILING_X);
+
+	return 0; /* batch, bufmgr and fd are left for process exit to release */
+}
diff --git a/tests/gem_set_tiling_vs_gtt.c b/tests/gem_set_tiling_vs_gtt.c
new file mode 100644
index 00000000..1241b54d
--- /dev/null
+++ b/tests/gem_set_tiling_vs_gtt.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_gpu_tools.h"
+
+#define OBJECT_SIZE (1024*1024) /* bytes in the test object and in its GTT mapping */
+#define TEST_STRIDE (1024*4) /* tiling stride in bytes passed to gem_set_tiling() */
+
+/**
+ * Testcase: Check set_tiling vs gtt mmap coherency
+ */
+
+int main(int argc, char **argv)
+{
+	int fd;
+	uint32_t *ptr; /* mapping of the object; kept across all three tiling changes */
+	uint32_t data[OBJECT_SIZE/4]; /* CPU copy of what was last written through ptr (1 MiB on the stack) */
+	int i;
+	uint32_t handle;
+	bool tiling_changed;
+	int tile_height;
+
+	fd = drm_open_any();
+
+	if (IS_GEN2(intel_get_drm_devid(fd))) /* rows per tile: 16 on gen2, 8 otherwise */
+		tile_height = 16;
+	else
+		tile_height = 8;
+
+	handle = gem_create(fd, OBJECT_SIZE);
+	ptr = gem_mmap(fd, handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+	assert(ptr);
+
+	/* gtt coherency is done with set_domain in libdrm, don't break that */
+	gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+	for (i = 0; i < OBJECT_SIZE/4; i++)
+		ptr[i] = data[i] = i;
+
+	gem_set_tiling(fd, handle, I915_TILING_X, TEST_STRIDE);
+
+	printf("testing untiled->tiled\n");
+	tiling_changed = false;
+	gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, 0);
+	/* Too lazy to check for the correct tiling, and impossible anyway on
+	 * bit17 swizzling machines. */
+	for (i = 0; i < OBJECT_SIZE/4; i++)
+		if (ptr[i] != data[i]) /* any difference is taken as proof the mapping's layout changed */
+			tiling_changed = true;
+	assert(tiling_changed);
+
+	gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+	for (i = 0; i < OBJECT_SIZE/4; i++)
+		ptr[i] = data[i] = i;
+
+	gem_set_tiling(fd, handle, I915_TILING_X, TEST_STRIDE*2);
+
+	printf("testing tiled->tiled\n");
+	gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, 0);
+	for (i = 0; i < OBJECT_SIZE/4; i++) { /* here every dword is checked against a computed old index */
+		int tile_row = i / (TEST_STRIDE * tile_height / 4); /* tile row of i at the old stride */
+		int row = i / (TEST_STRIDE * 2 / 4); /* row of i at the new, doubled stride */
+		int half = i & (TEST_STRIDE / 4); /* 0 or TEST_STRIDE/4: which half of the new row */
+		int ofs = i % (TEST_STRIDE / 4); /* dword offset within that half */
+		int data_i = (tile_row/2)*(TEST_STRIDE * tile_height / 4)
+			+ row*TEST_STRIDE/4
+			+ half*tile_height + ofs;
+		uint32_t val = data[data_i];
+
+		if (ptr[i] != val) {
+			printf("mismatch at %i, row=%i, half=%i, ofs=%i\n",
+			       i, row, half, ofs);
+			printf("read: 0x%08x, expected: 0x%08x\n",
+			       ptr[i], val);
+			assert(0);
+		}
+
+	}
+
+	gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+	for (i = 0; i < OBJECT_SIZE/4; i++)
+		ptr[i] = data[i] = i;
+
+	gem_set_tiling(fd, handle, I915_TILING_NONE, 0);
+	printf("testing tiled->untiled\n");
+	tiling_changed = false;
+	gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, 0);
+	/* Too lazy to check for the correct tiling, and impossible anyway on
+	 * bit17 swizzling machines. */
+	for (i = 0; i < OBJECT_SIZE/4; i++)
+		if (ptr[i] != data[i])
+			tiling_changed = true;
+	assert(tiling_changed);
+
+	munmap(ptr, OBJECT_SIZE);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_set_tiling_vs_pwrite.c b/tests/gem_set_tiling_vs_pwrite.c
new file mode 100644
index 00000000..35ec5cdc
--- /dev/null
+++ b/tests/gem_set_tiling_vs_pwrite.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_gpu_tools.h"
+
+#define OBJECT_SIZE (1024*1024) /* bytes in the test object, its mapping and each pwrite/pread */
+#define TEST_STRIDE (1024*4) /* tiling stride in bytes passed to gem_set_tiling() */
+
+/**
+ * Testcase: Check set_tiling vs pwrite coherency
+ */
+
+int main(int argc, char **argv)
+{
+	int fd;
+	uint32_t *ptr;
+	uint32_t data[OBJECT_SIZE/4]; /* index pattern; also reused as the readback buffer (1 MiB on the stack) */
+	int i;
+	uint32_t handle;
+
+	fd = drm_open_any();
+
+	for (i = 0; i < OBJECT_SIZE/4; i++)
+		data[i] = i;
+
+	handle = gem_create(fd, OBJECT_SIZE);
+	ptr = gem_mmap(fd, handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
+	assert(ptr);
+
+	gem_set_tiling(fd, handle, I915_TILING_X, TEST_STRIDE);
+
+	/* touch it */
+	gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+	*ptr = 0xdeadbeef;
+
+	printf("testing pwrite on tiled buffer\n");
+	gem_write(fd, handle, 0, data, OBJECT_SIZE);
+	memset(data, 0, OBJECT_SIZE); /* wipe the CPU copy so the check below only passes on real readback */
+	gem_read(fd, handle, 0, data, OBJECT_SIZE);
+	for (i = 0; i < OBJECT_SIZE/4; i++)
+		assert(i == data[i]);
+
+	/* touch it before changing the tiling, so that the fence sticks around */
+	gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+	*ptr = 0xdeadbeef;
+
+	gem_set_tiling(fd, handle, I915_TILING_NONE, 0);
+
+	printf("testing pwrite on untiled, but still fenced buffer\n");
+	gem_write(fd, handle, 0, data, OBJECT_SIZE); /* data[] still holds the pattern: the loop above asserted data[i] == i */
+	memset(data, 0, OBJECT_SIZE);
+	gem_read(fd, handle, 0, data, OBJECT_SIZE);
+	for (i = 0; i < OBJECT_SIZE/4; i++)
+		assert(i == data[i]);
+
+	munmap(ptr, OBJECT_SIZE);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_storedw_batches_loop.c b/tests/gem_storedw_batches_loop.c
new file mode 100644
index 00000000..8cf5f719
--- /dev/null
+++ b/tests/gem_storedw_batches_loop.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *    Jesse Barnes <jbarnes@virtuousgeek.org> (based on gem_bad_blit.c)
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+/* libdrm buffer manager, created in main() */
+static drm_intel_bufmgr *bufmgr;
+/* buffer whose first dword every batch stores into */
+static drm_intel_bo *target_bo;
+/* result of gem_uses_aliasing_ppgtt(), set in main() */
+static int has_ppgtt = 0;
+
+/*
+ * Like the store dword test, but we create new command buffers each time.
+ *
+ * Each of the 0x80000 iterations allocates a fresh 4096-byte command bo,
+ * fills in one MI_STORE_DWORD_IMM that stores 0x42000000 + val into the first
+ * dword of target_bo, executes it, waits for it and then checks the stored
+ * value through a mapping of target_bo. The dword is zeroed afterwards so the
+ * next batch has to write it again.
+ *
+ * Any failure (allocation, mapping, relocation, execution or a value
+ * mismatch) prints a message to stderr and exits with -1.
+ */
+static void
+store_dword_loop(void)
+{
+	int cmd, i, val = 0, ret;
+	uint32_t *buf;
+	drm_intel_bo *cmd_bo;
+
+	cmd = MI_STORE_DWORD_IMM;
+	if (!has_ppgtt)
+		cmd |= MI_MEM_VIRTUAL;
+
+	for (i = 0; i < 0x80000; i++) {
+		cmd_bo = drm_intel_bo_alloc(bufmgr, "cmd bo", 4096, 4096);
+		if (!cmd_bo) {
+			fprintf(stderr, "failed to alloc cmd bo\n");
+			exit(-1);
+		}
+
+		/* a failed map must not be followed by writes through ->virtual */
+		ret = drm_intel_bo_map(cmd_bo, 1);
+		if (ret) {
+			fprintf(stderr, "failed to map cmd bo: %d\n", ret);
+			exit(-1);
+		}
+		buf = cmd_bo->virtual;
+
+		buf[0] = cmd;
+		buf[1] = 0;
+		buf[2] = target_bo->offset;
+		buf[3] = 0x42000000 + val;
+
+		/* no reloc has been emitted yet, so a non-zero result is an error */
+		ret = drm_intel_bo_references(cmd_bo, target_bo);
+		if (ret) {
+			fprintf(stderr, "failed to link cmd & target bos\n");
+			exit(-1);
+		}
+
+		/* byte offset 8 is buf[2], the address dword of the store */
+		ret = drm_intel_bo_emit_reloc(cmd_bo, 8, target_bo, 0,
+					      I915_GEM_DOMAIN_INSTRUCTION,
+					      I915_GEM_DOMAIN_INSTRUCTION);
+		if (ret) {
+			fprintf(stderr, "failed to emit reloc\n");
+			exit(-1);
+		}
+
+		buf[4] = MI_BATCH_BUFFER_END;
+		buf[5] = MI_BATCH_BUFFER_END;
+
+		drm_intel_bo_unmap(cmd_bo);
+
+		/* with the reloc in place the reference check has to report 1 */
+		ret = drm_intel_bo_references(cmd_bo, target_bo);
+		if (ret != 1) {
+			fprintf(stderr, "bad bo reference count: %d\n", ret);
+			exit(-1);
+		}
+
+		/* six dwords were written above */
+		ret = drm_intel_bo_exec(cmd_bo, 6 * 4, NULL, 0, 0);
+		if (ret) {
+			fprintf(stderr, "bo exec failed: %d\n", ret);
+			exit(-1);
+		}
+
+		drm_intel_bo_wait_rendering(cmd_bo);
+
+		ret = drm_intel_bo_map(target_bo, 1);
+		if (ret) {
+			fprintf(stderr, "failed to map target bo: %d\n", ret);
+			exit(-1);
+		}
+
+		buf = target_bo->virtual;
+		if (buf[0] != (0x42000000 | val)) {
+			fprintf(stderr,
+				"value mismatch: cur 0x%08x, stored 0x%08x\n",
+				buf[0], 0x42000000 | val);
+			exit(-1);
+		}
+		buf[0] = 0; /* let batch write it again */
+		drm_intel_bo_unmap(target_bo);
+
+		drm_intel_bo_unreference(cmd_bo);
+
+		val++;
+	}
+
+	printf("completed %d writes successfully\n", i);
+}
+
+/*
+ * Entry point: takes no arguments. Opens the drm device, skips on gen2-gen5
+ * hardware, then sets up the buffer manager and the target buffer and runs
+ * store_dword_loop().
+ *
+ * Returns 0 on success and 77 when the test is skipped; setup failures exit
+ * with -1.
+ * NOTE(review): 77 is presumably the test runner's "skipped" status - confirm.
+ */
+int main(int argc, char **argv)
+{
+	int fd;
+	int devid;
+
+	if (argc != 1) {
+		fprintf(stderr, "usage: %s\n", argv[0]);
+		exit(-1);
+	}
+
+	fd = drm_open_any();
+	devid = intel_get_drm_devid(fd);
+
+	has_ppgtt = gem_uses_aliasing_ppgtt(fd);
+
+	if (IS_GEN2(devid) || IS_GEN3(devid) || IS_GEN4(devid) || IS_GEN5(devid)) {
+
+		/* the trailing space keeps "and" and "needs" apart in the output */
+		fprintf(stderr, "MI_STORE_DATA can only use GTT address on gen4+/g33 and "
+			"needs snoopable mem on pre-gen6\n");
+		return 77;
+	}
+
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	if (!bufmgr) {
+		fprintf(stderr, "failed to init libdrm\n");
+		exit(-1);
+	}
+//	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+	target_bo = drm_intel_bo_alloc(bufmgr, "target bo", 4096, 4096);
+	if (!target_bo) {
+		fprintf(stderr, "failed to alloc target buffer\n");
+		exit(-1);
+	}
+
+	store_dword_loop();
+
+	drm_intel_bo_unreference(target_bo);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_storedw_loop_blt.c b/tests/gem_storedw_loop_blt.c
new file mode 100644
index 00000000..dda9b835
--- /dev/null
+++ b/tests/gem_storedw_loop_blt.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *    Jesse Barnes <jbarnes@virtuousgeek.org> (based on gem_bad_blit.c)
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+/* libdrm buffer manager, created in main() */
+static drm_intel_bufmgr *bufmgr;
+/* batch buffer allocated in main() and flushed once per store */
+struct intel_batchbuffer *batch;
+/* buffer whose first dword every batch stores into */
+static drm_intel_bo *target_buffer;
+/* result of gem_uses_aliasing_ppgtt(), set in main() */
+static int has_ppgtt = 0;
+
+/*
+ * Testcase: Basic blitter MI check using MI_STORE_DATA_IMM
+ */
+
+/*
+ * Emit 0x100000 single-dword stores on the blitter ring. After each flush the
+ * target buffer is mapped and its first dword has to equal the value that was
+ * just stored; the first mismatch is reported on stderr and ends the process
+ * with -1.
+ */
+static void
+store_dword_loop(void)
+{
+	uint32_t *map;
+	int store_cmd = MI_STORE_DWORD_IMM;
+	int expected = 0;
+	int pass;
+
+	if (!has_ppgtt)
+		store_cmd |= MI_MEM_VIRTUAL;
+
+	for (pass = 0; pass < 0x100000; pass++, expected++) {
+		BEGIN_BATCH(4);
+		OUT_BATCH(store_cmd);
+		OUT_BATCH(0); /* reserved */
+		OUT_RELOC(target_buffer, I915_GEM_DOMAIN_INSTRUCTION,
+			  I915_GEM_DOMAIN_INSTRUCTION, 0);
+		OUT_BATCH(expected);
+		ADVANCE_BATCH();
+
+		intel_batchbuffer_flush_on_ring(batch, I915_EXEC_BLT);
+
+		drm_intel_bo_map(target_buffer, 0);
+		map = target_buffer->virtual;
+
+		if (map[0] == expected) {
+			/* store landed, go on with the next value */
+			drm_intel_bo_unmap(target_buffer);
+			continue;
+		}
+
+		fprintf(stderr,
+			"value mismatch: cur 0x%08x, stored 0x%08x\n",
+			map[0], expected);
+		exit(-1);
+	}
+
+	/* report the last value that made it into the buffer */
+	drm_intel_bo_map(target_buffer, 0);
+	map = target_buffer->virtual;
+	printf("completed %d writes successfully, current value: 0x%08x\n", pass,
+			map[0]);
+	drm_intel_bo_unmap(target_buffer);
+}
+
+/*
+ * Entry point: takes no arguments. Opens the drm device, skips on gen2-gen5
+ * hardware or when no aliasing ppgtt is reported, then sets up the buffer
+ * manager, the batch buffer and the target buffer and runs
+ * store_dword_loop().
+ *
+ * Returns 0 on success and 77 when the test is skipped; setup failures exit
+ * with -1.
+ * NOTE(review): 77 is presumably the test runner's "skipped" status - confirm.
+ */
+int main(int argc, char **argv)
+{
+	int fd;
+	int devid;
+
+	if (argc != 1) {
+		fprintf(stderr, "usage: %s\n", argv[0]);
+		exit(-1);
+	}
+
+	fd = drm_open_any();
+	devid = intel_get_drm_devid(fd);
+
+	has_ppgtt = gem_uses_aliasing_ppgtt(fd);
+
+	if (IS_GEN2(devid) || IS_GEN3(devid) || IS_GEN4(devid) || IS_GEN5(devid)) {
+
+		fprintf(stderr, "MI_STORE_DATA can only use GTT address on gen4+/g33 and "
+			"needs snoopable mem on pre-gen6\n");
+		return 77;
+	}
+
+	/* This only works with ppgtt */
+	if (!has_ppgtt) {
+		fprintf(stderr, "no ppgtt detected, which is required\n");
+		return 77;
+	}
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	if (!bufmgr) {
+		fprintf(stderr, "failed to init libdrm\n");
+		exit(-1);
+	}
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+	if (!batch) {
+		fprintf(stderr, "failed to create batch buffer\n");
+		exit(-1);
+	}
+
+	target_buffer = drm_intel_bo_alloc(bufmgr, "target bo", 4096, 4096);
+	if (!target_buffer) {
+		fprintf(stderr, "failed to alloc target buffer\n");
+		exit(-1);
+	}
+
+	store_dword_loop();
+
+	/* tear down in reverse order of creation */
+	drm_intel_bo_unreference(target_buffer);
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_storedw_loop_bsd.c b/tests/gem_storedw_loop_bsd.c
new file mode 100644
index 00000000..d7c61047
--- /dev/null
+++ b/tests/gem_storedw_loop_bsd.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *    Jesse Barnes <jbarnes@virtuousgeek.org> (based on gem_bad_blit.c)
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+/* libdrm buffer manager, created in main() */
+static drm_intel_bufmgr *bufmgr;
+/* batch buffer allocated in main() and flushed once per store */
+struct intel_batchbuffer *batch;
+/* buffer whose first dword every batch stores into */
+static drm_intel_bo *target_buffer;
+/* result of gem_uses_aliasing_ppgtt(), set in main() */
+static int has_ppgtt = 0;
+
+/*
+ * Testcase: Basic bsd MI check using MI_STORE_DATA_IMM
+ */
+
+/*
+ * Emit 0x100000 single-dword stores on the bsd ring. After each flush the
+ * target buffer is mapped and its first dword has to equal the value that was
+ * just stored; the first mismatch is reported on stderr and ends the process
+ * with -1.
+ */
+static void
+store_dword_loop(void)
+{
+	uint32_t *map;
+	int store_cmd = MI_STORE_DWORD_IMM;
+	int expected = 0;
+	int pass;
+
+	if (!has_ppgtt)
+		store_cmd |= MI_MEM_VIRTUAL;
+
+	for (pass = 0; pass < 0x100000; pass++, expected++) {
+		BEGIN_BATCH(4);
+		OUT_BATCH(store_cmd);
+		OUT_BATCH(0); /* reserved */
+		OUT_RELOC(target_buffer, I915_GEM_DOMAIN_INSTRUCTION,
+			  I915_GEM_DOMAIN_INSTRUCTION, 0);
+		OUT_BATCH(expected);
+		ADVANCE_BATCH();
+
+		intel_batchbuffer_flush_on_ring(batch, I915_EXEC_BSD);
+
+		drm_intel_bo_map(target_buffer, 0);
+		map = target_buffer->virtual;
+
+		if (map[0] == expected) {
+			/* store landed, go on with the next value */
+			drm_intel_bo_unmap(target_buffer);
+			continue;
+		}
+
+		fprintf(stderr,
+			"value mismatch: cur 0x%08x, stored 0x%08x\n",
+			map[0], expected);
+		exit(-1);
+	}
+
+	/* report the last value that made it into the buffer */
+	drm_intel_bo_map(target_buffer, 0);
+	map = target_buffer->virtual;
+	printf("completed %d writes successfully, current value: 0x%08x\n", pass,
+			map[0]);
+	drm_intel_bo_unmap(target_buffer);
+}
+
+/*
+ * Entry point: takes no arguments. Opens the drm device, skips on gen2-gen5
+ * hardware, on gen6 and when no aliasing ppgtt is reported, then sets up the
+ * buffer manager, the batch buffer and the target buffer and runs
+ * store_dword_loop().
+ *
+ * Returns 0 on success and 77 when the test is skipped; setup failures exit
+ * with -1.
+ * NOTE(review): 77 is presumably the test runner's "skipped" status - confirm.
+ */
+int main(int argc, char **argv)
+{
+	int fd;
+	int devid;
+
+	if (argc != 1) {
+		fprintf(stderr, "usage: %s\n", argv[0]);
+		exit(-1);
+	}
+
+	fd = drm_open_any();
+	devid = intel_get_drm_devid(fd);
+
+	has_ppgtt = gem_uses_aliasing_ppgtt(fd);
+
+	if (IS_GEN2(devid) || IS_GEN3(devid) || IS_GEN4(devid) || IS_GEN5(devid)) {
+
+		fprintf(stderr, "MI_STORE_DATA can only use GTT address on gen4+/g33 and "
+			"needs snoopable mem on pre-gen6\n");
+		return 77;
+	}
+
+	/* unlike the blt and render variants, this one also skips gen6 */
+	if (IS_GEN6(devid)) {
+
+		fprintf(stderr, "MI_STORE_DATA broken on gen6 bsd\n");
+		return 77;
+	}
+
+	/* This only works with ppgtt */
+	if (!has_ppgtt) {
+		fprintf(stderr, "no ppgtt detected, which is required\n");
+		return 77;
+	}
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	if (!bufmgr) {
+		fprintf(stderr, "failed to init libdrm\n");
+		exit(-1);
+	}
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+	if (!batch) {
+		fprintf(stderr, "failed to create batch buffer\n");
+		exit(-1);
+	}
+
+	target_buffer = drm_intel_bo_alloc(bufmgr, "target bo", 4096, 4096);
+	if (!target_buffer) {
+		fprintf(stderr, "failed to alloc target buffer\n");
+		exit(-1);
+	}
+
+	store_dword_loop();
+
+	/* tear down in reverse order of creation */
+	drm_intel_bo_unreference(target_buffer);
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_storedw_loop_render.c b/tests/gem_storedw_loop_render.c
new file mode 100644
index 00000000..19a41b65
--- /dev/null
+++ b/tests/gem_storedw_loop_render.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *    Jesse Barnes <jbarnes@virtuousgeek.org> (based on gem_bad_blit.c)
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+/* libdrm buffer manager, created in main() */
+static drm_intel_bufmgr *bufmgr;
+/* batch buffer allocated in main() and flushed once per store */
+struct intel_batchbuffer *batch;
+/* buffer whose first dword every batch stores into */
+static drm_intel_bo *target_buffer;
+/* result of gem_uses_aliasing_ppgtt(), set in main() */
+static int has_ppgtt = 0;
+
+/*
+ * Testcase: Basic render MI check using MI_STORE_DATA_IMM
+ */
+
+/*
+ * Emit 0x100000 single-dword stores, flushing each batch with ring flag 0.
+ * After each flush the target buffer is mapped and its first dword has to
+ * equal the value that was just stored; the first mismatch is reported on
+ * stderr and ends the process with -1.
+ */
+static void
+store_dword_loop(void)
+{
+	uint32_t *map;
+	int store_cmd = MI_STORE_DWORD_IMM;
+	int expected = 0;
+	int pass;
+
+	if (!has_ppgtt)
+		store_cmd |= MI_MEM_VIRTUAL;
+
+	for (pass = 0; pass < 0x100000; pass++, expected++) {
+		BEGIN_BATCH(4);
+		OUT_BATCH(store_cmd);
+		OUT_BATCH(0); /* reserved */
+		OUT_RELOC(target_buffer, I915_GEM_DOMAIN_INSTRUCTION,
+			  I915_GEM_DOMAIN_INSTRUCTION, 0);
+		OUT_BATCH(expected);
+		ADVANCE_BATCH();
+
+		intel_batchbuffer_flush_on_ring(batch, 0);
+
+		drm_intel_bo_map(target_buffer, 0);
+		map = target_buffer->virtual;
+
+		if (map[0] == expected) {
+			/* store landed, go on with the next value */
+			drm_intel_bo_unmap(target_buffer);
+			continue;
+		}
+
+		fprintf(stderr,
+			"value mismatch: cur 0x%08x, stored 0x%08x\n",
+			map[0], expected);
+		exit(-1);
+	}
+
+	/* report the last value that made it into the buffer */
+	drm_intel_bo_map(target_buffer, 0);
+	map = target_buffer->virtual;
+	printf("completed %d writes successfully, current value: 0x%08x\n", pass,
+			map[0]);
+	drm_intel_bo_unmap(target_buffer);
+}
+
+/*
+ * Entry point: takes no arguments. Opens the drm device, skips on gen2-gen5
+ * hardware, then sets up the buffer manager, the batch buffer and the target
+ * buffer and runs store_dword_loop().
+ *
+ * Returns 0 on success and 77 when the test is skipped; setup failures exit
+ * with -1.
+ * NOTE(review): 77 is presumably the test runner's "skipped" status - confirm.
+ */
+int main(int argc, char **argv)
+{
+	int fd;
+	int devid;
+
+	if (argc != 1) {
+		fprintf(stderr, "usage: %s\n", argv[0]);
+		exit(-1);
+	}
+
+	fd = drm_open_any();
+	devid = intel_get_drm_devid(fd);
+
+	/* only selects the MI_MEM_VIRTUAL bit here; ppgtt is not required */
+	has_ppgtt = gem_uses_aliasing_ppgtt(fd);
+
+	if (IS_GEN2(devid) || IS_GEN3(devid) || IS_GEN4(devid) || IS_GEN5(devid)) {
+
+		fprintf(stderr, "MI_STORE_DATA can only use GTT address on gen4+/g33 and "
+			"needs snoopable mem on pre-gen6\n");
+		return 77;
+	}
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	if (!bufmgr) {
+		fprintf(stderr, "failed to init libdrm\n");
+		exit(-1);
+	}
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+	if (!batch) {
+		fprintf(stderr, "failed to create batch buffer\n");
+		exit(-1);
+	}
+
+	target_buffer = drm_intel_bo_alloc(bufmgr, "target bo", 4096, 4096);
+	if (!target_buffer) {
+		fprintf(stderr, "failed to alloc target buffer\n");
+		exit(-1);
+	}
+
+	store_dword_loop();
+
+	/* tear down in reverse order of creation */
+	drm_intel_bo_unreference(target_buffer);
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_stress.c b/tests/gem_stress.c
new file mode 100644
index 00000000..69239ac1
--- /dev/null
+++ b/tests/gem_stress.c
@@ -0,0 +1,945 @@
+/*
+ * Copyright © 2011 Daniel Vetter
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ * Partially based upon gem_tiled_fence_blits.c
+ */
+
+/** @file gem_stress.c
+ *
+ * This is a general gem coherency test. It's designed to eventually replicate
+ * any possible sequence of access patterns. It works by copying a set of tiles
+ * between two sets of backing buffer objects, randomly permuting the assigned
+ * position on each copy operation.
+ *
+ * The copy operations are done in tiny portions (to reduce any race windows
+ * for corruptions, hence increasing the chances for observing one) and are
+ * constantly switched between all means to copy stuff (fenced blitter, unfenced
+ * render, mmap, pwrite/read).
+ *
+ * After every complete move of a set, the tiling parameters of a buffer are
+ * randomly changed to simulate the effects of libdrm caching.
+ *
+ * Buffers are 1mb big to nicely fit into fences on gen2/3. A few are further
+ * split up to test relaxed fencing. Using this to push the average working set
+ * size over the available gtt space forces objects to be mapped as unfenceable
+ * (and as a side-effect tests gtt map/unmap coherency).
+ *
+ * In short: designed for maximum evilness.
+ */
+
+#include "rendercopy.h"
+
+#define CMD_POLY_STIPPLE_OFFSET       0x7906
+
+/** TODO:
+ * - beat on relaxed fencing (i.e. mappable/fenceable tracking in the kernel)
+ * - render copy (to check fence tracking and cache coherency management by the
+ *   kernel)
+ * - multi-threading: probably just a wrapper script to launch multiple
+ *   instances + an option to accordingly reduce the working set
+ * - gen6 inter-ring coherency (needs render copy, first)
+ * - variable buffer size
+ * - add an option to fork a second process that randomly sends signals to the
+ *   first one (to check consistency of the kernel recovery paths)
+ */
+
+/* buffer manager all scratch buffers are allocated from, see init_buffer() */
+drm_intel_bufmgr *bufmgr;
+/* batch handed to intel_batchbuffer_flush() and the gen*_render_copyfunc() helpers */
+struct intel_batchbuffer *batch;
+/* drm file descriptor used for the gem_set_domain()/gem_set_tiling() calls */
+int drm_fd;
+/* device id tested by the IS_GEN*() and IS_965() checks */
+int devid;
+/* value fence_storm is loaded with when next_copyfunc() starts a fence storm */
+int num_fences;
+
+/* buffer that keep_gpu_busy() blits onto itself */
+drm_intel_bo *busy_bo;
+
+/*
+ * Run-time options of the stress test. Only fields whose use is visible in
+ * the functions below carry a comment.
+ */
+struct option_struct {
+    unsigned scratch_buf_size;
+    /* upper bound for buffer width and height, enforced in sanitize_stride() */
+    unsigned max_dimension;
+    unsigned num_buffers;
+    /* tile whose handling is logged; tracing is off when this is -1 */
+    int trace_tile;
+    /* non-zero: buffers are backed by malloc() instead of a bo mapping */
+    int no_hw;
+    /* 0: init_set() bumps the busy load itself whenever current_set is 1 */
+    int gpu_busy_load;
+    /* allow render_copyfunc in next_copyfunc() */
+    int use_render;
+    /* allow blitter_copyfunc in next_copyfunc() */
+    int use_blt;
+    /* >= 0: tiling mode forced onto every buffer touched by init_set() */
+    int forced_tiling;
+    /* map with drm_intel_bo_map() and move buffers to the cpu domain before cpu access */
+    int use_cpu_maps;
+    int total_rounds;
+    /* non-zero: exit(1) as soon as cpucpy2d() has seen a mismatch */
+    int fail;
+    /* number of tiles stored in each buffer */
+    int tiles_per_buf;
+    /* non-zero: wait for rendering on the destination before cpu/prw copies */
+    int ducttape;
+    /* edge length of a (square) tile, in uint32_t units */
+    int tile_size;
+    int check_render_cpyfn;
+    int use_signal_helper;
+};
+
+struct option_struct options;
+
+/*
+ * Size limits. Every replacement list is parenthesized so the macros expand
+ * to a single operand no matter which operators surround them.
+ */
+#define MAX_BUFS		4096
+#define SCRATCH_BUF_SIZE	(1024*1024)
+#define BUSY_BUF_SIZE		(256*4096)
+/* bytes taken up by a square tile with an edge of @size uint32_t values */
+#define TILE_BYTES(size)	((size)*(size)*sizeof(uint32_t))
+
+/* the two buffer sets tiles are copied between, MAX_BUFS entries each */
+static struct scratch_buf buffers[2][MAX_BUFS];
+/* tile i is at logical position tile_permutation[i] */
+static unsigned *tile_permutation;
+/* number of entries of buffers[set] that are in use */
+static unsigned num_buffers = 0;
+/* index into buffers[] of the set that currently holds the tiles */
+static unsigned current_set = 0;
+static unsigned target_set = 0;
+/* total number of tiles spread over one buffer set */
+static unsigned num_total_tiles = 0;
+
+/* non-zero while a fence storm is running; counted down in blitter_copyfunc() */
+int fence_storm = 0;
+/* log2 of the width of the busy blit; keep_gpu_busy() asserts 1 << load <= 1024 */
+static int gpu_busy_load = 10;
+
+/* mismatch statistics collected by cpucpy2d() */
+struct {
+	/* number of cpucpy2d() calls that saw at least one mismatch */
+	unsigned num_failed;
+	/* largest number of mismatches seen in a single cpucpy2d() call */
+	unsigned max_failed_reads;
+} stats;
+
+/*
+ * Translate a tile index inside @buf into the position of the tile's first
+ * element. Tiles are laid out left to right, wrapping to the next row of
+ * tiles at the buffer stride; *x and *y are in units of uint32_t.
+ */
+static void tile2xy(struct scratch_buf *buf, unsigned tile, unsigned *x, unsigned *y)
+{
+	unsigned linear;
+
+	assert(tile < buf->num_tiles);
+
+	/* horizontal offset the tile would have in one endless row */
+	linear = tile*options.tile_size;
+
+	*x = linear % (buf->stride/sizeof(uint32_t));
+	*y = (linear / (buf->stride/sizeof(uint32_t))) * options.tile_size;
+}
+
+/*
+ * Emit an XY_SRC_COPY_BLT that copies a @w x @h rectangle from
+ * (@src_x, @src_y) in @src_bo to (@dst_x, @dst_y) in @dst_bo.
+ *
+ * The pitches are passed in as given by the caller; on IS_965() devices a
+ * tiled surface gets its pitch divided by 4 and the matching "tiled" bit set
+ * in the command. On gen6 and gen7 an XY_SETUP_CLIP_BLT_CMD with two zero
+ * dwords is emitted right after the copy.
+ *
+ * The commands are only added to the batch here; nothing is flushed.
+ */
+static void emit_blt(drm_intel_bo *src_bo, uint32_t src_tiling, unsigned src_pitch,
+		     unsigned src_x, unsigned src_y, unsigned w, unsigned h,
+		     drm_intel_bo *dst_bo, uint32_t dst_tiling, unsigned dst_pitch,
+		     unsigned dst_x, unsigned dst_y)
+{
+	uint32_t cmd_bits = 0;
+
+	if (IS_965(devid) && src_tiling) {
+		src_pitch /= 4;
+		cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
+	}
+
+	if (IS_965(devid) && dst_tiling) {
+		dst_pitch /= 4;
+		cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
+	}
+
+	/* the blit itself: destination rectangle first, then the source origin */
+	BEGIN_BATCH(8);
+	OUT_BATCH(XY_SRC_COPY_BLT_CMD |
+		  XY_SRC_COPY_BLT_WRITE_ALPHA |
+		  XY_SRC_COPY_BLT_WRITE_RGB |
+		  cmd_bits);
+	OUT_BATCH((3 << 24) | /* 32 bits */
+		  (0xcc << 16) | /* copy ROP */
+		  dst_pitch);
+	OUT_BATCH(dst_y << 16 | dst_x);
+	OUT_BATCH((dst_y+h) << 16 | (dst_x+w));
+	OUT_RELOC_FENCED(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+	OUT_BATCH(src_y << 16 | src_x);
+	OUT_BATCH(src_pitch);
+	OUT_RELOC_FENCED(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+	ADVANCE_BATCH();
+
+	if (IS_GEN6(devid) || IS_GEN7(devid)) {
+		BEGIN_BATCH(3);
+		OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+		ADVANCE_BATCH();
+	}
+}
+
+/*
+ * All this gem trashing wastes too much cpu time, so give the gpu something
+ * to do to increase the chances for races: blit a strip of busy_bo, 128 rows
+ * high and 1 << gpu_busy_load wide, onto the 128 rows below it.
+ */
+static void keep_gpu_busy(void)
+{
+	int tmp = 1 << gpu_busy_load;
+
+	assert(tmp <= 1024);
+
+	emit_blt(busy_bo, 0, 4096, 0, 0, tmp, 128,
+		 busy_bo, 0, 4096, 0, 128);
+}
+
+/*
+ * Move @buf into the cpu read domain; when @writing is non-zero the cpu
+ * domain is requested as write domain as well.
+ */
+static void set_to_cpu_domain(struct scratch_buf *buf, int writing)
+{
+	uint32_t write_domain = 0;
+
+	if (writing)
+		write_domain = I915_GEM_DOMAIN_CPU;
+
+	gem_set_domain(drm_fd, buf->bo->handle, I915_GEM_DOMAIN_CPU,
+		       write_domain);
+}
+
+/* counts the selections made by next_copyfunc(); drives its modulo-based choice */
+static unsigned int copyfunc_seq = 0;
+/*
+ * Copy method currently in use, chosen by next_copyfunc(). Copies the tile
+ * with logical number @logical_tile_no from (@src_x, @src_y) in @src to
+ * (@dst_x, @dst_y) in @dst.
+ */
+static void (*copyfunc)(struct scratch_buf *src, unsigned src_x, unsigned src_y,
+			struct scratch_buf *dst, unsigned dst_x, unsigned dst_y,
+			unsigned logical_tile_no);
+
+/*
+ * Verify one tile in @src and write the expected contents to @dst.
+ *
+ * Element (row, col) of logical tile @logical_tile_no is expected to hold
+ * logical_tile_no * tile_size^2 + row * tile_size + col. Every mismatch is
+ * printed. @dst always receives the expected value, so errors are not
+ * propagated when the run continues. With options.fail set the process exits
+ * with 1: right at the first mismatch when a tile is being traced, otherwise
+ * after the whole tile has been checked.
+ *
+ * stride, x, y in units of uint32_t!
+ */
+static void cpucpy2d(uint32_t *src, unsigned src_stride, unsigned src_x, unsigned src_y,
+		     uint32_t *dst, unsigned dst_stride, unsigned dst_x, unsigned dst_y,
+		     unsigned logical_tile_no)
+{
+	const int edge = options.tile_size;
+	int row, col;
+	int failed = 0;
+
+	for (row = 0; row < edge; row++) {
+		for (col = 0; col < edge; col++) {
+			const int pos = row*edge + col;
+			const unsigned expect = logical_tile_no*edge*edge + pos;
+			const uint32_t tmp = src[src_x + col + src_stride * (src_y + row)];
+
+			if (tmp != expect) {
+				printf("mismatch at tile %i pos %i, read %i, expected %i, diff %i\n",
+				       logical_tile_no, pos, tmp, expect, (int) tmp - expect);
+				if (options.trace_tile >= 0 && options.fail)
+					exit(1);
+				failed++;
+			}
+
+			/* when not aborting, correct any errors */
+			dst[dst_x + col + dst_stride * (dst_y + row)] = expect;
+		}
+	}
+
+	if (!failed)
+		return;
+
+	if (options.fail)
+		exit(1);
+
+	/* book-keeping for the statistics */
+	if (failed > stats.max_failed_reads)
+		stats.max_failed_reads = failed;
+	stats.num_failed++;
+}
+
+/*
+ * Copy method that moves the tile purely on the cpu, through the mappings
+ * kept in src->data and dst->data. The batch has to be empty on entry.
+ */
+static void cpu_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y,
+			 struct scratch_buf *dst, unsigned dst_x, unsigned dst_y,
+			 unsigned logical_tile_no)
+{
+	/* cpucpy2d() wants strides in uint32_t units */
+	const unsigned src_pitch = src->stride/sizeof(uint32_t);
+	const unsigned dst_pitch = dst->stride/sizeof(uint32_t);
+
+	assert(batch->ptr == batch->buffer);
+
+	if (options.ducttape)
+		drm_intel_bo_wait_rendering(dst->bo);
+
+	if (options.use_cpu_maps) {
+		set_to_cpu_domain(src, 0);
+		set_to_cpu_domain(dst, 1);
+	}
+
+	cpucpy2d(src->data, src_pitch, src_x, src_y,
+		 dst->data, dst_pitch, dst_x, dst_y,
+		 logical_tile_no);
+}
+
+/*
+ * Copy method that stages the tile in a temporary buffer. Untiled buffers
+ * are read with drm_intel_bo_get_subdata() and written with
+ * drm_intel_bo_subdata(), one tile row at a time; tiled buffers go through
+ * cpucpy2d() on their mapping instead. The batch has to be empty on entry.
+ */
+static void prw_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y,
+			 struct scratch_buf *dst, unsigned dst_x, unsigned dst_y,
+			 unsigned logical_tile_no)
+{
+	const int edge = options.tile_size;
+	const unsigned long row_bytes = edge*sizeof(uint32_t);
+	uint32_t tmp_tile[edge*edge];
+	int row;
+
+	assert(batch->ptr == batch->buffer);
+
+	if (options.ducttape)
+		drm_intel_bo_wait_rendering(dst->bo);
+
+	/* step 1: source -> tmp_tile */
+	if (src->tiling != I915_TILING_NONE) {
+		if (options.use_cpu_maps)
+			set_to_cpu_domain(src, 0);
+
+		cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y,
+			 tmp_tile, edge, 0, 0, logical_tile_no);
+	} else {
+		for (row = 0; row < edge; row++) {
+			unsigned ofs = src_x*sizeof(uint32_t) + src->stride*(src_y + row);
+
+			drm_intel_bo_get_subdata(src->bo, ofs, row_bytes,
+						 tmp_tile + edge*row);
+		}
+	}
+
+	/* step 2: tmp_tile -> destination */
+	if (dst->tiling != I915_TILING_NONE) {
+		if (options.use_cpu_maps)
+			set_to_cpu_domain(dst, 1);
+
+		cpucpy2d(tmp_tile, edge, 0, 0,
+			 dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y,
+			 logical_tile_no);
+	} else {
+		for (row = 0; row < edge; row++) {
+			unsigned ofs = dst_x*sizeof(uint32_t) + dst->stride*(dst_y + row);
+
+			drm_intel_bo_subdata(dst->bo, ofs, row_bytes,
+					     tmp_tile + edge*row);
+		}
+	}
+}
+
+/*
+ * Copy method that moves the tile with the blitter. Outside of a fence storm
+ * a busy blit is queued alternately before and after the copy. Each tiled
+ * surface involved takes one off the fence storm counter; once that counter
+ * has dropped to 1 or below the storm is over and the batch is flushed.
+ */
+static void blitter_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y,
+			     struct scratch_buf *dst, unsigned dst_x, unsigned dst_y,
+			     unsigned logical_tile_no)
+{
+	static unsigned keep_gpu_busy_counter = 0;
+	const unsigned odd_call = keep_gpu_busy_counter & 1;
+
+	/* check both edges of the fence usage */
+	if (odd_call && !fence_storm)
+		keep_gpu_busy();
+
+	emit_blt(src->bo, src->tiling, src->stride, src_x, src_y,
+		 options.tile_size, options.tile_size,
+		 dst->bo, dst->tiling, dst->stride, dst_x, dst_y);
+
+	if (!odd_call && !fence_storm)
+		keep_gpu_busy();
+
+	keep_gpu_busy_counter++;
+
+	/* one step down for every tiled surface of this copy */
+	fence_storm -= !!src->tiling + !!dst->tiling;
+
+	if (fence_storm > 1)
+		return;
+
+	fence_storm = 0;
+	intel_batchbuffer_flush(batch);
+}
+
+/*
+ * Copy method that moves the tile with the render copy helper of the
+ * generation at hand (gen2, gen3, gen6, gen7); every other device falls back
+ * to blitter_copyfunc(). A busy blit is queued alternately before and after
+ * the copy, and the batch is flushed at the end.
+ */
+static void render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y,
+			    struct scratch_buf *dst, unsigned dst_x, unsigned dst_y,
+			    unsigned logical_tile_no)
+{
+	static unsigned keep_gpu_busy_counter = 0;
+	const unsigned odd_call = keep_gpu_busy_counter & 1;
+
+	/* check both edges of the fence usage */
+	if (odd_call)
+		keep_gpu_busy();
+
+	if (IS_GEN2(devid)) {
+		gen2_render_copyfunc(batch, src, src_x, src_y,
+				     options.tile_size, options.tile_size,
+				     dst, dst_x, dst_y);
+	} else if (IS_GEN3(devid)) {
+		gen3_render_copyfunc(batch, src, src_x, src_y,
+				     options.tile_size, options.tile_size,
+				     dst, dst_x, dst_y);
+	} else if (IS_GEN6(devid)) {
+		gen6_render_copyfunc(batch, src, src_x, src_y,
+				     options.tile_size, options.tile_size,
+				     dst, dst_x, dst_y);
+	} else if (IS_GEN7(devid)) {
+		gen7_render_copyfunc(batch, src, src_x, src_y,
+				     options.tile_size, options.tile_size,
+				     dst, dst_x, dst_y);
+	} else {
+		/* no render copy for this device in the chain above */
+		blitter_copyfunc(src, src_x, src_y, dst, dst_x, dst_y,
+				 logical_tile_no);
+	}
+
+	if (!odd_call)
+		keep_gpu_busy();
+
+	keep_gpu_busy_counter++;
+	intel_batchbuffer_flush(batch);
+}
+
+/*
+ * Pick the copy method for the next tile and store it in copyfunc.
+ *
+ * While a fence storm is running the current method is kept and the sequence
+ * counter is not advanced. Otherwise the first matching rule wins:
+ *   - every 61st selection (unless forced_tiling is I915_TILING_NONE) starts
+ *     a fence storm of num_fences steps on the blitter,
+ *   - every 17th selection uses the cpu,
+ *   - every 19th selection uses pwrite/pread,
+ *   - every 3rd selection uses render, if enabled,
+ *   - then blitter if enabled, else render if enabled, else the cpu.
+ *
+ * When @tile is the traced tile the choice is printed; the final cpu
+ * fallback is the one branch that prints nothing.
+ */
+static void next_copyfunc(int tile)
+{
+	if (fence_storm) {
+		if (tile == options.trace_tile)
+			printf(" using fence storm\n");
+		return;
+	}
+
+	if (copyfunc_seq % 61 == 0
+			&& options.forced_tiling != I915_TILING_NONE) {
+		if (tile == options.trace_tile)
+			printf(" using fence storm\n");
+		fence_storm = num_fences;
+		copyfunc = blitter_copyfunc;
+	} else if (copyfunc_seq % 17 == 0) {
+		if (tile == options.trace_tile)
+			printf(" using cpu\n");
+		copyfunc = cpu_copyfunc;
+	} else if (copyfunc_seq % 19 == 0) {
+		if (tile == options.trace_tile)
+			printf(" using prw\n");
+		copyfunc = prw_copyfunc;
+	} else if (copyfunc_seq % 3 == 0 && options.use_render) {
+		if (tile == options.trace_tile)
+			printf(" using render\n");
+		copyfunc = render_copyfunc;
+	} else if (options.use_blt){
+		if (tile == options.trace_tile)
+			printf(" using blitter\n");
+		copyfunc = blitter_copyfunc;
+	} else if (options.use_render){
+		if (tile == options.trace_tile)
+			printf(" using render\n");
+		copyfunc = render_copyfunc;
+	} else {
+		copyfunc = cpu_copyfunc;
+	}
+
+	copyfunc_seq++;
+}
+
+/*
+ * Fill the current buffer set with the initial contents: the elements of all
+ * tiles, taken in order, form one running sequence starting at 0. Afterwards
+ * the permutation is reset to the identity.
+ */
+static void fan_out(void)
+{
+	const int dwords = options.tile_size*options.tile_size;
+	uint32_t tmp_tile[dwords];
+	uint32_t next_val = 0;
+	struct scratch_buf *buf;
+	unsigned tile, x, y;
+	int n, k;
+
+	for (n = 0; n < num_total_tiles; n++) {
+		tile = n;
+		buf = &buffers[current_set][tile / options.tiles_per_buf];
+		tile %= options.tiles_per_buf;
+
+		tile2xy(buf, tile, &x, &y);
+
+		/* continue the running sequence for this tile */
+		for (k = 0; k < dwords; k++)
+			tmp_tile[k] = next_val++;
+
+		if (options.use_cpu_maps)
+			set_to_cpu_domain(buf, 1);
+
+		cpucpy2d(tmp_tile, options.tile_size, 0, 0,
+			 buf->data, buf->stride / sizeof(uint32_t),
+			 x, y, n);
+	}
+
+	for (n = 0; n < num_total_tiles; n++)
+		tile_permutation[n] = n;
+}
+
+/*
+ * Read every logical tile back from wherever the permutation says it lives
+ * in the current buffer set and let cpucpy2d() check its contents. The data
+ * lands in a scratch tile that is not looked at again.
+ */
+static void fan_in_and_check(void)
+{
+	uint32_t tmp_tile[options.tile_size*options.tile_size];
+	struct scratch_buf *buf;
+	unsigned tile, x, y;
+	int n;
+
+	for (n = 0; n < num_total_tiles; n++) {
+		/* physical slot of logical tile n */
+		tile = tile_permutation[n];
+		buf = &buffers[current_set][tile / options.tiles_per_buf];
+		tile %= options.tiles_per_buf;
+
+		tile2xy(buf, tile, &x, &y);
+
+		if (options.use_cpu_maps)
+			set_to_cpu_domain(buf, 0);
+
+		cpucpy2d(buf->data, buf->stride / sizeof(uint32_t),
+			 x, y,
+			 tmp_tile, options.tile_size, 0, 0,
+			 n);
+	}
+}
+
+/*
+ * Adjust buf->stride so that the buffer is neither taller than
+ * options.max_dimension nor smaller than one tile in either direction.
+ * The three corrections are applied in order, each one looking at the stride
+ * left behind by the previous one; the asserts then check the final result.
+ */
+static void sanitize_stride(struct scratch_buf *buf)
+{
+
+	/* too tall: widen the rows */
+	if (buf_height(buf) > options.max_dimension)
+		buf->stride = buf->size / options.max_dimension;
+
+	/* fewer rows than a tile has: narrow the rows */
+	if (buf_height(buf) < options.tile_size)
+		buf->stride = buf->size / options.tile_size;
+
+	/* rows narrower than a tile: make them exactly one tile wide */
+	if (buf_width(buf) < options.tile_size)
+		buf->stride = options.tile_size * sizeof(uint32_t);
+
+	assert(buf->stride <= 8192);
+	assert(buf_width(buf) <= options.max_dimension);
+	assert(buf_height(buf) <= options.max_dimension);
+
+	assert(buf_width(buf) >= options.tile_size);
+	assert(buf_height(buf) >= options.tile_size);
+
+}
+
+/*
+ * Set up @buf as an untiled scratch buffer of @size bytes with a 4096 byte
+ * stride (adjusted by sanitize_stride()) holding options.tiles_per_buf tiles.
+ *
+ * buf->data points at malloc()ed memory with options.no_hw, otherwise at a
+ * mapping of the bo: drm_intel_bo_map() with options.use_cpu_maps,
+ * drm_intel_gem_bo_map_gtt() without. A failed bo allocation or a missing
+ * data pointer trips an assert().
+ */
+static void init_buffer(struct scratch_buf *buf, unsigned size)
+{
+	buf->bo = drm_intel_bo_alloc(bufmgr, "tiled bo", size, 4096);
+	buf->size = size;
+	assert(buf->bo);
+	buf->tiling = I915_TILING_NONE;
+	buf->stride = 4096;
+
+	sanitize_stride(buf);
+
+	if (options.no_hw)
+		buf->data = malloc(size);
+	else {
+		if (options.use_cpu_maps)
+			drm_intel_bo_map(buf->bo, 1);
+		else
+			drm_intel_gem_bo_map_gtt(buf->bo);
+		buf->data = buf->bo->virtual;
+	}
+
+	/* buf->data is handed to cpucpy2d(), which dereferences it unchecked */
+	assert(buf->data);
+
+	buf->num_tiles = options.tiles_per_buf;
+}
+
+/* Swap callback for drmtest_permute_array(): exchange scratch bufs i and j. */
+static void exchange_buf(void *array, unsigned i, unsigned j)
+{
+	struct scratch_buf *bufs = array;
+	struct scratch_buf saved = bufs[i];
+
+	bufs[i] = bufs[j];
+	bufs[j] = saved;
+}
+
+
+static void init_set(unsigned set)
+{
+	long int r;
+	int i;
+	/* Shuffle the buffers of a set, then re-tile a random subset of them. */
+	drmtest_permute_array(buffers[set], num_buffers, exchange_buf);
+	/* options.gpu_busy_load == 0: bump the load; past 10 it restarts at 6 */
+	if (current_set == 1 && options.gpu_busy_load == 0) {
+		gpu_busy_load++;
+		if (gpu_busy_load > 10)
+			gpu_busy_load = 6;
+	}
+	/* successive bit groups of one random() value drive each decision */
+	for (i = 0; i < num_buffers; i++) {
+		r = random();
+		if ((r & 3) != 0)	/* buffer keeps its current tiling and stride */
+		    continue;
+		r >>= 2;
+		/* untiled only when the next two bits are both zero */
+		if ((r & 3) != 0)
+			buffers[set][i].tiling = I915_TILING_X;
+		else
+			buffers[set][i].tiling = I915_TILING_NONE;
+		r >>= 2;
+		if (options.forced_tiling >= 0)
+			buffers[set][i].tiling = options.forced_tiling;
+		/* random power-of-two stride, never above 8192 bytes */
+		if (buffers[set][i].tiling == I915_TILING_NONE) {
+			/* min 64 byte stride */
+			r %= 8;
+			buffers[set][i].stride = 64 * (1 << r);
+		} else if (IS_GEN2(devid)) {
+			/* min 128 byte stride */
+			r %= 7;
+			buffers[set][i].stride = 128 * (1 << r);
+		} else {
+			/* min 512 byte stride */
+			r %= 5;
+			buffers[set][i].stride = 512 * (1 << r);
+		}
+
+		sanitize_stride(&buffers[set][i]);
+
+		gem_set_tiling(drm_fd, buffers[set][i].bo->handle,
+			       buffers[set][i].tiling,
+			       buffers[set][i].stride);
+
+		if (options.trace_tile != -1 && i == options.trace_tile/options.tiles_per_buf)
+			printf("changing buffer %i containing tile %i: tiling %i, stride %i\n", i,
+					options.trace_tile,
+					buffers[set][i].tiling, buffers[set][i].stride);
+	}
+}
+
+/* Swap callback for drmtest_permute_array(): exchange unsigned i and j. */
+static void exchange_uint(void *array, unsigned i, unsigned j)
+{
+	unsigned *vals = array;
+	const unsigned held = vals[j];
+
+	vals[j] = vals[i];
+	vals[i] = held;
+}
+
+static void copy_tiles(unsigned *permutation)
+{	/* Copy every tile of current_set to its permuted slot in target_set. */
+	unsigned src_tile, src_buf_idx, src_x, src_y;
+	unsigned dst_tile, dst_buf_idx, dst_x, dst_y;
+	struct scratch_buf *src_buf, *dst_buf;
+	int i, idx;
+	for (i = 0; i < num_total_tiles; i++) {
+		/* tile_permutation is independent of current_permutation, so
+		 * abuse it to randomize the order of the src bos */
+		idx  = tile_permutation[i];
+		src_buf_idx = idx / options.tiles_per_buf;
+		src_tile = idx % options.tiles_per_buf;
+		src_buf = &buffers[current_set][src_buf_idx];
+		/* tile2xy() fills in the x/y later handed to the copy function */
+		tile2xy(src_buf, src_tile, &src_x, &src_y);
+		/* permutation[idx] is the destination slot for source slot idx */
+		dst_buf_idx = permutation[idx] / options.tiles_per_buf;
+		dst_tile = permutation[idx] % options.tiles_per_buf;
+		dst_buf = &buffers[target_set][dst_buf_idx];
+
+		tile2xy(dst_buf, dst_tile, &dst_x, &dst_y);
+		/* NOTE(review): no newline below - presumably next_copyfunc() ends the line; confirm */
+		if (options.trace_tile == i)
+			printf("copying tile %i from %i (%i, %i) to %i (%i, %i)", i,
+				tile_permutation[i], src_buf_idx, src_tile,
+				permutation[idx], dst_buf_idx, dst_tile);
+
+		if (options.no_hw) {
+			cpucpy2d(src_buf->data,
+				 src_buf->stride / sizeof(uint32_t),
+				 src_x, src_y,
+				 dst_buf->data,
+				 dst_buf->stride / sizeof(uint32_t),
+				 dst_x, dst_y,
+				 i);
+		} else {
+			next_copyfunc(i);
+
+			copyfunc(src_buf, src_x, src_y, dst_buf, dst_x, dst_y,
+				 i);
+		}
+	}
+	/* flush the batch once every tile was handed to a copy function */
+	intel_batchbuffer_flush(batch);
+}
+
+/* Query I915_PARAM_NUM_FENCES_AVAIL and return that count minus two. */
+static int get_num_fences(void)
+{
+	int avail, err;
+	drm_i915_getparam_t query;
+
+	query.value = &avail;
+	query.param = I915_PARAM_NUM_FENCES_AVAIL;
+	err = drmIoctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &query);
+	assert(err == 0);
+	printf("total %d fences\n", avail);
+
+	assert(avail > 4);
+	return avail - 2;
+}
+
+static void sanitize_tiles_per_buf(void)
+{	/* Cap tiles_per_buf at scratch_buf_size / TILE_BYTES(tile_size). */
+	if (options.tiles_per_buf > options.scratch_buf_size / TILE_BYTES(options.tile_size))
+		options.tiles_per_buf = options.scratch_buf_size / TILE_BYTES(options.tile_size);
+}	/* NOTE(review): divides by zero if TILE_BYTES() yields 0 (tile_size 0?) - confirm */
+
+/* Set the option defaults, apply the command line on top of them and derive
+ * options.max_dimension.  Rejected values are reported and ignored; the
+ * long_options table must end in an all-zero entry for getopt_long(). */
+static void parse_options(int argc, char **argv)
+{
+	int c, tmp, option_index = 0;
+	static struct option long_options[] = {
+		{"no-hw", 0, 0, 'd'},
+		{"buf-size", 1, 0, 's'},
+		{"gpu-busy-load", 1, 0, 'g'},
+		{"no-signals", 0, 0, 'S'},
+		{"buffer-count", 1, 0, 'c'},
+		{"trace-tile", 1, 0, 't'},
+		{"disable-blt", 0, 0, 'b'},
+		{"disable-render", 0, 0, 'r'},
+		{"untiled", 0, 0, 'u'},
+		{"x-tiled", 0, 0, 'x'},
+		{"use-cpu-maps", 0, 0, 'm'},
+		{"rounds", 1, 0, 'o'},
+		{"no-fail", 0, 0, 'f'},
+		{"tiles-per-buf", 1, 0, 'p'},	/* takes a value, like -p */
+#define DUCTAPE 0xdead0001
+		{"remove-duct-tape", 0, 0, DUCTAPE},
+#define TILESZ	0xdead0002
+		{"tile-size", 1, 0, TILESZ},
+#define CHCK_RENDER 0xdead0003
+		{"check-render-cpyfn", 0, 0, CHCK_RENDER},
+		{0, 0, 0, 0},
+	};
+
+	options.scratch_buf_size = 256*4096;
+	options.no_hw = 0;
+	options.use_signal_helper = 1;
+	options.gpu_busy_load = 0;
+	options.num_buffers = 0;
+	options.trace_tile = -1;
+	options.use_render = 1;
+	options.use_blt = 1;
+	options.forced_tiling = -1;
+	options.use_cpu_maps = 0;
+	options.total_rounds = 512;
+	options.fail = 1;
+	options.ducttape = 1;
+	options.tile_size = 16;
+	options.tiles_per_buf = options.scratch_buf_size / TILE_BYTES(options.tile_size);
+	options.check_render_cpyfn = 0;
+
+	while((c = getopt_long(argc, argv, "ds:g:c:t:rbuxmo:fp:",
+			       long_options, &option_index)) != -1) {
+		switch(c) {
+		case 'd':
+			options.no_hw = 1;
+			printf("no-hw debug mode\n");
+			break;
+		case 'S':
+			options.use_signal_helper = 0;
+			printf("disabling that pesky nuisance who keeps interrupting us\n");
+			break;
+		case 's':
+			tmp = atoi(optarg);
+			if (tmp < options.tile_size*8192)
+				printf("scratch buffer size needs to be at least %i\n",
+				       options.tile_size*8192);
+			else if (tmp & (tmp - 1)) {
+				printf("scratch buffer size needs to be a power-of-two\n");
+			} else {
+				printf("fixed scratch buffer size to %u\n", tmp);
+				options.scratch_buf_size = tmp;
+				sanitize_tiles_per_buf();
+			}
+			break;
+		case 'g':
+			tmp = atoi(optarg);
+			if (tmp < 0 || tmp > 10)
+				printf("gpu busy load needs to be bigger than 0 and smaller than 10\n");
+			else {
+				printf("gpu busy load factor set to %i\n", tmp);
+				gpu_busy_load = options.gpu_busy_load = tmp;
+			}
+			break;
+		case 'c':
+			options.num_buffers = atoi(optarg);
+			printf("buffer count set to %i\n", options.num_buffers);
+			break;
+		case 't':
+			options.trace_tile = atoi(optarg);
+			printf("tracing tile %i\n", options.trace_tile);
+			break;
+		case 'r':
+			options.use_render = 0;
+			printf("disabling render copy\n");
+			break;
+		case 'b':
+			options.use_blt = 0;
+			printf("disabling blt copy\n");
+			break;
+		case 'u':
+			options.forced_tiling = I915_TILING_NONE;
+			printf("disabling tiling\n");
+			break;
+		case 'x':
+			if (options.use_cpu_maps) {
+				printf("tiling not possible with cpu maps\n");
+			} else {
+				options.forced_tiling = I915_TILING_X;
+				printf("using only X-tiling\n");
+			}
+			break;
+		case 'm':
+			options.use_cpu_maps = 1;
+			options.forced_tiling = I915_TILING_NONE;
+			printf("disabling tiling\n");
+			break;
+		case 'o':
+			options.total_rounds = atoi(optarg);
+			printf("total rounds %i\n", options.total_rounds);
+			break;
+		case 'f':
+			options.fail = 0;
+			printf("not failing when detecting errors\n");
+			break;
+		case 'p':
+			options.tiles_per_buf = atoi(optarg);
+			printf("tiles per buffer %i\n", options.tiles_per_buf);
+			break;
+		case DUCTAPE:
+			options.ducttape = 0;
+			printf("applying duct-tape\n");
+			break;
+		case TILESZ:
+			options.tile_size = atoi(optarg);
+			sanitize_tiles_per_buf();
+			printf("til size %i\n", options.tile_size);
+			break;
+		case CHCK_RENDER:
+			options.check_render_cpyfn = 1;
+			printf("checking render copy function\n");
+			break;
+		default:
+			printf("unkown command options\n");
+			break;
+		}
+	}
+	sanitize_tiles_per_buf();	/* -p may exceed what one buffer holds */
+
+	if (optind < argc)
+		printf("unkown command options\n");
+
+	/* actually 32767, according to docs, but that kills our nice pot calculations. */
+	options.max_dimension = 16*1024;
+	if (options.use_render)
+		options.max_dimension = (IS_GEN2(devid) || IS_GEN3(devid)) ? 2048 : 8192;
+	printf("Limiting buffer to %dx%d\n",
+	       options.max_dimension, options.max_dimension);
+}
+
+/* Size and allocate both buffer sets plus the bufmgr, batch and busy_bo. */
+static void init(void)
+{
+	int i;
+	unsigned tmp;
+
+	if (options.num_buffers == 0) {
+		/* clamp in 64 bits: narrowing a >= 4GiB size first would wrap */
+		uint64_t aperture = gem_aperture_size(drm_fd);
+		tmp = aperture > 256*(1024*1024) ? 256*(1024*1024) : (unsigned)aperture;
+		num_buffers = 2 * tmp / options.scratch_buf_size / 3;
+		num_buffers /= 2;
+		printf("using %u buffers\n", num_buffers);
+	} else
+		num_buffers = options.num_buffers;
+
+	bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	drm_intel_bufmgr_gem_enable_fenced_relocs(bufmgr);
+	num_fences = get_num_fences();
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+	busy_bo = drm_intel_bo_alloc(bufmgr, "tiled bo", BUSY_BUF_SIZE, 4096);
+	if (options.forced_tiling >= 0)
+		gem_set_tiling(drm_fd, busy_bo->handle, options.forced_tiling, 4096);
+
+	for (i = 0; i < num_buffers; i++) {
+		init_buffer(&buffers[0][i], options.scratch_buf_size);
+		init_buffer(&buffers[1][i], options.scratch_buf_size);
+		num_total_tiles += buffers[0][i].num_tiles;
+	}
+	current_set = 0;
+	/* just in case it helps reproducibility */
+	srandom(0xdeadbeef);
+}
+
+/* Optional self-test (--check-render-cpyfn): copy a patterned tile with
+ * render_copyfunc() 16 times and print every pixel that arrives wrong. */
+static void check_render_copyfunc(void)
+{
+	struct scratch_buf src, dst;
+	uint32_t *ptr;
+	int i, j, pass;
+
+	if (!options.check_render_cpyfn)
+		return;
+
+	init_buffer(&src, options.scratch_buf_size);
+	init_buffer(&dst, options.scratch_buf_size);
+	for (pass = 0; pass < 16; pass++) {
+		/* origins 0 .. dim - tile_size are valid; + 1 keeps the modulus > 0 */
+		int sx = random() % (buf_width(&src) - options.tile_size + 1);
+		int sy = random() % (buf_height(&src) - options.tile_size + 1);
+		int dx = random() % (buf_width(&dst) - options.tile_size + 1);
+		int dy = random() % (buf_height(&dst) - options.tile_size + 1);
+
+		if (options.use_cpu_maps)
+			set_to_cpu_domain(&src, 1);
+
+		memset(src.data, 0xff, options.scratch_buf_size);
+		for (j = 0; j < options.tile_size; j++) {
+			ptr = (uint32_t*)((char *)src.data + sx*4 + (sy+j) * src.stride);
+			for (i = 0; i < options.tile_size; i++)
+				ptr[i] = j * options.tile_size + i;
+		}
+		render_copyfunc(&src, sx, sy, &dst, dx, dy, 0);
+		if (options.use_cpu_maps)
+			set_to_cpu_domain(&dst, 0);
+
+		for (j = 0; j < options.tile_size; j++) {
+			ptr = (uint32_t*)((char *)dst.data + dx*4 + (dy+j) * dst.stride);
+			for (i = 0; i < options.tile_size; i++)
+				if (ptr[i] != j * options.tile_size + i) {
+					printf("render copyfunc mismatch at (%d, %d): found %d, expected %d\n",
+					       i, j, ptr[i], j*options.tile_size + i);
+				}
+		}
+	}
+}
+
+
+/* Stress loop: each round shuffles all tiles from the current buffer set
+ * into the other one, tracking where every logical tile ends up. */
+int main(int argc, char **argv)
+{
+	int i, j;
+	unsigned *current_permutation, *tmp_permutation;
+
+	drm_fd = drm_open_any();
+	devid = intel_get_drm_devid(drm_fd);
+
+	parse_options(argc, argv);
+
+	/* start our little helper early before too many allocations occur */
+	if (options.use_signal_helper)
+		drmtest_fork_signal_helper();
+
+	init();
+	check_render_copyfunc();
+
+	tile_permutation = malloc(num_total_tiles * sizeof(*tile_permutation));
+	current_permutation = malloc(num_total_tiles * sizeof(*current_permutation));
+	tmp_permutation = malloc(num_total_tiles * sizeof(*tmp_permutation));
+	assert(tile_permutation);
+	assert(current_permutation);
+	assert(tmp_permutation);
+
+	fan_out();
+
+	for (i = 0; i < options.total_rounds; i++) {
+		printf("round %i\n", i);
+		if (i % 64 == 63) {
+			fan_in_and_check();
+			printf("everything correct after %i rounds\n", i + 1);
+		}
+
+		target_set = (current_set + 1) & 1;
+		init_set(target_set);
+
+		for (j = 0; j < num_total_tiles; j++)
+			current_permutation[j] = j;
+		drmtest_permute_array(current_permutation, num_total_tiles, exchange_uint);
+
+		copy_tiles(current_permutation);
+		memcpy(tmp_permutation, tile_permutation, sizeof(unsigned)*num_total_tiles);
+
+		/* accumulate the permutations */
+		for (j = 0; j < num_total_tiles; j++)
+			tile_permutation[j] = current_permutation[tmp_permutation[j]];
+
+		current_set = target_set;
+	}
+
+	fan_in_and_check();
+
+	fprintf(stderr, "num failed tiles %u, max incoherent bytes %zu\n",
+		stats.num_failed, stats.max_failed_reads*sizeof(uint32_t));
+
+	free(tmp_permutation);
+	free(current_permutation);
+	free(tile_permutation);
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+	close(drm_fd);
+	drmtest_stop_signal_helper();
+	return 0;
+}
diff --git a/tests/gem_tiled_blits.c b/tests/gem_tiled_blits.c
new file mode 100644
index 00000000..86c9a886
--- /dev/null
+++ b/tests/gem_tiled_blits.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file gem_tiled_blits.c
+ *
+ * This is a test of doing many tiled blits, with a working set
+ * larger than the aperture size.
+ *
+ * The goal is to catch a couple types of failure;
+ * - Fence management problems on pre-965.
+ * - A17 or L-shaped memory tiling workaround problems in acceleration.
+ *
+ * The model is to fill a collection of 1MB objects in a way that can't trip
+ * over A6 swizzling -- upload data to a non-tiled object, blit to the tiled
+ * object.  Then, copy the 1MB objects randomly between each other for a while.
+ * Finally, download their data through linear objects again and see what
+ * resulted.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+static int width = 512, height = 512;
+
+/* Build a 1MiB X-tiled bo holding consecutive dwords from start_val on; the
+ * data is written to a linear bo first and then copied with intel_copy_bo(). */
+static drm_intel_bo *
+create_bo(uint32_t start_val)
+{
+	uint32_t tiling_mode = I915_TILING_X;
+	drm_intel_bo *tiled, *staging;
+	uint32_t *dwords;
+	int n, err;
+
+	tiled = drm_intel_bo_alloc(bufmgr, "tiled bo", 1024 * 1024, 4096);
+	err = drm_intel_bo_set_tiling(tiled, &tiling_mode, width * 4);
+	assert(err == 0);
+	assert(tiling_mode == I915_TILING_X);
+
+	staging = drm_intel_bo_alloc(bufmgr, "linear src", 1024 * 1024, 4096);
+	drm_intel_bo_map(staging, 1);
+	dwords = staging->virtual;
+	for (n = 0; n < 1024 * 1024 / 4; n++)
+		dwords[n] = start_val + n;
+	drm_intel_bo_unmap(staging);
+
+	/* the linear staging bo is only needed until the copy is queued */
+	intel_copy_bo(batch, tiled, staging, width, height);
+	drm_intel_bo_unreference(staging);
+
+	return tiled;
+}
+
+/* Copy bo into a fresh linear bo and abort() unless it holds consecutive
+ * dwords starting at start_val. */
+static void
+check_bo(drm_intel_bo *bo, uint32_t start_val)
+{
+	drm_intel_bo *readback;
+	uint32_t *dwords;
+	int n;
+
+	readback = drm_intel_bo_alloc(bufmgr, "linear dst", 1024 * 1024, 4096);
+	intel_copy_bo(batch, readback, bo, width, height);
+
+	drm_intel_bo_map(readback, 0);
+	dwords = readback->virtual;
+	for (n = 0; n < 1024 * 1024 / 4; n++, start_val++) {
+		if (dwords[n] == start_val)
+			continue;
+
+		fprintf(stderr, "Expected 0x%08x, found 0x%08x "
+			"at offset 0x%08x\n",
+			start_val, dwords[n], n * 4);
+		abort();
+	}
+
+	drm_intel_bo_unmap(readback);
+	drm_intel_bo_unreference(readback);
+}
+
+/* Fill `count` 1MiB tiled bos (argv[1], or 1.5x the aperture in MiB), blit
+ * them around in three patterns and verify the contents after each one. */
+int main(int argc, char **argv)
+{
+	drm_intel_bo **bo;
+	uint32_t *bo_start_val;
+	uint32_t start = 0;
+	int i, fd, count;
+
+	fd = drm_open_any();
+	count = 0;
+	if (argc > 1)
+		count = atoi(argv[1]);
+	if (count <= 0) {	/* missing, zero or negative: size from aperture */
+		count = 3 * gem_aperture_size(fd) / (1024*1024) / 2;
+		count += (count & 1) == 0;	/* make the default count odd */
+	}
+
+	if (count > intel_get_total_ram_mb() * 9 / 10) {
+		count = intel_get_total_ram_mb() * 9 / 10;
+		printf("not enough RAM to run test, reducing buffer count\n");
+	}
+
+	printf("Using %d 1MiB buffers\n", count);
+
+	bo = malloc(sizeof(*bo) * count);
+	bo_start_val = malloc(sizeof(*bo_start_val) * count);
+	assert(bo && bo_start_val);	/* every later loop indexes these */
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+	for (i = 0; i < count; i++) {
+		bo[i] = create_bo(start);
+		bo_start_val[i] = start;
+		start += 1024 * 1024 / 4;
+	}
+	printf("Verifying initialisation...\n");
+	for (i = 0; i < count; i++)
+		check_bo(bo[i], bo_start_val[i]);
+
+	printf("Cyclic blits, forward...\n");
+	for (i = 0; i < count * 4; i++) {
+		int src = i % count;
+		int dst = (i+1) % count;
+
+		if (src == dst)
+			continue;
+
+		intel_copy_bo(batch, bo[dst], bo[src], width, height);
+		bo_start_val[dst] = bo_start_val[src];
+	}
+	for (i = 0; i < count; i++)
+		check_bo(bo[i], bo_start_val[i]);
+
+	printf("Cyclic blits, backward...\n");
+	for (i = 0; i < count * 4; i++) {
+		int src = (i+1) % count;
+		int dst = i % count;
+
+		if (src == dst)
+			continue;
+
+		intel_copy_bo(batch, bo[dst], bo[src], width, height);
+		bo_start_val[dst] = bo_start_val[src];
+	}
+	for (i = 0; i < count; i++)
+		check_bo(bo[i], bo_start_val[i]);
+
+	printf("Random blits...\n");
+	for (i = 0; i < count * 4; i++) {
+		int src = random() % count;
+		int dst = random() % count;
+
+		if (src == dst)
+			continue;
+
+		intel_copy_bo(batch, bo[dst], bo[src], width, height);
+		bo_start_val[dst] = bo_start_val[src];
+	}
+	for (i = 0; i < count; i++)
+		check_bo(bo[i], bo_start_val[i]);
+
+	for (i = 0; i < count; i++)
+		drm_intel_bo_unreference(bo[i]);
+	free(bo_start_val);
+	free(bo);
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_tiled_fence_blits.c b/tests/gem_tiled_fence_blits.c
new file mode 100644
index 00000000..dc654793
--- /dev/null
+++ b/tests/gem_tiled_fence_blits.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright © 2009,2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file gem_tiled_fence_blits.c
+ *
+ * This is a test of doing many tiled blits, with a working set
+ * larger than the aperture size.
+ *
+ * The goal is to catch a couple types of failure;
+ * - Fence management problems on pre-965.
+ * - A17 or L-shaped memory tiling workaround problems in acceleration.
+ *
+ * The model is to fill a collection of 1MB tiled objects -- here the data is
+ * uploaded straight into each tiled object with gem_write().  Then, copy the
+ * 1MB objects randomly between each other for a while.  Finally, download
+ * their data again with gem_read() and see what resulted.
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+static int width = 512, height = 512;
+static uint32_t linear[1024*1024/4];
+
+/* Create a 1MiB X-tiled bo and gem_write() consecutive dwords, starting at
+ * start_val, into it through the file-scope `linear` staging array. */
+static drm_intel_bo *
+create_bo(int fd, uint32_t start_val)
+{
+	uint32_t tiling_mode = I915_TILING_X;
+	drm_intel_bo *tiled;
+	int n, err;
+
+	tiled = drm_intel_bo_alloc(bufmgr, "tiled bo", 1024 * 1024, 4096);
+	err = drm_intel_bo_set_tiling(tiled, &tiling_mode, width * 4);
+	assert(err == 0);
+	assert(tiling_mode == I915_TILING_X);
+
+	for (n = 0; n < 1024 * 1024 / 4; n++)
+		linear[n] = start_val + n;
+	gem_write(fd, tiled->handle, 0, linear, sizeof(linear));
+
+	return tiled;
+}
+
+/* gem_read() the bo into `linear`; abort() on the first unexpected dword. */
+static void
+check_bo(int fd, drm_intel_bo *bo, uint32_t start_val)
+{
+	int n;
+
+	gem_read(fd, bo->handle, 0, linear, sizeof(linear));
+	for (n = 0; n < 1024 * 1024 / 4; n++, start_val++) {
+		if (linear[n] == start_val)
+			continue;
+
+		fprintf(stderr, "Expected 0x%08x, found 0x%08x "
+			"at offset 0x%08x\n",
+			start_val, linear[n], n * 4);
+		abort();
+	}
+}
+
+/* Blit between up to 1.5x the aperture worth of 1MiB tiled bos, then verify. */
+int main(int argc, char **argv)
+{
+	drm_intel_bo **bo;
+	uint32_t *bo_start_val;
+	uint32_t start = 0;
+	int fd, i, count;
+
+	fd = drm_open_any();
+	count = 3 * gem_aperture_size(fd) / (1024*1024) / 2;
+	if (count > intel_get_total_ram_mb() * 9 / 10) {
+		count = intel_get_total_ram_mb() * 9 / 10;
+		printf("not enough RAM to run test, reducing buffer count\n");
+	}
+	count |= 1;
+	printf("Using %d 1MiB buffers\n", count);
+
+	bo = malloc(sizeof(*bo) * count);
+	bo_start_val = malloc(sizeof(*bo_start_val) * count);
+	assert(bo && bo_start_val);	/* count tracks the aperture: no fixed array fits */
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+	for (i = 0; i < count; i++) {
+		bo[i] = create_bo(fd, start);
+		bo_start_val[i] = start;
+		/*
+		printf("Creating bo %d\n", i);
+		check_bo(bo[i], bo_start_val[i]);
+		*/
+		start += 1024 * 1024 / 4;
+	}
+
+	for (i = 0; i < count; i++) {
+		int src = count - i - 1;
+		intel_copy_bo(batch, bo[i], bo[src], width, height);
+		bo_start_val[i] = bo_start_val[src];
+	}
+
+	for (i = 0; i < count * 4; i++) {
+		int src = random() % count;
+		int dst = random() % count;
+
+		if (src == dst)
+			continue;
+
+		intel_copy_bo(batch, bo[dst], bo[src], width, height);
+		bo_start_val[dst] = bo_start_val[src];
+		/*
+		check_bo(bo[dst], bo_start_val[dst]);
+		printf("%d: copy bo %d to %d\n", i, src, dst);
+		*/
+	}
+
+	for (i = 0; i < count; i++) {
+		/*
+		printf("check %d\n", i);
+		*/
+		check_bo(fd, bo[i], bo_start_val[i]);
+		drm_intel_bo_unreference(bo[i]);
+	}
+	free(bo_start_val);
+	free(bo);
+
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+	close(fd);
+	return 0;
+}
diff --git a/tests/gem_tiled_partial_pwrite_pread.c b/tests/gem_tiled_partial_pwrite_pread.c
new file mode 100644
index 00000000..bd0d4e09
--- /dev/null
+++ b/tests/gem_tiled_partial_pwrite_pread.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+/*
+ * Testcase: pwrite/pread consistency when touching partial cachelines
+ *
+ * Some fancy new pwrite/pread optimizations clflush in-line while
+ * reading/writing. Check whether all required clflushes happen.
+ *
+ * Unfortunately really old mesa used unaligned pread/pwrite for s/w fallback
+ * rendering, so we need to check whether this works on tiled buffers, too.
+ *
+ */
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+
+drm_intel_bo *scratch_bo;
+drm_intel_bo *staging_bo;
+drm_intel_bo *tiled_staging_bo;
+unsigned long scratch_pitch;
+#define BO_SIZE (32*4096)
+uint32_t devid;
+uint64_t mappable_gtt_limit;
+int fd;
+
+/* Copy a 1024 x (BO_SIZE/scratch_pitch) area from src to dst with one blit. */
+static void
+copy_bo(drm_intel_bo *src, int src_tiled,
+	drm_intel_bo *dst, int dst_tiled)
+{
+	unsigned long dst_pitch = scratch_pitch;
+	unsigned long src_pitch = scratch_pitch;
+	uint32_t cmd_bits = 0;
+
+	/* dst is tiled ... */
+	if (intel_gen(devid) >= 4 && dst_tiled) {
+		dst_pitch /= 4;
+		cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
+	}
+	/* src is tiled: this must test src_tiled, not dst_tiled again */
+	if (intel_gen(devid) >= 4 && src_tiled) {
+		src_pitch /= 4;
+		cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
+	}
+
+	BEGIN_BATCH(8);
+	OUT_BATCH(XY_SRC_COPY_BLT_CMD |
+		  XY_SRC_COPY_BLT_WRITE_ALPHA |
+		  XY_SRC_COPY_BLT_WRITE_RGB |
+		  cmd_bits);
+	OUT_BATCH((3 << 24) | /* 32 bits */
+		  (0xcc << 16) | /* copy ROP */
+		  dst_pitch);
+	OUT_BATCH(0 << 16 | 0);
+	OUT_BATCH(BO_SIZE/scratch_pitch << 16 | 1024);
+	OUT_RELOC_FENCED(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+	OUT_BATCH(0 << 16 | 0);
+	OUT_BATCH(src_pitch);
+	OUT_RELOC_FENCED(src, I915_GEM_DOMAIN_RENDER, 0, 0);
+	ADVANCE_BATCH();
+	intel_batchbuffer_flush(batch);
+}
+
+/* Fill tmp_bo with the byte val through a GTT mapping, then copy_bo() it
+ * into bo, passing src_tiled = 0 and dst_tiled = 1. */
+static void
+blt_bo_fill(drm_intel_bo *tmp_bo, drm_intel_bo *bo, int val)
+{
+	uint8_t *cur, *end;
+
+	drm_intel_gem_bo_map_gtt(tmp_bo);
+	cur = tmp_bo->virtual;
+	for (end = cur + BO_SIZE; cur < end; cur++)
+		*cur = val;
+	drm_intel_gem_bo_unmap_gtt(tmp_bo);
+
+	/* G33 or gen4+: trash the aperture while bo is below the mappable limit */
+	if (bo->offset < mappable_gtt_limit &&
+	    (IS_G33(devid) || intel_gen(devid) >= 4))
+		drmtest_trash_aperture();
+
+	copy_bo(tmp_bo, 0, bo, 1);
+}
+
+#define MAX_BLT_SIZE 128
+#define ROUNDS 200
+/* Check partial reads, partial writes and writes-after-reads on an X-tiled
+ * bo that the blitter fills beforehand; exits with 1 on the first mismatch. */
+int main(int argc, char **argv)
+{
+	int i, j;
+	uint8_t tmp[BO_SIZE];
+	uint8_t compare_tmp[BO_SIZE];
+	uint32_t tiling_mode = I915_TILING_X;
+
+	srandom(0xdeadbeef);
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	//drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	devid = intel_get_drm_devid(fd);
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+	/* overallocate the buffers we're actually using (reason not recorded) */
+	scratch_bo = drm_intel_bo_alloc_tiled(bufmgr, "scratch bo", 1024,
+					      BO_SIZE/4096, 4,
+					      &tiling_mode, &scratch_pitch, 0);
+	assert(tiling_mode == I915_TILING_X);
+	assert(scratch_pitch == 4096);
+	staging_bo = drm_intel_bo_alloc(bufmgr, "staging bo", BO_SIZE, 4096);
+	tiled_staging_bo = drm_intel_bo_alloc_tiled(bufmgr, "scratch bo", 1024,
+						    BO_SIZE/4096, 4,
+						    &tiling_mode,
+						    &scratch_pitch, 0);
+
+	drmtest_init_aperture_trashers(bufmgr);
+	mappable_gtt_limit = gem_mappable_aperture_size();
+
+	printf("checking partial reads\n");
+	for (i = 0; i < ROUNDS; i++) {
+		int start, len;
+		int val = i % 256;
+
+		blt_bo_fill(staging_bo, scratch_bo, i);
+
+		start = random() % BO_SIZE;
+		len = random() % (BO_SIZE-start) + 1;
+
+		drm_intel_bo_get_subdata(scratch_bo, start, len, tmp);
+		for (j = 0; j < len; j++) {
+			if (tmp[j] != val) {
+				printf("mismatch at %i, got: %i, expected: %i\n",
+				       start + j, tmp[j], val);
+				exit(1);
+			}
+		}
+
+		drmtest_progress("partial reads test: ", i, ROUNDS);
+	}
+
+	printf("checking partial writes\n");
+	for (i = 0; i < ROUNDS; i++) {
+		int start, len;
+		int val = i % 256;
+
+		blt_bo_fill(staging_bo, scratch_bo, i);
+
+		start = random() % BO_SIZE;
+		len = random() % (BO_SIZE-start) + 1;
+
+		memset(tmp, i + 63, BO_SIZE);
+
+		drm_intel_bo_subdata(scratch_bo, start, len, tmp);
+
+		copy_bo(scratch_bo, 1, tiled_staging_bo, 1);
+		drm_intel_bo_get_subdata(tiled_staging_bo, 0, BO_SIZE,
+					 compare_tmp);
+
+		/* report the byte that was compared (compare_tmp), not tmp */
+		for (j = 0; j < start; j++) {
+			if (compare_tmp[j] != val) {
+				printf("amismatch at %i, got: %i, expected: %i\n",
+				       j, compare_tmp[j], val);
+				exit(1);
+			}
+		}
+		for (; j < start + len; j++) {
+			if (compare_tmp[j] != tmp[0]) {
+				printf("bmismatch at %i, got: %i, expected: %i\n",
+				       j, compare_tmp[j], tmp[0]);
+				exit(1);
+			}
+		}
+		for (; j < BO_SIZE; j++) {
+			if (compare_tmp[j] != val) {
+				printf("cmismatch at %i, got: %i, expected: %i\n",
+				       j, compare_tmp[j], val);
+				exit(1);
+			}
+		}
+		drm_intel_gem_bo_unmap_gtt(staging_bo);	/* NOTE(review): blt_bo_fill() already unmaps - confirm */
+
+		drmtest_progress("partial writes test: ", i, ROUNDS);
+	}
+
+	printf("checking partial writes after partial reads\n");
+	for (i = 0; i < ROUNDS; i++) {
+		int start, len;
+		int val = i % 256;
+
+		blt_bo_fill(staging_bo, scratch_bo, i);
+
+		/* partial read */
+		start = random() % BO_SIZE;
+		len = random() % (BO_SIZE-start) + 1;
+
+		drm_intel_bo_get_subdata(scratch_bo, start, len, tmp);
+		for (j = 0; j < len; j++) {
+			if (tmp[j] != val) {
+				printf("mismatch in read at %i, got: %i, expected: %i\n",
+				       start + j, tmp[j], val);
+				exit(1);
+			}
+		}
+
+		/* Change contents through gtt to make the pread cachelines
+		 * stale. */
+		val = (i + 17) % 256;
+		blt_bo_fill(staging_bo, scratch_bo, val);
+
+		/* partial write */
+		start = random() % BO_SIZE;
+		len = random() % (BO_SIZE-start) + 1;
+
+		memset(tmp, i + 63, BO_SIZE);
+
+		drm_intel_bo_subdata(scratch_bo, start, len, tmp);
+
+		copy_bo(scratch_bo, 1, tiled_staging_bo, 1);
+		drm_intel_bo_get_subdata(tiled_staging_bo, 0, BO_SIZE,
+					 compare_tmp);
+
+		/* report the byte that was compared (compare_tmp), not tmp */
+		for (j = 0; j < start; j++) {
+			if (compare_tmp[j] != val) {
+				printf("mismatch at %i, got: %i, expected: %i\n",
+				       j, compare_tmp[j], val);
+				exit(1);
+			}
+		}
+		for (; j < start + len; j++) {
+			if (compare_tmp[j] != tmp[0]) {
+				printf("mismatch at %i, got: %i, expected: %i\n",
+				       j, compare_tmp[j], tmp[0]);
+				exit(1);
+			}
+		}
+		for (; j < BO_SIZE; j++) {
+			if (compare_tmp[j] != val) {
+				printf("mismatch at %i, got: %i, expected: %i\n",
+				       j, compare_tmp[j], val);
+				exit(1);
+			}
+		}
+		drm_intel_gem_bo_unmap_gtt(staging_bo);	/* NOTE(review): blt_bo_fill() already unmaps - confirm */
+
+		drmtest_progress("partial read/writes test: ", i, ROUNDS);
+	}
+
+	drmtest_cleanup_aperture_trashers();
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_tiled_pread.c b/tests/gem_tiled_pread.c
new file mode 100644
index 00000000..189affce
--- /dev/null
+++ b/tests/gem_tiled_pread.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file gem_tiled_pread.c
+ *
+ * This is a test of pread's behavior on tiled objects with respect to the
+ * reported swizzling value.
+ *
+ * The goal is to exercise the slow_bit17_copy path for reading on bit17
+ * machines, but will also be useful for catching swizzling value bugs on
+ * other systems.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_gpu_tools.h"
+
+#define WIDTH 512
+#define HEIGHT 512
+static uint32_t linear[WIDTH * HEIGHT];
+
+#define PAGE_SIZE 4096
+
+static int tile_width;
+static int tile_height;
+static int tile_size;
+
+/* Report the tiling and swizzle modes the kernel has recorded for handle. */
+static void
+gem_get_tiling(int fd, uint32_t handle, uint32_t *tiling, uint32_t *swizzle)
+{
+	struct drm_i915_gem_get_tiling arg;
+	int ret;
+
+	memset(&arg, 0, sizeof(arg));
+	arg.handle = handle;
+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &arg);
+	assert(ret == 0);	/* a failed query is fatal for the test */
+
+	*tiling = arg.tiling_mode;
+	*swizzle = arg.swizzle_mode;
+}
+
+/* Create an X-tiled BO the size of linear[] holding the dwords 0, 1, 2, ... */
+static uint32_t
+create_bo(int fd)
+{
+	const size_t bo_bytes = sizeof(linear);
+	uint32_t bo = gem_create(fd, bo_bytes);
+	uint32_t *map;
+	int idx;
+
+	gem_set_tiling(fd, bo, I915_TILING_X, WIDTH * sizeof(uint32_t));
+
+	/* Write each dword's own index through the gem_mmap mapping. */
+	map = gem_mmap(fd, bo, bo_bytes, PROT_READ | PROT_WRITE);
+	for (idx = 0; idx != WIDTH*HEIGHT; idx++)
+		map[idx] = idx;
+	munmap(map, bo_bytes);
+	return bo;
+}
+
+static int
+swizzle_bit(int bit, int offset)
+{	/* Isolate bit number `bit` of offset and shift it down to bit position 6. */
+	return (offset & (1 << bit)) >> (bit - 6);
+}
+
+/* Translate a (swizzled) byte offset in the tiled buffer into the value
+ * expected there: the linear dword index y * WIDTH + x of that location.
+ */
+static uint32_t
+calculate_expected(int offset)
+{
+	int tile_off = offset & (tile_size - 1);	/* byte offset inside the tile (mask form assumes tile_size is a power of two) */
+	int tile_base = offset & -tile_size;	/* offset rounded down to the start of its tile */
+	int tile_index = tile_base / tile_size;
+	int tiles_per_row = 4*WIDTH / tile_width;	/* one row of the surface is 4*WIDTH bytes */
+
+	/* x (in dwords) and y (in rows) of the tile's first element, from the tile index. */
+	int base_y = tile_index / tiles_per_row * tile_height;
+	int base_x = tile_index % tiles_per_row * (tile_width/4);
+
+	/* x, y offsets within the tile; tile_width is in bytes, hence the /4 */
+	int tile_y = tile_off / tile_width;
+	int tile_x = (tile_off % tile_width) / 4;
+
+	/* printf("%3d, %3d, %3d,%3d\n", base_x, base_y, tile_x, tile_y); */
+	return (base_y + tile_y) * WIDTH + base_x + tile_x;
+}
+
+/* Read random dword-aligned ranges of an X-tiled BO with pread and verify them. */
+int
+main(int argc, char **argv)
+{
+	int fd;
+	int i, iter = 100;
+	uint32_t tiling, swizzle;
+	uint32_t handle, devid;
+
+	fd = drm_open_any();
+
+	handle = create_bo(fd);
+	gem_get_tiling(fd, handle, &tiling, &swizzle);
+
+	devid = intel_get_drm_devid(fd);
+	/* Tile geometry (width in bytes, height in rows) used by calculate_expected(). */
+	if (IS_GEN2(devid)) {
+		tile_height = 16;
+		tile_width = 128;
+		tile_size = 2048;
+	} else {
+		tile_height = 8;
+		tile_width = 512;
+		tile_size = PAGE_SIZE;
+	}
+
+	/* Read a bunch of random subsets of the data and check that they come
+	 * out right.
+	 */
+	for (i = 0; i < iter; i++) {
+		int size = WIDTH * HEIGHT * 4;
+		int offset = (random() % size) & ~3;	/* dword aligned */
+		int len = (random() % size) & ~3;
+		int j;
+
+		if (len == 0)
+			len = 4;
+
+		/* Clamp the read so it ends inside the BO. */
+		if (offset + len > size)
+			len = size - offset;
+
+		if (i == 0) {	/* the first pass always checks the whole BO */
+			offset = 0;
+			len = size;
+		}
+
+		gem_read(fd, handle, offset, linear, len);
+
+		/* Translate from offsets in the read buffer to the swizzled
+		 * address that it corresponds to.  This is the opposite of
+		 * what Mesa does (calculate offset to be read given the linear
+		 * offset it's looking for).
+		 */
+		for (j = offset; j < offset + len; j += 4) {
+			uint32_t expected_val, found_val;
+			int swizzled_offset;
+			const char *swizzle_str;
+
+			switch (swizzle) {
+			case I915_BIT_6_SWIZZLE_NONE:
+				swizzled_offset = j;
+				swizzle_str = "none";
+				break;
+			case I915_BIT_6_SWIZZLE_9:
+				swizzled_offset = j ^
+					swizzle_bit(9, j);
+				swizzle_str = "bit9";
+				break;
+			case I915_BIT_6_SWIZZLE_9_10:
+				swizzled_offset = j ^
+					swizzle_bit(9, j) ^
+					swizzle_bit(10, j);
+				swizzle_str = "bit9^10";
+				break;
+			case I915_BIT_6_SWIZZLE_9_11:
+				swizzled_offset = j ^
+					swizzle_bit(9, j) ^
+					swizzle_bit(11, j);
+				swizzle_str = "bit9^11";
+				break;
+			case I915_BIT_6_SWIZZLE_9_10_11:
+				swizzled_offset = j ^
+					swizzle_bit(9, j) ^
+					swizzle_bit(10, j) ^
+					swizzle_bit(11, j);
+				swizzle_str = "bit9^10^11";
+				break;
+			default:
+				fprintf(stderr, "Bad swizzle bits; %u\n",
+					swizzle);
+				abort();
+			}
+			expected_val = calculate_expected(swizzled_offset);
+			found_val = linear[(j - offset) / 4];
+			if (expected_val != found_val) {
+				fprintf(stderr,	/* values are uint32_t, so print them unsigned */
+					"Bad read [%d]: %u instead of %u at 0x%08x "
+					"for read from 0x%08x to 0x%08x, swizzle=%s\n",
+					i, found_val, expected_val, j,
+					offset, offset + len,
+					swizzle_str);
+				abort();
+			}
+		}
+	}
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_tiled_pread_pwrite.c b/tests/gem_tiled_pread_pwrite.c
new file mode 100644
index 00000000..e2225c5e
--- /dev/null
+++ b/tests/gem_tiled_pread_pwrite.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file gem_tiled_pread_pwrite.c
+ *
+ * This is a test of pread's behavior on tiled objects with respect to the
+ * reported swizzling value.
+ *
+ * The goal is to exercise the slow_bit17_copy path for reading on bit17
+ * machines, but will also be useful for catching swizzling value bugs on
+ * other systems.
+ */
+
+/*
+ * Testcase: Test swizzling by checking that pwrite does the inverse of pread
+ *
+ * Together with the explicit pread testcase, this should cover our swizzle
+ * handling.
+ *
+ * Note that this test will use swap in an effort to test all of ram.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_gpu_tools.h"
+
+#define WIDTH 512
+#define HEIGHT 512
+static uint32_t linear[WIDTH * HEIGHT];
+static uint32_t current_tiling_mode;
+
+#define PAGE_SIZE 4096
+
+/* Ask the kernel which tiling and swizzle modes are set on handle. */
+static void
+gem_get_tiling(int fd, uint32_t handle, uint32_t *tiling, uint32_t *swizzle)
+{
+	struct drm_i915_gem_get_tiling query;
+	int ret;
+
+	memset(&query, 0, sizeof(query));
+	query.handle = handle;
+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &query);
+	assert(ret == 0);	/* the test cannot continue without the answer */
+
+	*tiling = query.tiling_mode;
+	*swizzle = query.swizzle_mode;
+}
+
+/* Create a BO in current_tiling_mode whose dwords hold 0, 1, 2, ... */
+static uint32_t
+create_bo_and_fill(int fd)
+{
+	const size_t bo_bytes = sizeof(linear);
+	uint32_t bo = gem_create(fd, bo_bytes);
+	uint32_t *map;
+	int idx;
+
+	gem_set_tiling(fd, bo, current_tiling_mode, WIDTH * sizeof(uint32_t));
+
+	/* Store every dword's own index through the gem_mmap mapping. */
+	map = gem_mmap(fd, bo, bo_bytes, PROT_READ | PROT_WRITE);
+	for (idx = 0; idx != WIDTH*HEIGHT; idx++)
+		map[idx] = idx;
+	munmap(map, bo_bytes);
+	return bo;
+}
+
+/* Allocate an unfilled BO of sizeof(linear) bytes in the current tiling mode. */
+static uint32_t
+create_bo(int fd)
+{
+	const uint32_t row_bytes = WIDTH * sizeof(uint32_t);
+	uint32_t bo = gem_create(fd, sizeof(linear));
+
+	gem_set_tiling(fd, bo, current_tiling_mode, row_bytes);
+	return bo;
+}
+
+int
+main(int argc, char **argv)
+{
+	int fd;
+	uint32_t *data;
+	int i, j;
+	uint32_t tiling, swizzle;
+	uint32_t handle, handle_target;
+	int count;
+	/* Each round: fill a tiled BO, pread it, pwrite the data into a second BO, verify that one. */
+
+	fd = drm_open_any();
+	count = intel_get_total_ram_mb() * 9 / 10;	/* 90% of the reported RAM size (in MB, going by the helper's name) */
+
+	for (i = 0; i < count/2; i++) {	/* two BOs of sizeof(linear) (1MiB) each per round */
+		current_tiling_mode = I915_TILING_X;
+
+		handle = create_bo_and_fill(fd);
+		gem_get_tiling(fd, handle, &tiling, &swizzle);	/* results are not used below */
+
+		gem_read(fd, handle, 0, linear, sizeof(linear));
+
+		handle_target = create_bo(fd);
+		gem_write(fd, handle_target, 0, linear, sizeof(linear));
+
+		/* Check the target bo's contents: every dword must equal its index. */
+		data = gem_mmap(fd, handle_target, sizeof(linear), PROT_READ | PROT_WRITE);
+		for (j = 0; j < WIDTH*HEIGHT; j++)
+			if (data[j] != j) {
+				fprintf(stderr, "mismatch at %i: %i\n",
+						j, data[j]);
+				exit(1);
+			}
+		munmap(data, sizeof(linear));
+
+		/* Leak both bos so that we use all of system mem! */
+
+		drmtest_progress("gem_tiled_pread_pwrite: ", i, count/2);
+	}
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_tiled_swapping.c b/tests/gem_tiled_swapping.c
new file mode 100644
index 00000000..d1484f0e
--- /dev/null
+++ b/tests/gem_tiled_swapping.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+/** @file gem_tiled_swapping.c
+ *
+ * This is a test of pread's behavior on tiled objects with respect to the
+ * reported swizzling value.
+ *
+ * The goal is to exercise the slow_bit17_copy path for reading on bit17
+ * machines, but will also be useful for catching swizzling value bugs on
+ * other systems.
+ */
+
+/*
+ * Testcase: Exercise swizzle code for swapping
+ *
+ * The swizzle checks in the swapin path are at a different place than the ones
+ * for pread/pwrite, so we need to check them separately.
+ *
+ * This test obviously needs swap present (and exits if none is detected).
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_gpu_tools.h"
+
+#define WIDTH 512
+#define HEIGHT 512
+static uint32_t linear[WIDTH * HEIGHT];
+static uint32_t current_tiling_mode;
+
+#define PAGE_SIZE 4096
+
+/* Build a BO in current_tiling_mode and fill it with the sequence 0, 1, 2, ... */
+static uint32_t
+create_bo_and_fill(int fd)
+{
+	uint32_t bo, *map;
+	int n = 0;
+
+	bo = gem_create(fd, sizeof(linear));
+	gem_set_tiling(fd, bo, current_tiling_mode, WIDTH * sizeof(uint32_t));
+	/* Each dword gets its own index, written through the gem_mmap mapping. */
+	map = gem_mmap(fd, bo, sizeof(linear), PROT_READ | PROT_WRITE);
+	while (n < WIDTH*HEIGHT) {
+		map[n] = n;
+		n++;
+	}
+	munmap(map, sizeof(linear));
+	return bo;
+}
+
+uint32_t *bo_handles;
+int *idx_arr;
+
+int
+main(int argc, char **argv)
+{
+	int fd;
+	uint32_t *data;
+	int i, j;
+	int count;
+	current_tiling_mode = I915_TILING_X;
+	/* Fill more 1MiB BOs than there is RAM, then verify half of them in permuted order. */
+	fd = drm_open_any();
+	/* need slightly more than total ram: one BO per reported MB, plus 10% */
+	count = intel_get_total_ram_mb() * 11 / 10;
+	bo_handles = calloc(count, sizeof(uint32_t));
+	assert(bo_handles);
+
+	idx_arr = calloc(count, sizeof(int));
+	assert(idx_arr);
+	/* Exit with 77 when there is no swap, or less swap than a quarter of RAM. */
+	if (intel_get_total_swap_mb() == 0) {
+		printf("no swap detected\n");
+		return 77;
+	}
+
+	if (intel_get_total_ram_mb() / 4 > intel_get_total_swap_mb()) {
+		printf("not enough swap detected\n");
+		return 77;
+	}
+
+	for (i = 0; i < count; i++)
+		bo_handles[i] = create_bo_and_fill(fd);
+
+	for (i = 0; i < count; i++)	/* identity order, permuted just below */
+		idx_arr[i] = i;
+
+	drmtest_permute_array(idx_arr, count,
+			      drmtest_exchange_int);
+
+	for (i = 0; i < count/2; i++) {
+		/* Map the BO and check that every dword still equals its index. */
+		data = gem_mmap(fd, bo_handles[idx_arr[i]],
+				sizeof(linear), PROT_READ | PROT_WRITE);
+		for (j = 0; j < WIDTH*HEIGHT; j++)
+			if (data[j] != j) {
+				fprintf(stderr, "mismatch at %i: %i\n",
+						j, data[j]);
+				exit(1);
+			}
+		munmap(data, sizeof(linear));
+	}
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_unfence_active_buffers.c b/tests/gem_unfence_active_buffers.c
new file mode 100644
index 00000000..bffc62ea
--- /dev/null
+++ b/tests/gem_unfence_active_buffers.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+/** @file gem_unfence_active_buffers.c
+ *
+ * Testcase: Check for use-after free in the fence stealing code
+ *
+ * If we're stealing the fence of an active object where the active list is the
+ * only thing holding a reference, we need to be careful not to access the old
+ * object we're stealing the fence from after that reference has been dropped by
+ * retire_requests.
+ *
+ * Note that this needs slab poisoning enabled in the kernel to reliably hit the
+ * problem - the race window is too small.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdbool.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+uint32_t devid;
+
+#define TEST_SIZE (1024*1024)
+#define TEST_STRIDE (4*1024)
+
+uint32_t data[TEST_SIZE/4];
+
+int main(int argc, char **argv)
+{
+	int i, ret, fd, num_fences;
+	drm_intel_bo *busy_bo, *test_bo;
+	uint32_t tiling = I915_TILING_X;
+	/* Three phases: queue blits on busy_bo, churn fenced test BOs, then queue more blits. */
+	for (i = 0; i < 1024*256; i++)	/* 1024*256 == TEST_SIZE/4, the length of data[] */
+		data[i] = i;
+
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	devid = intel_get_drm_devid(fd);
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+	printf("filling ring\n");
+	busy_bo = drm_intel_bo_alloc(bufmgr, "busy bo bo", 16*1024*1024, 4096);
+	/* Emit 250 blits whose source and destination are both busy_bo; flushed explicitly only after the loop. */
+	for (i = 0; i < 250; i++) {
+		BEGIN_BATCH(8);
+		OUT_BATCH(XY_SRC_COPY_BLT_CMD |
+			  XY_SRC_COPY_BLT_WRITE_ALPHA |
+			  XY_SRC_COPY_BLT_WRITE_RGB);
+		OUT_BATCH((3 << 24) | /* 32 bits */
+			  (0xcc << 16) | /* copy ROP */
+			  2*1024*4);
+		OUT_BATCH(0 << 16 | 1024);
+		OUT_BATCH((2048) << 16 | (2048));
+		OUT_RELOC_FENCED(busy_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+		OUT_BATCH(0 << 16 | 0);
+		OUT_BATCH(2*1024*4);
+		OUT_RELOC_FENCED(busy_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+		ADVANCE_BATCH();
+
+		if (IS_GEN6(devid) || IS_GEN7(devid)) {	/* extra clip-setup command on gen6/gen7 only */
+			BEGIN_BATCH(3);
+			OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+			ADVANCE_BATCH();
+		}
+	}
+	intel_batchbuffer_flush(batch);
+
+	num_fences = gem_available_fences(fd);
+	printf("creating havoc on %i fences\n", num_fences);
+	/* Allocate, tile, blit and immediately unreference twice as many BOs as there are fences. */
+	for (i = 0; i < num_fences*2; i++) {
+		test_bo = drm_intel_bo_alloc(bufmgr, "test_bo",
+					     TEST_SIZE, 4096);
+		ret = drm_intel_bo_set_tiling(test_bo, &tiling, TEST_STRIDE);
+		assert(ret == 0);
+
+		drm_intel_bo_disable_reuse(test_bo);
+
+		BEGIN_BATCH(8);
+		OUT_BATCH(XY_SRC_COPY_BLT_CMD |
+			  XY_SRC_COPY_BLT_WRITE_ALPHA |
+			  XY_SRC_COPY_BLT_WRITE_RGB);
+		OUT_BATCH((3 << 24) | /* 32 bits */
+			  (0xcc << 16) | /* copy ROP */
+			  TEST_STRIDE);
+		OUT_BATCH(0 << 16 | 0);
+		OUT_BATCH((1) << 16 | (1));
+		OUT_RELOC_FENCED(test_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+		OUT_BATCH(0 << 16 | 0);
+		OUT_BATCH(TEST_STRIDE);
+		OUT_RELOC_FENCED(test_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+		ADVANCE_BATCH();
+		intel_batchbuffer_flush(batch);
+		printf("test bo offset: %#lx\n", test_bo->offset);
+
+		drm_intel_bo_unreference(test_bo);	/* drop our reference right after submitting */
+	}
+
+	/* launch a few batches to ensure the damaged slab objects get reused. */
+	for (i = 0; i < 10; i++) {
+		BEGIN_BATCH(8);
+		OUT_BATCH(XY_SRC_COPY_BLT_CMD |
+			  XY_SRC_COPY_BLT_WRITE_ALPHA |
+			  XY_SRC_COPY_BLT_WRITE_RGB);
+		OUT_BATCH((3 << 24) | /* 32 bits */
+			  (0xcc << 16) | /* copy ROP */
+			  2*1024*4);
+		OUT_BATCH(0 << 16 | 1024);
+		OUT_BATCH((1) << 16 | (1));
+		OUT_RELOC_FENCED(busy_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+		OUT_BATCH(0 << 16 | 0);
+		OUT_BATCH(2*1024*4);
+		OUT_RELOC_FENCED(busy_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+		ADVANCE_BATCH();
+
+		if (IS_GEN6(devid) || IS_GEN7(devid)) {
+			BEGIN_BATCH(3);
+			OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
+			OUT_BATCH(0);
+			OUT_BATCH(0);
+			ADVANCE_BATCH();
+		}
+	}
+	intel_batchbuffer_flush(batch);
+
+	return 0;
+}
diff --git a/tests/gem_unref_active_buffers.c b/tests/gem_unref_active_buffers.c
new file mode 100644
index 00000000..aa29c1de
--- /dev/null
+++ b/tests/gem_unref_active_buffers.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+/*
+ * Testcase: Unreferencing of active buffers
+ *
+ * Execs buffers and immediately unreferences them, hence the kernel active list
+ * will be the last one to hold a reference on them. Usually libdrm bo caching
+ * prevents that by keeping another reference.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+static drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+static drm_intel_bo *load_bo;
+
+int main(int argc, char **argv)
+{
+	int fd, i;
+	/* Submit 1000 blits, dropping our only reference to each 4MiB target right after submission. */
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	if (!bufmgr) {
+		fprintf(stderr, "failed to init libdrm\n");
+		exit(-1);
+	}
+	/* don't enable buffer reuse!! (the bo cache would keep another reference) */
+	//drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+	assert(batch);
+
+	/* put some load onto the gpu to keep the light buffers active for long
+	 * enough */
+	for (i = 0; i < 1000; i++) {
+		load_bo = drm_intel_bo_alloc(bufmgr, "target bo", 1024*4096, 4096);
+		if (!load_bo) {
+			fprintf(stderr, "failed to alloc target buffer\n");
+			exit(-1);
+		}
+		/* Blit within load_bo: the source and destination relocs both target it. */
+		BEGIN_BATCH(8);
+		OUT_BATCH(XY_SRC_COPY_BLT_CMD |
+			  XY_SRC_COPY_BLT_WRITE_ALPHA |
+			  XY_SRC_COPY_BLT_WRITE_RGB);
+		OUT_BATCH((3 << 24) | /* 32 bits */
+			  (0xcc << 16) | /* copy ROP */
+			  4096);
+		OUT_BATCH(0); /* dst x1,y1 */
+		OUT_BATCH((1024 << 16) | 512);
+		OUT_RELOC(load_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+		OUT_BATCH((0 << 16) | 512); /* src x1, y1 */
+		OUT_BATCH(4096);
+		OUT_RELOC(load_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
+		ADVANCE_BATCH();
+
+		intel_batchbuffer_flush(batch);
+		/* Give up our reference immediately after the flush. */
+		drm_intel_bo_disable_reuse(load_bo);
+		drm_intel_bo_unreference(load_bo);
+	}
+
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gem_vmap_blits.c b/tests/gem_vmap_blits.c
new file mode 100644
index 00000000..b8868214
--- /dev/null
+++ b/tests/gem_vmap_blits.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright © 2009,2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/** @file gem_vmap_blits.c
+ *
+ * This is a test of doing many blits using a mixture of normal system pages
+ * and uncached linear buffers, with a working set larger than the
+ * aperture size.
+ *
+ * The goal is to simply ensure the basics work.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
+
+#if !defined(I915_PARAM_HAS_VMAP)
+#warning No vmap support in drm, skipping
+int main(int argc, char **argv)
+{	/* Built without the vmap ioctl definitions: say so and exit with 77. */
+	fputs("No vmap support in drm.\n", stderr);
+	return 77;
+}
+#else
+
+#define WIDTH 512
+#define HEIGHT 512
+
+static uint32_t linear[WIDTH*HEIGHT];
+
+/* Hand size bytes of user memory at ptr to the vmap ioctl; returns the new handle, or 0 if the ioctl fails. */
+static uint32_t gem_vmap(int fd, void *ptr, int size, int read_only)
+{
+	struct drm_i915_gem_vmap vmap;
+
+	/* Zero the whole argument first so no field or padding reaches the kernel uninitialised. */
+	memset(&vmap, 0, sizeof(vmap));
+	vmap.user_ptr = (uintptr_t)ptr;
+	vmap.user_size = size;
+	if (read_only)
+		vmap.flags |= I915_VMAP_READ_ONLY;
+	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_VMAP, &vmap))
+		return 0;
+	return vmap.handle;
+}
+
+
+static void gem_vmap_sync(int fd, uint32_t handle)
+{	/* Calls gem_set_domain with the CPU domain for both domain arguments; main() does this before checking vmap memory. */
+	gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+}
+
+static void
+gem_read(int fd, uint32_t handle, int offset, int size, void *buf)
+{	/* pread size bytes starting at offset from the object into buf; asserts on failure */
+	struct drm_i915_gem_pread pread;
+	int ret;
+	memset(&pread, 0, sizeof(pread));	/* don't pass stack garbage in fields not set below */
+	pread.handle = handle;
+	pread.offset = offset;
+	pread.size = size;
+	pread.data_ptr = (uintptr_t)buf;
+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
+	assert(ret == 0);
+}
+
+static void
+copy(int fd, uint32_t dst, uint32_t src)
+{
+	uint32_t batch[10];
+	struct drm_i915_gem_relocation_entry reloc[2];
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_execbuffer2 exec;
+	uint32_t handle;
+	int ret;
+	/* XY_SRC_COPY blit of WIDTH x HEIGHT 32-bit pixels from src to dst; dwords 4 and 7 are filled in via relocations. */
+	batch[0] = XY_SRC_COPY_BLT_CMD |
+		  XY_SRC_COPY_BLT_WRITE_ALPHA |
+		  XY_SRC_COPY_BLT_WRITE_RGB;
+	batch[1] = (3 << 24) | /* 32 bits */
+		  (0xcc << 16) | /* copy ROP */
+		  WIDTH*4;
+	batch[2] = 0; /* dst x1,y1 */
+	batch[3] = (HEIGHT << 16) | WIDTH; /* dst x2,y2 */
+	batch[4] = 0; /* dst reloc */
+	batch[5] = 0; /* src x1,y1 */
+	batch[6] = WIDTH*4;
+	batch[7] = 0; /* src reloc */
+	batch[8] = MI_BATCH_BUFFER_END;
+	batch[9] = MI_NOOP;
+	/* Upload the commands into a fresh 4096-byte batch object. */
+	handle = gem_create(fd, 4096);
+	gem_write(fd, handle, 0, batch, sizeof(batch));
+	/* Relocation for batch dword 4: the destination, which the blit writes. */
+	reloc[0].target_handle = dst;
+	reloc[0].delta = 0;
+	reloc[0].offset = 4 * sizeof(batch[0]);
+	reloc[0].presumed_offset = 0;
+	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;;
+	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+	/* Relocation for batch dword 7: the source, no write domain. */
+	reloc[1].target_handle = src;
+	reloc[1].delta = 0;
+	reloc[1].offset = 7 * sizeof(batch[0]);
+	reloc[1].presumed_offset = 0;
+	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;;
+	reloc[1].write_domain = 0;
+	/* Buffer list: dst, src, then the batch itself, which carries both relocations. */
+	obj[0].handle = dst;
+	obj[0].relocation_count = 0;
+	obj[0].relocs_ptr = 0;
+	obj[0].alignment = 0;
+	obj[0].offset = 0;
+	obj[0].flags = 0;
+	obj[0].rsvd1 = 0;
+	obj[0].rsvd2 = 0;
+
+	obj[1].handle = src;
+	obj[1].relocation_count = 0;
+	obj[1].relocs_ptr = 0;
+	obj[1].alignment = 0;
+	obj[1].offset = 0;
+	obj[1].flags = 0;
+	obj[1].rsvd1 = 0;
+	obj[1].rsvd2 = 0;
+
+	obj[2].handle = handle;
+	obj[2].relocation_count = 2;
+	obj[2].relocs_ptr = (uintptr_t)reloc;
+	obj[2].alignment = 0;
+	obj[2].offset = 0;
+	obj[2].flags = 0;
+	obj[2].rsvd1 = obj[2].rsvd2 = 0;
+
+	exec.buffers_ptr = (uintptr_t)obj;
+	exec.buffer_count = 3;
+	exec.batch_start_offset = 0;
+	exec.batch_len = sizeof(batch);
+	exec.DR1 = exec.DR4 = 0;
+	exec.num_cliprects = 0;
+	exec.cliprects_ptr = 0;
+	exec.flags = HAS_BLT_RING(intel_get_drm_devid(fd)) ? I915_EXEC_BLT : 0;	/* select the BLT ring where HAS_BLT_RING() says there is one */
+	exec.rsvd1 = exec.rsvd2 = 0;
+	/* Submit; while the ioctl fails with EBUSY, throttle and try again. */
+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
+	while (ret && errno == EBUSY) {
+		drmCommandNone(fd, DRM_I915_GEM_THROTTLE);
+		ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
+	}
+	assert(ret == 0);
+
+	gem_close(fd, handle);	/* the batch object is used for this one submission only */
+}
+
+/* Turn the sizeof(linear) bytes at ptr into a vmap object counting up from val. */
+static uint32_t
+create_vmap(int fd, uint32_t val, uint32_t *ptr)
+{
+	uint32_t bo = gem_vmap(fd, ptr, sizeof(linear), 0);
+	uint32_t *cur = ptr;
+	uint32_t *const end = ptr + WIDTH*HEIGHT;
+
+	/* The pattern is written through ptr, i.e. the memory handed to gem_vmap(). */
+	while (cur != end)
+		*cur++ = val++;
+
+	return bo;
+}
+
+/* Allocate a BO of sizeof(linear) bytes and upload the dwords val, val+1, ... to it. */
+static uint32_t
+create_bo(int fd, uint32_t val)
+{
+	const size_t bytes = sizeof(linear);
+	uint32_t bo = gem_create(fd, bytes);
+	int n;
+
+	/* Stage the pattern in linear[], then write it to the BO in one go. */
+	for (n = 0; n != WIDTH*HEIGHT; n++)
+		linear[n] = val + n;
+	gem_write(fd, bo, 0, linear, bytes);
+
+	return bo;
+}
+
+/* Abort unless ptr[] holds WIDTH*HEIGHT consecutive dwords starting at val. */
+static void
+check_cpu(uint32_t *ptr, uint32_t val)
+{
+	int idx;
+
+	for (idx = 0; idx < WIDTH*HEIGHT; idx++, val++) {
+		if (ptr[idx] == val)
+			continue;
+		fprintf(stderr, "Expected 0x%08x, found 0x%08x "
+			"at offset 0x%08x\n",
+			val, ptr[idx], idx * 4);
+		abort();
+	}
+}
+
+static void
+check_gpu(int fd, uint32_t handle, uint32_t val)
+{	/* Read the whole object back into linear[] and verify it with check_cpu(). */
+	gem_read(fd, handle, 0, linear, sizeof(linear));	/* NOTE(review): passes (buf, size), but the gem_read() defined in this file takes (size, buf) - confirm which is intended */
+	check_cpu(linear, val);
+}
+
+/* Non-zero when the I915_PARAM_HAS_VMAP getparam succeeds and reports a positive value. */
+static int has_vmap(int fd)
+{
+	drm_i915_getparam_t query;
+	int answer;
+
+	query.value = &answer;
+	query.param = I915_PARAM_HAS_VMAP;
+	return !drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &query) && answer > 0;
+}
+
+/* Blit between regular BOs and vmap'ed user memory, checking the contents after each phase. */
+int main(int argc, char **argv)
+{
+	uint32_t *memory;
+	uint32_t *cpu, *cpu_val, *gpu, *gpu_val;
+	uint32_t start = 0;
+	int i, fd, count;
+
+	fd = drm_open_any();
+
+	if (!has_vmap(fd)) {
+		fprintf(stderr, "No vmap support, ignoring.\n");
+		return 77;
+	}
+
+	count = argc > 1 ? atoi(argv[1]) : 0;	/* optional buffer count from the command line */
+	if (count == 0)
+		count = 3 * gem_aperture_size(fd) / (1024*1024) / 4;
+	printf("Using 2x%d 1MiB buffers\n", count);
+
+	memory = malloc(count*sizeof(linear));
+	if (memory == NULL) {
+		fprintf(stderr, "Unable to allocate %lld bytes\n",
+			(long long)count*sizeof(linear));
+		return 1;
+	}
+
+	/* One allocation carries four count-sized arrays: gpu, gpu_val, cpu, cpu_val. */
+	gpu = malloc(sizeof(uint32_t)*count*4);
+	assert(gpu != NULL);	/* previously used unchecked */
+	gpu_val = gpu + count;
+	cpu = gpu_val + count;
+	cpu_val = cpu + count;
+
+	for (i = 0; i < count; i++) {
+		gpu[i] = create_bo(fd, start);
+		gpu_val[i] = start;
+		start += WIDTH*HEIGHT;
+	}
+
+	for (i = 0; i < count; i++) {
+		cpu[i] = create_vmap(fd, start, memory+i*WIDTH*HEIGHT);
+		cpu_val[i] = start;
+		start += WIDTH*HEIGHT;
+	}
+
+	printf("Verifying initialisation...\n");
+	for (i = 0; i < count; i++) {
+		check_gpu(fd, gpu[i], gpu_val[i]);
+		check_cpu(memory+i*WIDTH*HEIGHT, cpu_val[i]);
+	}
+
+	printf("Cyclic blits cpu->gpu, forward...\n");
+	for (i = 0; i < count * 4; i++) {
+		int src = i % count;
+		int dst = (i + 1) % count;
+
+		copy(fd, gpu[dst], cpu[src]);
+		gpu_val[dst] = cpu_val[src];	/* track the start value now expected in dst */
+	}
+	for (i = 0; i < count; i++)
+		check_gpu(fd, gpu[i], gpu_val[i]);
+
+	printf("Cyclic blits gpu->cpu, backward...\n");
+	for (i = 0; i < count * 4; i++) {
+		int src = (i + 1) % count;
+		int dst = i % count;
+
+		copy(fd, cpu[dst], gpu[src]);
+		cpu_val[dst] = gpu_val[src];
+	}
+	for (i = 0; i < count; i++) {
+		gem_vmap_sync(fd, cpu[i]);
+		check_cpu(memory+i*WIDTH*HEIGHT, cpu_val[i]);
+	}
+
+	printf("Random blits...\n");
+	for (i = 0; i < count * 4; i++) {
+		int src = random() % count;
+		int dst = random() % count;
+
+		if (random() & 1) {
+			copy(fd, gpu[dst], cpu[src]);
+			gpu_val[dst] = cpu_val[src];
+		} else {
+			copy(fd, cpu[dst], gpu[src]);
+			cpu_val[dst] = gpu_val[src];
+		}
+	}
+	for (i = 0; i < count; i++) {
+		check_gpu(fd, gpu[i], gpu_val[i]);
+		gem_vmap_sync(fd, cpu[i]);
+		check_cpu(memory+i*WIDTH*HEIGHT, cpu_val[i]);
+	}
+
+	return 0;
+}
+
+#endif
diff --git a/tests/gem_wait_render_timeout.c b/tests/gem_wait_render_timeout.c
new file mode 100644
index 00000000..c321d36c
--- /dev/null
+++ b/tests/gem_wait_render_timeout.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+
+#include <stdio.h>
+#include <time.h>
+#include "drm.h"
+#include "rendercopy.h"
+
+#define MSEC_PER_SEC	1000L
+#define USEC_PER_MSEC	1000L
+#define NSEC_PER_USEC	1000L
+#define NSEC_PER_MSEC	1000000L
+#define USEC_PER_SEC	1000000L
+#define NSEC_PER_SEC	1000000000L
+
+#define ENOUGH_WORK_IN_SECONDS 2
+#define BUF_SIZE (8<<20)
+#define BUF_PAGES ((8<<20)>>12)
+drm_intel_bo *dst, *dst2;
+
+/* Milliseconds from *start to *end; each tv_nsec is cut to whole ms before subtracting. */
+static int64_t
+do_time_diff(struct timespec *end, struct timespec *start)
+{
+	const double whole_secs = difftime(end->tv_sec, start->tv_sec);
+	const long sub_ms = end->tv_nsec / NSEC_PER_MSEC -
+			    start->tv_nsec / NSEC_PER_MSEC;
+	return (MSEC_PER_SEC * whole_secs) + sub_ms;
+}
+
+/* Local copy&paste of the wait ioctl argument, to avoid stupid dependencies on libdrm. */
+struct local_drm_i915_gem_wait {
+	/** Handle of BO we shall wait on */
+	__u32 bo_handle;
+	__u32 flags;	/* gem_bo_wait_timeout() always passes 0 */
+	/** Number of nanoseconds to wait, Returns time remaining. */
+	__u64 timeout_ns;
+};
+
+# define WAIT_IOCTL DRM_IOWR(DRM_COMMAND_BASE + 0x2c, struct local_drm_i915_gem_wait)
+
+/* Issue WAIT_IOCTL on a BO handle; returns 0, or -errno when the ioctl fails. */
+static int
+gem_bo_wait_timeout(int fd, uint32_t handle, uint64_t *timeout_ns)
+{
+	struct local_drm_i915_gem_wait args;
+	int failed;
+
+	assert(timeout_ns);
+	args.flags = 0;
+	args.bo_handle = handle;
+	args.timeout_ns = *timeout_ns;	/* in: how long the caller will wait, in ns */
+	failed = drmIoctl(fd, WAIT_IOCTL, &args);
+	*timeout_ns = args.timeout_ns;	/* out: copied back even when the ioctl failed */
+
+	return failed ? -errno : 0;
+}
+
+static bool
+gem_bo_busy(int fd, uint32_t handle)
+{
+	struct drm_i915_gem_busy busy;
+	/* Query DRM_IOCTL_I915_GEM_BUSY for this handle; the call is wrapped in do_or_die(). */
+	busy.handle = handle;	/* the only field filled in before the ioctl */
+	do_or_die(drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy));
+
+	return !!busy.busy;	/* collapse the returned busy field to true/false */
+}
+
+static void blt_color_fill(struct intel_batchbuffer *batch,
+			   drm_intel_bo *buf,
+			   const unsigned int pages)	/* only used to derive height below */
+{
+	const unsigned short height = pages/4;
+	const unsigned short width =  4096;
+	BEGIN_BATCH(5);	/* five emits follow: OUT_BATCH x3, OUT_RELOC, OUT_BATCH */
+	OUT_BATCH(COLOR_BLT_CMD		|
+		  COLOR_BLT_WRITE_ALPHA	|
+		  COLOR_BLT_WRITE_RGB);
+	OUT_BATCH((3 << 24)	| /* 32 Bit Color */
+		  0xF0		| /* Raster OP copy background register; NOTE(review): ORed in unshifted - confirm bit position */
+		  0);		  /* Dest pitch is 0 */
+	OUT_BATCH(width << 16	|
+		  height);
+	OUT_RELOC(buf, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);	/* buf is the fill target */
+	OUT_BATCH(rand()); /* random pattern */
+	ADVANCE_BATCH();
+}
+
+int main(int argc, char **argv)
+{
+	drm_intel_bufmgr *bufmgr;
+	struct intel_batchbuffer *batch;
+	uint64_t timeout = ENOUGH_WORK_IN_SECONDS * NSEC_PER_SEC;
+	int fd, ret;
+	const bool do_signals = true; /* signals will seem to make the operation
+				       * use less process CPU time */
+	bool done = false;
+	int i, iter = 1;	/* iter: fills queued per timed pass, doubled below */
+
+	fd = drm_open_any();
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+
+	dst = drm_intel_bo_alloc(bufmgr, "dst", BUF_SIZE, 4096);
+	dst2 = drm_intel_bo_alloc(bufmgr, "dst2", BUF_SIZE, 4096);
+	/* Probe the ioctl before any work is queued: -EINVAL means skip. */
+	if (gem_bo_wait_timeout(fd, dst->handle, &timeout) == -EINVAL) {
+		printf("kernel doesn't support wait_timeout, skipping test\n");
+		return 77;	/* 77 marks the test as skipped; -77 would surface as exit status 179 */
+	}
+	timeout = ENOUGH_WORK_IN_SECONDS * NSEC_PER_SEC;	/* the probe overwrote it */
+
+	/* Figure out a rough number of fills required to consume more than
+	 * ENOUGH_WORK_IN_SECONDS seconds of GPU work.
+	 */
+	do {
+		struct timespec start, end;
+		int64_t diff;	/* milliseconds, same type do_time_diff() returns */
+
+#ifndef CLOCK_MONOTONIC_RAW
+#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC
+#endif
+
+		assert(clock_gettime(CLOCK_MONOTONIC_RAW, &start) == 0);
+		for (i = 0; i < iter; i++)
+			blt_color_fill(batch, dst, BUF_PAGES);
+		intel_batchbuffer_flush(batch);
+		drm_intel_bo_wait_rendering(dst);
+		assert(clock_gettime(CLOCK_MONOTONIC_RAW, &end) == 0);
+
+		diff = do_time_diff(&end, &start);
+		assert(diff >= 0);
+
+		if ((diff / MSEC_PER_SEC) > ENOUGH_WORK_IN_SECONDS)
+			done = true;
+		else
+			iter <<= 1;
+	} while (!done && iter < 1000000);
+
+	assert(iter < 1000000);
+
+	printf("%d iters is enough work\n", iter);
+	gem_quiescent_gpu(fd);
+	if (do_signals)
+		drmtest_fork_signal_helper();
+
+	/* We should be able to do half as much work in the same amount of time,
+	 * but because we might schedule almost twice as much as required, we
+	 * might accidentally time out. Hence add some fudge. */
+	for (i = 0; i < iter/3; i++)
+		blt_color_fill(batch, dst2, BUF_PAGES);
+
+	intel_batchbuffer_flush(batch);
+	assert(gem_bo_busy(fd, dst2->handle) == true);
+
+	ret = gem_bo_wait_timeout(fd, dst2->handle, &timeout);
+	if (ret) {
+		fprintf(stderr, "Timed wait failed %s\n", strerror(-ret));	/* ret is -errno */
+		exit(EXIT_FAILURE);
+	}
+	assert(gem_bo_busy(fd, dst2->handle) == false);
+	assert(timeout != 0);
+	if (timeout ==  (ENOUGH_WORK_IN_SECONDS * NSEC_PER_SEC))
+		printf("Buffer was already done!\n");
+	else {
+		printf("Finished with %llu time remaining\n", (unsigned long long)timeout);
+	}
+
+	/* check that polling with timeout=0 works. */
+	timeout = 0;
+	assert(gem_bo_wait_timeout(fd, dst2->handle, &timeout) == 0);
+	assert(timeout == 0);
+
+	/* Now check that we correctly time out, twice the auto-tune load should
+	 * be good enough. */
+	timeout = ENOUGH_WORK_IN_SECONDS * NSEC_PER_SEC;
+	for (i = 0; i < iter*2; i++)
+		blt_color_fill(batch, dst2, BUF_PAGES);
+
+	intel_batchbuffer_flush(batch);
+
+	ret = gem_bo_wait_timeout(fd, dst2->handle, &timeout);
+	assert(ret == -ETIME);
+	assert(timeout == 0);
+	assert(gem_bo_busy(fd, dst2->handle) == true);
+
+	/* check that polling with timeout=0 works. */
+	timeout = 0;
+	assert(gem_bo_wait_timeout(fd, dst2->handle, &timeout) == -ETIME);
+	assert(timeout == 0);
+
+
+	if (do_signals)
+		drmtest_stop_signal_helper();
+	drm_intel_bo_unreference(dst2);
+	drm_intel_bo_unreference(dst);
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tests/gen3_mixed_blits.c b/tests/gen3_mixed_blits.c
new file mode 100644
index 00000000..5bb6d868
--- /dev/null
+++ b/tests/gen3_mixed_blits.c
@@ -0,0 +1,541 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/** @file gen3_mixed_blits.c
+ *
+ * This is a test of doing many blits, with a working set
+ * larger than the aperture size.
+ *
+ * The goal is to simply ensure the basics work.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_gpu_tools.h"
+
+#include "i915_reg.h"
+#include "i915_3d.h"
+
+#define WIDTH (512)
+#define HEIGHT (512)
+
+static inline uint32_t pack_float(float f)
+{
+	uint32_t dw;
+
+	/* Hand back the float's raw bit pattern, not a numeric conversion. */
+	memcpy(&dw, &f, sizeof(dw));
+
+	return dw;
+}
+
+static uint32_t fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
+			   uint32_t offset,	/* slot index in the batch, in dwords */
+			   uint32_t handle,
+			   uint32_t read_domain,
+			   uint32_t write_domain)
+{
+	reloc->presumed_offset = 0;
+	reloc->delta = 0;
+	reloc->target_handle = handle;
+	reloc->offset = offset * sizeof(uint32_t);	/* dwords -> bytes */
+	reloc->write_domain = write_domain;
+	reloc->read_domains = read_domain;
+	/* Value the caller stores in that slot: presumed offset plus delta. */
+	return reloc->delta + reloc->presumed_offset;
+}
+
+static void
+render_copy(int fd,
+	    uint32_t dst, int dst_tiling,
+	    uint32_t src, int src_tiling,
+	    int use_fence)	/* nonzero: use the fence flags instead of per-surface tiling bits */
+{
+	uint32_t batch[1024], *b = batch;	/* commands are assembled here; b is the write cursor */
+	struct drm_i915_gem_relocation_entry reloc[2], *r = reloc;
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_execbuffer2 exec;
+	uint32_t handle;	/* BO the assembled batch is uploaded into */
+	uint32_t tiling_bits;
+	int ret;
+
+	/* invariant state */
+	*b++ = (_3DSTATE_AA_CMD |
+		AA_LINE_ECAAR_WIDTH_ENABLE |
+		AA_LINE_ECAAR_WIDTH_1_0 |
+		AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
+	*b++ = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD |
+		IAB_MODIFY_ENABLE |
+		IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) |
+		IAB_MODIFY_SRC_FACTOR |
+		(BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT) |
+		IAB_MODIFY_DST_FACTOR |
+		(BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT));
+	*b++ = (_3DSTATE_DFLT_DIFFUSE_CMD);
+	*b++ = (0);
+	*b++ = (_3DSTATE_DFLT_SPEC_CMD);
+	*b++ = (0);
+	*b++ = (_3DSTATE_DFLT_Z_CMD);
+	*b++ = (0);
+	*b++ = (_3DSTATE_COORD_SET_BINDINGS |
+		CSB_TCB(0, 0) |
+		CSB_TCB(1, 1) |
+		CSB_TCB(2, 2) |
+		CSB_TCB(3, 3) |
+		CSB_TCB(4, 4) |
+		CSB_TCB(5, 5) |
+		CSB_TCB(6, 6) |
+		CSB_TCB(7, 7));
+	*b++ = (_3DSTATE_RASTER_RULES_CMD |
+		ENABLE_POINT_RASTER_RULE |
+		OGL_POINT_RASTER_RULE |
+		ENABLE_LINE_STRIP_PROVOKE_VRTX |
+		ENABLE_TRI_FAN_PROVOKE_VRTX |
+		LINE_STRIP_PROVOKE_VRTX(1) |
+		TRI_FAN_PROVOKE_VRTX(2) |
+		ENABLE_TEXKILL_3D_4D |
+		TEXKILL_4D);
+	*b++ = (_3DSTATE_MODES_4_CMD |
+		ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) |
+		ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) |
+		ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));
+	*b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2);
+	*b++ = (0x00000000);	/* Disable texture coordinate wrap-shortest */
+	*b++ = ((1 << S4_POINT_WIDTH_SHIFT) |
+		S4_LINE_WIDTH_ONE |
+		S4_CULLMODE_NONE |
+		S4_VFMT_XY);
+	*b++ = (0x00000000);	/* Stencil. */
+	*b++ = (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+	*b++ = (_3DSTATE_SCISSOR_RECT_0_CMD);
+	*b++ = (0);
+	*b++ = (0);
+	*b++ = (_3DSTATE_DEPTH_SUBRECT_DISABLE);
+	*b++ = (_3DSTATE_LOAD_INDIRECT | 0);	/* disable indirect state */
+	*b++ = (0);
+	*b++ = (_3DSTATE_STIPPLE);
+	*b++ = (0x00000000);
+	*b++ = (_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
+
+	/* sampler state */
+	if (use_fence) {
+		tiling_bits = MS3_USE_FENCE_REGS;
+	} else {
+		tiling_bits = 0;
+		if (src_tiling != I915_TILING_NONE)
+			tiling_bits = MS3_TILED_SURFACE;
+		if (src_tiling == I915_TILING_Y)
+			tiling_bits |= MS3_TILE_WALK;
+	}
+
+#define TEX_COUNT 1
+	*b++ = (_3DSTATE_MAP_STATE | (3 * TEX_COUNT));
+	*b++ = ((1 << TEX_COUNT) - 1);
+	*b = fill_reloc(r++, b-batch, src, I915_GEM_DOMAIN_SAMPLER, 0); b++;	/* reloc[0]: src, read by the sampler */
+	*b++ = (MAPSURF_32BIT | MT_32BIT_ARGB8888 | tiling_bits |
+		(HEIGHT - 1) << MS3_HEIGHT_SHIFT |
+		(WIDTH - 1) << MS3_WIDTH_SHIFT);
+	*b++ = ((WIDTH-1) << MS4_PITCH_SHIFT);
+
+	*b++ = (_3DSTATE_SAMPLER_STATE | (3 * TEX_COUNT));
+	*b++ = ((1 << TEX_COUNT) - 1);
+	*b++ = (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT |
+		FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
+		FILTER_NEAREST << SS2_MIN_FILTER_SHIFT);
+	*b++ = (TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT |
+		TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT |
+		0 << SS3_TEXTUREMAP_INDEX_SHIFT);
+	*b++ = (0x00000000);
+
+	/* render target state */
+	if (use_fence) {
+		tiling_bits = BUF_3D_USE_FENCE;
+	} else {
+		tiling_bits = 0;
+		if (dst_tiling != I915_TILING_NONE)
+			tiling_bits = BUF_3D_TILED_SURFACE;
+		if (dst_tiling == I915_TILING_Y)
+			tiling_bits |= BUF_3D_TILE_WALK_Y;
+	}
+	*b++ = (_3DSTATE_BUF_INFO_CMD);
+	*b++ = (BUF_3D_ID_COLOR_BACK | tiling_bits | WIDTH*4);
+	*b = fill_reloc(r++, b-batch, dst,	/* reloc[1]: dst, read and written as render target */
+			I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
+	b++;
+
+	*b++ = (_3DSTATE_DST_BUF_VARS_CMD);
+	*b++ = (COLR_BUF_ARGB8888 |
+		DSTORG_HORT_BIAS(0x8) |
+		DSTORG_VERT_BIAS(0x8));
+
+	/* draw rect is unconditional */
+	*b++ = (_3DSTATE_DRAW_RECT_CMD);
+	*b++ = (0x00000000);
+	*b++ = (0x00000000);	/* ymin, xmin */
+	*b++ = (DRAW_YMAX(HEIGHT - 1) |
+		DRAW_XMAX(WIDTH - 1));
+	/* yorig, xorig (relate to color buffer?) */
+	*b++ = (0x00000000);
+
+	/* texfmt */
+	*b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) | 2);
+	*b++ = ((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
+	*b++ = (~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT) |
+		S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D));
+	*b++ = (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
+		BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
+		BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT |
+		BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT);
+
+	/* pixel shader */
+	*b++ = (_3DSTATE_PIXEL_SHADER_PROGRAM | (1 + 3*3 - 2));
+	/* decl FS_T0 */
+	*b++ = (D0_DCL |
+		REG_TYPE(FS_T0) << D0_TYPE_SHIFT |
+		REG_NR(FS_T0) << D0_NR_SHIFT |
+		((REG_TYPE(FS_T0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+	*b++ = (0);
+	*b++ = (0);
+	/* decl FS_S0 */
+	*b++ = (D0_DCL |
+		(REG_TYPE(FS_S0) << D0_TYPE_SHIFT) |
+		(REG_NR(FS_S0) << D0_NR_SHIFT) |
+		((REG_TYPE(FS_S0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+	*b++ = (0);
+	*b++ = (0);
+	/* texld(FS_OC, FS_S0, FS_T0) */
+	*b++ = (T0_TEXLD |
+		(REG_TYPE(FS_OC) << T0_DEST_TYPE_SHIFT) |
+		(REG_NR(FS_OC) << T0_DEST_NR_SHIFT) |
+		(REG_NR(FS_S0) << T0_SAMPLER_NR_SHIFT));
+	*b++ = ((REG_TYPE(FS_T0) << T1_ADDRESS_REG_TYPE_SHIFT) |
+		(REG_NR(FS_T0) << T1_ADDRESS_REG_NR_SHIFT));
+	*b++ = (0);
+
+	*b++ = (PRIM3D_RECTLIST | (3*4 - 1));	/* followed by 3 vertices of 4 floats each */
+	*b++ = pack_float(WIDTH);
+	*b++ = pack_float(HEIGHT);
+	*b++ = pack_float(WIDTH);
+	*b++ = pack_float(HEIGHT);
+
+	*b++ = pack_float(0);
+	*b++ = pack_float(HEIGHT);
+	*b++ = pack_float(0);
+	*b++ = pack_float(HEIGHT);
+
+	*b++ = pack_float(0);
+	*b++ = pack_float(0);
+	*b++ = pack_float(0);
+	*b++ = pack_float(0);
+
+	*b++ = MI_BATCH_BUFFER_END;
+	if ((b - batch) & 1)	/* pad to an even number of dwords */
+		*b++ = 0;
+
+	assert(b - batch <= 1024);
+	handle = gem_create(fd, 4096);
+	gem_write(fd, handle, 0, batch, (b-batch)*sizeof(batch[0]));
+
+	assert(r-reloc == 2);	/* exactly the src and dst relocations were recorded */
+
+	tiling_bits = 0;	/* reused from here on as the exec object flags for dst and src */
+	if (use_fence)
+		tiling_bits = EXEC_OBJECT_NEEDS_FENCE;
+
+	obj[0].handle = dst;
+	obj[0].relocation_count = 0;
+	obj[0].relocs_ptr = 0;
+	obj[0].alignment = 0;
+	obj[0].offset = 0;
+	obj[0].flags = tiling_bits;
+	obj[0].rsvd1 = 0;
+	obj[0].rsvd2 = 0;
+
+	obj[1].handle = src;
+	obj[1].relocation_count = 0;
+	obj[1].relocs_ptr = 0;
+	obj[1].alignment = 0;
+	obj[1].offset = 0;
+	obj[1].flags = tiling_bits;
+	obj[1].rsvd1 = 0;
+	obj[1].rsvd2 = 0;
+
+	obj[2].handle = handle;	/* the batch goes last and carries both relocations */
+	obj[2].relocation_count = 2;
+	obj[2].relocs_ptr = (uintptr_t)reloc;
+	obj[2].alignment = 0;
+	obj[2].offset = 0;
+	obj[2].flags = 0;
+	obj[2].rsvd1 = obj[2].rsvd2 = 0;
+
+	exec.buffers_ptr = (uintptr_t)obj;
+	exec.buffer_count = 3;
+	exec.batch_start_offset = 0;
+	exec.batch_len = (b-batch)*sizeof(batch[0]);
+	exec.DR1 = exec.DR4 = 0;
+	exec.num_cliprects = 0;
+	exec.cliprects_ptr = 0;
+	exec.flags = 0;
+	i915_execbuffer2_set_context_id(exec, 0);
+	exec.rsvd2 = 0;
+
+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
+	while (ret && errno == EBUSY) {	/* on EBUSY: issue a throttle, then resubmit */
+		drmCommandNone(fd, DRM_I915_GEM_THROTTLE);
+		ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
+	}
+	assert(ret == 0);
+
+	gem_close(fd, handle);	/* the batch BO is created and closed on every call */
+}
+
+static void blt_copy(int fd, uint32_t dst, uint32_t src)
+{
+	uint32_t batch[1024], *b = batch;	/* commands are assembled here; b is the write cursor */
+	struct drm_i915_gem_relocation_entry reloc[2], *r = reloc;
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_execbuffer2 exec;
+	uint32_t handle;	/* BO the assembled batch is uploaded into */
+	int ret;
+
+	*b++ = (XY_SRC_COPY_BLT_CMD |
+		XY_SRC_COPY_BLT_WRITE_ALPHA |
+		XY_SRC_COPY_BLT_WRITE_RGB);
+	*b++ = 3 << 24 | 0xcc << 16 | WIDTH * 4;	/* NOTE(review): presumably depth | raster op | dst pitch in bytes - confirm */
+	*b++ = 0;
+	*b++ = HEIGHT << 16 | WIDTH;
+	*b = fill_reloc(r++, b-batch, dst,	/* reloc[0]: dst */
+			I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); b++;
+	*b++ = 0;
+	*b++ = WIDTH*4;
+	*b = fill_reloc(r++, b-batch, src, I915_GEM_DOMAIN_RENDER, 0); b++;	/* reloc[1]: src, read only */
+
+	*b++ = MI_BATCH_BUFFER_END;
+	if ((b - batch) & 1)	/* pad to an even number of dwords */
+		*b++ = 0;
+
+	assert(b - batch <= 1024);
+	handle = gem_create(fd, 4096);
+	gem_write(fd, handle, 0, batch, (b-batch)*sizeof(batch[0]));
+
+	assert(r-reloc == 2);	/* exactly the dst and src relocations were recorded */
+
+	obj[0].handle = dst;
+	obj[0].relocation_count = 0;
+	obj[0].relocs_ptr = 0;
+	obj[0].alignment = 0;
+	obj[0].offset = 0;
+	obj[0].flags = EXEC_OBJECT_NEEDS_FENCE;	/* always set here, unlike render_copy() */
+	obj[0].rsvd1 = 0;
+	obj[0].rsvd2 = 0;
+
+	obj[1].handle = src;
+	obj[1].relocation_count = 0;
+	obj[1].relocs_ptr = 0;
+	obj[1].alignment = 0;
+	obj[1].offset = 0;
+	obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
+	obj[1].rsvd1 = 0;
+	obj[1].rsvd2 = 0;
+
+	obj[2].handle = handle;	/* the batch goes last and carries both relocations */
+	obj[2].relocation_count = 2;
+	obj[2].relocs_ptr = (uintptr_t)reloc;
+	obj[2].alignment = 0;
+	obj[2].offset = 0;
+	obj[2].flags = 0;
+	obj[2].rsvd1 = obj[2].rsvd2 = 0;
+
+	exec.buffers_ptr = (uintptr_t)obj;
+	exec.buffer_count = 3;
+	exec.batch_start_offset = 0;
+	exec.batch_len = (b-batch)*sizeof(batch[0]);
+	exec.DR1 = exec.DR4 = 0;
+	exec.num_cliprects = 0;
+	exec.cliprects_ptr = 0;
+	exec.flags = 0;
+	i915_execbuffer2_set_context_id(exec, 0);
+	exec.rsvd2 = 0;
+
+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
+	while (ret && errno == EBUSY) {	/* on EBUSY: issue a throttle, then resubmit */
+		drmCommandNone(fd, DRM_I915_GEM_THROTTLE);
+		ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
+	}
+	assert(ret == 0);
+
+	gem_close(fd, handle);	/* the batch BO is created and closed on every call */
+}
+
+
+static void
+copy(int fd,
+     uint32_t dst, int dst_tiling,
+     uint32_t src, int src_tiling)
+{
+	const int any_y = dst_tiling == I915_TILING_Y || src_tiling == I915_TILING_Y;
+	long pick;
+	do	/* pick 2 is the blitter path, never taken with a Y-tiled buffer: redraw */
+		pick = random() % 3;
+	while (pick == 2 && any_y);
+	if (pick == 2)
+		blt_copy(fd, dst, src);
+	else	/* pick 0 / 1 doubles as render_copy()'s use_fence argument */
+		render_copy(fd, dst, dst_tiling, src, src_tiling, pick);
+}
+
+static uint32_t
+create_bo(int fd, uint32_t val, int tiling)
+{
+	const uint32_t handle = gem_create(fd, WIDTH*HEIGHT*4);
+	uint32_t *v;
+	uint32_t *dw;
+
+	/* Apply the requested tiling (stride WIDTH*4) before mapping the BO. */
+	gem_set_tiling(fd, handle, tiling, WIDTH*4);
+
+	/* Write val, val+1, ... into consecutive dwords through the mapping. */
+	v = gem_mmap(fd, handle, WIDTH*HEIGHT*4, PROT_READ | PROT_WRITE);
+	assert(v);
+	for (dw = v; dw != v + WIDTH*HEIGHT; dw++)
+		*dw = val++;
+	munmap(v, WIDTH*HEIGHT*4);
+
+	return handle;
+}
+
+static void
+check_bo(int fd, uint32_t handle, uint32_t val)
+{
+	uint32_t *v;
+	int i;
+
+	/* Dword i must hold val + i; report and abort() on the first mismatch. */
+	v = gem_mmap(fd, handle, WIDTH*HEIGHT*4, PROT_READ);
+	assert(v);
+	for (i = 0; i < WIDTH*HEIGHT; i++, val++) {
+		if (v[i] == val)
+			continue;
+		fprintf(stderr, "Expected 0x%08x, found 0x%08x "
+			"at offset 0x%08x\n",
+			val, v[i], i * 4);
+		abort();
+	}
+	munmap(v, WIDTH*HEIGHT*4);
+}
+
+int main(int argc, char **argv)
+{
+	uint32_t *handle, *tiling, *start_val;	/* three count-sized slices of one allocation */
+	uint32_t start = 0;
+	int i, fd, count;
+
+	fd = drm_open_any();
+
+	if (!IS_GEN3(intel_get_drm_devid(fd))) {
+		printf("gen3-only test, doing nothing\n");
+		return 77;
+	}
+
+	count = 0;
+	if (argc > 1)
+		count = atoi(argv[1]);
+	if (count <= 0)	/* no usable argument (missing, zero or negative): size from the aperture */
+		count = 3 * gem_aperture_size(fd) / (1024*1024) / 2;
+	printf("Using %d 1MiB buffers\n", count);
+
+	handle = malloc(sizeof(uint32_t)*count*3);
+	assert(handle);	/* checked before any pointer is derived from it */
+	tiling = handle + count;
+	start_val = tiling + count;
+	for (i = 0; i < count; i++) {
+		handle[i] = create_bo(fd, start, tiling[i] = i % 3);	/* cycle tiling values 0, 1, 2 */
+		start_val[i] = start;
+		start += 1024 * 1024 / 4;
+	}
+
+	printf("Verifying initialisation..."); fflush(stdout);
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+	printf("done\n");
+
+	printf("Cyclic blits, forward..."); fflush(stdout);
+	for (i = 0; i < count * 32; i++) {
+		int src = i % count;
+		int dst = (i + 1) % count;
+
+		copy(fd, handle[dst], tiling[dst], handle[src], tiling[src]);
+		start_val[dst] = start_val[src];	/* dst now holds src's pattern */
+	}
+	printf("verifying..."); fflush(stdout);
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+	printf("done\n");
+
+	printf("Cyclic blits, backward..."); fflush(stdout);
+	for (i = 0; i < count * 32; i++) {
+		int src = (i + 1) % count;
+		int dst = i % count;
+
+		copy(fd, handle[dst], tiling[dst], handle[src], tiling[src]);
+		start_val[dst] = start_val[src];
+	}
+	printf("verifying..."); fflush(stdout);
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+	printf("done\n");
+
+	printf("Random blits..."); fflush(stdout);
+	for (i = 0; i < count * 32; i++) {
+		int src = random() % count;
+		int dst = random() % count;
+
+		while (count > 1 && src == dst)	/* with one buffer no distinct dst exists; don't spin */
+			dst = random() % count;
+
+		copy(fd, handle[dst], tiling[dst], handle[src], tiling[src]);
+		start_val[dst] = start_val[src];
+	}
+	printf("verifying..."); fflush(stdout);
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+	printf("done\n");
+
+	return 0;
+}
diff --git a/tests/gen3_render_linear_blits.c b/tests/gen3_render_linear_blits.c
new file mode 100644
index 00000000..529e23ff
--- /dev/null
+++ b/tests/gen3_render_linear_blits.c
@@ -0,0 +1,400 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/** @file gen3_render_linear_blits.c
+ *
+ * This is a test of doing many blits, with a working set
+ * larger than the aperture size.
+ *
+ * The goal is to simply ensure the basics work.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_gpu_tools.h"
+
+#include "i915_reg.h"
+#include "i915_3d.h"
+
+#define WIDTH 512
+#define HEIGHT 512
+
+static uint32_t linear[WIDTH*HEIGHT];
+
+static inline uint32_t pack_float(float f)
+{
+	uint32_t dw;
+
+	/* Hand back the float's raw bit pattern, not a numeric conversion. */
+	memcpy(&dw, &f, sizeof(dw));
+
+	return dw;
+}
+
+static uint32_t fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
+			   uint32_t offset,	/* slot index in the batch, in dwords */
+			   uint32_t handle,
+			   uint32_t read_domain,
+			   uint32_t write_domain)
+{
+	reloc->presumed_offset = 0;
+	reloc->delta = 0;
+	reloc->target_handle = handle;
+	reloc->offset = offset * sizeof(uint32_t);	/* dwords -> bytes */
+	reloc->write_domain = write_domain;
+	reloc->read_domains = read_domain;
+	/* Value the caller stores in that slot: presumed offset plus delta. */
+	return reloc->delta + reloc->presumed_offset;
+}
+
+static void
+copy(int fd, uint32_t dst, uint32_t src)
+{
+	uint32_t batch[1024], *b = batch;	/* commands are assembled here; b is the write cursor */
+	struct drm_i915_gem_relocation_entry reloc[2], *r = reloc;
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_execbuffer2 exec;
+	uint32_t handle;	/* BO the assembled batch is uploaded into */
+	int ret;
+
+	/* invariant state */
+	*b++ = (_3DSTATE_AA_CMD |
+		AA_LINE_ECAAR_WIDTH_ENABLE |
+		AA_LINE_ECAAR_WIDTH_1_0 |
+		AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
+	*b++ = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD |
+		IAB_MODIFY_ENABLE |
+		IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) |
+		IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE <<
+					 IAB_SRC_FACTOR_SHIFT) |
+		IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO <<
+					 IAB_DST_FACTOR_SHIFT));
+	*b++ = (_3DSTATE_DFLT_DIFFUSE_CMD);
+	*b++ = (0);
+	*b++ = (_3DSTATE_DFLT_SPEC_CMD);
+	*b++ = (0);
+	*b++ = (_3DSTATE_DFLT_Z_CMD);
+	*b++ = (0);
+	*b++ = (_3DSTATE_COORD_SET_BINDINGS |
+		CSB_TCB(0, 0) |
+		CSB_TCB(1, 1) |
+		CSB_TCB(2, 2) |
+		CSB_TCB(3, 3) |
+		CSB_TCB(4, 4) |
+		CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));
+	*b++ = (_3DSTATE_RASTER_RULES_CMD |
+		ENABLE_POINT_RASTER_RULE |
+		OGL_POINT_RASTER_RULE |
+		ENABLE_LINE_STRIP_PROVOKE_VRTX |
+		ENABLE_TRI_FAN_PROVOKE_VRTX |
+		LINE_STRIP_PROVOKE_VRTX(1) |
+		TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D);
+	*b++ = (_3DSTATE_MODES_4_CMD |
+		ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) |
+		ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) |
+		ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));
+	*b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2);
+	*b++ = (0x00000000);	/* Disable texture coordinate wrap-shortest */
+	*b++ = ((1 << S4_POINT_WIDTH_SHIFT) |
+		S4_LINE_WIDTH_ONE |
+		S4_CULLMODE_NONE |
+		S4_VFMT_XY);
+	*b++ = (0x00000000);	/* Stencil. */
+	*b++ = (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+	*b++ = (_3DSTATE_SCISSOR_RECT_0_CMD);
+	*b++ = (0);
+	*b++ = (0);
+	*b++ = (_3DSTATE_DEPTH_SUBRECT_DISABLE);
+	*b++ = (_3DSTATE_LOAD_INDIRECT | 0);	/* disable indirect state */
+	*b++ = (0);
+	*b++ = (_3DSTATE_STIPPLE);
+	*b++ = (0x00000000);
+	*b++ = (_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
+
+	/* sampler state */
+#define TEX_COUNT 1
+	*b++ = (_3DSTATE_MAP_STATE | (3 * TEX_COUNT));
+	*b++ = ((1 << TEX_COUNT) - 1);
+	*b = fill_reloc(r++, b-batch, src, I915_GEM_DOMAIN_SAMPLER, 0); b++;	/* reloc[0]: src, read by the sampler */
+	*b++ = (MAPSURF_32BIT | MT_32BIT_ARGB8888 |
+		(HEIGHT - 1) << MS3_HEIGHT_SHIFT |
+		(WIDTH - 1) << MS3_WIDTH_SHIFT);
+	*b++ = ((WIDTH-1) << MS4_PITCH_SHIFT);
+
+	*b++ = (_3DSTATE_SAMPLER_STATE | (3 * TEX_COUNT));
+	*b++ = ((1 << TEX_COUNT) - 1);
+	*b++ = (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT |
+		FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
+		FILTER_NEAREST << SS2_MIN_FILTER_SHIFT);
+	*b++ = (TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT |
+		TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT |
+		0 << SS3_TEXTUREMAP_INDEX_SHIFT);
+	*b++ = (0x00000000);
+
+	/* render target state */
+	*b++ = (_3DSTATE_BUF_INFO_CMD);
+	*b++ = (BUF_3D_ID_COLOR_BACK | WIDTH*4);
+	*b = fill_reloc(r++, b-batch, dst,	/* reloc[1]: dst, read and written as render target */
+			I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
+	b++;
+
+	*b++ = (_3DSTATE_DST_BUF_VARS_CMD);
+	*b++ = (COLR_BUF_ARGB8888 |
+		DSTORG_HORT_BIAS(0x8) |
+		DSTORG_VERT_BIAS(0x8));
+
+	/* draw rect is unconditional */
+	*b++ = (_3DSTATE_DRAW_RECT_CMD);
+	*b++ = (0x00000000);
+	*b++ = (0x00000000);	/* ymin, xmin */
+	*b++ = (DRAW_YMAX(HEIGHT - 1) |
+		DRAW_XMAX(WIDTH - 1));
+	/* yorig, xorig (relate to color buffer?) */
+	*b++ = (0x00000000);
+
+	/* texfmt */
+	*b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) | 2);
+	*b++ = ((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
+	*b++ = (~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT) |
+		S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D));
+	*b++ = (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
+		BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
+		BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT |
+		BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT);
+
+	/* pixel shader */
+	*b++ = (_3DSTATE_PIXEL_SHADER_PROGRAM | (1 + 3*3 - 2));
+	/* decl FS_T0 */
+	*b++ = (D0_DCL |
+		REG_TYPE(FS_T0) << D0_TYPE_SHIFT |
+		REG_NR(FS_T0) << D0_NR_SHIFT |
+		((REG_TYPE(FS_T0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+	*b++ = (0);
+	*b++ = (0);
+	/* decl FS_S0 */
+	*b++ = (D0_DCL |
+		(REG_TYPE(FS_S0) << D0_TYPE_SHIFT) |
+		(REG_NR(FS_S0) << D0_NR_SHIFT) |
+		((REG_TYPE(FS_S0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+	*b++ = (0);
+	*b++ = (0);
+	/* texld(FS_OC, FS_S0, FS_T0) */
+	*b++ = (T0_TEXLD |
+		(REG_TYPE(FS_OC) << T0_DEST_TYPE_SHIFT) |
+		(REG_NR(FS_OC) << T0_DEST_NR_SHIFT) |
+		(REG_NR(FS_S0) << T0_SAMPLER_NR_SHIFT));
+	*b++ = ((REG_TYPE(FS_T0) << T1_ADDRESS_REG_TYPE_SHIFT) |
+		(REG_NR(FS_T0) << T1_ADDRESS_REG_NR_SHIFT));
+	*b++ = (0);
+
+	*b++ = (PRIM3D_RECTLIST | (3*4 - 1));	/* followed by 3 vertices of 4 floats each */
+	*b++ = pack_float(WIDTH);
+	*b++ = pack_float(HEIGHT);
+	*b++ = pack_float(WIDTH);
+	*b++ = pack_float(HEIGHT);
+
+	*b++ = pack_float(0);
+	*b++ = pack_float(HEIGHT);
+	*b++ = pack_float(0);
+	*b++ = pack_float(HEIGHT);
+
+	*b++ = pack_float(0);
+	*b++ = pack_float(0);
+	*b++ = pack_float(0);
+	*b++ = pack_float(0);
+
+	*b++ = MI_BATCH_BUFFER_END;
+	if ((b - batch) & 1)	/* pad to an even number of dwords */
+		*b++ = 0;
+
+	assert(b - batch <= 1024);
+	handle = gem_create(fd, 4096);
+	gem_write(fd, handle, 0, batch, (b-batch)*sizeof(batch[0]));
+
+	assert(r-reloc == 2);	/* exactly the src and dst relocations were recorded */
+
+	obj[0].handle = dst;
+	obj[0].relocation_count = 0;
+	obj[0].relocs_ptr = 0;
+	obj[0].alignment = 0;
+	obj[0].offset = 0;
+	obj[0].flags = 0;
+	obj[0].rsvd1 = 0;
+	obj[0].rsvd2 = 0;
+
+	obj[1].handle = src;
+	obj[1].relocation_count = 0;
+	obj[1].relocs_ptr = 0;
+	obj[1].alignment = 0;
+	obj[1].offset = 0;
+	obj[1].flags = 0;
+	obj[1].rsvd1 = 0;
+	obj[1].rsvd2 = 0;
+
+	obj[2].handle = handle;	/* the batch goes last and carries both relocations */
+	obj[2].relocation_count = 2;
+	obj[2].relocs_ptr = (uintptr_t)reloc;
+	obj[2].alignment = 0;
+	obj[2].offset = 0;
+	obj[2].flags = 0;
+	obj[2].rsvd1 = obj[2].rsvd2 = 0;
+
+	exec.buffers_ptr = (uintptr_t)obj;
+	exec.buffer_count = 3;
+	exec.batch_start_offset = 0;
+	exec.batch_len = (b-batch)*sizeof(batch[0]);
+	exec.DR1 = exec.DR4 = 0;
+	exec.num_cliprects = 0;
+	exec.cliprects_ptr = 0;
+	exec.flags = 0;
+	i915_execbuffer2_set_context_id(exec, 0);
+	exec.rsvd2 = 0;
+
+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
+	while (ret && errno == EBUSY) {	/* on EBUSY: issue a throttle, then resubmit */
+		drmCommandNone(fd, DRM_I915_GEM_THROTTLE);
+		ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
+	}
+	assert(ret == 0);
+
+	gem_close(fd, handle);	/* the batch BO is created and closed on every call */
+}
+
+static uint32_t
+create_bo(int fd, uint32_t val)
+{
+	const uint32_t handle = gem_create(fd, sizeof(linear));
+	uint32_t *dw = linear;
+
+	/* Stage val, val+1, ... in the file-scope 'linear' scratch array, */
+	while (dw < linear + WIDTH*HEIGHT)
+		*dw++ = val++;
+
+	/* then upload the whole array into the new BO. */
+	gem_write(fd, handle, 0, linear, sizeof(linear));
+
+	return handle;
+}
+
+static void
+check_bo(int fd, uint32_t handle, uint32_t val)
+{
+	int i;
+
+	/* Read the BO back into 'linear'; dword i must equal val + i. */
+	gem_read(fd, handle, 0, linear, sizeof(linear));
+	for (i = 0; i < WIDTH*HEIGHT; i++, val++) {
+		if (linear[i] == val)
+			continue;
+		fprintf(stderr, "Expected 0x%08x, found 0x%08x "
+			"at offset 0x%08x\n",
+			val, linear[i], i * 4);
+		abort();
+	}
+}
+
+int main(int argc, char **argv)
+{
+	uint32_t *handle, *start_val;	/* two count-sized slices of one allocation */
+	uint32_t start = 0;
+	int i, fd, count;
+
+	fd = drm_open_any();
+
+	if (!IS_GEN3(intel_get_drm_devid(fd))) {
+		printf("gen3-only test, doing nothing\n");
+		return 77;
+	}
+
+	count = 0;
+	if (argc > 1)
+		count = atoi(argv[1]);
+	if (count <= 0)	/* no usable argument (missing, zero or negative): size from the aperture */
+		count = 3 * gem_aperture_size(fd) / (1024*1024) / 2;
+	printf("Using %d 1MiB buffers\n", count);
+
+	handle = malloc(sizeof(uint32_t)*count*2);
+	assert(handle);	/* checked before any pointer is derived from it */
+	start_val = handle + count;
+	for (i = 0; i < count; i++) {
+		handle[i] = create_bo(fd, start);
+		start_val[i] = start;	/* remember the first dword each BO should hold */
+		start += 1024 * 1024 / 4;
+	}
+
+	printf("Verifying initialisation...\n");
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+
+	printf("Cyclic blits, forward...\n");
+	for (i = 0; i < count * 4; i++) {
+		int src = i % count;
+		int dst = (i + 1) % count;
+
+		copy(fd, handle[dst], handle[src]);
+		start_val[dst] = start_val[src];	/* dst now holds src's pattern */
+	}
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+
+	printf("Cyclic blits, backward...\n");
+	for (i = 0; i < count * 4; i++) {
+		int src = (i + 1) % count;
+		int dst = i % count;
+
+		copy(fd, handle[dst], handle[src]);
+		start_val[dst] = start_val[src];
+	}
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+
+	printf("Random blits...\n");
+	for (i = 0; i < count * 4; i++) {
+		int src = random() % count;
+		int dst = random() % count;
+
+		if (src == dst)	/* self-copies are skipped, not redrawn */
+			continue;
+
+		copy(fd, handle[dst], handle[src]);
+		start_val[dst] = start_val[src];
+	}
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+
+	return 0;
+}
diff --git a/tests/gen3_render_mixed_blits.c b/tests/gen3_render_mixed_blits.c
new file mode 100644
index 00000000..1353b9d7
--- /dev/null
+++ b/tests/gen3_render_mixed_blits.c
@@ -0,0 +1,429 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/** @file gen3_render_mixed_blits.c
+ *
+ * This is a test of doing many blits between buffers of mixed tiling,
+ * with a working set larger than the aperture size.
+ *
+ * The goal is to simply ensure the basics work.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_gpu_tools.h"
+
+#include "i915_reg.h"
+#include "i915_3d.h"
+
+#define WIDTH 512
+#define HEIGHT 512
+
+static inline uint32_t pack_float(float f)
+{
+	union {	/* type-pun to read back the raw 32 bits of f */
+		float as_float;
+		uint32_t as_dword;
+	} pun;
+	pun.as_float = f;
+	return pun.as_dword;
+}
+
+static uint32_t fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
+			   uint32_t offset,
+			   uint32_t handle,
+			   uint32_t read_domain,
+			   uint32_t write_domain)
+{
+	reloc->offset = offset * sizeof(uint32_t);	/* dword index -> bytes */
+	reloc->target_handle = handle;
+	reloc->read_domains = read_domain;
+	reloc->write_domain = write_domain;
+	reloc->presumed_offset = 0;
+	reloc->delta = 0;
+	/* value for the caller to store in the batch at that dword */
+	return reloc->presumed_offset + reloc->delta;
+}
+
+static void
+copy(int fd,
+     uint32_t dst, int dst_tiling,
+     uint32_t src, int src_tiling)
+{
+	uint32_t batch[1024], *b = batch;
+	struct drm_i915_gem_relocation_entry reloc[2], *r = reloc;
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_execbuffer2 exec;
+	uint32_t handle;
+	uint32_t tiling_bits;
+	int ret;
+
+	/* Build a 3D batch drawing src as a texture over dst; invariant state first */
+	*b++ = (_3DSTATE_AA_CMD |
+		AA_LINE_ECAAR_WIDTH_ENABLE |
+		AA_LINE_ECAAR_WIDTH_1_0 |
+		AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
+	*b++ = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD |
+		IAB_MODIFY_ENABLE |
+		IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) |
+		IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE <<
+					 IAB_SRC_FACTOR_SHIFT) |
+		IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO <<
+					 IAB_DST_FACTOR_SHIFT));
+	*b++ = (_3DSTATE_DFLT_DIFFUSE_CMD);
+	*b++ = (0);
+	*b++ = (_3DSTATE_DFLT_SPEC_CMD);
+	*b++ = (0);
+	*b++ = (_3DSTATE_DFLT_Z_CMD);
+	*b++ = (0);
+	*b++ = (_3DSTATE_COORD_SET_BINDINGS |
+		CSB_TCB(0, 0) |
+		CSB_TCB(1, 1) |
+		CSB_TCB(2, 2) |
+		CSB_TCB(3, 3) |
+		CSB_TCB(4, 4) |
+		CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));
+	*b++ = (_3DSTATE_RASTER_RULES_CMD |
+		ENABLE_POINT_RASTER_RULE |
+		OGL_POINT_RASTER_RULE |
+		ENABLE_LINE_STRIP_PROVOKE_VRTX |
+		ENABLE_TRI_FAN_PROVOKE_VRTX |
+		LINE_STRIP_PROVOKE_VRTX(1) |
+		TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D);
+	*b++ = (_3DSTATE_MODES_4_CMD |
+		ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) |
+		ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) |
+		ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));
+	*b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2);
+	*b++ = (0x00000000);	/* Disable texture coordinate wrap-shortest */
+	*b++ = ((1 << S4_POINT_WIDTH_SHIFT) |
+		S4_LINE_WIDTH_ONE |
+		S4_CULLMODE_NONE |
+		S4_VFMT_XY);
+	*b++ = (0x00000000);	/* Stencil. */
+	*b++ = (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+	*b++ = (_3DSTATE_SCISSOR_RECT_0_CMD);
+	*b++ = (0);
+	*b++ = (0);
+	*b++ = (_3DSTATE_DEPTH_SUBRECT_DISABLE);
+	*b++ = (_3DSTATE_LOAD_INDIRECT | 0);	/* disable indirect state */
+	*b++ = (0);
+	*b++ = (_3DSTATE_STIPPLE);
+	*b++ = (0x00000000);
+	*b++ = (_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
+
+	/* sampler state: texture map 0 is src, flagged with src_tiling */
+	tiling_bits = 0;
+	if (src_tiling != I915_TILING_NONE)
+		tiling_bits = MS3_TILED_SURFACE;
+	if (src_tiling == I915_TILING_Y)
+		tiling_bits |= MS3_TILE_WALK;
+
+#define TEX_COUNT 1
+	*b++ = (_3DSTATE_MAP_STATE | (3 * TEX_COUNT));
+	*b++ = ((1 << TEX_COUNT) - 1);
+	*b = fill_reloc(r++, b-batch, src, I915_GEM_DOMAIN_SAMPLER, 0); b++;
+	*b++ = (MAPSURF_32BIT | MT_32BIT_ARGB8888 | tiling_bits |
+		(HEIGHT - 1) << MS3_HEIGHT_SHIFT |
+		(WIDTH - 1) << MS3_WIDTH_SHIFT);
+	*b++ = ((WIDTH-1) << MS4_PITCH_SHIFT);
+
+	*b++ = (_3DSTATE_SAMPLER_STATE | (3 * TEX_COUNT));
+	*b++ = ((1 << TEX_COUNT) - 1);
+	*b++ = (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT |
+		FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
+		FILTER_NEAREST << SS2_MIN_FILTER_SHIFT);
+	*b++ = (TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT |
+		TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT |
+		0 << SS3_TEXTUREMAP_INDEX_SHIFT);
+	*b++ = (0x00000000);
+
+	/* render target state: dst is the colour buffer, flagged with dst_tiling */
+	tiling_bits = 0;
+	if (dst_tiling != I915_TILING_NONE)
+		tiling_bits = BUF_3D_TILED_SURFACE;
+	if (dst_tiling == I915_TILING_Y)
+		tiling_bits |= BUF_3D_TILE_WALK_Y;
+	*b++ = (_3DSTATE_BUF_INFO_CMD);
+	*b++ = (BUF_3D_ID_COLOR_BACK | tiling_bits | WIDTH*4);
+	*b = fill_reloc(r++, b-batch, dst,
+			I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
+	b++;
+
+	*b++ = (_3DSTATE_DST_BUF_VARS_CMD);
+	*b++ = (COLR_BUF_ARGB8888 |
+		DSTORG_HORT_BIAS(0x8) |
+		DSTORG_VERT_BIAS(0x8));
+
+	/* draw rect is unconditional */
+	*b++ = (_3DSTATE_DRAW_RECT_CMD);
+	*b++ = (0x00000000);
+	*b++ = (0x00000000);	/* ymin, xmin */
+	*b++ = (DRAW_YMAX(HEIGHT - 1) |
+		DRAW_XMAX(WIDTH - 1));
+	/* yorig, xorig (relate to color buffer?) */
+	*b++ = (0x00000000);
+
+	/* texfmt */
+	*b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) | 2);
+	*b++ = ((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
+	*b++ = (~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT) |
+		S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D));
+	*b++ = (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
+		BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
+		BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT |
+		BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT);
+
+	/* pixel shader */
+	*b++ = (_3DSTATE_PIXEL_SHADER_PROGRAM | (1 + 3*3 - 2));
+	/* decl FS_T0 */
+	*b++ = (D0_DCL |
+		REG_TYPE(FS_T0) << D0_TYPE_SHIFT |
+		REG_NR(FS_T0) << D0_NR_SHIFT |
+		((REG_TYPE(FS_T0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+	*b++ = (0);
+	*b++ = (0);
+	/* decl FS_S0 */
+	*b++ = (D0_DCL |
+		(REG_TYPE(FS_S0) << D0_TYPE_SHIFT) |
+		(REG_NR(FS_S0) << D0_NR_SHIFT) |
+		((REG_TYPE(FS_S0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+	*b++ = (0);
+	*b++ = (0);
+	/* texld(FS_OC, FS_S0, FS_T0): sample S0 at coordinate T0 into OC */
+	*b++ = (T0_TEXLD |
+		(REG_TYPE(FS_OC) << T0_DEST_TYPE_SHIFT) |
+		(REG_NR(FS_OC) << T0_DEST_NR_SHIFT) |
+		(REG_NR(FS_S0) << T0_SAMPLER_NR_SHIFT));
+	*b++ = ((REG_TYPE(FS_T0) << T1_ADDRESS_REG_TYPE_SHIFT) |
+		(REG_NR(FS_T0) << T1_ADDRESS_REG_NR_SHIFT));
+	*b++ = (0);
+	/* one RECTLIST: 3 vertices x 4 floats, each vertex written as (x, y, x, y) */
+	*b++ = (PRIM3D_RECTLIST | (3*4 - 1));
+	*b++ = pack_float(WIDTH);
+	*b++ = pack_float(HEIGHT);
+	*b++ = pack_float(WIDTH);
+	*b++ = pack_float(HEIGHT);
+
+	*b++ = pack_float(0);
+	*b++ = pack_float(HEIGHT);
+	*b++ = pack_float(0);
+	*b++ = pack_float(HEIGHT);
+
+	*b++ = pack_float(0);
+	*b++ = pack_float(0);
+	*b++ = pack_float(0);
+	*b++ = pack_float(0);
+
+	*b++ = MI_BATCH_BUFFER_END;
+	if ((b - batch) & 1)	/* keep the batch an even number of dwords */
+		*b++ = 0;
+
+	assert(b - batch <= 1024);
+	handle = gem_create(fd, 4096);	/* bo that will hold the batch */
+	gem_write(fd, handle, 0, batch, (b-batch)*sizeof(batch[0]));
+
+	assert(r-reloc == 2);
+	/* object list: dst, src, then the batch, which carries both relocations */
+	obj[0].handle = dst;
+	obj[0].relocation_count = 0;
+	obj[0].relocs_ptr = 0;
+	obj[0].alignment = 0;
+	obj[0].offset = 0;
+	obj[0].flags = 0;
+	obj[0].rsvd1 = 0;
+	obj[0].rsvd2 = 0;
+
+	obj[1].handle = src;
+	obj[1].relocation_count = 0;
+	obj[1].relocs_ptr = 0;
+	obj[1].alignment = 0;
+	obj[1].offset = 0;
+	obj[1].flags = 0;
+	obj[1].rsvd1 = 0;
+	obj[1].rsvd2 = 0;
+
+	obj[2].handle = handle;
+	obj[2].relocation_count = 2;
+	obj[2].relocs_ptr = (uintptr_t)reloc;
+	obj[2].alignment = 0;
+	obj[2].offset = 0;
+	obj[2].flags = 0;
+	obj[2].rsvd1 = obj[2].rsvd2 = 0;
+
+	exec.buffers_ptr = (uintptr_t)obj;
+	exec.buffer_count = 3;
+	exec.batch_start_offset = 0;
+	exec.batch_len = (b-batch)*sizeof(batch[0]);
+	exec.DR1 = exec.DR4 = 0;
+	exec.num_cliprects = 0;
+	exec.cliprects_ptr = 0;
+	exec.flags = 0;
+	i915_execbuffer2_set_context_id(exec, 0);
+	exec.rsvd2 = 0;
+	/* submit; while the ioctl fails with EBUSY, throttle and try again */
+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
+	while (ret && errno == EBUSY) {
+		drmCommandNone(fd, DRM_I915_GEM_THROTTLE);
+		ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
+	}
+	assert(ret == 0);
+
+	gem_close(fd, handle);	/* release our handle on the batch bo */
+}
+
+static uint32_t
+create_bo(int fd, uint32_t val, int tiling)
+{
+	const int size = WIDTH*HEIGHT*4;	/* bytes in one 32bpp surface */
+	uint32_t bo, *map;
+	int n;
+
+	bo = gem_create(fd, size);
+	gem_set_tiling(fd, bo, tiling, WIDTH*4);
+
+	/* Write an ascending dword sequence that begins with val. */
+	map = gem_mmap(fd, bo, size, PROT_READ | PROT_WRITE);
+	assert(map);
+	for (n = 0; n < WIDTH*HEIGHT; n++)
+		map[n] = val + n;
+	munmap(map, size);
+
+	return bo;
+}
+
+static void
+check_bo(int fd, uint32_t handle, uint32_t val)
+{
+	uint32_t *map;
+	int n;
+
+	/* Expect the ascending dword sequence val, val + 1, ... */
+	map = gem_mmap(fd, handle, WIDTH*HEIGHT*4, PROT_READ);
+	assert(map);
+	for (n = 0; n < WIDTH*HEIGHT; n++, val++) {
+		if (map[n] == val)
+			continue;
+		fprintf(stderr, "Expected 0x%08x, found 0x%08x "
+			"at offset 0x%08x\n",
+			val, map[n], n * 4);
+		abort();	/* the first mismatch ends the test */
+	}
+	munmap(map, WIDTH*HEIGHT*4);
+}
+
+int main(int argc, char **argv)
+{
+	/* Shuffle the contents of many 1MiB bos with copy(), checking each pass. */
+	uint32_t *handle, *tiling, *start_val, start = 0;
+	int i, fd, count;
+
+	fd = drm_open_any();
+	if (!IS_GEN3(intel_get_drm_devid(fd))) {
+		printf("gen3-only test, doing nothing\n");
+		return 77;
+	}
+
+	count = argc > 1 ? atoi(argv[1]) : 0;	/* 0 selects the default */
+	if (count <= 0)
+		count = 3 * gem_aperture_size(fd) / (1024*1024) / 2;
+	if (count < 2)
+		count = 2;	/* each blit needs a distinct src and dst */
+	printf("Using %d 1MiB buffers\n", count);
+
+	handle = malloc(sizeof(uint32_t)*count*3);
+	assert(handle);
+	tiling = handle + count;	/* one allocation, three parallel arrays */
+	start_val = tiling + count;
+
+	for (i = 0; i < count; i++) {	/* tiling value cycles 0, 1, 2 */
+		handle[i] = create_bo(fd, start, tiling[i] = i % 3);
+		start_val[i] = start;
+		start += 1024 * 1024 / 4;	/* dwords in one 1MiB bo */
+	}
+
+	printf("Verifying initialisation..."); fflush(stdout);
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+	printf("done\n");
+
+	printf("Cyclic blits, forward..."); fflush(stdout);
+	for (i = 0; i < count * 32; i++) {
+		int src = i % count;
+		int dst = (i + 1) % count;
+
+		copy(fd, handle[dst], tiling[dst], handle[src], tiling[src]);
+		start_val[dst] = start_val[src];	/* dst now mirrors src */
+	}
+	printf("verifying..."); fflush(stdout);
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+	printf("done\n");
+
+	printf("Cyclic blits, backward..."); fflush(stdout);
+	for (i = 0; i < count * 32; i++) {
+		int src = (i + 1) % count;
+		int dst = i % count;
+
+		copy(fd, handle[dst], tiling[dst], handle[src], tiling[src]);
+		start_val[dst] = start_val[src];
+	}
+	printf("verifying..."); fflush(stdout);
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+	printf("done\n");
+
+	printf("Random blits..."); fflush(stdout);
+	for (i = 0; i < count * 32; i++) {
+		int src = random() % count;
+		int dst = random() % count;
+
+		while (src == dst)	/* count >= 2, so another index exists */
+			dst = random() % count;
+
+		copy(fd, handle[dst], tiling[dst], handle[src], tiling[src]);
+		start_val[dst] = start_val[src];
+	}
+	printf("verifying..."); fflush(stdout);
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+	printf("done\n");
+
+	return 0;
+}
diff --git a/tests/gen3_render_tiledx_blits.c b/tests/gen3_render_tiledx_blits.c
new file mode 100644
index 00000000..0e96e797
--- /dev/null
+++ b/tests/gen3_render_tiledx_blits.c
@@ -0,0 +1,408 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/** @file gen3_render_tiledx_blits.c
+ *
+ * This is a test of doing many blits between X-tiled buffers,
+ * with a working set larger than the aperture size.
+ *
+ * The goal is to simply ensure the basics work.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_gpu_tools.h"
+
+#include "i915_reg.h"
+#include "i915_3d.h"
+
+#define WIDTH 512
+#define HEIGHT 512
+
+static inline uint32_t pack_float(float f)
+{
+	union {	/* type-pun to read back the raw 32 bits of f */
+		float as_float;
+		uint32_t as_dword;
+	} pun;
+	pun.as_float = f;
+	return pun.as_dword;
+}
+
+static uint32_t fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
+			   uint32_t offset,
+			   uint32_t handle,
+			   uint32_t read_domain,
+			   uint32_t write_domain)
+{
+	reloc->offset = offset * sizeof(uint32_t);	/* dword index -> bytes */
+	reloc->target_handle = handle;
+	reloc->read_domains = read_domain;
+	reloc->write_domain = write_domain;
+	reloc->presumed_offset = 0;
+	reloc->delta = 0;
+	/* value for the caller to store in the batch at that dword */
+	return reloc->presumed_offset + reloc->delta;
+}
+
+static void
+copy(int fd, uint32_t dst, uint32_t src)
+{
+	uint32_t batch[1024], *b = batch;
+	struct drm_i915_gem_relocation_entry reloc[2], *r = reloc;
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_execbuffer2 exec;
+	uint32_t handle;
+	int ret;
+
+	/* Build a 3D batch drawing src as a texture over dst; invariant state first */
+	*b++ = (_3DSTATE_AA_CMD |
+		AA_LINE_ECAAR_WIDTH_ENABLE |
+		AA_LINE_ECAAR_WIDTH_1_0 |
+		AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
+	*b++ = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD |
+		IAB_MODIFY_ENABLE |
+		IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) |
+		IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE <<
+					 IAB_SRC_FACTOR_SHIFT) |
+		IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO <<
+					 IAB_DST_FACTOR_SHIFT));
+	*b++ = (_3DSTATE_DFLT_DIFFUSE_CMD);
+	*b++ = (0);
+	*b++ = (_3DSTATE_DFLT_SPEC_CMD);
+	*b++ = (0);
+	*b++ = (_3DSTATE_DFLT_Z_CMD);
+	*b++ = (0);
+	*b++ = (_3DSTATE_COORD_SET_BINDINGS |
+		CSB_TCB(0, 0) |
+		CSB_TCB(1, 1) |
+		CSB_TCB(2, 2) |
+		CSB_TCB(3, 3) |
+		CSB_TCB(4, 4) |
+		CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));
+	*b++ = (_3DSTATE_RASTER_RULES_CMD |
+		ENABLE_POINT_RASTER_RULE |
+		OGL_POINT_RASTER_RULE |
+		ENABLE_LINE_STRIP_PROVOKE_VRTX |
+		ENABLE_TRI_FAN_PROVOKE_VRTX |
+		LINE_STRIP_PROVOKE_VRTX(1) |
+		TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D);
+	*b++ = (_3DSTATE_MODES_4_CMD |
+		ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) |
+		ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) |
+		ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));
+	*b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2);
+	*b++ = (0x00000000);	/* Disable texture coordinate wrap-shortest */
+	*b++ = ((1 << S4_POINT_WIDTH_SHIFT) |
+		S4_LINE_WIDTH_ONE |
+		S4_CULLMODE_NONE |
+		S4_VFMT_XY);
+	*b++ = (0x00000000);	/* Stencil. */
+	*b++ = (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+	*b++ = (_3DSTATE_SCISSOR_RECT_0_CMD);
+	*b++ = (0);
+	*b++ = (0);
+	*b++ = (_3DSTATE_DEPTH_SUBRECT_DISABLE);
+	*b++ = (_3DSTATE_LOAD_INDIRECT | 0);	/* disable indirect state */
+	*b++ = (0);
+	*b++ = (_3DSTATE_STIPPLE);
+	*b++ = (0x00000000);
+	*b++ = (_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
+
+	/* sampler state: texture map 0 is src, flagged as a tiled surface */
+#define TEX_COUNT 1
+	*b++ = (_3DSTATE_MAP_STATE | (3 * TEX_COUNT));
+	*b++ = ((1 << TEX_COUNT) - 1);
+	*b = fill_reloc(r++, b-batch, src, I915_GEM_DOMAIN_SAMPLER, 0); b++;
+	*b++ = (MAPSURF_32BIT | MT_32BIT_ARGB8888 |
+		MS3_TILED_SURFACE |
+		(HEIGHT - 1) << MS3_HEIGHT_SHIFT |
+		(WIDTH - 1) << MS3_WIDTH_SHIFT);
+	*b++ = ((WIDTH-1) << MS4_PITCH_SHIFT);
+
+	*b++ = (_3DSTATE_SAMPLER_STATE | (3 * TEX_COUNT));
+	*b++ = ((1 << TEX_COUNT) - 1);
+	*b++ = (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT |
+		FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
+		FILTER_NEAREST << SS2_MIN_FILTER_SHIFT);
+	*b++ = (TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT |
+		TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT |
+		0 << SS3_TEXTUREMAP_INDEX_SHIFT);
+	*b++ = (0x00000000);
+
+	/* render target state: dst is the colour buffer, flagged as tiled */
+	*b++ = (_3DSTATE_BUF_INFO_CMD);
+	*b++ = (BUF_3D_ID_COLOR_BACK | BUF_3D_TILED_SURFACE |  WIDTH*4);
+	*b = fill_reloc(r++, b-batch, dst,
+			I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
+	b++;
+
+	*b++ = (_3DSTATE_DST_BUF_VARS_CMD);
+	*b++ = (COLR_BUF_ARGB8888 |
+		DSTORG_HORT_BIAS(0x8) |
+		DSTORG_VERT_BIAS(0x8));
+
+	/* draw rect is unconditional */
+	*b++ = (_3DSTATE_DRAW_RECT_CMD);
+	*b++ = (0x00000000);
+	*b++ = (0x00000000);	/* ymin, xmin */
+	*b++ = (DRAW_YMAX(HEIGHT - 1) |
+		DRAW_XMAX(WIDTH - 1));
+	/* yorig, xorig (relate to color buffer?) */
+	*b++ = (0x00000000);
+
+	/* texfmt */
+	*b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) | 2);
+	*b++ = ((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
+	*b++ = (~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT) |
+		S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D));
+	*b++ = (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
+		BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
+		BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT |
+		BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT);
+
+	/* pixel shader */
+	*b++ = (_3DSTATE_PIXEL_SHADER_PROGRAM | (1 + 3*3 - 2));
+	/* decl FS_T0 */
+	*b++ = (D0_DCL |
+		REG_TYPE(FS_T0) << D0_TYPE_SHIFT |
+		REG_NR(FS_T0) << D0_NR_SHIFT |
+		((REG_TYPE(FS_T0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+	*b++ = (0);
+	*b++ = (0);
+	/* decl FS_S0 */
+	*b++ = (D0_DCL |
+		(REG_TYPE(FS_S0) << D0_TYPE_SHIFT) |
+		(REG_NR(FS_S0) << D0_NR_SHIFT) |
+		((REG_TYPE(FS_S0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+	*b++ = (0);
+	*b++ = (0);
+	/* texld(FS_OC, FS_S0, FS_T0): sample S0 at coordinate T0 into OC */
+	*b++ = (T0_TEXLD |
+		(REG_TYPE(FS_OC) << T0_DEST_TYPE_SHIFT) |
+		(REG_NR(FS_OC) << T0_DEST_NR_SHIFT) |
+		(REG_NR(FS_S0) << T0_SAMPLER_NR_SHIFT));
+	*b++ = ((REG_TYPE(FS_T0) << T1_ADDRESS_REG_TYPE_SHIFT) |
+		(REG_NR(FS_T0) << T1_ADDRESS_REG_NR_SHIFT));
+	*b++ = (0);
+	/* one RECTLIST: 3 vertices x 4 floats, each vertex written as (x, y, x, y) */
+	*b++ = (PRIM3D_RECTLIST | (3*4 - 1));
+	*b++ = pack_float(WIDTH);
+	*b++ = pack_float(HEIGHT);
+	*b++ = pack_float(WIDTH);
+	*b++ = pack_float(HEIGHT);
+
+	*b++ = pack_float(0);
+	*b++ = pack_float(HEIGHT);
+	*b++ = pack_float(0);
+	*b++ = pack_float(HEIGHT);
+
+	*b++ = pack_float(0);
+	*b++ = pack_float(0);
+	*b++ = pack_float(0);
+	*b++ = pack_float(0);
+
+	*b++ = MI_BATCH_BUFFER_END;
+	if ((b - batch) & 1)	/* keep the batch an even number of dwords */
+		*b++ = 0;
+
+	assert(b - batch <= 1024);
+	handle = gem_create(fd, 4096);	/* bo that will hold the batch */
+	gem_write(fd, handle, 0, batch, (b-batch)*sizeof(batch[0]));
+
+	assert(r-reloc == 2);
+	/* object list: dst, src, then the batch, which carries both relocations */
+	obj[0].handle = dst;
+	obj[0].relocation_count = 0;
+	obj[0].relocs_ptr = 0;
+	obj[0].alignment = 0;
+	obj[0].offset = 0;
+	obj[0].flags = 0;
+	obj[0].rsvd1 = 0;
+	obj[0].rsvd2 = 0;
+
+	obj[1].handle = src;
+	obj[1].relocation_count = 0;
+	obj[1].relocs_ptr = 0;
+	obj[1].alignment = 0;
+	obj[1].offset = 0;
+	obj[1].flags = 0;
+	obj[1].rsvd1 = 0;
+	obj[1].rsvd2 = 0;
+
+	obj[2].handle = handle;
+	obj[2].relocation_count = 2;
+	obj[2].relocs_ptr = (uintptr_t)reloc;
+	obj[2].alignment = 0;
+	obj[2].offset = 0;
+	obj[2].flags = 0;
+	obj[2].rsvd1 = obj[2].rsvd2 = 0;
+
+	exec.buffers_ptr = (uintptr_t)obj;
+	exec.buffer_count = 3;
+	exec.batch_start_offset = 0;
+	exec.batch_len = (b-batch)*sizeof(batch[0]);
+	exec.DR1 = exec.DR4 = 0;
+	exec.num_cliprects = 0;
+	exec.cliprects_ptr = 0;
+	exec.flags = 0;
+	i915_execbuffer2_set_context_id(exec, 0);
+	exec.rsvd2 = 0;
+	/* submit; while the ioctl fails with EBUSY, throttle and try again */
+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
+	while (ret && errno == EBUSY) {
+		drmCommandNone(fd, DRM_I915_GEM_THROTTLE);
+		ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
+	}
+	assert(ret == 0);
+
+	gem_close(fd, handle);	/* release our handle on the batch bo */
+}
+
+static uint32_t
+create_bo(int fd, uint32_t val)
+{
+	const int size = WIDTH*HEIGHT*4;	/* bytes in one 32bpp surface */
+	uint32_t bo, *map;
+	int n;
+
+	bo = gem_create(fd, size);
+	gem_set_tiling(fd, bo, I915_TILING_X, WIDTH*4);
+
+	/* Write an ascending dword sequence that begins with val. */
+	map = gem_mmap(fd, bo, size, PROT_READ | PROT_WRITE);
+	assert(map);
+	for (n = 0; n < WIDTH*HEIGHT; n++)
+		map[n] = val + n;
+	munmap(map, size);
+
+	return bo;
+}
+
+static void
+check_bo(int fd, uint32_t handle, uint32_t val)
+{
+	uint32_t *map;
+	int n;
+
+	/* Expect the ascending dword sequence val, val + 1, ... */
+	map = gem_mmap(fd, handle, WIDTH*HEIGHT*4, PROT_READ);
+	assert(map);
+	for (n = 0; n < WIDTH*HEIGHT; n++, val++) {
+		if (map[n] == val)
+			continue;
+		fprintf(stderr, "Expected 0x%08x, found 0x%08x "
+			"at offset 0x%08x\n",
+			val, map[n], n * 4);
+		abort();	/* the first mismatch ends the test */
+	}
+	munmap(map, WIDTH*HEIGHT*4);
+}
+
+int main(int argc, char **argv)
+{
+	/* Shuffle the contents of many 1MiB bos with copy(), checking each pass. */
+	uint32_t *handle, *start_val, start = 0;
+	int i, fd, count;
+
+	fd = drm_open_any();
+	if (!IS_GEN3(intel_get_drm_devid(fd))) {
+		printf("gen3-only test, doing nothing\n");
+		return 77;
+	}
+
+	count = argc > 1 ? atoi(argv[1]) : 0;	/* 0 selects the default */
+	if (count <= 0)
+		count = 3 * gem_aperture_size(fd) / (1024*1024) / 2;
+	if (count < 2)
+		count = 2;	/* each blit needs a distinct src and dst */
+	printf("Using %d 1MiB buffers\n", count);
+
+	handle = malloc(sizeof(uint32_t)*count*2);
+	assert(handle);
+	start_val = handle + count;	/* second half: expected first dword per bo */
+
+	for (i = 0; i < count; i++) {
+		handle[i] = create_bo(fd, start);
+		start_val[i] = start;
+		start += 1024 * 1024 / 4;	/* dwords in one 1MiB bo */
+	}
+
+	printf("Verifying initialisation...\n");
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+
+	printf("Cyclic blits, forward...\n");
+	for (i = 0; i < count * 4; i++) {
+		int src = i % count;
+		int dst = (i + 1) % count;
+
+		copy(fd, handle[dst], handle[src]);
+		start_val[dst] = start_val[src];	/* dst now mirrors src */
+	}
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+
+	printf("Cyclic blits, backward...\n");
+	for (i = 0; i < count * 4; i++) {
+		int src = (i + 1) % count;
+		int dst = i % count;
+
+		copy(fd, handle[dst], handle[src]);
+		start_val[dst] = start_val[src];
+	}
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+
+	printf("Random blits...\n");
+	for (i = 0; i < count * 4; i++) {
+		int src = random() % count;
+		int dst = random() % count;
+
+		if (src == dst)
+			continue;
+
+		copy(fd, handle[dst], handle[src]);
+		start_val[dst] = start_val[src];
+	}
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+
+	return 0;
+}
diff --git a/tests/gen3_render_tiledy_blits.c b/tests/gen3_render_tiledy_blits.c
new file mode 100644
index 00000000..90fc7eb9
--- /dev/null
+++ b/tests/gen3_render_tiledy_blits.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/** @file gen3_render_tiledy_blits.c
+ *
+ * This is a test of doing many blits between Y-tiled buffers,
+ * with a working set larger than the aperture size.
+ *
+ * The goal is to simply ensure the basics work.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_gpu_tools.h"
+
+#include "i915_reg.h"
+#include "i915_3d.h"
+
+#define WIDTH 512
+#define HEIGHT 512
+
+static inline uint32_t pack_float(float f)
+{
+	union {	/* type-pun to read back the raw 32 bits of f */
+		float as_float;
+		uint32_t as_dword;
+	} pun;
+	pun.as_float = f;
+	return pun.as_dword;
+}
+
+static uint32_t fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
+			   uint32_t offset,
+			   uint32_t handle,
+			   uint32_t read_domain,
+			   uint32_t write_domain)
+{
+	reloc->offset = offset * sizeof(uint32_t);	/* dword index -> bytes */
+	reloc->target_handle = handle;
+	reloc->read_domains = read_domain;
+	reloc->write_domain = write_domain;
+	reloc->presumed_offset = 0;
+	reloc->delta = 0;
+	/* value for the caller to store in the batch at that dword */
+	return reloc->presumed_offset + reloc->delta;
+}
+
+static void
+copy(int fd, uint32_t dst, uint32_t src)
+{
+	uint32_t batch[1024], *b = batch;
+	struct drm_i915_gem_relocation_entry reloc[2], *r = reloc;
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_execbuffer2 exec;
+	uint32_t handle;
+	int ret;
+
+	/* Build a 3D batch drawing src as a texture over dst; invariant state first */
+	*b++ = (_3DSTATE_AA_CMD |
+		AA_LINE_ECAAR_WIDTH_ENABLE |
+		AA_LINE_ECAAR_WIDTH_1_0 |
+		AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
+	*b++ = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD |
+		IAB_MODIFY_ENABLE |
+		IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) |
+		IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE <<
+					 IAB_SRC_FACTOR_SHIFT) |
+		IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO <<
+					 IAB_DST_FACTOR_SHIFT));
+	*b++ = (_3DSTATE_DFLT_DIFFUSE_CMD);
+	*b++ = (0);
+	*b++ = (_3DSTATE_DFLT_SPEC_CMD);
+	*b++ = (0);
+	*b++ = (_3DSTATE_DFLT_Z_CMD);
+	*b++ = (0);
+	*b++ = (_3DSTATE_COORD_SET_BINDINGS |
+		CSB_TCB(0, 0) |
+		CSB_TCB(1, 1) |
+		CSB_TCB(2, 2) |
+		CSB_TCB(3, 3) |
+		CSB_TCB(4, 4) |
+		CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));
+	*b++ = (_3DSTATE_RASTER_RULES_CMD |
+		ENABLE_POINT_RASTER_RULE |
+		OGL_POINT_RASTER_RULE |
+		ENABLE_LINE_STRIP_PROVOKE_VRTX |
+		ENABLE_TRI_FAN_PROVOKE_VRTX |
+		LINE_STRIP_PROVOKE_VRTX(1) |
+		TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D);
+	*b++ = (_3DSTATE_MODES_4_CMD |
+		ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) |
+		ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) |
+		ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));
+	*b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2);
+	*b++ = (0x00000000);	/* Disable texture coordinate wrap-shortest */
+	*b++ = ((1 << S4_POINT_WIDTH_SHIFT) |
+		S4_LINE_WIDTH_ONE |
+		S4_CULLMODE_NONE |
+		S4_VFMT_XY);
+	*b++ = (0x00000000);	/* Stencil. */
+	*b++ = (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+	*b++ = (_3DSTATE_SCISSOR_RECT_0_CMD);
+	*b++ = (0);
+	*b++ = (0);
+	*b++ = (_3DSTATE_DEPTH_SUBRECT_DISABLE);
+	*b++ = (_3DSTATE_LOAD_INDIRECT | 0);	/* disable indirect state */
+	*b++ = (0);
+	*b++ = (_3DSTATE_STIPPLE);
+	*b++ = (0x00000000);
+	*b++ = (_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
+
+	/* sampler state: texture map 0 is src, flagged tiled with the tile-walk bit */
+#define TEX_COUNT 1
+	*b++ = (_3DSTATE_MAP_STATE | (3 * TEX_COUNT));
+	*b++ = ((1 << TEX_COUNT) - 1);
+	*b = fill_reloc(r++, b-batch, src, I915_GEM_DOMAIN_SAMPLER, 0); b++;
+	*b++ = (MAPSURF_32BIT | MT_32BIT_ARGB8888 |
+		MS3_TILED_SURFACE | MS3_TILE_WALK |
+		(HEIGHT - 1) << MS3_HEIGHT_SHIFT |
+		(WIDTH - 1) << MS3_WIDTH_SHIFT);
+	*b++ = ((WIDTH-1) << MS4_PITCH_SHIFT);
+
+	*b++ = (_3DSTATE_SAMPLER_STATE | (3 * TEX_COUNT));
+	*b++ = ((1 << TEX_COUNT) - 1);
+	*b++ = (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT |
+		FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
+		FILTER_NEAREST << SS2_MIN_FILTER_SHIFT);
+	*b++ = (TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT |
+		TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT |
+		0 << SS3_TEXTUREMAP_INDEX_SHIFT);
+	*b++ = (0x00000000);
+
+	/* render target state: dst is the colour buffer, flagged tiled with Y walk */
+	*b++ = (_3DSTATE_BUF_INFO_CMD);
+	*b++ = (BUF_3D_ID_COLOR_BACK | BUF_3D_TILED_SURFACE | BUF_3D_TILE_WALK_Y | WIDTH*4);
+	*b = fill_reloc(r++, b-batch, dst,
+			I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
+	b++;
+
+	*b++ = (_3DSTATE_DST_BUF_VARS_CMD);
+	*b++ = (COLR_BUF_ARGB8888 |
+		DSTORG_HORT_BIAS(0x8) |
+		DSTORG_VERT_BIAS(0x8));
+
+	/* draw rect is unconditional */
+	*b++ = (_3DSTATE_DRAW_RECT_CMD);
+	*b++ = (0x00000000);
+	*b++ = (0x00000000);	/* ymin, xmin */
+	*b++ = (DRAW_YMAX(HEIGHT - 1) |
+		DRAW_XMAX(WIDTH - 1));
+	/* yorig, xorig (relate to color buffer?) */
+	*b++ = (0x00000000);
+
+	/* texfmt */
+	*b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) | 2);
+	*b++ = ((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
+	*b++ = (~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT) |
+		S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D));
+	*b++ = (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
+		BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
+		BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT |
+		BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT);
+
+	/* pixel shader */
+	*b++ = (_3DSTATE_PIXEL_SHADER_PROGRAM | (1 + 3*3 - 2));
+	/* decl FS_T0 */
+	*b++ = (D0_DCL |
+		REG_TYPE(FS_T0) << D0_TYPE_SHIFT |
+		REG_NR(FS_T0) << D0_NR_SHIFT |
+		((REG_TYPE(FS_T0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+	*b++ = (0);
+	*b++ = (0);
+	/* decl FS_S0 */
+	*b++ = (D0_DCL |
+		(REG_TYPE(FS_S0) << D0_TYPE_SHIFT) |
+		(REG_NR(FS_S0) << D0_NR_SHIFT) |
+		((REG_TYPE(FS_S0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0));
+	*b++ = (0);
+	*b++ = (0);
+	/* texld(FS_OC, FS_S0, FS_T0): sample S0 at coordinate T0 into OC */
+	*b++ = (T0_TEXLD |
+		(REG_TYPE(FS_OC) << T0_DEST_TYPE_SHIFT) |
+		(REG_NR(FS_OC) << T0_DEST_NR_SHIFT) |
+		(REG_NR(FS_S0) << T0_SAMPLER_NR_SHIFT));
+	*b++ = ((REG_TYPE(FS_T0) << T1_ADDRESS_REG_TYPE_SHIFT) |
+		(REG_NR(FS_T0) << T1_ADDRESS_REG_NR_SHIFT));
+	*b++ = (0);
+	/* one RECTLIST: 3 vertices x 4 floats, each vertex written as (x, y, x, y) */
+	*b++ = (PRIM3D_RECTLIST | (3*4 - 1));
+	*b++ = pack_float(WIDTH);
+	*b++ = pack_float(HEIGHT);
+	*b++ = pack_float(WIDTH);
+	*b++ = pack_float(HEIGHT);
+
+	*b++ = pack_float(0);
+	*b++ = pack_float(HEIGHT);
+	*b++ = pack_float(0);
+	*b++ = pack_float(HEIGHT);
+
+	*b++ = pack_float(0);
+	*b++ = pack_float(0);
+	*b++ = pack_float(0);
+	*b++ = pack_float(0);
+
+	*b++ = MI_BATCH_BUFFER_END;
+	if ((b - batch) & 1)	/* keep the batch an even number of dwords */
+		*b++ = 0;
+
+	assert(b - batch <= 1024);
+	handle = gem_create(fd, 4096);	/* bo that will hold the batch */
+	gem_write(fd, handle, 0, batch, (b-batch)*sizeof(batch[0]));
+
+	assert(r-reloc == 2);
+	/* object list: dst, src, then the batch, which carries both relocations */
+	obj[0].handle = dst;
+	obj[0].relocation_count = 0;
+	obj[0].relocs_ptr = 0;
+	obj[0].alignment = 0;
+	obj[0].offset = 0;
+	obj[0].flags = 0;
+	obj[0].rsvd1 = 0;
+	obj[0].rsvd2 = 0;
+
+	obj[1].handle = src;
+	obj[1].relocation_count = 0;
+	obj[1].relocs_ptr = 0;
+	obj[1].alignment = 0;
+	obj[1].offset = 0;
+	obj[1].flags = 0;
+	obj[1].rsvd1 = 0;
+	obj[1].rsvd2 = 0;
+
+	obj[2].handle = handle;
+	obj[2].relocation_count = 2;
+	obj[2].relocs_ptr = (uintptr_t)reloc;
+	obj[2].alignment = 0;
+	obj[2].offset = 0;
+	obj[2].flags = 0;
+	obj[2].rsvd1 = obj[2].rsvd2 = 0;
+
+	exec.buffers_ptr = (uintptr_t)obj;
+	exec.buffer_count = 3;
+	exec.batch_start_offset = 0;
+	exec.batch_len = (b-batch)*sizeof(batch[0]);
+	exec.DR1 = exec.DR4 = 0;
+	exec.num_cliprects = 0;
+	exec.cliprects_ptr = 0;
+	exec.flags = 0;
+	i915_execbuffer2_set_context_id(exec, 0);
+	exec.rsvd2 = 0;
+	/* submit; while the ioctl fails with EBUSY, throttle and try again */
+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
+	while (ret && errno == EBUSY) {
+		drmCommandNone(fd, DRM_I915_GEM_THROTTLE);
+		ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
+	}
+	assert(ret == 0);
+
+	gem_close(fd, handle);	/* release our handle on the batch bo */
+}
+
+/*
+ * Allocate a WIDTH x HEIGHT buffer object with 4 bytes per element, mark it
+ * as Y-tiled and fill it with consecutive dwords, the first one being val.
+ * Returns the GEM handle of the new object.
+ */
+static uint32_t
+create_bo(int fd, uint32_t val)
+{
+	uint32_t bo = gem_create(fd, WIDTH*HEIGHT*4);
+	uint32_t *map;
+	int n;
+
+	gem_set_tiling(fd, bo, I915_TILING_Y, WIDTH*4);
+
+	/* Write the ramp through a mapping of the object */
+	map = gem_mmap(fd, bo, WIDTH*HEIGHT*4, PROT_READ | PROT_WRITE);
+	assert(map);
+	for (n = 0; n < WIDTH*HEIGHT; n++, val++)
+		map[n] = val;
+	munmap(map, WIDTH*HEIGHT*4);
+
+	return bo;
+}
+
+/*
+ * Map the buffer object read-only and verify that it holds consecutive
+ * dwords starting at val.  On the first mismatch the expected value, the
+ * value found and the byte offset are printed to stderr and the test aborts.
+ */
+static void
+check_bo(int fd, uint32_t handle, uint32_t val)
+{
+	uint32_t *map;
+	int n;
+
+	map = gem_mmap(fd, handle, WIDTH*HEIGHT*4, PROT_READ);
+	assert(map);
+	for (n = 0; n < WIDTH*HEIGHT; n++, val++) {
+		if (map[n] == val)
+			continue;
+
+		fprintf(stderr, "Expected 0x%08x, found 0x%08x "
+			"at offset 0x%08x\n",
+			val, map[n], n * 4);
+		abort();
+	}
+	munmap(map, WIDTH*HEIGHT*4);
+}
+
+/*
+ * Blit between a set of Y-tiled buffers (cyclic forward, cyclic backward,
+ * then random pairs) and check after every phase that each buffer holds the
+ * pattern it is expected to hold.
+ *
+ * argv[1], if present, is the number of buffers to use; when it is absent
+ * or does not parse to a positive number the count is derived from the
+ * aperture size.  Returns 77 on hardware that is not gen3, 0 otherwise
+ * (verification failures abort inside check_bo()).
+ */
+int main(int argc, char **argv)
+{
+	uint32_t *handle, *start_val;
+	uint32_t start = 0;
+	int i, fd, count;
+
+	fd = drm_open_any();
+
+	if (!IS_GEN3(intel_get_drm_devid(fd))) {
+		printf("gen3-only test, doing nothing\n");
+		return 77;
+	}
+
+	count = 0;
+	if (argc > 1)
+		count = atoi(argv[1]);
+	/* A negative count would turn every loop below into a no-op */
+	if (count <= 0)
+		count = 3 * gem_aperture_size(fd) / (1024*1024) / 2;
+	printf("Using %d 1MiB buffers\n", count);
+
+	/* One allocation: the handles first, their expected start values
+	 * right behind them */
+	handle = malloc(sizeof(uint32_t)*count*2);
+	assert(handle);
+	start_val = handle + count;
+
+	for (i = 0; i < count; i++) {
+		handle[i] = create_bo(fd, start);
+		start_val[i] = start;
+		start += 1024 * 1024 / 4;
+	}
+
+	printf("Verifying initialisation..."); fflush(stdout);
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+	printf("done\n");
+
+	printf("Cyclic blits, forward..."); fflush(stdout);
+	for (i = 0; i < count * 32; i++) {
+		int src = i % count;
+		int dst = (i + 1) % count;
+
+		copy(fd, handle[dst], handle[src]);
+		start_val[dst] = start_val[src];
+	}
+	printf("verifying..."); fflush(stdout);
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+	printf("done\n");
+
+	printf("Cyclic blits, backward..."); fflush(stdout);
+	for (i = 0; i < count * 32; i++) {
+		int src = (i + 1) % count;
+		int dst = i % count;
+
+		copy(fd, handle[dst], handle[src]);
+		start_val[dst] = start_val[src];
+	}
+	printf("verifying..."); fflush(stdout);
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+	printf("done\n");
+
+	printf("Random blits..."); fflush(stdout);
+	for (i = 0; i < count * 32; i++) {
+		int src = random() % count;
+		int dst = random() % count;
+
+		/* Re-roll until the pair is distinct.  With a single buffer
+		 * no distinct pair exists and the re-roll would spin forever,
+		 * so fall back to a self-copy like the cyclic phases do.
+		 */
+		while (count > 1 && src == dst)
+			dst = random() % count;
+
+		copy(fd, handle[dst], handle[src]);
+		start_val[dst] = start_val[src];
+	}
+	printf("verifying..."); fflush(stdout);
+	for (i = 0; i < count; i++)
+		check_bo(fd, handle[i], start_val[i]);
+	printf("done\n");
+
+	return 0;
+}
diff --git a/tests/getclient.c b/tests/getclient.c
new file mode 100644
index 00000000..481ce119
--- /dev/null
+++ b/tests/getclient.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include <limits.h>
+#include <sys/ioctl.h>
+#include "drmtest.h"
+
+/**
+ * Exercises DRM_IOCTL_GET_CLIENT: client index 0 must be reported, an
+ * absurdly large index must be rejected with EINVAL.
+ */
+int main(int argc, char **argv)
+{
+	drm_client_t client;
+	int ret;
+	int fd = drm_open_any();
+
+	/* Client index 0 is expected to exist both on an otherwise unused
+	 * drm device and on one the X Server is running on.
+	 */
+	client.idx = 0;
+	ret = ioctl(fd, DRM_IOCTL_GET_CLIENT, &client);
+	assert(ret == 0);
+
+	/* An out-of-range index ought to be refused.  The DRM drivers
+	 * currently always return data, leaving the user no real way to
+	 * detect the end of the list.  That's bad, and this test is XFAIL
+	 * as a result.
+	 */
+	client.idx = 0x7fffffff;
+	ret = ioctl(fd, DRM_IOCTL_GET_CLIENT, &client);
+	assert(ret == -1 && errno == EINVAL);
+
+	close(fd);
+	return 0;
+}
diff --git a/tests/getstats.c b/tests/getstats.c
new file mode 100644
index 00000000..8a7d2999
--- /dev/null
+++ b/tests/getstats.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include <limits.h>
+#include <sys/ioctl.h>
+#include "drmtest.h"
+
+/**
+ * Checks DRM_IOCTL_GET_STATS.
+ *
+ * The contents of the reply are not inspected; the point is only that the
+ * ioctl succeeds and the kernel doesn't crash.
+ */
+int main(int argc, char **argv)
+{
+	drm_stats_t stats;
+	int fd = drm_open_any();
+	int ret = ioctl(fd, DRM_IOCTL_GET_STATS, &stats);
+
+	assert(ret == 0);
+
+	close(fd);
+	return 0;
+}
diff --git a/tests/getversion.c b/tests/getversion.c
new file mode 100644
index 00000000..4847e796
--- /dev/null
+++ b/tests/getversion.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include <string.h>
+#include <sys/ioctl.h>
+#include "drmtest.h"
+
+/**
+ * Checks DRM_IOCTL_GET_VERSION and libdrm's drmGetVersion() interface to it.
+ *
+ * The driver has to report a non-empty name, date and description and a
+ * major version of at least 1.
+ */
+int main(int argc, char **argv)
+{
+	int fd;
+	drmVersionPtr v;
+
+	fd = drm_open_any();
+	v = drmGetVersion(fd);
+	/* drmGetVersion() hands back NULL when the query fails */
+	assert(v != NULL);
+	assert(strlen(v->name) != 0);
+	assert(strlen(v->date) != 0);
+	assert(strlen(v->desc) != 0);
+	assert(v->version_major >= 1);
+	drmFree(v);
+	close(fd);
+	return 0;
+}
diff --git a/tests/module_reload b/tests/module_reload
new file mode 100755
index 00000000..400fdd04
--- /dev/null
+++ b/tests/module_reload
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# Testcase: Reload the drm module
+#
+# ... we've broken this way too often :(
+#
+# Exit status: 77 if the console cannot be unbound, non-zero if i915 is
+# still loaded after the rmmod sequence or if gem_exec_nop fails after the
+# reload, 0 otherwise.
+#
+
+SOURCE_DIR="$( dirname "${BASH_SOURCE[0]}" )"
+. "$SOURCE_DIR/drm_lib.sh"
+
+# no other drm service should be running, so we can just unbind
+
+# vtcon0 is vga, vtcon1 fbcon and let's pray that won't change due to boot load
+# time changes
+if ! echo 0 > /sys/class/vtconsole/vtcon1/bind ; then
+	echo -e "no kms unload support"
+	echo "please enable CONFIG_VT_HW_CONSOLE_BINDING in the kernel"
+	exit 77
+fi
+
+#ignore errors in ips - gen5 only
+rmmod intel_ips &> /dev/null
+rmmod i915
+#ignore errors in intel-gtt, often built-in
+rmmod intel-gtt &> /dev/null
+rmmod drm_kms_helper
+rmmod drm
+
+if lsmod | grep i915 &> /dev/null ; then
+	echo WARNING: i915.ko still loaded!
+	exitcode=1
+else
+	echo module successfully unloaded
+	exitcode=0
+fi
+
+modprobe i915
+echo 1 > /sys/class/vtconsole/vtcon1/bind
+
+# try to run something
+if "$SOURCE_DIR/gem_exec_nop" > /dev/null ; then
+	echo "module successfully loaded again"
+else
+	nop_status=$?
+	# a failed unload takes precedence; otherwise report why the
+	# post-reload check failed
+	if [ $exitcode -eq 0 ] ; then
+		exitcode=$nop_status
+	fi
+fi
+
+# make the unload result computed above visible to the test runner
+exit $exitcode
diff --git a/tests/pass.png b/tests/pass.png
new file mode 100644
index 00000000..36a5236b
--- /dev/null
+++ b/tests/pass.png
diff --git a/tests/prime_nv_api.c b/tests/prime_nv_api.c
new file mode 100644
index 00000000..962e903d
--- /dev/null
+++ b/tests/prime_nv_api.c
@@ -0,0 +1,408 @@
+/* weird use of API tests */
+
+/* test1- export buffer from intel, import same fd twice into nouveau,
+   check handles match
+   test2 - export buffer from intel, import fd once, close fd, try import again
+   fail if it succeeds
+   test3 - export buffer from intel, import twice on nouveau, check handle is the same
+   test4 - export handle twice from intel, import into nouveau twice, check handle is the same
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include "intel_bufmgr.h"
+#include "nouveau.h"
+#include "intel_gpu_tools.h"
+#include "intel_batchbuffer.h"
+
+#define BO_SIZE (256*1024)
+
+/* Both GPUs are opened twice by find_and_open_devices(); main() treats a
+ * value of -1 as "device not found". */
+int intel_fd = -1, intel_fd2 = -1, nouveau_fd = -1, nouveau_fd2 = -1;
+/* intel buffer managers, set up in main() on intel_fd and intel_fd2 */
+drm_intel_bufmgr *bufmgr;
+drm_intel_bufmgr *bufmgr2;
+/* nouveau devices and clients, set up in main() on nouveau_fd and nouveau_fd2 */
+struct nouveau_device *ndev, *ndev2;
+struct nouveau_client *nclient, *nclient2;
+/* device id reported by intel_get_drm_devid(intel_fd) */
+uint32_t devid;
+/* batch buffer allocated in main() from bufmgr */
+struct intel_batchbuffer *intel_batch;
+
+/*
+ * Scan /sys/class/drm/card0..card8 and open the matching /dev/dri/cardN
+ * node twice for every device whose vendor id is 0x8086 (intel) or 0x10de
+ * (nouveau side).  The descriptors are stored in intel_fd/intel_fd2 and
+ * nouveau_fd/nouveau_fd2.  The scan stops at the first card whose vendor
+ * file is missing or cannot be opened.
+ *
+ * Returns 0 once the scan is over, whether or not both vendors were found
+ * (the caller checks the descriptors), and -1 if a device node could not
+ * be opened.
+ */
+static int find_and_open_devices(void)
+{
+	int i;
+	char path[80];
+	struct stat buf;
+	FILE *fl;
+	char vendor_id[8];
+	char *line;
+	int venid;
+
+	for (i = 0; i < 9; i++) {
+		snprintf(path, sizeof(path),
+			 "/sys/class/drm/card%d/device/vendor", i);
+		if (stat(path, &buf))
+			break;
+
+		fl = fopen(path, "r");
+		if (!fl)
+			break;
+
+		line = fgets(vendor_id, sizeof(vendor_id), fl);
+		fclose(fl);
+		/* nothing read: vendor_id holds no valid string to parse */
+		if (!line)
+			continue;
+
+		venid = strtoul(vendor_id, NULL, 16);
+		snprintf(path, sizeof(path), "/dev/dri/card%d", i);
+		/* open() reports failure with -1; 0 is a valid descriptor */
+		if (venid == 0x8086) {
+			intel_fd = open(path, O_RDWR);
+			if (intel_fd < 0)
+				return -1;
+			intel_fd2 = open(path, O_RDWR);
+			if (intel_fd2 < 0)
+				return -1;
+		} else if (venid == 0x10de) {
+			nouveau_fd = open(path, O_RDWR);
+			if (nouveau_fd < 0)
+				return -1;
+			nouveau_fd2 = open(path, O_RDWR);
+			if (nouveau_fd2 < 0)
+				return -1;
+		}
+	}
+	return 0;
+}
+
+/* test1: export an intel buffer and import the same prime fd through both
+ * nouveau devices (ndev and ndev2); the two imports must carry the same
+ * handle.  Returns a non-zero value if the export or an import fails or if
+ * the handles differ. */
+static int test1(void)
+{
+	struct nouveau_bo *first = NULL, *second = NULL;
+	drm_intel_bo *intel_bo;
+	int fd;
+	int ret;
+
+	intel_bo = drm_intel_bo_alloc(bufmgr, "test bo", BO_SIZE, 4096);
+
+	ret = drm_intel_bo_gem_export_to_prime(intel_bo, &fd);
+	if (ret)
+		goto out;
+
+	/* the second import is only attempted if the first one worked; the
+	 * prime fd is closed either way */
+	ret = nouveau_bo_prime_handle_ref(ndev, fd, &first);
+	if (ret >= 0)
+		ret = nouveau_bo_prime_handle_ref(ndev2, fd, &second);
+	close(fd);
+	if (ret < 0)
+		goto out;
+
+	if (first->handle != second->handle)
+		ret = -1;
+out:
+	nouveau_bo_ref(NULL, &second);
+	nouveau_bo_ref(NULL, &first);
+	drm_intel_bo_unreference(intel_bo);
+	return ret;
+}
+
+
+/* test2: export an intel buffer, import it once into nouveau, close the
+ * prime fd and then try to import through the closed fd again.  The second
+ * import has to fail for the test to pass. */
+static int test2(void)
+{
+	struct nouveau_bo *first = NULL, *second = NULL;
+	drm_intel_bo *intel_bo;
+	int fd;
+	int ret;
+
+	intel_bo = drm_intel_bo_alloc(bufmgr, "test bo", BO_SIZE, 4096);
+
+	ret = drm_intel_bo_gem_export_to_prime(intel_bo, &fd);
+	if (ret < 0)
+		goto out;
+
+	ret = nouveau_bo_prime_handle_ref(ndev, fd, &first);
+	close(fd);
+	if (ret < 0)
+		goto out;
+
+	/* fd is closed by now: a successful import is the failure case */
+	ret = nouveau_bo_prime_handle_ref(ndev2, fd, &second) == 0 ? -1 : 0;
+
+out:
+	nouveau_bo_ref(NULL, &second);
+	nouveau_bo_ref(NULL, &first);
+	drm_intel_bo_unreference(intel_bo);
+	return ret;
+}
+
+
+/* test3: import the same prime fd twice on one nouveau device (ndev) and
+ * require both imports to carry the same handle */
+static int test3(void)
+{
+	struct nouveau_bo *first = NULL, *second = NULL;
+	drm_intel_bo *intel_bo;
+	int fd;
+	int ret;
+
+	intel_bo = drm_intel_bo_alloc(bufmgr, "test bo", BO_SIZE, 4096);
+
+	ret = drm_intel_bo_gem_export_to_prime(intel_bo, &fd);
+	if (ret >= 0) {
+		/* import twice, stopping at the first failure; the prime fd
+		 * is closed in every case */
+		ret = nouveau_bo_prime_handle_ref(ndev, fd, &first);
+		if (ret >= 0)
+			ret = nouveau_bo_prime_handle_ref(ndev, fd, &second);
+		close(fd);
+
+		if (ret >= 0 && first->handle != second->handle)
+			ret = -1;
+	}
+
+	nouveau_bo_ref(NULL, &second);
+	nouveau_bo_ref(NULL, &first);
+	drm_intel_bo_unreference(intel_bo);
+	return ret;
+}
+
+/* export handle twice from one driver - import twice
+   see if we get same object.
+   Returns a negative value if an export or import fails or if the two
+   imports carry different handles. */
+static int test4(void)
+{
+	int ret;
+	drm_intel_bo *test_intel_bo;
+	int prime_fd, prime_fd2;
+	struct nouveau_bo *nvbo = NULL, *nvbo2 = NULL;
+
+	test_intel_bo = drm_intel_bo_alloc(bufmgr, "test bo", BO_SIZE, 4096);
+
+	/* a failed export leaves its fd unset, so stop before using it */
+	ret = drm_intel_bo_gem_export_to_prime(test_intel_bo, &prime_fd);
+	if (ret < 0)
+		goto out;
+
+	ret = drm_intel_bo_gem_export_to_prime(test_intel_bo, &prime_fd2);
+	if (ret < 0) {
+		close(prime_fd);
+		goto out;
+	}
+
+	ret = nouveau_bo_prime_handle_ref(ndev, prime_fd, &nvbo);
+	close(prime_fd);
+	if (ret >= 0)
+		ret = nouveau_bo_prime_handle_ref(ndev, prime_fd2, &nvbo2);
+	close(prime_fd2);
+	if (ret < 0)
+		goto out;
+
+	if (nvbo->handle != nvbo2->handle)
+		ret = -1;
+
+out:
+	nouveau_bo_ref(NULL, &nvbo2);
+	nouveau_bo_ref(NULL, &nvbo);
+	drm_intel_bo_unreference(test_intel_bo);
+	return ret;
+}
+
+/* export handle from intel driver - reimport to intel driver
+   see if you get same object.
+   Returns 0 if the re-imported buffer has the handle of the original one,
+   a negative value if the export or import fails or the handles differ. */
+static int test5(void)
+{
+	int ret;
+	drm_intel_bo *test_intel_bo, *test_intel_bo2 = NULL;
+	int prime_fd;
+
+	test_intel_bo = drm_intel_bo_alloc(bufmgr, "test bo", BO_SIZE, 4096);
+
+	/* without a successful export prime_fd holds no descriptor */
+	ret = drm_intel_bo_gem_export_to_prime(test_intel_bo, &prime_fd);
+	if (ret < 0)
+		goto out;
+
+	test_intel_bo2 = drm_intel_bo_gem_create_from_prime(bufmgr, prime_fd, BO_SIZE);
+	close(prime_fd);
+	if (!test_intel_bo2) {
+		ret = -1;
+		goto out;
+	}
+
+	ret = 0;
+	if (test_intel_bo->handle != test_intel_bo2->handle)
+		ret = -1;
+
+out:
+	/* drop the import's reference as well; unreferencing NULL is a no-op */
+	drm_intel_bo_unreference(test_intel_bo2);
+	drm_intel_bo_unreference(test_intel_bo);
+	return ret;
+}
+
+/* nouveau export reimport test: export a nouveau buffer as a prime fd and
+   import it again on the same device (ndev); both objects have to carry
+   the same handle.
+   Returns 0 on success, a negative value if a step fails or the handles
+   differ. */
+static int test6(void)
+{
+	int ret;
+	int prime_fd;
+	struct nouveau_bo *nvbo = NULL, *nvbo2 = NULL;
+
+	ret = nouveau_bo_new(ndev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
+			     0, BO_SIZE, NULL, &nvbo);
+	if (ret < 0)
+		goto out;
+	ret = nouveau_bo_set_prime(nvbo, &prime_fd);
+	if (ret < 0)
+		goto out;
+
+	ret = nouveau_bo_prime_handle_ref(ndev, prime_fd, &nvbo2);
+	close(prime_fd);
+	if (ret < 0)
+		goto out;
+
+	ret = 0;
+	if (nvbo->handle != nvbo2->handle) {
+		fprintf(stderr,"mismatch handles %u %u\n", nvbo->handle, nvbo2->handle);
+		/* a mismatch is what this test exists to catch */
+		ret = -1;
+	}
+
+out:
+	/* shared exit so that no error path leaks a buffer reference */
+	nouveau_bo_ref(NULL, &nvbo);
+	nouveau_bo_ref(NULL, &nvbo2);
+	return ret;
+}
+
+/* export handle from intel driver - reimport to another intel driver bufmgr
+   see if you get same object.
+   Returns 0 if export and import both work, a negative value otherwise. */
+static int test7(void)
+{
+	int ret;
+	drm_intel_bo *test_intel_bo, *test_intel_bo2 = NULL;
+	int prime_fd;
+
+	test_intel_bo = drm_intel_bo_alloc(bufmgr, "test bo", BO_SIZE, 4096);
+
+	/* without a successful export prime_fd holds no descriptor */
+	ret = drm_intel_bo_gem_export_to_prime(test_intel_bo, &prime_fd);
+	if (ret < 0)
+		goto out;
+
+	test_intel_bo2 = drm_intel_bo_gem_create_from_prime(bufmgr2, prime_fd, BO_SIZE);
+	close(prime_fd);
+	if (!test_intel_bo2) {
+		ret = -1;
+		goto out;
+	}
+
+	ret = 0;
+	/* not sure what to test for, just that we don't explode */
+out:
+	drm_intel_bo_unreference(test_intel_bo2);
+	drm_intel_bo_unreference(test_intel_bo);
+	return ret;
+}
+
+/* nouveau export reimport to other driver test: export a nouveau buffer
+   from ndev as a prime fd and import it through ndev2.
+   Returns 0 on success, a negative value if a step fails. */
+static int test8(void)
+{
+	int ret;
+	int prime_fd;
+	struct nouveau_bo *nvbo = NULL, *nvbo2 = NULL;
+
+	ret = nouveau_bo_new(ndev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
+			     0, BO_SIZE, NULL, &nvbo);
+	if (ret < 0)
+		goto out;
+	ret = nouveau_bo_set_prime(nvbo, &prime_fd);
+	if (ret < 0)
+		goto out;
+
+	ret = nouveau_bo_prime_handle_ref(ndev2, prime_fd, &nvbo2);
+	close(prime_fd);
+	if (ret < 0)
+		goto out;
+
+	/* not sure what to test for, just make sure we don't explode */
+	ret = 0;
+
+out:
+	/* shared exit so that no error path leaks a buffer reference */
+	nouveau_bo_ref(NULL, &nvbo);
+	nouveau_bo_ref(NULL, &nvbo2);
+	return ret;
+}
+
+/*
+ * Open both GPUs, set up the intel buffer managers and the nouveau devices
+ * and run test1..test8, reporting every failing test on stderr.
+ *
+ * Returns 77 if no intel/nouveau pair is available or a nouveau device
+ * cannot be wrapped, a non-zero value if setup or any test fails, and 0 if
+ * all tests pass.
+ */
+int main(int argc, char **argv)
+{
+	static int (*const tests[])(void) = {
+		test1, test2, test3, test4, test5, test6, test7, test8,
+	};
+	unsigned int i;
+	int status = 0;
+	int ret;
+
+	ret = find_and_open_devices();
+	if (ret < 0)
+		return ret;
+
+	if (nouveau_fd == -1 || intel_fd == -1 || nouveau_fd2 == -1 || intel_fd2 == -1) {
+		fprintf(stderr,"failed to find intel and nouveau GPU\n");
+		return 77;
+	}
+
+	/* set up intel bufmgr */
+	bufmgr = drm_intel_bufmgr_gem_init(intel_fd, 4096);
+	if (!bufmgr)
+		return -1;
+	/* Do not enable reuse, we share (almost) all buffers. */
+	//drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+	bufmgr2 = drm_intel_bufmgr_gem_init(intel_fd2, 4096);
+	if (!bufmgr2)
+		return -1;
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr2);
+
+	/* set up nouveau bufmgr */
+	ret = nouveau_device_wrap(nouveau_fd, 0, &ndev);
+	if (ret < 0) {
+		fprintf(stderr,"failed to wrap nouveau device\n");
+		return 77;
+	}
+
+	ret = nouveau_client_new(ndev, &nclient);
+	if (ret < 0) {
+		fprintf(stderr,"failed to setup nouveau client\n");
+		return -1;
+	}
+
+	/* set up the second nouveau device on the second descriptor */
+	ret = nouveau_device_wrap(nouveau_fd2, 0, &ndev2);
+	if (ret < 0) {
+		fprintf(stderr,"failed to wrap nouveau device\n");
+		return 77;
+	}
+
+	ret = nouveau_client_new(ndev2, &nclient2);
+	if (ret < 0) {
+		fprintf(stderr,"failed to setup nouveau client\n");
+		return -1;
+	}
+
+	/* set up an intel batch buffer */
+	devid = intel_get_drm_devid(intel_fd);
+	intel_batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+		ret = tests[i]();
+		if (ret) {
+			fprintf(stderr,"prime_test: failed test %u\n", i + 1);
+			/* keep the failure so that a later passing test
+			 * cannot hide it from the exit status */
+			status = ret;
+		}
+	}
+
+	intel_batchbuffer_free(intel_batch);
+
+	nouveau_device_del(&ndev2);
+	nouveau_device_del(&ndev);
+	drm_intel_bufmgr_destroy(bufmgr2);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(intel_fd2);
+	close(intel_fd);
+	close(nouveau_fd2);
+	close(nouveau_fd);
+
+	return status;
+}
diff --git a/tests/prime_nv_pcopy.c b/tests/prime_nv_pcopy.c
new file mode 100644
index 00000000..21dccf38
--- /dev/null
+++ b/tests/prime_nv_pcopy.c
@@ -0,0 +1,1329 @@
+/* basic set of prime tests between intel and nouveau */
+
+/* test list -
+   1. share buffer from intel -> nouveau.
+   2. share buffer from nouveau -> intel
+   3. share intel->nouveau, map on both, write intel, read nouveau
+   4. share intel->nouveau, blit intel fill, readback on nouveau
+   test 1 + map buffer, read/write, map other size.
+   do some hw actions on the buffer
+   some illegal operations -
+       close prime fd try and map
+
+   TODO add some nouveau rendering tests
+*/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+
+#include "i915_drm.h"
+#include "intel_bufmgr.h"
+#include "nouveau.h"
+#include "intel_gpu_tools.h"
+#include "intel_batchbuffer.h"
+
+/* device nodes opened by find_and_open_devices() */
+static int intel_fd = -1, nouveau_fd = -1;
+static drm_intel_bufmgr *bufmgr;
+/* nouveau device and client, created in init_nouveau() from nouveau_fd */
+static struct nouveau_device *ndev;
+static struct nouveau_client *nclient;
+static uint32_t devid;
+/* intel batch buffer flushed by noop_intel() */
+static struct intel_batchbuffer *batch;
+/* nouveau channel, copy engine object, buffer context and push buffer,
+ * all created in init_nouveau() */
+static struct nouveau_object *nchannel, *pcopy;
+static struct nouveau_bufctx *nbufctx;
+static struct nouveau_pushbuf *npush;
+
+/* 4096-byte GART buffer allocated in init_nouveau(); query points at its
+ * mapping and is initialised from query_counter there */
+static struct nouveau_bo *query_bo;
+static uint32_t query_counter;
+static volatile uint32_t *query;
+/* memtype_intel: memtype nv_bo_alloc() uses for tile modes whose low nibble
+ * is 0xe; tile_intel_y/tile_intel_x: tile mode values chosen per chipset in
+ * init_nouveau() */
+static uint32_t memtype_intel, tile_intel_y, tile_intel_x;
+
+/* Expands to two arguments, "6, (x)", which fill the subc and mthd
+ * parameters of the BEGIN_* helpers */
+#define SUBC_COPY(x) 6, (x)
+#define NV01_SUBCHAN_OBJECT 0
+
+/* NV01_SUBC(COPY, OBJECT) expands to SUBC_COPY((NV01_SUBCHAN_OBJECT)) */
+#define NV01_SUBC(subc, mthd) SUBC_##subc((NV01_SUBCHAN_##mthd))
+
+/* Debug output is compiled out; switch the condition to 1 to have dbg()
+ * print to stderr */
+#if 0
+#define dbg(fmt...) fprintf(stderr, fmt);
+#else
+#define dbg(...) do { } while (0)
+#endif
+
+/* Dimensions of a buffer as filled in by nv_bo_alloc(): w/h are the
+ * requested width and height, pitch/lines the same values rounded up to
+ * the alignment of the tile mode */
+typedef struct {
+	uint32_t w, h;
+	uint32_t pitch, lines;
+} rect;
+
+/*
+ * Create or import a nouveau buffer of at least w x h bytes.
+ *
+ * bo:        receives the buffer; set to NULL first and left NULL when a
+ *            new allocation fails.
+ * r:         receives the requested size (w, h) and the size rounded up to
+ *            the tile alignment (pitch, lines).
+ * tile_mode: 0 for no alignment; otherwise the low nibble selects the
+ *            horizontal and the next nibble the vertical alignment.
+ * handle:    negative to allocate a new, mapped buffer in domain dom;
+ *            otherwise a prime fd to import, which is closed here.
+ *
+ * Returns 0 on success; on failure the nouveau error code, or -1 if an
+ * imported buffer is smaller than pitch * lines.
+ */
+static int nv_bo_alloc(struct nouveau_bo **bo, rect *r,
+		       uint32_t w, uint32_t h, uint32_t tile_mode,
+		       int handle, uint32_t dom)
+{
+	uint32_t size;
+	uint32_t dx = 1, dy = 1, memtype = 0;
+	int ret;
+
+	*bo = NULL;
+	if (tile_mode) {
+		uint32_t tile_y;
+		uint32_t tile_x;
+
+		/* Y major tiling */
+		if ((tile_mode & 0xf) == 0xe)
+			/* but the internal layout is different */
+			tile_x = 7;
+		else
+			tile_x = 6 + (tile_mode & 0xf);
+		if (ndev->chipset < 0xc0) {
+			memtype = 0x70;
+			tile_y = 2;
+		} else {
+			memtype = 0xfe;
+			tile_y = 3;
+		}
+		if ((tile_mode & 0xf) == 0xe)
+			memtype = memtype_intel;
+		tile_y += ((tile_mode & 0xf0)>>4);
+
+		/* tile_x/tile_y are log2 of the required alignment */
+		dx = 1 << tile_x;
+		dy = 1 << tile_y;
+		dbg("Tiling requirements: x y %u %u\n", dx, dy);
+	}
+
+	r->w = w;
+	r->h = h;
+
+	/* round both dimensions up to the tile alignment */
+	r->pitch = w = (w + dx-1) & ~(dx-1);
+	r->lines = h = (h + dy-1) & ~(dy-1);
+	size = w*h;
+
+	if (handle < 0) {
+		union nouveau_bo_config cfg;
+		cfg.nv50.memtype = memtype;
+		cfg.nv50.tile_mode = tile_mode;
+		if (dom == NOUVEAU_BO_GART)
+			dom |= NOUVEAU_BO_MAP;
+		ret = nouveau_bo_new(ndev, dom, 4096, size, &cfg, bo);
+		if (!ret)
+			ret = nouveau_bo_map(*bo, NOUVEAU_BO_RDWR, nclient);
+		if (ret) {
+			fprintf(stderr, "creating bo failed with %i %s\n",
+				ret, strerror(-ret));
+			nouveau_bo_ref(NULL, bo);
+			return ret;
+		}
+
+		dbg("new flags %08x memtype %08x tile %08x\n", (*bo)->flags, (*bo)->config.nv50.memtype, (*bo)->config.nv50.tile_mode);
+		if (tile_mode == tile_intel_y || tile_mode == tile_intel_x) {
+			dbg("tile mode was: %02x, now: %02x\n", (*bo)->config.nv50.tile_mode, tile_mode);
+			/* Doesn't like intel tiling much.. */
+			(*bo)->config.nv50.tile_mode = tile_mode;
+		}
+	} else {
+		ret = nouveau_bo_prime_handle_ref(ndev, handle, bo);
+		close(handle);
+		if (ret < 0) {
+			fprintf(stderr, "receiving bo failed with %i %s\n",
+				ret, strerror(-ret));
+			return ret;
+		}
+		if ((*bo)->size < size) {
+			fprintf(stderr, "expected bo size to be at least %u, "
+				"but received %"PRIu64"\n", size, (*bo)->size);
+			nouveau_bo_ref(NULL, bo);
+			return -1;
+		}
+		dbg("prime flags %08x memtype %08x tile %08x\n", (*bo)->flags, (*bo)->config.nv50.memtype, (*bo)->config.nv50.tile_mode);
+		/* record the layout the caller asked for on the import */
+		(*bo)->config.nv50.memtype = memtype;
+		(*bo)->config.nv50.tile_mode = tile_mode;
+	}
+	dbg("size: %"PRIu64"\n", (*bo)->size);
+
+	return ret;
+}
+
+/* Append one dword to the push buffer and advance its write pointer */
+static inline void
+PUSH_DATA(struct nouveau_pushbuf *push, uint32_t data)
+{
+	push->cur[0] = data;
+	push->cur++;
+}
+
+/* Emit an NV04-style method header: size from bit 18 up, subchannel from
+ * bit 13 up, the method in the low bits */
+static inline void
+BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, int size)
+{
+	uint32_t header = 0x00000000 | (size << 18) | (subc << 13) | mthd;
+
+	PUSH_DATA (push, header);
+}
+
+/* Same header layout as BEGIN_NV04() with bit 30 (0x40000000) set */
+static inline void
+BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, int size)
+{
+	uint32_t header = 0x40000000 | (size << 18) | (subc << 13) | mthd;
+
+	PUSH_DATA (push, header);
+}
+
+/* Emit an NVC0-style method header: flag 0x20000000, size from bit 16 up,
+ * subchannel from bit 13 up, the method divided by 4 in the low bits */
+static inline void
+BEGIN_NVC0(struct nouveau_pushbuf *push, int subc, int mthd, int size)
+{
+	uint32_t header = 0x20000000 | (size << 16) | (subc << 13) | (mthd / 4);
+
+	PUSH_DATA (push, header);
+}
+
+/* Emit a method header in the format matching the chipset of ndev:
+ * NVC0-style from chipset 0xc0 on, NV04-style below */
+static inline void
+BEGIN_NVXX(struct nouveau_pushbuf *push, int subc, int mthd, int size)
+{
+	if (ndev->chipset >= 0xc0) {
+		BEGIN_NVC0(push, subc, mthd, size);
+		return;
+	}
+
+	BEGIN_NV04(push, subc, mthd, size);
+}
+
+/*
+ * Submit a trivial intel batch that references bo: MI_NOOP,
+ * MI_BATCH_BUFFER_END and a relocation entry for bo in the render domain,
+ * followed by a flush of the batch.
+ * NOTE(review): presumably the relocation is only there to make the kernel
+ * treat bo as used by this batch - confirm.
+ */
+static void
+noop_intel(drm_intel_bo *bo)
+{
+	BEGIN_BATCH(3);
+	OUT_BATCH(MI_NOOP);
+	OUT_BATCH(MI_BATCH_BUFFER_END);
+	OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER,
+			I915_GEM_DOMAIN_RENDER, 0);
+	ADVANCE_BATCH();
+
+	intel_batchbuffer_flush(batch);
+}
+
+/*
+ * Scan /sys/class/drm/card0..card8 and open the matching /dev/dri/cardN
+ * node for every device whose vendor id is 0x8086 (stored in intel_fd) or
+ * 0x10de (stored in nouveau_fd).  The scan stops at the first card whose
+ * vendor file is missing or cannot be opened.
+ *
+ * Returns 0 once the scan is over, whether or not both vendors were found,
+ * and -1 if a device node could not be opened.
+ */
+static int find_and_open_devices(void)
+{
+	int i;
+	char path[80], *line;
+	struct stat buf;
+	FILE *fl;
+	char vendor_id[8] = {};
+	int venid;
+	for (i = 0; i < 9; i++) {
+		snprintf(path, sizeof(path),
+			 "/sys/class/drm/card%d/device/vendor", i);
+		if (stat(path, &buf))
+			break;
+
+		fl = fopen(path, "r");
+		if (!fl)
+			break;
+
+		line = fgets(vendor_id, sizeof(vendor_id)-1, fl);
+		fclose(fl);
+		/* nothing read: do not parse the previous card's id again */
+		if (!line)
+			continue;
+
+		venid = strtoul(vendor_id, NULL, 16);
+		snprintf(path, sizeof(path), "/dev/dri/card%d", i);
+		/* open() reports failure with -1; 0 is a valid descriptor */
+		if (venid == 0x8086) {
+			intel_fd = open(path, O_RDWR);
+			if (intel_fd < 0)
+				return -1;
+		} else if (venid == 0x10de) {
+			nouveau_fd = open(path, O_RDWR);
+			if (nouveau_fd < 0)
+				return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Bring up everything needed on the nouveau side: wrap nouveau_fd into
+ * ndev, create the client, allocate and map the query buffer, create the
+ * FIFO channel, push buffer and buffer context, create the copy engine
+ * object and bind it to the copy subchannel.  Also picks the chipset
+ * specific values of tile_intel_y and tile_intel_x.
+ *
+ * Returns 0 on success, -1 for chipsets this test does not handle and the
+ * failing call's error code otherwise.
+ */
+static int init_nouveau(void)
+{
+	struct nv04_fifo nv04_data = { .vram = 0xbeef0201,
+				       .gart = 0xbeef0202 };
+	struct nvc0_fifo nvc0_data = { };
+	struct nouveau_fifo *fifo;
+	int size, ret;
+	uint32_t class;
+	void *data;
+
+	ret = nouveau_device_wrap(nouveau_fd, 0, &ndev);
+	if (ret < 0) {
+		fprintf(stderr,"failed to wrap nouveau device\n");
+		return ret;
+	}
+
+	ret = nouveau_client_new(ndev, &nclient);
+	if (ret < 0) {
+		fprintf(stderr,"failed to setup nouveau client\n");
+		return ret;
+	}
+
+	/* chipsets reported as lacking the copy engine */
+	if (ndev->chipset < 0xa3 || ndev->chipset == 0xaa || ndev->chipset == 0xac) {
+		fprintf(stderr, "Your card doesn't support PCOPY\n");
+		return -1;
+	}
+
+	// TODO: Get a kepler and add support for it
+	if (ndev->chipset >= 0xe0) {
+		fprintf(stderr, "Unsure how kepler works!\n");
+		return -1;
+	}
+	/* mapped 4096-byte GART buffer backing the query counter */
+	ret = nouveau_bo_new(ndev,  NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
+			     4096, 4096, NULL, &query_bo);
+	if (!ret)
+		ret = nouveau_bo_map(query_bo, NOUVEAU_BO_RDWR, nclient);
+	if (ret < 0) {
+		fprintf(stderr,"failed to setup query counter\n");
+		return ret;
+	}
+	query = query_bo->map;
+	*query = query_counter;
+
+	/* pick the copy engine class and the channel creation data that
+	 * match the chipset generation */
+	if (ndev->chipset < 0xc0) {
+		class = 0x85b5;
+		data = &nv04_data;
+		size = sizeof(nv04_data);
+	} else {
+		class = ndev->chipset < 0xe0 ? 0x490b5 : 0xa0b5;
+		data = &nvc0_data;
+		size = sizeof(nvc0_data);
+	}
+
+	ret = nouveau_object_new(&ndev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS,
+				 data, size, &nchannel);
+	if (ret) {
+		fprintf(stderr, "Error creating GPU channel: %d\n", ret);
+		if (ret == -ENODEV) {
+			fprintf(stderr, "Make sure nouveau_accel is active\n");
+			fprintf(stderr, "nvd9 is likely broken regardless\n");
+		}
+		return ret;
+	}
+
+	fifo = nchannel->data;
+
+	ret = nouveau_pushbuf_new(nclient, nchannel, 4, 32 * 1024,
+				  true, &npush);
+	if (ret) {
+		fprintf(stderr, "Error allocating DMA push buffer: %d\n", ret);
+		return ret;
+	}
+
+	ret = nouveau_bufctx_new(nclient, 1, &nbufctx);
+	if (ret) {
+		fprintf(stderr, "Error allocating buffer context: %d\n", ret);
+		return ret;
+	}
+
+	npush->user_priv = nbufctx;
+
+	/* Hope this is enough init for PCOPY */
+	/* the object handle is the full class value, the object class its
+	 * low 16 bits */
+	ret = nouveau_object_new(nchannel, class, class & 0xffff, NULL, 0, &pcopy);
+	if (ret) {
+		fprintf(stderr, "Failed to allocate pcopy: %d\n", ret);
+		return ret;
+	}
+	ret = nouveau_pushbuf_space(npush, 512, 0, 0);
+	if (ret) {
+		fprintf(stderr, "No space in pushbuf: %d\n", ret);
+		return ret;
+	}
+	/* bind the copy object to its subchannel; below chipset 0xc0 three
+	 * further dwords carrying the channel's vram handle follow at
+	 * method 0x0180 */
+	if (ndev->chipset < 0xc0) {
+		struct nv04_fifo *nv04_fifo = (struct nv04_fifo*)fifo;
+		tile_intel_y = 0x3e;
+		tile_intel_x = 0x13;
+
+		BEGIN_NV04(npush, NV01_SUBC(COPY, OBJECT), 1);
+		PUSH_DATA(npush, pcopy->handle);
+		BEGIN_NV04(npush, SUBC_COPY(0x0180), 3);
+		PUSH_DATA(npush, nv04_fifo->vram);
+		PUSH_DATA(npush, nv04_fifo->vram);
+		PUSH_DATA(npush, nv04_fifo->vram);
+	} else {
+		tile_intel_y = 0x2e;
+		tile_intel_x = 0x03;
+		BEGIN_NVC0(npush, NV01_SUBC(COPY, OBJECT), 1);
+		PUSH_DATA(npush, pcopy->handle);
+	}
+	/* the result of the kick is not checked; ret still holds the 0 from
+	 * nouveau_pushbuf_space() */
+	nouveau_pushbuf_kick(npush, npush->channel);
+	return ret;
+}
+
+/*
+ * Write four identical 32-bit words to ptr, each being val OR-ed with
+ * itself shifted left by 8, 16 and 24 bits.  For a value below 0x100 this
+ * fills 16 bytes with that byte.
+ */
+static void fill16(void *ptr, uint32_t val)
+{
+	uint32_t *words = ptr;
+	uint32_t pattern = val;
+	int i;
+
+	pattern |= val << 8;
+	pattern |= val << 16;
+	pattern |= val << 24;
+
+	for (i = 0; i < 4; i++)
+		words[i] = pattern;
+}
+
+#define TILE_SIZE 4096
+
+/*
+ * Software Y tiling: rearrange the linear w x h byte image at in into
+ * TILE_SIZE-byte tiles at out.  Every tile covers 128 x 32 bytes of the
+ * source and is stored as 8 columns of 32 rows with 16 bytes per row.
+ * w has to be a multiple of 128 and h a multiple of 32.  Always returns 0.
+ */
+static int swtile_y(uint8_t *out, const uint8_t *in, int w, int h)
+{
+	uint8_t *const last = out + w * h;
+	uint32_t tile_x, tile_y, col, row;
+
+	assert(!(w % 128));
+	assert(!(h % 32));
+
+	for (tile_y = 0; tile_y < h; tile_y += 32) {
+		for (tile_x = 0; tile_x < w; tile_x += 128) {
+			for (col = 0; col < 8; ++col) {
+				for (row = 0; row < 32; ++row) {
+					uint32_t dst = (col * 32 + row) * 16;
+					uint32_t src = (tile_y + row) * w + (tile_x + 16 * col);
+
+					assert(dst < TILE_SIZE);
+					assert(src < w*h);
+
+					// To do the Y tiling quirk:
+					// dst = dst ^ (((dst >> 9) & 1) << 6);
+					memcpy(out + dst, in + src, 16);
+				}
+			}
+			/* next tile in the output */
+			out += TILE_SIZE;
+		}
+	}
+	assert(out == last);
+	return 0;
+}
+
+/*
+ * Software X tiling: rearrange the linear w x h byte image at in into
+ * TILE_SIZE-byte tiles at out.  Every tile covers 512 x 8 bytes of the
+ * source and is stored as 8 rows of 512 bytes.  w has to be a multiple of
+ * 512 and h a multiple of 8.  Always returns 0.
+ */
+static int swtile_x(uint8_t *out, const uint8_t *in, int w, int h)
+{
+	uint8_t *const last = out + w * h;
+	uint32_t tile_x, tile_y, row;
+
+	assert(!(w % 512));
+	assert(!(h % 8));
+
+	for (tile_y = 0; tile_y < h; tile_y += 8) {
+		for (tile_x = 0; tile_x < w; tile_x += 512) {
+			for (row = 0; row < 8; ++row) {
+				uint32_t dst = 512 * row;
+				uint32_t src = (tile_y + row) * w + tile_x;
+
+				assert(dst < TILE_SIZE);
+				assert(src < w*h);
+				memcpy(out + dst, in + src, 512);
+			}
+			/* next tile in the output */
+			out += TILE_SIZE;
+		}
+	}
+	assert(out == last);
+	return 0;
+}
+
+#if 0
+/* X tiling is approximately linear, except tiled in 512x8 blocks, so let's abuse that
+ *
+ * How? Whole contiguous tiles can be copied safely as if linear
+ */
+
+/*
+ * Disabled (compiled out by the surrounding #if 0) variant of perform_copy()
+ * that splits a copy into column slices of at most 512 bytes so that one side
+ * may be an intel X-tiled buffer, addressing the tiled side by hand.
+ *
+ * nvbo/dst/dst_x/dst_y describe the destination, nvbi/src/src_x/src_y the
+ * source, w and h the size of the rectangle.  Returns -1 when pushbuf space
+ * or buffer references cannot be obtained, 0 otherwise.
+ */
+static int perform_copy_hack(struct nouveau_bo *nvbo, const rect *dst,
+			     uint32_t dst_x, uint32_t dst_y,
+			     struct nouveau_bo *nvbi, const rect *src,
+			     uint32_t src_x, uint32_t src_y,
+			     uint32_t w, uint32_t h)
+{
+	struct nouveau_pushbuf_refn refs[] = {
+		{ nvbi, (nvbi->flags & NOUVEAU_BO_APER) | NOUVEAU_BO_RD },
+		{ nvbo, (nvbo->flags & NOUVEAU_BO_APER) | NOUVEAU_BO_WR },
+		{ query_bo, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR }
+	};
+	uint32_t exec = 0x00000000;
+	uint32_t src_off = 0, dst_off = 0;
+	struct nouveau_pushbuf *push = npush;
+	uint32_t dw, tiles, tile_src = nvbi->config.nv50.tile_mode, tile_dst = nvbo->config.nv50.tile_mode;
+
+	/* width of the first slice: up to the next 512-byte tile boundary */
+	/* NOTE(review): the source branch uses '& 512' where the destination
+	 * branch uses '% 512' - looks like a typo, confirm before enabling */
+	if (tile_src == tile_intel_x)
+		dw = 512 - (src_x & 512);
+	else
+		dw = 512 - (dst_x % 512);
+
+	if (!nvbi->config.nv50.memtype)
+		exec |= 0x00000010;
+	if (!tile_src)
+		src_off = src_y * src->pitch + src_x;
+
+	if (!nvbo->config.nv50.memtype)
+		exec |= 0x00000100;
+	if (!tile_dst)
+		dst_off = dst_y * dst->pitch + dst_x;
+
+	if (dw > w)
+		dw = w;
+	/* first (possibly partial) slice plus the remaining 512-byte slices */
+	tiles = 1 + ((w - dw + 511)/512);
+
+	if (nouveau_pushbuf_space(push, 8 + tiles * 32, 0, 0) ||
+	    nouveau_pushbuf_refn(push, refs, 3))
+		return -1;
+
+	/* one iteration per slice; dw is recomputed for the next slice */
+	for (; w; w -= dw, src_x += dw, dst_x += dw, dw = w > 512 ? 512 : w) {
+		if (tile_src == tile_intel_x) {
+			/* Find the correct tiled offset */
+			/* NOTE(review): the source offset is derived from
+			 * dst->pitch, not src->pitch - confirm this is intended */
+			src_off = 8 * dst->pitch * (src_y / 8);
+			src_off += src_x / 512 * 4096;
+			src_off += (src_x % 512) + 512 * (src_y % 8);
+
+			if (!tile_dst)
+				dst_off = dst_y * dst->pitch + dst_x;
+		} else {
+			if (!tile_src)
+				src_off = src_y * src->pitch + src_x;
+
+			dst_off = 8 * dst->pitch * (dst_y / 8);
+			dst_off += dst_x / 512 * 4096;
+			dst_off += (dst_x % 512) + 512 * (dst_y % 8);
+		}
+
+		fprintf(stderr, "Copying from %u to %u for %u bytes\n", src_x, dst_x, dw);
+		fprintf(stderr, "src ofs: %u, dst ofs: %u\n", src_off, dst_off);
+		/* source surface description */
+		BEGIN_NVXX(push, SUBC_COPY(0x0200), 7);
+		PUSH_DATA (push, tile_src == tile_intel_x ? 0 : nvbi->config.nv50.tile_mode);
+		PUSH_DATA (push, src->pitch);
+		PUSH_DATA (push, src->h);
+		PUSH_DATA (push, 1);
+		PUSH_DATA (push, 0);
+		PUSH_DATA (push, src_x);
+		PUSH_DATA (push, src_y);
+
+		/* destination surface description */
+		BEGIN_NVXX(push, SUBC_COPY(0x0220), 7);
+		PUSH_DATA (push, tile_dst == tile_intel_x ? 0 : nvbo->config.nv50.tile_mode);
+		PUSH_DATA (push, dst->pitch);
+		PUSH_DATA (push, dst->h);
+		PUSH_DATA (push, 1);
+		PUSH_DATA (push, 0);
+		PUSH_DATA (push, dst_x);
+		PUSH_DATA (push, dst_y);
+
+		/* addresses (high/low), pitches and size of this slice */
+		BEGIN_NVXX(push, SUBC_COPY(0x030c), 8);
+		PUSH_DATA (push, (nvbi->offset + src_off) >> 32);
+		PUSH_DATA (push, (nvbi->offset + src_off));
+		PUSH_DATA (push, (nvbo->offset + dst_off) >> 32);
+		PUSH_DATA (push, (nvbo->offset + dst_off));
+		PUSH_DATA (push, src->pitch);
+		PUSH_DATA (push, dst->pitch);
+		PUSH_DATA (push, dw);
+		PUSH_DATA (push, h);
+
+		/* only the last slice writes the completion query */
+		if (w == dw) {
+			exec |= 0x3000; /* QUERY|QUERY_SHORT */
+			BEGIN_NVXX(push, SUBC_COPY(0x0338), 3);
+			PUSH_DATA (push, (query_bo->offset) >> 32);
+			PUSH_DATA (push, (query_bo->offset));
+			PUSH_DATA (push, ++query_counter);
+		}
+
+		BEGIN_NVXX(push, SUBC_COPY(0x0300), 1);
+		PUSH_DATA (push, exec);
+	}
+	nouveau_pushbuf_kick(push, push->channel);
+	/* busy-wait (no sleep) until the query value reaches query_counter */
+	while (*query < query_counter) { }
+	return 0;
+}
+#endif
+
+/*
+ * Submit one rectangle copy on the copy subchannel and wait for it to finish.
+ *
+ * nvbo/dst describe the destination buffer and its layout, nvbi/src the
+ * source.  (dst_x, dst_y) and (src_x, src_y) are the top-left corners inside
+ * the respective buffers; w and h give the size of the rectangle (x offsets
+ * and w are divided by cpp, which is fixed to 1 here).
+ *
+ * Returns -1 if pushbuf space or the buffer references cannot be obtained,
+ * otherwise 0 once the query value has reached the new query_counter.
+ */
+static int perform_copy(struct nouveau_bo *nvbo, const rect *dst,
+			uint32_t dst_x, uint32_t dst_y,
+			struct nouveau_bo *nvbi, const rect *src,
+			uint32_t src_x, uint32_t src_y,
+			uint32_t w, uint32_t h)
+{
+#if 0
+	/* Too much effort */
+	if (nvbi->config.nv50.tile_mode == tile_intel_x &&
+	    nvbo->config.nv50.tile_mode == tile_intel_x)
+		return -1;
+	else if (nvbi->config.nv50.tile_mode == tile_intel_x ||
+		 nvbo->config.nv50.tile_mode == tile_intel_x)
+		return perform_copy_hack(nvbo, dst, dst_x, dst_y,
+					 nvbi, src, src_x, src_y, w, h);
+#endif
+	/* source is read, destination is written, query_bo receives the fence */
+	struct nouveau_pushbuf_refn refs[] = {
+		{ nvbi, (nvbi->flags & NOUVEAU_BO_APER) | NOUVEAU_BO_RD },
+		{ nvbo, (nvbo->flags & NOUVEAU_BO_APER) | NOUVEAU_BO_WR },
+		{ query_bo, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR }
+	};
+	uint32_t cpp = 1, exec = 0x00003000; /* QUERY|QUERY_SHORT|FORMAT */
+	uint32_t src_off = 0, dst_off = 0;
+	struct nouveau_pushbuf *push = npush;
+
+	if (nvbi->config.nv50.tile_mode == tile_intel_y)
+		dbg("src is y-tiled\n");
+	if (nvbo->config.nv50.tile_mode == tile_intel_y)
+		dbg("dst is y-tiled\n");
+
+	if (nouveau_pushbuf_space(push, 64, 0, 0) ||
+	    nouveau_pushbuf_refn(push, refs, 3))
+		return -1;
+
+	/* tile_mode 0: apply the x/y origin as a byte offset and set an exec
+	 * flag (presumably "source is linear" - confirm against hw docs) */
+	if (!nvbi->config.nv50.tile_mode) {
+		src_off = src_y * src->pitch + src_x;
+		exec |= 0x00000010;
+	}
+
+	/* same for the destination, with its own exec flag */
+	if (!nvbo->config.nv50.tile_mode) {
+		dst_off = dst_y * dst->pitch + dst_x;
+		exec |= 0x00000100;
+	}
+
+	/* source surface: tile mode, pitch, height, two constants, x, y */
+	BEGIN_NVXX(push, SUBC_COPY(0x0200), 7);
+	PUSH_DATA (push, nvbi->config.nv50.tile_mode);
+	PUSH_DATA (push, src->pitch / cpp);
+	PUSH_DATA (push, src->h);
+	PUSH_DATA (push, 1);
+	PUSH_DATA (push, 0);
+	PUSH_DATA (push, src_x / cpp);
+	PUSH_DATA (push, src_y);
+
+	/* destination surface, same layout as the source block above */
+	BEGIN_NVXX(push, SUBC_COPY(0x0220), 7);
+	PUSH_DATA (push, nvbo->config.nv50.tile_mode);
+	PUSH_DATA (push, dst->pitch / cpp);
+	PUSH_DATA (push, dst->h);
+	PUSH_DATA (push, 1);
+	PUSH_DATA (push, 0);
+	PUSH_DATA (push, dst_x / cpp);
+	PUSH_DATA (push, dst_y);
+
+	/* addresses as high/low 32-bit halves, pitches, size and a constant
+	 * (presumably the FORMAT word named in the exec comment above) */
+	BEGIN_NVXX(push, SUBC_COPY(0x030c), 9);
+	PUSH_DATA (push, (nvbi->offset + src_off) >> 32);
+	PUSH_DATA (push, (nvbi->offset + src_off));
+	PUSH_DATA (push, (nvbo->offset + dst_off) >> 32);
+	PUSH_DATA (push, (nvbo->offset + dst_off));
+	PUSH_DATA (push, src->pitch);
+	PUSH_DATA (push, dst->pitch);
+	PUSH_DATA (push, w / cpp);
+	PUSH_DATA (push, h);
+	PUSH_DATA (push, 0x03333120);
+
+	/* query address and the value to write there on completion */
+	BEGIN_NVXX(push, SUBC_COPY(0x0338), 3);
+	PUSH_DATA (push, (query_bo->offset) >> 32);
+	PUSH_DATA (push, (query_bo->offset));
+	PUSH_DATA (push, ++query_counter);
+
+	BEGIN_NVXX(push, SUBC_COPY(0x0300), 1);
+	PUSH_DATA (push, exec);
+
+	nouveau_pushbuf_kick(push, push->channel);
+	/* poll once per millisecond until the query value catches up */
+	while (*query < query_counter) { usleep(1000); }
+	return 0;
+}
+
+/*
+ * Verify a tiled buffer of w x h tiles (4096 bytes each), read in 16-byte
+ * groups: the first tile must count 0..255 (each byte of group i equal to i),
+ * the rest of the first tile row must be 0x3e, the first tile of every later
+ * row 0x7e and all remaining tiles 0xce.
+ *
+ * Returns 0 on success, -1 (after printing the offending group) otherwise.
+ */
+static int check1_macro(uint32_t *p, uint32_t w, uint32_t h)
+{
+	uint32_t grp, want, tile_row;
+
+	/* first tile: increasing pattern */
+	grp = 0;
+	while (grp < 256) {
+		want = (grp) | (grp << 8) | (grp << 16) | (grp << 24);
+		if (!(p[0] == want && p[1] == want && p[2] == want && p[3] == want)) {
+			fprintf(stderr, "Retile check failed in first tile!\n");
+			fprintf(stderr, "%08x %08x %08x %08x instead of %08x\n",
+				p[0], p[1], p[2], p[3], want);
+			return -1;
+		}
+		++grp;
+		p += 4;
+	}
+
+	/* remaining tiles of the first tile row */
+	want = 0x3e3e3e3e;
+	grp = 0;
+	while (grp < 256 * (w-1)) {
+		if (!(p[0] == want && p[1] == want && p[2] == want && p[3] == want)) {
+			fprintf(stderr, "Retile check failed in second tile!\n");
+			fprintf(stderr, "%08x %08x %08x %08x instead of %08x\n",
+				p[0], p[1], p[2], p[3], want);
+			return -1;
+		}
+		++grp;
+		p += 4;
+	}
+
+	for (tile_row = 1; tile_row < h; ++tile_row) {
+		/* first tile of this tile row */
+		want = 0x7e7e7e7e;
+		grp = 0;
+		while (grp < 256) {
+			if (!(p[0] == want && p[1] == want && p[2] == want && p[3] == want)) {
+				fprintf(stderr, "Retile check failed in third tile!\n");
+				fprintf(stderr, "%08x %08x %08x %08x instead of %08x\n",
+					p[0], p[1], p[2], p[3], want);
+				return -1;
+			}
+			++grp;
+			p += 4;
+		}
+
+		/* remaining tiles of this tile row */
+		want = 0xcececece;
+		grp = 0;
+		while (grp < 256 * (w-1)) {
+			if (!(p[0] == want && p[1] == want && p[2] == want && p[3] == want)) {
+				fprintf(stderr, "Retile check failed in fourth tile!\n");
+				fprintf(stderr, "%08x %08x %08x %08x instead of %08x\n",
+					p[0], p[1], p[2], p[3], want);
+				return -1;
+			}
+			++grp;
+			p += 4;
+		}
+	}
+	return 0;
+}
+
+/*
+ * test 1: see if we can copy from linear to intel Y format safely.
+ *
+ * Fills a linear 256x64 buffer with a distinct pattern per 128x32 tile,
+ * tiles it into a second buffer (with perform_copy() when pcopy is set,
+ * otherwise in software) and verifies the tiled result with check1_macro().
+ *
+ * Returns 0 on success, a non-zero value on allocation, copy or check failure.
+ */
+static int test1_macro(void)
+{
+	int ret, prime_fd = -1;
+	struct nouveau_bo *nvbo = NULL, *nvbi = NULL;
+	rect dst, src;
+	uint8_t *ptr;
+	uint32_t w = 2 * 128, h = 2 * 32, x, y;
+
+	ret = nv_bo_alloc(&nvbi, &src, w, h, 0, -1, NOUVEAU_BO_GART);
+	if (ret >= 0)
+		ret = nv_bo_alloc(&nvbo, &dst, w, h, tile_intel_y, -1, NOUVEAU_BO_GART);
+	if (ret < 0)
+		goto out;
+
+	nouveau_bo_set_prime(nvbo, &prime_fd);
+
+	/* Set up something for our tile that should map into the first
+	 * y-major tile, assuming my understanding of documentation is
+	 * correct
+	 */
+
+	/* First tile should be read out in groups of 16 bytes that
+	 * are all set to a linear increasing value..
+	 */
+	ptr = nvbi->map;
+	for (x = 0; x < 128; x += 16)
+		for (y = 0; y < 32; ++y)
+			fill16(&ptr[y * w + x], x * 2 + y);
+
+	/* second tile */
+	for (x = 128; x < w; x += 16)
+		for (y = 0; y < 32; ++y)
+			fill16(&ptr[y * w + x], 0x3e);
+
+	/* third tile */
+	for (x = 0; x < 128; x += 16)
+		for (y = 32; y < h; ++y)
+			fill16(&ptr[y * w + x], 0x7e);
+
+	/* last tile */
+	for (x = 128; x < w; x += 16)
+		for (y = 32; y < h; ++y)
+			fill16(&ptr[y * w + x], 0xce);
+	/* poison the destination so stale data cannot pass the check */
+	memset(nvbo->map, 0xfc, w * h);
+
+	if (pcopy)
+		ret = perform_copy(nvbo, &dst, 0, 0, nvbi, &src, 0, 0, w, h);
+	else
+		ret = swtile_y(nvbo->map, nvbi->map, w, h);
+	if (!ret)
+		ret = check1_macro(nvbo->map, w/128, h/32);
+
+out:
+	nouveau_bo_ref(NULL, &nvbo);
+	nouveau_bo_ref(NULL, &nvbi);
+	/* prime_fd is still -1 when allocation or the export failed */
+	if (prime_fd >= 0)
+		close(prime_fd);
+	return ret;
+}
+
+/*
+ * Print 32 rows of 16 bytes starting at map to stderr as hex.
+ * Always returns -1 so callers can use it directly as their failure result.
+ */
+static int dump_line(uint8_t *map)
+{
+	uint32_t row, col;
+
+	fprintf(stderr, "Dumping sub-tile:\n");
+	for (row = 0; row < 32; ++row, map += 16) {
+		/* 15 space-separated bytes, then the 16th ends the line */
+		for (col = 0; col < 15; ++col)
+			fprintf(stderr, "%02x ", map[col]);
+		fprintf(stderr, "%02x\n", map[15]);
+	}
+	return -1;
+}
+
+/*
+ * Verify that the w x h rectangle at the start of map (rows pitch bytes
+ * apart) holds the pattern ((y & 3) << 6) | (x & 0x3f) in every byte.
+ * lines, dst_x and dst_y are accepted but not used.
+ *
+ * Returns 0 on success; on the first mismatch it reports the position, dumps
+ * memory starting at the failing row and returns dump_line()'s result (-1).
+ */
+static int check1_micro(void *map, uint32_t pitch, uint32_t lines,
+			uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h)
+{
+	uint8_t *row = map;
+	uint32_t col, line;
+
+	/* check only the relevant subrectangle [0..w) [0...h) */
+	for (line = 0; line < h; ++line) {
+		for (col = 0; col < w; ++col) {
+			uint8_t expected = ((line & 3) << 6) | (col & 0x3f);
+
+			if (row[col] == expected)
+				continue;
+
+			fprintf(stderr, "failed check at x=%u y=%u, expected %02x got %02x\n",
+				col, line, expected, row[col]);
+			return dump_line(row);
+		}
+		row += pitch;
+	}
+
+	return 0;
+}
+
+/*
+ * test 1, but check micro format, should be unaffected by bit9 swizzling.
+ *
+ * Allocates a Y-tiled intel bo, shares it with nouveau through a prime fd,
+ * writes a position-encoding pattern through a linear and an nvidia-tiled
+ * buffer into it and reads the result back through the intel GTT mapping.
+ *
+ * Returns 0 on success, non-zero on any setup, copy or check failure.
+ */
+static int test1_micro(void)
+{
+	struct nouveau_bo *bo_intel = NULL, *bo_nvidia = NULL, *bo_linear = NULL;
+	rect intel, nvidia, linear;
+	int ret = -1;
+	uint32_t tiling = I915_TILING_Y;
+
+	uint32_t src_x = 0, src_y = 0;
+	uint32_t dst_x = 0, dst_y = 0;
+	uint32_t x, y, w = 256, h = 64;
+
+	drm_intel_bo *test_intel_bo;
+	/* start invalid so a failed export is detected reliably */
+	int prime_fd = -1;
+
+	test_intel_bo = drm_intel_bo_alloc(bufmgr, "test bo", w * h, 4096);
+	if (!test_intel_bo)
+		return -1;
+	drm_intel_bo_set_tiling(test_intel_bo, &tiling, w);
+	if (tiling != I915_TILING_Y) {
+		fprintf(stderr, "Couldn't set y tiling\n");
+		goto out;
+	}
+	ret = drm_intel_gem_bo_map_gtt(test_intel_bo);
+	if (ret)
+		goto out;
+
+	drm_intel_bo_gem_export_to_prime(test_intel_bo, &prime_fd);
+	if (prime_fd < 0) {
+		/* ret is 0 here; the bo is released exactly once at out: */
+		ret = -1;
+		goto out;
+	}
+	noop_intel(test_intel_bo);
+
+	ret = nv_bo_alloc(&bo_intel, &intel, w, h, tile_intel_y, prime_fd, 0);
+	if (!ret)
+		ret = nv_bo_alloc(&bo_nvidia, &nvidia, w, h, 0x10, -1, NOUVEAU_BO_VRAM);
+	if (!ret)
+		ret = nv_bo_alloc(&bo_linear, &linear, w, h, 0, -1, NOUVEAU_BO_GART);
+	if (ret)
+		goto out;
+
+	for (y = 0; y < linear.h; ++y) {
+		uint8_t *map = bo_linear->map;
+		map += y * linear.pitch;
+		for (x = 0; x < linear.pitch; ++x) {
+			uint8_t pos = x & 0x3f;
+			/* low 4 bits: micro tile pos */
+			/* 2 bits: x pos in tile (wraps) */
+			/* 2 bits: y pos in tile (wraps) */
+			pos |= (y & 3) << 6;
+			map[x] = pos;
+		}
+	}
+
+	/* stage the pattern in the nvidia-tiled buffer first */
+	ret = perform_copy(bo_nvidia, &nvidia, 0, 0, bo_linear, &linear, 0, 0, nvidia.pitch, nvidia.h);
+	if (ret)
+		goto out;
+
+	/* Perform the actual sub rectangle copy */
+	if (pcopy)
+		ret = perform_copy(bo_intel, &intel, dst_x, dst_y, bo_nvidia, &nvidia, src_x, src_y, w, h);
+	else
+		ret = swtile_y(test_intel_bo->virtual, bo_linear->map, w, h);
+	if (ret)
+		goto out;
+
+	noop_intel(test_intel_bo);
+	ret = check1_micro(test_intel_bo->virtual, intel.pitch, intel.h, dst_x, dst_y, w, h);
+
+out:
+	nouveau_bo_ref(NULL, &bo_linear);
+	nouveau_bo_ref(NULL, &bo_nvidia);
+	nouveau_bo_ref(NULL, &bo_intel);
+	drm_intel_bo_unreference(test_intel_bo);
+	return ret;
+}
+
+/*
+ * Verify a linear view, read in 16-byte groups, of the four-tile test image:
+ * in rows 0..31 the first 8 groups of row j hold the byte value i*32 + j
+ * (i = group index) and the remaining groups 0x3e; in rows 32..h-1 the first
+ * 8 groups hold 0x7e and the remaining groups 0xce.  After each row the
+ * pointer skips (pitch - w) bytes.  lines, dst_x and dst_y are not used.
+ *
+ * Returns 0 on success, -1 (after printing the offending group) otherwise.
+ */
+static int check1_swizzle(uint32_t *p, uint32_t pitch, uint32_t lines,
+			  uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h)
+{
+	uint32_t grp, want, row;
+
+	for (row = 0; row < 32; ++row) {
+		/* left tile: value encodes group and row */
+		for (grp = 0; grp < 8; ++grp) {
+			want = (grp * 32) + row;
+			want = (want) | (want << 8) | (want << 16) | (want << 24);
+			if (!(p[0] == want && p[1] == want && p[2] == want && p[3] == want)) {
+				fprintf(stderr, "Retile check failed in first tile!\n");
+				fprintf(stderr, "%08x %08x %08x %08x instead of %08x\n",
+					p[0], p[1], p[2], p[3], want);
+				return -1;
+			}
+			p += 4;
+		}
+
+		/* rest of the row, continuing from group 8 */
+		want = 0x3e3e3e3e;
+		while (grp < w/16) {
+			if (!(p[0] == want && p[1] == want && p[2] == want && p[3] == want)) {
+				fprintf(stderr, "Retile check failed in second tile!\n");
+				fprintf(stderr, "%08x %08x %08x %08x instead of %08x\n",
+					p[0], p[1], p[2], p[3], want);
+				return -1;
+			}
+			++grp;
+			p += 4;
+		}
+		p += (pitch - w)/4;
+	}
+
+	for (row = 32; row < h; ++row) {
+		want = 0x7e7e7e7e;
+		for (grp = 0; grp < 8; ++grp) {
+			if (!(p[0] == want && p[1] == want && p[2] == want && p[3] == want)) {
+				fprintf(stderr, "Retile check failed in third tile!\n");
+				fprintf(stderr, "%08x %08x %08x %08x instead of %08x\n",
+					p[0], p[1], p[2], p[3], want);
+				return -1;
+			}
+			p += 4;
+		}
+
+		want = 0xcececece;
+		while (grp < w/16) {
+			if (!(p[0] == want && p[1] == want && p[2] == want && p[3] == want)) {
+				fprintf(stderr, "Retile check failed in fourth tile!\n");
+				fprintf(stderr, "%08x %08x %08x %08x instead of %08x\n",
+					p[0], p[1], p[2], p[3], want);
+				return -1;
+			}
+			++grp;
+			p += 4;
+		}
+		p += (pitch - w)/4;
+	}
+	return 0;
+}
+
+/*
+ * Create a new bo, set tiling to y, and see if macro swizzling is done
+ * correctly.
+ *
+ * The four-tile pattern is written to a linear buffer, copied through an
+ * nvidia-tiled buffer into the shared Y-tiled intel bo and then verified
+ * through the intel GTT mapping with check1_swizzle().
+ *
+ * Returns 0 on success, non-zero on any setup, copy or check failure.
+ */
+static int test1_swizzle(void)
+{
+	struct nouveau_bo *bo_intel = NULL, *bo_nvidia = NULL, *bo_linear = NULL;
+	rect intel, nvidia, linear;
+	int ret = -1;
+	uint32_t tiling = I915_TILING_Y;
+
+	uint32_t src_x = 0, src_y = 0;
+	uint32_t dst_x = 0, dst_y = 0;
+	uint32_t x, y, w = 256, h = 64;
+	uint8_t *ptr;
+
+	drm_intel_bo *test_intel_bo;
+	/* start invalid so a failed export is detected reliably */
+	int prime_fd = -1;
+
+	test_intel_bo = drm_intel_bo_alloc(bufmgr, "test bo", w * h, 4096);
+	if (!test_intel_bo)
+		return -1;
+	drm_intel_bo_set_tiling(test_intel_bo, &tiling, w);
+	if (tiling != I915_TILING_Y) {
+		fprintf(stderr, "Couldn't set y tiling\n");
+		goto out;
+	}
+	ret = drm_intel_gem_bo_map_gtt(test_intel_bo);
+	if (ret)
+		goto out;
+
+	drm_intel_bo_gem_export_to_prime(test_intel_bo, &prime_fd);
+	if (prime_fd < 0) {
+		/* ret is 0 here; the bo is released exactly once at out: */
+		ret = -1;
+		goto out;
+	}
+
+	ret = nv_bo_alloc(&bo_intel, &intel, w, h, tile_intel_y, prime_fd, 0);
+	if (!ret)
+		ret = nv_bo_alloc(&bo_nvidia, &nvidia, w, h, 0x10, -1, NOUVEAU_BO_VRAM);
+	if (!ret)
+		ret = nv_bo_alloc(&bo_linear, &linear, w, h, 0, -1, NOUVEAU_BO_GART);
+	if (ret)
+		goto out;
+
+	noop_intel(test_intel_bo);
+	/* first tile: every 16-byte group gets its own value */
+	ptr = bo_linear->map;
+	for (x = 0; x < 128; x += 16)
+		for (y = 0; y < 32; ++y)
+			fill16(&ptr[y * w + x], x * 2 + y);
+
+	/* second tile */
+	for (x = 128; x < w; x += 16)
+		for (y = 0; y < 32; ++y)
+			fill16(&ptr[y * w + x], 0x3e);
+
+	/* third tile */
+	for (x = 0; x < 128; x += 16)
+		for (y = 32; y < h; ++y)
+			fill16(&ptr[y * w + x], 0x7e);
+
+	/* last tile */
+	for (x = 128; x < w; x += 16)
+		for (y = 32; y < h; ++y)
+			fill16(&ptr[y * w + x], 0xce);
+
+	/* stage the pattern in the nvidia-tiled buffer first */
+	ret = perform_copy(bo_nvidia, &nvidia, 0, 0, bo_linear, &linear, 0, 0, nvidia.pitch, nvidia.h);
+	if (ret)
+		goto out;
+
+	/* Perform the actual sub rectangle copy */
+	ret = perform_copy(bo_intel, &intel, dst_x, dst_y, bo_nvidia, &nvidia, src_x, src_y, w, h);
+	if (ret)
+		goto out;
+	noop_intel(test_intel_bo);
+
+	ret = check1_swizzle(test_intel_bo->virtual, intel.pitch, intel.h, dst_x, dst_y, w, h);
+
+out:
+	nouveau_bo_ref(NULL, &bo_linear);
+	nouveau_bo_ref(NULL, &bo_nvidia);
+	nouveau_bo_ref(NULL, &bo_intel);
+	drm_intel_bo_unreference(test_intel_bo);
+	return ret;
+}
+
+/* test 2, see if we can copy from linear to intel X format safely
+ * Seems nvidia lacks a method to do it, so just keep this test
+ * as a reference for potential future tests. Software tiling is
+ * used for now
+ *
+ * Returns 0 on success, non-zero on allocation or check failure.
+ */
+static int test2(void)
+{
+	int ret;
+	struct nouveau_bo *nvbo = NULL, *nvbi = NULL;
+	rect dst, src;
+	uint8_t *ptr;
+	uint32_t w = 1024, h = 16, x, y;
+
+	ret = nv_bo_alloc(&nvbi, &src, w, h, 0, -1, NOUVEAU_BO_GART);
+	if (ret >= 0)
+		ret = nv_bo_alloc(&nvbo, &dst, w, h, tile_intel_x, -1, NOUVEAU_BO_GART);
+	if (ret < 0)
+		goto out;
+
+	/* Set up something for our tile that should map into the first
+	 * 512x8 x-major tile, assuming my understanding of documentation
+	 * is correct
+	 */
+
+	/* First tile should be read out in groups of 16 bytes that
+	 * are all set to a linear increasing value..
+	 */
+	ptr = nvbi->map;
+	for (y = 0; y < 8; ++y)
+		for (x = 0; x < 512; x += 16)
+			fill16(&ptr[y * w + x], (y * 512 + x)/16);
+
+	/* second tile */
+	for (y = 0; y < 8; ++y)
+		for (x = 512; x < w; x += 16)
+			fill16(&ptr[y * w + x], 0x3e);
+
+	/* third tile */
+	for (y = 8; y < h; ++y)
+		for (x = 0; x < 512; x += 16)
+			fill16(&ptr[y * w + x], 0x7e);
+
+	/* last tile */
+	for (y = 8; y < h; ++y)
+		for (x = 512; x < w; x += 16)
+			fill16(&ptr[y * w + x], 0xce);
+	/* poison the destination so stale data cannot pass the check */
+	memset(nvbo->map, 0xfc, w * h);
+
+	/* do this in software, there is no X major tiling in PCOPY (yet?) */
+	if (0 && pcopy)
+		ret = perform_copy(nvbo, &dst, 0, 0, nvbi, &src, 0, 0, w, h);
+	else
+		ret = swtile_x(nvbo->map, nvbi->map, w, h);
+	if (!ret)
+		ret = check1_macro(nvbo->map, w/512, h/8);
+
+out:
+	nouveau_bo_ref(NULL, &nvbo);
+	nouveau_bo_ref(NULL, &nvbi);
+	return ret;
+}
+
+/*
+ * Verify a linear read-back of pitch bytes by lines rows, one 32-bit word at
+ * a time: words whose byte column x and row y fall inside the sub-rectangle
+ * (sub_x, sub_y, sub_w, sub_h) must be 0x04040404, all others 0x80808080.
+ * A last word still holding the 0x03030303 poison means the read-back copy
+ * did not cover the whole buffer.
+ *
+ * Returns 0 on success, -1 after reporting the first problem.
+ */
+static int check3(const uint32_t *p, uint32_t pitch, uint32_t lines,
+		  uint32_t sub_x, uint32_t sub_y,
+		  uint32_t sub_w, uint32_t sub_h)
+{
+	const uint32_t x_end = sub_x + sub_w;
+	const uint32_t y_end = sub_y + sub_h;
+	uint32_t col, row;
+
+	if (p[pitch * lines / 4 - 1] == 0x03030303) {
+		fprintf(stderr, "copy failed: Not all lines have been copied back!\n");
+		return -1;
+	}
+
+	for (row = 0; row < lines; ++row) {
+		for (col = 0; col < pitch; col += 4) {
+			int inside = col >= sub_x && col < x_end &&
+				     row >= sub_y && row < y_end;
+			uint32_t expected = inside ? 0x04040404 : 0x80808080;
+
+			if (*p != expected) {
+				fprintf(stderr, "%u,%u should be %08x, but is %08x\n", col, row, expected, *p);
+				return -1;
+			}
+			++p;
+		}
+	}
+	return 0;
+}
+
+/* copy from nvidia bo to intel bo and copy to a linear bo to check if tiling went successfully
+ *
+ * tile_src is the tile mode used for the nouveau VRAM buffer, tile_dst the
+ * tile mode under which the shared intel buffer is accessed.
+ * Returns 0 on success, non-zero on any setup, copy or check failure.
+ */
+static int test3_base(int tile_src, int tile_dst)
+{
+	struct nouveau_bo *bo_intel = NULL, *bo_nvidia = NULL, *bo_linear = NULL;
+	rect intel, nvidia, linear;
+	int ret;
+	uint32_t cpp = 4;
+
+	/* sub-rectangle: x offsets and width are in bytes, y and h in rows */
+	uint32_t src_x = 1 * cpp, src_y = 1;
+	uint32_t dst_x = 2 * cpp, dst_y = 26;
+	uint32_t w = 298 * cpp, h = 298;
+
+	drm_intel_bo *test_intel_bo;
+	int prime_fd;
+
+	test_intel_bo = drm_intel_bo_alloc(bufmgr, "test bo", 2048 * cpp * 768, 4096);
+	if (!test_intel_bo)
+		return -1;
+
+	drm_intel_bo_gem_export_to_prime(test_intel_bo, &prime_fd);
+	if (prime_fd < 0) {
+		drm_intel_bo_unreference(test_intel_bo);
+		return -1;
+	}
+
+	ret = nv_bo_alloc(&bo_intel, &intel, 2048 * cpp, 768, tile_dst, prime_fd, 0);
+	if (!ret)
+		ret = nv_bo_alloc(&bo_nvidia, &nvidia, 300 * cpp, 300, tile_src, -1, NOUVEAU_BO_VRAM);
+	if (!ret)
+		ret = nv_bo_alloc(&bo_linear, &linear, 2048 * cpp, 768, 0, -1, NOUVEAU_BO_GART);
+	if (ret)
+		goto out;
+
+	/* step 1: fill the whole intel buffer with the background value 0x80 */
+	noop_intel(test_intel_bo);
+	memset(bo_linear->map, 0x80, bo_linear->size);
+	ret = perform_copy(bo_intel, &intel, 0, 0, bo_linear, &linear, 0, 0, linear.pitch, linear.h);
+	if (ret)
+		goto out;
+	noop_intel(test_intel_bo);
+
+	/* step 2: fill the nvidia buffer with the foreground value 0x04 */
+	memset(bo_linear->map, 0x04, bo_linear->size);
+	ret = perform_copy(bo_nvidia, &nvidia, 0, 0, bo_linear, &linear, 0, 0, nvidia.pitch, nvidia.h);
+	if (ret)
+		goto out;
+
+	/* Perform the actual sub rectangle copy */
+	noop_intel(test_intel_bo);
+	ret = perform_copy(bo_intel, &intel, dst_x, dst_y, bo_nvidia, &nvidia, src_x, src_y, w, h);
+	if (ret)
+		goto out;
+	noop_intel(test_intel_bo);
+
+	/* step 4: poison the linear buffer with 0x03, then read the intel
+	 * buffer back into it so check3() can inspect a linear image */
+	memset(bo_linear->map, 0x3, bo_linear->size);
+	noop_intel(test_intel_bo);
+	ret = perform_copy(bo_linear, &linear, 0, 0, bo_intel, &intel, 0, 0, intel.pitch, intel.h);
+	if (ret)
+		goto out;
+	noop_intel(test_intel_bo);
+
+	ret = check3(bo_linear->map, linear.pitch, linear.h, dst_x, dst_y, w, h);
+
+out:
+	nouveau_bo_ref(NULL, &bo_linear);
+	nouveau_bo_ref(NULL, &bo_nvidia);
+	nouveau_bo_ref(NULL, &bo_intel);
+	drm_intel_bo_unreference(test_intel_bo);
+	return ret;
+}
+
+/* Runs test3_base() with source tile mode 0x40 and destination tile_intel_y. */
+static int test3_1(void)
+{
+	/* nvidia tiling to intel */
+	return test3_base(0x40, tile_intel_y);
+}
+
+/* Runs test3_base() with source tile_intel_y and destination tile mode 0x40. */
+static int test3_2(void)
+{
+	/* intel tiling to nvidia */
+	return test3_base(tile_intel_y, 0x40);
+}
+
+/* Runs test3_base() with source tile_intel_y and a linear (0) destination. */
+static int test3_3(void)
+{
+	/* intel tiling to linear */
+	return test3_base(tile_intel_y, 0);
+}
+
+/* Runs test3_base() with a linear (0) source and destination tile_intel_y. */
+static int test3_4(void)
+{
+	/* linear tiling to intel */
+	return test3_base(0, tile_intel_y);
+}
+
+/* Runs test3_base() with both source and destination linear (tile mode 0). */
+static int test3_5(void)
+{
+	/* linear to linear */
+	return test3_base(0, 0);
+}
+
+/* Acquire when == SEQUENCE */
+#define SEMA_ACQUIRE_EQUAL 1
+
+/* Release, and write a 16 byte query structure to sema:
+ * { (uint32)seq, (uint32)0, (uint64)timestamp } */
+#define SEMA_WRITE_LONG 2
+
+/* Acquire when >= SEQUENCE */
+#define SEMA_ACQUIRE_GEQUAL 4
+
+/* Test only new style semaphores, old ones are AWFUL
+ *
+ * An intel bo is shared with nouveau and used as semaphore memory.  The GPU
+ * is queued to write 2, wait for == 3, write 4, wait for >= 5, write 6, wait
+ * for >= 7 and write 9, while the CPU checks each written value through the
+ * intel GTT mapping and stores the value that releases the next wait.
+ *
+ * Returns 0 on success, -1 on chipsets below 0x84, on setup failure or when
+ * an expected value is not observed after a 1 ms sleep.
+ */
+static int test_semaphore(void)
+{
+	drm_intel_bo *test_intel_bo = NULL;
+	struct nouveau_bo *sema_bo = NULL;
+	int ret = -1, prime_fd;
+	uint32_t *sema;
+	struct nouveau_pushbuf *push = npush;
+
+	if (ndev->chipset < 0x84)
+		return -1;
+
+	/* Should probably be kept in sysmem */
+	test_intel_bo = drm_intel_bo_alloc(bufmgr, "semaphore bo", 4096, 4096);
+	if (!test_intel_bo)
+		goto out;
+
+	drm_intel_bo_gem_export_to_prime(test_intel_bo, &prime_fd);
+	if (prime_fd < 0)
+		goto out;
+	ret = nouveau_bo_prime_handle_ref(ndev, prime_fd, &sema_bo);
+	close(prime_fd);
+	if (ret < 0)
+		goto out;
+
+	ret = drm_intel_gem_bo_map_gtt(test_intel_bo);
+	if (ret != 0) {
+		fprintf(stderr,"failed to map bo\n");
+		goto out;
+	}
+	/* the semaphore is the second dword of the bo, matching the
+	 * "offset + 4" address programmed below */
+	sema = test_intel_bo->virtual;
+	sema++;
+	*sema = 0;
+
+	ret = -1;
+	if (nouveau_pushbuf_space(push, 64, 0, 0) ||
+	    nouveau_pushbuf_refn(push, &(struct nouveau_pushbuf_refn)
+	    { sema_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR }, 1))
+		goto out;
+
+	if (ndev->chipset < 0xc0) {
+		struct nv04_fifo *nv04_fifo = nchannel->data;
+		/* kernel binds its own dma object here and overwrites old one,
+		 * so just rebind vram every time we submit
+		 */
+		BEGIN_NV04(npush, SUBC_COPY(0x0060), 1);
+		PUSH_DATA(npush, nv04_fifo->vram);
+	}
+	/* semaphore address (high, low), then the first release of value 2 */
+	BEGIN_NVXX(push, SUBC_COPY(0x0010), 4);
+	PUSH_DATA(push, sema_bo->offset >> 32);
+	PUSH_DATA(push, sema_bo->offset + 4);
+	PUSH_DATA(push, 2); // SEQUENCE
+	PUSH_DATA(push, SEMA_WRITE_LONG); // TRIGGER
+
+	/* wait until the CPU stores exactly 3, then write 4 */
+	BEGIN_NVXX(push, SUBC_COPY(0x0018), 2);
+	PUSH_DATA(push, 3);
+	PUSH_DATA(push, SEMA_ACQUIRE_EQUAL);
+	BEGIN_NVXX(push, SUBC_COPY(0x0018), 2);
+	PUSH_DATA(push, 4);
+	PUSH_DATA(push, SEMA_WRITE_LONG);
+
+	/* wait until the value is >= 5, then write 6 */
+	BEGIN_NVXX(push, SUBC_COPY(0x0018), 2);
+	PUSH_DATA(push, 5);
+	PUSH_DATA(push, SEMA_ACQUIRE_GEQUAL);
+	BEGIN_NVXX(push, SUBC_COPY(0x0018), 2);
+	PUSH_DATA(push, 6);
+	PUSH_DATA(push, SEMA_WRITE_LONG);
+
+	/* wait until the value is >= 7, then write 9 */
+	BEGIN_NVXX(push, SUBC_COPY(0x0018), 2);
+	PUSH_DATA(push, 7);
+	PUSH_DATA(push, SEMA_ACQUIRE_GEQUAL);
+	BEGIN_NVXX(push, SUBC_COPY(0x0018), 2);
+	PUSH_DATA(push, 9);
+	PUSH_DATA(push, SEMA_WRITE_LONG);
+	nouveau_pushbuf_kick(push, push->channel);
+
+	/* each step gives the GPU 1 ms to react before checking */
+	usleep(1000);
+	if (*sema != 2) {
+		fprintf(stderr, "new sema should be 2 is %u\n", *sema);
+		goto out;
+	}
+
+	*sema = 3;
+	usleep(1000);
+	if (*sema != 4) {
+		fprintf(stderr, "new sema should be 4 is %u\n", *sema);
+		goto out;
+	}
+
+	*sema = 5;
+	usleep(1000);
+	if (*sema != 6) {
+		fprintf(stderr, "new sema should be 6 is %u\n", *sema);
+		goto out;
+	}
+
+	/* 8 rather than 7: exercises the "greater" half of the >= acquire */
+	*sema = 8;
+	usleep(1000);
+	if (*sema != 9) {
+		fprintf(stderr, "new sema should be 9 is %u\n", *sema);
+		goto out;
+	}
+	ret = 0;
+
+out:
+	nouveau_bo_ref(NULL, &sema_bo);
+	if (test_intel_bo)
+		drm_intel_bo_unreference(test_intel_bo);
+	return ret;
+}
+
+/*
+ * Entry point: open the intel and nouveau devices, set up both buffer
+ * managers and an intel batch buffer, run every test and tear down.
+ *
+ * Returns the number of failed tests, the error from
+ * find_and_open_devices(), -1 if the intel bufmgr cannot be created, or 77
+ * when either GPU is missing or nouveau initialisation fails (presumably
+ * the "skipped" exit status of the test harness - confirm).
+ */
+int main(int argc, char **argv)
+{
+	int ret, failed = 0, run = 0;
+
+	ret = find_and_open_devices();
+	if (ret < 0)
+		return ret;
+
+	if (nouveau_fd == -1 || intel_fd == -1) {
+		fprintf(stderr,"failed to find intel and nouveau GPU\n");
+		return 77;
+	}
+
+	/* set up intel bufmgr */
+	bufmgr = drm_intel_bufmgr_gem_init(intel_fd, 4096);
+	if (!bufmgr)
+		return -1;
+	/* Do not enable reuse, we share (almost) all buffers. */
+	//drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+	/* set up nouveau bufmgr */
+	ret = init_nouveau();
+	if (ret < 0)
+		return 77;
+
+	/* set up an intel batch buffer */
+	devid = intel_get_drm_devid(intel_fd);
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+/* run one test, count it, and report it by name when it returns non-zero */
+#define xtest(x, args...) do { \
+	ret = ((x)(args)); \
+	++run; \
+	if (ret) { \
+		++failed; \
+		fprintf(stderr, "prime_pcopy: failed " #x "\n"); } \
+	} while (0)
+
+	xtest(test1_macro);
+	xtest(test1_micro);
+	xtest(test1_swizzle);
+	xtest(test2);
+	xtest(test3_1);
+	xtest(test3_2);
+	xtest(test3_3);
+	xtest(test3_4);
+	xtest(test3_5);
+	xtest(test_semaphore);
+
+	/* release nouveau objects before the client and device they belong to */
+	nouveau_bo_ref(NULL, &query_bo);
+	nouveau_object_del(&pcopy);
+	nouveau_bufctx_del(&nbufctx);
+	nouveau_pushbuf_del(&npush);
+	nouveau_object_del(&nchannel);
+
+	intel_batchbuffer_free(batch);
+
+	nouveau_client_del(&nclient);
+	nouveau_device_del(&ndev);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(intel_fd);
+	close(nouveau_fd);
+
+	printf("Tests: %u run, %u failed\n", run, failed);
+	return failed;
+}
diff --git a/tests/prime_nv_test.c b/tests/prime_nv_test.c
new file mode 100644
index 00000000..2269f84f
--- /dev/null
+++ b/tests/prime_nv_test.c
@@ -0,0 +1,582 @@
+/* basic set of prime tests between intel and nouveau */
+
+/* test list -
+   1. share buffer from intel -> nouveau.
+   2. share buffer from nouveau -> intel
+   3. share intel->nouveau, map on both, write intel, read nouveau
+   4. share intel->nouveau, blit intel fill, readback on nouveau
+   test 1 + map buffer, read/write, map other size.
+   do some hw actions on the buffer
+   some illegal operations -
+       close prime fd try and map
+
+   TODO add some nouveau rendering tests
+*/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+
+#include "i915_drm.h"
+#include "intel_bufmgr.h"
+#include "nouveau.h"
+#include "intel_gpu_tools.h"
+#include "intel_batchbuffer.h"
+
+int intel_fd = -1, nouveau_fd = -1;
+drm_intel_bufmgr *bufmgr;
+struct nouveau_device *ndev;
+struct nouveau_client *nclient;
+uint32_t devid;
+struct intel_batchbuffer *intel_batch;
+
+#define BO_SIZE (256*1024)
+
+/*
+ * Scan /sys/class/drm/card0 .. card8 and open the /dev/dri node of every card
+ * whose vendor id is 0x8086 (stored in intel_fd) or 0x10de (stored in
+ * nouveau_fd).  Scanning stops at the first card whose vendor file is
+ * missing or cannot be read.
+ *
+ * Returns 0 when the scan completes (either fd may still be -1 if no such
+ * card was found), -1 when a matching device node cannot be opened.
+ */
+static int find_and_open_devices(void)
+{
+	int i;
+	char path[80];
+	struct stat buf;
+	FILE *fl;
+	char vendor_id[8];
+	char *line;
+	int venid;
+
+	for (i = 0; i < 9; i++) {
+		snprintf(path, sizeof(path), "/sys/class/drm/card%d/device/vendor", i);
+		if (stat(path, &buf))
+			break;
+
+		fl = fopen(path, "r");
+		if (!fl)
+			break;
+
+		/* vendor_id only holds valid data if fgets() read something */
+		line = fgets(vendor_id, sizeof(vendor_id), fl);
+		fclose(fl);
+		if (!line)
+			break;
+
+		venid = strtoul(vendor_id, NULL, 16);
+		snprintf(path, sizeof(path), "/dev/dri/card%d", i);
+		if (venid == 0x8086) {
+			intel_fd = open(path, O_RDWR);
+			/* open() reports failure as -1; 0 is a valid descriptor */
+			if (intel_fd < 0)
+				return -1;
+		} else if (venid == 0x10de) {
+			nouveau_fd = open(path, O_RDWR);
+			if (nouveau_fd < 0)
+				return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * prime test 1 -
+ * allocate buffer on intel,
+ * set prime on buffer,
+ * retrieve buffer from nouveau,
+ * close prime_fd,
+ *  unref buffers
+ *
+ * Returns 0 on success, a negative value on failure.
+ */
+static int test1(void)
+{
+	int ret = -1;
+	drm_intel_bo *test_intel_bo;
+	int prime_fd = -1;
+	struct nouveau_bo *nvbo = NULL;
+
+	test_intel_bo = drm_intel_bo_alloc(bufmgr, "test bo", BO_SIZE, 4096);
+	if (!test_intel_bo)
+		return -1;
+
+	drm_intel_bo_gem_export_to_prime(test_intel_bo, &prime_fd);
+	if (prime_fd < 0)
+		goto out;
+
+	ret = nouveau_bo_prime_handle_ref(ndev, prime_fd, &nvbo);
+	close(prime_fd);
+	if (ret < 0)
+		goto out;
+
+	ret = 0;
+out:
+	/* shared exit so the intel bo is released on failure as well */
+	nouveau_bo_ref(NULL, &nvbo);
+	drm_intel_bo_unreference(test_intel_bo);
+	return ret;
+}
+
+/*
+ * prime test 2 -
+ * allocate buffer on nouveau
+ * set prime on buffer,
+ * retrieve buffer from intel
+ * close prime_fd,
+ *  unref buffers
+ *
+ * Returns 0 on success, a negative value on failure.
+ */
+static int test2(void)
+{
+	int ret;
+	drm_intel_bo *test_intel_bo = NULL;
+	int prime_fd;
+	struct nouveau_bo *nvbo = NULL;
+
+	ret = nouveau_bo_new(ndev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
+			     0, BO_SIZE, NULL, &nvbo);
+	if (ret < 0)
+		return ret;
+	ret = nouveau_bo_set_prime(nvbo, &prime_fd);
+	if (ret < 0)
+		goto out;
+
+	test_intel_bo = drm_intel_bo_gem_create_from_prime(bufmgr, prime_fd, BO_SIZE);
+	close(prime_fd);
+	if (!test_intel_bo) {
+		ret = -1;
+		goto out;
+	}
+
+	ret = 0;
+out:
+	/* shared exit so the nouveau bo is released on failure as well */
+	nouveau_bo_ref(NULL, &nvbo);
+	if (test_intel_bo)
+		drm_intel_bo_unreference(test_intel_bo);
+	return ret;
+}
+
+/*
+ * allocate intel, give to nouveau, map on nouveau
+ * write 0xdeadbeef, non-gtt map on intel, read
+ *
+ * Returns 0 when the value written through the nouveau mapping is read back
+ * through the intel mapping, non-zero otherwise.
+ */
+static int test3(void)
+{
+	int ret = -1;
+	drm_intel_bo *test_intel_bo;
+	int prime_fd = -1;
+	struct nouveau_bo *nvbo = NULL;
+	uint32_t *ptr;
+
+	test_intel_bo = drm_intel_bo_alloc(bufmgr, "test bo", BO_SIZE, 4096);
+	if (!test_intel_bo)
+		return -1;
+
+	drm_intel_bo_gem_export_to_prime(test_intel_bo, &prime_fd);
+	if (prime_fd < 0)
+		goto free_intel;
+
+	ret = nouveau_bo_prime_handle_ref(ndev, prime_fd, &nvbo);
+	/* nouveau holds its own reference now (or the import failed) */
+	close(prime_fd);
+	if (ret < 0) {
+		fprintf(stderr,"failed to ref prime buffer %d\n", ret);
+		goto free_intel;
+	}
+
+	ret = nouveau_bo_map(nvbo, NOUVEAU_BO_RDWR, nclient);
+	if (ret < 0) {
+		fprintf(stderr,"failed to map nouveau bo\n");
+		goto out;
+	}
+
+	ptr = nvbo->map;
+	*ptr = 0xdeadbeef;
+
+	/* without a successful map ->virtual must not be dereferenced */
+	ret = drm_intel_bo_map(test_intel_bo, 1);
+	if (ret != 0) {
+		fprintf(stderr,"failed to map bo\n");
+		goto out;
+	}
+
+	ptr = test_intel_bo->virtual;
+
+	if (*ptr != 0xdeadbeef) {
+		fprintf(stderr,"mapped value doesn't match\n");
+		ret = -1;
+	}
+out:
+	nouveau_bo_ref(NULL, &nvbo);
+free_intel:
+	drm_intel_bo_unreference(test_intel_bo);
+	return ret;
+}
+
+/*
+ * allocate intel, give to nouveau, map on nouveau
+ * write 0xdeadbeef, gtt map on intel, read
+ *
+ * Returns 0 when the value written through the nouveau mapping is read back
+ * through the intel GTT mapping, non-zero otherwise.
+ */
+static int test4(void)
+{
+	int ret = -1;
+	drm_intel_bo *test_intel_bo;
+	int prime_fd = -1;
+	struct nouveau_bo *nvbo = NULL;
+	uint32_t *ptr;
+
+	test_intel_bo = drm_intel_bo_alloc(bufmgr, "test bo", BO_SIZE, 4096);
+	if (!test_intel_bo)
+		return -1;
+
+	drm_intel_bo_gem_export_to_prime(test_intel_bo, &prime_fd);
+	if (prime_fd < 0)
+		goto out;
+
+	ret = nouveau_bo_prime_handle_ref(ndev, prime_fd, &nvbo);
+	close(prime_fd);
+	if (ret < 0) {
+		fprintf(stderr,"failed to ref prime buffer\n");
+		goto out;
+	}
+
+	ret = nouveau_bo_map(nvbo, NOUVEAU_BO_RDWR, nclient);
+	if (ret < 0) {
+		fprintf(stderr,"failed to map nouveau bo\n");
+		goto out;
+	}
+
+
+	ptr = nvbo->map;
+	*ptr = 0xdeadbeef;
+
+	/* without a successful map ->virtual must not be dereferenced */
+	ret = drm_intel_gem_bo_map_gtt(test_intel_bo);
+	if (ret != 0) {
+		fprintf(stderr,"failed to map bo\n");
+		goto out;
+	}
+	ptr = test_intel_bo->virtual;
+
+	if (*ptr != 0xdeadbeef) {
+		fprintf(stderr,"mapped value doesn't match\n");
+		ret = -1;
+	}
+out:
+	/* shared exit so the intel bo is released on failure as well */
+	nouveau_bo_ref(NULL, &nvbo);
+	drm_intel_bo_unreference(test_intel_bo);
+	return ret;
+}
+
+/* test drm_intel_bo_map doesn't work properly,
+   this tries to map the backing shmem fd, which doesn't exist
+   for these objects.
+
+   A failing or empty mapping counts as success; if the mapping works, the
+   value written through the nouveau mapping must be visible through it.
+   Returns 0 on success, non-zero on failure. */
+static int test5(void)
+{
+	int ret;
+	drm_intel_bo *test_intel_bo = NULL;
+	int prime_fd;
+	struct nouveau_bo *nvbo = NULL;
+	uint32_t *ptr;
+
+	ret = nouveau_bo_new(ndev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
+			     0, BO_SIZE, NULL, &nvbo);
+	if (ret < 0)
+		return ret;
+	ret = nouveau_bo_set_prime(nvbo, &prime_fd);
+	if (ret < 0)
+		goto out;
+
+	test_intel_bo = drm_intel_bo_gem_create_from_prime(bufmgr, prime_fd, BO_SIZE);
+	close(prime_fd);
+	if (!test_intel_bo) {
+		ret = -1;
+		goto out;
+	}
+
+	ret = nouveau_bo_map(nvbo, NOUVEAU_BO_RDWR, nclient);
+	if (ret < 0) {
+		fprintf(stderr,"failed to map nouveau bo\n");
+		goto out;
+	}
+
+	ptr = nvbo->map;
+	*ptr = 0xdeadbeef;
+
+	ret = drm_intel_bo_map(test_intel_bo, 0);
+	if (ret != 0) {
+		/* failed to map the bo is expected */
+		ret = 0;
+		goto out;
+	}
+	if (!test_intel_bo->virtual) {
+		ret = 0;
+		goto out;
+	}
+	ptr = test_intel_bo->virtual;
+
+	if (*ptr != 0xdeadbeef) {
+		fprintf(stderr,"mapped value doesn't match %08x\n", *ptr);
+		ret = -1;
+	}
+ out:
+	/* shared exit so the nouveau bo is released on every path */
+	nouveau_bo_ref(NULL, &nvbo);
+	if (test_intel_bo)
+		drm_intel_bo_unreference(test_intel_bo);
+	return ret;
+}
+
+/* test drm_intel_bo_map_gtt works properly,
+   this tries to map the backing shmem fd, which doesn't exist
+   for these objects.
+
+   The value written through the nouveau mapping must be visible through
+   the intel GTT mapping.  Returns 0 on success, non-zero on failure. */
+static int test6(void)
+{
+	int ret;
+	drm_intel_bo *test_intel_bo = NULL;
+	int prime_fd;
+	struct nouveau_bo *nvbo = NULL;
+	uint32_t *ptr;
+
+	ret = nouveau_bo_new(ndev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
+			     0, BO_SIZE, NULL, &nvbo);
+	if (ret < 0)
+		return ret;
+	ret = nouveau_bo_set_prime(nvbo, &prime_fd);
+	if (ret < 0)
+		goto out;
+
+	test_intel_bo = drm_intel_bo_gem_create_from_prime(bufmgr, prime_fd, BO_SIZE);
+	close(prime_fd);
+	if (!test_intel_bo) {
+		ret = -1;
+		goto out;
+	}
+
+	ret = nouveau_bo_map(nvbo, NOUVEAU_BO_RDWR, nclient);
+	if (ret < 0) {
+		fprintf(stderr,"failed to map nouveau bo\n");
+		goto out;
+	}
+
+	ptr = nvbo->map;
+	*ptr = 0xdeadbeef;
+	*(ptr + 1) = 0xa55a55;
+
+	ret = drm_intel_gem_bo_map_gtt(test_intel_bo);
+	if (ret != 0) {
+		fprintf(stderr,"failed to map bo\n");
+		goto out;
+	}
+	if (!test_intel_bo->virtual) {
+		ret = -1;
+		fprintf(stderr,"failed to map bo\n");
+		goto out;
+	}
+	ptr = test_intel_bo->virtual;
+
+	if (*ptr != 0xdeadbeef) {
+		fprintf(stderr,"mapped value doesn't match %08x %08x\n", *ptr, *(ptr + 1));
+		ret = -1;
+	}
+ out:
+	/* shared exit so the nouveau bo is released on every path */
+	nouveau_bo_ref(NULL, &nvbo);
+	if (test_intel_bo)
+		drm_intel_bo_unreference(test_intel_bo);
+	return ret;
+}
+
+/* Issue DRM_IOCTL_I915_GEM_PREAD on @fd: read @size bytes starting at @offset
+ * of the object @handle into @buf.  Returns whatever ioctl() returns. */
+static int do_read(int fd, int handle, void *buf, int offset, int size)
+{
+        struct drm_i915_gem_pread req;
+
+        memset(&req, 0, sizeof(req));
+        req.offset = offset;
+        req.size = size;
+        req.data_ptr = (uintptr_t)buf;
+        req.handle = handle;
+
+        /* Poison the destination first so that stale contents can never be
+         * mistaken for data returned by a read that actually failed. */
+        memset(buf, 0xd0, size);
+
+        return ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &req);
+}
+
+/* Issue DRM_IOCTL_I915_GEM_PWRITE on @fd: write @size bytes from @buf to
+ * @offset of the object @handle.  Returns whatever ioctl() returns. */
+static int do_write(int fd, int handle, void *buf, int offset, int size)
+{
+        struct drm_i915_gem_pwrite req;
+
+        memset(&req, 0, sizeof(req));
+        req.offset = offset;
+        req.size = size;
+        req.data_ptr = (uintptr_t)buf;
+        req.handle = handle;
+
+        return ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &req);
+}
+
+/* test 7 - import from nouveau into intel, test pread/pwrite fail
+ *
+ * Both ioctls are expected to return -1 on the imported object; a call that
+ * succeeds is a test failure.
+ *
+ * Returns 0 on success, non-zero on failure. */
+static int test7(void)
+{
+	int ret;
+	drm_intel_bo *test_intel_bo = NULL;
+	int prime_fd;
+	struct nouveau_bo *nvbo;
+	uint32_t *ptr;
+	uint32_t buf[64];
+
+	ret = nouveau_bo_new(ndev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
+			     0, BO_SIZE, NULL, &nvbo);
+	if (ret < 0)
+		return ret;
+
+	/* from here on nvbo is owned by us: leave through "out" only */
+	ret = nouveau_bo_set_prime(nvbo, &prime_fd);
+	if (ret < 0)
+		goto out;
+
+	test_intel_bo = drm_intel_bo_gem_create_from_prime(bufmgr, prime_fd, BO_SIZE);
+	close(prime_fd);
+	if (!test_intel_bo) {
+		ret = -1;
+		goto out;
+	}
+
+	ret = nouveau_bo_map(nvbo, NOUVEAU_BO_RDWR, nclient);
+	if (ret < 0) {
+		fprintf(stderr,"failed to map nouveau bo\n");
+		goto out;
+	}
+
+	ptr = nvbo->map;
+	*ptr = 0xdeadbeef;
+
+	ret = do_read(intel_fd, test_intel_bo->handle, buf, 0, sizeof(buf));
+	if (ret != -1) {
+		fprintf(stderr,"pread succeedded %d\n", ret);
+		/* a successful ioctl returns 0, which must not read as a pass */
+		ret = -1;
+		goto out;
+	}
+	buf[0] = 0xabcdef55;
+
+	ret = do_write(intel_fd, test_intel_bo->handle, buf, 0, 4);
+	if (ret != -1) {
+		fprintf(stderr,"pwrite succeedded\n");
+		/* same as above: unexpected success is a failure */
+		ret = -1;
+		goto out;
+	}
+	ret = 0;
+ out:
+	nouveau_bo_ref(NULL, &nvbo);
+	if (test_intel_bo)
+		drm_intel_bo_unreference(test_intel_bo);
+	return ret;
+}
+
+/* Fill the first width * height dwords of the (already GTT-mapped) @bo
+ * with @val, after announcing the GTT write access to the kernel. */
+static void
+set_bo(drm_intel_bo *bo, uint32_t val, int width, int height)
+{
+        uint32_t *cursor;
+        int remaining;
+
+        drm_intel_gem_bo_start_gtt_access(bo, true);
+
+        cursor = bo->virtual;
+        for (remaining = width * height; remaining != 0; remaining--)
+                *cursor++ = val;
+}
+
+/* Allocate a bo of 4 * width * height bytes from @ibufmgr, map it through
+ * the GTT and fill it with @val.  The bo is returned still mapped; the
+ * caller owns the reference. */
+static drm_intel_bo *
+create_bo(drm_intel_bufmgr *ibufmgr, uint32_t val, int width, int height)
+{
+        drm_intel_bo *new_bo = drm_intel_bo_alloc(ibufmgr, "bo",
+                                                  4*width*height, 0);
+
+        assert(new_bo);
+
+        /* gtt map doesn't have a write parameter, so just keep the mapping
+         * around (to avoid the set_domain with the gtt write domain set) and
+         * manually tell the kernel when we start access the gtt. */
+        drm_intel_gem_bo_map_gtt(new_bo);
+        set_bo(new_bo, val, width, height);
+
+        return new_bo;
+}
+
+/* test 8 - use intel hw to fill the BO with a blit from another BO,
+ * then readback from the nouveau bo, check value is correct.
+ *
+ * Returns 0 on success, non-zero on failure. */
+static int test8(void)
+{
+	int ret;
+	drm_intel_bo *test_intel_bo, *src_bo;
+	int prime_fd;
+	struct nouveau_bo *nvbo = NULL;
+	uint32_t *ptr;
+
+	/* blit source: 256 dwords of 0xaa55aa55 */
+	src_bo = create_bo(bufmgr, 0xaa55aa55, 256, 1);
+
+	test_intel_bo = drm_intel_bo_alloc(bufmgr, "test bo", BO_SIZE, 4096);
+	if (!test_intel_bo) {
+		fprintf(stderr,"failed to allocate intel bo\n");
+		ret = -1;
+		goto out;
+	}
+
+	ret = drm_intel_bo_gem_export_to_prime(test_intel_bo, &prime_fd);
+	if (ret != 0) {
+		/* prime_fd is not valid, so there is nothing to close */
+		fprintf(stderr,"failed to export intel bo\n");
+		goto out;
+	}
+
+	ret = nouveau_bo_prime_handle_ref(ndev, prime_fd, &nvbo);
+	close(prime_fd);
+	if (ret < 0) {
+		fprintf(stderr,"failed to ref prime buffer\n");
+		goto out;
+	}
+
+	intel_copy_bo(intel_batch, test_intel_bo, src_bo, 256, 1);
+
+	ret = nouveau_bo_map(nvbo, NOUVEAU_BO_RDWR, nclient);
+	if (ret < 0) {
+		fprintf(stderr,"failed to map nouveau bo\n");
+		goto out;
+	}
+
+	/* NOTE(review): result deliberately not checked, as in the original;
+	 * presumably this is only here to wait for the blit - confirm. */
+	drm_intel_bo_map(test_intel_bo, 0);
+
+	ptr = nvbo->map;
+	if (*ptr != 0xaa55aa55) {
+		fprintf(stderr,"mapped value doesn't match\n");
+		ret = -1;
+	}
+out:
+	/* nouveau_bo_ref() only drops a reference if nvbo was ever set */
+	nouveau_bo_ref(NULL, &nvbo);
+	if (test_intel_bo)
+		drm_intel_bo_unreference(test_intel_bo);
+	drm_intel_bo_unreference(src_bo);
+	return ret;
+}
+
+/* TODO: test 9 - use nouveau to do the blit */
+
+/* TODO: test 10 - nouveau copy engine?? */
+
+/* Open one intel and one nouveau device, set up both buffer managers and run
+ * tests 1-8 in order.
+ *
+ * Exit status: 77 (skip) when the device pair is not available, non-zero
+ * when setup or any test failed, 0 when every test passed. */
+int main(int argc, char **argv)
+{
+	int ret;
+	int failures = 0;
+
+/* Run test<n>(), report a non-zero result and count it as a failure. */
+#define RUN_TEST(n) do {						\
+	if (test##n()) {						\
+		fprintf(stderr,"prime_test: failed test " #n "\n");	\
+		failures++;						\
+	}								\
+} while (0)
+
+	ret = find_and_open_devices();
+	if (ret < 0)
+		return ret;
+
+	if (nouveau_fd == -1 || intel_fd == -1) {
+		fprintf(stderr,"failed to find intel and nouveau GPU\n");
+		return 77;
+	}
+
+	/* set up intel bufmgr */
+	bufmgr = drm_intel_bufmgr_gem_init(intel_fd, 4096);
+	if (!bufmgr)
+		return -1;
+	/* Do not enable reuse, we share (almost) all buffers. */
+	//drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+	/* set up nouveau bufmgr */
+	ret = nouveau_device_wrap(nouveau_fd, 0, &ndev);
+	if (ret < 0) {
+		fprintf(stderr,"failed to wrap nouveau device\n");
+		return 77;
+	}
+
+	ret = nouveau_client_new(ndev, &nclient);
+	if (ret < 0) {
+		fprintf(stderr,"failed to setup nouveau client\n");
+		return -1;
+	}
+
+	/* set up an intel batch buffer */
+	devid = intel_get_drm_devid(intel_fd);
+	intel_batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+	/* every test runs even if an earlier one failed */
+	RUN_TEST(1);
+	RUN_TEST(2);
+	RUN_TEST(3);
+	RUN_TEST(4);
+	RUN_TEST(5);
+	RUN_TEST(6);
+	RUN_TEST(7);
+	RUN_TEST(8);
+
+#undef RUN_TEST
+
+	intel_batchbuffer_free(intel_batch);
+
+	nouveau_client_del(&nclient);
+	nouveau_device_del(&ndev);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	close(intel_fd);
+	close(nouveau_fd);
+
+	/* report a failure of any test, not only of the last one */
+	return failures ? 1 : 0;
+}
diff --git a/tests/prime_self_import.c b/tests/prime_self_import.c
new file mode 100644
index 00000000..111ed4da
--- /dev/null
+++ b/tests/prime_self_import.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ */
+
+/*
+ * Testcase: Check whether prime import/export works on the same device
+ *
+ * ... but with different fds, i.e. the wayland usecase.
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+
+#define BO_SIZE (16*1024)
+
+/* Check that handle1 on fd1 and handle2 on fd2 name the same object.
+ *
+ * Both handles are mmapped; each byte of both mappings must still hold the
+ * pattern written by the previous call (0 on the first call).  A new pattern
+ * is then written through the first mapping and must show up in the second.
+ * The pattern is kept in a static counter, so consecutive calls have to be
+ * made on the same underlying object.  Any mismatch trips an assert. */
+static void
+check_bo(int fd1, uint32_t handle1, int fd2, uint32_t handle2)
+{
+	char *ptr1, *ptr2;
+	static char counter = 0;
+	int i;
+
+	ptr1 = gem_mmap(fd1, handle1, BO_SIZE, PROT_READ | PROT_WRITE);
+	ptr2 = gem_mmap(fd2, handle2, BO_SIZE, PROT_READ | PROT_WRITE);
+
+	/* both mappings are dereferenced below, so both have to exist */
+	assert(ptr1);
+	assert(ptr2);
+
+	/* check whether it's still our old object first. */
+	for (i = 0; i < BO_SIZE; i++) {
+		assert(ptr1[i] == counter);
+		assert(ptr2[i] == counter);
+	}
+
+	counter++;
+
+	memset(ptr1, counter, BO_SIZE);
+	assert(memcmp(ptr1, ptr2, BO_SIZE) == 0);
+
+	munmap(ptr1, BO_SIZE);
+	munmap(ptr2, BO_SIZE);
+}
+
+/* Export a bo from one fd, import it on a second fd of the same device and
+ * verify with check_bo() that both handles keep naming the same object
+ * while handles, the dma-buf fd and finally the exporting fd are dropped.
+ * The order of the steps below is the test itself.  Returns 0; every failed
+ * check trips an assert. */
+int main(int argc, char **argv)
+{
+	int fd1, fd2;
+	uint32_t handle, handle_import1, handle_import2, handle_selfimport;
+	int dma_buf_fd;
+
+	fd1 = drm_open_any();
+	fd2 = drm_open_any();
+
+	handle = gem_create(fd1, BO_SIZE);
+
+	/* export on fd1, import on fd2 */
+	dma_buf_fd = prime_handle_to_fd(fd1, handle);
+	handle_import1 = prime_fd_to_handle(fd2, dma_buf_fd);
+
+	check_bo(fd1, handle, fd2, handle_import1);
+
+	/* reimport should give us the same handle so that userspace can check
+	 * whether it has that bo already somewhere. */
+	handle_import2 = prime_fd_to_handle(fd2, dma_buf_fd);
+	assert(handle_import1 == handle_import2);
+
+	/* Same for re-importing on the exporting fd. */
+	handle_selfimport = prime_fd_to_handle(fd1, dma_buf_fd);
+	assert(handle == handle_selfimport);
+
+	/* close dma_buf, check whether nothing disappears. */
+	close(dma_buf_fd);
+	check_bo(fd1, handle, fd2, handle_import1);
+
+	/* drop the exporter's handle; the import alone must keep the object */
+	gem_close(fd1, handle);
+	check_bo(fd2, handle_import1, fd2, handle_import1);
+
+	/* re-import into old exporter */
+	dma_buf_fd = prime_handle_to_fd(fd2, handle_import1);
+	/* but drop all references to the obj in between */
+	gem_close(fd2, handle_import1);
+	handle = prime_fd_to_handle(fd1, dma_buf_fd);
+	handle_import1 = prime_fd_to_handle(fd2, dma_buf_fd);
+	check_bo(fd1, handle, fd2, handle_import1);
+
+	/* Completely rip out exporting fd. */
+	close(fd1);
+	check_bo(fd2, handle_import1, fd2, handle_import1);
+
+	return 0;
+}
diff --git a/tests/sysfs_edid_timing b/tests/sysfs_edid_timing
new file mode 100755
index 00000000..3a8c6c0e
--- /dev/null
+++ b/tests/sysfs_edid_timing
@@ -0,0 +1,20 @@
+#!/bin/sh
+#
+# This checks the time we take to read the status of all the possible connectors.
+# Without the edid -ENXIO patch (http://permalink.gmane.org/gmane.comp.video.dri.devel/62083),
+# we sometimes take a *really* long time. So let's just check for some reasonable timing here.
+#
+# Exit status: 0 when all status files were read within 600ms, 1 otherwise.
+#
+
+TIME1=$(date +%s%N)
+# stdin comes from /dev/null so that cat cannot block on the terminal when
+# find matches nothing and cat is therefore started without file arguments.
+cat $(find /sys/devices/|grep drm | grep /status) > /dev/null < /dev/null
+TIME2=$(date +%s%N)
+
+# time in ms (date +%s%N prints nanoseconds)
+RES=$(((TIME2 - TIME1) / 1000000))
+
+if [ $RES -gt 600 ]; then
+	echo "Talking to outputs took ${RES}ms, something is wrong"
+	exit 1
+fi
+
+exit 0
diff --git a/tests/sysfs_l3_parity b/tests/sysfs_l3_parity
new file mode 100755
index 00000000..6f814a13
--- /dev/null
+++ b/tests/sysfs_l3_parity
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# Check that an L3 row can be remapped with intel_l3_parity and that the
+# remaps can be cleared again.  Exits 77 (skip) without the sysfs interface,
+# 1 on failure.
+
+if ! find /sys/class/drm/card*/ | grep l3_parity > /dev/null ; then
+	echo "no l3_parity interface, skipping test"
+	exit 77
+fi
+
+# SOURCE_DIR is quoted everywhere so that a checkout path containing
+# whitespace keeps working.
+SOURCE_DIR="$( dirname "${BASH_SOURCE[0]}" )"
+. "$SOURCE_DIR/drm_lib.sh"
+
+# start from a clean state
+"$SOURCE_DIR/../tools/intel_l3_parity" -c
+
+#Check that we can remap a row
+"$SOURCE_DIR/../tools/intel_l3_parity" 0,0,0
+disabled=$("$SOURCE_DIR/../tools/intel_l3_parity" | grep -c 'Row 0, Bank 0, Subbank 0 is disabled')
+if [ "$disabled" != "1" ] ; then
+	echo "Fail"
+	exit 1
+fi
+
+"$SOURCE_DIR/../tools/intel_l3_parity" -c
+
+#Check that we can clear remaps
+# (the tool is expected to print nothing at all once no row is remapped)
+if [ $("$SOURCE_DIR/../tools/intel_l3_parity" | wc -c) != "0" ] ; then
+	echo "Fail"
+	exit 1
+fi
diff --git a/tests/sysfs_rc6_residency.c b/tests/sysfs_rc6_residency.c
new file mode 100644
index 00000000..2f33697a
--- /dev/null
+++ b/tests/sysfs_rc6_residency.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "drmtest.h"
+
+#define SLEEP_DURATION 3000 // in milliseconds
+#define RC6_FUDGE 900 // in milliseconds
+
+/* Read one unsigned decimal value from the file at @path and return it.
+ *
+ * The process is aborted when the file cannot be opened or does not start
+ * with a number, so callers never see an indeterminate value. */
+static unsigned int readit(const char *path)
+{
+	unsigned int ret;
+	int matched;
+	FILE *file;
+
+	file = fopen(path, "r");
+	if (file == NULL) {
+		fprintf(stderr, "Couldn't open %s (%d)\n", path, errno);
+		abort();
+	}
+	matched = fscanf(file, "%u", &ret);
+	fclose(file);
+
+	/* fscanf reports how many items it converted; anything but 1 means
+	 * that ret was never written. */
+	if (matched != 1) {
+		fprintf(stderr, "Couldn't parse a value from %s\n", path);
+		abort();
+	}
+
+	return ret;
+}
+
+/* Sample the rc6, rc6p and rc6pp residency counters of card 0 before and
+ * after sleeping SLEEP_DURATION ms and check that the summed increase lies
+ * within RC6_FUDGE ms of the time slept.
+ *
+ * Exit status: 77 when the rc6_enable file cannot be opened, success when
+ * rc6 is disabled or the residency is in range, failure otherwise. */
+int main(int argc, char *argv[])
+{
+	const int device = drm_get_card(0);
+	char *path, *pathp, *pathpp;
+	int fd, ret;
+	unsigned int value1, value1p, value1pp, value2, value2p, value2pp;
+	FILE *file;
+	int diff;
+
+	/* Use drm_open_any to verify device existence */
+	fd = drm_open_any();
+	close(fd);
+
+	ret = asprintf(&path, "/sys/class/drm/card%d/power/rc6_enable", device);
+	assert(ret != -1);
+
+	/* For some reason my ivb isn't idle even after syncing up with the gpu.
+	 * Let's add a sleep just to make it happy. */
+	sleep(5);
+
+	/* only probes for the file; the value itself is read by readit() */
+	file = fopen(path, "r");
+	if (!file) {
+		printf("kernel too old or rc6 not supported on this platform.\n");
+		exit(77);
+	}
+	fclose(file);
+
+	/* claim success if no rc6 enabled. */
+	if (readit(path) == 0)
+		exit(EXIT_SUCCESS);
+
+	/* path is reused for the residency file below */
+	free(path);
+
+	ret = asprintf(&path, "/sys/class/drm/card%d/power/rc6_residency_ms", device);
+	assert(ret != -1);
+	ret = asprintf(&pathp, "/sys/class/drm/card%d/power/rc6p_residency_ms", device);
+	assert(ret != -1);
+	ret = asprintf(&pathpp, "/sys/class/drm/card%d/power/rc6pp_residency_ms", device);
+	assert(ret != -1);
+
+	value1 = readit(path);
+	value1p = readit(pathp);
+	value1pp = readit(pathpp);
+	sleep(SLEEP_DURATION / 1000);
+	value2 = readit(path);
+	value2p = readit(pathp);
+	value2pp = readit(pathpp);
+
+	free(pathpp);
+	free(pathp);
+	free(path);
+
+	/* total time spent in any of the three rc6 states during the sleep */
+	diff = (value2pp - value1pp) +
+		(value2p - value1p) +
+		(value2 - value1);
+
+	if (diff > (SLEEP_DURATION + RC6_FUDGE)) {
+		fprintf(stderr, "Diff was too high. That is unpossible\n");
+		exit(EXIT_FAILURE);
+	}
+	if (diff < (SLEEP_DURATION - RC6_FUDGE)) {
+		fprintf(stderr, "GPU was not in RC6 long enough. Check that "
+				"the GPU is as idle as possible (ie. no X, "
+				"running and running no other tests)\n");
+		exit(EXIT_FAILURE);
+	}
+
+	exit(EXIT_SUCCESS);
+}
diff --git a/tests/testdisplay.c b/tests/testdisplay.c
new file mode 100644
index 00000000..14d7da39
--- /dev/null
+++ b/tests/testdisplay.c
@@ -0,0 +1,765 @@
+/*
+ * Copyright 2010 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * This program is intended for testing of display functionality.  It should
+ * allow for testing of
+ *   - hotplug
+ *   - mode setting
+ *   - clone & twin modes
+ *   - panel fitting
+ *   - test patterns & pixel generators
+ * Additional programs can test the detected outputs against VBT provided
+ * device lists (both docked & undocked).
+ *
+ * TODO:
+ * - pixel generator in transcoder
+ * - test pattern reg in pipe
+ * - test patterns on outputs (e.g. TV)
+ * - handle hotplug (leaks crtcs, can't handle clones)
+ * - allow mode force
+ * - expose output specific controls
+ *  - e.g. DDC-CI brightness
+ *  - HDMI controls
+ *  - panel brightness
+ *  - DP commands (e.g. poweroff)
+ * - verify outputs against VBT/physical connectors
+ */
+#include "config.h"
+
+#include <assert.h>
+#include <cairo.h>
+#include <errno.h>
+#include <math.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/poll.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "testdisplay.h"
+
+#include <stdlib.h>
+#include <signal.h>
+
+/* Mode resources of drm_fd; (re)loaded and freed by update_display(). */
+drmModeRes *resources;
+/* drm_fd is the device every DRM call in this file is issued on. */
+int drm_fd, modes;
+/* Test selection flags; dump_info, test_all_modes and force_mode are set
+ * from the command line in main(). */
+int dump_info = 0, test_all_modes =0, test_preferred_mode = 0, force_mode = 0,
+	test_plane, enable_tiling;
+/* Passed to sleep() between two modes when all modes are tested. */
+int sleep_between_modes = 5;
+/* bpp is derived from depth in set_mode(). */
+uint32_t depth = 24, stride, bpp;
+/* When set, ./pass.png is painted and set_mode() waits for a signal. */
+int qr_code = 0;
+
+/* Timings copied over the connector's mode when force_mode is set. */
+drmModeModeInfo force_timing;
+
+/* crtc_* is the rectangle filled by paint_color_key(); width and height are
+ * set from the mode under test in set_mode(). */
+int crtc_x, crtc_y, crtc_w, crtc_h, width, height;
+unsigned int plane_fb_id;
+unsigned int plane_crtc_id;
+unsigned int plane_id;
+int plane_width, plane_height;
+/* Pixel value written by paint_color_key(). */
+static const uint32_t SPRITE_COLOR_KEY = 0x00aaaaaa;
+/* Mapping of the framebuffer most recently created by set_mode(). */
+uint32_t *fb_ptr;
+
+/* Number of elements of a true array (not of a pointer). */
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+/* One entry of a value -> printable name lookup table. */
+struct type_name {
+	int type;
+	const char *name;
+};
+
+/* Define "static const char *<res>_str(int type)", which searches the table
+ * <res>_names linearly and returns the name stored for @type, or
+ * "(invalid)" when the table has no entry for it.  The table has to be
+ * defined before the macro is used. */
+#define type_name_fn(res) \
+static const char * res##_str(int type) {			\
+	unsigned int i;					\
+	for (i = 0; i < ARRAY_SIZE(res##_names); i++) { \
+		if (res##_names[i].type == type)	\
+			return res##_names[i].name;	\
+	}						\
+	return "(invalid)";				\
+}
+
+/* Printable names of the DRM_MODE_ENCODER_* values. */
+struct type_name encoder_type_names[] = {
+	{ DRM_MODE_ENCODER_NONE, "none" },
+	{ DRM_MODE_ENCODER_DAC, "DAC" },
+	{ DRM_MODE_ENCODER_TMDS, "TMDS" },
+	{ DRM_MODE_ENCODER_LVDS, "LVDS" },
+	{ DRM_MODE_ENCODER_TVDAC, "TVDAC" },
+};
+
+/* defines encoder_type_str() */
+type_name_fn(encoder_type)
+
+/* Printable names of the connection states of a connector. */
+struct type_name connector_status_names[] = {
+	{ DRM_MODE_CONNECTED, "connected" },
+	{ DRM_MODE_DISCONNECTED, "disconnected" },
+	{ DRM_MODE_UNKNOWNCONNECTION, "unknown" },
+};
+
+/* defines connector_status_str() */
+type_name_fn(connector_status)
+
+/* Printable names of the DRM_MODE_CONNECTOR_* values. */
+struct type_name connector_type_names[] = {
+	{ DRM_MODE_CONNECTOR_Unknown, "unknown" },
+	{ DRM_MODE_CONNECTOR_VGA, "VGA" },
+	{ DRM_MODE_CONNECTOR_DVII, "DVI-I" },
+	{ DRM_MODE_CONNECTOR_DVID, "DVI-D" },
+	{ DRM_MODE_CONNECTOR_DVIA, "DVI-A" },
+	{ DRM_MODE_CONNECTOR_Composite, "composite" },
+	{ DRM_MODE_CONNECTOR_SVIDEO, "s-video" },
+	{ DRM_MODE_CONNECTOR_LVDS, "LVDS" },
+	{ DRM_MODE_CONNECTOR_Component, "component" },
+	{ DRM_MODE_CONNECTOR_9PinDIN, "9-pin DIN" },
+	{ DRM_MODE_CONNECTOR_DisplayPort, "DisplayPort" },
+	{ DRM_MODE_CONNECTOR_HDMIA, "HDMI-A" },
+	{ DRM_MODE_CONNECTOR_HDMIB, "HDMI-B" },
+	{ DRM_MODE_CONNECTOR_TV, "TV" },
+	{ DRM_MODE_CONNECTOR_eDP, "Embedded DisplayPort" },
+};
+
+/* defines connector_type_str() */
+type_name_fn(connector_type)
+
+/*
+ * Mode setting with the kernel interfaces is a bit of a chore.
+ * First you have to find the connector in question and make sure the
+ * requested mode is available.
+ * Then you need to find the encoder attached to that connector so you
+ * can bind it with a free crtc.
+ *
+ * struct connector collects the result of that lookup for one connector;
+ * it is filled in by connector_find_preferred_mode().
+ */
+struct connector {
+	uint32_t id;			/* connector id, set by the caller */
+	int mode_valid;			/* non-zero once the fields below are usable */
+	drmModeModeInfo mode;		/* mode selected for the test */
+	drmModeEncoder *encoder;	/* encoder found for the connector */
+	drmModeConnector *connector;	/* connector as returned by drmModeGetConnector */
+	int crtc;			/* id of the crtc picked for this connector */
+	int pipe;			/* index of that crtc in resources->crtcs */
+};
+
+/* Print a table of all connectors of @drmfd to stdout, each followed by the
+ * list of its modes.  Errors are reported on stderr and the affected
+ * connector is skipped. */
+static void dump_connectors_fd(int drmfd)
+{
+	int i, j;
+
+	drmModeRes *mode_resources = drmModeGetResources(drmfd);
+
+	if (!mode_resources) {
+		fprintf(stderr, "drmModeGetResources failed: %s\n",
+			strerror(errno));
+		return;
+	}
+
+	printf("Connectors:\n");
+	printf("id\tencoder\tstatus\t\ttype\tsize (mm)\tmodes\n");
+	for (i = 0; i < mode_resources->count_connectors; i++) {
+		drmModeConnector *connector;
+
+		connector = drmModeGetConnector(drmfd, mode_resources->connectors[i]);
+		if (!connector) {
+			fprintf(stderr, "could not get connector %i: %s\n",
+				mode_resources->connectors[i], strerror(errno));
+			continue;
+		}
+
+		printf("%d\t%d\t%s\t%s\t%dx%d\t\t%d\n",
+		       connector->connector_id,
+		       connector->encoder_id,
+		       connector_status_str(connector->connection),
+		       connector_type_str(connector->connector_type),
+		       connector->mmWidth, connector->mmHeight,
+		       connector->count_modes);
+
+		/* The mode list is optional, releasing the connector is not:
+		 * a connector without modes must still reach the free below. */
+		if (connector->count_modes) {
+			printf("  modes:\n");
+			printf("  name refresh (Hz) hdisp hss hse htot vdisp "
+			       "vss vse vtot flags type clock\n");
+			for (j = 0; j < connector->count_modes; j++)
+				kmstest_dump_mode(&connector->modes[j]);
+		}
+
+		drmModeFreeConnector(connector);
+	}
+	printf("\n");
+
+	drmModeFreeResources(mode_resources);
+}
+
+/* Print a table of all CRTCs of @drmfd to stdout, each followed by its
+ * current mode.  Errors are reported on stderr and the affected CRTC is
+ * skipped. */
+static void dump_crtcs_fd(int drmfd)
+{
+	int i;
+	drmModeRes *mode_resources = drmModeGetResources(drmfd);
+
+	/* same guard as in dump_connectors_fd(): nothing to walk on failure */
+	if (!mode_resources) {
+		fprintf(stderr, "drmModeGetResources failed: %s\n",
+			strerror(errno));
+		return;
+	}
+
+	printf("CRTCs:\n");
+	printf("id\tfb\tpos\tsize\n");
+	for (i = 0; i < mode_resources->count_crtcs; i++) {
+		drmModeCrtc *crtc;
+
+		crtc = drmModeGetCrtc(drmfd, mode_resources->crtcs[i]);
+		if (!crtc) {
+			fprintf(stderr, "could not get crtc %i: %s\n",
+				mode_resources->crtcs[i], strerror(errno));
+			continue;
+		}
+		printf("%d\t%d\t(%d,%d)\t(%dx%d)\n",
+		       crtc->crtc_id,
+		       crtc->buffer_id,
+		       crtc->x, crtc->y,
+		       crtc->width, crtc->height);
+		kmstest_dump_mode(&crtc->mode);
+
+		drmModeFreeCrtc(crtc);
+	}
+	printf("\n");
+
+	drmModeFreeResources(mode_resources);
+}
+
+/* Look up everything needed to drive the connector c->id.
+ *
+ * On success c->mode holds the preferred mode of the connector (its first
+ * mode when none is flagged preferred), c->encoder and c->connector hold
+ * objects the caller has to free, c->crtc / c->pipe name a CRTC the encoder
+ * can use, and c->mode_valid is 1.
+ *
+ * On any failure, including a connector that is not connected, c->mode_valid
+ * stays 0 and nothing is left allocated. */
+static void connector_find_preferred_mode(struct connector *c)
+{
+	drmModeConnector *connector;
+	drmModeEncoder *encoder = NULL;
+	int i, j;
+
+	/* First, find the connector & mode */
+	c->mode_valid = 0;
+	connector = drmModeGetConnector(drm_fd, c->id);
+	if (!connector) {
+		fprintf(stderr, "could not get connector %d: %s\n",
+			c->id, strerror(errno));
+		return;
+	}
+
+	if (connector->connection != DRM_MODE_CONNECTED)
+		goto err_connector;
+
+	if (!connector->count_modes) {
+		fprintf(stderr, "connector %d has no modes\n", c->id);
+		goto err_connector;
+	}
+
+	if (connector->connector_id != c->id) {
+		fprintf(stderr, "connector id doesn't match (%d != %d)\n",
+			connector->connector_id, c->id);
+		goto err_connector;
+	}
+
+	/* use the first mode as test mode unless one is marked preferred;
+	 * count_modes is known to be non-zero at this point */
+	c->mode = connector->modes[0];
+	for (j = 0; j < connector->count_modes; j++) {
+		if (connector->modes[j].type & DRM_MODE_TYPE_PREFERRED) {
+			c->mode = connector->modes[j];
+			break;
+		}
+	}
+
+	/* Now get the encoder: the first one of this connector we can fetch */
+	for (i = 0; i < connector->count_encoders; i++) {
+		encoder = drmModeGetEncoder(drm_fd, connector->encoders[i]);
+		if (encoder)
+			break;
+
+		fprintf(stderr, "could not get encoder %i: %s\n",
+			connector->encoders[i], strerror(errno));
+	}
+
+	if (!encoder) {
+		fprintf(stderr, "failed to find encoder\n");
+		goto err_connector;
+	}
+
+	/* Find first CRTC not in use */
+	for (i = 0; i < resources->count_crtcs; i++) {
+		if (resources->crtcs[i] && (encoder->possible_crtcs & (1<<i)))
+			break;
+	}
+
+	/* without this check crtcs[] would be read one past its end */
+	if (i == resources->count_crtcs) {
+		fprintf(stderr, "failed to find a crtc for connector %d\n",
+			c->id);
+		goto err_encoder;
+	}
+
+	c->crtc = resources->crtcs[i];
+	c->pipe = i;
+
+	/* mark the crtc as taken for the connectors handled after this one */
+	if(test_preferred_mode || force_mode)
+		resources->crtcs[i] = 0;
+
+	c->encoder = encoder;
+	c->connector = connector;
+	c->mode_valid = 1;
+	return;
+
+err_encoder:
+	drmModeFreeEncoder(encoder);
+err_connector:
+	drmModeFreeConnector(connector);
+}
+
+/* Write SPRITE_COLOR_KEY to every pixel of fb_ptr that lies inside the
+ * rectangle crtc_x, crtc_y, crtc_w, crtc_h.  Pixels are addressed as
+ * row * width + column. */
+static void
+paint_color_key(void)
+{
+	int row, col;
+
+	for (row = crtc_y; row < crtc_y + crtc_h; row++) {
+		for (col = crtc_x; col < crtc_x + crtc_w; col++) {
+			const uint32_t pixel = (row * width) + col;
+
+			fb_ptr[pixel] = SPRITE_COLOR_KEY;
+		}
+	}
+}
+
+/* Paint the PNG @file onto @cr, scaled to a square whose side is 32% of the
+ * global height, horizontally centred and 10% of the height from the top.
+ *
+ * When the image cannot be loaded a message is printed to stderr and @cr is
+ * left untouched.  On success the translation and scale applied here stay
+ * in effect on @cr. */
+static void paint_image(cairo_t *cr, const char *file)
+{
+	int img_x, img_y, img_w, img_h, img_w_o, img_h_o;
+	double img_w_scale, img_h_scale;
+
+	cairo_surface_t *image;
+
+	img_y = height * (0.10 );
+	img_h = height * 0.08 * 4;
+	img_w = img_h;
+
+	img_x = (width / 2) - (img_w / 2);
+
+	image = cairo_image_surface_create_from_png(file);
+
+	/* A failed load yields an error surface whose size is 0x0; bail out
+	 * before that size is used as a divisor for the scale factors. */
+	if (cairo_surface_status(image) != CAIRO_STATUS_SUCCESS) {
+		fprintf(stderr, "could not load image %s: %s\n", file,
+			cairo_status_to_string(cairo_surface_status(image)));
+		cairo_surface_destroy(image);
+		return;
+	}
+
+	img_w_o = cairo_image_surface_get_width(image);
+	img_h_o = cairo_image_surface_get_height(image);
+
+	cairo_translate(cr, img_x, img_y);
+
+	img_w_scale = (double)img_w / (double)img_w_o;
+	img_h_scale = (double)img_h / (double)img_h_o;
+	cairo_scale(cr, img_w_scale, img_h_scale);
+
+	cairo_set_source_surface(cr, image, 0, 0);
+
+	cairo_paint(cr);
+	cairo_surface_destroy(image);
+}
+
+/* Draw @text at (@x, @y) on @cr with a black outline and a white fill,
+ * using the font currently selected on @cr. */
+static void
+stroke_and_fill_text(cairo_t *cr, int x, int y, const char *text)
+{
+	cairo_move_to(cr, x, y);
+	cairo_text_path(cr, text);
+	cairo_set_source_rgb(cr, 0, 0, 0);
+	cairo_stroke_preserve(cr);
+	cairo_set_source_rgb(cr, 1, 1, 1);
+	cairo_fill(cr);
+}
+
+/* Paint callback for the test framebuffer.  @priv is the struct connector
+ * under test.  Draws, centred in the l_width x l_height area, the connector
+ * type, the current mode with its encoder type and below that the list of
+ * all modes of the connector; paints ./pass.png on top when qr_code is set. */
+static void
+paint_output_info(cairo_t *cr, int l_width, int l_height, void *priv)
+{
+	struct connector *c = priv;
+	cairo_text_extents_t name_extents, mode_extents;
+	char name_buf[128], mode_buf[128];
+	int i, x, y, modes_x, modes_y;
+
+	/* Measure both headline strings first, each at its own font size */
+	snprintf(name_buf, sizeof name_buf, "%s",
+		 connector_type_str(c->connector->connector_type));
+	cairo_set_font_size(cr, 48);
+	cairo_select_font_face(cr, "Helvetica",
+			       CAIRO_FONT_SLANT_NORMAL,
+			       CAIRO_FONT_WEIGHT_NORMAL);
+	cairo_text_extents(cr, name_buf, &name_extents);
+
+	snprintf(mode_buf, sizeof mode_buf, "%s @ %dHz on %s encoder",
+		 c->mode.name, c->mode.vrefresh,
+		 encoder_type_str(c->encoder->encoder_type));
+	cairo_set_font_size(cr, 36);
+	cairo_text_extents(cr, mode_buf, &mode_extents);
+
+	/* Output name, centred just above the middle */
+	x = l_width / 2;
+	x -= name_extents.width / 2;
+	y = l_height / 2;
+	y -= (name_extents.height / 2) - (mode_extents.height / 2) - 10;
+	cairo_set_font_size(cr, 48);
+	stroke_and_fill_text(cr, x, y, name_buf);
+
+	/* Mode name, centred just below the middle */
+	x = l_width / 2;
+	x -= mode_extents.width / 2;
+	modes_x = x;
+	y = l_height / 2;
+	y += (mode_extents.height / 2) + (name_extents.height / 2) + 10;
+	cairo_set_font_size(cr, 36);
+	stroke_and_fill_text(cr, x, y, mode_buf);
+
+	/* Heading of the mode list; its right edge becomes the column edge */
+	snprintf(mode_buf, sizeof mode_buf, "Available modes:");
+	cairo_set_font_size(cr, 18);
+	cairo_text_extents(cr, mode_buf, &mode_extents);
+	x = modes_x;
+	modes_x = x + mode_extents.width;
+	y += mode_extents.height + 10;
+	modes_y = y;
+	stroke_and_fill_text(cr, x, y, mode_buf);
+
+	for (i = 0; i < c->connector->count_modes; i++) {
+		snprintf(mode_buf, sizeof mode_buf, "%s @ %dHz",
+			 c->connector->modes[i].name,
+			 c->connector->modes[i].vrefresh);
+		cairo_set_font_size(cr, 18);
+		cairo_text_extents(cr, mode_buf, &mode_extents);
+		x = modes_x - mode_extents.width; /* right justify modes */
+		y += mode_extents.height + 10;
+		/* start a new column once the bottom of the screen is hit */
+		if (y + mode_extents.height >= height) {
+			y = modes_y + mode_extents.height + 10;
+			modes_x += mode_extents.width + 10;
+			x = modes_x - mode_extents.width;
+		}
+		stroke_and_fill_text(cr, x, y, mode_buf);
+	}
+
+	if (qr_code)
+		paint_image(cr, "./pass.png");
+}
+
+/* Signal handler that does nothing; it is installed by set_single() so that
+ * delivery of the signal interrupts pause() in set_mode() instead of
+ * triggering the default action. */
+static void sighandler(int signo)
+{
+	(void)signo;
+}
+
+/* Install sighandler() as the handler for SIGUSR1.  A failure is reported
+ * with perror() and otherwise ignored. */
+static void set_single(void)
+{
+	int sigs[] = { SIGUSR1 };
+	struct sigaction sa;
+
+	/* sigaction() reads every member, so none may be left indeterminate;
+	 * in particular sa_flags has to be a defined value. */
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = sighandler;
+	sa.sa_flags = 0;
+	sigemptyset(&sa.sa_mask);
+
+	if (sigaction(sigs[0], &sa, NULL) == -1)
+		perror("Could not set signal handler");
+}
+
+/* Light up the connector described by @c.
+ *
+ * Derives bpp from depth, looks up mode/encoder/crtc for the connector and
+ * then, for the single selected mode or for every mode of the connector when
+ * test_all_modes is set, creates a framebuffer painted by
+ * paint_output_info(), fills the colour key rectangle and sets the mode on
+ * the crtc.  Returns silently when no usable mode was found.  The encoder
+ * and connector stored in @c are freed before returning. */
+static void
+set_mode(struct connector *c)
+{
+	unsigned int fb_id = 0;
+	int j, test_mode_num;
+
+	/* NOTE(review): bpp keeps its previous value for depth > 32 */
+	if (depth <= 8)
+		bpp = 8;
+	else if (depth > 8 && depth <= 16)
+		bpp = 16;
+	else if (depth > 16 && depth <= 32)
+		bpp = 32;
+
+	connector_find_preferred_mode(c);
+	if (!c->mode_valid)
+		return;
+
+	test_mode_num = 1;
+	if (force_mode){
+		/* replace the mode found above by the timings given with -f */
+		memcpy( &c->mode, &force_timing, sizeof(force_timing));
+		c->mode.vrefresh =(force_timing.clock*1e3)/(force_timing.htotal*force_timing.vtotal);
+		c->mode_valid = 1;
+		sprintf(c->mode.name, "%dx%d", force_timing.hdisplay, force_timing.vdisplay);
+	} else if (test_all_modes)
+		test_mode_num = c->connector->count_modes;
+
+	for (j = 0; j < test_mode_num; j++) {
+		struct kmstest_fb fb_info;
+
+		if (test_all_modes)
+			c->mode = c->connector->modes[j];
+
+		if (!c->mode_valid)
+			continue;
+
+		/* globals read by the paint functions */
+		width = c->mode.hdisplay;
+		height = c->mode.vdisplay;
+
+		fb_id = kmstest_create_fb(drm_fd, width, height, bpp, depth,
+					  enable_tiling, &fb_info,
+					  paint_output_info, c);
+
+		/* NOTE(review): this mapping is never munmap()ed in the code
+		 * visible here - confirm whether that is intended */
+		fb_ptr = gem_mmap(drm_fd, fb_info.gem_handle,
+				  fb_info.size, PROT_READ | PROT_WRITE);
+		assert(fb_ptr);
+		paint_color_key();
+
+		gem_close(drm_fd, fb_info.gem_handle);
+
+		fprintf(stdout, "CRTS(%u):",c->crtc);
+		kmstest_dump_mode(&c->mode);
+		if (drmModeSetCrtc(drm_fd, c->crtc, fb_id, 0, 0,
+				   &c->id, 1, &c->mode)) {
+			fprintf(stderr, "failed to set mode (%dx%d@%dHz): %s\n",
+				width, height, c->mode.vrefresh,
+				strerror(errno));
+			continue;
+		}
+
+		if (sleep_between_modes && test_all_modes && !qr_code)
+			sleep(sleep_between_modes);
+
+		/* wait until any signal arrives, e.g. the SIGUSR1 set up by
+		 * set_single() */
+		if (qr_code){
+			set_single();
+			pause();
+		}
+
+	}
+
+	/* NOTE(review): only the framebuffer of the last iteration is removed
+	 * here; the ones created for earlier modes look leaked - confirm */
+	if(test_all_modes){
+		drmModeRmFB(drm_fd,fb_id);
+		drmModeSetCrtc(drm_fd, c->crtc, fb_id, 0, 0,  &c->id, 1, 0);
+	}
+
+	drmModeFreeEncoder(c->encoder);
+	drmModeFreeConnector(c->connector);
+}
+
+/*
+ * Re-probe outputs and light up as many as possible.
+ *
+ * On Intel, we have two CRTCs that we can drive independently with
+ * different timings and scanout buffers.
+ *
+ * Each connector has a corresponding encoder, except in the SDVO case
+ * where an encoder may have multiple connectors.
+ *
+ * Returns 1 on success and 0 when the mode resources could not be fetched
+ * or memory ran out.  Nothing allocated here outlives the call.
+ */
+int update_display(void)
+{
+	struct connector *connectors;
+	int c;
+
+	resources = drmModeGetResources(drm_fd);
+	if (!resources) {
+		fprintf(stderr, "drmModeGetResources failed: %s\n",
+			strerror(errno));
+		return 0;
+	}
+
+	connectors = calloc(resources->count_connectors,
+			    sizeof(struct connector));
+	if (!connectors) {
+		drmModeFreeResources(resources);
+		return 0;
+	}
+
+	if (dump_info) {
+		dump_connectors_fd(drm_fd);
+		dump_crtcs_fd(drm_fd);
+	}
+
+	if (test_preferred_mode || test_all_modes || force_mode) {
+		/* Find any connected displays */
+		for (c = 0; c < resources->count_connectors; c++) {
+			connectors[c].id = resources->connectors[c];
+			set_mode(&connectors[c]);
+		}
+	}
+
+	/* set_mode() has already released what each entry pointed to */
+	free(connectors);
+	drmModeFreeResources(resources);
+	return 1;
+}
+
+/* getopt() option string of main() */
+static char optstr[] = "hiaf:s:d:p:mrt";
+
+/* Print the command line help for the program called @name to stderr and
+ * exit with status 0. */
+static void usage(char *name)
+{
+	static const char *const option_help[] = {
+		"\t-i\tdump info\n",
+		"\t-a\ttest all modes\n",
+		"\t-s\t<duration>\tsleep between each mode test\n",
+		"\t-d\t<depth>\tbit depth of scanout buffer\n",
+		"\t-p\t<planew,h>,<crtcx,y>,<crtcw,h> test overlay plane\n",
+		"\t-m\ttest the preferred mode\n",
+		"\t-t\tuse a tiled framebuffer\n",
+		"\t-r\tprint a QR code on the screen whose content is \"pass\" for the automatic test\n",
+		"\t-f\t<clock MHz>,<hdisp>,<hsync-start>,<hsync-end>,<htotal>,\n",
+		"\t\t<vdisp>,<vsync-start>,<vsync-end>,<vtotal>\n",
+		"\t\ttest force mode\n",
+		"\tDefault is to test all modes.\n",
+	};
+	unsigned int line;
+
+	fprintf(stderr, "usage: %s [-hiasdpmtf]\n", name);
+	for (line = 0; line < sizeof(option_help) / sizeof(option_help[0]); line++)
+		fputs(option_help[line], stderr);
+
+	exit(0);
+}
+
+/*
+ * Call dump_<res>() when the flag variable <res> is non-zero.  Wrapped in
+ * do { } while (0) so the expansion is a single statement and cannot
+ * capture an "else" that follows the macro invocation.
+ */
+#define dump_resource(res) do { if (res) dump_##res(); } while (0)
+
+/*
+ * GIOChannel watch callback for stdin (installed by main()).
+ *
+ * Reads up to two bytes from the channel's fd and exits the program with
+ * status 0 when the input is "q" alone or "q" followed by a newline.
+ *
+ * Returns TRUE to keep the watch installed, FALSE to remove it once
+ * stdin reaches EOF or fails with a non-transient error.
+ */
+static gboolean input_event(GIOChannel *source, GIOCondition condition,
+				gpointer data)
+{
+	gchar buf[2];
+	gssize count;	/* signed: read() reports errors as -1 */
+
+	count = read(g_io_channel_unix_get_fd(source), buf, sizeof(buf));
+	if (count < 0 && (errno == EINTR || errno == EAGAIN))
+		return TRUE;
+
+	/*
+	 * On EOF or a hard error nothing was stored in buf, and the fd would
+	 * keep waking the main loop forever, so drop the watch instead of
+	 * inspecting uninitialized bytes.
+	 */
+	if (count <= 0)
+		return FALSE;
+
+	if (buf[0] == 'q' && (count == 1 || buf[1] == '\n')) {
+		exit(0);
+	}
+
+	return TRUE;
+}
+
+/*
+ * Change the working directory to the directory part of argv[0].
+ *
+ * If argv[0] contains no '/', or memory cannot be allocated, the working
+ * directory is left unchanged.  A failing chdir() is reported on stderr
+ * but is not fatal.
+ */
+static void enter_exec_path( char **argv )
+{
+	size_t len_path = strlen(argv[0]);
+	char *exec_path = malloc(len_path + 1);
+	char *pos;
+
+	if (exec_path == NULL)
+		return;
+
+	/* Copy the terminating NUL too: strrchr() and chdir() need a string. */
+	memcpy(exec_path, argv[0], len_path + 1);
+
+	pos = strrchr(exec_path, '/');
+	if (pos != NULL) {
+		/* Cut right after the last '/', keeping only the directory. */
+		*(pos+1) = '\0';
+		if (chdir(exec_path) != 0)
+			fprintf(stderr, "failed to chdir to %s: %s\n",
+				exec_path, strerror(errno));
+	}
+
+	free(exec_path);
+}
+
+/*
+ * Entry point: parse the command line, open the DRM device, install the
+ * hotplug and stdin watches and run the requested tests through
+ * update_display().  Unless only info dumping or the all-modes test was
+ * requested, the glib main loop then runs until input_event() exits.
+ *
+ * Returns 0 on success, 1 if update_display() failed and -1 if the main
+ * loop, the hotplug support or the stdin channel could not be set up.
+ */
+int main(int argc, char **argv)
+{
+	int c;
+	int ret = 0;
+	GIOChannel *stdinchannel;
+	GMainLoop *mainloop;
+	float force_clock;
+
+	enter_exec_path( argv );
+
+	/* Silence getopt(); unknown options are reported below. */
+	opterr = 0;
+	while ((c = getopt(argc, argv, optstr)) != -1) {
+		switch (c) {
+		case 'i':
+			dump_info = 1;
+			break;
+		case 'a':
+			test_all_modes = 1;
+			break;
+		case 'f':
+			force_mode = 1;
+			if(sscanf(optarg,"%f,%hu,%hu,%hu,%hu,%hu,%hu,%hu,%hu",
+				&force_clock,&force_timing.hdisplay, &force_timing.hsync_start,&force_timing.hsync_end,&force_timing.htotal,
+				&force_timing.vdisplay, &force_timing.vsync_start, &force_timing.vsync_end, &force_timing.vtotal)!= 9)
+				usage(argv[0]);
+			/* The clock is given in MHz on the command line. */
+			force_timing.clock = force_clock*1000;
+
+			break;
+		case 's':
+			sleep_between_modes = atoi(optarg);
+			break;
+		case 'd':
+			depth = atoi(optarg);
+			fprintf(stderr, "using depth %d\n", depth);
+			break;
+		case 'p':
+			if (sscanf(optarg, "%d,%d,%d,%d,%d,%d", &plane_width,
+				   &plane_height, &crtc_x, &crtc_y,
+				   &crtc_w, &crtc_h) != 6)
+				usage(argv[0]);
+			test_plane = 1;
+			break;
+		case 'm':
+			test_preferred_mode = 1;
+			break;
+		case 't':
+			enable_tiling = 1;
+			break;
+		case 'r':
+			qr_code = 1;
+			break;
+		default:
+			/*
+			 * getopt() returned '?'; the offending option
+			 * character is in optopt, not in c.
+			 */
+			fprintf(stderr, "unknown option %c\n", optopt);
+			/* fall through */
+		case 'h':
+			usage(argv[0]);
+			break;
+		}
+	}
+	/* With no explicit request, fall back to testing all modes. */
+	if (!test_all_modes && !force_mode && !dump_info &&
+	    !test_preferred_mode)
+		test_all_modes = 1;
+
+	drm_fd = drm_open_any();
+
+	mainloop = g_main_loop_new(NULL, FALSE);
+	if (!mainloop) {
+		fprintf(stderr, "failed to create glib mainloop\n");
+		ret = -1;
+		goto out_close;
+	}
+
+	if (!testdisplay_setup_hotplug()) {
+		fprintf(stderr, "failed to initialize hotplug support\n");
+		/* Report the failure instead of exiting with status 0. */
+		ret = -1;
+		goto out_mainloop;
+	}
+
+	stdinchannel = g_io_channel_unix_new(0);
+	if (!stdinchannel) {
+		fprintf(stderr, "failed to create stdin GIO channel\n");
+		ret = -1;
+		goto out_hotplug;
+	}
+
+	/* g_io_add_watch() returns an unsigned source id, so test for 0. */
+	if (!g_io_add_watch(stdinchannel, G_IO_IN | G_IO_ERR, input_event,
+			    NULL)) {
+		fprintf(stderr, "failed to add watch on stdin GIO channel\n");
+		ret = -1;
+		goto out_stdio;
+	}
+
+	if (!update_display()) {
+		ret = 1;
+		goto out_stdio;
+	}
+
+	/* These two modes are one-shot: no need to wait for events. */
+	if (dump_info || test_all_modes)
+		goto out_stdio;
+
+	g_main_loop_run(mainloop);
+
+out_stdio:
+	g_io_channel_shutdown(stdinchannel, TRUE, NULL);
+out_hotplug:
+	testdisplay_cleanup_hotplug();
+out_mainloop:
+	g_main_loop_unref(mainloop);
+out_close:
+	close(drm_fd);
+
+	return ret;
+}
diff --git a/tests/testdisplay.h b/tests/testdisplay.h
new file mode 100644
index 00000000..962e6219
--- /dev/null
+++ b/tests/testdisplay.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2010 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef TESTDISPLAY_H
+#define TESTDISPLAY_H
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <glib.h>
+
+/* DRM device file descriptor, opened by main() in testdisplay.c */
+extern int drm_fd;
+
+/*
+ * Start listening for display hotplug events.  Returns TRUE on success
+ * (also when built without udev support), FALSE on failure.
+ */
+gboolean testdisplay_setup_hotplug(void);
+
+/* Release everything testdisplay_setup_hotplug() created. */
+void testdisplay_cleanup_hotplug(void);
+
+/* called by the hotplug code; returns 1 on success, 0 on failure */
+int update_display(void);
+
+#endif /* TESTDISPLAY_H */
diff --git a/tests/testdisplay_hotplug.c b/tests/testdisplay_hotplug.c
new file mode 100644
index 00000000..3f80dc73
--- /dev/null
+++ b/tests/testdisplay_hotplug.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2010 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "testdisplay.h"
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#if HAVE_UDEV
+#include <libudev.h>
+/* udev monitor whose device events are read in hotplug_event() */
+static struct udev_monitor *uevent_monitor;
+/* udev context created in testdisplay_setup_hotplug() */
+static struct udev *udev;
+/* GIOChannel wrapping the monitor's file descriptor */
+static GIOChannel *udevchannel;
+
+/*
+ * GIOChannel watch callback for the udev monitor fd.
+ *
+ * Receives one device event and re-runs update_display() when the event
+ * belongs to the device node behind drm_fd and carries HOTPLUG=1.
+ *
+ * Always returns TRUE so the watch stays installed.
+ */
+static gboolean hotplug_event(GIOChannel *source, GIOCondition condition,
+			      gpointer data)
+{
+	struct udev_device *dev;
+	struct stat s;
+	const char *hotplug;
+
+	dev = udev_monitor_receive_device(uevent_monitor);
+	if (!dev)
+		return TRUE;
+
+	hotplug = udev_device_get_property_value(dev, "HOTPLUG");
+
+	/*
+	 * Only trust s.st_rdev when fstat() succeeded; otherwise it is
+	 * uninitialized and must not be compared.
+	 */
+	if (fstat(drm_fd, &s) == 0 &&
+	    s.st_rdev == udev_device_get_devnum(dev) &&
+	    hotplug && atoi(hotplug) == 1)
+		update_display();
+
+	udev_device_unref(dev);
+	return TRUE;
+}
+
+
+/*
+ * Create the udev context and a "drm"/"drm_minor" uevent monitor, wrap
+ * the monitor fd in a GIOChannel and watch it with hotplug_event().
+ *
+ * Returns TRUE when every step succeeded.  On the first failing step a
+ * message is printed to stderr, testdisplay_cleanup_hotplug() is called
+ * and FALSE is returned.
+ */
+gboolean testdisplay_setup_hotplug(void)
+{
+	const char *why;
+	int rc;
+
+	udev = udev_new();
+	if (udev == NULL) {
+		why = "failed to create udev object\n";
+		goto bail;
+	}
+
+	uevent_monitor = udev_monitor_new_from_netlink(udev, "udev");
+	if (uevent_monitor == NULL) {
+		why = "failed to create udev event monitor\n";
+		goto bail;
+	}
+
+	/* Only DRM minor devices are of interest. */
+	rc = udev_monitor_filter_add_match_subsystem_devtype(uevent_monitor,
+							     "drm",
+							     "drm_minor");
+	if (rc < 0) {
+		why = "failed to filter for drm events\n";
+		goto bail;
+	}
+
+	rc = udev_monitor_enable_receiving(uevent_monitor);
+	if (rc < 0) {
+		why = "failed to enable udev event reception\n";
+		goto bail;
+	}
+
+	udevchannel =
+		g_io_channel_unix_new(udev_monitor_get_fd(uevent_monitor));
+	if (udevchannel == NULL) {
+		why = "failed to create udev GIO channel\n";
+		goto bail;
+	}
+
+	rc = g_io_add_watch(udevchannel, G_IO_IN | G_IO_ERR, hotplug_event,
+			    udev);
+	if (rc < 0) {
+		why = "failed to add watch on udev GIO channel\n";
+		goto bail;
+	}
+
+	return TRUE;
+
+bail:
+	/* Report what went wrong, then undo the steps already taken. */
+	fputs(why, stderr);
+	testdisplay_cleanup_hotplug();
+	return FALSE;
+}
+
+/*
+ * Release whatever testdisplay_setup_hotplug() managed to create.
+ *
+ * Each handle is reset to NULL once released, so the function is safe to
+ * call after a partial setup and calling it again is a no-op.
+ */
+void testdisplay_cleanup_hotplug(void)
+{
+	if (udevchannel) {
+		g_io_channel_shutdown(udevchannel, TRUE, NULL);
+		udevchannel = NULL;
+	}
+	if (uevent_monitor) {
+		udev_monitor_unref(uevent_monitor);
+		uevent_monitor = NULL;
+	}
+	if (udev) {
+		udev_unref(udev);
+		udev = NULL;
+	}
+}
+#else
+/*
+ * Variant used when HAVE_UDEV is not set: prints a notice to stderr and
+ * returns TRUE so the caller carries on without hotplug handling.
+ */
+gboolean testdisplay_setup_hotplug(void)
+{
+	fputs("no hotplug support on this platform\n", stderr);
+	return TRUE;
+}
+
+/* Variant used when HAVE_UDEV is not set: there is nothing to release. */
+void testdisplay_cleanup_hotplug(void)
+{
+}
+#endif
author	Graydon, Tracy <tracy.graydon@intel.com>	2013-01-31 15:14:28 -0800
committer	Graydon, Tracy <tracy.graydon@intel.com>	2013-01-31 15:14:28 -0800
commit	3c7b03f2f7b4ecfb18fb30f2e43b6321cb1aa4de (patch)
tree	bb8c57f401c0087a3ce4e96dc733abff854c3a43 /tests
download	intel-gpu-tools-3c7b03f2f7b4ecfb18fb30f2e43b6321cb1aa4de.tar.gz intel-gpu-tools-3c7b03f2f7b4ecfb18fb30f2e43b6321cb1aa4de.tar.bz2 intel-gpu-tools-3c7b03f2f7b4ecfb18fb30f2e43b6321cb1aa4de.zip