220 files changed, 7776 insertions, 3502 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 90bc65d07a35..88e01e08e279 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -263,6 +263,8 @@ source "drivers/gpu/drm/mxsfb/Kconfig"
 
 source "drivers/gpu/drm/meson/Kconfig"
 
+source "drivers/gpu/drm/tinydrm/Kconfig"
+
 # Keep legacy drivers last
 
 menuconfig DRM_LEGACY
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 92de3991fa56..3ee95793d122 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -94,3 +94,4 @@ obj-$(CONFIG_DRM_ARCPGU)+= arc/
 obj-y			+= hisilicon/
 obj-$(CONFIG_DRM_ZTE)	+= zte/
 obj-$(CONFIG_DRM_MXSFB)	+= mxsfb/
+obj-$(CONFIG_DRM_TINYDRM) += tinydrm/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 57301f5936fa..d2d0f60ff36d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -83,6 +83,13 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
 		}
 		break;
 	}
+
+	if (!(*out_ring && (*out_ring)->adev)) {
+		DRM_ERROR("Ring %d is not initialized on IP %d\n",
+			  ring, ip_type);
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index 762f8e82ceb7..e9a176891e13 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -627,11 +627,8 @@ static const struct drm_encoder_helper_funcs dce_virtual_encoder_helper_funcs =
 
 static void dce_virtual_encoder_destroy(struct drm_encoder *encoder)
 {
-	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
-
-	kfree(amdgpu_encoder->enc_priv);
 	drm_encoder_cleanup(encoder);
-	kfree(amdgpu_encoder);
+	kfree(encoder);
 }
 
 static const struct drm_encoder_funcs dce_virtual_encoder_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index e2b0b1646f99..0635829b18cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -254,6 +254,9 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
 	}
 	WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
 
+	if (adev->mode_info.num_crtc)
+		amdgpu_display_set_vga_render_state(adev, false);
+
 	gmc_v6_0_mc_stop(adev, &save);
 
 	if (gmc_v6_0_wait_for_idle((void *)adev)) {
@@ -283,7 +286,6 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
 		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
 	}
 	gmc_v6_0_mc_resume(adev, &save);
-	amdgpu_display_set_vga_render_state(adev, false);
 }
 
 static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h
index 1051181d8c0d..5a8fa1c85229 100644
--- a/drivers/gpu/drm/ast/ast_drv.h
+++ b/drivers/gpu/drm/ast/ast_drv.h
@@ -114,6 +114,7 @@ struct ast_private {
 	struct ttm_bo_kmap_obj cache_kmap;
 	int next_cursor;
 	bool support_wide_screen;
+	bool DisableP2A;
 
 	enum ast_tx_chip tx_chip_type;
 	u8 dp501_maxclk;
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 5992ed2166ec..993909430736 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -124,6 +124,12 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post)
 	} else
 		*need_post = false;
 
+	/* Check P2A Access */
+	ast->DisableP2A = true;
+	data = ast_read32(ast, 0xf004);
+	if (data != 0xFFFFFFFF)
+		ast->DisableP2A = false;
+
 	/* Check if we support wide screen */
 	switch (ast->chip) {
 	case AST1180:
@@ -140,15 +146,17 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post)
 			ast->support_wide_screen = true;
 		else {
 			ast->support_wide_screen = false;
-			/* Read SCU7c (silicon revision register) */
-			ast_write32(ast, 0xf004, 0x1e6e0000);
-			ast_write32(ast, 0xf000, 0x1);
-			data = ast_read32(ast, 0x1207c);
-			data &= 0x300;
-			if (ast->chip == AST2300 && data == 0x0) /* ast1300 */
-				ast->support_wide_screen = true;
-			if (ast->chip == AST2400 && data == 0x100) /* ast1400 */
-				ast->support_wide_screen = true;
+			if (ast->DisableP2A == false) {
+				/* Read SCU7c (silicon revision register) */
+				ast_write32(ast, 0xf004, 0x1e6e0000);
+				ast_write32(ast, 0xf000, 0x1);
+				data = ast_read32(ast, 0x1207c);
+				data &= 0x300;
+				if (ast->chip == AST2300 && data == 0x0) /* ast1300 */
+					ast->support_wide_screen = true;
+				if (ast->chip == AST2400 && data == 0x100) /* ast1400 */
+					ast->support_wide_screen = true;
+			}
 		}
 		break;
 	}
@@ -216,80 +224,81 @@ static int ast_get_dram_info(struct drm_device *dev)
 	uint32_t data, data2;
 	uint32_t denum, num, div, ref_pll;
 
-	ast_write32(ast, 0xf004, 0x1e6e0000);
-	ast_write32(ast, 0xf000, 0x1);
-
-
-	ast_write32(ast, 0x10000, 0xfc600309);
-
-	do {
-		if (pci_channel_offline(dev->pdev))
-			return -EIO;
-	} while (ast_read32(ast, 0x10000) != 0x01);
-	data = ast_read32(ast, 0x10004);
-
-	if (data & 0x40)
+	if (ast->DisableP2A)
+	{
 		ast->dram_bus_width = 16;
+		ast->dram_type = AST_DRAM_1Gx16;
+		ast->mclk = 396;
+	}
 	else
-		ast->dram_bus_width = 32;
+	{
+		ast_write32(ast, 0xf004, 0x1e6e0000);
+		ast_write32(ast, 0xf000, 0x1);
+		data = ast_read32(ast, 0x10004);
+
+		if (data & 0x40)
+			ast->dram_bus_width = 16;
+		else
+			ast->dram_bus_width = 32;
+
+		if (ast->chip == AST2300 || ast->chip == AST2400) {
+			switch (data & 0x03) {
+			case 0:
+				ast->dram_type = AST_DRAM_512Mx16;
+				break;
+			default:
+			case 1:
+				ast->dram_type = AST_DRAM_1Gx16;
+				break;
+			case 2:
+				ast->dram_type = AST_DRAM_2Gx16;
+				break;
+			case 3:
+				ast->dram_type = AST_DRAM_4Gx16;
+				break;
+			}
+		} else {
+			switch (data & 0x0c) {
+			case 0:
+			case 4:
+				ast->dram_type = AST_DRAM_512Mx16;
+				break;
+			case 8:
+				if (data & 0x40)
+					ast->dram_type = AST_DRAM_1Gx16;
+				else
+					ast->dram_type = AST_DRAM_512Mx32;
+				break;
+			case 0xc:
+				ast->dram_type = AST_DRAM_1Gx32;
+				break;
+			}
+		}
 
-	if (ast->chip == AST2300 || ast->chip == AST2400) {
-		switch (data & 0x03) {
-		case 0:
-			ast->dram_type = AST_DRAM_512Mx16;
-			break;
-		default:
-		case 1:
-			ast->dram_type = AST_DRAM_1Gx16;
-			break;
-		case 2:
-			ast->dram_type = AST_DRAM_2Gx16;
-			break;
+		data = ast_read32(ast, 0x10120);
+		data2 = ast_read32(ast, 0x10170);
+		if (data2 & 0x2000)
+			ref_pll = 14318;
+		else
+			ref_pll = 12000;
+
+		denum = data & 0x1f;
+		num = (data & 0x3fe0) >> 5;
+		data = (data & 0xc000) >> 14;
+		switch (data) {
 		case 3:
-			ast->dram_type = AST_DRAM_4Gx16;
-			break;
-		}
-	} else {
-		switch (data & 0x0c) {
-		case 0:
-		case 4:
-			ast->dram_type = AST_DRAM_512Mx16;
+			div = 0x4;
 			break;
-		case 8:
-			if (data & 0x40)
-				ast->dram_type = AST_DRAM_1Gx16;
-			else
-				ast->dram_type = AST_DRAM_512Mx32;
+		case 2:
+		case 1:
+			div = 0x2;
 			break;
-		case 0xc:
-			ast->dram_type = AST_DRAM_1Gx32;
+		default:
+			div = 0x1;
 			break;
 		}
+		ast->mclk = ref_pll * (num + 2) / (denum + 2) * (div * 1000);
 	}
-
-	data = ast_read32(ast, 0x10120);
-	data2 = ast_read32(ast, 0x10170);
-	if (data2 & 0x2000)
-		ref_pll = 14318;
-	else
-		ref_pll = 12000;
-
-	denum = data & 0x1f;
-	num = (data & 0x3fe0) >> 5;
-	data = (data & 0xc000) >> 14;
-	switch (data) {
-	case 3:
-		div = 0x4;
-		break;
-	case 2:
-	case 1:
-		div = 0x2;
-		break;
-	default:
-		div = 0x1;
-		break;
-	}
-	ast->mclk = ref_pll * (num + 2) / (denum + 2) * (div * 1000);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c
index 810c51d92b99..5331ee1df086 100644
--- a/drivers/gpu/drm/ast/ast_post.c
+++ b/drivers/gpu/drm/ast/ast_post.c
@@ -379,12 +379,20 @@ void ast_post_gpu(struct drm_device *dev)
 	ast_open_key(ast);
 	ast_set_def_ext_reg(dev);
 
-	if (ast->chip == AST2300 || ast->chip == AST2400)
-		ast_init_dram_2300(dev);
-	else
-		ast_init_dram_reg(dev);
+	if (ast->DisableP2A == false)
+	{
+		if (ast->chip == AST2300 || ast->chip == AST2400)
+			ast_init_dram_2300(dev);
+		else
+			ast_init_dram_reg(dev);
 
-	ast_init_3rdtx(dev);
+		ast_init_3rdtx(dev);
+	}
+	else
+	{
+		if (ast->tx_chip_type != AST_TX_NONE)
+			ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa3, 0xcf, 0x80);	/* Enable DVO */
+	}
 }
 
 /* AST 2300 DRAM settings */
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index c97588a28216..a5673107db26 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -286,15 +286,15 @@ drm_atomic_get_crtc_state(struct drm_atomic_state *state,
 EXPORT_SYMBOL(drm_atomic_get_crtc_state);
 
 static void set_out_fence_for_crtc(struct drm_atomic_state *state,
-				   struct drm_crtc *crtc, s64 __user *fence_ptr)
+				   struct drm_crtc *crtc, s32 __user *fence_ptr)
 {
 	state->crtcs[drm_crtc_index(crtc)].out_fence_ptr = fence_ptr;
 }
 
-static s64 __user *get_out_fence_for_crtc(struct drm_atomic_state *state,
+static s32 __user *get_out_fence_for_crtc(struct drm_atomic_state *state,
 					  struct drm_crtc *crtc)
 {
-	s64 __user *fence_ptr;
+	s32 __user *fence_ptr;
 
 	fence_ptr = state->crtcs[drm_crtc_index(crtc)].out_fence_ptr;
 	state->crtcs[drm_crtc_index(crtc)].out_fence_ptr = NULL;
@@ -505,7 +505,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
 		state->color_mgmt_changed |= replaced;
 		return ret;
 	} else if (property == config->prop_out_fence_ptr) {
-		s64 __user *fence_ptr = u64_to_user_ptr(val);
+		s32 __user *fence_ptr = u64_to_user_ptr(val);
 
 		if (!fence_ptr)
 			return 0;
@@ -1902,7 +1902,7 @@ EXPORT_SYMBOL(drm_atomic_clean_old_fb);
  */
 
 struct drm_out_fence_state {
-	s64 __user *out_fence_ptr;
+	s32 __user *out_fence_ptr;
 	struct sync_file *sync_file;
 	int fd;
 };
@@ -1939,7 +1939,7 @@ static int prepare_crtc_signaling(struct drm_device *dev,
 		return 0;
 
 	for_each_crtc_in_state(state, crtc, crtc_state, i) {
-		u64 __user *fence_ptr;
+		s32 __user *fence_ptr;
 
 		fence_ptr = get_out_fence_for_crtc(crtc_state->state, crtc);
 
@@ -2019,13 +2019,16 @@ static void complete_crtc_signaling(struct drm_device *dev,
 	}
 
 	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		struct drm_pending_vblank_event *event = crtc_state->event;
 		/*
-		 * TEST_ONLY and PAGE_FLIP_EVENT are mutually
-		 * exclusive, if they weren't, this code should be
-		 * called on success for TEST_ONLY too.
+		 * Free the allocated event. drm_atomic_helper_setup_commit
+		 * can allocate an event too, so only free it if it's ours
+		 * to prevent a double free in drm_atomic_state_clear.
 		 */
-		if (crtc_state->event)
-			drm_event_cancel_free(dev, &crtc_state->event->base);
+		if (event && (event->base.fence || event->base.file_priv)) {
+			drm_event_cancel_free(dev, &event->base);
+			crtc_state->event = NULL;
+		}
 	}
 
 	if (!fence_state)
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index e4d2c8a49076..45464c8b797d 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -378,6 +378,9 @@ int drm_connector_register(struct drm_connector *connector)
 {
 	int ret = 0;
 
+	if (!connector->dev->registered)
+		return 0;
+
 	mutex_lock(&connector->mutex);
 	if (connector->registered)
 		goto unlock;
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 45ce224688ce..b5c6bb46a425 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -776,6 +776,8 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags)
 	if (ret)
 		goto err_minors;
 
+	dev->registered = true;
+
 	if (dev->driver->load) {
 		ret = dev->driver->load(dev, flags);
 		if (ret)
@@ -823,6 +825,8 @@ void drm_dev_unregister(struct drm_device *dev)
 
 	drm_lastclose(dev);
 
+	dev->registered = false;
+
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		drm_modeset_unregister_all(dev);
 
diff --git a/drivers/gpu/drm/drm_panel.c b/drivers/gpu/drm/drm_panel.c
index 3dfe3c886502..308d442a531b 100644
--- a/drivers/gpu/drm/drm_panel.c
+++ b/drivers/gpu/drm/drm_panel.c
@@ -137,7 +137,7 @@ EXPORT_SYMBOL(drm_panel_detach);
  * Return: A pointer to the panel registered for the specified device tree
  * node or NULL if no panel matching the device tree node can be found.
  */
-struct drm_panel *of_drm_find_panel(struct device_node *np)
+struct drm_panel *of_drm_find_panel(const struct device_node *np)
 {
 	struct drm_panel *panel;
 
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 7311aeab16f7..3b6caaca9751 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -49,20 +49,21 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
 	if (high_gm) {
 		node = &vgpu->gm.high_gm_node;
 		size = vgpu_hidden_sz(vgpu);
-		start = gvt_hidden_gmadr_base(gvt);
-		end = gvt_hidden_gmadr_end(gvt);
+		start = ALIGN(gvt_hidden_gmadr_base(gvt), I915_GTT_PAGE_SIZE);
+		end = ALIGN(gvt_hidden_gmadr_end(gvt), I915_GTT_PAGE_SIZE);
 		flags = PIN_HIGH;
 	} else {
 		node = &vgpu->gm.low_gm_node;
 		size = vgpu_aperture_sz(vgpu);
-		start = gvt_aperture_gmadr_base(gvt);
-		end = gvt_aperture_gmadr_end(gvt);
+		start = ALIGN(gvt_aperture_gmadr_base(gvt), I915_GTT_PAGE_SIZE);
+		end = ALIGN(gvt_aperture_gmadr_end(gvt), I915_GTT_PAGE_SIZE);
 		flags = PIN_MAPPABLE;
 	}
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
 	ret = i915_gem_gtt_insert(&dev_priv->ggtt.base, node,
-				  size, 4096, I915_COLOR_UNEVICTABLE,
+				  size, I915_GTT_PAGE_SIZE,
+				  I915_COLOR_UNEVICTABLE,
 				  start, end, flags);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	if (ret)
@@ -254,7 +255,7 @@ static int alloc_resource(struct intel_vgpu *vgpu,
 	if (request > avail)
 		goto no_enough_resource;
 
-	vgpu_aperture_sz(vgpu) = request;
+	vgpu_aperture_sz(vgpu) = ALIGN(request, I915_GTT_PAGE_SIZE);
 
 	item = "high GM space";
 	max = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE;
@@ -265,7 +266,7 @@ static int alloc_resource(struct intel_vgpu *vgpu,
 	if (request > avail)
 		goto no_enough_resource;
 
-	vgpu_hidden_sz(vgpu) = request;
+	vgpu_hidden_sz(vgpu) = ALIGN(request, I915_GTT_PAGE_SIZE);
 
 	item = "fence";
 	max = gvt_fence_sz(gvt) - HOST_FENCE;
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 9a4b23c3ee97..b9c8e2407682 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -481,7 +481,6 @@ struct parser_exec_state {
 	(s->vgpu->gvt->device_info.gmadr_bytes_in_cmd >> 2)
 
 static unsigned long bypass_scan_mask = 0;
-static bool bypass_batch_buffer_scan = true;
 
 /* ring ALL, type = 0 */
 static struct sub_op_bits sub_op_mi[] = {
@@ -1135,6 +1134,8 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s,
 	u32 dword2 = cmd_val(s, 2);
 	u32 plane = (dword0 & GENMASK(12, 8)) >> 8;
 
+	info->plane = PRIMARY_PLANE;
+
 	switch (plane) {
 	case MI_DISPLAY_FLIP_SKL_PLANE_1_A:
 		info->pipe = PIPE_A;
@@ -1148,12 +1149,28 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s,
 		info->pipe = PIPE_C;
 		info->event = PRIMARY_C_FLIP_DONE;
 		break;
+
+	case MI_DISPLAY_FLIP_SKL_PLANE_2_A:
+		info->pipe = PIPE_A;
+		info->event = SPRITE_A_FLIP_DONE;
+		info->plane = SPRITE_PLANE;
+		break;
+	case MI_DISPLAY_FLIP_SKL_PLANE_2_B:
+		info->pipe = PIPE_B;
+		info->event = SPRITE_B_FLIP_DONE;
+		info->plane = SPRITE_PLANE;
+		break;
+	case MI_DISPLAY_FLIP_SKL_PLANE_2_C:
+		info->pipe = PIPE_C;
+		info->event = SPRITE_C_FLIP_DONE;
+		info->plane = SPRITE_PLANE;
+		break;
+
 	default:
 		gvt_err("unknown plane code %d\n", plane);
 		return -EINVAL;
 	}
 
-	info->pipe = PRIMARY_PLANE;
 	info->stride_val = (dword1 & GENMASK(15, 6)) >> 6;
 	info->tile_val = (dword1 & GENMASK(2, 0));
 	info->surf_val = (dword2 & GENMASK(31, 12)) >> 12;
@@ -1525,9 +1542,6 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s)
 {
 	struct intel_gvt *gvt = s->vgpu->gvt;
 
-	if (bypass_batch_buffer_scan)
-		return 0;
-
 	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
 		/* BDW decides privilege based on address space */
 		if (cmd_val(s, 0) & (1 << 8))
diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index c0c884aeb30e..6d8fde880c39 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -83,7 +83,7 @@ static int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe)
 	return 0;
 }
 
-/* EDID with 1024x768 as its resolution */
+/* EDID with 1920x1200 as its resolution */
 static unsigned char virtual_dp_monitor_edid[] = {
 	/*Header*/
 	0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
@@ -97,11 +97,16 @@ static unsigned char virtual_dp_monitor_edid[] = {
 	0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54,
 	/* Established Timings: maximum resolution is 1024x768 */
 	0x21, 0x08, 0x00,
-	/* Standard Timings. All invalid */
-	0x00, 0xc0, 0x00, 0xc0, 0x00, 0x40, 0x00, 0x80, 0x00, 0x00,
-	0x00, 0x40, 0x00, 0x00, 0x00, 0x01,
-	/* 18 Byte Data Blocks 1: invalid */
-	0x00, 0x00, 0x80, 0xa0, 0x70, 0xb0,
+	/*
+	 * Standard Timings.
+	 * The following additional resolutions are supported:
+	 * 1920x1080, 1280x720, 1280x960, 1280x1024,
+	 * 1440x900, 1600x1200, 1680x1050
+	 */
+	0xd1, 0xc0, 0x81, 0xc0, 0x81, 0x40, 0x81, 0x80, 0x95, 0x00,
+	0xa9, 0x40, 0xb3, 0x00, 0x01, 0x01,
+	/* 18 Byte Data Blocks 1: max resolution is 1920x1200 */
+	0x28, 0x3c, 0x80, 0xa0, 0x70, 0xb0,
 	0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a,
 	/* 18 Byte Data Blocks 2: invalid */
 	0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a,
@@ -115,7 +120,7 @@ static unsigned char virtual_dp_monitor_edid[] = {
 	/* Extension Block Count */
 	0x00,
 	/* Checksum */
-	0xef,
+	0x45,
 };
 
 #define DPCD_HEADER_SIZE        0xb
@@ -328,3 +333,15 @@ int intel_vgpu_init_display(struct intel_vgpu *vgpu)
 	else
 		return setup_virtual_dp_monitor(vgpu, PORT_B, GVT_DP_B);
 }
+
+/**
+ * intel_vgpu_reset_display - reset vGPU virtual display emulation
+ * @vgpu: a vGPU
+ *
+ * This function is used to reset the vGPU virtual display emulation.
+ *
+ */
+void intel_vgpu_reset_display(struct intel_vgpu *vgpu)
+{
+	emulate_monitor_status_change(vgpu);
+}
diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h
index 7a60cb848268..8b234ea961f6 100644
--- a/drivers/gpu/drm/i915/gvt/display.h
+++ b/drivers/gpu/drm/i915/gvt/display.h
@@ -158,6 +158,7 @@ void intel_gvt_emulate_vblank(struct intel_gvt *gvt);
 void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt);
 
 int intel_vgpu_init_display(struct intel_vgpu *vgpu);
+void intel_vgpu_reset_display(struct intel_vgpu *vgpu);
 void intel_vgpu_clean_display(struct intel_vgpu *vgpu);
 
 #endif
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index f32bb6f6495c..46eb9fd3c03f 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -364,58 +364,30 @@ static void free_workload(struct intel_vgpu_workload *workload)
 #define get_desc_from_elsp_dwords(ed, i) \
 	((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2]))
 
-
-#define BATCH_BUFFER_ADDR_MASK ((1UL << 32) - (1U << 2))
-#define BATCH_BUFFER_ADDR_HIGH_MASK ((1UL << 16) - (1U))
-static int set_gma_to_bb_cmd(struct intel_shadow_bb_entry *entry_obj,
-			     unsigned long add, int gmadr_bytes)
-{
-	if (WARN_ON(gmadr_bytes != 4 && gmadr_bytes != 8))
-		return -1;
-
-	*((u32 *)(entry_obj->bb_start_cmd_va + (1 << 2))) = add &
-		BATCH_BUFFER_ADDR_MASK;
-	if (gmadr_bytes == 8) {
-		*((u32 *)(entry_obj->bb_start_cmd_va + (2 << 2))) =
-			add & BATCH_BUFFER_ADDR_HIGH_MASK;
-	}
-
-	return 0;
-}
-
 static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 {
-	int gmadr_bytes = workload->vgpu->gvt->device_info.gmadr_bytes_in_cmd;
+	const int gmadr_bytes = workload->vgpu->gvt->device_info.gmadr_bytes_in_cmd;
+	struct intel_shadow_bb_entry *entry_obj;
 
 	/* pin the gem object to ggtt */
-	if (!list_empty(&workload->shadow_bb)) {
-		struct intel_shadow_bb_entry *entry_obj =
-			list_first_entry(&workload->shadow_bb,
-					 struct intel_shadow_bb_entry,
-					 list);
-		struct intel_shadow_bb_entry *temp;
+	list_for_each_entry(entry_obj, &workload->shadow_bb, list) {
+		struct i915_vma *vma;
 
-		list_for_each_entry_safe(entry_obj, temp, &workload->shadow_bb,
-				list) {
-			struct i915_vma *vma;
-
-			vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0,
-						       4, 0);
-			if (IS_ERR(vma)) {
-				gvt_err("Cannot pin\n");
-				return;
-			}
-
-			/* FIXME: we are not tracking our pinned VMA leaving it
-			 * up to the core to fix up the stray pin_count upon
-			 * free.
-			 */
-
-			/* update the relocate gma with shadow batch buffer*/
-			set_gma_to_bb_cmd(entry_obj,
-					  i915_ggtt_offset(vma),
-					  gmadr_bytes);
+		vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0);
+		if (IS_ERR(vma)) {
+			gvt_err("Cannot pin\n");
+			return;
 		}
+
+		/* FIXME: we are not tracking our pinned VMA leaving it
+		 * up to the core to fix up the stray pin_count upon
+		 * free.
+		 */
+
+		/* update the relocate gma with shadow batch buffer*/
+		entry_obj->bb_start_cmd_va[1] = i915_ggtt_offset(vma);
+		if (gmadr_bytes == 8)
+			entry_obj->bb_start_cmd_va[2] = 0;
 	}
 }
 
@@ -515,7 +487,7 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 
 static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 {
-	if (wa_ctx->indirect_ctx.size == 0)
+	if (!wa_ctx->indirect_ctx.obj)
 		return;
 
 	i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
@@ -826,7 +798,7 @@ int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
 		INIT_LIST_HEAD(&vgpu->workload_q_head[i]);
 	}
 
-	vgpu->workloads = kmem_cache_create("gvt-g vgpu workload",
+	vgpu->workloads = kmem_cache_create("gvt-g_vgpu_workload",
 			sizeof(struct intel_vgpu_workload), 0,
 			SLAB_HWCACHE_ALIGN,
 			NULL);
diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c
index 2fae2a2ca96f..1cb29b2d7dc6 100644
--- a/drivers/gpu/drm/i915/gvt/firmware.c
+++ b/drivers/gpu/drm/i915/gvt/firmware.c
@@ -48,31 +48,6 @@ struct gvt_firmware_header {
 	unsigned char data[1];
 };
 
-#define RD(offset) (readl(mmio + offset.reg))
-#define WR(v, offset) (writel(v, mmio + offset.reg))
-
-static void bdw_forcewake_get(void __iomem *mmio)
-{
-	WR(_MASKED_BIT_DISABLE(0xffff), FORCEWAKE_MT);
-
-	RD(ECOBUS);
-
-	if (wait_for((RD(FORCEWAKE_ACK_HSW) & FORCEWAKE_KERNEL) == 0, 50))
-		gvt_err("fail to wait forcewake idle\n");
-
-	WR(_MASKED_BIT_ENABLE(FORCEWAKE_KERNEL), FORCEWAKE_MT);
-
-	if (wait_for((RD(FORCEWAKE_ACK_HSW) & FORCEWAKE_KERNEL), 50))
-		gvt_err("fail to wait forcewake ack\n");
-
-	if (wait_for((RD(GEN6_GT_THREAD_STATUS_REG) &
-		      GEN6_GT_THREAD_STATUS_CORE_MASK) == 0, 50))
-		gvt_err("fail to wait c0 wake up\n");
-}
-
-#undef RD
-#undef WR
-
 #define dev_to_drm_minor(d) dev_get_drvdata((d))
 
 static ssize_t
@@ -91,9 +66,9 @@ static struct bin_attribute firmware_attr = {
 	.mmap = NULL,
 };
 
-static int expose_firmware_sysfs(struct intel_gvt *gvt,
-					void __iomem *mmio)
+static int expose_firmware_sysfs(struct intel_gvt *gvt)
 {
+	struct drm_i915_private *dev_priv = gvt->dev_priv;
 	struct intel_gvt_device_info *info = &gvt->device_info;
 	struct pci_dev *pdev = gvt->dev_priv->drm.pdev;
 	struct intel_gvt_mmio_info *e;
@@ -132,7 +107,7 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt,
 
 		for (j = 0; j < e->length; j += 4)
 			*(u32 *)(p + e->offset + j) =
-				readl(mmio + e->offset + j);
+				I915_READ_NOTRACE(_MMIO(e->offset + j));
 	}
 
 	memcpy(gvt->firmware.mmio, p, info->mmio_size);
@@ -235,7 +210,6 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt)
 	struct gvt_firmware_header *h;
 	const struct firmware *fw;
 	char *path;
-	void __iomem *mmio;
 	void *mem;
 	int ret;
 
@@ -260,17 +234,6 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt)
 
 	firmware->mmio = mem;
 
-	mmio = pci_iomap(pdev, info->mmio_bar, info->mmio_size);
-	if (!mmio) {
-		kfree(path);
-		kfree(firmware->cfg_space);
-		kfree(firmware->mmio);
-		return -EINVAL;
-	}
-
-	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv))
-		bdw_forcewake_get(mmio);
-
 	sprintf(path, "%s/vid_0x%04x_did_0x%04x_rid_0x%04x.golden_hw_state",
 		 GVT_FIRMWARE_PATH, pdev->vendor, pdev->device,
 		 pdev->revision);
@@ -300,13 +263,11 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt)
 
 	release_firmware(fw);
 	firmware->firmware_loaded = true;
-	pci_iounmap(pdev, mmio);
 	return 0;
 
 out_free_fw:
 	release_firmware(fw);
 expose_firmware:
-	expose_firmware_sysfs(gvt, mmio);
-	pci_iounmap(pdev, mmio);
+	expose_firmware_sysfs(gvt);
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 47dec4acf7ff..28c92346db0e 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -606,21 +606,33 @@ struct intel_vgpu_guest_page *intel_vgpu_find_guest_page(
 static inline int init_shadow_page(struct intel_vgpu *vgpu,
 		struct intel_vgpu_shadow_page *p, int type)
 {
+	struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev;
+	dma_addr_t daddr;
+
+	daddr = dma_map_page(kdev, p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(kdev, daddr)) {
+		gvt_err("fail to map dma addr\n");
+		return -EINVAL;
+	}
+
 	p->vaddr = page_address(p->page);
 	p->type = type;
 
 	INIT_HLIST_NODE(&p->node);
 
-	p->mfn = intel_gvt_hypervisor_virt_to_mfn(p->vaddr);
-	if (p->mfn == INTEL_GVT_INVALID_ADDR)
-		return -EFAULT;
-
+	p->mfn = daddr >> GTT_PAGE_SHIFT;
 	hash_add(vgpu->gtt.shadow_page_hash_table, &p->node, p->mfn);
 	return 0;
 }
 
-static inline void clean_shadow_page(struct intel_vgpu_shadow_page *p)
+static inline void clean_shadow_page(struct intel_vgpu *vgpu,
+		struct intel_vgpu_shadow_page *p)
 {
+	struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev;
+
+	dma_unmap_page(kdev, p->mfn << GTT_PAGE_SHIFT, 4096,
+			PCI_DMA_BIDIRECTIONAL);
+
 	if (!hlist_unhashed(&p->node))
 		hash_del(&p->node);
 }
@@ -670,7 +682,7 @@ static void ppgtt_free_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
 {
 	trace_spt_free(spt->vgpu->id, spt, spt->shadow_page.type);
 
-	clean_shadow_page(&spt->shadow_page);
+	clean_shadow_page(spt->vgpu, &spt->shadow_page);
 	intel_vgpu_clean_guest_page(spt->vgpu, &spt->guest_page);
 	list_del_init(&spt->post_shadow_list);
 
@@ -1875,8 +1887,9 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu,
 	int page_entry_num = GTT_PAGE_SIZE >>
 				vgpu->gvt->device_info.gtt_entry_size_shift;
 	void *scratch_pt;
-	unsigned long mfn;
 	int i;
+	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
+	dma_addr_t daddr;
 
 	if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
 		return -EINVAL;
@@ -1887,16 +1900,18 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu,
 		return -ENOMEM;
 	}
 
-	mfn = intel_gvt_hypervisor_virt_to_mfn(scratch_pt);
-	if (mfn == INTEL_GVT_INVALID_ADDR) {
-		gvt_err("fail to translate vaddr:0x%lx\n", (unsigned long)scratch_pt);
-		free_page((unsigned long)scratch_pt);
-		return -EFAULT;
+	daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0,
+			4096, PCI_DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, daddr)) {
+		gvt_err("fail to dmamap scratch_pt\n");
+		__free_page(virt_to_page(scratch_pt));
+		return -ENOMEM;
 	}
-	gtt->scratch_pt[type].page_mfn = mfn;
+	gtt->scratch_pt[type].page_mfn =
+		(unsigned long)(daddr >> GTT_PAGE_SHIFT);
 	gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
 	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
-			vgpu->id, type, mfn);
+			vgpu->id, type, gtt->scratch_pt[type].page_mfn);
 
 	/* Build the tree by full filled the scratch pt with the entries which
 	 * point to the next level scratch pt or scratch page. The
@@ -1930,9 +1945,14 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu,
 static int release_scratch_page_tree(struct intel_vgpu *vgpu)
 {
 	int i;
+	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
+	dma_addr_t daddr;
 
 	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
 		if (vgpu->gtt.scratch_pt[i].page != NULL) {
+			daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
+					GTT_PAGE_SHIFT);
+			dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
 			__free_page(vgpu->gtt.scratch_pt[i].page);
 			vgpu->gtt.scratch_pt[i].page = NULL;
 			vgpu->gtt.scratch_pt[i].page_mfn = 0;
@@ -2192,6 +2212,8 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt)
 {
 	int ret;
 	void *page;
+	struct device *dev = &gvt->dev_priv->drm.pdev->dev;
+	dma_addr_t daddr;
 
 	gvt_dbg_core("init gtt\n");
 
@@ -2209,14 +2231,16 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt)
 		gvt_err("fail to allocate scratch ggtt page\n");
 		return -ENOMEM;
 	}
-	gvt->gtt.scratch_ggtt_page = virt_to_page(page);
 
-	gvt->gtt.scratch_ggtt_mfn = intel_gvt_hypervisor_virt_to_mfn(page);
-	if (gvt->gtt.scratch_ggtt_mfn == INTEL_GVT_INVALID_ADDR) {
-		gvt_err("fail to translate scratch ggtt page\n");
-		__free_page(gvt->gtt.scratch_ggtt_page);
-		return -EFAULT;
+	daddr = dma_map_page(dev, virt_to_page(page), 0,
+			4096, PCI_DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, daddr)) {
+		gvt_err("fail to dmamap scratch ggtt page\n");
+		__free_page(virt_to_page(page));
+		return -ENOMEM;
 	}
+	gvt->gtt.scratch_ggtt_page = virt_to_page(page);
+	gvt->gtt.scratch_ggtt_mfn = (unsigned long)(daddr >> GTT_PAGE_SHIFT);
 
 	if (enable_out_of_sync) {
 		ret = setup_spt_oos(gvt);
@@ -2239,6 +2263,12 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt)
  */
 void intel_gvt_clean_gtt(struct intel_gvt *gvt)
 {
+	struct device *dev = &gvt->dev_priv->drm.pdev->dev;
+	dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_ggtt_mfn <<
+					GTT_PAGE_SHIFT);
+
+	dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
+
 	__free_page(gvt->gtt.scratch_ggtt_page);
 
 	if (enable_out_of_sync)
diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c
index e6bf5c533fbe..3b9d59e457ba 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.c
+++ b/drivers/gpu/drm/i915/gvt/gvt.c
@@ -68,8 +68,6 @@ static const struct intel_gvt_ops intel_gvt_ops = {
  */
 int intel_gvt_init_host(void)
 {
-	int ret;
-
 	if (intel_gvt_host.initialized)
 		return 0;
 
@@ -96,11 +94,6 @@ int intel_gvt_init_host(void)
 	if (!intel_gvt_host.mpt)
 		return -EINVAL;
 
-	/* Try to detect if we're running in host instead of VM. */
-	ret = intel_gvt_hypervisor_detect_host();
-	if (ret)
-		return -ENODEV;
-
 	gvt_dbg_core("Running with hypervisor %s in host mode\n",
 			supported_hypervisors[intel_gvt_host.hypervisor_type]);
 
diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h
index 30e543f5a703..df7f33abd393 100644
--- a/drivers/gpu/drm/i915/gvt/hypercall.h
+++ b/drivers/gpu/drm/i915/gvt/hypercall.h
@@ -38,7 +38,6 @@
  * both Xen and KVM by providing dedicated hypervisor-related MPT modules.
  */
 struct intel_gvt_mpt {
-	int (*detect_host)(void);
 	int (*host_init)(struct device *dev, void *gvt, const void *ops);
 	void (*host_exit)(struct device *dev, void *gvt);
 	int (*attach_vgpu)(void *vgpu, unsigned long *handle);
diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c
index f7be02ac4be1..92bb247e3478 100644
--- a/drivers/gpu/drm/i915/gvt/interrupt.c
+++ b/drivers/gpu/drm/i915/gvt/interrupt.c
@@ -176,26 +176,15 @@ int intel_vgpu_reg_imr_handler(struct intel_vgpu *vgpu,
 {
 	struct intel_gvt *gvt = vgpu->gvt;
 	struct intel_gvt_irq_ops *ops = gvt->irq.ops;
-	u32 changed, masked, unmasked;
 	u32 imr = *(u32 *)p_data;
 
-	gvt_dbg_irq("write IMR %x with val %x\n",
-		reg, imr);
-
-	gvt_dbg_irq("old vIMR %x\n", vgpu_vreg(vgpu, reg));
-
-	/* figure out newly masked/unmasked bits */
-	changed = vgpu_vreg(vgpu, reg) ^ imr;
-	masked = (vgpu_vreg(vgpu, reg) & changed) ^ changed;
-	unmasked = masked ^ changed;
-
-	gvt_dbg_irq("changed %x, masked %x, unmasked %x\n",
-		changed, masked, unmasked);
+	gvt_dbg_irq("write IMR %x, new %08x, old %08x, changed %08x\n",
+		    reg, imr, vgpu_vreg(vgpu, reg), vgpu_vreg(vgpu, reg) ^ imr);
 
 	vgpu_vreg(vgpu, reg) = imr;
 
 	ops->check_pending_irq(vgpu);
-	gvt_dbg_irq("IRQ: new vIMR %x\n", vgpu_vreg(vgpu, reg));
+
 	return 0;
 }
 
@@ -217,14 +206,11 @@ int intel_vgpu_reg_master_irq_handler(struct intel_vgpu *vgpu,
 {
 	struct intel_gvt *gvt = vgpu->gvt;
 	struct intel_gvt_irq_ops *ops = gvt->irq.ops;
-	u32 changed, enabled, disabled;
 	u32 ier = *(u32 *)p_data;
 	u32 virtual_ier = vgpu_vreg(vgpu, reg);
 
-	gvt_dbg_irq("write master irq reg %x with val %x\n",
-		reg, ier);
-
-	gvt_dbg_irq("old vreg %x\n", vgpu_vreg(vgpu, reg));
+	gvt_dbg_irq("write MASTER_IRQ %x, new %08x, old %08x, changed %08x\n",
+		    reg, ier, virtual_ier, virtual_ier ^ ier);
 
 	/*
 	 * GEN8_MASTER_IRQ is a special irq register,
@@ -236,16 +222,8 @@ int intel_vgpu_reg_master_irq_handler(struct intel_vgpu *vgpu,
 	vgpu_vreg(vgpu, reg) &= ~GEN8_MASTER_IRQ_CONTROL;
 	vgpu_vreg(vgpu, reg) |= ier;
 
-	/* figure out newly enabled/disable bits */
-	changed = virtual_ier ^ ier;
-	enabled = (virtual_ier & changed) ^ changed;
-	disabled = enabled ^ changed;
-
-	gvt_dbg_irq("changed %x, enabled %x, disabled %x\n",
-			changed, enabled, disabled);
-
 	ops->check_pending_irq(vgpu);
-	gvt_dbg_irq("new vreg %x\n", vgpu_vreg(vgpu, reg));
+
 	return 0;
 }
 
@@ -268,21 +246,11 @@ int intel_vgpu_reg_ier_handler(struct intel_vgpu *vgpu,
 	struct intel_gvt *gvt = vgpu->gvt;
 	struct intel_gvt_irq_ops *ops = gvt->irq.ops;
 	struct intel_gvt_irq_info *info;
-	u32 changed, enabled, disabled;
 	u32 ier = *(u32 *)p_data;
 
-	gvt_dbg_irq("write IER %x with val %x\n",
-		reg, ier);
-
-	gvt_dbg_irq("old vIER %x\n", vgpu_vreg(vgpu, reg));
+	gvt_dbg_irq("write IER %x, new %08x, old %08x, changed %08x\n",
+		    reg, ier, vgpu_vreg(vgpu, reg), vgpu_vreg(vgpu, reg) ^ ier);
 
-	/* figure out newly enabled/disable bits */
-	changed = vgpu_vreg(vgpu, reg) ^ ier;
-	enabled = (vgpu_vreg(vgpu, reg) & changed) ^ changed;
-	disabled = enabled ^ changed;
-
-	gvt_dbg_irq("changed %x, enabled %x, disabled %x\n",
-			changed, enabled, disabled);
 	vgpu_vreg(vgpu, reg) = ier;
 
 	info = regbase_to_irq_info(gvt, ier_to_regbase(reg));
@@ -293,7 +261,7 @@ int intel_vgpu_reg_ier_handler(struct intel_vgpu *vgpu,
 		update_upstream_irq(vgpu, info);
 
 	ops->check_pending_irq(vgpu);
-	gvt_dbg_irq("new vIER %x\n", vgpu_vreg(vgpu, reg));
+
 	return 0;
 }
 
@@ -317,7 +285,8 @@ int intel_vgpu_reg_iir_handler(struct intel_vgpu *vgpu, unsigned int reg,
 		iir_to_regbase(reg));
 	u32 iir = *(u32 *)p_data;
 
-	gvt_dbg_irq("write IIR %x with val %x\n", reg, iir);
+	gvt_dbg_irq("write IIR %x, new %08x, old %08x, changed %08x\n",
+		    reg, iir, vgpu_vreg(vgpu, reg), vgpu_vreg(vgpu, reg) ^ iir);
 
 	if (WARN_ON(!info))
 		return -EINVAL;
@@ -619,6 +588,10 @@ static void gen8_init_irq(
 		SET_BIT_INFO(irq, 3, PRIMARY_A_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_A);
 		SET_BIT_INFO(irq, 3, PRIMARY_B_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_B);
 		SET_BIT_INFO(irq, 3, PRIMARY_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C);
+
+		SET_BIT_INFO(irq, 4, SPRITE_A_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_A);
+		SET_BIT_INFO(irq, 4, SPRITE_B_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_B);
+		SET_BIT_INFO(irq, 4, SPRITE_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C);
 	}
 
 	/* GEN8 interrupt PCU events */
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 0c9234a87a20..0f7f5d97f582 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -77,7 +77,7 @@ struct kvmgt_guest_info {
 struct gvt_dma {
 	struct rb_node node;
 	gfn_t gfn;
-	kvm_pfn_t pfn;
+	unsigned long iova;	/* page-granular DMA address (daddr >> PAGE_SHIFT) */
 };
 
 static inline bool handle_valid(unsigned long handle)
@@ -89,6 +89,35 @@ static int kvmgt_guest_init(struct mdev_device *mdev);
 static void intel_vgpu_release_work(struct work_struct *work);
 static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
 
+static int gvt_dma_map_iova(struct intel_vgpu *vgpu, kvm_pfn_t pfn,
+		unsigned long *iova)	/* out: DMA address in page units */
+{	/* DMA-map the page at @pfn; returns 0, -EFAULT or -ENOMEM. */
+	struct page *page;
+	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
+	dma_addr_t daddr;
+
+	page = pfn_to_page(pfn);
+	if (is_error_page(page))
+		return -EFAULT;
+
+	daddr = dma_map_page(dev, page, 0, PAGE_SIZE,	/* one whole page */
+			PCI_DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, daddr))
+		return -ENOMEM;
+
+	*iova = (unsigned long)(daddr >> PAGE_SHIFT);	/* gvt_dma_unmap_iova() shifts it back */
+	return 0;
+}
+
+static void gvt_dma_unmap_iova(struct intel_vgpu *vgpu, unsigned long iova)
+{	/* Undo gvt_dma_map_iova(): @iova is the DMA address in page units. */
+	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
+	dma_addr_t daddr;
+
+	daddr = (dma_addr_t)iova << PAGE_SHIFT;	/* widen before shifting: no truncation */
+	dma_unmap_page(dev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+}
+
 static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
 {
 	struct rb_node *node = vgpu->vdev.cache.rb_node;
@@ -111,21 +140,22 @@ out:
 	return ret;
 }
 
-static kvm_pfn_t gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
+static unsigned long gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
 {
 	struct gvt_dma *entry;
-	kvm_pfn_t pfn;
+	unsigned long iova;	/* INTEL_GVT_INVALID_ADDR when @gfn is not cached */
 
 	mutex_lock(&vgpu->vdev.cache_lock);
 
 	entry = __gvt_cache_find(vgpu, gfn);
-	pfn = (entry == NULL) ? 0 : entry->pfn;
+	iova = (entry == NULL) ? INTEL_GVT_INVALID_ADDR : entry->iova;
 
 	mutex_unlock(&vgpu->vdev.cache_lock);
-	return pfn;
+	return iova;	/* value was read while cache_lock was held */
 }
 
-static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, kvm_pfn_t pfn)
+static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
+		unsigned long iova)
 {
 	struct gvt_dma *new, *itr;
 	struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL;
@@ -135,7 +165,7 @@ static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, kvm_pfn_t pfn)
 		return;
 
 	new->gfn = gfn;
-	new->pfn = pfn;
+	new->iova = iova;
 
 	mutex_lock(&vgpu->vdev.cache_lock);
 	while (*link) {
@@ -182,6 +212,7 @@ static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
 	}
 
 	g1 = gfn;
+	gvt_dma_unmap_iova(vgpu, this->iova);
 	rc = vfio_unpin_pages(dev, &g1, 1);
 	WARN_ON(rc != 1);
 	__gvt_cache_remove_entry(vgpu, this);
@@ -204,6 +235,7 @@ static void gvt_cache_destroy(struct intel_vgpu *vgpu)
 	mutex_lock(&vgpu->vdev.cache_lock);
 	while ((node = rb_first(&vgpu->vdev.cache))) {
 		dma = rb_entry(node, struct gvt_dma, node);
+		gvt_dma_unmap_iova(vgpu, dma->iova);
 		gfn = dma->gfn;
 
 		vfio_unpin_pages(dev, &gfn, 1);
@@ -230,8 +262,8 @@ static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
 	return NULL;
 }
 
-static ssize_t available_instance_show(struct kobject *kobj, struct device *dev,
-		char *buf)
+static ssize_t available_instances_show(struct kobject *kobj,
+					struct device *dev, char *buf)
 {
 	struct intel_vgpu_type *type;
 	unsigned int num = 0;
@@ -269,12 +301,12 @@ static ssize_t description_show(struct kobject *kobj, struct device *dev,
 				type->fence);
 }
 
-static MDEV_TYPE_ATTR_RO(available_instance);
+static MDEV_TYPE_ATTR_RO(available_instances);
 static MDEV_TYPE_ATTR_RO(device_api);
 static MDEV_TYPE_ATTR_RO(description);
 
 static struct attribute *type_attrs[] = {
-	&mdev_type_attr_available_instance.attr,
+	&mdev_type_attr_available_instances.attr,
 	&mdev_type_attr_device_api.attr,
 	&mdev_type_attr_description.attr,
 	NULL,
@@ -965,11 +997,6 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
 			sparse->areas[0].offset =
 					PAGE_ALIGN(vgpu_aperture_offset(vgpu));
 			sparse->areas[0].size = vgpu_aperture_sz(vgpu);
-			if (!caps.buf) {
-				kfree(caps.buf);
-				caps.buf = NULL;
-				caps.size = 0;
-			}
 			break;
 
 		case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
@@ -1248,43 +1275,6 @@ static void kvmgt_page_track_flush_slot(struct kvm *kvm,
 	spin_unlock(&kvm->mmu_lock);
 }
 
-static bool kvmgt_check_guest(void)
-{
-	unsigned int eax, ebx, ecx, edx;
-	char s[12];
-	unsigned int *i;
-
-	eax = KVM_CPUID_SIGNATURE;
-	ebx = ecx = edx = 0;
-
-	asm volatile ("cpuid"
-		      : "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
-		      :
-		      : "cc", "memory");
-	i = (unsigned int *)s;
-	i[0] = ebx;
-	i[1] = ecx;
-	i[2] = edx;
-
-	return !strncmp(s, "KVMKVMKVM", strlen("KVMKVMKVM"));
-}
-
-/**
- * NOTE:
- * It's actually impossible to check if we are running in KVM host,
- * since the "KVM host" is simply native. So we only dectect guest here.
- */
-static int kvmgt_detect_host(void)
-{
-#ifdef CONFIG_INTEL_IOMMU
-	if (intel_iommu_gfx_mapped) {
-		gvt_err("Hardware IOMMU compatibility not yet supported, try to boot with intel_iommu=igfx_off\n");
-		return -ENODEV;
-	}
-#endif
-	return kvmgt_check_guest() ? -ENODEV : 0;
-}
-
 static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm)
 {
 	struct intel_vgpu *itr;
@@ -1390,7 +1380,7 @@ static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
 
 static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
 {
-	unsigned long pfn;
+	unsigned long iova, pfn;
 	struct kvmgt_guest_info *info;
 	struct device *dev;
 	int rc;
@@ -1399,9 +1389,9 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
 		return INTEL_GVT_INVALID_ADDR;
 
 	info = (struct kvmgt_guest_info *)handle;
-	pfn = gvt_cache_find(info->vgpu, gfn);
-	if (pfn != 0)
-		return pfn;
+	iova = gvt_cache_find(info->vgpu, gfn);
+	if (iova != INTEL_GVT_INVALID_ADDR)
+		return iova;
 
 	pfn = INTEL_GVT_INVALID_ADDR;
 	dev = mdev_dev(info->vgpu->vdev.mdev);
@@ -1410,9 +1400,16 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
 		gvt_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", gfn, rc);
 		return INTEL_GVT_INVALID_ADDR;
 	}
+	/* translate the pinned pfn into a host iova that GFX can use for DMA */
+	rc = gvt_dma_map_iova(info->vgpu, pfn, &iova);
+	if (rc) {
+		gvt_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn);
+		vfio_unpin_pages(dev, &gfn, 1);
+		return INTEL_GVT_INVALID_ADDR;
+	}
 
-	gvt_cache_add(info->vgpu, gfn, pfn);
-	return pfn;
+	gvt_cache_add(info->vgpu, gfn, iova);
+	return iova;
 }
 
 static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
@@ -1459,7 +1456,6 @@ static unsigned long kvmgt_virt_to_pfn(void *addr)
 }
 
 struct intel_gvt_mpt kvmgt_mpt = {
-	.detect_host = kvmgt_detect_host,
 	.host_init = kvmgt_host_init,
 	.host_exit = kvmgt_host_exit,
 	.attach_vgpu = kvmgt_attach_vgpu,
diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h
index 1af5830c0a56..419353624c5a 100644
--- a/drivers/gpu/drm/i915/gvt/mpt.h
+++ b/drivers/gpu/drm/i915/gvt/mpt.h
@@ -44,18 +44,6 @@
  */
 
 /**
- * intel_gvt_hypervisor_detect_host - check if GVT-g is running within
- * hypervisor host/privilged domain
- *
- * Returns:
- * Zero on success, -ENODEV if current kernel is running inside a VM
- */
-static inline int intel_gvt_hypervisor_detect_host(void)
-{
-	return intel_gvt_host.mpt->detect_host();
-}
-
-/**
  * intel_gvt_hypervisor_host_init - init GVT-g host side
  *
  * Returns:
diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c
index 44136b1f3aab..2b3a642284b6 100644
--- a/drivers/gpu/drm/i915/gvt/render.c
+++ b/drivers/gpu/drm/i915/gvt/render.c
@@ -236,12 +236,18 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
 	}
 }
 
+#define CTX_CONTEXT_CONTROL_VAL	0x03	/* index into lrc_reg_state[] */
+
 void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id)
 {
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct render_mmio *mmio;
 	u32 v;
 	int i, array_size;
+	u32 *reg_state = vgpu->shadow_ctx->engine[ring_id].lrc_reg_state;
+	u32 ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL];
+	u32 inhibit_mask =
+		_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
 
 	if (IS_SKYLAKE(vgpu->gvt->dev_priv)) {
 		mmio = gen9_render_mmio_list;
@@ -257,6 +263,17 @@ void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id)
 			continue;
 
 		mmio->value = I915_READ(mmio->reg);
+
+		/*
+		 * if it is an inhibit context, load in_context mmio
+		 * into HW by mmio write. If it is not, skip this mmio
+		 * write.
+		 */
+		if (mmio->in_context &&
+				((ctx_ctrl & inhibit_mask) != inhibit_mask) &&
+				i915.enable_execlists)
+			continue;
+
 		if (mmio->mask)
 			v = vgpu_vreg(vgpu, mmio->reg) | (mmio->mask << 16);
 		else
diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c
index 678b0be85376..06c9584ac5f0 100644
--- a/drivers/gpu/drm/i915/gvt/sched_policy.c
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
@@ -125,7 +125,6 @@ static void tbs_sched_func(struct work_struct *work)
 		vgpu_data = scheduler->current_vgpu->sched_data;
 		head = &vgpu_data->list;
 	} else {
-		gvt_dbg_sched("no current vgpu search from q head\n");
 		head = &sched_data->runq_head;
 	}
 
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 7ea68a75dc46..d6b6d0efdd1a 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -169,7 +169,8 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
 		ring_id, workload);
 
-	shadow_ctx->desc_template = workload->ctx_desc.addressing_mode <<
+	shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
+	shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode <<
 				    GEN8_CTX_ADDRESSING_MODE_SHIFT;
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
@@ -456,7 +457,7 @@ static int workload_thread(void *priv)
 		}
 
 complete:
-		gvt_dbg_sched("will complete workload %p\n, status: %d\n",
+		gvt_dbg_sched("will complete workload %p, status: %d\n",
 				workload, workload->status);
 
 		if (workload->req)
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index 3b30c28bff51..2833dfa8c9ae 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -113,7 +113,7 @@ struct intel_shadow_bb_entry {
 	struct drm_i915_gem_object *obj;
 	void *va;
 	unsigned long len;
-	void *bb_start_cmd_va;
+	u32 *bb_start_cmd_va;
 };
 
 #define workload_q_head(vgpu, ring_id) \
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 7295bc8e12fb..95a97aa0051e 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -74,7 +74,7 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu)
 int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
 {
 	unsigned int num_types;
-	unsigned int i, low_avail;
+	unsigned int i, low_avail, high_avail;
 	unsigned int min_low;
 
 	/* vGPU type name is defined as GVTg_Vx_y which contains
@@ -89,9 +89,9 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
 	 * to indicate how many vGPU instance can be created for this
 	 * type.
 	 *
-	 * Currently use static size here as we init type earlier..
 	 */
-	low_avail = MB_TO_BYTES(256) - HOST_LOW_GM_SIZE;
+	low_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE;
+	high_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE;
 	num_types = 4;
 
 	gvt->types = kzalloc(num_types * sizeof(struct intel_vgpu_type),
@@ -106,7 +106,8 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
 		gvt->types[i].low_gm_size = min_low;
 		gvt->types[i].high_gm_size = max((min_low<<3), MB_TO_BYTES(384U));
 		gvt->types[i].fence = 4;
-		gvt->types[i].max_instance = low_avail / min_low;
+		gvt->types[i].max_instance = min(low_avail / min_low,
+						 high_avail / gvt->types[i].high_gm_size);
 		gvt->types[i].avail_instance = gvt->types[i].max_instance;
 
 		if (IS_GEN8(gvt->dev_priv))
@@ -142,9 +143,9 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt)
 	/* Need to depend on maxium hw resource size but keep on
 	 * static config for now.
 	 */
-	low_gm_avail = MB_TO_BYTES(256) - HOST_LOW_GM_SIZE -
+	low_gm_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE -
 		gvt->gm.vgpu_allocated_low_gm_size;
-	high_gm_avail = MB_TO_BYTES(256) * 8UL - HOST_HIGH_GM_SIZE -
+	high_gm_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE -
 		gvt->gm.vgpu_allocated_high_gm_size;
 	fence_avail = gvt_fence_sz(gvt) - HOST_FENCE -
 		gvt->fence.vgpu_allocated_fence_num;
@@ -384,6 +385,7 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
 		intel_vgpu_reset_resource(vgpu);
 		intel_vgpu_reset_mmio(vgpu);
 		populate_pvinfo_page(vgpu);
+		intel_vgpu_reset_display(vgpu);
 
 		if (dmlr)
 			intel_vgpu_reset_cfg_space(vgpu);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 4ae69ebe166e..f6017f2cfb86 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -213,7 +213,8 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv)
 			} else if (id == INTEL_PCH_KBP_DEVICE_ID_TYPE) {
 				dev_priv->pch_type = PCH_KBP;
 				DRM_DEBUG_KMS("Found KabyPoint PCH\n");
-				WARN_ON(!IS_KABYLAKE(dev_priv));
+				WARN_ON(!IS_SKYLAKE(dev_priv) &&
+					!IS_KABYLAKE(dev_priv));
 			} else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) ||
 				   (id == INTEL_PCH_P3X_DEVICE_ID_TYPE) ||
 				   ((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) &&
@@ -824,10 +825,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 	if (ret < 0)
 		return ret;
 
-	ret = intel_gvt_init(dev_priv);
-	if (ret < 0)
-		goto err_workqueues;
-
 	/* This must be called before any calls to HAS_PCH_* */
 	intel_detect_pch(dev_priv);
 
@@ -841,7 +838,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 	intel_init_audio_hooks(dev_priv);
 	ret = i915_gem_load_init(dev_priv);
 	if (ret < 0)
-		goto err_gvt;
+		goto err_workqueues;
 
 	intel_display_crc_init(dev_priv);
 
@@ -853,8 +850,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 
 	return 0;
 
-err_gvt:
-	intel_gvt_cleanup(dev_priv);
 err_workqueues:
 	i915_workqueues_cleanup(dev_priv);
 	return ret;
@@ -1077,6 +1072,10 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 			DRM_DEBUG_DRIVER("can't enable MSI");
 	}
 
+	ret = intel_gvt_init(dev_priv);
+	if (ret)
+		goto out_ggtt;
+
 	return 0;
 
 out_ggtt:
@@ -1290,6 +1289,8 @@ void i915_driver_unload(struct drm_device *dev)
 
 	intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
 
+	intel_gvt_cleanup(dev_priv);
+
 	i915_driver_unregister(dev_priv);
 
 	drm_vblank_cleanup(dev);
@@ -2377,7 +2378,7 @@ static int intel_runtime_suspend(struct device *kdev)
 
 	assert_forcewakes_inactive(dev_priv);
 
-	if (!IS_VALLEYVIEW(dev_priv) || !IS_CHERRYVIEW(dev_priv))
+	if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv))
 		intel_hpd_poll_init(dev_priv);
 
 	DRM_DEBUG_KMS("Device suspended\n");
@@ -2426,6 +2427,7 @@ static int intel_runtime_resume(struct device *kdev)
 	 * we can do is to hope that things will still work (and disable RPM).
 	 */
 	i915_gem_init_swizzling(dev_priv);
+	i915_gem_restore_fences(dev_priv);
 
 	intel_runtime_pm_enable_interrupts(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 244628065f94..e44c598ecb82 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2242,6 +2242,11 @@ struct drm_i915_private {
 
 	struct i915_frontbuffer_tracking fb_tracking;
 
+	struct intel_atomic_helper {
+		struct llist_head free_list;
+		struct work_struct free_work;
+	} atomic_helper;
+
 	u16 orig_clock;
 
 	bool mchbar_need_disable;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c8689892a89f..88f3628b4e29 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -440,7 +440,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
 			timeout = i915_gem_object_wait_fence(shared[i],
 							     flags, timeout,
 							     rps);
-			if (timeout <= 0)
+			if (timeout < 0)
 				break;
 
 			dma_fence_put(shared[i]);
@@ -453,7 +453,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
 		excl = reservation_object_get_excl_rcu(resv);
 	}
 
-	if (excl && timeout > 0)
+	if (excl && timeout >= 0)
 		timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps);
 
 	dma_fence_put(excl);
@@ -2009,8 +2009,16 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
 
-		if (WARN_ON(reg->pin_count))
-			continue;
+		/* Ideally we want to assert that the fence register is not
+		 * live at this point (i.e. that no piece of code will be
+		 * trying to write through fence + GTT, as that both violates
+		 * our tracking of activity and associated locking/barriers,
+		 * but also is illegal given that the hw is powered down).
+		 *
+		 * Previously we used reg->pin_count as a "liveness" indicator.
+		 * That is not sufficient, and we need a more fine-grained
+		 * tool if we want to have a sanity check here.
+		 */
 
 		if (!reg->vma)
 			continue;
@@ -2735,21 +2743,17 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 		engine->irq_seqno_barrier(engine);
 
 	request = i915_gem_find_active_request(engine);
-	if (!request)
-		return;
+	if (request && i915_gem_reset_request(request)) {
+		DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
+				 engine->name, request->global_seqno);
 
-	if (!i915_gem_reset_request(request))
-		return;
-
-	DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
-			 engine->name, request->global_seqno);
+		/* If this context is now banned, skip all pending requests. */
+		if (i915_gem_context_is_banned(request->ctx))
+			engine_skip_context(request);
+	}
 
 	/* Setup the CS to resume from the breadcrumb of the hung request */
 	engine->reset_hw(engine, request);
-
-	/* If this context is now banned, skip all of its pending requests. */
-	if (i915_gem_context_is_banned(request->ctx))
-		engine_skip_context(request);
 }
 
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
@@ -3517,7 +3521,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
 	/* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
-	if (obj->cache_dirty) {
+	if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
 		i915_gem_clflush_object(obj, true);
 		intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 57bec08e80c5..d02cfaefe1c8 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1180,14 +1180,14 @@ validate_exec_list(struct drm_device *dev,
 			if (exec[i].offset !=
 			    gen8_canonical_addr(exec[i].offset & PAGE_MASK))
 				return -EINVAL;
-
-			/* From drm_mm perspective address space is continuous,
-			 * so from this point we're always using non-canonical
-			 * form internally.
-			 */
-			exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
 		}
 
+		/* From drm_mm perspective address space is continuous,
+		 * so from this point we're always using non-canonical
+		 * form internally.
+		 */
+		exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
+
 		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
 			return -EINVAL;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 30d8dbd04f0b..2801a4d56324 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -755,9 +755,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 	GEM_BUG_ON(pte_end > GEN8_PTES);
 
 	bitmap_clear(pt->used_ptes, pte, num_entries);
-
-	if (bitmap_empty(pt->used_ptes, GEN8_PTES))
-		return true;
+	if (USES_FULL_PPGTT(vm->i915)) {
+		if (bitmap_empty(pt->used_ptes, GEN8_PTES))
+			return true;
+	}
 
 	pt_vaddr = kmap_px(pt);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
index 17ce53d0d092..933019e1b206 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -46,16 +46,39 @@ static struct sg_table *
 i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	unsigned int npages = obj->base.size / PAGE_SIZE;
 	struct sg_table *st;
 	struct scatterlist *sg;
+	unsigned int npages;
 	int max_order;
 	gfp_t gfp;
 
+	max_order = MAX_ORDER;
+#ifdef CONFIG_SWIOTLB
+	if (swiotlb_nr_tbl()) {
+		unsigned int max_segment;
+
+		max_segment = swiotlb_max_segment();
+		if (max_segment) {
+			max_segment = max_t(unsigned int, max_segment,
+					    PAGE_SIZE) >> PAGE_SHIFT;
+			max_order = min(max_order, ilog2(max_segment));
+		}
+	}
+#endif
+
+	gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
+	if (IS_I965GM(i915) || IS_I965G(i915)) {
+		/* 965gm cannot relocate objects above 4GiB. */
+		gfp &= ~__GFP_HIGHMEM;
+		gfp |= __GFP_DMA32;
+	}
+
+create_st:
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
 	if (!st)
 		return ERR_PTR(-ENOMEM);
 
+	npages = obj->base.size / PAGE_SIZE;
 	if (sg_alloc_table(st, npages, GFP_KERNEL)) {
 		kfree(st);
 		return ERR_PTR(-ENOMEM);
@@ -64,19 +87,6 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
 	sg = st->sgl;
 	st->nents = 0;
 
-	max_order = MAX_ORDER;
-#ifdef CONFIG_SWIOTLB
-	if (swiotlb_nr_tbl()) /* minimum max swiotlb size is IO_TLB_SEGSIZE */
-		max_order = min(max_order, ilog2(IO_TLB_SEGPAGES));
-#endif
-
-	gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
-	if (IS_I965GM(i915) || IS_I965G(i915)) {
-		/* 965gm cannot relocate objects above 4GiB. */
-		gfp &= ~__GFP_HIGHMEM;
-		gfp |= __GFP_DMA32;
-	}
-
 	do {
 		int order = min(fls(npages) - 1, max_order);
 		struct page *page;
@@ -104,8 +114,15 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
 		sg = __sg_next(sg);
 	} while (1);
 
-	if (i915_gem_gtt_prepare_pages(obj, st))
+	if (i915_gem_gtt_prepare_pages(obj, st)) {
+		/* Failed to dma-map; try again with single-page sg segments */
+		if (get_order(st->sgl->length)) {
+			internal_free_pages(st);
+			max_order = 0;
+			goto create_st;
+		}
 		goto err;
+	}
 
 	/* Mark the pages as dontneed whilst they are still pinned. As soon
 	 * as they are unpinned they are allowed to be reaped by the shrinker,
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 72b7f7d9461d..f31deeb72703 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1025,8 +1025,13 @@ __i915_request_wait_for_execute(struct drm_i915_gem_request *request,
 			break;
 		}
 
+		if (!timeout) {
+			timeout = -ETIME;
+			break;
+		}
+
 		timeout = io_schedule_timeout(timeout);
-	} while (timeout);
+	} while (1);
 	finish_wait(&request->execute.wait, &wait);
 
 	if (flags & I915_WAIT_LOCKED)
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index ec7c5d80fe4f..9673bcc3b6ad 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -405,6 +405,11 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
 
 	mutex_init(&dev_priv->mm.stolen_lock);
 
+	if (intel_vgpu_active(dev_priv)) {
+		DRM_INFO("iGVT-g active, disabling use of stolen memory\n");
+		return 0;
+	}
+
 #ifdef CONFIG_INTEL_IOMMU
 	if (intel_iommu_gfx_mapped && INTEL_GEN(dev_priv) < 8) {
 		DRM_INFO("DMAR active, disabling use of stolen memory\n");
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index b1361cfd4c5c..974ac08df473 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -173,7 +173,7 @@ i915_tiling_ok(struct drm_i915_gem_object *obj,
 	else
 		tile_width = 512;
 
-	if (!IS_ALIGNED(stride, tile_width))
+	if (!stride || !IS_ALIGNED(stride, tile_width))
 		return false;
 
 	/* 965+ just needs multiples of tile width */
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6fefc34ef602..7dba148ca792 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3123,19 +3123,16 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv)
 	I915_WRITE(PCH_PORT_HOTPLUG, hotplug);
 }
 
-static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv)
+static void spt_hpd_detection_setup(struct drm_i915_private *dev_priv)
 {
-	u32 hotplug_irqs, hotplug, enabled_irqs;
-
-	hotplug_irqs = SDE_HOTPLUG_MASK_SPT;
-	enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_spt);
-
-	ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs);
+	u32 hotplug;
 
 	/* Enable digital hotplug on the PCH */
 	hotplug = I915_READ(PCH_PORT_HOTPLUG);
-	hotplug |= PORTD_HOTPLUG_ENABLE | PORTC_HOTPLUG_ENABLE |
-		PORTB_HOTPLUG_ENABLE | PORTA_HOTPLUG_ENABLE;
+	hotplug |= PORTA_HOTPLUG_ENABLE |
+		   PORTB_HOTPLUG_ENABLE |
+		   PORTC_HOTPLUG_ENABLE |
+		   PORTD_HOTPLUG_ENABLE;
 	I915_WRITE(PCH_PORT_HOTPLUG, hotplug);
 
 	hotplug = I915_READ(PCH_PORT_HOTPLUG2);
@@ -3143,6 +3140,18 @@ static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv)
 	I915_WRITE(PCH_PORT_HOTPLUG2, hotplug);
 }
 
+static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv)
+{	/* Update the SPT hotplug interrupt enables, then set up HPD detection. */
+	u32 hotplug_irqs, enabled_irqs;
+
+	hotplug_irqs = SDE_HOTPLUG_MASK_SPT;
+	enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_spt);
+
+	ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs);
+
+	spt_hpd_detection_setup(dev_priv);	/* programs PCH_PORT_HOTPLUG and PCH_PORT_HOTPLUG2 */
+}
+
 static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv)
 {
 	u32 hotplug_irqs, hotplug, enabled_irqs;
@@ -3177,18 +3186,15 @@ static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv)
 	ibx_hpd_irq_setup(dev_priv);
 }
 
-static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv)
+static void __bxt_hpd_detection_setup(struct drm_i915_private *dev_priv,
+				      u32 enabled_irqs)
 {
-	u32 hotplug_irqs, hotplug, enabled_irqs;
-
-	enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_bxt);
-	hotplug_irqs = BXT_DE_PORT_HOTPLUG_MASK;
-
-	bdw_update_port_irq(dev_priv, hotplug_irqs, enabled_irqs);
+	u32 hotplug;
 
 	hotplug = I915_READ(PCH_PORT_HOTPLUG);
-	hotplug |= PORTC_HOTPLUG_ENABLE | PORTB_HOTPLUG_ENABLE |
-		PORTA_HOTPLUG_ENABLE;
+	hotplug |= PORTA_HOTPLUG_ENABLE |
+		   PORTB_HOTPLUG_ENABLE |
+		   PORTC_HOTPLUG_ENABLE;
 
 	DRM_DEBUG_KMS("Invert bit setting: hp_ctl:%x hp_port:%x\n",
 		      hotplug, enabled_irqs);
@@ -3198,7 +3204,6 @@ static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv)
 	 * For BXT invert bit has to be set based on AOB design
 	 * for HPD detection logic, update it based on VBT fields.
 	 */
-
 	if ((enabled_irqs & BXT_DE_PORT_HP_DDIA) &&
 	    intel_bios_is_port_hpd_inverted(dev_priv, PORT_A))
 		hotplug |= BXT_DDIA_HPD_INVERT;
@@ -3212,6 +3217,23 @@ static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv)
 	I915_WRITE(PCH_PORT_HOTPLUG, hotplug);
 }
 
+static void bxt_hpd_detection_setup(struct drm_i915_private *dev_priv)
+{	/* HPD detection setup with the full BXT port hotplug mask as enabled_irqs. */
+	__bxt_hpd_detection_setup(dev_priv, BXT_DE_PORT_HOTPLUG_MASK);
+}
+
+static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv)
+{	/* Update the BXT port hotplug interrupt enables, then HPD detection. */
+	u32 hotplug_irqs, enabled_irqs;
+
+	enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_bxt);
+	hotplug_irqs = BXT_DE_PORT_HOTPLUG_MASK;
+
+	bdw_update_port_irq(dev_priv, hotplug_irqs, enabled_irqs);
+
+	__bxt_hpd_detection_setup(dev_priv, enabled_irqs); /* enabled irqs only, not the full mask */
+}
+
 static void ibx_irq_postinstall(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -3227,6 +3249,12 @@ static void ibx_irq_postinstall(struct drm_device *dev)
 
 	gen5_assert_iir_is_zero(dev_priv, SDEIIR);
 	I915_WRITE(SDEIMR, ~mask);
+
+	if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) ||
+	    HAS_PCH_LPT(dev_priv))
+		; /* TODO: Enable HPD detection on older PCH platforms too */
+	else
+		spt_hpd_detection_setup(dev_priv);
 }
 
 static void gen5_gt_irq_postinstall(struct drm_device *dev)
@@ -3438,6 +3466,9 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
 
 	GEN5_IRQ_INIT(GEN8_DE_PORT_, ~de_port_masked, de_port_enables);
 	GEN5_IRQ_INIT(GEN8_DE_MISC_, ~de_misc_masked, de_misc_masked);
+
+	if (IS_GEN9_LP(dev_priv))
+		bxt_hpd_detection_setup(dev_priv);
 }
 
 static int gen8_irq_postinstall(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 72f9f36ae5ce..675323189f2c 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3307,8 +3307,10 @@ enum skl_disp_power_wells {
 /*
  * Logical Context regs
  */
-#define CCID			_MMIO(0x2180)
-#define   CCID_EN		(1<<0)
+#define CCID				_MMIO(0x2180)
+#define   CCID_EN			BIT(0)
+#define   CCID_EXTENDED_STATE_RESTORE	BIT(2)
+#define   CCID_EXTENDED_STATE_SAVE	BIT(3)
 /*
  * Notes on SNB/IVB/VLV context size:
  * - Power context is saved elsewhere (LLC or stolen)
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 385e29af8baa..2bf5aca6e37c 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -499,6 +499,7 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector)
 	struct drm_i915_private *dev_priv = to_i915(crt->base.base.dev);
 	struct edid *edid;
 	struct i2c_adapter *i2c;
+	bool ret = false;
 
 	BUG_ON(crt->base.type != INTEL_OUTPUT_ANALOG);
 
@@ -515,17 +516,17 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector)
 		 */
 		if (!is_digital) {
 			DRM_DEBUG_KMS("CRT detected via DDC:0x50 [EDID]\n");
-			return true;
+			ret = true;
+		} else {
+			DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [EDID reports a digital panel]\n");
 		}
-
-		DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [EDID reports a digital panel]\n");
 	} else {
 		DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [no valid EDID found]\n");
 	}
 
 	kfree(edid);
 
-	return false;
+	return ret;
 }
 
 static enum drm_connector_status
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index b3e773c9f872..01341670738f 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2578,8 +2578,9 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv,
 			 * We only keep the x/y offsets, so push all of the
 			 * gtt offset into the x/y offsets.
 			 */
-			_intel_adjust_tile_offset(&x, &y, tile_size,
-						  tile_width, tile_height, pitch_tiles,
+			_intel_adjust_tile_offset(&x, &y,
+						  tile_width, tile_height,
+						  tile_size, pitch_tiles,
 						  gtt_offset_rotated * tile_size, 0);
 
 			gtt_offset_rotated += rot_info->plane[i].width * rot_info->plane[i].height;
@@ -4252,10 +4253,10 @@ static void page_flip_completed(struct intel_crtc *intel_crtc)
 	drm_crtc_vblank_put(&intel_crtc->base);
 
 	wake_up_all(&dev_priv->pending_flip_queue);
-	queue_work(dev_priv->wq, &work->unpin_work);
-
 	trace_i915_flip_complete(intel_crtc->plane,
 				 work->pending_flip_obj);
+
+	queue_work(dev_priv->wq, &work->unpin_work);
 }
 
 static int intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc)
@@ -6881,6 +6882,12 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
 	}
 
 	state = drm_atomic_state_alloc(crtc->dev);
+	if (!state) { /* atomic state allocation failed */
+		DRM_DEBUG_KMS("failed to disable [CRTC:%d:%s], out of memory\n",
+			      crtc->base.id, crtc->name);
+		return;
+	}
+
 	state->acquire_ctx = crtc->dev->mode_config.acquire_ctx;
 
 	/* Everything's already locked, -EDEADLK can't happen. */
@@ -14562,8 +14569,14 @@ intel_atomic_commit_ready(struct i915_sw_fence *fence,
 		break;
 
 	case FENCE_FREE:
-		drm_atomic_state_put(&state->base);
-		break;
+		{
+			struct intel_atomic_helper *helper =
+				&to_i915(state->base.dev)->atomic_helper;
+
+			if (llist_add(&state->freed, &helper->free_list))
+				schedule_work(&helper->free_work);
+			break;
+		}
 	}
 
 	return NOTIFY_DONE;
@@ -16586,6 +16599,18 @@ fail:
 	drm_modeset_acquire_fini(&ctx);
 }
 
+static void intel_atomic_helper_free_state(struct work_struct *work)
+{
+	struct drm_i915_private *i915 =
+		container_of(work, typeof(*i915), atomic_helper.free_work);
+	struct llist_node *head = llist_del_all(&i915->atomic_helper.free_list);
+	struct intel_atomic_state *cur, *tmp;
+
+	/* Drop a reference on every state taken off the free list. */
+	llist_for_each_entry_safe(cur, tmp, head, freed)
+		drm_atomic_state_put(&cur->base);
+}
+
 int intel_modeset_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -16605,6 +16630,9 @@ int intel_modeset_init(struct drm_device *dev)
 
 	dev->mode_config.funcs = &intel_mode_funcs;
 
+	INIT_WORK(&dev_priv->atomic_helper.free_work,
+		  intel_atomic_helper_free_state);
+
 	intel_init_quirks(dev);
 
 	intel_init_pm(dev_priv);
@@ -17262,6 +17290,9 @@ void intel_modeset_cleanup(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
+	flush_work(&dev_priv->atomic_helper.free_work);
+	WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list));
+
 	intel_disable_gt_powersave(dev_priv);
 
 	/*
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 3d8ac8aa7214..d1670b8afbf5 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -2887,6 +2887,9 @@ static void vlv_detach_power_sequencer(struct intel_dp *intel_dp)
 
 	WARN_ON(intel_dp->active_pipe != INVALID_PIPE);
 
+	if (WARN_ON(pipe != PIPE_A && pipe != PIPE_B))
+		return;
+
 	edp_panel_vdd_off_sync(intel_dp);
 
 	/*
@@ -2914,9 +2917,6 @@ static void vlv_steal_power_sequencer(struct drm_device *dev,
 
 	lockdep_assert_held(&dev_priv->pps_mutex);
 
-	if (WARN_ON(pipe != PIPE_A && pipe != PIPE_B))
-		return;
-
 	for_each_intel_encoder(dev, encoder) {
 		struct intel_dp *intel_dp;
 		enum port port;
@@ -4406,8 +4406,8 @@ static bool bxt_digital_port_connected(struct drm_i915_private *dev_priv,
  *
  * Return %true if @port is connected, %false otherwise.
  */
-static bool intel_digital_port_connected(struct drm_i915_private *dev_priv,
-					 struct intel_digital_port *port)
+bool intel_digital_port_connected(struct drm_i915_private *dev_priv,
+				  struct intel_digital_port *port)
 {
 	if (HAS_PCH_IBX(dev_priv))
 		return ibx_digital_port_connected(dev_priv, port);
diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c
index c92a2558beb4..e59e43a9f3a6 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c
@@ -1855,7 +1855,8 @@ bxt_get_dpll(struct intel_crtc *crtc,
 		return NULL;
 
 	if ((encoder->type == INTEL_OUTPUT_DP ||
-	     encoder->type == INTEL_OUTPUT_EDP) &&
+	     encoder->type == INTEL_OUTPUT_EDP ||
+	     encoder->type == INTEL_OUTPUT_DP_MST) &&
 	    !bxt_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state))
 		return NULL;
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 0cec0013ace0..40fed65a791d 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -371,6 +371,8 @@ struct intel_atomic_state {
 	struct skl_wm_values wm_results;
 
 	struct i915_sw_fence commit_ready;
+
+	struct llist_node freed;
 };
 
 struct intel_plane_state {
@@ -1485,6 +1487,8 @@ bool __intel_dp_read_desc(struct intel_dp *intel_dp,
 bool intel_dp_read_desc(struct intel_dp *intel_dp);
 int intel_dp_link_required(int pixel_clock, int bpp);
 int intel_dp_max_data_rate(int max_link_clock, int max_lanes);
+bool intel_digital_port_connected(struct drm_i915_private *dev_priv,
+				  struct intel_digital_port *port);
 
 /* intel_dp_aux_backlight.c */
 int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector);
diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c
index 290384e86c63..d23c0fcff751 100644
--- a/drivers/gpu/drm/i915/intel_gvt.c
+++ b/drivers/gpu/drm/i915/intel_gvt.c
@@ -67,6 +67,11 @@ int intel_gvt_init(struct drm_i915_private *dev_priv)
 		return 0;
 	}
 
+	if (intel_vgpu_active(dev_priv)) {
+		DRM_DEBUG_DRIVER("GVT-g is disabled for guest\n");
+		goto bail;
+	}
+
 	if (!is_supported_device(dev_priv)) {
 		DRM_DEBUG_DRIVER("Unsupported device. GVT-g is disabled\n");
 		goto bail;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 432ee495dec2..ebf8023d21e6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -360,7 +360,8 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
 static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 {
 	struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
-	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
+	struct i915_hw_ppgtt *ppgtt =
+		rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;
 
 	reg_state[CTX_RING_TAIL+1] = rq->tail;
@@ -1389,7 +1390,20 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 	struct execlist_port *port = engine->execlist_port;
-	struct intel_context *ce = &request->ctx->engine[engine->id];
+	struct intel_context *ce;
+
+	/* If the request was innocent, we leave the request in the ELSP
+	 * and will try to replay it on restarting. The context image may
+	 * have been corrupted by the reset, in which case we may have
+	 * to service a new GPU hang, but more likely we can continue on
+	 * without impact.
+	 *
+	 * If the request was guilty, we presume the context is corrupt
+	 * and have to at least restore the RING register in the context
+	 * image back to the expected values to skip over the guilty request.
+	 */
+	if (!request || request->fence.error != -EIO)
+		return;
 
 	/* We want a simple context + ring to execute the breadcrumb update.
 	 * We cannot rely on the context being intact across the GPU hang,
@@ -1398,6 +1412,7 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	 * future request will be after userspace has had the opportunity
 	 * to recreate its own state.
 	 */
+	ce = &request->ctx->engine[engine->id];
 	execlists_init_reg_state(ce->lrc_reg_state,
 				 request->ctx, engine, ce->ring);
 
diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c
index f6d4e6940257..c300647ef604 100644
--- a/drivers/gpu/drm/i915/intel_lspcon.c
+++ b/drivers/gpu/drm/i915/intel_lspcon.c
@@ -158,6 +158,8 @@ static bool lspcon_probe(struct intel_lspcon *lspcon)
 static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)
 {
 	struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon);
+	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
 	unsigned long start = jiffies;
 
 	if (!lspcon->desc_valid)
@@ -173,7 +175,8 @@ static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)
 		if (!__intel_dp_read_desc(intel_dp, &desc))
 			return;
 
-		if (!memcmp(&intel_dp->desc, &desc, sizeof(desc))) {
+		if (intel_digital_port_connected(dev_priv, dig_port) &&
+		    !memcmp(&intel_dp->desc, &desc, sizeof(desc))) {
 			DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n",
 				      jiffies_to_msecs(jiffies - start));
 			return;
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index f4429f67a4e3..4a862a358c70 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -982,7 +982,18 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv)
 			opregion->vbt_size = vbt_size;
 		} else {
 			vbt = base + OPREGION_VBT_OFFSET;
-			vbt_size = OPREGION_ASLE_EXT_OFFSET - OPREGION_VBT_OFFSET;
+			/*
+			 * The VBT specification says that if the ASLE ext
+			 * mailbox is not used its area is reserved, but
+			 * on some CHT boards the VBT extends into the
+			 * ASLE ext area. Allow this even though it is
+			 * against the spec, so we do not end up rejecting
+			 * the VBT on those boards (and end up not finding the
+			 * LCD panel because of this).
+			 */
+			vbt_size = (mboxes & MBOX_ASLE_EXT) ?
+				OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE;
+			vbt_size -= OPREGION_VBT_OFFSET;
 			if (intel_bios_is_valid_vbt(vbt, vbt_size)) {
 				DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (Mailbox #4)\n");
 				opregion->vbt = vbt;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 69035e4f9b3b..91bc4abf5d3e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -599,10 +599,62 @@ out:
 static void reset_ring_common(struct intel_engine_cs *engine,
 			      struct drm_i915_gem_request *request)
 {
-	struct intel_ring *ring = request->ring;
+	/* Try to restore the logical GPU state to match the continuation
+	 * of the request queue. If we skip the context/PD restore, then
+	 * the next request may try to execute assuming that its context
+	 * is valid and loaded on the GPU and so may try to access invalid
+	 * memory, prompting repeated GPU hangs.
+	 *
+	 * If the request was guilty, we still restore the logical state
+	 * in case the next request requires it (e.g. the aliasing ppgtt),
+	 * but skip over the hung batch.
+	 *
+	 * If the request was innocent, we try to replay the request with
+	 * the restored context.
+	 */
+	if (request) {
+		struct drm_i915_private *dev_priv = request->i915;
+		struct intel_context *ce = &request->ctx->engine[engine->id];
+		struct i915_hw_ppgtt *ppgtt;
+
+		/* FIXME consider gen8 reset */
+
+		if (ce->state) {
+			I915_WRITE(CCID,
+				   i915_ggtt_offset(ce->state) |
+				   BIT(8) /* must be set! */ |
+				   CCID_EXTENDED_STATE_SAVE |
+				   CCID_EXTENDED_STATE_RESTORE |
+				   CCID_EN);
+		}
 
-	ring->head = request->postfix;
-	ring->last_retired_head = -1;
+		ppgtt = request->ctx->ppgtt ?: engine->i915->mm.aliasing_ppgtt;
+		if (ppgtt) {
+			u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10;
+
+			I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
+			I915_WRITE(RING_PP_DIR_BASE(engine), pd_offset);
+
+			/* Wait for the PD reload to complete */
+			if (intel_wait_for_register(dev_priv,
+						    RING_PP_DIR_BASE(engine),
+						    BIT(0), 0,
+						    10))
+				DRM_ERROR("Wait for reload of ppgtt page-directory timed out\n");
+
+			ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
+		}
+
+		/* If the rq hung, jump to its breadcrumb and skip the batch */
+		if (request->fence.error == -EIO) {
+			struct intel_ring *ring = request->ring;
+
+			ring->head = request->postfix;
+			ring->last_retired_head = -1;
+		}
+	} else {
+		engine->legacy_active_context = NULL;
+	}
 }
 
 static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
index 5ae48836652e..f562cb7964b0 100644
--- a/drivers/gpu/drm/imx/imx-drm-core.c
+++ b/drivers/gpu/drm/imx/imx-drm-core.c
@@ -357,8 +357,8 @@ static int imx_drm_bind(struct device *dev)
 	 * this value would be used to check framebuffer size limitation
 	 * at drm_mode_addfb().
 	 */
-	drm->mode_config.min_width = 64;
-	drm->mode_config.min_height = 64;
+	drm->mode_config.min_width = 1;
+	drm->mode_config.min_height = 1;
 	drm->mode_config.max_width = 4096;
 	drm->mode_config.max_height = 4096;
 	drm->mode_config.funcs = &imx_drm_mode_config_funcs;
diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c
index 8f8aa4a63122..4826bb781723 100644
--- a/drivers/gpu/drm/imx/imx-tve.c
+++ b/drivers/gpu/drm/imx/imx-tve.c
@@ -98,6 +98,8 @@
 /* TVE_TST_MODE_REG */
 #define TVE_TVDAC_TEST_MODE_MASK	(0x7 << 0)
 
+#define IMX_TVE_DAC_VOLTAGE	2750000
+
 enum {
 	TVE_MODE_TVOUT,
 	TVE_MODE_VGA,
@@ -616,9 +618,8 @@ static int imx_tve_bind(struct device *dev, struct device *master, void *data)
 
 	tve->dac_reg = devm_regulator_get(dev, "dac");
 	if (!IS_ERR(tve->dac_reg)) {
-		ret = regulator_set_voltage(tve->dac_reg, 2750000, 2750000);
-		if (ret)
-			return ret;
+		if (regulator_get_voltage(tve->dac_reg) != IMX_TVE_DAC_VOLTAGE)
+			dev_warn(dev, "dac voltage is not %d uV\n", IMX_TVE_DAC_VOLTAGE);
 		ret = regulator_enable(tve->dac_reg);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/nouveau/dispnv04/arb.c b/drivers/gpu/drm/nouveau/dispnv04/arb.c
index a555681c3096..90075b676256 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/arb.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/arb.c
@@ -198,7 +198,7 @@ nv04_update_arb(struct drm_device *dev, int VClk, int bpp,
 		int *burst, int *lwm)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	struct nv_fifo_info fifo_data;
 	struct nv_sim_state sim_data;
 	int MClk = nouveau_hw_get_clock(dev, PLL_MEMORY);
@@ -227,7 +227,7 @@ nv04_update_arb(struct drm_device *dev, int VClk, int bpp,
 		sim_data.mem_page_miss = ((cfg1 >> 4) & 0xf) + ((cfg1 >> 31) & 0x1);
 	}
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_TNT)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_TNT)
 		nv04_calc_arb(&fifo_data, &sim_data);
 	else
 		nv10_calc_arb(&fifo_data, &sim_data);
@@ -254,7 +254,7 @@ nouveau_calc_arb(struct drm_device *dev, int vclk, int bpp, int *burst, int *lwm
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_KELVIN)
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_KELVIN)
 		nv04_update_arb(dev, vclk, bpp, burst, lwm);
 	else if ((dev->pdev->device & 0xfff0) == 0x0240 /*CHIPSET_C51*/ ||
 		 (dev->pdev->device & 0xfff0) == 0x03d0 /*CHIPSET_C512*/) {
diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
index a72754d73c84..ab7b69c11d40 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
@@ -113,8 +113,8 @@ static void nv_crtc_calc_state_ext(struct drm_crtc *crtc, struct drm_display_mod
 {
 	struct drm_device *dev = crtc->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_bios *bios = nvxx_bios(&drm->device);
-	struct nvkm_clk *clk = nvxx_clk(&drm->device);
+	struct nvkm_bios *bios = nvxx_bios(&drm->client.device);
+	struct nvkm_clk *clk = nvxx_clk(&drm->client.device);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 	struct nv04_mode_state *state = &nv04_display(dev)->mode_reg;
 	struct nv04_crtc_reg *regp = &state->crtc_reg[nv_crtc->index];
@@ -138,7 +138,7 @@ static void nv_crtc_calc_state_ext(struct drm_crtc *crtc, struct drm_display_mod
 	 * has yet been observed in allowing the use a single stage pll on all
 	 * nv43 however.  the behaviour of single stage use is untested on nv40
 	 */
-	if (drm->device.info.chipset > 0x40 && dot_clock <= (pll_lim.vco1.max_freq / 2))
+	if (drm->client.device.info.chipset > 0x40 && dot_clock <= (pll_lim.vco1.max_freq / 2))
 		memset(&pll_lim.vco2, 0, sizeof(pll_lim.vco2));
 
 
@@ -148,10 +148,10 @@ static void nv_crtc_calc_state_ext(struct drm_crtc *crtc, struct drm_display_mod
 	state->pllsel &= PLLSEL_VPLL1_MASK | PLLSEL_VPLL2_MASK | PLLSEL_TV_MASK;
 
 	/* The blob uses this always, so let's do the same */
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		state->pllsel |= NV_PRAMDAC_PLL_COEFF_SELECT_USE_VPLL2_TRUE;
 	/* again nv40 and some nv43 act more like nv3x as described above */
-	if (drm->device.info.chipset < 0x41)
+	if (drm->client.device.info.chipset < 0x41)
 		state->pllsel |= NV_PRAMDAC_PLL_COEFF_SELECT_SOURCE_PROG_MPLL |
 				 NV_PRAMDAC_PLL_COEFF_SELECT_SOURCE_PROG_NVPLL;
 	state->pllsel |= nv_crtc->index ? PLLSEL_VPLL2_MASK : PLLSEL_VPLL1_MASK;
@@ -270,7 +270,7 @@ nv_crtc_mode_set_vga(struct drm_crtc *crtc, struct drm_display_mode *mode)
 		horizEnd = horizTotal - 2;
 		horizBlankEnd = horizTotal + 4;
 #if 0
-		if (dev->overlayAdaptor && drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
+		if (dev->overlayAdaptor && drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
 			/* This reportedly works around some video overlay bandwidth problems */
 			horizTotal += 2;
 #endif
@@ -505,7 +505,7 @@ nv_crtc_mode_set_regs(struct drm_crtc *crtc, struct drm_display_mode * mode)
 	regp->cursor_cfg = NV_PCRTC_CURSOR_CONFIG_CUR_LINES_64 |
 			     NV_PCRTC_CURSOR_CONFIG_CUR_PIXELS_64 |
 			     NV_PCRTC_CURSOR_CONFIG_ADDRESS_SPACE_PNVM;
-	if (drm->device.info.chipset >= 0x11)
+	if (drm->client.device.info.chipset >= 0x11)
 		regp->cursor_cfg |= NV_PCRTC_CURSOR_CONFIG_CUR_BPP_32;
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		regp->cursor_cfg |= NV_PCRTC_CURSOR_CONFIG_DOUBLE_SCAN_ENABLE;
@@ -546,26 +546,26 @@ nv_crtc_mode_set_regs(struct drm_crtc *crtc, struct drm_display_mode * mode)
 	 * 1 << 30 on 0x60.830), for no apparent reason */
 	regp->CRTC[NV_CIO_CRE_59] = off_chip_digital;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
 		regp->CRTC[0x9f] = off_chip_digital ? 0x11 : 0x1;
 
 	regp->crtc_830 = mode->crtc_vdisplay - 3;
 	regp->crtc_834 = mode->crtc_vdisplay - 1;
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		/* This is what the blob does */
 		regp->crtc_850 = NVReadCRTC(dev, 0, NV_PCRTC_850);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
 		regp->gpio_ext = NVReadCRTC(dev, 0, NV_PCRTC_GPIO_EXT);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
 		regp->crtc_cfg = NV10_PCRTC_CONFIG_START_ADDRESS_HSYNC;
 	else
 		regp->crtc_cfg = NV04_PCRTC_CONFIG_START_ADDRESS_HSYNC;
 
 	/* Some misc regs */
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE) {
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE) {
 		regp->CRTC[NV_CIO_CRE_85] = 0xFF;
 		regp->CRTC[NV_CIO_CRE_86] = 0x1;
 	}
@@ -577,7 +577,7 @@ nv_crtc_mode_set_regs(struct drm_crtc *crtc, struct drm_display_mode * mode)
 
 	/* Generic PRAMDAC regs */
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
 		/* Only bit that bios and blob set. */
 		regp->nv10_cursync = (1 << 25);
 
@@ -586,7 +586,7 @@ nv_crtc_mode_set_regs(struct drm_crtc *crtc, struct drm_display_mode * mode)
 				NV_PRAMDAC_GENERAL_CONTROL_PIXMIX_ON;
 	if (fb->format->depth == 16)
 		regp->ramdac_gen_ctrl |= NV_PRAMDAC_GENERAL_CONTROL_ALT_MODE_SEL;
-	if (drm->device.info.chipset >= 0x11)
+	if (drm->client.device.info.chipset >= 0x11)
 		regp->ramdac_gen_ctrl |= NV_PRAMDAC_GENERAL_CONTROL_PIPE_LONG;
 
 	regp->ramdac_630 = 0; /* turn off green mode (tv test pattern?) */
@@ -649,7 +649,7 @@ nv_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode,
 
 	nv_crtc_mode_set_vga(crtc, adjusted_mode);
 	/* calculated in nv04_dfp_prepare, nv40 needs it written before calculating PLLs */
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_SEL_CLK, nv04_display(dev)->mode_reg.sel_clk);
 	nv_crtc_mode_set_regs(crtc, adjusted_mode);
 	nv_crtc_calc_state_ext(crtc, mode, adjusted_mode->clock);
@@ -710,7 +710,7 @@ static void nv_crtc_prepare(struct drm_crtc *crtc)
 
 	/* Some more preparation. */
 	NVWriteCRTC(dev, nv_crtc->index, NV_PCRTC_CONFIG, NV_PCRTC_CONFIG_START_ADDRESS_NON_VGA);
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE) {
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE) {
 		uint32_t reg900 = NVReadRAMDAC(dev, nv_crtc->index, NV_PRAMDAC_900);
 		NVWriteRAMDAC(dev, nv_crtc->index, NV_PRAMDAC_900, reg900 & ~0x10000);
 	}
@@ -886,7 +886,7 @@ nv04_crtc_do_mode_set_base(struct drm_crtc *crtc,
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_FF_INDEX);
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_FFLWM__INDEX);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_KELVIN) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_KELVIN) {
 		regp->CRTC[NV_CIO_CRE_47] = arb_lwm >> 8;
 		crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_47);
 	}
@@ -967,7 +967,7 @@ static void nv11_cursor_upload(struct drm_device *dev, struct nouveau_bo *src,
 		{
 			struct nouveau_drm *drm = nouveau_drm(dev);
 
-			if (drm->device.info.chipset == 0x11) {
+			if (drm->client.device.info.chipset == 0x11) {
 				pixel = ((pixel & 0x000000ff) << 24) |
 					((pixel & 0x0000ff00) << 8) |
 					((pixel & 0x00ff0000) >> 8) |
@@ -1008,7 +1008,7 @@ nv04_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
 	if (ret)
 		goto out;
 
-	if (drm->device.info.chipset >= 0x11)
+	if (drm->client.device.info.chipset >= 0x11)
 		nv11_cursor_upload(dev, cursor, nv_crtc->cursor.nvbo);
 	else
 		nv04_cursor_upload(dev, cursor, nv_crtc->cursor.nvbo);
@@ -1124,8 +1124,9 @@ nv04_crtc_create(struct drm_device *dev, int crtc_num)
 	drm_crtc_helper_add(&nv_crtc->base, &nv04_crtc_helper_funcs);
 	drm_mode_crtc_set_gamma_size(&nv_crtc->base, 256);
 
-	ret = nouveau_bo_new(dev, 64*64*4, 0x100, TTM_PL_FLAG_VRAM,
-			     0, 0x0000, NULL, NULL, &nv_crtc->cursor.nvbo);
+	ret = nouveau_bo_new(&nouveau_drm(dev)->client, 64*64*4, 0x100,
+			     TTM_PL_FLAG_VRAM, 0, 0x0000, NULL, NULL,
+			     &nv_crtc->cursor.nvbo);
 	if (!ret) {
 		ret = nouveau_bo_pin(nv_crtc->cursor.nvbo, TTM_PL_FLAG_VRAM, false);
 		if (!ret) {
diff --git a/drivers/gpu/drm/nouveau/dispnv04/cursor.c b/drivers/gpu/drm/nouveau/dispnv04/cursor.c
index c83116a308a4..f26e44ea7389 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/cursor.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/cursor.c
@@ -55,7 +55,7 @@ nv04_cursor_set_offset(struct nouveau_crtc *nv_crtc, uint32_t offset)
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_HCUR_ADDR0_INDEX);
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_HCUR_ADDR1_INDEX);
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_HCUR_ADDR2_INDEX);
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		nv_fix_nv40_hw_cursor(dev, nv_crtc->index);
 }
 
diff --git a/drivers/gpu/drm/nouveau/dispnv04/dac.c b/drivers/gpu/drm/nouveau/dispnv04/dac.c
index b6cc7766e6f7..4feab0a5419d 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/dac.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/dac.c
@@ -66,7 +66,7 @@ int nv04_dac_output_offset(struct drm_encoder *encoder)
 static int sample_load_twice(struct drm_device *dev, bool sense[2])
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int i;
 
 	for (i = 0; i < 2; i++) {
@@ -80,19 +80,19 @@ static int sample_load_twice(struct drm_device *dev, bool sense[2])
 		 * use a 10ms timeout (guards against crtc being inactive, in
 		 * which case blank state would never change)
 		 */
-		if (nvif_msec(&drm->device, 10,
+		if (nvif_msec(&drm->client.device, 10,
 			if (!(nvif_rd32(device, NV_PRMCIO_INP0__COLOR) & 1))
 				break;
 		) < 0)
 			return -EBUSY;
 
-		if (nvif_msec(&drm->device, 10,
+		if (nvif_msec(&drm->client.device, 10,
 			if ( (nvif_rd32(device, NV_PRMCIO_INP0__COLOR) & 1))
 				break;
 		) < 0)
 			return -EBUSY;
 
-		if (nvif_msec(&drm->device, 10,
+		if (nvif_msec(&drm->client.device, 10,
 			if (!(nvif_rd32(device, NV_PRMCIO_INP0__COLOR) & 1))
 				break;
 		) < 0)
@@ -133,7 +133,7 @@ static enum drm_connector_status nv04_dac_detect(struct drm_encoder *encoder,
 						 struct drm_connector *connector)
 {
 	struct drm_device *dev = encoder->dev;
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	uint8_t saved_seq1, saved_pi, saved_rpc1, saved_cr_mode;
 	uint8_t saved_palette0[3], saved_palette_mask;
@@ -236,8 +236,8 @@ uint32_t nv17_dac_sample_load(struct drm_encoder *encoder)
 {
 	struct drm_device *dev = encoder->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
-	struct nvkm_gpio *gpio = nvxx_gpio(&drm->device);
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
+	struct nvkm_gpio *gpio = nvxx_gpio(&drm->client.device);
 	struct dcb_output *dcb = nouveau_encoder(encoder)->dcb;
 	uint32_t sample, testval, regoffset = nv04_dac_output_offset(encoder);
 	uint32_t saved_powerctrl_2 = 0, saved_powerctrl_4 = 0, saved_routput,
@@ -288,7 +288,7 @@ uint32_t nv17_dac_sample_load(struct drm_encoder *encoder)
 	/* nv driver and nv31 use 0xfffffeee, nv34 and 6600 use 0xfffffece */
 	routput = (saved_routput & 0xfffffece) | head << 8;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CURIE) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CURIE) {
 		if (dcb->type == DCB_OUTPUT_TV)
 			routput |= 0x1a << 16;
 		else
@@ -403,7 +403,7 @@ static void nv04_dac_mode_set(struct drm_encoder *encoder,
 	}
 
 	/* This could use refinement for flatpanels, but it should work this way */
-	if (drm->device.info.chipset < 0x44)
+	if (drm->client.device.info.chipset < 0x44)
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + nv04_dac_output_offset(encoder), 0xf0000000);
 	else
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + nv04_dac_output_offset(encoder), 0x00100000);
diff --git a/drivers/gpu/drm/nouveau/dispnv04/dfp.c b/drivers/gpu/drm/nouveau/dispnv04/dfp.c
index 2e5bb2afda7c..9805d2cdc1a1 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/dfp.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/dfp.c
@@ -281,7 +281,7 @@ static void nv04_dfp_mode_set(struct drm_encoder *encoder,
 			      struct drm_display_mode *adjusted_mode)
 {
 	struct drm_device *dev = encoder->dev;
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
 	struct nv04_crtc_reg *regp = &nv04_display(dev)->mode_reg.crtc_reg[nv_crtc->index];
@@ -417,7 +417,7 @@ static void nv04_dfp_mode_set(struct drm_encoder *encoder,
 	if ((nv_connector->dithering_mode == DITHERING_MODE_ON) ||
 	    (nv_connector->dithering_mode == DITHERING_MODE_AUTO &&
 	     fb->format->depth > connector->display_info.bpc * 3)) {
-		if (drm->device.info.chipset == 0x11)
+		if (drm->client.device.info.chipset == 0x11)
 			regp->dither = savep->dither | 0x00010000;
 		else {
 			int i;
@@ -428,7 +428,7 @@ static void nv04_dfp_mode_set(struct drm_encoder *encoder,
 			}
 		}
 	} else {
-		if (drm->device.info.chipset != 0x11) {
+		if (drm->client.device.info.chipset != 0x11) {
 			/* reset them */
 			int i;
 			for (i = 0; i < 3; i++) {
@@ -464,7 +464,7 @@ static void nv04_dfp_commit(struct drm_encoder *encoder)
 		NVReadRAMDAC(dev, head, NV_PRAMDAC_FP_TG_CONTROL);
 
 	/* This could use refinement for flatpanels, but it should work this way */
-	if (drm->device.info.chipset < 0x44)
+	if (drm->client.device.info.chipset < 0x44)
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + nv04_dac_output_offset(encoder), 0xf0000000);
 	else
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + nv04_dac_output_offset(encoder), 0x00100000);
@@ -486,7 +486,7 @@ static void nv04_dfp_update_backlight(struct drm_encoder *encoder, int mode)
 {
 #ifdef __powerpc__
 	struct drm_device *dev = encoder->dev;
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 
 	/* BIOS scripts usually take care of the backlight, thanks
 	 * Apple for your consistency.
@@ -624,7 +624,7 @@ static void nv04_tmds_slave_init(struct drm_encoder *encoder)
 	struct drm_device *dev = encoder->dev;
 	struct dcb_output *dcb = nouveau_encoder(encoder)->dcb;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct nvkm_i2c_bus *bus = nvkm_i2c_bus_find(i2c, NVKM_I2C_BUS_PRI);
 	struct nvkm_i2c_bus_probe info[] = {
 		{
diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c
index 34c0f2f67548..5b9d549aa791 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c
@@ -35,7 +35,7 @@ int
 nv04_display_create(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct dcb_table *dcb = &drm->vbios.dcb;
 	struct drm_connector *connector, *ct;
 	struct drm_encoder *encoder;
@@ -48,7 +48,7 @@ nv04_display_create(struct drm_device *dev)
 	if (!disp)
 		return -ENOMEM;
 
-	nvif_object_map(&drm->device.object);
+	nvif_object_map(&drm->client.device.object);
 
 	nouveau_display(dev)->priv = disp;
 	nouveau_display(dev)->dtor = nv04_display_destroy;
@@ -139,7 +139,7 @@ nv04_display_destroy(struct drm_device *dev)
 	nouveau_display(dev)->priv = NULL;
 	kfree(disp);
 
-	nvif_object_unmap(&drm->device.object);
+	nvif_object_unmap(&drm->client.device.object);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.h b/drivers/gpu/drm/nouveau/dispnv04/disp.h
index 7030307d2d48..bea4543554ba 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/disp.h
+++ b/drivers/gpu/drm/nouveau/dispnv04/disp.h
@@ -129,7 +129,7 @@ nv_two_heads(struct drm_device *dev)
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	const int impl = dev->pdev->device & 0x0ff0;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS && impl != 0x0100 &&
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS && impl != 0x0100 &&
 	    impl != 0x0150 && impl != 0x01a0 && impl != 0x0200)
 		return true;
 
@@ -148,7 +148,7 @@ nv_two_reg_pll(struct drm_device *dev)
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	const int impl = dev->pdev->device & 0x0ff0;
 
-	if (impl == 0x0310 || impl == 0x0340 || drm->device.info.family >= NV_DEVICE_INFO_V0_CURIE)
+	if (impl == 0x0310 || impl == 0x0340 || drm->client.device.info.family >= NV_DEVICE_INFO_V0_CURIE)
 		return true;
 	return false;
 }
@@ -170,7 +170,7 @@ nouveau_bios_run_init_table(struct drm_device *dev, u16 table,
 			    struct dcb_output *outp, int crtc)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_bios *bios = nvxx_bios(&drm->device);
+	struct nvkm_bios *bios = nvxx_bios(&drm->client.device);
 	struct nvbios_init init = {
 		.subdev = &bios->subdev,
 		.bios = bios,
diff --git a/drivers/gpu/drm/nouveau/dispnv04/hw.c b/drivers/gpu/drm/nouveau/dispnv04/hw.c
index 74856a8b8f35..b98599002831 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/hw.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/hw.c
@@ -89,7 +89,7 @@ NVSetOwner(struct drm_device *dev, int owner)
 	if (owner == 1)
 		owner *= 3;
 
-	if (drm->device.info.chipset == 0x11) {
+	if (drm->client.device.info.chipset == 0x11) {
 		/* This might seem stupid, but the blob does it and
 		 * omitting it often locks the system up.
 		 */
@@ -100,7 +100,7 @@ NVSetOwner(struct drm_device *dev, int owner)
 	/* CR44 is always changed on CRTC0 */
 	NVWriteVgaCrtc(dev, 0, NV_CIO_CRE_44, owner);
 
-	if (drm->device.info.chipset == 0x11) {	/* set me harder */
+	if (drm->client.device.info.chipset == 0x11) {	/* set me harder */
 		NVWriteVgaCrtc(dev, 0, NV_CIO_CRE_2E, owner);
 		NVWriteVgaCrtc(dev, 0, NV_CIO_CRE_2E, owner);
 	}
@@ -149,7 +149,7 @@ nouveau_hw_decode_pll(struct drm_device *dev, uint32_t reg1, uint32_t pll1,
 		pllvals->NM1 = pll1 & 0xffff;
 		if (nv_two_reg_pll(dev) && pll2 & NV31_RAMDAC_ENABLE_VCO2)
 			pllvals->NM2 = pll2 & 0xffff;
-		else if (drm->device.info.chipset == 0x30 || drm->device.info.chipset == 0x35) {
+		else if (drm->client.device.info.chipset == 0x30 || drm->client.device.info.chipset == 0x35) {
 			pllvals->M1 &= 0xf; /* only 4 bits */
 			if (pll1 & NV30_RAMDAC_ENABLE_VCO2) {
 				pllvals->M2 = (pll1 >> 4) & 0x7;
@@ -165,8 +165,8 @@ nouveau_hw_get_pllvals(struct drm_device *dev, enum nvbios_pll_type plltype,
 		       struct nvkm_pll_vals *pllvals)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
-	struct nvkm_bios *bios = nvxx_bios(&drm->device);
+	struct nvif_object *device = &drm->client.device.object;
+	struct nvkm_bios *bios = nvxx_bios(&drm->client.device);
 	uint32_t reg1, pll1, pll2 = 0;
 	struct nvbios_pll pll_lim;
 	int ret;
@@ -184,7 +184,7 @@ nouveau_hw_get_pllvals(struct drm_device *dev, enum nvbios_pll_type plltype,
 		pll2 = nvif_rd32(device, reg2);
 	}
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CELSIUS && reg1 >= NV_PRAMDAC_VPLL_COEFF) {
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CELSIUS && reg1 >= NV_PRAMDAC_VPLL_COEFF) {
 		uint32_t ramdac580 = NVReadRAMDAC(dev, 0, NV_PRAMDAC_580);
 
 		/* check whether vpll has been forced into single stage mode */
@@ -222,6 +222,7 @@ nouveau_hw_get_clock(struct drm_device *dev, enum nvbios_pll_type plltype)
 		uint32_t mpllP;
 
 		pci_read_config_dword(pci_get_bus_and_slot(0, 3), 0x6c, &mpllP);
+		mpllP = (mpllP >> 8) & 0xf;
 		if (!mpllP)
 			mpllP = 4;
 
@@ -232,7 +233,7 @@ nouveau_hw_get_clock(struct drm_device *dev, enum nvbios_pll_type plltype)
 		uint32_t clock;
 
 		pci_read_config_dword(pci_get_bus_and_slot(0, 5), 0x4c, &clock);
-		return clock;
+		return clock / 1000;
 	}
 
 	ret = nouveau_hw_get_pllvals(dev, plltype, &pllvals);
@@ -252,7 +253,7 @@ nouveau_hw_fix_bad_vpll(struct drm_device *dev, int head)
 	 */
 
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	struct nvkm_clk *clk = nvxx_clk(device);
 	struct nvkm_bios *bios = nvxx_bios(device);
 	struct nvbios_pll pll_lim;
@@ -391,21 +392,21 @@ nv_save_state_ramdac(struct drm_device *dev, int head,
 	struct nv04_crtc_reg *regp = &state->crtc_reg[head];
 	int i;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
 		regp->nv10_cursync = NVReadRAMDAC(dev, head, NV_RAMDAC_NV10_CURSYNC);
 
 	nouveau_hw_get_pllvals(dev, head ? PLL_VPLL1 : PLL_VPLL0, &regp->pllvals);
 	state->pllsel = NVReadRAMDAC(dev, 0, NV_PRAMDAC_PLL_COEFF_SELECT);
 	if (nv_two_heads(dev))
 		state->sel_clk = NVReadRAMDAC(dev, 0, NV_PRAMDAC_SEL_CLK);
-	if (drm->device.info.chipset == 0x11)
+	if (drm->client.device.info.chipset == 0x11)
 		regp->dither = NVReadRAMDAC(dev, head, NV_RAMDAC_DITHER_NV11);
 
 	regp->ramdac_gen_ctrl = NVReadRAMDAC(dev, head, NV_PRAMDAC_GENERAL_CONTROL);
 
 	if (nv_gf4_disp_arch(dev))
 		regp->ramdac_630 = NVReadRAMDAC(dev, head, NV_PRAMDAC_630);
-	if (drm->device.info.chipset >= 0x30)
+	if (drm->client.device.info.chipset >= 0x30)
 		regp->ramdac_634 = NVReadRAMDAC(dev, head, NV_PRAMDAC_634);
 
 	regp->tv_setup = NVReadRAMDAC(dev, head, NV_PRAMDAC_TV_SETUP);
@@ -447,7 +448,7 @@ nv_save_state_ramdac(struct drm_device *dev, int head,
 	if (nv_gf4_disp_arch(dev))
 		regp->ramdac_8c0 = NVReadRAMDAC(dev, head, NV_PRAMDAC_8C0);
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE) {
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE) {
 		regp->ramdac_a20 = NVReadRAMDAC(dev, head, NV_PRAMDAC_A20);
 		regp->ramdac_a24 = NVReadRAMDAC(dev, head, NV_PRAMDAC_A24);
 		regp->ramdac_a34 = NVReadRAMDAC(dev, head, NV_PRAMDAC_A34);
@@ -463,26 +464,26 @@ nv_load_state_ramdac(struct drm_device *dev, int head,
 		     struct nv04_mode_state *state)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_clk *clk = nvxx_clk(&drm->device);
+	struct nvkm_clk *clk = nvxx_clk(&drm->client.device);
 	struct nv04_crtc_reg *regp = &state->crtc_reg[head];
 	uint32_t pllreg = head ? NV_RAMDAC_VPLL2 : NV_PRAMDAC_VPLL_COEFF;
 	int i;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
 		NVWriteRAMDAC(dev, head, NV_RAMDAC_NV10_CURSYNC, regp->nv10_cursync);
 
 	clk->pll_prog(clk, pllreg, &regp->pllvals);
 	NVWriteRAMDAC(dev, 0, NV_PRAMDAC_PLL_COEFF_SELECT, state->pllsel);
 	if (nv_two_heads(dev))
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_SEL_CLK, state->sel_clk);
-	if (drm->device.info.chipset == 0x11)
+	if (drm->client.device.info.chipset == 0x11)
 		NVWriteRAMDAC(dev, head, NV_RAMDAC_DITHER_NV11, regp->dither);
 
 	NVWriteRAMDAC(dev, head, NV_PRAMDAC_GENERAL_CONTROL, regp->ramdac_gen_ctrl);
 
 	if (nv_gf4_disp_arch(dev))
 		NVWriteRAMDAC(dev, head, NV_PRAMDAC_630, regp->ramdac_630);
-	if (drm->device.info.chipset >= 0x30)
+	if (drm->client.device.info.chipset >= 0x30)
 		NVWriteRAMDAC(dev, head, NV_PRAMDAC_634, regp->ramdac_634);
 
 	NVWriteRAMDAC(dev, head, NV_PRAMDAC_TV_SETUP, regp->tv_setup);
@@ -519,7 +520,7 @@ nv_load_state_ramdac(struct drm_device *dev, int head,
 	if (nv_gf4_disp_arch(dev))
 		NVWriteRAMDAC(dev, head, NV_PRAMDAC_8C0, regp->ramdac_8c0);
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE) {
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE) {
 		NVWriteRAMDAC(dev, head, NV_PRAMDAC_A20, regp->ramdac_a20);
 		NVWriteRAMDAC(dev, head, NV_PRAMDAC_A24, regp->ramdac_a24);
 		NVWriteRAMDAC(dev, head, NV_PRAMDAC_A34, regp->ramdac_a34);
@@ -600,10 +601,10 @@ nv_save_state_ext(struct drm_device *dev, int head,
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_FFLWM__INDEX);
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_21);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_KELVIN)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_KELVIN)
 		rd_cio_state(dev, head, regp, NV_CIO_CRE_47);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
 		rd_cio_state(dev, head, regp, 0x9f);
 
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_49);
@@ -612,14 +613,14 @@ nv_save_state_ext(struct drm_device *dev, int head,
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_HCUR_ADDR2_INDEX);
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_ILACE__INDEX);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
 		regp->crtc_830 = NVReadCRTC(dev, head, NV_PCRTC_830);
 		regp->crtc_834 = NVReadCRTC(dev, head, NV_PCRTC_834);
 
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
 			regp->gpio_ext = NVReadCRTC(dev, head, NV_PCRTC_GPIO_EXT);
 
-		if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+		if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 			regp->crtc_850 = NVReadCRTC(dev, head, NV_PCRTC_850);
 
 		if (nv_two_heads(dev))
@@ -631,7 +632,7 @@ nv_save_state_ext(struct drm_device *dev, int head,
 
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_SCRATCH3__INDEX);
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_SCRATCH4__INDEX);
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
 		rd_cio_state(dev, head, regp, NV_CIO_CRE_EBR_INDEX);
 		rd_cio_state(dev, head, regp, NV_CIO_CRE_CSB);
 		rd_cio_state(dev, head, regp, NV_CIO_CRE_4B);
@@ -660,12 +661,12 @@ nv_load_state_ext(struct drm_device *dev, int head,
 		  struct nv04_mode_state *state)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	struct nv04_crtc_reg *regp = &state->crtc_reg[head];
 	uint32_t reg900;
 	int i;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
 		if (nv_two_heads(dev))
 			/* setting ENGINE_CTRL (EC) *must* come before
 			 * CIO_CRE_LCD, as writing CRE_LCD sets bits 16 & 17 in
@@ -677,20 +678,20 @@ nv_load_state_ext(struct drm_device *dev, int head,
 		nvif_wr32(device, NV_PVIDEO_INTR_EN, 0);
 		nvif_wr32(device, NV_PVIDEO_OFFSET_BUFF(0), 0);
 		nvif_wr32(device, NV_PVIDEO_OFFSET_BUFF(1), 0);
-		nvif_wr32(device, NV_PVIDEO_LIMIT(0), drm->device.info.ram_size - 1);
-		nvif_wr32(device, NV_PVIDEO_LIMIT(1), drm->device.info.ram_size - 1);
-		nvif_wr32(device, NV_PVIDEO_UVPLANE_LIMIT(0), drm->device.info.ram_size - 1);
-		nvif_wr32(device, NV_PVIDEO_UVPLANE_LIMIT(1), drm->device.info.ram_size - 1);
+		nvif_wr32(device, NV_PVIDEO_LIMIT(0), drm->client.device.info.ram_size - 1);
+		nvif_wr32(device, NV_PVIDEO_LIMIT(1), drm->client.device.info.ram_size - 1);
+		nvif_wr32(device, NV_PVIDEO_UVPLANE_LIMIT(0), drm->client.device.info.ram_size - 1);
+		nvif_wr32(device, NV_PVIDEO_UVPLANE_LIMIT(1), drm->client.device.info.ram_size - 1);
 		nvif_wr32(device, NV_PBUS_POWERCTRL_2, 0);
 
 		NVWriteCRTC(dev, head, NV_PCRTC_CURSOR_CONFIG, regp->cursor_cfg);
 		NVWriteCRTC(dev, head, NV_PCRTC_830, regp->crtc_830);
 		NVWriteCRTC(dev, head, NV_PCRTC_834, regp->crtc_834);
 
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
 			NVWriteCRTC(dev, head, NV_PCRTC_GPIO_EXT, regp->gpio_ext);
 
-		if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE) {
+		if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE) {
 			NVWriteCRTC(dev, head, NV_PCRTC_850, regp->crtc_850);
 
 			reg900 = NVReadRAMDAC(dev, head, NV_PRAMDAC_900);
@@ -713,23 +714,23 @@ nv_load_state_ext(struct drm_device *dev, int head,
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_FF_INDEX);
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_FFLWM__INDEX);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_KELVIN)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_KELVIN)
 		wr_cio_state(dev, head, regp, NV_CIO_CRE_47);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
 		wr_cio_state(dev, head, regp, 0x9f);
 
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_49);
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_HCUR_ADDR0_INDEX);
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_HCUR_ADDR1_INDEX);
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_HCUR_ADDR2_INDEX);
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		nv_fix_nv40_hw_cursor(dev, head);
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_ILACE__INDEX);
 
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_SCRATCH3__INDEX);
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_SCRATCH4__INDEX);
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
 		wr_cio_state(dev, head, regp, NV_CIO_CRE_EBR_INDEX);
 		wr_cio_state(dev, head, regp, NV_CIO_CRE_CSB);
 		wr_cio_state(dev, head, regp, NV_CIO_CRE_4B);
@@ -737,14 +738,14 @@ nv_load_state_ext(struct drm_device *dev, int head,
 	}
 	/* NV11 and NV20 stop at 0x52. */
 	if (nv_gf4_disp_arch(dev)) {
-		if (drm->device.info.family < NV_DEVICE_INFO_V0_KELVIN) {
+		if (drm->client.device.info.family < NV_DEVICE_INFO_V0_KELVIN) {
 			/* Not waiting for vertical retrace before modifying
 			   CRE_53/CRE_54 causes lockups. */
-			nvif_msec(&drm->device, 650,
+			nvif_msec(&drm->client.device, 650,
 				if ( (nvif_rd32(device, NV_PRMCIO_INP0__COLOR) & 8))
 					break;
 			);
-			nvif_msec(&drm->device, 650,
+			nvif_msec(&drm->client.device, 650,
 				if (!(nvif_rd32(device, NV_PRMCIO_INP0__COLOR) & 8))
 					break;
 			);
@@ -770,7 +771,7 @@ static void
 nv_save_state_palette(struct drm_device *dev, int head,
 		      struct nv04_mode_state *state)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	int head_offset = head * NV_PRMDIO_SIZE, i;
 
 	nvif_wr08(device, NV_PRMDIO_PIXEL_MASK + head_offset,
@@ -789,7 +790,7 @@ void
 nouveau_hw_load_state_palette(struct drm_device *dev, int head,
 			      struct nv04_mode_state *state)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	int head_offset = head * NV_PRMDIO_SIZE, i;
 
 	nvif_wr08(device, NV_PRMDIO_PIXEL_MASK + head_offset,
@@ -809,7 +810,7 @@ void nouveau_hw_save_state(struct drm_device *dev, int head,
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
-	if (drm->device.info.chipset == 0x11)
+	if (drm->client.device.info.chipset == 0x11)
 		/* NB: no attempt is made to restore the bad pll later on */
 		nouveau_hw_fix_bad_vpll(dev, head);
 	nv_save_state_ramdac(dev, head, state);
diff --git a/drivers/gpu/drm/nouveau/dispnv04/hw.h b/drivers/gpu/drm/nouveau/dispnv04/hw.h
index 3bded60c5596..3a2be47fb4f1 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/hw.h
+++ b/drivers/gpu/drm/nouveau/dispnv04/hw.h
@@ -60,7 +60,7 @@ extern void nouveau_calc_arb(struct drm_device *, int vclk, int bpp,
 static inline uint32_t NVReadCRTC(struct drm_device *dev,
 					int head, uint32_t reg)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	uint32_t val;
 	if (head)
 		reg += NV_PCRTC0_SIZE;
@@ -71,7 +71,7 @@ static inline uint32_t NVReadCRTC(struct drm_device *dev,
 static inline void NVWriteCRTC(struct drm_device *dev,
 					int head, uint32_t reg, uint32_t val)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	if (head)
 		reg += NV_PCRTC0_SIZE;
 	nvif_wr32(device, reg, val);
@@ -80,7 +80,7 @@ static inline void NVWriteCRTC(struct drm_device *dev,
 static inline uint32_t NVReadRAMDAC(struct drm_device *dev,
 					int head, uint32_t reg)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	uint32_t val;
 	if (head)
 		reg += NV_PRAMDAC0_SIZE;
@@ -91,7 +91,7 @@ static inline uint32_t NVReadRAMDAC(struct drm_device *dev,
 static inline void NVWriteRAMDAC(struct drm_device *dev,
 					int head, uint32_t reg, uint32_t val)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	if (head)
 		reg += NV_PRAMDAC0_SIZE;
 	nvif_wr32(device, reg, val);
@@ -120,7 +120,7 @@ static inline void nv_write_tmds(struct drm_device *dev,
 static inline void NVWriteVgaCrtc(struct drm_device *dev,
 					int head, uint8_t index, uint8_t value)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	nvif_wr08(device, NV_PRMCIO_CRX__COLOR + head * NV_PRMCIO_SIZE, index);
 	nvif_wr08(device, NV_PRMCIO_CR__COLOR + head * NV_PRMCIO_SIZE, value);
 }
@@ -128,7 +128,7 @@ static inline void NVWriteVgaCrtc(struct drm_device *dev,
 static inline uint8_t NVReadVgaCrtc(struct drm_device *dev,
 					int head, uint8_t index)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	uint8_t val;
 	nvif_wr08(device, NV_PRMCIO_CRX__COLOR + head * NV_PRMCIO_SIZE, index);
 	val = nvif_rd08(device, NV_PRMCIO_CR__COLOR + head * NV_PRMCIO_SIZE);
@@ -165,13 +165,13 @@ static inline uint8_t NVReadVgaCrtc5758(struct drm_device *dev, int head, uint8_
 static inline uint8_t NVReadPRMVIO(struct drm_device *dev,
 					int head, uint32_t reg)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	uint8_t val;
 
 	/* Only NV4x have two pvio ranges; other twoHeads cards MUST call
 	 * NVSetOwner for the relevant head to be programmed */
-	if (head && drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (head && drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		reg += NV_PRMVIO_SIZE;
 
 	val = nvif_rd08(device, reg);
@@ -181,12 +181,12 @@ static inline uint8_t NVReadPRMVIO(struct drm_device *dev,
 static inline void NVWritePRMVIO(struct drm_device *dev,
 					int head, uint32_t reg, uint8_t value)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
 	/* Only NV4x have two pvio ranges; other twoHeads cards MUST call
 	 * NVSetOwner for the relevant head to be programmed */
-	if (head && drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (head && drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		reg += NV_PRMVIO_SIZE;
 
 	nvif_wr08(device, reg, value);
@@ -194,14 +194,14 @@ static inline void NVWritePRMVIO(struct drm_device *dev,
 
 static inline void NVSetEnablePalette(struct drm_device *dev, int head, bool enable)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	nvif_rd08(device, NV_PRMCIO_INP0__COLOR + head * NV_PRMCIO_SIZE);
 	nvif_wr08(device, NV_PRMCIO_ARX + head * NV_PRMCIO_SIZE, enable ? 0 : 0x20);
 }
 
 static inline bool NVGetEnablePalette(struct drm_device *dev, int head)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	nvif_rd08(device, NV_PRMCIO_INP0__COLOR + head * NV_PRMCIO_SIZE);
 	return !(nvif_rd08(device, NV_PRMCIO_ARX + head * NV_PRMCIO_SIZE) & 0x20);
 }
@@ -209,7 +209,7 @@ static inline bool NVGetEnablePalette(struct drm_device *dev, int head)
 static inline void NVWriteVgaAttr(struct drm_device *dev,
 					int head, uint8_t index, uint8_t value)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	if (NVGetEnablePalette(dev, head))
 		index &= ~0x20;
 	else
@@ -223,7 +223,7 @@ static inline void NVWriteVgaAttr(struct drm_device *dev,
 static inline uint8_t NVReadVgaAttr(struct drm_device *dev,
 					int head, uint8_t index)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	uint8_t val;
 	if (NVGetEnablePalette(dev, head))
 		index &= ~0x20;
@@ -259,10 +259,10 @@ static inline void NVVgaProtect(struct drm_device *dev, int head, bool protect)
 static inline bool
 nv_heads_tied(struct drm_device *dev)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
-	if (drm->device.info.chipset == 0x11)
+	if (drm->client.device.info.chipset == 0x11)
 		return !!(nvif_rd32(device, NV_PBUS_DEBUG_1) & (1 << 28));
 
 	return NVReadVgaCrtc(dev, 0, NV_CIO_CRE_44) & 0x4;
@@ -318,7 +318,7 @@ NVLockVgaCrtcs(struct drm_device *dev, bool lock)
 	NVWriteVgaCrtc(dev, 0, NV_CIO_SR_LOCK_INDEX,
 		       lock ? NV_CIO_SR_LOCK_VALUE : NV_CIO_SR_UNLOCK_RW_VALUE);
 	/* NV11 has independently lockable extended crtcs, except when tied */
-	if (drm->device.info.chipset == 0x11 && !nv_heads_tied(dev))
+	if (drm->client.device.info.chipset == 0x11 && !nv_heads_tied(dev))
 		NVWriteVgaCrtc(dev, 1, NV_CIO_SR_LOCK_INDEX,
 			       lock ? NV_CIO_SR_LOCK_VALUE :
 				      NV_CIO_SR_UNLOCK_RW_VALUE);
@@ -335,7 +335,7 @@ static inline int nv_cursor_width(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
-	return drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS ? NV10_CURSOR_SIZE : NV04_CURSOR_SIZE;
+	return drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS ? NV10_CURSOR_SIZE : NV04_CURSOR_SIZE;
 }
 
 static inline void
@@ -357,7 +357,7 @@ nv_set_crtc_base(struct drm_device *dev, int head, uint32_t offset)
 
 	NVWriteCRTC(dev, head, NV_PCRTC_START, offset);
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_TNT) {
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_TNT) {
 		/*
 		 * Hilarious, the 24th bit doesn't want to stick to
 		 * PCRTC_START...
@@ -382,7 +382,7 @@ nv_show_cursor(struct drm_device *dev, int head, bool show)
 		*curctl1 &= ~MASK(NV_CIO_CRE_HCUR_ADDR1_ENABLE);
 	NVWriteVgaCrtc(dev, head, NV_CIO_CRE_HCUR_ADDR1_INDEX, *curctl1);
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		nv_fix_nv40_hw_cursor(dev, head);
 }
 
@@ -398,7 +398,7 @@ nv_pitch_align(struct drm_device *dev, uint32_t width, int bpp)
 		bpp = 8;
 
 	/* Alignment requirements taken from the Haiku driver */
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_TNT)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_TNT)
 		mask = 128 / bpp - 1;
 	else
 		mask = 512 / bpp - 1;
diff --git a/drivers/gpu/drm/nouveau/dispnv04/overlay.c b/drivers/gpu/drm/nouveau/dispnv04/overlay.c
index 6275c270df25..5319f2a7f24d 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/overlay.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/overlay.c
@@ -97,7 +97,7 @@ nv10_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 		  uint32_t src_w, uint32_t src_h)
 {
 	struct nouveau_drm *drm = nouveau_drm(plane->dev);
-	struct nvif_object *dev = &drm->device.object;
+	struct nvif_object *dev = &drm->client.device.object;
 	struct nouveau_plane *nv_plane =
 		container_of(plane, struct nouveau_plane, base);
 	struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb);
@@ -119,7 +119,7 @@ nv10_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 	if (format > 0xffff)
 		return -ERANGE;
 
-	if (drm->device.info.chipset >= 0x30) {
+	if (drm->client.device.info.chipset >= 0x30) {
 		if (crtc_w < (src_w >> 1) || crtc_h < (src_h >> 1))
 			return -ERANGE;
 	} else {
@@ -174,7 +174,7 @@ nv10_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 static int
 nv10_disable_plane(struct drm_plane *plane)
 {
-	struct nvif_object *dev = &nouveau_drm(plane->dev)->device.object;
+	struct nvif_object *dev = &nouveau_drm(plane->dev)->client.device.object;
 	struct nouveau_plane *nv_plane =
 		container_of(plane, struct nouveau_plane, base);
 
@@ -198,7 +198,7 @@ nv_destroy_plane(struct drm_plane *plane)
 static void
 nv10_set_params(struct nouveau_plane *plane)
 {
-	struct nvif_object *dev = &nouveau_drm(plane->base.dev)->device.object;
+	struct nvif_object *dev = &nouveau_drm(plane->base.dev)->client.device.object;
 	u32 luma = (plane->brightness - 512) << 16 | plane->contrast;
 	u32 chroma = ((sin_mul(plane->hue, plane->saturation) & 0xffff) << 16) |
 		(cos_mul(plane->hue, plane->saturation) & 0xffff);
@@ -268,7 +268,7 @@ nv10_overlay_init(struct drm_device *device)
 	if (!plane)
 		return;
 
-	switch (drm->device.info.chipset) {
+	switch (drm->client.device.info.chipset) {
 	case 0x10:
 	case 0x11:
 	case 0x15:
@@ -347,7 +347,7 @@ nv04_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 		  uint32_t src_x, uint32_t src_y,
 		  uint32_t src_w, uint32_t src_h)
 {
-	struct nvif_object *dev = &nouveau_drm(plane->dev)->device.object;
+	struct nvif_object *dev = &nouveau_drm(plane->dev)->client.device.object;
 	struct nouveau_plane *nv_plane =
 		container_of(plane, struct nouveau_plane, base);
 	struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb);
@@ -427,7 +427,7 @@ nv04_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 static int
 nv04_disable_plane(struct drm_plane *plane)
 {
-	struct nvif_object *dev = &nouveau_drm(plane->dev)->device.object;
+	struct nvif_object *dev = &nouveau_drm(plane->dev)->client.device.object;
 	struct nouveau_plane *nv_plane =
 		container_of(plane, struct nouveau_plane, base);
 
@@ -495,7 +495,7 @@ err:
 void
 nouveau_overlay_init(struct drm_device *device)
 {
-	struct nvif_device *dev = &nouveau_drm(device)->device;
+	struct nvif_device *dev = &nouveau_drm(device)->client.device;
 	if (dev->info.chipset < 0x10)
 		nv04_overlay_init(device);
 	else if (dev->info.chipset <= 0x40)
diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c
index 477a8d072af4..01664357d3e1 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c
@@ -54,7 +54,7 @@ static struct nvkm_i2c_bus_probe nv04_tv_encoder_info[] = {
 int nv04_tv_identify(struct drm_device *dev, int i2c_index)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct nvkm_i2c_bus *bus = nvkm_i2c_bus_find(i2c, i2c_index);
 	if (bus) {
 		return nvkm_i2c_bus_probe(bus, "TV encoder",
@@ -206,7 +206,7 @@ nv04_tv_create(struct drm_connector *connector, struct dcb_output *entry)
 	struct drm_encoder *encoder;
 	struct drm_device *dev = connector->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct nvkm_i2c_bus *bus = nvkm_i2c_bus_find(i2c, entry->i2c_index);
 	int type, ret;
 
diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
index 434d1e29f279..6d99f11fee4e 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
@@ -46,7 +46,7 @@ static uint32_t nv42_tv_sample_load(struct drm_encoder *encoder)
 {
 	struct drm_device *dev = encoder->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_gpio *gpio = nvxx_gpio(&drm->device);
+	struct nvkm_gpio *gpio = nvxx_gpio(&drm->client.device);
 	uint32_t testval, regoffset = nv04_dac_output_offset(encoder);
 	uint32_t gpio0, gpio1, fp_htotal, fp_hsync_start, fp_hsync_end,
 		fp_control, test_ctrl, dacclk, ctv_14, ctv_1c, ctv_6c;
@@ -130,7 +130,7 @@ static bool
 get_tv_detect_quirks(struct drm_device *dev, uint32_t *pin_mask)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 
 	if (device->quirk && device->quirk->tv_pin_mask) {
 		*pin_mask = device->quirk->tv_pin_mask;
@@ -154,8 +154,8 @@ nv17_tv_detect(struct drm_encoder *encoder, struct drm_connector *connector)
 		return connector_status_disconnected;
 
 	if (reliable) {
-		if (drm->device.info.chipset == 0x42 ||
-		    drm->device.info.chipset == 0x43)
+		if (drm->client.device.info.chipset == 0x42 ||
+		    drm->client.device.info.chipset == 0x43)
 			tv_enc->pin_mask =
 				nv42_tv_sample_load(encoder) >> 28 & 0xe;
 		else
@@ -362,7 +362,7 @@ static void  nv17_tv_dpms(struct drm_encoder *encoder, int mode)
 {
 	struct drm_device *dev = encoder->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_gpio *gpio = nvxx_gpio(&drm->device);
+	struct nvkm_gpio *gpio = nvxx_gpio(&drm->client.device);
 	struct nv17_tv_state *regs = &to_tv_enc(encoder)->state;
 	struct nv17_tv_norm_params *tv_norm = get_tv_norm(encoder);
 
@@ -435,7 +435,7 @@ static void nv17_tv_prepare(struct drm_encoder *encoder)
 	/* Set the DACCLK register */
 	dacclk = (NVReadRAMDAC(dev, 0, dacclk_off) & ~0x30) | 0x1;
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		dacclk |= 0x1a << 16;
 
 	if (tv_norm->kind == CTV_ENC_MODE) {
@@ -492,7 +492,7 @@ static void nv17_tv_mode_set(struct drm_encoder *encoder,
 			tv_regs->ptv_614 = 0x13;
 		}
 
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE) {
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE) {
 			tv_regs->ptv_500 = 0xe8e0;
 			tv_regs->ptv_504 = 0x1710;
 			tv_regs->ptv_604 = 0x0;
@@ -587,7 +587,7 @@ static void nv17_tv_commit(struct drm_encoder *encoder)
 	nv17_tv_state_load(dev, &to_tv_enc(encoder)->state);
 
 	/* This could use refinement for flatpanels, but it should work */
-	if (drm->device.info.chipset < 0x44)
+	if (drm->client.device.info.chipset < 0x44)
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL +
 					nv04_dac_output_offset(encoder),
 					0xf0000000);
diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.h b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.h
index 1b07521cde0d..29773b325bd9 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.h
+++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.h
@@ -130,13 +130,13 @@ void nv17_ctv_update_rescaler(struct drm_encoder *encoder);
 static inline void nv_write_ptv(struct drm_device *dev, uint32_t reg,
 				uint32_t val)
 {
-	struct nvif_device *device = &nouveau_drm(dev)->device;
+	struct nvif_device *device = &nouveau_drm(dev)->client.device;
 	nvif_wr32(&device->object, reg, val);
 }
 
 static inline uint32_t nv_read_ptv(struct drm_device *dev, uint32_t reg)
 {
-	struct nvif_device *device = &nouveau_drm(dev)->device;
+	struct nvif_device *device = &nouveau_drm(dev)->client.device;
 	return nvif_rd32(&device->object, reg);
 }
 
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl826e.h b/drivers/gpu/drm/nouveau/include/nvif/cl826e.h
index 05e6ef7cd190..91e33db21a2f 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl826e.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl826e.h
@@ -10,5 +10,5 @@ struct g82_channel_dma_v0 {
 	__u64 offset;
 };
 
-#define G82_CHANNEL_DMA_V0_NTFY_UEVENT                                     0x00
+#define NV826E_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl826f.h b/drivers/gpu/drm/nouveau/include/nvif/cl826f.h
index cecafcb1e954..e34efd4ec537 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl826f.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl826f.h
@@ -11,5 +11,5 @@ struct g82_channel_gpfifo_v0 {
 	__u64 vm;
 };
 
-#define G82_CHANNEL_GPFIFO_V0_NTFY_UEVENT                                  0x00
+#define NV826F_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl906f.h b/drivers/gpu/drm/nouveau/include/nvif/cl906f.h
index 2caf0838fcfd..a2d5410a491b 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl906f.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl906f.h
@@ -10,5 +10,6 @@ struct fermi_channel_gpfifo_v0 {
 	__u64 vm;
 };
 
-#define FERMI_CHANNEL_GPFIFO_V0_NTFY_UEVENT                                0x00
+#define NV906F_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
+#define NV906F_V0_NTFY_KILLED                                              0x01
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
index 46301ec018ce..2efa3d048bb9 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
@@ -25,5 +25,6 @@ struct kepler_channel_gpfifo_a_v0 {
 	__u64 vm;
 };
 
-#define NVA06F_V0_NTFY_UEVENT                                              0x00
+#define NVA06F_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
+#define NVA06F_V0_NTFY_KILLED                                              0x01
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h
index 82235f30277c..3a2c0137d4b4 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/class.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/class.h
@@ -2,23 +2,31 @@
 #define __NVIF_CLASS_H__
 
 /* these class numbers are made up by us, and not nvidia-assigned */
-#define NVIF_CLASS_CONTROL                                    /* if0001.h */ -1
-#define NVIF_CLASS_PERFMON                                    /* if0002.h */ -2
-#define NVIF_CLASS_PERFDOM                                    /* if0003.h */ -3
-#define NVIF_CLASS_SW_NV04                                    /* if0004.h */ -4
-#define NVIF_CLASS_SW_NV10                                    /* if0005.h */ -5
-#define NVIF_CLASS_SW_NV50                                    /* if0005.h */ -6
-#define NVIF_CLASS_SW_GF100                                   /* if0005.h */ -7
+#define NVIF_CLASS_CLIENT                            /* if0000.h */ -0x00000000
+
+#define NVIF_CLASS_CONTROL                           /* if0001.h */ -0x00000001
+
+#define NVIF_CLASS_PERFMON                           /* if0002.h */ -0x00000002
+#define NVIF_CLASS_PERFDOM                           /* if0003.h */ -0x00000003
+
+#define NVIF_CLASS_SW_NV04                           /* if0004.h */ -0x00000004
+#define NVIF_CLASS_SW_NV10                           /* if0005.h */ -0x00000005
+#define NVIF_CLASS_SW_NV50                           /* if0005.h */ -0x00000006
+#define NVIF_CLASS_SW_GF100                          /* if0005.h */ -0x00000007
 
 /* the below match nvidia-assigned (either in hw, or sw) class numbers */
+#define NV_NULL_CLASS                                                0x00000030
+
 #define NV_DEVICE                                     /* cl0080.h */ 0x00000080
 
 #define NV_DMA_FROM_MEMORY                            /* cl0002.h */ 0x00000002
 #define NV_DMA_TO_MEMORY                              /* cl0002.h */ 0x00000003
 #define NV_DMA_IN_MEMORY                              /* cl0002.h */ 0x0000003d
 
+#define NV50_TWOD                                                    0x0000502d
 #define FERMI_TWOD_A                                                 0x0000902d
 
+#define NV50_MEMORY_TO_MEMORY_FORMAT                                 0x00005039
 #define FERMI_MEMORY_TO_MEMORY_FORMAT_A                              0x00009039
 
 #define KEPLER_INLINE_TO_MEMORY_A                                    0x0000a040
@@ -99,6 +107,12 @@
 #define GF110_DISP_OVERLAY_CONTROL_DMA                /* cl507e.h */ 0x0000907e
 #define GK104_DISP_OVERLAY_CONTROL_DMA                /* cl507e.h */ 0x0000917e
 
+#define NV50_TESLA                                                   0x00005097
+#define G82_TESLA                                                    0x00008297
+#define GT200_TESLA                                                  0x00008397
+#define GT214_TESLA                                                  0x00008597
+#define GT21A_TESLA                                                  0x00008697
+
 #define FERMI_A                                       /* cl9097.h */ 0x00009097
 #define FERMI_B                                       /* cl9097.h */ 0x00009197
 #define FERMI_C                                       /* cl9097.h */ 0x00009297
@@ -140,6 +154,8 @@
 
 #define FERMI_DECOMPRESS                                             0x000090b8
 
+#define NV50_COMPUTE                                                 0x000050c0
+#define GT214_COMPUTE                                                0x000085c0
 #define FERMI_COMPUTE_A                                              0x000090c0
 #define FERMI_COMPUTE_B                                              0x000091c0
 #define KEPLER_COMPUTE_A                                             0x0000a0c0
diff --git a/drivers/gpu/drm/nouveau/include/nvif/client.h b/drivers/gpu/drm/nouveau/include/nvif/client.h
index 4a7f6f7b836d..b52a8eadce01 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/client.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/client.h
@@ -11,8 +11,7 @@ struct nvif_client {
 	bool super;
 };
 
-int  nvif_client_init(const char *drv, const char *name, u64 device,
-		      const char *cfg, const char *dbg,
+int  nvif_client_init(struct nvif_client *parent, const char *name, u64 device,
 		      struct nvif_client *);
 void nvif_client_fini(struct nvif_client *);
 int  nvif_client_ioctl(struct nvif_client *, void *, u32);
diff --git a/drivers/gpu/drm/nouveau/include/nvif/driver.h b/drivers/gpu/drm/nouveau/include/nvif/driver.h
index 8bd39e69229c..0c6f48d8140a 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/driver.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/driver.h
@@ -1,5 +1,7 @@
 #ifndef __NVIF_DRIVER_H__
 #define __NVIF_DRIVER_H__
+#include <nvif/os.h>
+struct nvif_client;
 
 struct nvif_driver {
 	const char *name;
@@ -14,9 +16,11 @@ struct nvif_driver {
 	bool keep;
 };
 
+int nvif_driver_init(const char *drv, const char *cfg, const char *dbg,
+		     const char *name, u64 device, struct nvif_client *);
+
 extern const struct nvif_driver nvif_driver_nvkm;
 extern const struct nvif_driver nvif_driver_drm;
 extern const struct nvif_driver nvif_driver_lib;
 extern const struct nvif_driver nvif_driver_null;
-
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/if0000.h b/drivers/gpu/drm/nouveau/include/nvif/if0000.h
index 85c44e8a1201..c2c0fc41e017 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/if0000.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/if0000.h
@@ -1,9 +1,16 @@
 #ifndef __NVIF_IF0000_H__
 #define __NVIF_IF0000_H__
 
-#define NV_CLIENT_DEVLIST                                                  0x00
+struct nvif_client_v0 {
+	__u8  version;
+	__u8  pad01[7];
+	__u64 device;
+	char  name[32];
+};
+
+#define NVIF_CLIENT_V0_DEVLIST                                             0x00
 
-struct nv_client_devlist_v0 {
+struct nvif_client_devlist_v0 {
 	__u8  version;
 	__u8  count;
 	__u8  pad02[6];
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h
index eaf5905a87a3..e876634da10a 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h
@@ -1,5 +1,6 @@
 #ifndef __NVKM_CLIENT_H__
 #define __NVKM_CLIENT_H__
+#define nvkm_client(p) container_of((p), struct nvkm_client, object)
 #include <core/object.h>
 
 struct nvkm_client {
@@ -8,9 +9,8 @@ struct nvkm_client {
 	u64 device;
 	u32 debug;
 
-	struct nvkm_client_notify *notify[16];
+	struct nvkm_client_notify *notify[32];
 	struct rb_root objroot;
-	struct rb_root dmaroot;
 
 	bool super;
 	void *data;
@@ -19,15 +19,11 @@ struct nvkm_client {
 	struct nvkm_vm *vm;
 };
 
-bool nvkm_client_insert(struct nvkm_client *, struct nvkm_object *);
-void nvkm_client_remove(struct nvkm_client *, struct nvkm_object *);
-struct nvkm_object *nvkm_client_search(struct nvkm_client *, u64 object);
-
 int  nvkm_client_new(const char *name, u64 device, const char *cfg,
-		     const char *dbg, struct nvkm_client **);
-void nvkm_client_del(struct nvkm_client **);
-int  nvkm_client_init(struct nvkm_client *);
-int  nvkm_client_fini(struct nvkm_client *, bool suspend);
+		     const char *dbg,
+		     int (*)(const void *, u32, const void *, u32),
+		     struct nvkm_client **);
+struct nvkm_client *nvkm_client_search(struct nvkm_client *, u64 handle);
 
 int nvkm_client_notify_new(struct nvkm_object *, struct nvkm_event *,
 			   void *data, u32 size);
@@ -37,8 +33,8 @@ int nvkm_client_notify_put(struct nvkm_client *, int index);
 
 /* logging for client-facing objects */
 #define nvif_printk(o,l,p,f,a...) do {                                         \
-	struct nvkm_object *_object = (o);                                     \
-	struct nvkm_client *_client = _object->client;                         \
+	const struct nvkm_object *_object = (o);                               \
+	const struct nvkm_client *_client = _object->client;                   \
 	if (_client->debug >= NV_DBG_##l)                                      \
 		printk(KERN_##p "nouveau: %s:%08x:%08x: "f, _client->name,     \
 		       _object->handle, _object->oclass, ##a);                 \
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
index 6bc712f32c8b..d426b86e2712 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
@@ -262,7 +262,7 @@ extern const struct nvkm_sclass nvkm_udevice_sclass;
 
 /* device logging */
 #define nvdev_printk_(d,l,p,f,a...) do {                                       \
-	struct nvkm_device *_device = (d);                                     \
+	const struct nvkm_device *_device = (d);                               \
 	if (_device->debug >= (l))                                             \
 		dev_##p(_device->dev, f, ##a);                                 \
 } while(0)
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h b/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h
index 9ebfd8782366..d4cd2fbfde88 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h
@@ -20,6 +20,7 @@ struct nvkm_engine_func {
 	int (*fini)(struct nvkm_engine *, bool suspend);
 	void (*intr)(struct nvkm_engine *);
 	void (*tile)(struct nvkm_engine *, int region, struct nvkm_fb_tile *);
+	bool (*chsw_load)(struct nvkm_engine *);
 
 	struct {
 		int (*sclass)(struct nvkm_oclass *, int index,
@@ -44,4 +45,5 @@ int nvkm_engine_new_(const struct nvkm_engine_func *, struct nvkm_device *,
 struct nvkm_engine *nvkm_engine_ref(struct nvkm_engine *);
 void nvkm_engine_unref(struct nvkm_engine **);
 void nvkm_engine_tile(struct nvkm_engine *, int region);
+bool nvkm_engine_chsw_load(struct nvkm_engine *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h b/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h
index 9363b839a9da..33ca6769266a 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h
@@ -6,9 +6,10 @@ struct nvkm_vma;
 struct nvkm_vm;
 
 enum nvkm_memory_target {
-	NVKM_MEM_TARGET_INST,
-	NVKM_MEM_TARGET_VRAM,
-	NVKM_MEM_TARGET_HOST,
+	NVKM_MEM_TARGET_INST, /* instance memory */
+	NVKM_MEM_TARGET_VRAM, /* video memory */
+	NVKM_MEM_TARGET_HOST, /* coherent system memory */
+	NVKM_MEM_TARGET_NCOH, /* non-coherent system memory */
 };
 
 struct nvkm_memory {
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h b/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h
index d92fd41e4056..7bd4897a8a2a 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h
@@ -5,7 +5,7 @@
 struct nvkm_mm_node {
 	struct list_head nl_entry;
 	struct list_head fl_entry;
-	struct list_head rl_entry;
+	struct nvkm_mm_node *next;
 
 #define NVKM_MM_HEAP_ANY 0x00
 	u8  heap;
@@ -38,4 +38,10 @@ int  nvkm_mm_tail(struct nvkm_mm *, u8 heap, u8 type, u32 size_max,
 		  u32 size_min, u32 align, struct nvkm_mm_node **);
 void nvkm_mm_free(struct nvkm_mm *, struct nvkm_mm_node **);
 void nvkm_mm_dump(struct nvkm_mm *, const char *);
+
+static inline bool
+nvkm_mm_contiguous(struct nvkm_mm_node *node)
+{
+	return !node->next;
+}
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/object.h b/drivers/gpu/drm/nouveau/include/nvkm/core/object.h
index dcd048b91fac..96dda350ada3 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/object.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/object.h
@@ -62,6 +62,11 @@ int nvkm_object_wr32(struct nvkm_object *, u64 addr, u32  data);
 int nvkm_object_bind(struct nvkm_object *, struct nvkm_gpuobj *, int align,
 		     struct nvkm_gpuobj **);
 
+bool nvkm_object_insert(struct nvkm_object *);
+void nvkm_object_remove(struct nvkm_object *);
+struct nvkm_object *nvkm_object_search(struct nvkm_client *, u64 object,
+				       const struct nvkm_object_func *);
+
 struct nvkm_sclass {
 	int minver;
 	int maxver;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
index 57adefa8b08e..ca9ed3d68f44 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
@@ -32,7 +32,7 @@ void nvkm_subdev_intr(struct nvkm_subdev *);
 
 /* subdev logging */
 #define nvkm_printk_(s,l,p,f,a...) do {                                        \
-	struct nvkm_subdev *_subdev = (s);                                     \
+	const struct nvkm_subdev *_subdev = (s);                               \
 	if (_subdev->debug >= (l)) {                                           \
 		dev_##p(_subdev->device->dev, "%s: "f,                         \
 			nvkm_subdev_name[_subdev->index], ##a);                \
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h
index 114bfb737a81..d2a6532ce3b9 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h
@@ -12,9 +12,6 @@ struct nvkm_dmaobj {
 	u32 access;
 	u64 start;
 	u64 limit;
-
-	struct rb_node rb;
-	u64 handle; /*XXX HANDLE MERGE */
 };
 
 struct nvkm_dma {
@@ -22,8 +19,7 @@ struct nvkm_dma {
 	struct nvkm_engine engine;
 };
 
-struct nvkm_dmaobj *
-nvkm_dma_search(struct nvkm_dma *, struct nvkm_client *, u64 object);
+struct nvkm_dmaobj *nvkm_dmaobj_search(struct nvkm_client *, u64 object);
 
 int nv04_dma_new(struct nvkm_device *, int, struct nvkm_dma **);
 int nv50_dma_new(struct nvkm_device *, int, struct nvkm_dma **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h
index e6baf039c269..7e498e65b1e8 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h
@@ -4,13 +4,26 @@
 #include <core/engine.h>
 struct nvkm_fifo_chan;
 
+enum nvkm_falcon_dmaidx {
+	FALCON_DMAIDX_UCODE		= 0,
+	FALCON_DMAIDX_VIRT		= 1,
+	FALCON_DMAIDX_PHYS_VID		= 2,
+	FALCON_DMAIDX_PHYS_SYS_COH	= 3,
+	FALCON_DMAIDX_PHYS_SYS_NCOH	= 4,
+};
+
 struct nvkm_falcon {
 	const struct nvkm_falcon_func *func;
-	struct nvkm_engine engine;
-
+	const struct nvkm_subdev *owner;
+	const char *name;
 	u32 addr;
-	u8  version;
-	u8  secret;
+
+	struct mutex mutex;
+	const struct nvkm_subdev *user;
+
+	u8 version;
+	u8 secret;
+	bool debug;
 
 	struct nvkm_memory *core;
 	bool external;
@@ -19,15 +32,25 @@ struct nvkm_falcon {
 		u32 limit;
 		u32 *data;
 		u32  size;
+		u8 ports;
 	} code;
 
 	struct {
 		u32 limit;
 		u32 *data;
 		u32  size;
+		u8 ports;
 	} data;
+
+	struct nvkm_engine engine;
 };
 
+int nvkm_falcon_v1_new(struct nvkm_subdev *owner, const char *name, u32 addr,
+		       struct nvkm_falcon **);
+void nvkm_falcon_del(struct nvkm_falcon **);
+int nvkm_falcon_get(struct nvkm_falcon *, const struct nvkm_subdev *);
+void nvkm_falcon_put(struct nvkm_falcon *, const struct nvkm_subdev *);
+
 int nvkm_falcon_new_(const struct nvkm_falcon_func *, struct nvkm_device *,
 		     int index, bool enable, u32 addr, struct nvkm_engine **);
 
@@ -42,6 +65,51 @@ struct nvkm_falcon_func {
 	} data;
 	void (*init)(struct nvkm_falcon *);
 	void (*intr)(struct nvkm_falcon *, struct nvkm_fifo_chan *);
+	void (*load_imem)(struct nvkm_falcon *, void *, u32, u32, u16, u8, bool);
+	void (*load_dmem)(struct nvkm_falcon *, void *, u32, u32, u8);
+	void (*read_dmem)(struct nvkm_falcon *, u32, u32, u8, void *);
+	void (*bind_context)(struct nvkm_falcon *, struct nvkm_gpuobj *);
+	int (*wait_for_halt)(struct nvkm_falcon *, u32);
+	int (*clear_interrupt)(struct nvkm_falcon *, u32);
+	void (*set_start_addr)(struct nvkm_falcon *, u32 start_addr);
+	void (*start)(struct nvkm_falcon *);
+	int (*enable)(struct nvkm_falcon *falcon);
+	void (*disable)(struct nvkm_falcon *falcon);
+
 	struct nvkm_sclass sclass[];
 };
+
+static inline u32
+nvkm_falcon_rd32(struct nvkm_falcon *falcon, u32 addr)
+{
+	return nvkm_rd32(falcon->owner->device, falcon->addr + addr);
+}
+
+static inline void
+nvkm_falcon_wr32(struct nvkm_falcon *falcon, u32 addr, u32 data)
+{
+	nvkm_wr32(falcon->owner->device, falcon->addr + addr, data);
+}
+
+static inline u32
+nvkm_falcon_mask(struct nvkm_falcon *falcon, u32 addr, u32 mask, u32 val)
+{
+	struct nvkm_device *device = falcon->owner->device;
+
+	return nvkm_mask(device, falcon->addr + addr, mask, val);
+}
+
+void nvkm_falcon_load_imem(struct nvkm_falcon *, void *, u32, u32, u16, u8,
+			   bool);
+void nvkm_falcon_load_dmem(struct nvkm_falcon *, void *, u32, u32, u8);
+void nvkm_falcon_read_dmem(struct nvkm_falcon *, u32, u32, u8, void *);
+void nvkm_falcon_bind_context(struct nvkm_falcon *, struct nvkm_gpuobj *);
+void nvkm_falcon_set_start_addr(struct nvkm_falcon *, u32);
+void nvkm_falcon_start(struct nvkm_falcon *);
+int nvkm_falcon_wait_for_halt(struct nvkm_falcon *, u32);
+int nvkm_falcon_clear_interrupt(struct nvkm_falcon *, u32);
+int nvkm_falcon_enable(struct nvkm_falcon *);
+void nvkm_falcon_disable(struct nvkm_falcon *);
+int nvkm_falcon_reset(struct nvkm_falcon *);
+
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
index ed92fec5292c..24efa900d8ca 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
@@ -40,6 +40,7 @@ struct nvkm_fifo {
 
 	struct nvkm_event uevent; /* async user trigger */
 	struct nvkm_event cevent; /* channel creation event */
+	struct nvkm_event kevent; /* channel killed */
 };
 
 void nvkm_fifo_pause(struct nvkm_fifo *, unsigned long *);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/power_budget.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/power_budget.h
new file mode 100644
index 000000000000..f5f4a14c4030
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/power_budget.h
@@ -0,0 +1,26 @@
+#ifndef __NVBIOS_POWER_BUDGET_H__
+#define __NVBIOS_POWER_BUDGET_H__
+
+#include <nvkm/subdev/bios.h>
+
+struct nvbios_power_budget_entry {
+	u32 min_w;
+	u32 avg_w;
+	u32 max_w;
+};
+
+struct nvbios_power_budget {
+	u32 offset;
+	u8  ver;
+	u8  hlen;
+	u8  elen;
+	u8  ecount;
+	u8  cap_entry;
+};
+
+int nvbios_power_budget_header(struct nvkm_bios *,
+                               struct nvbios_power_budget *);
+int nvbios_power_budget_entry(struct nvkm_bios *, struct nvbios_power_budget *,
+                              u8 idx, struct nvbios_power_budget_entry *);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
index 794e432578b2..0b26a4c860ec 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
@@ -29,7 +29,7 @@ struct nvkm_mem {
 	u8  page_shift;
 
 	struct nvkm_mm_node *tag;
-	struct list_head regions;
+	struct nvkm_mm_node *mem;
 	dma_addr_t *pages;
 	u32 memtype;
 	u64 offset;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h
index 3c2ddd975273..b7a9b041e130 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h
@@ -8,6 +8,9 @@ struct nvkm_iccsense {
 	bool data_valid;
 	struct list_head sensors;
 	struct list_head rails;
+
+	u32 power_w_max;
+	u32 power_w_crit;
 };
 
 int gf100_iccsense_new(struct nvkm_device *, int index, struct nvkm_iccsense **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
index 27d25b18d85c..e68ba636741b 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
@@ -9,6 +9,7 @@ struct nvkm_mc {
 
 void nvkm_mc_enable(struct nvkm_device *, enum nvkm_devidx);
 void nvkm_mc_disable(struct nvkm_device *, enum nvkm_devidx);
+bool nvkm_mc_enabled(struct nvkm_device *, enum nvkm_devidx);
 void nvkm_mc_reset(struct nvkm_device *, enum nvkm_devidx);
 void nvkm_mc_intr(struct nvkm_device *, bool *handled);
 void nvkm_mc_intr_unarm(struct nvkm_device *);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
index e6523e2cea9f..ac2a695963c1 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
@@ -43,6 +43,7 @@ int nv40_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int nv46_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int nv4c_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int g84_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
+int g92_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int g94_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int gf100_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int gf106_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h
index f37538eb1fe5..179b6ed3f595 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h
@@ -1,10 +1,12 @@
 #ifndef __NVKM_PMU_H__
 #define __NVKM_PMU_H__
 #include <core/subdev.h>
+#include <engine/falcon.h>
 
 struct nvkm_pmu {
 	const struct nvkm_pmu_func *func;
 	struct nvkm_subdev subdev;
+	struct nvkm_falcon *falcon;
 
 	struct {
 		u32 base;
@@ -35,6 +37,7 @@ int gk110_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
 int gk208_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
 int gk20a_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
 int gm107_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
+int gm20b_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
 int gp100_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
 int gp102_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
 
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
index b04c38c07761..5dbd8aa4f8c2 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
@@ -26,7 +26,7 @@
 #include <core/subdev.h>
 
 enum nvkm_secboot_falcon {
-	NVKM_SECBOOT_FALCON_PMU	= 0,
+	NVKM_SECBOOT_FALCON_PMU = 0,
 	NVKM_SECBOOT_FALCON_RESERVED = 1,
 	NVKM_SECBOOT_FALCON_FECS = 2,
 	NVKM_SECBOOT_FALCON_GPCCS = 3,
@@ -35,22 +35,23 @@ enum nvkm_secboot_falcon {
 };
 
 /**
- * @base:		base IO address of the falcon performing secure boot
- * @irq_mask:		IRQ mask of the falcon performing secure boot
- * @enable_mask:	enable mask of the falcon performing secure boot
+ * @wpr_set: whether the WPR region is currently set
 */
 struct nvkm_secboot {
 	const struct nvkm_secboot_func *func;
+	struct nvkm_acr *acr;
 	struct nvkm_subdev subdev;
+	struct nvkm_falcon *boot_falcon;
 
-	enum nvkm_devidx devidx;
-	u32 base;
+	u64 wpr_addr;
+	u32 wpr_size;
+
+	bool wpr_set;
 };
 #define nvkm_secboot(p) container_of((p), struct nvkm_secboot, subdev)
 
 bool nvkm_secboot_is_managed(struct nvkm_secboot *, enum nvkm_secboot_falcon);
-int nvkm_secboot_reset(struct nvkm_secboot *, u32 falcon);
-int nvkm_secboot_start(struct nvkm_secboot *, u32 falcon);
+int nvkm_secboot_reset(struct nvkm_secboot *, enum nvkm_secboot_falcon);
 
 int gm200_secboot_new(struct nvkm_device *, int, struct nvkm_secboot **);
 int gm20b_secboot_new(struct nvkm_device *, int, struct nvkm_secboot **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
index 82d3e28918fd..6a567fe347b3 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
@@ -48,10 +48,8 @@ void nvkm_timer_alarm_cancel(struct nvkm_timer *, struct nvkm_alarm *);
 	} while (_taken = nvkm_timer_read(_tmr) - _time0, _taken < _nsecs);    \
                                                                                \
 	if (_taken >= _nsecs) {                                                \
-		if (_warn) {                                                   \
-			dev_warn(_device->dev, "timeout at %s:%d/%s()!\n",     \
-				 __FILE__, __LINE__, __func__);                \
-		}                                                              \
+		if (_warn)                                                     \
+			dev_WARN(_device->dev, "timeout\n");                   \
 		_taken = -ETIMEDOUT;                                           \
 	}                                                                      \
 	_taken;                                                                \
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
index 71ebbfd4484f..d23209b62c25 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
@@ -11,6 +11,7 @@ struct nvkm_top {
 u32 nvkm_top_reset(struct nvkm_device *, enum nvkm_devidx);
 u32 nvkm_top_intr(struct nvkm_device *, u32 intr, u64 *subdevs);
 u32 nvkm_top_intr_mask(struct nvkm_device *, enum nvkm_devidx);
+int nvkm_top_fault_id(struct nvkm_device *, enum nvkm_devidx);
 enum nvkm_devidx nvkm_top_fault(struct nvkm_device *, int fault);
 enum nvkm_devidx nvkm_top_engine(struct nvkm_device *, int, int *runl, int *engn);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 4df4f6ed4886..f98f800cc011 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -87,7 +87,7 @@ nouveau_abi16_put(struct nouveau_abi16 *abi16, int ret)
 s32
 nouveau_abi16_swclass(struct nouveau_drm *drm)
 {
-	switch (drm->device.info.family) {
+	switch (drm->client.device.info.family) {
 	case NV_DEVICE_INFO_V0_TNT:
 		return NVIF_CLASS_SW_NV04;
 	case NV_DEVICE_INFO_V0_CELSIUS:
@@ -175,7 +175,7 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS)
 {
 	struct nouveau_cli *cli = nouveau_cli(file_priv);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	struct nvkm_gr *gr = nvxx_gr(device);
 	struct drm_nouveau_getparam *getparam = data;
 
@@ -321,7 +321,7 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 	}
 
 	/* Named memory object area */
-	ret = nouveau_gem_new(dev, PAGE_SIZE, 0, NOUVEAU_GEM_DOMAIN_GART,
+	ret = nouveau_gem_new(cli, PAGE_SIZE, 0, NOUVEAU_GEM_DOMAIN_GART,
 			      0, 0, &chan->ntfy);
 	if (ret == 0)
 		ret = nouveau_bo_pin(chan->ntfy, TTM_PL_FLAG_TT, false);
diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c
index 8b1ca4add2ed..380f340204e8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_backlight.c
+++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c
@@ -65,7 +65,7 @@ static int
 nv40_get_intensity(struct backlight_device *bd)
 {
 	struct nouveau_drm *drm = bl_get_data(bd);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int val = (nvif_rd32(device, NV40_PMC_BACKLIGHT) &
 				   NV40_PMC_BACKLIGHT_MASK) >> 16;
 
@@ -76,7 +76,7 @@ static int
 nv40_set_intensity(struct backlight_device *bd)
 {
 	struct nouveau_drm *drm = bl_get_data(bd);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int val = bd->props.brightness;
 	int reg = nvif_rd32(device, NV40_PMC_BACKLIGHT);
 
@@ -96,7 +96,7 @@ static int
 nv40_backlight_init(struct drm_connector *connector)
 {
 	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	struct backlight_properties props;
 	struct backlight_device *bd;
 	struct backlight_connector bl_connector;
@@ -133,7 +133,7 @@ nv50_get_intensity(struct backlight_device *bd)
 {
 	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
 	struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int or = nv_encoder->or;
 	u32 div = 1025;
 	u32 val;
@@ -148,7 +148,7 @@ nv50_set_intensity(struct backlight_device *bd)
 {
 	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
 	struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int or = nv_encoder->or;
 	u32 div = 1025;
 	u32 val = (bd->props.brightness * div) / 100;
@@ -169,7 +169,7 @@ nva3_get_intensity(struct backlight_device *bd)
 {
 	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
 	struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int or = nv_encoder->or;
 	u32 div, val;
 
@@ -187,7 +187,7 @@ nva3_set_intensity(struct backlight_device *bd)
 {
 	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
 	struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int or = nv_encoder->or;
 	u32 div, val;
 
@@ -213,7 +213,7 @@ static int
 nv50_backlight_init(struct drm_connector *connector)
 {
 	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	struct nouveau_encoder *nv_encoder;
 	struct backlight_properties props;
 	struct backlight_device *bd;
@@ -231,9 +231,9 @@ nv50_backlight_init(struct drm_connector *connector)
 	if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(nv_encoder->or)))
 		return 0;
 
-	if (drm->device.info.chipset <= 0xa0 ||
-	    drm->device.info.chipset == 0xaa ||
-	    drm->device.info.chipset == 0xac)
+	if (drm->client.device.info.chipset <= 0xa0 ||
+	    drm->client.device.info.chipset == 0xaa ||
+	    drm->client.device.info.chipset == 0xac)
 		ops = &nv50_bl_ops;
 	else
 		ops = &nva3_bl_ops;
@@ -265,7 +265,7 @@ int
 nouveau_backlight_init(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	struct drm_connector *connector;
 
 	if (apple_gmux_present()) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index 23ffe8571a99..9a0772ad495a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -215,7 +215,7 @@ int call_lvds_script(struct drm_device *dev, struct dcb_output *dcbent, int head
 	 */
 
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	struct nvbios *bios = &drm->vbios;
 	uint8_t lvds_ver = bios->data[bios->fp.lvdsmanufacturerpointer];
 	uint32_t sel_clk_binding, sel_clk;
@@ -319,7 +319,7 @@ static int
 get_fp_strap(struct drm_device *dev, struct nvbios *bios)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 
 	/*
 	 * The fp strap is normally dictated by the "User Strap" in
@@ -333,10 +333,10 @@ get_fp_strap(struct drm_device *dev, struct nvbios *bios)
 	if (bios->major_version < 5 && bios->data[0x48] & 0x4)
 		return NVReadVgaCrtc5758(dev, 0, 0xf) & 0xf;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_MAXWELL)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_MAXWELL)
 		return nvif_rd32(device, 0x001800) & 0x0000000f;
 	else
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA)
 		return (nvif_rd32(device, NV_PEXTDEV_BOOT_0) >> 24) & 0xf;
 	else
 		return (nvif_rd32(device, NV_PEXTDEV_BOOT_0) >> 16) & 0xf;
@@ -638,7 +638,7 @@ int run_tmds_table(struct drm_device *dev, struct dcb_output *dcbent, int head,
 	 */
 
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	struct nvbios *bios = &drm->vbios;
 	int cv = bios->chip_version;
 	uint16_t clktable = 0, scriptptr;
@@ -1255,7 +1255,7 @@ olddcb_table(struct drm_device *dev)
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	u8 *dcb = NULL;
 
-	if (drm->device.info.family > NV_DEVICE_INFO_V0_TNT)
+	if (drm->client.device.info.family > NV_DEVICE_INFO_V0_TNT)
 		dcb = ROMPTR(dev, drm->vbios.data[0x36]);
 	if (!dcb) {
 		NV_WARN(drm, "No DCB data found in VBIOS\n");
@@ -1918,7 +1918,7 @@ static int load_nv17_hwsq_ucode_entry(struct drm_device *dev, struct nvbios *bio
 	 */
 
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	uint8_t bytes_to_write;
 	uint16_t hwsq_entry_offset;
 	int i;
@@ -2012,7 +2012,7 @@ uint8_t *nouveau_bios_embedded_edid(struct drm_device *dev)
 static bool NVInitVBIOS(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_bios *bios = nvxx_bios(&drm->device);
+	struct nvkm_bios *bios = nvxx_bios(&drm->client.device);
 	struct nvbios *legacy = &drm->vbios;
 
 	memset(legacy, 0, sizeof(struct nvbios));
@@ -2064,7 +2064,7 @@ nouveau_bios_posted(struct drm_device *dev)
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	unsigned htotal;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA)
 		return true;
 
 	htotal  = NVReadVgaCrtc(dev, 0, 0x06);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 8a528ebe30f3..548f36d33924 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -48,7 +48,7 @@ nv10_bo_update_tile_region(struct drm_device *dev, struct nouveau_drm_tile *reg,
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	int i = reg - drm->tile.reg;
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	struct nvkm_fb *fb = device->fb;
 	struct nvkm_fb_tile *tile = &fb->tile.region[i];
 
@@ -100,7 +100,7 @@ nv10_bo_set_tiling(struct drm_device *dev, u32 addr,
 		   u32 size, u32 pitch, u32 flags)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_fb *fb = nvxx_fb(&drm->device);
+	struct nvkm_fb *fb = nvxx_fb(&drm->client.device);
 	struct nouveau_drm_tile *tile, *found = NULL;
 	int i;
 
@@ -139,60 +139,62 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
 	kfree(nvbo);
 }
 
+static inline u64
+roundup_64(u64 x, u32 y)
+{
+	x += y - 1;
+	do_div(x, y);
+	return x * y;
+}
+
 static void
 nouveau_bo_fixup_align(struct nouveau_bo *nvbo, u32 flags,
-		       int *align, int *size)
+		       int *align, u64 *size)
 {
 	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 
 	if (device->info.family < NV_DEVICE_INFO_V0_TESLA) {
 		if (nvbo->tile_mode) {
 			if (device->info.chipset >= 0x40) {
 				*align = 65536;
-				*size = roundup(*size, 64 * nvbo->tile_mode);
+				*size = roundup_64(*size, 64 * nvbo->tile_mode);
 
 			} else if (device->info.chipset >= 0x30) {
 				*align = 32768;
-				*size = roundup(*size, 64 * nvbo->tile_mode);
+				*size = roundup_64(*size, 64 * nvbo->tile_mode);
 
 			} else if (device->info.chipset >= 0x20) {
 				*align = 16384;
-				*size = roundup(*size, 64 * nvbo->tile_mode);
+				*size = roundup_64(*size, 64 * nvbo->tile_mode);
 
 			} else if (device->info.chipset >= 0x10) {
 				*align = 16384;
-				*size = roundup(*size, 32 * nvbo->tile_mode);
+				*size = roundup_64(*size, 32 * nvbo->tile_mode);
 			}
 		}
 	} else {
-		*size = roundup(*size, (1 << nvbo->page_shift));
+		*size = roundup_64(*size, (1 << nvbo->page_shift));
 		*align = max((1 <<  nvbo->page_shift), *align);
 	}
 
-	*size = roundup(*size, PAGE_SIZE);
+	*size = roundup_64(*size, PAGE_SIZE);
 }
 
 int
-nouveau_bo_new(struct drm_device *dev, int size, int align,
+nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
 	       uint32_t flags, uint32_t tile_mode, uint32_t tile_flags,
 	       struct sg_table *sg, struct reservation_object *robj,
 	       struct nouveau_bo **pnvbo)
 {
-	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nouveau_drm *drm = nouveau_drm(cli->dev);
 	struct nouveau_bo *nvbo;
 	size_t acc_size;
 	int ret;
 	int type = ttm_bo_type_device;
-	int lpg_shift = 12;
-	int max_size;
-
-	if (drm->client.vm)
-		lpg_shift = drm->client.vm->mmu->lpg_shift;
-	max_size = INT_MAX & ~((1 << lpg_shift) - 1);
 
-	if (size <= 0 || size > max_size) {
-		NV_WARN(drm, "skipped size %x\n", (u32)size);
+	if (!size) {
+		NV_WARN(drm, "skipped size %016llx\n", size);
 		return -EINVAL;
 	}
 
@@ -208,8 +210,9 @@ nouveau_bo_new(struct drm_device *dev, int size, int align,
 	nvbo->tile_mode = tile_mode;
 	nvbo->tile_flags = tile_flags;
 	nvbo->bo.bdev = &drm->ttm.bdev;
+	nvbo->cli = cli;
 
-	if (!nvxx_device(&drm->device)->func->cpu_coherent)
+	if (!nvxx_device(&drm->client.device)->func->cpu_coherent)
 		nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED;
 
 	nvbo->page_shift = 12;
@@ -255,10 +258,10 @@ static void
 set_placement_range(struct nouveau_bo *nvbo, uint32_t type)
 {
 	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
-	u32 vram_pages = drm->device.info.ram_size >> PAGE_SHIFT;
+	u32 vram_pages = drm->client.device.info.ram_size >> PAGE_SHIFT;
 	unsigned i, fpfn, lpfn;
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CELSIUS &&
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CELSIUS &&
 	    nvbo->tile_mode && (type & TTM_PL_FLAG_VRAM) &&
 	    nvbo->bo.mem.num_pages < vram_pages / 4) {
 		/*
@@ -316,12 +319,12 @@ nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t memtype, bool contig)
 	if (ret)
 		return ret;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA &&
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA &&
 	    memtype == TTM_PL_FLAG_VRAM && contig) {
 		if (nvbo->tile_flags & NOUVEAU_GEM_TILE_NONCONTIG) {
 			if (bo->mem.mem_type == TTM_PL_VRAM) {
 				struct nvkm_mem *mem = bo->mem.mm_node;
-				if (!list_is_singular(&mem->regions))
+				if (!nvkm_mm_contiguous(mem->mem))
 					evict = true;
 			}
 			nvbo->tile_flags &= ~NOUVEAU_GEM_TILE_NONCONTIG;
@@ -443,7 +446,7 @@ void
 nouveau_bo_sync_for_device(struct nouveau_bo *nvbo)
 {
 	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
 	int i;
 
@@ -463,7 +466,7 @@ void
 nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo)
 {
 	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
 	int i;
 
@@ -579,9 +582,9 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 					 TTM_PL_FLAG_WC;
 		man->default_caching = TTM_PL_FLAG_WC;
 
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
 			/* Some BARs do not support being ioremapped WC */
-			if (nvxx_bar(&drm->device)->iomap_uncached) {
+			if (nvxx_bar(&drm->client.device)->iomap_uncached) {
 				man->available_caching = TTM_PL_FLAG_UNCACHED;
 				man->default_caching = TTM_PL_FLAG_UNCACHED;
 			}
@@ -594,7 +597,7 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 		}
 		break;
 	case TTM_PL_TT:
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA)
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA)
 			man->func = &nouveau_gart_manager;
 		else
 		if (!drm->agp.bridge)
@@ -654,20 +657,20 @@ nve0_bo_move_init(struct nouveau_channel *chan, u32 handle)
 
 static int
 nve0_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
+	struct nvkm_mem *mem = old_reg->mm_node;
 	int ret = RING_SPACE(chan, 10);
 	if (ret == 0) {
 		BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8);
-		OUT_RING  (chan, upper_32_bits(node->vma[0].offset));
-		OUT_RING  (chan, lower_32_bits(node->vma[0].offset));
-		OUT_RING  (chan, upper_32_bits(node->vma[1].offset));
-		OUT_RING  (chan, lower_32_bits(node->vma[1].offset));
+		OUT_RING  (chan, upper_32_bits(mem->vma[0].offset));
+		OUT_RING  (chan, lower_32_bits(mem->vma[0].offset));
+		OUT_RING  (chan, upper_32_bits(mem->vma[1].offset));
+		OUT_RING  (chan, lower_32_bits(mem->vma[1].offset));
 		OUT_RING  (chan, PAGE_SIZE);
 		OUT_RING  (chan, PAGE_SIZE);
 		OUT_RING  (chan, PAGE_SIZE);
-		OUT_RING  (chan, new_mem->num_pages);
+		OUT_RING  (chan, new_reg->num_pages);
 		BEGIN_IMC0(chan, NvSubCopy, 0x0300, 0x0386);
 	}
 	return ret;
@@ -686,15 +689,15 @@ nvc0_bo_move_init(struct nouveau_channel *chan, u32 handle)
 
 static int
 nvc0_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
-	u64 src_offset = node->vma[0].offset;
-	u64 dst_offset = node->vma[1].offset;
-	u32 page_count = new_mem->num_pages;
+	struct nvkm_mem *mem = old_reg->mm_node;
+	u64 src_offset = mem->vma[0].offset;
+	u64 dst_offset = mem->vma[1].offset;
+	u32 page_count = new_reg->num_pages;
 	int ret;
 
-	page_count = new_mem->num_pages;
+	page_count = new_reg->num_pages;
 	while (page_count) {
 		int line_count = (page_count > 8191) ? 8191 : page_count;
 
@@ -724,15 +727,15 @@ nvc0_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 
 static int
 nvc0_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
-	u64 src_offset = node->vma[0].offset;
-	u64 dst_offset = node->vma[1].offset;
-	u32 page_count = new_mem->num_pages;
+	struct nvkm_mem *mem = old_reg->mm_node;
+	u64 src_offset = mem->vma[0].offset;
+	u64 dst_offset = mem->vma[1].offset;
+	u32 page_count = new_reg->num_pages;
 	int ret;
 
-	page_count = new_mem->num_pages;
+	page_count = new_reg->num_pages;
 	while (page_count) {
 		int line_count = (page_count > 2047) ? 2047 : page_count;
 
@@ -763,15 +766,15 @@ nvc0_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 
 static int
 nva3_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
-	u64 src_offset = node->vma[0].offset;
-	u64 dst_offset = node->vma[1].offset;
-	u32 page_count = new_mem->num_pages;
+	struct nvkm_mem *mem = old_reg->mm_node;
+	u64 src_offset = mem->vma[0].offset;
+	u64 dst_offset = mem->vma[1].offset;
+	u32 page_count = new_reg->num_pages;
 	int ret;
 
-	page_count = new_mem->num_pages;
+	page_count = new_reg->num_pages;
 	while (page_count) {
 		int line_count = (page_count > 8191) ? 8191 : page_count;
 
@@ -801,35 +804,35 @@ nva3_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 
 static int
 nv98_bo_move_exec(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
+	struct nvkm_mem *mem = old_reg->mm_node;
 	int ret = RING_SPACE(chan, 7);
 	if (ret == 0) {
 		BEGIN_NV04(chan, NvSubCopy, 0x0320, 6);
-		OUT_RING  (chan, upper_32_bits(node->vma[0].offset));
-		OUT_RING  (chan, lower_32_bits(node->vma[0].offset));
-		OUT_RING  (chan, upper_32_bits(node->vma[1].offset));
-		OUT_RING  (chan, lower_32_bits(node->vma[1].offset));
+		OUT_RING  (chan, upper_32_bits(mem->vma[0].offset));
+		OUT_RING  (chan, lower_32_bits(mem->vma[0].offset));
+		OUT_RING  (chan, upper_32_bits(mem->vma[1].offset));
+		OUT_RING  (chan, lower_32_bits(mem->vma[1].offset));
 		OUT_RING  (chan, 0x00000000 /* COPY */);
-		OUT_RING  (chan, new_mem->num_pages << PAGE_SHIFT);
+		OUT_RING  (chan, new_reg->num_pages << PAGE_SHIFT);
 	}
 	return ret;
 }
 
 static int
 nv84_bo_move_exec(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
+	struct nvkm_mem *mem = old_reg->mm_node;
 	int ret = RING_SPACE(chan, 7);
 	if (ret == 0) {
 		BEGIN_NV04(chan, NvSubCopy, 0x0304, 6);
-		OUT_RING  (chan, new_mem->num_pages << PAGE_SHIFT);
-		OUT_RING  (chan, upper_32_bits(node->vma[0].offset));
-		OUT_RING  (chan, lower_32_bits(node->vma[0].offset));
-		OUT_RING  (chan, upper_32_bits(node->vma[1].offset));
-		OUT_RING  (chan, lower_32_bits(node->vma[1].offset));
+		OUT_RING  (chan, new_reg->num_pages << PAGE_SHIFT);
+		OUT_RING  (chan, upper_32_bits(mem->vma[0].offset));
+		OUT_RING  (chan, lower_32_bits(mem->vma[0].offset));
+		OUT_RING  (chan, upper_32_bits(mem->vma[1].offset));
+		OUT_RING  (chan, lower_32_bits(mem->vma[1].offset));
 		OUT_RING  (chan, 0x00000000 /* MODE_COPY, QUERY_NONE */);
 	}
 	return ret;
@@ -853,14 +856,14 @@ nv50_bo_move_init(struct nouveau_channel *chan, u32 handle)
 
 static int
 nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
-	u64 length = (new_mem->num_pages << PAGE_SHIFT);
-	u64 src_offset = node->vma[0].offset;
-	u64 dst_offset = node->vma[1].offset;
-	int src_tiled = !!node->memtype;
-	int dst_tiled = !!((struct nvkm_mem *)new_mem->mm_node)->memtype;
+	struct nvkm_mem *mem = old_reg->mm_node;
+	u64 length = (new_reg->num_pages << PAGE_SHIFT);
+	u64 src_offset = mem->vma[0].offset;
+	u64 dst_offset = mem->vma[1].offset;
+	int src_tiled = !!mem->memtype;
+	int dst_tiled = !!((struct nvkm_mem *)new_reg->mm_node)->memtype;
 	int ret;
 
 	while (length) {
@@ -940,20 +943,20 @@ nv04_bo_move_init(struct nouveau_channel *chan, u32 handle)
 
 static inline uint32_t
 nouveau_bo_mem_ctxdma(struct ttm_buffer_object *bo,
-		      struct nouveau_channel *chan, struct ttm_mem_reg *mem)
+		      struct nouveau_channel *chan, struct ttm_mem_reg *reg)
 {
-	if (mem->mem_type == TTM_PL_TT)
+	if (reg->mem_type == TTM_PL_TT)
 		return NvDmaTT;
 	return chan->vram.handle;
 }
 
 static int
 nv04_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	u32 src_offset = old_mem->start << PAGE_SHIFT;
-	u32 dst_offset = new_mem->start << PAGE_SHIFT;
-	u32 page_count = new_mem->num_pages;
+	u32 src_offset = old_reg->start << PAGE_SHIFT;
+	u32 dst_offset = new_reg->start << PAGE_SHIFT;
+	u32 page_count = new_reg->num_pages;
 	int ret;
 
 	ret = RING_SPACE(chan, 3);
@@ -961,10 +964,10 @@ nv04_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 		return ret;
 
 	BEGIN_NV04(chan, NvSubCopy, NV_MEMORY_TO_MEMORY_FORMAT_DMA_SOURCE, 2);
-	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, old_mem));
-	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, new_mem));
+	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, old_reg));
+	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, new_reg));
 
-	page_count = new_mem->num_pages;
+	page_count = new_reg->num_pages;
 	while (page_count) {
 		int line_count = (page_count > 2047) ? 2047 : page_count;
 
@@ -995,33 +998,33 @@ nv04_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 
 static int
 nouveau_bo_move_prep(struct nouveau_drm *drm, struct ttm_buffer_object *bo,
-		     struct ttm_mem_reg *mem)
+		     struct ttm_mem_reg *reg)
 {
-	struct nvkm_mem *old_node = bo->mem.mm_node;
-	struct nvkm_mem *new_node = mem->mm_node;
-	u64 size = (u64)mem->num_pages << PAGE_SHIFT;
+	struct nvkm_mem *old_mem = bo->mem.mm_node;
+	struct nvkm_mem *new_mem = reg->mm_node;
+	u64 size = (u64)reg->num_pages << PAGE_SHIFT;
 	int ret;
 
-	ret = nvkm_vm_get(drm->client.vm, size, old_node->page_shift,
-			  NV_MEM_ACCESS_RW, &old_node->vma[0]);
+	ret = nvkm_vm_get(drm->client.vm, size, old_mem->page_shift,
+			  NV_MEM_ACCESS_RW, &old_mem->vma[0]);
 	if (ret)
 		return ret;
 
-	ret = nvkm_vm_get(drm->client.vm, size, new_node->page_shift,
-			  NV_MEM_ACCESS_RW, &old_node->vma[1]);
+	ret = nvkm_vm_get(drm->client.vm, size, new_mem->page_shift,
+			  NV_MEM_ACCESS_RW, &old_mem->vma[1]);
 	if (ret) {
-		nvkm_vm_put(&old_node->vma[0]);
+		nvkm_vm_put(&old_mem->vma[0]);
 		return ret;
 	}
 
-	nvkm_vm_map(&old_node->vma[0], old_node);
-	nvkm_vm_map(&old_node->vma[1], new_node);
+	nvkm_vm_map(&old_mem->vma[0], old_mem);
+	nvkm_vm_map(&old_mem->vma[1], new_mem);
 	return 0;
 }
 
 static int
 nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
-		     bool no_wait_gpu, struct ttm_mem_reg *new_mem)
+		     bool no_wait_gpu, struct ttm_mem_reg *new_reg)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct nouveau_channel *chan = drm->ttm.chan;
@@ -1033,8 +1036,8 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
 	 * old nvkm_mem node, these will get cleaned up after ttm has
 	 * destroyed the ttm_mem_reg
 	 */
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
-		ret = nouveau_bo_move_prep(drm, bo, new_mem);
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+		ret = nouveau_bo_move_prep(drm, bo, new_reg);
 		if (ret)
 			return ret;
 	}
@@ -1042,14 +1045,14 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
 	mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING);
 	ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, intr);
 	if (ret == 0) {
-		ret = drm->ttm.move(chan, bo, &bo->mem, new_mem);
+		ret = drm->ttm.move(chan, bo, &bo->mem, new_reg);
 		if (ret == 0) {
 			ret = nouveau_fence_new(chan, false, &fence);
 			if (ret == 0) {
 				ret = ttm_bo_move_accel_cleanup(bo,
 								&fence->base,
 								evict,
-								new_mem);
+								new_reg);
 				nouveau_fence_unref(&fence);
 			}
 		}
@@ -1124,7 +1127,7 @@ nouveau_bo_move_init(struct nouveau_drm *drm)
 
 static int
 nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
-		      bool no_wait_gpu, struct ttm_mem_reg *new_mem)
+		      bool no_wait_gpu, struct ttm_mem_reg *new_reg)
 {
 	struct ttm_place placement_memtype = {
 		.fpfn = 0,
@@ -1132,35 +1135,35 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
 		.flags = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING
 	};
 	struct ttm_placement placement;
-	struct ttm_mem_reg tmp_mem;
+	struct ttm_mem_reg tmp_reg;
 	int ret;
 
 	placement.num_placement = placement.num_busy_placement = 1;
 	placement.placement = placement.busy_placement = &placement_memtype;
 
-	tmp_mem = *new_mem;
-	tmp_mem.mm_node = NULL;
-	ret = ttm_bo_mem_space(bo, &placement, &tmp_mem, intr, no_wait_gpu);
+	tmp_reg = *new_reg;
+	tmp_reg.mm_node = NULL;
+	ret = ttm_bo_mem_space(bo, &placement, &tmp_reg, intr, no_wait_gpu);
 	if (ret)
 		return ret;
 
-	ret = ttm_tt_bind(bo->ttm, &tmp_mem);
+	ret = ttm_tt_bind(bo->ttm, &tmp_reg);
 	if (ret)
 		goto out;
 
-	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_gpu, &tmp_mem);
+	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_gpu, &tmp_reg);
 	if (ret)
 		goto out;
 
-	ret = ttm_bo_move_ttm(bo, intr, no_wait_gpu, new_mem);
+	ret = ttm_bo_move_ttm(bo, intr, no_wait_gpu, new_reg);
 out:
-	ttm_bo_mem_put(bo, &tmp_mem);
+	ttm_bo_mem_put(bo, &tmp_reg);
 	return ret;
 }
 
 static int
 nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
-		      bool no_wait_gpu, struct ttm_mem_reg *new_mem)
+		      bool no_wait_gpu, struct ttm_mem_reg *new_reg)
 {
 	struct ttm_place placement_memtype = {
 		.fpfn = 0,
@@ -1168,34 +1171,34 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
 		.flags = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING
 	};
 	struct ttm_placement placement;
-	struct ttm_mem_reg tmp_mem;
+	struct ttm_mem_reg tmp_reg;
 	int ret;
 
 	placement.num_placement = placement.num_busy_placement = 1;
 	placement.placement = placement.busy_placement = &placement_memtype;
 
-	tmp_mem = *new_mem;
-	tmp_mem.mm_node = NULL;
-	ret = ttm_bo_mem_space(bo, &placement, &tmp_mem, intr, no_wait_gpu);
+	tmp_reg = *new_reg;
+	tmp_reg.mm_node = NULL;
+	ret = ttm_bo_mem_space(bo, &placement, &tmp_reg, intr, no_wait_gpu);
 	if (ret)
 		return ret;
 
-	ret = ttm_bo_move_ttm(bo, intr, no_wait_gpu, &tmp_mem);
+	ret = ttm_bo_move_ttm(bo, intr, no_wait_gpu, &tmp_reg);
 	if (ret)
 		goto out;
 
-	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_gpu, new_mem);
+	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_gpu, new_reg);
 	if (ret)
 		goto out;
 
 out:
-	ttm_bo_mem_put(bo, &tmp_mem);
+	ttm_bo_mem_put(bo, &tmp_reg);
 	return ret;
 }
 
 static void
 nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, bool evict,
-		     struct ttm_mem_reg *new_mem)
+		     struct ttm_mem_reg *new_reg)
 {
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
 	struct nvkm_vma *vma;
@@ -1205,10 +1208,10 @@ nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, bool evict,
 		return;
 
 	list_for_each_entry(vma, &nvbo->vma_list, head) {
-		if (new_mem && new_mem->mem_type != TTM_PL_SYSTEM &&
-			      (new_mem->mem_type == TTM_PL_VRAM ||
+		if (new_reg && new_reg->mem_type != TTM_PL_SYSTEM &&
+			      (new_reg->mem_type == TTM_PL_VRAM ||
 			       nvbo->page_shift != vma->vm->mmu->lpg_shift)) {
-			nvkm_vm_map(vma, new_mem->mm_node);
+			nvkm_vm_map(vma, new_reg->mm_node);
 		} else {
 			WARN_ON(ttm_bo_wait(bo, false, false));
 			nvkm_vm_unmap(vma);
@@ -1217,20 +1220,20 @@ nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, bool evict,
 }
 
 static int
-nouveau_bo_vm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem,
+nouveau_bo_vm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_reg,
 		   struct nouveau_drm_tile **new_tile)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct drm_device *dev = drm->dev;
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
-	u64 offset = new_mem->start << PAGE_SHIFT;
+	u64 offset = new_reg->start << PAGE_SHIFT;
 
 	*new_tile = NULL;
-	if (new_mem->mem_type != TTM_PL_VRAM)
+	if (new_reg->mem_type != TTM_PL_VRAM)
 		return 0;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
-		*new_tile = nv10_bo_set_tiling(dev, offset, new_mem->size,
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
+		*new_tile = nv10_bo_set_tiling(dev, offset, new_reg->size,
 						nvbo->tile_mode,
 						nvbo->tile_flags);
 	}
@@ -1253,11 +1256,11 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
 
 static int
 nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
-		bool no_wait_gpu, struct ttm_mem_reg *new_mem)
+		bool no_wait_gpu, struct ttm_mem_reg *new_reg)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
-	struct ttm_mem_reg *old_mem = &bo->mem;
+	struct ttm_mem_reg *old_reg = &bo->mem;
 	struct nouveau_drm_tile *new_tile = NULL;
 	int ret = 0;
 
@@ -1268,31 +1271,31 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
 	if (nvbo->pin_refcnt)
 		NV_WARN(drm, "Moving pinned object %p!\n", nvbo);
 
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
-		ret = nouveau_bo_vm_bind(bo, new_mem, &new_tile);
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
+		ret = nouveau_bo_vm_bind(bo, new_reg, &new_tile);
 		if (ret)
 			return ret;
 	}
 
 	/* Fake bo copy. */
-	if (old_mem->mem_type == TTM_PL_SYSTEM && !bo->ttm) {
+	if (old_reg->mem_type == TTM_PL_SYSTEM && !bo->ttm) {
 		BUG_ON(bo->mem.mm_node != NULL);
-		bo->mem = *new_mem;
-		new_mem->mm_node = NULL;
+		bo->mem = *new_reg;
+		new_reg->mm_node = NULL;
 		goto out;
 	}
 
 	/* Hardware assisted copy. */
 	if (drm->ttm.move) {
-		if (new_mem->mem_type == TTM_PL_SYSTEM)
+		if (new_reg->mem_type == TTM_PL_SYSTEM)
 			ret = nouveau_bo_move_flipd(bo, evict, intr,
-						    no_wait_gpu, new_mem);
-		else if (old_mem->mem_type == TTM_PL_SYSTEM)
+						    no_wait_gpu, new_reg);
+		else if (old_reg->mem_type == TTM_PL_SYSTEM)
 			ret = nouveau_bo_move_flips(bo, evict, intr,
-						    no_wait_gpu, new_mem);
+						    no_wait_gpu, new_reg);
 		else
 			ret = nouveau_bo_move_m2mf(bo, evict, intr,
-						   no_wait_gpu, new_mem);
+						   no_wait_gpu, new_reg);
 		if (!ret)
 			goto out;
 	}
@@ -1300,10 +1303,10 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
 	/* Fallback to software copy. */
 	ret = ttm_bo_wait(bo, intr, no_wait_gpu);
 	if (ret == 0)
-		ret = ttm_bo_move_memcpy(bo, intr, no_wait_gpu, new_mem);
+		ret = ttm_bo_move_memcpy(bo, intr, no_wait_gpu, new_reg);
 
 out:
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
 		if (ret)
 			nouveau_bo_vm_cleanup(bo, NULL, &new_tile);
 		else
@@ -1323,54 +1326,54 @@ nouveau_bo_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 }
 
 static int
-nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg)
 {
-	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
+	struct ttm_mem_type_manager *man = &bdev->man[reg->mem_type];
 	struct nouveau_drm *drm = nouveau_bdev(bdev);
-	struct nvkm_device *device = nvxx_device(&drm->device);
-	struct nvkm_mem *node = mem->mm_node;
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
+	struct nvkm_mem *mem = reg->mm_node;
 	int ret;
 
-	mem->bus.addr = NULL;
-	mem->bus.offset = 0;
-	mem->bus.size = mem->num_pages << PAGE_SHIFT;
-	mem->bus.base = 0;
-	mem->bus.is_iomem = false;
+	reg->bus.addr = NULL;
+	reg->bus.offset = 0;
+	reg->bus.size = reg->num_pages << PAGE_SHIFT;
+	reg->bus.base = 0;
+	reg->bus.is_iomem = false;
 	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
 		return -EINVAL;
-	switch (mem->mem_type) {
+	switch (reg->mem_type) {
 	case TTM_PL_SYSTEM:
 		/* System memory */
 		return 0;
 	case TTM_PL_TT:
 #if IS_ENABLED(CONFIG_AGP)
 		if (drm->agp.bridge) {
-			mem->bus.offset = mem->start << PAGE_SHIFT;
-			mem->bus.base = drm->agp.base;
-			mem->bus.is_iomem = !drm->agp.cma;
+			reg->bus.offset = reg->start << PAGE_SHIFT;
+			reg->bus.base = drm->agp.base;
+			reg->bus.is_iomem = !drm->agp.cma;
 		}
 #endif
-		if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA || !node->memtype)
+		if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA || !mem->memtype)
 			/* untiled */
 			break;
 		/* fallthrough, tiled memory */
 	case TTM_PL_VRAM:
-		mem->bus.offset = mem->start << PAGE_SHIFT;
-		mem->bus.base = device->func->resource_addr(device, 1);
-		mem->bus.is_iomem = true;
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
-			struct nvkm_bar *bar = nvxx_bar(&drm->device);
+		reg->bus.offset = reg->start << PAGE_SHIFT;
+		reg->bus.base = device->func->resource_addr(device, 1);
+		reg->bus.is_iomem = true;
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+			struct nvkm_bar *bar = nvxx_bar(&drm->client.device);
 			int page_shift = 12;
-			if (drm->device.info.family >= NV_DEVICE_INFO_V0_FERMI)
-				page_shift = node->page_shift;
+			if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_FERMI)
+				page_shift = mem->page_shift;
 
-			ret = nvkm_bar_umap(bar, node->size << 12, page_shift,
-					    &node->bar_vma);
+			ret = nvkm_bar_umap(bar, mem->size << 12, page_shift,
+					    &mem->bar_vma);
 			if (ret)
 				return ret;
 
-			nvkm_vm_map(&node->bar_vma, node);
-			mem->bus.offset = node->bar_vma.offset;
+			nvkm_vm_map(&mem->bar_vma, mem);
+			reg->bus.offset = mem->bar_vma.offset;
 		}
 		break;
 	default:
@@ -1380,15 +1383,15 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 }
 
 static void
-nouveau_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+nouveau_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg)
 {
-	struct nvkm_mem *node = mem->mm_node;
+	struct nvkm_mem *mem = reg->mm_node;
 
-	if (!node->bar_vma.node)
+	if (!mem->bar_vma.node)
 		return;
 
-	nvkm_vm_unmap(&node->bar_vma);
-	nvkm_vm_put(&node->bar_vma);
+	nvkm_vm_unmap(&mem->bar_vma);
+	nvkm_vm_put(&mem->bar_vma);
 }
 
 static int
@@ -1396,7 +1399,7 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	u32 mappable = device->func->resource_size(device, 1) >> PAGE_SHIFT;
 	int i, ret;
 
@@ -1404,7 +1407,7 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 	 * nothing to do here.
 	 */
 	if (bo->mem.mem_type != TTM_PL_VRAM) {
-		if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA ||
+		if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA ||
 		    !nouveau_bo_tile_layout(nvbo))
 			return 0;
 
@@ -1419,7 +1422,7 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 	}
 
 	/* make sure bo is in mappable vram */
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA ||
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA ||
 	    bo->mem.start + bo->mem.num_pages < mappable)
 		return 0;
 
@@ -1461,7 +1464,7 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm)
 	}
 
 	drm = nouveau_bdev(ttm->bdev);
-	device = nvxx_device(&drm->device);
+	device = nvxx_device(&drm->client.device);
 	dev = drm->dev;
 	pdev = device->dev;
 
@@ -1518,7 +1521,7 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm)
 		return;
 
 	drm = nouveau_bdev(ttm->bdev);
-	device = nvxx_device(&drm->device);
+	device = nvxx_device(&drm->client.device);
 	dev = drm->dev;
 	pdev = device->dev;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
index e42360983229..b06a5385d6dd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -26,6 +26,8 @@ struct nouveau_bo {
 	struct list_head vma_list;
 	unsigned page_shift;
 
+	struct nouveau_cli *cli;
+
 	u32 tile_mode;
 	u32 tile_flags;
 	struct nouveau_drm_tile *tile;
@@ -69,7 +71,7 @@ nouveau_bo_ref(struct nouveau_bo *ref, struct nouveau_bo **pnvbo)
 extern struct ttm_bo_driver nouveau_bo_driver;
 
 void nouveau_bo_move_init(struct nouveau_drm *);
-int  nouveau_bo_new(struct drm_device *, int size, int align, u32 flags,
+int  nouveau_bo_new(struct nouveau_cli *, u64 size, int align, u32 flags,
 		    u32 tile_mode, u32 tile_flags, struct sg_table *sg,
 		    struct reservation_object *robj,
 		    struct nouveau_bo **);
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index f9b3c811187e..dbc41fa86ee8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -45,10 +45,20 @@ MODULE_PARM_DESC(vram_pushbuf, "Create DMA push buffers in VRAM");
 int nouveau_vram_pushbuf;
 module_param_named(vram_pushbuf, nouveau_vram_pushbuf, int, 0400);
 
+static int
+nouveau_channel_killed(struct nvif_notify *ntfy)
+{ /* notify callback for chan->kill (set up in nouveau_channel_init) */
+	struct nouveau_channel *chan = container_of(ntfy, typeof(*chan), kill);
+	struct nouveau_cli *cli = (void *)chan->user.client; /* for the log */
+	NV_PRINTK(warn, cli, "channel %d killed!\n", chan->chid);
+	atomic_set(&chan->killed, 1); /* tested by nouveau_channel_idle() */
+	return NVIF_NOTIFY_DROP;
+}
+
 int
 nouveau_channel_idle(struct nouveau_channel *chan)
 {
-	if (likely(chan && chan->fence)) {
+	if (likely(chan && chan->fence && !atomic_read(&chan->killed))) {
 		struct nouveau_cli *cli = (void *)chan->user.client;
 		struct nouveau_fence *fence = NULL;
 		int ret;
@@ -78,6 +88,7 @@ nouveau_channel_del(struct nouveau_channel **pchan)
 		nvif_object_fini(&chan->nvsw);
 		nvif_object_fini(&chan->gart);
 		nvif_object_fini(&chan->vram);
+		nvif_notify_fini(&chan->kill);
 		nvif_object_fini(&chan->user);
 		nvif_object_fini(&chan->push.ctxdma);
 		nouveau_bo_vma_del(chan->push.buffer, &chan->push.vma);
@@ -107,13 +118,14 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device,
 
 	chan->device = device;
 	chan->drm = drm;
+	atomic_set(&chan->killed, 0);
 
 	/* allocate memory for dma push buffer */
 	target = TTM_PL_FLAG_TT | TTM_PL_FLAG_UNCACHED;
 	if (nouveau_vram_pushbuf)
 		target = TTM_PL_FLAG_VRAM;
 
-	ret = nouveau_bo_new(drm->dev, size, 0, target, 0, 0, NULL, NULL,
+	ret = nouveau_bo_new(cli, size, 0, target, 0, 0, NULL, NULL,
 			    &chan->push.buffer);
 	if (ret == 0) {
 		ret = nouveau_bo_pin(chan->push.buffer, target, false);
@@ -301,12 +313,26 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart)
 {
 	struct nvif_device *device = chan->device;
 	struct nouveau_cli *cli = (void *)chan->user.client;
+	struct nouveau_drm *drm = chan->drm;
 	struct nvkm_mmu *mmu = nvxx_mmu(device);
 	struct nv_dma_v0 args = {};
 	int ret, i;
 
 	nvif_object_map(&chan->user);
 
+	if (chan->user.oclass >= FERMI_CHANNEL_GPFIFO) {
+		ret = nvif_notify_init(&chan->user, nouveau_channel_killed,
+				       true, NV906F_V0_NTFY_KILLED,
+				       NULL, 0, 0, &chan->kill);
+		if (ret == 0)
+			ret = nvif_notify_get(&chan->kill);
+		if (ret) {
+			NV_ERROR(drm, "Failed to request channel kill "
+				      "notification: %d\n", ret);
+			return ret;
+		}
+	}
+
 	/* allocate dma objects to cover all allowed vram, and gart */
 	if (device->info.family < NV_DEVICE_INFO_V0_FERMI) {
 		if (device->info.family >= NV_DEVICE_INFO_V0_TESLA) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h
index 48062c94f36d..46b947ba1cf4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.h
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.h
@@ -1,7 +1,7 @@
 #ifndef __NOUVEAU_CHAN_H__
 #define __NOUVEAU_CHAN_H__
-
 #include <nvif/object.h>
+#include <nvif/notify.h>
 struct nvif_device;
 
 struct nouveau_channel {
@@ -38,6 +38,9 @@ struct nouveau_channel {
 	u32 user_put;
 
 	struct nvif_object user;
+
+	struct nvif_notify kill;
+	atomic_t killed;
 };
 
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 966d20ab4de4..f5add64c093f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -419,7 +419,7 @@ nouveau_connector_ddc_detect(struct drm_connector *connector)
 	struct drm_device *dev = connector->dev;
 	struct nouveau_connector *nv_connector = nouveau_connector(connector);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_gpio *gpio = nvxx_gpio(&drm->device);
+	struct nvkm_gpio *gpio = nvxx_gpio(&drm->client.device);
 	struct nouveau_encoder *nv_encoder;
 	struct drm_encoder *encoder;
 	int i, panel = -ENODEV;
@@ -521,7 +521,7 @@ nouveau_connector_set_encoder(struct drm_connector *connector,
 		return;
 	nv_connector->detected_encoder = nv_encoder;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
 		connector->interlace_allowed = true;
 		connector->doublescan_allowed = true;
 	} else
@@ -531,8 +531,8 @@ nouveau_connector_set_encoder(struct drm_connector *connector,
 		connector->interlace_allowed = false;
 	} else {
 		connector->doublescan_allowed = true;
-		if (drm->device.info.family == NV_DEVICE_INFO_V0_KELVIN ||
-		    (drm->device.info.family == NV_DEVICE_INFO_V0_CELSIUS &&
+		if (drm->client.device.info.family == NV_DEVICE_INFO_V0_KELVIN ||
+		    (drm->client.device.info.family == NV_DEVICE_INFO_V0_CELSIUS &&
 		     (dev->pdev->device & 0x0ff0) != 0x0100 &&
 		     (dev->pdev->device & 0x0ff0) != 0x0150))
 			/* HW is broken */
@@ -984,17 +984,17 @@ get_tmds_link_bandwidth(struct drm_connector *connector, bool hdmi)
 		/* Note: these limits are conservative, some Fermi's
 		 * can do 297 MHz. Unclear how this can be determined.
 		 */
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_KEPLER)
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_KEPLER)
 			return 297000;
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_FERMI)
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_FERMI)
 			return 225000;
 	}
 	if (dcb->location != DCB_LOC_ON_CHIP ||
-	    drm->device.info.chipset >= 0x46)
+	    drm->client.device.info.chipset >= 0x46)
 		return 165000;
-	else if (drm->device.info.chipset >= 0x40)
+	else if (drm->client.device.info.chipset >= 0x40)
 		return 155000;
-	else if (drm->device.info.chipset >= 0x18)
+	else if (drm->client.device.info.chipset >= 0x18)
 		return 135000;
 	else
 		return 112000;
@@ -1041,7 +1041,7 @@ nouveau_connector_mode_valid(struct drm_connector *connector,
 		clock = clock * (connector->display_info.bpc * 3) / 10;
 		break;
 	default:
-		BUG_ON(1);
+		BUG();
 		return MODE_BAD;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
index 411c12cdb249..fd64dfdc7d4f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
@@ -259,8 +259,9 @@ nouveau_debugfs_init(struct nouveau_drm *drm)
 	if (!drm->debugfs)
 		return -ENOMEM;
 
-	ret = nvif_object_init(&drm->device.object, 0, NVIF_CLASS_CONTROL,
-			       NULL, 0, &drm->debugfs->ctrl);
+	ret = nvif_object_init(&drm->client.device.object, 0,
+			       NVIF_CLASS_CONTROL, NULL, 0,
+			       &drm->debugfs->ctrl);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 6b570079d185..72fdba1a1c5d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -414,7 +414,8 @@ nouveau_display_init(struct drm_device *dev)
 		return ret;
 
 	/* enable polling for external displays */
-	drm_kms_helper_poll_enable(dev);
+	if (!dev->mode_config.poll_enabled)
+		drm_kms_helper_poll_enable(dev);
 
 	/* enable hotplug interrupts */
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
@@ -495,7 +496,7 @@ int
 nouveau_display_create(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	struct nouveau_display *disp;
 	int ret;
 
@@ -512,15 +513,15 @@ nouveau_display_create(struct drm_device *dev)
 
 	dev->mode_config.min_width = 0;
 	dev->mode_config.min_height = 0;
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_CELSIUS) {
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_CELSIUS) {
 		dev->mode_config.max_width = 2048;
 		dev->mode_config.max_height = 2048;
 	} else
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
 		dev->mode_config.max_width = 4096;
 		dev->mode_config.max_height = 4096;
 	} else
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_FERMI) {
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_FERMI) {
 		dev->mode_config.max_width = 8192;
 		dev->mode_config.max_height = 8192;
 	} else {
@@ -531,7 +532,7 @@ nouveau_display_create(struct drm_device *dev)
 	dev->mode_config.preferred_depth = 24;
 	dev->mode_config.prefer_shadow = 1;
 
-	if (drm->device.info.chipset < 0x11)
+	if (drm->client.device.info.chipset < 0x11)
 		dev->mode_config.async_page_flip = false;
 	else
 		dev->mode_config.async_page_flip = true;
@@ -558,7 +559,7 @@ nouveau_display_create(struct drm_device *dev)
 		int i;
 
 		for (i = 0, ret = -ENODEV; ret && i < ARRAY_SIZE(oclass); i++) {
-			ret = nvif_object_init(&drm->device.object, 0,
+			ret = nvif_object_init(&drm->client.device.object, 0,
 					       oclass[i], NULL, 0, &disp->disp);
 		}
 
@@ -1057,6 +1058,7 @@ int
 nouveau_display_dumb_create(struct drm_file *file_priv, struct drm_device *dev,
 			    struct drm_mode_create_dumb *args)
 {
+	struct nouveau_cli *cli = nouveau_cli(file_priv);
 	struct nouveau_bo *bo;
 	uint32_t domain;
 	int ret;
@@ -1066,12 +1068,12 @@ nouveau_display_dumb_create(struct drm_file *file_priv, struct drm_device *dev,
 	args->size = roundup(args->size, PAGE_SIZE);
 
 	/* Use VRAM if there is any ; otherwise fallback to system memory */
-	if (nouveau_drm(dev)->device.info.ram_size != 0)
+	if (nouveau_drm(dev)->client.device.info.ram_size != 0)
 		domain = NOUVEAU_GEM_DOMAIN_VRAM;
 	else
 		domain = NOUVEAU_GEM_DOMAIN_GART;
 
-	ret = nouveau_gem_new(dev, args->size, 0, domain, 0, 0, &bo);
+	ret = nouveau_gem_new(cli, args->size, 0, domain, 0, 0, &bo);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index dd7b52ab505a..468ed1d3bb26 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -37,6 +37,8 @@
 #include <core/pci.h>
 #include <core/tegra.h>
 
+#include <nvif/driver.h>
+
 #include <nvif/class.h>
 #include <nvif/cl0002.h>
 #include <nvif/cla06f.h>
@@ -109,35 +111,53 @@ nouveau_name(struct drm_device *dev)
 		return nouveau_platform_name(dev->platformdev);
 }
 
+static void
+nouveau_cli_fini(struct nouveau_cli *cli)
+{ /* Undo nouveau_cli_init(); does not free cli, callers kfree() it. */
+	nvkm_vm_ref(NULL, &nvxx_client(&cli->base)->vm, NULL);
+	usif_client_fini(cli);
+	nvif_device_fini(&cli->device); /* device before its parent client */
+	nvif_client_fini(&cli->base);
+}
+
 static int
-nouveau_cli_create(struct drm_device *dev, const char *sname,
-		   int size, void **pcli)
+nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
+		 struct nouveau_cli *cli)
 {
-	struct nouveau_cli *cli = *pcli = kzalloc(size, GFP_KERNEL);
+	u64 device = nouveau_name(drm->dev);
 	int ret;
-	if (cli) {
-		snprintf(cli->name, sizeof(cli->name), "%s", sname);
-		cli->dev = dev;
 
-		ret = nvif_client_init(NULL, cli->name, nouveau_name(dev),
-				       nouveau_config, nouveau_debug,
+	snprintf(cli->name, sizeof(cli->name), "%s", sname);
+	cli->dev = drm->dev;
+	mutex_init(&cli->mutex);
+	usif_client_init(cli);
+
+	if (cli == &drm->client) {
+		ret = nvif_driver_init(NULL, nouveau_config, nouveau_debug,
+				       cli->name, device, &cli->base);
+	} else {
+		ret = nvif_client_init(&drm->client.base, cli->name, device,
 				       &cli->base);
-		if (ret == 0) {
-			mutex_init(&cli->mutex);
-			usif_client_init(cli);
-		}
-		return ret;
 	}
-	return -ENOMEM;
-}
+	if (ret) {
+		NV_ERROR(drm, "Client allocation failed: %d\n", ret);
+		goto done;
+	}
 
-static void
-nouveau_cli_destroy(struct nouveau_cli *cli)
-{
-	nvkm_vm_ref(NULL, &nvxx_client(&cli->base)->vm, NULL);
-	nvif_client_fini(&cli->base);
-	usif_client_fini(cli);
-	kfree(cli);
+	ret = nvif_device_init(&cli->base.object, 0, NV_DEVICE,
+			       &(struct nv_device_v0) {
+					.device = ~0,
+			       }, sizeof(struct nv_device_v0),
+			       &cli->device);
+	if (ret) {
+		NV_ERROR(drm, "Device allocation failed: %d\n", ret);
+		goto done;
+	}
+
+done:
+	if (ret)
+		nouveau_cli_fini(cli);
+	return ret;
 }
 
 static void
@@ -161,7 +181,7 @@ nouveau_accel_fini(struct nouveau_drm *drm)
 static void
 nouveau_accel_init(struct nouveau_drm *drm)
 {
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	struct nvif_sclass *sclass;
 	u32 arg0, arg1;
 	int ret, i, n;
@@ -215,7 +235,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
 	}
 
 	if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) {
-		ret = nouveau_channel_new(drm, &drm->device,
+		ret = nouveau_channel_new(drm, &drm->client.device,
 					  NVA06F_V0_ENGINE_CE0 |
 					  NVA06F_V0_ENGINE_CE1,
 					  0, &drm->cechan);
@@ -228,7 +248,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
 	if (device->info.chipset >= 0xa3 &&
 	    device->info.chipset != 0xaa &&
 	    device->info.chipset != 0xac) {
-		ret = nouveau_channel_new(drm, &drm->device,
+		ret = nouveau_channel_new(drm, &drm->client.device,
 					  NvDmaFB, NvDmaTT, &drm->cechan);
 		if (ret)
 			NV_ERROR(drm, "failed to create ce channel, %d\n", ret);
@@ -240,7 +260,8 @@ nouveau_accel_init(struct nouveau_drm *drm)
 		arg1 = NvDmaTT;
 	}
 
-	ret = nouveau_channel_new(drm, &drm->device, arg0, arg1, &drm->channel);
+	ret = nouveau_channel_new(drm, &drm->client.device,
+				  arg0, arg1, &drm->channel);
 	if (ret) {
 		NV_ERROR(drm, "failed to create kernel channel, %d\n", ret);
 		nouveau_accel_fini(drm);
@@ -280,8 +301,8 @@ nouveau_accel_init(struct nouveau_drm *drm)
 	}
 
 	if (device->info.family < NV_DEVICE_INFO_V0_FERMI) {
-		ret = nvkm_gpuobj_new(nvxx_device(&drm->device), 32, 0, false,
-				      NULL, &drm->notify);
+		ret = nvkm_gpuobj_new(nvxx_device(&drm->client.device), 32, 0,
+				      false, NULL, &drm->notify);
 		if (ret) {
 			NV_ERROR(drm, "failed to allocate notifier, %d\n", ret);
 			nouveau_accel_fini(drm);
@@ -407,12 +428,17 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 	struct nouveau_drm *drm;
 	int ret;
 
-	ret = nouveau_cli_create(dev, "DRM", sizeof(*drm), (void **)&drm);
+	if (!(drm = kzalloc(sizeof(*drm), GFP_KERNEL)))
+		return -ENOMEM;
+	dev->dev_private = drm;
+	drm->dev = dev;
+
+	ret = nouveau_cli_init(drm, "DRM", &drm->client);
 	if (ret)
 		return ret;
 
-	dev->dev_private = drm;
-	drm->dev = dev;
+	dev->irq_enabled = true;
+
 	nvxx_client(&drm->client.base)->debug =
 		nvkm_dbgopt(nouveau_debug, "DRM");
 
@@ -421,33 +447,24 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 
 	nouveau_get_hdmi_dev(drm);
 
-	ret = nvif_device_init(&drm->client.base.object, 0, NV_DEVICE,
-			       &(struct nv_device_v0) {
-					.device = ~0,
-			       }, sizeof(struct nv_device_v0),
-			       &drm->device);
-	if (ret)
-		goto fail_device;
-
-	dev->irq_enabled = true;
-
 	/* workaround an odd issue on nvc1 by disabling the device's
 	 * nosnoop capability.  hopefully won't cause issues until a
 	 * better fix is found - assuming there is one...
 	 */
-	if (drm->device.info.chipset == 0xc1)
-		nvif_mask(&drm->device.object, 0x00088080, 0x00000800, 0x00000000);
+	if (drm->client.device.info.chipset == 0xc1)
+		nvif_mask(&drm->client.device.object, 0x00088080, 0x00000800, 0x00000000);
 
 	nouveau_vga_init(drm);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
-		if (!nvxx_device(&drm->device)->mmu) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+		if (!nvxx_device(&drm->client.device)->mmu) {
 			ret = -ENOSYS;
 			goto fail_device;
 		}
 
-		ret = nvkm_vm_new(nvxx_device(&drm->device), 0, (1ULL << 40),
-				  0x1000, NULL, &drm->client.vm);
+		ret = nvkm_vm_new(nvxx_device(&drm->client.device),
+				  0, (1ULL << 40), 0x1000, NULL,
+				  &drm->client.vm);
 		if (ret)
 			goto fail_device;
 
@@ -497,8 +514,8 @@ fail_bios:
 fail_ttm:
 	nouveau_vga_fini(drm);
 fail_device:
-	nvif_device_fini(&drm->device);
-	nouveau_cli_destroy(&drm->client);
+	nouveau_cli_fini(&drm->client);
+	kfree(drm);
 	return ret;
 }
 
@@ -527,10 +544,10 @@ nouveau_drm_unload(struct drm_device *dev)
 	nouveau_ttm_fini(drm);
 	nouveau_vga_fini(drm);
 
-	nvif_device_fini(&drm->device);
 	if (drm->hdmi_device)
 		pci_dev_put(drm->hdmi_device);
-	nouveau_cli_destroy(&drm->client);
+	nouveau_cli_fini(&drm->client);
+	kfree(drm);
 }
 
 void
@@ -560,7 +577,6 @@ static int
 nouveau_do_suspend(struct drm_device *dev, bool runtime)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_cli *cli;
 	int ret;
 
 	nouveau_led_suspend(dev);
@@ -590,7 +606,7 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime)
 			goto fail_display;
 	}
 
-	NV_INFO(drm, "suspending client object trees...\n");
+	NV_INFO(drm, "suspending fence...\n");
 	if (drm->fence && nouveau_fence(drm)->suspend) {
 		if (!nouveau_fence(drm)->suspend(drm)) {
 			ret = -ENOMEM;
@@ -598,13 +614,7 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime)
 		}
 	}
 
-	list_for_each_entry(cli, &drm->clients, head) {
-		ret = nvif_client_suspend(&cli->base);
-		if (ret)
-			goto fail_client;
-	}
-
-	NV_INFO(drm, "suspending kernel object tree...\n");
+	NV_INFO(drm, "suspending object tree...\n");
 	ret = nvif_client_suspend(&drm->client.base);
 	if (ret)
 		goto fail_client;
@@ -612,10 +622,6 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime)
 	return 0;
 
 fail_client:
-	list_for_each_entry_continue_reverse(cli, &drm->clients, head) {
-		nvif_client_resume(&cli->base);
-	}
-
 	if (drm->fence && nouveau_fence(drm)->resume)
 		nouveau_fence(drm)->resume(drm);
 
@@ -631,19 +637,14 @@ static int
 nouveau_do_resume(struct drm_device *dev, bool runtime)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_cli *cli;
 
-	NV_INFO(drm, "resuming kernel object tree...\n");
+	NV_INFO(drm, "resuming object tree...\n");
 	nvif_client_resume(&drm->client.base);
 
-	NV_INFO(drm, "resuming client object trees...\n");
+	NV_INFO(drm, "resuming fence...\n");
 	if (drm->fence && nouveau_fence(drm)->resume)
 		nouveau_fence(drm)->resume(drm);
 
-	list_for_each_entry(cli, &drm->clients, head) {
-		nvif_client_resume(&cli->base);
-	}
-
 	nouveau_run_vbios_init(dev);
 
 	if (dev->mode_config.num_crtc) {
@@ -758,7 +759,7 @@ nouveau_pmops_runtime_resume(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
-	struct nvif_device *device = &nouveau_drm(drm_dev)->device;
+	struct nvif_device *device = &nouveau_drm(drm_dev)->client.device;
 	int ret;
 
 	if (nouveau_runtime_pm == 0)
@@ -772,7 +773,10 @@ nouveau_pmops_runtime_resume(struct device *dev)
 	pci_set_master(pdev);
 
 	ret = nouveau_do_resume(drm_dev, true);
-	drm_kms_helper_poll_enable(drm_dev);
+
+	if (!drm_dev->mode_config.poll_enabled)
+		drm_kms_helper_poll_enable(drm_dev);
+
 	/* do magic */
 	nvif_mask(&device->object, 0x088488, (1 << 25), (1 << 25));
 	vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_ON);
@@ -841,20 +845,20 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv)
 	get_task_comm(tmpname, current);
 	snprintf(name, sizeof(name), "%s[%d]", tmpname, pid_nr(fpriv->pid));
 
-	ret = nouveau_cli_create(dev, name, sizeof(*cli), (void **)&cli);
+	/* NULL cli must still reach done: to drop the runtime-PM reference */
+	cli = kzalloc(sizeof(*cli), GFP_KERNEL);
 
+	ret = cli ? nouveau_cli_init(drm, name, cli) : -ENOMEM;
 	if (ret)
-		goto out_suspend;
+		goto done;
 
 	cli->base.super = false;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
-		ret = nvkm_vm_new(nvxx_device(&drm->device), 0, (1ULL << 40),
-				  0x1000, NULL, &cli->vm);
-		if (ret) {
-			nouveau_cli_destroy(cli);
-			goto out_suspend;
-		}
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+		ret = nvkm_vm_new(nvxx_device(&drm->client.device), 0,
+				  (1ULL << 40), 0x1000, NULL, &cli->vm);
+		if (ret)
+			goto done;
 
 		nvxx_client(&cli->base)->vm = cli->vm;
 	}
@@ -865,10 +869,14 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv)
 	list_add(&cli->head, &drm->clients);
 	mutex_unlock(&drm->client.mutex);
 
-out_suspend:
+done:
+	if (ret && cli) {
+		nouveau_cli_fini(cli);
+		kfree(cli);
+	}
+
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
-
 	return ret;
 }
 
@@ -895,7 +903,8 @@ static void
 nouveau_drm_postclose(struct drm_device *dev, struct drm_file *fpriv)
 {
 	struct nouveau_cli *cli = nouveau_cli(fpriv);
-	nouveau_cli_destroy(cli);
+	nouveau_cli_fini(cli);
+	kfree(cli);
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 8d5ed5bfdacb..eadec2f49ad3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -86,14 +86,17 @@ enum nouveau_drm_handle {
 
 struct nouveau_cli {
 	struct nvif_client base;
+	struct drm_device *dev;
+	struct mutex mutex;
+
+	struct nvif_device device;
+
 	struct nvkm_vm *vm; /*XXX*/
 	struct list_head head;
-	struct mutex mutex;
 	void *abi16;
 	struct list_head objects;
 	struct list_head notifys;
 	char name[32];
-	struct drm_device *dev;
 };
 
 static inline struct nouveau_cli *
@@ -111,7 +114,6 @@ struct nouveau_drm {
 	struct nouveau_cli client;
 	struct drm_device *dev;
 
-	struct nvif_device device;
 	struct list_head clients;
 
 	struct {
@@ -165,6 +167,8 @@ struct nouveau_drm {
 	struct backlight_device *backlight;
 	struct list_head bl_connectors;
 	struct work_struct hpd_work;
+	struct work_struct fbcon_work;
+	int fbcon_new_state;
 #ifdef CONFIG_ACPI
 	struct notifier_block acpi_nb;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 971c147a3984..442e25c17383 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -60,7 +60,7 @@ nouveau_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 {
 	struct nouveau_fbdev *fbcon = info->par;
 	struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	int ret;
 
 	if (info->state != FBINFO_STATE_RUNNING)
@@ -92,7 +92,7 @@ nouveau_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *image)
 {
 	struct nouveau_fbdev *fbcon = info->par;
 	struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	int ret;
 
 	if (info->state != FBINFO_STATE_RUNNING)
@@ -124,7 +124,7 @@ nouveau_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 {
 	struct nouveau_fbdev *fbcon = info->par;
 	struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	int ret;
 
 	if (info->state != FBINFO_STATE_RUNNING)
@@ -266,10 +266,10 @@ nouveau_fbcon_accel_init(struct drm_device *dev)
 	struct fb_info *info = fbcon->helper.fbdev;
 	int ret;
 
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA)
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA)
 		ret = nv04_fbcon_accel_init(info);
 	else
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_FERMI)
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_FERMI)
 		ret = nv50_fbcon_accel_init(info);
 	else
 		ret = nvc0_fbcon_accel_init(info);
@@ -324,7 +324,7 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
 		container_of(helper, struct nouveau_fbdev, helper);
 	struct drm_device *dev = fbcon->helper.dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	struct fb_info *info;
 	struct nouveau_framebuffer *fb;
 	struct nouveau_channel *chan;
@@ -341,8 +341,9 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
 	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
 							  sizes->surface_depth);
 
-	ret = nouveau_gem_new(dev, mode_cmd.pitches[0] * mode_cmd.height,
-			      0, NOUVEAU_GEM_DOMAIN_VRAM, 0, 0x0000, &nvbo);
+	ret = nouveau_gem_new(&drm->client, mode_cmd.pitches[0] *
+			      mode_cmd.height, 0, NOUVEAU_GEM_DOMAIN_VRAM,
+			      0, 0x0000, &nvbo);
 	if (ret) {
 		NV_ERROR(drm, "failed to allocate framebuffer\n");
 		goto out;
@@ -472,19 +473,43 @@ static const struct drm_fb_helper_funcs nouveau_fbcon_helper_funcs = {
 	.fb_probe = nouveau_fbcon_create,
 };
 
+static void
+nouveau_fbcon_set_suspend_work(struct work_struct *work)
+{ /* Deferred half of nouveau_fbcon_set_suspend(); run via fbcon_work. */
+	struct nouveau_drm *drm = container_of(work, typeof(*drm), fbcon_work);
+	int state = READ_ONCE(drm->fbcon_new_state); /* set by the caller side */
+
+	if (state == FBINFO_STATE_RUNNING)
+		pm_runtime_get_sync(drm->dev->dev); /* paired with put below */
+
+	console_lock();
+	if (state == FBINFO_STATE_RUNNING)
+		nouveau_fbcon_accel_restore(drm->dev); /* before un-suspending */
+	drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
+	if (state != FBINFO_STATE_RUNNING)
+		nouveau_fbcon_accel_save_disable(drm->dev); /* after suspending */
+	console_unlock();
+
+	if (state == FBINFO_STATE_RUNNING) {
+		pm_runtime_mark_last_busy(drm->dev->dev);
+		pm_runtime_put_sync(drm->dev->dev);
+	}
+}
+
 void
 nouveau_fbcon_set_suspend(struct drm_device *dev, int state)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	if (drm->fbcon) {
-		console_lock();
-		if (state == FBINFO_STATE_RUNNING)
-			nouveau_fbcon_accel_restore(dev);
-		drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
-		if (state != FBINFO_STATE_RUNNING)
-			nouveau_fbcon_accel_save_disable(dev);
-		console_unlock();
-	}
+
+	if (!drm->fbcon)
+		return;
+
+	drm->fbcon_new_state = state;
+	/* Since runtime resume can happen as a result of a sysfs operation,
+	 * it's possible we already have the console locked. So handle fbcon
+	 * init/deinit from a separate work thread
+	 */
+	schedule_work(&drm->fbcon_work);
 }
 
 int
@@ -504,6 +529,7 @@ nouveau_fbcon_init(struct drm_device *dev)
 		return -ENOMEM;
 
 	drm->fbcon = fbcon;
+	INIT_WORK(&drm->fbcon_work, nouveau_fbcon_set_suspend_work);
 
 	drm_fb_helper_prepare(dev, &fbcon->helper, &nouveau_fbcon_helper_funcs);
 
@@ -515,10 +541,10 @@ nouveau_fbcon_init(struct drm_device *dev)
 	if (ret)
 		goto fini;
 
-	if (drm->device.info.ram_size <= 32 * 1024 * 1024)
+	if (drm->client.device.info.ram_size <= 32 * 1024 * 1024)
 		preferred_bpp = 8;
 	else
-	if (drm->device.info.ram_size <= 64 * 1024 * 1024)
+	if (drm->client.device.info.ram_size <= 64 * 1024 * 1024)
 		preferred_bpp = 16;
 	else
 		preferred_bpp = 32;
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index a6126c93f215..f3e551f1aa46 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -190,7 +190,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha
 		return;
 
 	ret = nvif_notify_init(&chan->user, nouveau_fence_wait_uevent_handler,
-			       false, G82_CHANNEL_DMA_V0_NTFY_UEVENT,
+			       false, NV826E_V0_NTFY_NON_STALL_INTERRUPT,
 			       &(struct nvif_notify_uevent_req) { },
 			       sizeof(struct nvif_notify_uevent_req),
 			       sizeof(struct nvif_notify_uevent_rep),
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index ccdce1b4eec4..d5e58a38f160 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -99,6 +99,7 @@ struct nv84_fence_priv {
 	struct nouveau_bo *bo;
 	struct nouveau_bo *bo_gart;
 	u32 *suspend;
+	struct mutex mutex;
 };
 
 int  nv84_fence_context_new(struct nouveau_channel *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 201b52b750dd..ca5397beb357 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -175,11 +175,11 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv)
 }
 
 int
-nouveau_gem_new(struct drm_device *dev, int size, int align, uint32_t domain,
+nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain,
 		uint32_t tile_mode, uint32_t tile_flags,
 		struct nouveau_bo **pnvbo)
 {
-	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nouveau_drm *drm = nouveau_drm(cli->dev);
 	struct nouveau_bo *nvbo;
 	u32 flags = 0;
 	int ret;
@@ -194,7 +194,7 @@ nouveau_gem_new(struct drm_device *dev, int size, int align, uint32_t domain,
 	if (domain & NOUVEAU_GEM_DOMAIN_COHERENT)
 		flags |= TTM_PL_FLAG_UNCACHED;
 
-	ret = nouveau_bo_new(dev, size, align, flags, tile_mode,
+	ret = nouveau_bo_new(cli, size, align, flags, tile_mode,
 			     tile_flags, NULL, NULL, pnvbo);
 	if (ret)
 		return ret;
@@ -206,12 +206,12 @@ nouveau_gem_new(struct drm_device *dev, int size, int align, uint32_t domain,
 	 */
 	nvbo->valid_domains = NOUVEAU_GEM_DOMAIN_VRAM |
 			      NOUVEAU_GEM_DOMAIN_GART;
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA)
 		nvbo->valid_domains &= domain;
 
 	/* Initialize the embedded gem-object. We return a single gem-reference
 	 * to the caller, instead of a normal nouveau_bo ttm reference. */
-	ret = drm_gem_object_init(dev, &nvbo->gem, nvbo->bo.mem.size);
+	ret = drm_gem_object_init(drm->dev, &nvbo->gem, nvbo->bo.mem.size);
 	if (ret) {
 		nouveau_bo_ref(NULL, pnvbo);
 		return -ENOMEM;
@@ -257,7 +257,7 @@ nouveau_gem_ioctl_new(struct drm_device *dev, void *data,
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_cli *cli = nouveau_cli(file_priv);
-	struct nvkm_fb *fb = nvxx_fb(&drm->device);
+	struct nvkm_fb *fb = nvxx_fb(&drm->client.device);
 	struct drm_nouveau_gem_new *req = data;
 	struct nouveau_bo *nvbo = NULL;
 	int ret = 0;
@@ -267,7 +267,7 @@ nouveau_gem_ioctl_new(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	ret = nouveau_gem_new(dev, req->info.size, req->align,
+	ret = nouveau_gem_new(cli, req->info.size, req->align,
 			      req->info.domain, req->info.tile_mode,
 			      req->info.tile_flags, &nvbo);
 	if (ret)
@@ -496,7 +496,7 @@ validate_list(struct nouveau_channel *chan, struct nouveau_cli *cli,
 			return ret;
 		}
 
-		if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
+		if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
 			if (nvbo->bo.offset == b->presumed.offset &&
 			    ((nvbo->bo.mem.mem_type == TTM_PL_VRAM &&
 			      b->presumed.domain & NOUVEAU_GEM_DOMAIN_VRAM) ||
@@ -767,7 +767,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 				      push[i].length);
 		}
 	} else
-	if (drm->device.info.chipset >= 0x25) {
+	if (drm->client.device.info.chipset >= 0x25) {
 		ret = RING_SPACE(chan, req->nr_push * 2);
 		if (ret) {
 			NV_PRINTK(err, cli, "cal_space: %d\n", ret);
@@ -840,7 +840,7 @@ out_next:
 		req->suffix0 = 0x00000000;
 		req->suffix1 = 0x00000000;
 	} else
-	if (drm->device.info.chipset >= 0x25) {
+	if (drm->client.device.info.chipset >= 0x25) {
 		req->suffix0 = 0x00020000;
 		req->suffix1 = 0x00000000;
 	} else {
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.h b/drivers/gpu/drm/nouveau/nouveau_gem.h
index 7e32da2e037a..8fa6ed9ddd3a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.h
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.h
@@ -16,7 +16,7 @@ nouveau_gem_object(struct drm_gem_object *gem)
 }
 
 /* nouveau_gem.c */
-extern int nouveau_gem_new(struct drm_device *, int size, int align,
+extern int nouveau_gem_new(struct nouveau_cli *, u64 size, int align,
 			   uint32_t domain, uint32_t tile_mode,
 			   uint32_t tile_flags, struct nouveau_bo **);
 extern void nouveau_gem_object_del(struct drm_gem_object *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_hwmon.c b/drivers/gpu/drm/nouveau/nouveau_hwmon.c
index 71f764bf4cc6..23b1670c1c2f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_hwmon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_hwmon.c
@@ -43,7 +43,7 @@ nouveau_hwmon_show_temp(struct device *d, struct device_attribute *a, char *buf)
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	int temp = nvkm_therm_temp_get(therm);
 
 	if (temp < 0)
@@ -69,7 +69,7 @@ nouveau_hwmon_temp1_auto_point1_temp(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	      therm->attr_get(therm, NVKM_THERM_ATTR_THRS_FAN_BOOST) * 1000);
@@ -81,7 +81,7 @@ nouveau_hwmon_set_temp1_auto_point1_temp(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -102,7 +102,7 @@ nouveau_hwmon_temp1_auto_point1_temp_hyst(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	 therm->attr_get(therm, NVKM_THERM_ATTR_THRS_FAN_BOOST_HYST) * 1000);
@@ -114,7 +114,7 @@ nouveau_hwmon_set_temp1_auto_point1_temp_hyst(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -134,7 +134,7 @@ nouveau_hwmon_max_temp(struct device *d, struct device_attribute *a, char *buf)
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	       therm->attr_get(therm, NVKM_THERM_ATTR_THRS_DOWN_CLK) * 1000);
@@ -145,7 +145,7 @@ nouveau_hwmon_set_max_temp(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -165,7 +165,7 @@ nouveau_hwmon_max_temp_hyst(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	  therm->attr_get(therm, NVKM_THERM_ATTR_THRS_DOWN_CLK_HYST) * 1000);
@@ -176,7 +176,7 @@ nouveau_hwmon_set_max_temp_hyst(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -197,7 +197,7 @@ nouveau_hwmon_critical_temp(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	       therm->attr_get(therm, NVKM_THERM_ATTR_THRS_CRITICAL) * 1000);
@@ -209,7 +209,7 @@ nouveau_hwmon_set_critical_temp(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -230,7 +230,7 @@ nouveau_hwmon_critical_temp_hyst(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	  therm->attr_get(therm, NVKM_THERM_ATTR_THRS_CRITICAL_HYST) * 1000);
@@ -243,7 +243,7 @@ nouveau_hwmon_set_critical_temp_hyst(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -263,7 +263,7 @@ nouveau_hwmon_emergency_temp(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	       therm->attr_get(therm, NVKM_THERM_ATTR_THRS_SHUTDOWN) * 1000);
@@ -275,7 +275,7 @@ nouveau_hwmon_set_emergency_temp(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -296,7 +296,7 @@ nouveau_hwmon_emergency_temp_hyst(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	  therm->attr_get(therm, NVKM_THERM_ATTR_THRS_SHUTDOWN_HYST) * 1000);
@@ -309,7 +309,7 @@ nouveau_hwmon_set_emergency_temp_hyst(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -349,7 +349,7 @@ nouveau_hwmon_show_fan1_input(struct device *d, struct device_attribute *attr,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n", nvkm_therm_fan_sense(therm));
 }
@@ -362,7 +362,7 @@ nouveau_hwmon_get_pwm1_enable(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	int ret;
 
 	ret = therm->attr_get(therm, NVKM_THERM_ATTR_FAN_MODE);
@@ -378,7 +378,7 @@ nouveau_hwmon_set_pwm1_enable(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 	int ret;
 
@@ -401,7 +401,7 @@ nouveau_hwmon_get_pwm1(struct device *d, struct device_attribute *a, char *buf)
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	int ret;
 
 	ret = therm->fan_get(therm);
@@ -417,7 +417,7 @@ nouveau_hwmon_set_pwm1(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	int ret = -ENODEV;
 	long value;
 
@@ -441,7 +441,7 @@ nouveau_hwmon_get_pwm1_min(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	int ret;
 
 	ret = therm->attr_get(therm, NVKM_THERM_ATTR_FAN_MIN_DUTY);
@@ -457,7 +457,7 @@ nouveau_hwmon_set_pwm1_min(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 	int ret;
 
@@ -481,7 +481,7 @@ nouveau_hwmon_get_pwm1_max(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	int ret;
 
 	ret = therm->attr_get(therm, NVKM_THERM_ATTR_FAN_MAX_DUTY);
@@ -497,7 +497,7 @@ nouveau_hwmon_set_pwm1_max(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 	int ret;
 
@@ -521,7 +521,7 @@ nouveau_hwmon_get_in0_input(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_volt *volt = nvxx_volt(&drm->device);
+	struct nvkm_volt *volt = nvxx_volt(&drm->client.device);
 	int ret;
 
 	ret = nvkm_volt_get(volt);
@@ -540,7 +540,7 @@ nouveau_hwmon_get_in0_min(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_volt *volt = nvxx_volt(&drm->device);
+	struct nvkm_volt *volt = nvxx_volt(&drm->client.device);
 
 	if (!volt || !volt->min_uv)
 		return -ENODEV;
@@ -557,7 +557,7 @@ nouveau_hwmon_get_in0_max(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_volt *volt = nvxx_volt(&drm->device);
+	struct nvkm_volt *volt = nvxx_volt(&drm->client.device);
 
 	if (!volt || !volt->max_uv)
 		return -ENODEV;
@@ -584,7 +584,7 @@ nouveau_hwmon_get_power1_input(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->device);
+	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->client.device);
 	int result = nvkm_iccsense_read_all(iccsense);
 
 	if (result < 0)
@@ -596,6 +596,32 @@ nouveau_hwmon_get_power1_input(struct device *d, struct device_attribute *a,
 static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO,
 			  nouveau_hwmon_get_power1_input, NULL, 0);
 
+/* power1_max show(): emit iccsense->power_w_max, bounded to PAGE_SIZE. */
+static ssize_t
+nouveau_hwmon_get_power1_max(struct device *d, struct device_attribute *a,
+			     char *buf)
+{
+	struct nouveau_drm *drm = nouveau_drm(dev_get_drvdata(d));
+	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->client.device);
+	return snprintf(buf, PAGE_SIZE, "%i\n", iccsense->power_w_max);
+}
+
+static SENSOR_DEVICE_ATTR(power1_max, S_IRUGO,
+			  nouveau_hwmon_get_power1_max, NULL, 0);
+
+/* power1_crit show(): emit iccsense->power_w_crit, bounded to PAGE_SIZE. */
+static ssize_t
+nouveau_hwmon_get_power1_crit(struct device *d, struct device_attribute *a,
+			      char *buf)
+{
+	struct nouveau_drm *drm = nouveau_drm(dev_get_drvdata(d));
+	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->client.device);
+	return snprintf(buf, PAGE_SIZE, "%i\n", iccsense->power_w_crit);
+}
+
+static SENSOR_DEVICE_ATTR(power1_crit, S_IRUGO,
+			  nouveau_hwmon_get_power1_crit, NULL, 0);
+
 static struct attribute *hwmon_default_attributes[] = {
 	&sensor_dev_attr_name.dev_attr.attr,
 	&sensor_dev_attr_update_rate.dev_attr.attr,
@@ -639,6 +665,12 @@ static struct attribute *hwmon_power_attributes[] = {
 	NULL
 };
 
+static struct attribute *hwmon_power_caps_attributes[] = {
+	&sensor_dev_attr_power1_max.dev_attr.attr,
+	&sensor_dev_attr_power1_crit.dev_attr.attr,
+	NULL
+};
+
 static const struct attribute_group hwmon_default_attrgroup = {
 	.attrs = hwmon_default_attributes,
 };
@@ -657,6 +689,9 @@ static const struct attribute_group hwmon_in0_attrgroup = {
 static const struct attribute_group hwmon_power_attrgroup = {
 	.attrs = hwmon_power_attributes,
 };
+static const struct attribute_group hwmon_power_caps_attrgroup = {
+	.attrs = hwmon_power_caps_attributes,
+};
 #endif
 
 int
@@ -664,9 +699,9 @@ nouveau_hwmon_init(struct drm_device *dev)
 {
 #if defined(CONFIG_HWMON) || (defined(MODULE) && defined(CONFIG_HWMON_MODULE))
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
-	struct nvkm_volt *volt = nvxx_volt(&drm->device);
-	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
+	struct nvkm_volt *volt = nvxx_volt(&drm->client.device);
+	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->client.device);
 	struct nouveau_hwmon *hwmon;
 	struct device *hwmon_dev;
 	int ret = 0;
@@ -728,8 +763,16 @@ nouveau_hwmon_init(struct drm_device *dev)
 	if (iccsense && iccsense->data_valid && !list_empty(&iccsense->rails)) {
 		ret = sysfs_create_group(&hwmon_dev->kobj,
 					 &hwmon_power_attrgroup);
+
 		if (ret)
 			goto error;
+
+		if (iccsense->power_w_max && iccsense->power_w_crit) {
+			ret = sysfs_create_group(&hwmon_dev->kobj,
+						 &hwmon_power_caps_attrgroup);
+			if (ret)
+				goto error;
+		}
 	}
 
 	hwmon->hwmon = hwmon_dev;
@@ -759,6 +802,7 @@ nouveau_hwmon_fini(struct drm_device *dev)
 		sysfs_remove_group(&hwmon->hwmon->kobj, &hwmon_fan_rpm_attrgroup);
 		sysfs_remove_group(&hwmon->hwmon->kobj, &hwmon_in0_attrgroup);
 		sysfs_remove_group(&hwmon->hwmon->kobj, &hwmon_power_attrgroup);
+		sysfs_remove_group(&hwmon->hwmon->kobj, &hwmon_power_caps_attrgroup);
 
 		hwmon_device_unregister(hwmon->hwmon);
 	}
diff --git a/drivers/gpu/drm/nouveau/nouveau_led.c b/drivers/gpu/drm/nouveau/nouveau_led.c
index 3e2f1b6cd4df..2c5e0628da12 100644
--- a/drivers/gpu/drm/nouveau/nouveau_led.c
+++ b/drivers/gpu/drm/nouveau/nouveau_led.c
@@ -38,7 +38,7 @@ nouveau_led_get_brightness(struct led_classdev *led)
 {
 	struct drm_device *drm_dev = container_of(led, struct nouveau_led, led)->dev;
 	struct nouveau_drm *drm = nouveau_drm(drm_dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	u32 div, duty;
 
 	div =  nvif_rd32(device, 0x61c880) & 0x00ffffff;
@@ -55,7 +55,7 @@ nouveau_led_set_brightness(struct led_classdev *led, enum led_brightness value)
 {
 	struct drm_device *drm_dev = container_of(led, struct nouveau_led, led)->dev;
 	struct nouveau_drm *drm = nouveau_drm(drm_dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 
 	u32 input_clk = 27e6; /* PDISPLAY.SOR[1].PWM is connected to the crystal */
 	u32 freq = 100; /* this is what nvidia uses and it should be good-enough */
@@ -78,7 +78,7 @@ int
 nouveau_led_init(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_gpio *gpio = nvxx_gpio(&drm->device);
+	struct nvkm_gpio *gpio = nvxx_gpio(&drm->client.device);
 	struct dcb_gpio_func logo_led;
 	int ret;
 
@@ -102,6 +102,7 @@ nouveau_led_init(struct drm_device *dev)
 	ret = led_classdev_register(dev->dev, &drm->led->led);
 	if (ret) {
 		kfree(drm->led);
+		drm->led = NULL;
 		return ret;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_led.h b/drivers/gpu/drm/nouveau/nouveau_led.h
index 187ecdb82002..21a5775028cc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_led.h
+++ b/drivers/gpu/drm/nouveau/nouveau_led.h
@@ -42,7 +42,7 @@ nouveau_led(struct drm_device *dev)
 }
 
 /* nouveau_led.c */
-#if IS_ENABLED(CONFIG_LEDS_CLASS)
+#if IS_REACHABLE(CONFIG_LEDS_CLASS)
 int  nouveau_led_init(struct drm_device *dev);
 void nouveau_led_suspend(struct drm_device *dev);
 void nouveau_led_resume(struct drm_device *dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_nvif.c b/drivers/gpu/drm/nouveau/nouveau_nvif.c
index 15f0925ea13b..b3f29b1ce9ea 100644
--- a/drivers/gpu/drm/nouveau/nouveau_nvif.c
+++ b/drivers/gpu/drm/nouveau/nouveau_nvif.c
@@ -60,20 +60,15 @@ nvkm_client_ioctl(void *priv, bool super, void *data, u32 size, void **hack)
 static int
 nvkm_client_resume(void *priv)
 {
-	return nvkm_client_init(priv);
+	struct nvkm_client *client = priv;
+	return nvkm_object_init(&client->object);
 }
 
 static int
 nvkm_client_suspend(void *priv)
 {
-	return nvkm_client_fini(priv, true);
-}
-
-static void
-nvkm_client_driver_fini(void *priv)
-{
 	struct nvkm_client *client = priv;
-	nvkm_client_del(&client);
+	return nvkm_object_fini(&client->object, true);
 }
 
 static int
@@ -108,23 +103,14 @@ static int
 nvkm_client_driver_init(const char *name, u64 device, const char *cfg,
 			const char *dbg, void **ppriv)
 {
-	struct nvkm_client *client;
-	int ret;
-
-	ret = nvkm_client_new(name, device, cfg, dbg, &client);
-	*ppriv = client;
-	if (ret)
-		return ret;
-
-	client->ntfy = nvkm_client_ntfy;
-	return 0;
+	return nvkm_client_new(name, device, cfg, dbg, nvkm_client_ntfy,
+			       (struct nvkm_client **)ppriv);
 }
 
 const struct nvif_driver
 nvif_driver_nvkm = {
 	.name = "nvkm",
 	.init = nvkm_client_driver_init,
-	.fini = nvkm_client_driver_fini,
 	.suspend = nvkm_client_suspend,
 	.resume = nvkm_client_resume,
 	.ioctl = nvkm_client_ioctl,
diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c
index a0a9704cfe2b..1fefc93af1d7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_prime.c
+++ b/drivers/gpu/drm/nouveau/nouveau_prime.c
@@ -60,6 +60,7 @@ struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev,
 							 struct dma_buf_attachment *attach,
 							 struct sg_table *sg)
 {
+	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_bo *nvbo;
 	struct reservation_object *robj = attach->dmabuf->resv;
 	u32 flags = 0;
@@ -68,7 +69,7 @@ struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev,
 	flags = TTM_PL_FLAG_TT;
 
 	ww_mutex_lock(&robj->lock, NULL);
-	ret = nouveau_bo_new(dev, attach->dmabuf->size, 0, flags, 0, 0,
+	ret = nouveau_bo_new(&drm->client, attach->dmabuf->size, 0, flags, 0, 0,
 			     sg, robj, &nvbo);
 	ww_mutex_unlock(&robj->lock);
 	if (ret)
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index db35ab5883ac..b7ab268f7d6f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -24,10 +24,10 @@ nouveau_sgdma_destroy(struct ttm_tt *ttm)
 }
 
 static int
-nv04_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
+nv04_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *reg)
 {
 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm;
-	struct nvkm_mem *node = mem->mm_node;
+	struct nvkm_mem *node = reg->mm_node;
 
 	if (ttm->sg) {
 		node->sg    = ttm->sg;
@@ -36,7 +36,7 @@ nv04_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
 		node->sg    = NULL;
 		node->pages = nvbe->ttm.dma_address;
 	}
-	node->size = (mem->num_pages << PAGE_SHIFT) >> 12;
+	node->size = (reg->num_pages << PAGE_SHIFT) >> 12;
 
 	nvkm_vm_map(&node->vma[0], node);
 	nvbe->node = node;
@@ -58,10 +58,10 @@ static struct ttm_backend_func nv04_sgdma_backend = {
 };
 
 static int
-nv50_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
+nv50_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *reg)
 {
 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm;
-	struct nvkm_mem *node = mem->mm_node;
+	struct nvkm_mem *node = reg->mm_node;
 
 	/* noop: bound in move_notify() */
 	if (ttm->sg) {
@@ -71,7 +71,7 @@ nv50_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
 		node->sg    = NULL;
 		node->pages = nvbe->ttm.dma_address;
 	}
-	node->size = (mem->num_pages << PAGE_SHIFT) >> 12;
+	node->size = (reg->num_pages << PAGE_SHIFT) >> 12;
 	return 0;
 }
 
@@ -100,7 +100,7 @@ nouveau_sgdma_create_ttm(struct ttm_bo_device *bdev,
 	if (!nvbe)
 		return NULL;
 
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA)
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA)
 		nvbe->ttm.ttm.func = &nv04_sgdma_backend;
 	else
 		nvbe->ttm.ttm.func = &nv50_sgdma_backend;
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index ec4668a41e01..13e5cc5f07fe 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -36,7 +36,7 @@ static int
 nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long psize)
 {
 	struct nouveau_drm *drm = nouveau_bdev(man->bdev);
-	struct nvkm_fb *fb = nvxx_fb(&drm->device);
+	struct nvkm_fb *fb = nvxx_fb(&drm->client.device);
 	man->priv = fb;
 	return 0;
 }
@@ -64,45 +64,45 @@ nvkm_mem_node_cleanup(struct nvkm_mem *node)
 
 static void
 nouveau_vram_manager_del(struct ttm_mem_type_manager *man,
-			 struct ttm_mem_reg *mem)
+			 struct ttm_mem_reg *reg)
 {
 	struct nouveau_drm *drm = nouveau_bdev(man->bdev);
-	struct nvkm_ram *ram = nvxx_fb(&drm->device)->ram;
-	nvkm_mem_node_cleanup(mem->mm_node);
-	ram->func->put(ram, (struct nvkm_mem **)&mem->mm_node);
+	struct nvkm_ram *ram = nvxx_fb(&drm->client.device)->ram;
+	nvkm_mem_node_cleanup(reg->mm_node);
+	ram->func->put(ram, (struct nvkm_mem **)&reg->mm_node);
 }
 
 static int
 nouveau_vram_manager_new(struct ttm_mem_type_manager *man,
 			 struct ttm_buffer_object *bo,
 			 const struct ttm_place *place,
-			 struct ttm_mem_reg *mem)
+			 struct ttm_mem_reg *reg)
 {
 	struct nouveau_drm *drm = nouveau_bdev(man->bdev);
-	struct nvkm_ram *ram = nvxx_fb(&drm->device)->ram;
+	struct nvkm_ram *ram = nvxx_fb(&drm->client.device)->ram;
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
 	struct nvkm_mem *node;
 	u32 size_nc = 0;
 	int ret;
 
-	if (drm->device.info.ram_size == 0)
+	if (drm->client.device.info.ram_size == 0)
 		return -ENOMEM;
 
 	if (nvbo->tile_flags & NOUVEAU_GEM_TILE_NONCONTIG)
 		size_nc = 1 << nvbo->page_shift;
 
-	ret = ram->func->get(ram, mem->num_pages << PAGE_SHIFT,
-			     mem->page_alignment << PAGE_SHIFT, size_nc,
+	ret = ram->func->get(ram, reg->num_pages << PAGE_SHIFT,
+			     reg->page_alignment << PAGE_SHIFT, size_nc,
 			     (nvbo->tile_flags >> 8) & 0x3ff, &node);
 	if (ret) {
-		mem->mm_node = NULL;
+		reg->mm_node = NULL;
 		return (ret == -ENOSPC) ? 0 : ret;
 	}
 
 	node->page_shift = nvbo->page_shift;
 
-	mem->mm_node = node;
-	mem->start   = node->offset >> PAGE_SHIFT;
+	reg->mm_node = node;
+	reg->start   = node->offset >> PAGE_SHIFT;
 	return 0;
 }
 
@@ -127,18 +127,18 @@ nouveau_gart_manager_fini(struct ttm_mem_type_manager *man)
 
 static void
 nouveau_gart_manager_del(struct ttm_mem_type_manager *man,
-			 struct ttm_mem_reg *mem)
+			 struct ttm_mem_reg *reg)
 {
-	nvkm_mem_node_cleanup(mem->mm_node);
-	kfree(mem->mm_node);
-	mem->mm_node = NULL;
+	nvkm_mem_node_cleanup(reg->mm_node);
+	kfree(reg->mm_node);
+	reg->mm_node = NULL;
 }
 
 static int
 nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
 			 struct ttm_buffer_object *bo,
 			 const struct ttm_place *place,
-			 struct ttm_mem_reg *mem)
+			 struct ttm_mem_reg *reg)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
@@ -150,7 +150,7 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
 
 	node->page_shift = 12;
 
-	switch (drm->device.info.family) {
+	switch (drm->client.device.info.family) {
 	case NV_DEVICE_INFO_V0_TNT:
 	case NV_DEVICE_INFO_V0_CELSIUS:
 	case NV_DEVICE_INFO_V0_KELVIN:
@@ -158,7 +158,7 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
 	case NV_DEVICE_INFO_V0_CURIE:
 		break;
 	case NV_DEVICE_INFO_V0_TESLA:
-		if (drm->device.info.chipset != 0x50)
+		if (drm->client.device.info.chipset != 0x50)
 			node->memtype = (nvbo->tile_flags & 0x7f00) >> 8;
 		break;
 	case NV_DEVICE_INFO_V0_FERMI:
@@ -169,12 +169,12 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
 		break;
 	default:
 		NV_WARN(drm, "%s: unhandled family type %x\n", __func__,
-			drm->device.info.family);
+			drm->client.device.info.family);
 		break;
 	}
 
-	mem->mm_node = node;
-	mem->start   = 0;
+	reg->mm_node = node;
+	reg->start   = 0;
 	return 0;
 }
 
@@ -197,7 +197,7 @@ static int
 nv04_gart_manager_init(struct ttm_mem_type_manager *man, unsigned long psize)
 {
 	struct nouveau_drm *drm = nouveau_bdev(man->bdev);
-	struct nvkm_mmu *mmu = nvxx_mmu(&drm->device);
+	struct nvkm_mmu *mmu = nvxx_mmu(&drm->client.device);
 	struct nv04_mmu *priv = (void *)mmu;
 	struct nvkm_vm *vm = NULL;
 	nvkm_vm_ref(priv->vm, &vm, NULL);
@@ -215,20 +215,20 @@ nv04_gart_manager_fini(struct ttm_mem_type_manager *man)
 }
 
 static void
-nv04_gart_manager_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *mem)
+nv04_gart_manager_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *reg)
 {
-	struct nvkm_mem *node = mem->mm_node;
+	struct nvkm_mem *node = reg->mm_node;
 	if (node->vma[0].node)
 		nvkm_vm_put(&node->vma[0]);
-	kfree(mem->mm_node);
-	mem->mm_node = NULL;
+	kfree(reg->mm_node);
+	reg->mm_node = NULL;
 }
 
 static int
 nv04_gart_manager_new(struct ttm_mem_type_manager *man,
 		      struct ttm_buffer_object *bo,
 		      const struct ttm_place *place,
-		      struct ttm_mem_reg *mem)
+		      struct ttm_mem_reg *reg)
 {
 	struct nvkm_mem *node;
 	int ret;
@@ -239,15 +239,15 @@ nv04_gart_manager_new(struct ttm_mem_type_manager *man,
 
 	node->page_shift = 12;
 
-	ret = nvkm_vm_get(man->priv, mem->num_pages << 12, node->page_shift,
+	ret = nvkm_vm_get(man->priv, reg->num_pages << 12, node->page_shift,
 			  NV_MEM_ACCESS_RW, &node->vma[0]);
 	if (ret) {
 		kfree(node);
 		return ret;
 	}
 
-	mem->mm_node = node;
-	mem->start   = node->vma[0].offset >> PAGE_SHIFT;
+	reg->mm_node = node;
+	reg->start   = node->vma[0].offset >> PAGE_SHIFT;
 	return 0;
 }
 
@@ -339,7 +339,7 @@ nouveau_ttm_global_release(struct nouveau_drm *drm)
 int
 nouveau_ttm_init(struct nouveau_drm *drm)
 {
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	struct nvkm_pci *pci = device->pci;
 	struct drm_device *dev = drm->dev;
 	u8 bits;
@@ -352,8 +352,8 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 		drm->agp.cma = pci->agp.cma;
 	}
 
-	bits = nvxx_mmu(&drm->device)->dma_bits;
-	if (nvxx_device(&drm->device)->func->pci) {
+	bits = nvxx_mmu(&drm->client.device)->dma_bits;
+	if (nvxx_device(&drm->client.device)->func->pci) {
 		if (drm->agp.bridge)
 			bits = 32;
 	} else if (device->func->tegra) {
@@ -396,7 +396,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 	}
 
 	/* VRAM init */
-	drm->gem.vram_available = drm->device.info.ram_user;
+	drm->gem.vram_available = drm->client.device.info.ram_user;
 
 	arch_io_reserve_memtype_wc(device->func->resource_addr(device, 1),
 				   device->func->resource_size(device, 1));
@@ -413,7 +413,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 
 	/* GART init */
 	if (!drm->agp.bridge) {
-		drm->gem.gart_available = nvxx_mmu(&drm->device)->limit;
+		drm->gem.gart_available = nvxx_mmu(&drm->client.device)->limit;
 	} else {
 		drm->gem.gart_available = drm->agp.size;
 	}
@@ -433,7 +433,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 void
 nouveau_ttm_fini(struct nouveau_drm *drm)
 {
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 
 	ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_VRAM);
 	ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_TT);
diff --git a/drivers/gpu/drm/nouveau/nouveau_usif.c b/drivers/gpu/drm/nouveau/nouveau_usif.c
index 08f9c6fa0f7f..afbdbed1a690 100644
--- a/drivers/gpu/drm/nouveau/nouveau_usif.c
+++ b/drivers/gpu/drm/nouveau/nouveau_usif.c
@@ -103,7 +103,7 @@ usif_notify(const void *header, u32 length, const void *data, u32 size)
 	}
 		break;
 	default:
-		BUG_ON(1);
+		BUG();
 		break;
 	}
 
@@ -313,7 +313,8 @@ usif_ioctl(struct drm_file *filp, void __user *user, u32 argc)
 	if (!(ret = nvif_unpack(-ENOSYS, &data, &size, argv->v0, 0, 0, true))) {
 		/* block access to objects not created via this interface */
 		owner = argv->v0.owner;
-		if (argv->v0.object == 0ULL)
+		if (argv->v0.object == 0ULL &&
+		    argv->v0.type != NVIF_IOCTL_V0_DEL)
 			argv->v0.owner = NVDRM_OBJECT_ANY; /* except client */
 		else
 			argv->v0.owner = NVDRM_OBJECT_USIF;
diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c
index c6a180a0c284..eef22c6b9665 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vga.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vga.c
@@ -13,13 +13,13 @@ static unsigned int
 nouveau_vga_set_decode(void *priv, bool state)
 {
 	struct nouveau_drm *drm = nouveau_drm(priv);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE &&
-	    drm->device.info.chipset >= 0x4c)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE &&
+	    drm->client.device.info.chipset >= 0x4c)
 		nvif_wr32(device, 0x088060, state);
 	else
-	if (drm->device.info.chipset >= 0x40)
+	if (drm->client.device.info.chipset >= 0x40)
 		nvif_wr32(device, 0x088054, state);
 	else
 		nvif_wr32(device, 0x001854, state);
diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c
index 6a2b187e3c3b..01731dbeb3d8 100644
--- a/drivers/gpu/drm/nouveau/nv04_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c
@@ -136,7 +136,7 @@ nv04_fbcon_accel_init(struct fb_info *info)
 	struct drm_device *dev = nfbdev->helper.dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_channel *chan = drm->channel;
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	int surface_fmt, pattern_fmt, rect_fmt;
 	int ret;
 
diff --git a/drivers/gpu/drm/nouveau/nv17_fence.c b/drivers/gpu/drm/nouveau/nv17_fence.c
index 79bc01111351..6477b7069e14 100644
--- a/drivers/gpu/drm/nouveau/nv17_fence.c
+++ b/drivers/gpu/drm/nouveau/nv17_fence.c
@@ -76,9 +76,9 @@ nv17_fence_context_new(struct nouveau_channel *chan)
 {
 	struct nv10_fence_priv *priv = chan->drm->fence;
 	struct nv10_fence_chan *fctx;
-	struct ttm_mem_reg *mem = &priv->bo->bo.mem;
-	u32 start = mem->start * PAGE_SIZE;
-	u32 limit = start + mem->size - 1;
+	struct ttm_mem_reg *reg = &priv->bo->bo.mem;
+	u32 start = reg->start * PAGE_SIZE;
+	u32 limit = start + reg->size - 1;
 	int ret = 0;
 
 	fctx = chan->fence = kzalloc(sizeof(*fctx), GFP_KERNEL);
@@ -129,7 +129,7 @@ nv17_fence_create(struct nouveau_drm *drm)
 	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	spin_lock_init(&priv->lock);
 
-	ret = nouveau_bo_new(drm->dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
+	ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM,
 			     0, 0x0000, NULL, NULL, &priv->bo);
 	if (!ret) {
 		ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM, false);
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 452da483ca01..0b4440ffbeae 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -447,18 +447,18 @@ nv50_dmac_ctxdma_new(struct nv50_dmac *dmac, struct nouveau_framebuffer *fb)
 	args.base.target = NV_DMA_V0_TARGET_VRAM;
 	args.base.access = NV_DMA_V0_ACCESS_RDWR;
 	args.base.start  = 0;
-	args.base.limit  = drm->device.info.ram_user - 1;
+	args.base.limit  = drm->client.device.info.ram_user - 1;
 
-	if (drm->device.info.chipset < 0x80) {
+	if (drm->client.device.info.chipset < 0x80) {
 		args.nv50.part = NV50_DMA_V0_PART_256;
 		argc += sizeof(args.nv50);
 	} else
-	if (drm->device.info.chipset < 0xc0) {
+	if (drm->client.device.info.chipset < 0xc0) {
 		args.nv50.part = NV50_DMA_V0_PART_256;
 		args.nv50.kind = kind;
 		argc += sizeof(args.nv50);
 	} else
-	if (drm->device.info.chipset < 0xd0) {
+	if (drm->client.device.info.chipset < 0xd0) {
 		args.gf100.kind = kind;
 		argc += sizeof(args.gf100);
 	} else {
@@ -848,7 +848,7 @@ nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw,
 	asyw->image.kind = (fb->nvbo->tile_flags & 0x0000ff00) >> 8;
 	if (asyw->image.kind) {
 		asyw->image.layout = 0;
-		if (drm->device.info.chipset >= 0xc0)
+		if (drm->client.device.info.chipset >= 0xc0)
 			asyw->image.block = fb->nvbo->tile_mode >> 4;
 		else
 			asyw->image.block = fb->nvbo->tile_mode;
@@ -1397,7 +1397,7 @@ nv50_base_ntfy_wait_begun(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 {
 	struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev);
 	struct nv50_disp *disp = nv50_disp(wndw->plane.dev);
-	if (nvif_msec(&drm->device, 2000ULL,
+	if (nvif_msec(&drm->client.device, 2000ULL,
 		u32 data = nouveau_bo_rd32(disp->sync, asyw->ntfy.offset / 4);
 		if ((data & 0xc0000000) == 0x40000000)
 			break;
@@ -1522,7 +1522,7 @@ nv50_base_new(struct nouveau_drm *drm, struct nv50_head *head,
 		return ret;
 	}
 
-	ret = nv50_base_create(&drm->device, disp->disp, base->id,
+	ret = nv50_base_create(&drm->client.device, disp->disp, base->id,
 			       disp->sync->bo.offset, &base->chan);
 	if (ret)
 		return ret;
@@ -2394,7 +2394,7 @@ static int
 nv50_head_create(struct drm_device *dev, int index)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	struct nv50_disp *disp = nv50_disp(dev);
 	struct nv50_head *head;
 	struct nv50_base *base;
@@ -2428,7 +2428,7 @@ nv50_head_create(struct drm_device *dev, int index)
 	drm_crtc_helper_add(crtc, &nv50_head_help);
 	drm_mode_crtc_set_gamma_size(crtc, 256);
 
-	ret = nouveau_bo_new(dev, 8192, 0x100, TTM_PL_FLAG_VRAM,
+	ret = nouveau_bo_new(&drm->client, 8192, 0x100, TTM_PL_FLAG_VRAM,
 			     0, 0x0000, NULL, NULL, &head->base.lut.nvbo);
 	if (!ret) {
 		ret = nouveau_bo_pin(head->base.lut.nvbo, TTM_PL_FLAG_VRAM, true);
@@ -2667,7 +2667,7 @@ static int
 nv50_dac_create(struct drm_connector *connector, struct dcb_output *dcbe)
 {
 	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct nvkm_i2c_bus *bus;
 	struct nouveau_encoder *nv_encoder;
 	struct drm_encoder *encoder;
@@ -3623,7 +3623,7 @@ nv50_sor_enable(struct drm_encoder *encoder)
 		nv50_audio_enable(encoder, mode);
 		break;
 	default:
-		BUG_ON(1);
+		BUG();
 		break;
 	}
 
@@ -3657,7 +3657,7 @@ nv50_sor_create(struct drm_connector *connector, struct dcb_output *dcbe)
 {
 	struct nouveau_connector *nv_connector = nouveau_connector(connector);
 	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct nouveau_encoder *nv_encoder;
 	struct drm_encoder *encoder;
 	int type, ret;
@@ -3796,7 +3796,7 @@ nv50_pior_enable(struct drm_encoder *encoder)
 		proto = 0x0;
 		break;
 	default:
-		BUG_ON(1);
+		BUG();
 		break;
 	}
 
@@ -3842,7 +3842,7 @@ static int
 nv50_pior_create(struct drm_connector *connector, struct dcb_output *dcbe)
 {
 	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct nvkm_i2c_bus *bus = NULL;
 	struct nvkm_i2c_aux *aux = NULL;
 	struct i2c_adapter *ddc;
@@ -3915,7 +3915,7 @@ nv50_disp_atomic_commit_core(struct nouveau_drm *drm, u32 interlock)
 		evo_data(push, 0x00000000);
 		nouveau_bo_wr32(disp->sync, 0, 0x00000000);
 		evo_kick(push, core);
-		if (nvif_msec(&drm->device, 2000ULL,
+		if (nvif_msec(&drm->client.device, 2000ULL,
 			if (nouveau_bo_rd32(disp->sync, 0))
 				break;
 			usleep_range(1, 2);
@@ -4050,6 +4050,11 @@ nv50_disp_atomic_commit_tail(struct drm_atomic_state *state)
 		}
 	}
 
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		if (crtc->state->event)
+			drm_crtc_vblank_get(crtc);
+	}
+
 	/* Update plane(s). */
 	for_each_plane_in_state(state, plane, plane_state, i) {
 		struct nv50_wndw_atom *asyw = nv50_wndw_atom(plane->state);
@@ -4099,6 +4104,7 @@ nv50_disp_atomic_commit_tail(struct drm_atomic_state *state)
 			drm_crtc_send_vblank_event(crtc, crtc->state->event);
 			spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 			crtc->state->event = NULL;
+			drm_crtc_vblank_put(crtc);
 		}
 	}
 
@@ -4427,7 +4433,7 @@ module_param_named(atomic, nouveau_atomic, int, 0400);
 int
 nv50_display_create(struct drm_device *dev)
 {
-	struct nvif_device *device = &nouveau_drm(dev)->device;
+	struct nvif_device *device = &nouveau_drm(dev)->client.device;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct dcb_table *dcb = &drm->vbios.dcb;
 	struct drm_connector *connector, *tmp;
@@ -4451,7 +4457,7 @@ nv50_display_create(struct drm_device *dev)
 		dev->driver->driver_features |= DRIVER_ATOMIC;
 
 	/* small shared memory area we use for notifiers and semaphores */
-	ret = nouveau_bo_new(dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
+	ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM,
 			     0, 0x0000, NULL, NULL, &disp->sync);
 	if (!ret) {
 		ret = nouveau_bo_pin(disp->sync, TTM_PL_FLAG_VRAM, true);
diff --git a/drivers/gpu/drm/nouveau/nv50_fence.c b/drivers/gpu/drm/nouveau/nv50_fence.c
index f68c7054fd53..a369d978e267 100644
--- a/drivers/gpu/drm/nouveau/nv50_fence.c
+++ b/drivers/gpu/drm/nouveau/nv50_fence.c
@@ -37,9 +37,9 @@ nv50_fence_context_new(struct nouveau_channel *chan)
 {
 	struct nv10_fence_priv *priv = chan->drm->fence;
 	struct nv10_fence_chan *fctx;
-	struct ttm_mem_reg *mem = &priv->bo->bo.mem;
-	u32 start = mem->start * PAGE_SIZE;
-	u32 limit = start + mem->size - 1;
+	struct ttm_mem_reg *reg = &priv->bo->bo.mem;
+	u32 start = reg->start * PAGE_SIZE;
+	u32 limit = start + reg->size - 1;
 	int ret;
 
 	fctx = chan->fence = kzalloc(sizeof(*fctx), GFP_KERNEL);
@@ -82,7 +82,7 @@ nv50_fence_create(struct nouveau_drm *drm)
 	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	spin_lock_init(&priv->lock);
 
-	ret = nouveau_bo_new(drm->dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
+	ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM,
 			     0, 0x0000, NULL, NULL, &priv->bo);
 	if (!ret) {
 		ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM, false);
diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c
index 52b87ae83e7b..bd7a8a1e4ad9 100644
--- a/drivers/gpu/drm/nouveau/nv84_fence.c
+++ b/drivers/gpu/drm/nouveau/nv84_fence.c
@@ -107,8 +107,10 @@ nv84_fence_context_del(struct nouveau_channel *chan)
 	struct nv84_fence_chan *fctx = chan->fence;
 
 	nouveau_bo_wr32(priv->bo, chan->chid * 16 / 4, fctx->base.sequence);
+	mutex_lock(&priv->mutex);
 	nouveau_bo_vma_del(priv->bo, &fctx->vma_gart);
 	nouveau_bo_vma_del(priv->bo, &fctx->vma);
+	mutex_unlock(&priv->mutex);
 	nouveau_fence_context_del(&fctx->base);
 	chan->fence = NULL;
 	nouveau_fence_context_free(&fctx->base);
@@ -134,11 +136,13 @@ nv84_fence_context_new(struct nouveau_channel *chan)
 	fctx->base.sync32 = nv84_fence_sync32;
 	fctx->base.sequence = nv84_fence_read(chan);
 
+	mutex_lock(&priv->mutex);
 	ret = nouveau_bo_vma_add(priv->bo, cli->vm, &fctx->vma);
 	if (ret == 0) {
 		ret = nouveau_bo_vma_add(priv->bo_gart, cli->vm,
 					&fctx->vma_gart);
 	}
+	mutex_unlock(&priv->mutex);
 
 	if (ret)
 		nv84_fence_context_del(chan);
@@ -193,7 +197,7 @@ nv84_fence_destroy(struct nouveau_drm *drm)
 int
 nv84_fence_create(struct nouveau_drm *drm)
 {
-	struct nvkm_fifo *fifo = nvxx_fifo(&drm->device);
+	struct nvkm_fifo *fifo = nvxx_fifo(&drm->client.device);
 	struct nv84_fence_priv *priv;
 	u32 domain;
 	int ret;
@@ -212,15 +216,17 @@ nv84_fence_create(struct nouveau_drm *drm)
 	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	priv->base.uevent = true;
 
+	mutex_init(&priv->mutex);
+
 	/* Use VRAM if there is any ; otherwise fallback to system memory */
-	domain = drm->device.info.ram_size != 0 ? TTM_PL_FLAG_VRAM :
+	domain = drm->client.device.info.ram_size != 0 ? TTM_PL_FLAG_VRAM :
 			 /*
 			  * fences created in sysmem must be non-cached or we
 			  * will lose CPU/GPU coherency!
 			  */
 			 TTM_PL_FLAG_TT | TTM_PL_FLAG_UNCACHED;
-	ret = nouveau_bo_new(drm->dev, 16 * priv->base.contexts, 0, domain, 0,
-			     0, NULL, NULL, &priv->bo);
+	ret = nouveau_bo_new(&drm->client, 16 * priv->base.contexts, 0,
+			     domain, 0, 0, NULL, NULL, &priv->bo);
 	if (ret == 0) {
 		ret = nouveau_bo_pin(priv->bo, domain, false);
 		if (ret == 0) {
@@ -233,7 +239,7 @@ nv84_fence_create(struct nouveau_drm *drm)
 	}
 
 	if (ret == 0)
-		ret = nouveau_bo_new(drm->dev, 16 * priv->base.contexts, 0,
+		ret = nouveau_bo_new(&drm->client, 16 * priv->base.contexts, 0,
 				     TTM_PL_FLAG_TT | TTM_PL_FLAG_UNCACHED, 0,
 				     0, NULL, NULL, &priv->bo_gart);
 	if (ret == 0) {
diff --git a/drivers/gpu/drm/nouveau/nvif/Kbuild b/drivers/gpu/drm/nouveau/nvif/Kbuild
index ff8ed3a04d06..067b5e9f5ec1 100644
--- a/drivers/gpu/drm/nouveau/nvif/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvif/Kbuild
@@ -1,4 +1,5 @@
 nvif-y := nvif/object.o
 nvif-y += nvif/client.o
 nvif-y += nvif/device.o
+nvif-y += nvif/driver.o
 nvif-y += nvif/notify.o
diff --git a/drivers/gpu/drm/nouveau/nvif/client.c b/drivers/gpu/drm/nouveau/nvif/client.c
index 29c20dfd894d..12db54965c20 100644
--- a/drivers/gpu/drm/nouveau/nvif/client.c
+++ b/drivers/gpu/drm/nouveau/nvif/client.c
@@ -26,6 +26,9 @@
 #include <nvif/driver.h>
 #include <nvif/ioctl.h>
 
+#include <nvif/class.h>
+#include <nvif/if0000.h>
+
 int
 nvif_client_ioctl(struct nvif_client *client, void *data, u32 size)
 {
@@ -47,37 +50,29 @@ nvif_client_resume(struct nvif_client *client)
 void
 nvif_client_fini(struct nvif_client *client)
 {
+	nvif_object_fini(&client->object);
 	if (client->driver) {
-		client->driver->fini(client->object.priv);
+		if (client->driver->fini)
+			client->driver->fini(client->object.priv);
 		client->driver = NULL;
-		client->object.client = NULL;
-		nvif_object_fini(&client->object);
 	}
 }
 
-static const struct nvif_driver *
-nvif_drivers[] = {
-#ifdef __KERNEL__
-	&nvif_driver_nvkm,
-#else
-	&nvif_driver_drm,
-	&nvif_driver_lib,
-	&nvif_driver_null,
-#endif
-	NULL
-};
-
 int
-nvif_client_init(const char *driver, const char *name, u64 device,
-		 const char *cfg, const char *dbg, struct nvif_client *client)
+nvif_client_init(struct nvif_client *parent, const char *name, u64 device,
+		 struct nvif_client *client)
 {
+	struct nvif_client_v0 args = { .device = device };
 	struct {
 		struct nvif_ioctl_v0 ioctl;
 		struct nvif_ioctl_nop_v0 nop;
-	} args = {};
-	int ret, i;
+	} nop = {};
+	int ret;
 
-	ret = nvif_object_init(NULL, 0, 0, NULL, 0, &client->object);
+	strncpy(args.name, name, sizeof(args.name));
+	ret = nvif_object_init(parent != client ? &parent->object : NULL,
+			       0, NVIF_CLASS_CLIENT, &args, sizeof(args),
+			       &client->object);
 	if (ret)
 		return ret;
 
@@ -85,19 +80,11 @@ nvif_client_init(const char *driver, const char *name, u64 device,
 	client->object.handle = ~0;
 	client->route = NVIF_IOCTL_V0_ROUTE_NVIF;
 	client->super = true;
-
-	for (i = 0, ret = -EINVAL; (client->driver = nvif_drivers[i]); i++) {
-		if (!driver || !strcmp(client->driver->name, driver)) {
-			ret = client->driver->init(name, device, cfg, dbg,
-						  &client->object.priv);
-			if (!ret || driver)
-				break;
-		}
-	}
+	client->driver = parent->driver;
 
 	if (ret == 0) {
-		ret = nvif_client_ioctl(client, &args, sizeof(args));
-		client->version = args.nop.version;
+		ret = nvif_client_ioctl(client, &nop, sizeof(nop));
+		client->version = nop.nop.version;
 	}
 
 	if (ret)
diff --git a/drivers/gpu/drm/nouveau/nvif/driver.c b/drivers/gpu/drm/nouveau/nvif/driver.c
new file mode 100644
index 000000000000..701330956e33
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvif/driver.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include <nvif/driver.h>
+#include <nvif/client.h>
+
+static const struct nvif_driver *
+nvif_driver[] = {
+#ifdef __KERNEL__
+	&nvif_driver_nvkm,
+#else
+	&nvif_driver_drm,
+	&nvif_driver_lib,
+	&nvif_driver_null,
+#endif
+	NULL
+};
+
+int
+nvif_driver_init(const char *drv, const char *cfg, const char *dbg,
+		 const char *name, u64 device, struct nvif_client *client)
+{
+	int ret = -EINVAL, i;
+
+	for (i = 0; (client->driver = nvif_driver[i]); i++) {
+		if (!drv || !strcmp(client->driver->name, drv)) {
+			ret = client->driver->init(name, device, cfg, dbg,
+						   &client->object.priv);
+			if (ret == 0)
+				break;
+			client->driver->fini(client->object.priv);
+		}
+	}
+
+	if (ret == 0)
+		ret = nvif_client_init(client, name, device, client);
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/Kbuild b/drivers/gpu/drm/nouveau/nvkm/Kbuild
index 2832147b676c..e664378f6eda 100644
--- a/drivers/gpu/drm/nouveau/nvkm/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/Kbuild
@@ -1,3 +1,4 @@
 include $(src)/nvkm/core/Kbuild
+include $(src)/nvkm/falcon/Kbuild
 include $(src)/nvkm/subdev/Kbuild
 include $(src)/nvkm/engine/Kbuild
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/client.c b/drivers/gpu/drm/nouveau/nvkm/core/client.c
index e1943910858e..0d3a896892b4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/client.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/client.c
@@ -31,6 +31,43 @@
 #include <nvif/if0000.h>
 #include <nvif/unpack.h>
 
+static int
+nvkm_uclient_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		 struct nvkm_object **pobject)
+{
+	union {
+		struct nvif_client_v0 v0;
+	} *args = argv;
+	struct nvkm_client *client;
+	int ret = -ENOSYS;
+
+	if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))){
+		args->v0.name[sizeof(args->v0.name) - 1] = 0;
+		ret = nvkm_client_new(args->v0.name, args->v0.device, NULL,
+				      NULL, oclass->client->ntfy, &client);
+		if (ret)
+			return ret;
+	} else
+		return ret;
+
+	client->object.client = oclass->client;
+	client->object.handle = oclass->handle;
+	client->object.route  = oclass->route;
+	client->object.token  = oclass->token;
+	client->object.object = oclass->object;
+	client->debug = oclass->client->debug;
+	*pobject = &client->object;
+	return 0;
+}
+
+const struct nvkm_sclass
+nvkm_uclient_sclass = {
+	.oclass = NVIF_CLASS_CLIENT,
+	.minver = 0,
+	.maxver = 0,
+	.ctor = nvkm_uclient_new,
+};
+
 struct nvkm_client_notify {
 	struct nvkm_client *client;
 	struct nvkm_notify n;
@@ -138,17 +175,30 @@ nvkm_client_notify_new(struct nvkm_object *object,
 	return ret;
 }
 
+static const struct nvkm_object_func nvkm_client;
+struct nvkm_client *
+nvkm_client_search(struct nvkm_client *client, u64 handle)
+{
+	struct nvkm_object *object;
+
+	object = nvkm_object_search(client, handle, &nvkm_client);
+	if (IS_ERR(object))
+		return (void *)object;
+
+	return nvkm_client(object);
+}
+
 static int
-nvkm_client_mthd_devlist(struct nvkm_object *object, void *data, u32 size)
+nvkm_client_mthd_devlist(struct nvkm_client *client, void *data, u32 size)
 {
 	union {
-		struct nv_client_devlist_v0 v0;
+		struct nvif_client_devlist_v0 v0;
 	} *args = data;
 	int ret = -ENOSYS;
 
-	nvif_ioctl(object, "client devlist size %d\n", size);
+	nvif_ioctl(&client->object, "client devlist size %d\n", size);
 	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) {
-		nvif_ioctl(object, "client devlist vers %d count %d\n",
+		nvif_ioctl(&client->object, "client devlist vers %d count %d\n",
 			   args->v0.version, args->v0.count);
 		if (size == sizeof(args->v0.device[0]) * args->v0.count) {
 			ret = nvkm_device_list(args->v0.device, args->v0.count);
@@ -167,9 +217,10 @@ nvkm_client_mthd_devlist(struct nvkm_object *object, void *data, u32 size)
 static int
 nvkm_client_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
 {
+	struct nvkm_client *client = nvkm_client(object);
 	switch (mthd) {
-	case NV_CLIENT_DEVLIST:
-		return nvkm_client_mthd_devlist(object, data, size);
+	case NVIF_CLIENT_V0_DEVLIST:
+		return nvkm_client_mthd_devlist(client, data, size);
 	default:
 		break;
 	}
@@ -190,7 +241,8 @@ nvkm_client_child_get(struct nvkm_object *object, int index,
 	const struct nvkm_sclass *sclass;
 
 	switch (index) {
-	case 0: sclass = &nvkm_udevice_sclass; break;
+	case 0: sclass = &nvkm_uclient_sclass; break;
+	case 1: sclass = &nvkm_udevice_sclass; break;
 	default:
 		return -EINVAL;
 	}
@@ -200,110 +252,54 @@ nvkm_client_child_get(struct nvkm_object *object, int index,
 	return 0;
 }
 
-static const struct nvkm_object_func
-nvkm_client_object_func = {
-	.mthd = nvkm_client_mthd,
-	.sclass = nvkm_client_child_get,
-};
-
-void
-nvkm_client_remove(struct nvkm_client *client, struct nvkm_object *object)
-{
-	if (!RB_EMPTY_NODE(&object->node))
-		rb_erase(&object->node, &client->objroot);
-}
-
-bool
-nvkm_client_insert(struct nvkm_client *client, struct nvkm_object *object)
-{
-	struct rb_node **ptr = &client->objroot.rb_node;
-	struct rb_node *parent = NULL;
-
-	while (*ptr) {
-		struct nvkm_object *this =
-			container_of(*ptr, typeof(*this), node);
-		parent = *ptr;
-		if (object->object < this->object)
-			ptr = &parent->rb_left;
-		else
-		if (object->object > this->object)
-			ptr = &parent->rb_right;
-		else
-			return false;
-	}
-
-	rb_link_node(&object->node, parent, ptr);
-	rb_insert_color(&object->node, &client->objroot);
-	return true;
-}
-
-struct nvkm_object *
-nvkm_client_search(struct nvkm_client *client, u64 handle)
-{
-	struct rb_node *node = client->objroot.rb_node;
-	while (node) {
-		struct nvkm_object *object =
-			container_of(node, typeof(*object), node);
-		if (handle < object->object)
-			node = node->rb_left;
-		else
-		if (handle > object->object)
-			node = node->rb_right;
-		else
-			return object;
-	}
-	return NULL;
-}
-
-int
-nvkm_client_fini(struct nvkm_client *client, bool suspend)
+static int
+nvkm_client_fini(struct nvkm_object *object, bool suspend)
 {
-	struct nvkm_object *object = &client->object;
+	struct nvkm_client *client = nvkm_client(object);
 	const char *name[2] = { "fini", "suspend" };
 	int i;
 	nvif_debug(object, "%s notify\n", name[suspend]);
 	for (i = 0; i < ARRAY_SIZE(client->notify); i++)
 		nvkm_client_notify_put(client, i);
-	return nvkm_object_fini(&client->object, suspend);
-}
-
-int
-nvkm_client_init(struct nvkm_client *client)
-{
-	return nvkm_object_init(&client->object);
+	return 0;
 }
 
-void
-nvkm_client_del(struct nvkm_client **pclient)
+static void *
+nvkm_client_dtor(struct nvkm_object *object)
 {
-	struct nvkm_client *client = *pclient;
+	struct nvkm_client *client = nvkm_client(object);
 	int i;
-	if (client) {
-		nvkm_client_fini(client, false);
-		for (i = 0; i < ARRAY_SIZE(client->notify); i++)
-			nvkm_client_notify_del(client, i);
-		nvkm_object_dtor(&client->object);
-		kfree(*pclient);
-		*pclient = NULL;
-	}
+	for (i = 0; i < ARRAY_SIZE(client->notify); i++)
+		nvkm_client_notify_del(client, i);
+	return client;
 }
 
+static const struct nvkm_object_func
+nvkm_client = {
+	.dtor = nvkm_client_dtor,
+	.fini = nvkm_client_fini,
+	.mthd = nvkm_client_mthd,
+	.sclass = nvkm_client_child_get,
+};
+
 int
 nvkm_client_new(const char *name, u64 device, const char *cfg,
-		const char *dbg, struct nvkm_client **pclient)
+		const char *dbg,
+		int (*ntfy)(const void *, u32, const void *, u32),
+		struct nvkm_client **pclient)
 {
-	struct nvkm_oclass oclass = {};
+	struct nvkm_oclass oclass = { .base = nvkm_uclient_sclass };
 	struct nvkm_client *client;
 
 	if (!(client = *pclient = kzalloc(sizeof(*client), GFP_KERNEL)))
 		return -ENOMEM;
 	oclass.client = client;
 
-	nvkm_object_ctor(&nvkm_client_object_func, &oclass, &client->object);
+	nvkm_object_ctor(&nvkm_client, &oclass, &client->object);
 	snprintf(client->name, sizeof(client->name), "%s", name);
 	client->device = device;
 	client->debug = nvkm_dbgopt(dbg, "CLIENT");
 	client->objroot = RB_ROOT;
-	client->dmaroot = RB_ROOT;
+	client->ntfy = ntfy;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c b/drivers/gpu/drm/nouveau/nvkm/core/engine.c
index ee8e5831fe37..b6c916954a10 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c
@@ -27,6 +27,14 @@
 
 #include <subdev/fb.h>
 
+bool
+nvkm_engine_chsw_load(struct nvkm_engine *engine)
+{
+	if (engine->func->chsw_load)
+		return engine->func->chsw_load(engine);
+	return false;
+}
+
 void
 nvkm_engine_unref(struct nvkm_engine **pengine)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c b/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c
index b0db51847c36..be19bbe56bba 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c
@@ -29,7 +29,8 @@
 #include <nvif/ioctl.h>
 
 static int
-nvkm_ioctl_nop(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_nop(struct nvkm_client *client,
+	       struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_nop_v0 v0;
@@ -46,7 +47,8 @@ nvkm_ioctl_nop(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_sclass(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_sclass(struct nvkm_client *client,
+		  struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_sclass_v0 v0;
@@ -78,12 +80,12 @@ nvkm_ioctl_sclass(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_new(struct nvkm_object *parent, void *data, u32 size)
+nvkm_ioctl_new(struct nvkm_client *client,
+	       struct nvkm_object *parent, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_new_v0 v0;
 	} *args = data;
-	struct nvkm_client *client = parent->client;
 	struct nvkm_object *object = NULL;
 	struct nvkm_oclass oclass;
 	int ret = -ENOSYS, i = 0;
@@ -104,9 +106,11 @@ nvkm_ioctl_new(struct nvkm_object *parent, void *data, u32 size)
 
 	do {
 		memset(&oclass, 0x00, sizeof(oclass));
-		oclass.client = client;
 		oclass.handle = args->v0.handle;
+		oclass.route  = args->v0.route;
+		oclass.token  = args->v0.token;
 		oclass.object = args->v0.object;
+		oclass.client = client;
 		oclass.parent = parent;
 		ret = parent->func->sclass(parent, i++, &oclass);
 		if (ret)
@@ -125,10 +129,7 @@ nvkm_ioctl_new(struct nvkm_object *parent, void *data, u32 size)
 		ret = nvkm_object_init(object);
 		if (ret == 0) {
 			list_add(&object->head, &parent->tree);
-			object->route = args->v0.route;
-			object->token = args->v0.token;
-			object->object = args->v0.object;
-			if (nvkm_client_insert(client, object)) {
+			if (nvkm_object_insert(object)) {
 				client->data = object;
 				return 0;
 			}
@@ -142,7 +143,8 @@ nvkm_ioctl_new(struct nvkm_object *parent, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_del(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_del(struct nvkm_client *client,
+	       struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_del none;
@@ -156,11 +158,12 @@ nvkm_ioctl_del(struct nvkm_object *object, void *data, u32 size)
 		nvkm_object_del(&object);
 	}
 
-	return ret;
+	return ret ? ret : 1;
 }
 
 static int
-nvkm_ioctl_mthd(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_mthd(struct nvkm_client *client,
+		struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_mthd_v0 v0;
@@ -179,7 +182,8 @@ nvkm_ioctl_mthd(struct nvkm_object *object, void *data, u32 size)
 
 
 static int
-nvkm_ioctl_rd(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_rd(struct nvkm_client *client,
+	      struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_rd_v0 v0;
@@ -218,7 +222,8 @@ nvkm_ioctl_rd(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_wr(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_wr(struct nvkm_client *client,
+	      struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_wr_v0 v0;
@@ -246,7 +251,8 @@ nvkm_ioctl_wr(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_map(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_map(struct nvkm_client *client,
+	       struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_map_v0 v0;
@@ -264,7 +270,8 @@ nvkm_ioctl_map(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_unmap(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_unmap(struct nvkm_client *client,
+		 struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_unmap none;
@@ -280,7 +287,8 @@ nvkm_ioctl_unmap(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_ntfy_new(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_ntfy_new(struct nvkm_client *client,
+		    struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_ntfy_new_v0 v0;
@@ -306,9 +314,9 @@ nvkm_ioctl_ntfy_new(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_ntfy_del(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_ntfy_del(struct nvkm_client *client,
+		    struct nvkm_object *object, void *data, u32 size)
 {
-	struct nvkm_client *client = object->client;
 	union {
 		struct nvif_ioctl_ntfy_del_v0 v0;
 	} *args = data;
@@ -325,9 +333,9 @@ nvkm_ioctl_ntfy_del(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_ntfy_get(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_ntfy_get(struct nvkm_client *client,
+		    struct nvkm_object *object, void *data, u32 size)
 {
-	struct nvkm_client *client = object->client;
 	union {
 		struct nvif_ioctl_ntfy_get_v0 v0;
 	} *args = data;
@@ -344,9 +352,9 @@ nvkm_ioctl_ntfy_get(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_ntfy_put(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_ntfy_put(struct nvkm_client *client,
+		    struct nvkm_object *object, void *data, u32 size)
 {
-	struct nvkm_client *client = object->client;
 	union {
 		struct nvif_ioctl_ntfy_put_v0 v0;
 	} *args = data;
@@ -364,7 +372,7 @@ nvkm_ioctl_ntfy_put(struct nvkm_object *object, void *data, u32 size)
 
 static struct {
 	int version;
-	int (*func)(struct nvkm_object *, void *, u32);
+	int (*func)(struct nvkm_client *, struct nvkm_object *, void *, u32);
 }
 nvkm_ioctl_v0[] = {
 	{ 0x00, nvkm_ioctl_nop },
@@ -389,13 +397,10 @@ nvkm_ioctl_path(struct nvkm_client *client, u64 handle, u32 type,
 	struct nvkm_object *object;
 	int ret;
 
-	if (handle)
-		object = nvkm_client_search(client, handle);
-	else
-		object = &client->object;
-	if (unlikely(!object)) {
+	object = nvkm_object_search(client, handle, NULL);
+	if (IS_ERR(object)) {
 		nvif_ioctl(&client->object, "object not found\n");
-		return -ENOENT;
+		return PTR_ERR(object);
 	}
 
 	if (owner != NVIF_IOCTL_V0_OWNER_ANY && owner != object->route) {
@@ -407,7 +412,7 @@ nvkm_ioctl_path(struct nvkm_client *client, u64 handle, u32 type,
 
 	if (ret = -EINVAL, type < ARRAY_SIZE(nvkm_ioctl_v0)) {
 		if (nvkm_ioctl_v0[type].version == 0)
-			ret = nvkm_ioctl_v0[type].func(object, data, size);
+			ret = nvkm_ioctl_v0[type].func(client, object, data, size);
 	}
 
 	return ret;
@@ -436,12 +441,13 @@ nvkm_ioctl(struct nvkm_client *client, bool supervisor,
 				      &args->v0.route, &args->v0.token);
 	}
 
-	nvif_ioctl(object, "return %d\n", ret);
-	if (hack) {
-		*hack = client->data;
-		client->data = NULL;
+	if (ret != 1) {
+		nvif_ioctl(object, "return %d\n", ret);
+		if (hack) {
+			*hack = client->data;
+			client->data = NULL;
+		}
 	}
 
-	client->super = false;
 	return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/mm.c b/drivers/gpu/drm/nouveau/nvkm/core/mm.c
index 09a1eee8fd33..fd19d652a7ab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/mm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/mm.c
@@ -147,6 +147,7 @@ nvkm_mm_head(struct nvkm_mm *mm, u8 heap, u8 type, u32 size_max, u32 size_min,
 		if (!this)
 			return -ENOMEM;
 
+		this->next = NULL;
 		this->type = type;
 		list_del(&this->fl_entry);
 		*pnode = this;
@@ -225,6 +226,7 @@ nvkm_mm_tail(struct nvkm_mm *mm, u8 heap, u8 type, u32 size_max, u32 size_min,
 		if (!this)
 			return -ENOMEM;
 
+		this->next = NULL;
 		this->type = type;
 		list_del(&this->fl_entry);
 		*pnode = this;
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/object.c b/drivers/gpu/drm/nouveau/nvkm/core/object.c
index 67aa7223dcd7..89d2e9da11c7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/object.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/object.c
@@ -25,6 +25,65 @@
 #include <core/client.h>
 #include <core/engine.h>
 
+struct nvkm_object *
+nvkm_object_search(struct nvkm_client *client, u64 handle,
+		   const struct nvkm_object_func *func)
+{
+	struct nvkm_object *object;
+
+	if (handle) {
+		struct rb_node *node = client->objroot.rb_node;
+		while (node) {
+			object = rb_entry(node, typeof(*object), node);
+			if (handle < object->object)
+				node = node->rb_left;
+			else
+			if (handle > object->object)
+				node = node->rb_right;
+			else
+				goto done;
+		}
+		return ERR_PTR(-ENOENT);
+	} else {
+		object = &client->object;
+	}
+
+done:
+	if (unlikely(func && object->func != func))
+		return ERR_PTR(-EINVAL);
+	return object;
+}
+
+void
+nvkm_object_remove(struct nvkm_object *object)
+{
+	if (!RB_EMPTY_NODE(&object->node))
+		rb_erase(&object->node, &object->client->objroot);
+}
+
+bool
+nvkm_object_insert(struct nvkm_object *object)
+{
+	struct rb_node **ptr = &object->client->objroot.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*ptr) {
+		struct nvkm_object *this = rb_entry(*ptr, typeof(*this), node);
+		parent = *ptr;
+		if (object->object < this->object)
+			ptr = &parent->rb_left;
+		else
+		if (object->object > this->object)
+			ptr = &parent->rb_right;
+		else
+			return false;
+	}
+
+	rb_link_node(&object->node, parent, ptr);
+	rb_insert_color(&object->node, &object->client->objroot);
+	return true;
+}
+
 int
 nvkm_object_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
 {
@@ -214,7 +273,7 @@ nvkm_object_del(struct nvkm_object **pobject)
 	struct nvkm_object *object = *pobject;
 	if (object && !WARN_ON(!object->func)) {
 		*pobject = nvkm_object_dtor(object);
-		nvkm_client_remove(object->client, object);
+		nvkm_object_remove(object);
 		list_del(&object->head);
 		kfree(*pobject);
 		*pobject = NULL;
@@ -230,6 +289,9 @@ nvkm_object_ctor(const struct nvkm_object_func *func,
 	object->engine = nvkm_engine_ref(oclass->engine);
 	object->oclass = oclass->base.oclass;
 	object->handle = oclass->handle;
+	object->route  = oclass->route;
+	object->token  = oclass->token;
+	object->object = oclass->object;
 	INIT_LIST_HEAD(&object->head);
 	INIT_LIST_HEAD(&object->tree);
 	RB_CLEAR_NODE(&object->node);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index cceda959b47c..273562dd6bbd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -993,7 +993,7 @@ nv92_chipset = {
 	.mc = g84_mc_new,
 	.mmu = nv50_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = g84_pci_new,
+	.pci = g92_pci_new,
 	.therm = g84_therm_new,
 	.timer = nv41_timer_new,
 	.volt = nv40_volt_new,
@@ -2138,6 +2138,7 @@ nv12b_chipset = {
 	.ltc = gm200_ltc_new,
 	.mc = gk20a_mc_new,
 	.mmu = gf100_mmu_new,
+	.pmu = gm20b_pmu_new,
 	.secboot = gm20b_secboot_new,
 	.timer = gk20a_timer_new,
 	.top = gk104_top_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c
index 0a1381a84552..070ec5e18fdb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c
@@ -137,7 +137,6 @@ nv50_disp_dmac_new_(const struct nv50_disp_dmac_func *func,
 		    const struct nvkm_oclass *oclass,
 		    struct nvkm_object **pobject)
 {
-	struct nvkm_device *device = root->disp->base.engine.subdev.device;
 	struct nvkm_client *client = oclass->client;
 	struct nvkm_dmaobj *dmaobj;
 	struct nv50_disp_dmac *chan;
@@ -153,9 +152,9 @@ nv50_disp_dmac_new_(const struct nv50_disp_dmac_func *func,
 	if (ret)
 		return ret;
 
-	dmaobj = nvkm_dma_search(device->dma, client, push);
-	if (!dmaobj)
-		return -ENOENT;
+	dmaobj = nvkm_dmaobj_search(client, push);
+	if (IS_ERR(dmaobj))
+		return PTR_ERR(dmaobj);
 
 	if (dmaobj->limit - dmaobj->start != 0xfff)
 		return -EINVAL;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c
index 6f0436df0219..f8f2f16c22a2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c
@@ -59,7 +59,7 @@ gt215_hda_eld(NV50_DISP_MTHD_V1)
 			);
 		}
 		for (i = 0; i < size; i++)
-			nvkm_wr32(device, 0x61c440 + soff, (i << 8) | args->v0.data[0]);
+			nvkm_wr32(device, 0x61c440 + soff, (i << 8) | args->v0.data[i]);
 		for (; i < 0x60; i++)
 			nvkm_wr32(device, 0x61c440 + soff, (i << 8));
 		nvkm_mask(device, 0x61c448 + soff, 0x80000003, 0x80000003);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
index 567466f93cd5..0db8efbf1c2e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
@@ -433,8 +433,6 @@ nv50_disp_dptmds_war(struct nvkm_device *device)
 	case 0x94:
 	case 0x96:
 	case 0x98:
-	case 0xaa:
-	case 0xac:
 		return true;
 	default:
 		break;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c
index 4510cb6e10a8..627b9ee1ddd2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c
@@ -39,13 +39,6 @@ g94_sor_loff(struct nvkm_output_dp *outp)
 }
 
 /*******************************************************************************
- * TMDS/LVDS
- ******************************************************************************/
-static const struct nvkm_output_func
-g94_sor_output_func = {
-};
-
-/*******************************************************************************
  * DisplayPort
  ******************************************************************************/
 u32
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/dma/base.c
index f11ebdd16c77..11b7b8fd5dda 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/dma/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/base.c
@@ -28,24 +28,6 @@
 
 #include <nvif/class.h>
 
-struct nvkm_dmaobj *
-nvkm_dma_search(struct nvkm_dma *dma, struct nvkm_client *client, u64 object)
-{
-	struct rb_node *node = client->dmaroot.rb_node;
-	while (node) {
-		struct nvkm_dmaobj *dmaobj =
-			container_of(node, typeof(*dmaobj), rb);
-		if (object < dmaobj->handle)
-			node = node->rb_left;
-		else
-		if (object > dmaobj->handle)
-			node = node->rb_right;
-		else
-			return dmaobj;
-	}
-	return NULL;
-}
-
 static int
 nvkm_dma_oclass_new(struct nvkm_device *device,
 		    const struct nvkm_oclass *oclass, void *data, u32 size,
@@ -53,34 +35,12 @@ nvkm_dma_oclass_new(struct nvkm_device *device,
 {
 	struct nvkm_dma *dma = nvkm_dma(oclass->engine);
 	struct nvkm_dmaobj *dmaobj = NULL;
-	struct nvkm_client *client = oclass->client;
-	struct rb_node **ptr = &client->dmaroot.rb_node;
-	struct rb_node *parent = NULL;
 	int ret;
 
 	ret = dma->func->class_new(dma, oclass, data, size, &dmaobj);
 	if (dmaobj)
 		*pobject = &dmaobj->object;
-	if (ret)
-		return ret;
-
-	dmaobj->handle = oclass->object;
-
-	while (*ptr) {
-		struct nvkm_dmaobj *obj = container_of(*ptr, typeof(*obj), rb);
-		parent = *ptr;
-		if (dmaobj->handle < obj->handle)
-			ptr = &parent->rb_left;
-		else
-		if (dmaobj->handle > obj->handle)
-			ptr = &parent->rb_right;
-		else
-			return -EEXIST;
-	}
-
-	rb_link_node(&dmaobj->rb, parent, ptr);
-	rb_insert_color(&dmaobj->rb, &client->dmaroot);
-	return 0;
+	return ret;
 }
 
 static const struct nvkm_device_oclass
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c
index 13c661b1ef14..d20cc0681a88 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c
@@ -31,6 +31,19 @@
 #include <nvif/cl0002.h>
 #include <nvif/unpack.h>
 
+static const struct nvkm_object_func nvkm_dmaobj_func;
+struct nvkm_dmaobj *
+nvkm_dmaobj_search(struct nvkm_client *client, u64 handle)
+{
+	struct nvkm_object *object;
+
+	object = nvkm_object_search(client, handle, &nvkm_dmaobj_func);
+	if (IS_ERR(object))
+		return (void *)object;
+
+	return nvkm_dmaobj(object);
+}
+
 static int
 nvkm_dmaobj_bind(struct nvkm_object *base, struct nvkm_gpuobj *gpuobj,
 		 int align, struct nvkm_gpuobj **pgpuobj)
@@ -42,10 +55,7 @@ nvkm_dmaobj_bind(struct nvkm_object *base, struct nvkm_gpuobj *gpuobj,
 static void *
 nvkm_dmaobj_dtor(struct nvkm_object *base)
 {
-	struct nvkm_dmaobj *dmaobj = nvkm_dmaobj(base);
-	if (!RB_EMPTY_NODE(&dmaobj->rb))
-		rb_erase(&dmaobj->rb, &dmaobj->object.client->dmaroot);
-	return dmaobj;
+	return nvkm_dmaobj(base);
 }
 
 static const struct nvkm_object_func
@@ -74,7 +84,6 @@ nvkm_dmaobj_ctor(const struct nvkm_dmaobj_func *func, struct nvkm_dma *dma,
 	nvkm_object_ctor(&nvkm_dmaobj_func, oclass, &dmaobj->object);
 	dmaobj->func = func;
 	dmaobj->dma = dma;
-	RB_CLEAR_NODE(&dmaobj->rb);
 
 	nvif_ioctl(parent, "create dma size %d\n", *psize);
 	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
index 1c9682ae3a6b..660ca7aa95ea 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
@@ -32,6 +32,17 @@
 #include <nvif/unpack.h>
 
 void
+nvkm_fifo_recover_chan(struct nvkm_fifo *fifo, int chid)
+{
+	unsigned long flags;
+	if (WARN_ON(!fifo->func->recover_chan))
+		return;
+	spin_lock_irqsave(&fifo->lock, flags);
+	fifo->func->recover_chan(fifo, chid);
+	spin_unlock_irqrestore(&fifo->lock, flags);
+}
+
+void
 nvkm_fifo_pause(struct nvkm_fifo *fifo, unsigned long *flags)
 {
 	return fifo->func->pause(fifo, flags);
@@ -55,19 +66,29 @@ nvkm_fifo_chan_put(struct nvkm_fifo *fifo, unsigned long flags,
 }
 
 struct nvkm_fifo_chan *
-nvkm_fifo_chan_inst(struct nvkm_fifo *fifo, u64 inst, unsigned long *rflags)
+nvkm_fifo_chan_inst_locked(struct nvkm_fifo *fifo, u64 inst)
 {
 	struct nvkm_fifo_chan *chan;
-	unsigned long flags;
-	spin_lock_irqsave(&fifo->lock, flags);
 	list_for_each_entry(chan, &fifo->chan, head) {
 		if (chan->inst->addr == inst) {
 			list_del(&chan->head);
 			list_add(&chan->head, &fifo->chan);
-			*rflags = flags;
 			return chan;
 		}
 	}
+	return NULL;
+}
+
+struct nvkm_fifo_chan *
+nvkm_fifo_chan_inst(struct nvkm_fifo *fifo, u64 inst, unsigned long *rflags)
+{
+	struct nvkm_fifo_chan *chan;
+	unsigned long flags;
+	spin_lock_irqsave(&fifo->lock, flags);
+	if ((chan = nvkm_fifo_chan_inst_locked(fifo, inst))) {
+		*rflags = flags;
+		return chan;
+	}
 	spin_unlock_irqrestore(&fifo->lock, flags);
 	return NULL;
 }
@@ -90,9 +111,34 @@ nvkm_fifo_chan_chid(struct nvkm_fifo *fifo, int chid, unsigned long *rflags)
 	return NULL;
 }
 
+void
+nvkm_fifo_kevent(struct nvkm_fifo *fifo, int chid)
+{
+	nvkm_event_send(&fifo->kevent, 1, chid, NULL, 0);
+}
+
 static int
-nvkm_fifo_event_ctor(struct nvkm_object *object, void *data, u32 size,
-		     struct nvkm_notify *notify)
+nvkm_fifo_kevent_ctor(struct nvkm_object *object, void *data, u32 size,
+		      struct nvkm_notify *notify)
+{
+	struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
+	if (size == 0) {
+		notify->size  = 0;
+		notify->types = 1;
+		notify->index = chan->chid;
+		return 0;
+	}
+	return -ENOSYS;
+}
+
+static const struct nvkm_event_func
+nvkm_fifo_kevent_func = {
+	.ctor = nvkm_fifo_kevent_ctor,
+};
+
+static int
+nvkm_fifo_cevent_ctor(struct nvkm_object *object, void *data, u32 size,
+		      struct nvkm_notify *notify)
 {
 	if (size == 0) {
 		notify->size  = 0;
@@ -104,10 +150,16 @@ nvkm_fifo_event_ctor(struct nvkm_object *object, void *data, u32 size,
 }
 
 static const struct nvkm_event_func
-nvkm_fifo_event_func = {
-	.ctor = nvkm_fifo_event_ctor,
+nvkm_fifo_cevent_func = {
+	.ctor = nvkm_fifo_cevent_ctor,
 };
 
+void
+nvkm_fifo_cevent(struct nvkm_fifo *fifo)
+{
+	nvkm_event_send(&fifo->cevent, 1, 0, NULL, 0);
+}
+
 static void
 nvkm_fifo_uevent_fini(struct nvkm_event *event, int type, int index)
 {
@@ -241,6 +293,7 @@ nvkm_fifo_dtor(struct nvkm_engine *engine)
 	void *data = fifo;
 	if (fifo->func->dtor)
 		data = fifo->func->dtor(fifo);
+	nvkm_event_fini(&fifo->kevent);
 	nvkm_event_fini(&fifo->cevent);
 	nvkm_event_fini(&fifo->uevent);
 	return data;
@@ -283,5 +336,9 @@ nvkm_fifo_ctor(const struct nvkm_fifo_func *func, struct nvkm_device *device,
 			return ret;
 	}
 
-	return nvkm_event_init(&nvkm_fifo_event_func, 1, 1, &fifo->cevent);
+	ret = nvkm_event_init(&nvkm_fifo_cevent_func, 1, 1, &fifo->cevent);
+	if (ret)
+		return ret;
+
+	return nvkm_event_init(&nvkm_fifo_kevent_func, 1, nr, &fifo->kevent);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
index dc6d4678f228..fab760ae922f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
@@ -371,9 +371,9 @@ nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func,
 
 	/* allocate push buffer ctxdma instance */
 	if (push) {
-		dmaobj = nvkm_dma_search(device->dma, oclass->client, push);
-		if (!dmaobj)
-			return -ENOENT;
+		dmaobj = nvkm_dmaobj_search(client, push);
+		if (IS_ERR(dmaobj))
+			return PTR_ERR(dmaobj);
 
 		ret = nvkm_object_bind(&dmaobj->object, chan->inst, -16,
 				       &chan->push);
@@ -410,6 +410,6 @@ nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func,
 		     base + user * chan->chid;
 	chan->size = user;
 
-	nvkm_event_send(&fifo->cevent, 1, 0, NULL, 0);
+	nvkm_fifo_cevent(fifo);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
index 55dc415c5c08..d8019bdacd61 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
@@ -29,5 +29,5 @@ struct nvkm_fifo_chan_oclass {
 	struct nvkm_sclass base;
 };
 
-int g84_fifo_chan_ntfy(struct nvkm_fifo_chan *, u32, struct nvkm_event **);
+int gf100_fifo_chan_ntfy(struct nvkm_fifo_chan *, u32, struct nvkm_event **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c
index 15a992b3580a..61797c4dd07a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c
@@ -30,12 +30,12 @@
 
 #include <nvif/cl826e.h>
 
-int
+static int
 g84_fifo_chan_ntfy(struct nvkm_fifo_chan *chan, u32 type,
 		   struct nvkm_event **pevent)
 {
 	switch (type) {
-	case G82_CHANNEL_DMA_V0_NTFY_UEVENT:
+	case NV826E_V0_NTFY_NON_STALL_INTERRUPT:
 		*pevent = &chan->fifo->uevent;
 		return 0;
 	default:
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
index ec68ea9747d5..cd468ab1db12 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
@@ -68,7 +68,14 @@ gf100_fifo_runlist_commit(struct gf100_fifo *fifo)
 	}
 	nvkm_done(cur);
 
-	target = (nvkm_memory_target(cur) == NVKM_MEM_TARGET_HOST) ? 0x3 : 0x0;
+	switch (nvkm_memory_target(cur)) {
+	case NVKM_MEM_TARGET_VRAM: target = 0; break;
+	case NVKM_MEM_TARGET_NCOH: target = 3; break;
+	default:
+		mutex_unlock(&subdev->mutex);
+		WARN_ON(1);
+		return;
+	}
 
 	nvkm_wr32(device, 0x002270, (nvkm_memory_addr(cur) >> 12) |
 				    (target << 28));
@@ -183,6 +190,7 @@ gf100_fifo_recover(struct gf100_fifo *fifo, struct nvkm_engine *engine,
 	if (engine != &fifo->base.engine)
 		fifo->recover.mask |= 1ULL << engine->subdev.index;
 	schedule_work(&fifo->recover.work);
+	nvkm_fifo_kevent(&fifo->base, chid);
 }
 
 static const struct nvkm_enum
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
index 38c0910722c0..3a24788c3185 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -27,11 +27,71 @@
 #include <core/client.h>
 #include <core/gpuobj.h>
 #include <subdev/bar.h>
+#include <subdev/timer.h>
 #include <subdev/top.h>
 #include <engine/sw.h>
 
 #include <nvif/class.h>
 
+struct gk104_fifo_engine_status {
+	bool busy;
+	bool faulted;
+	bool chsw;
+	bool save;
+	bool load;
+	struct {
+		bool tsg;
+		u32 id;
+	} prev, next, *chan;
+};
+
+static void
+gk104_fifo_engine_status(struct gk104_fifo *fifo, int engn,
+			 struct gk104_fifo_engine_status *status)
+{
+	struct nvkm_engine *engine = fifo->engine[engn].engine;
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x08));
+
+	status->busy     = !!(stat & 0x80000000);
+	status->faulted  = !!(stat & 0x40000000);
+	status->next.tsg = !!(stat & 0x10000000);
+	status->next.id  =   (stat & 0x0fff0000) >> 16;
+	status->chsw     = !!(stat & 0x00008000);
+	status->save     = !!(stat & 0x00004000);
+	status->load     = !!(stat & 0x00002000);
+	status->prev.tsg = !!(stat & 0x00001000);
+	status->prev.id  =   (stat & 0x00000fff);
+	status->chan     = NULL;
+
+	if (status->busy && status->chsw) {
+		if (status->load && status->save) {
+			if (engine && nvkm_engine_chsw_load(engine))
+				status->chan = &status->next;
+			else
+				status->chan = &status->prev;
+		} else
+		if (status->load) {
+			status->chan = &status->next;
+		} else {
+			status->chan = &status->prev;
+		}
+	} else
+	if (status->load) {
+		status->chan = &status->prev;
+	}
+
+	nvkm_debug(subdev, "engine %02d: busy %d faulted %d chsw %d "
+			   "save %d load %d %sid %d%s-> %sid %d%s\n",
+		   engn, status->busy, status->faulted,
+		   status->chsw, status->save, status->load,
+		   status->prev.tsg ? "tsg" : "ch", status->prev.id,
+		   status->chan == &status->prev ? "*" : " ",
+		   status->next.tsg ? "tsg" : "ch", status->next.id,
+		   status->chan == &status->next ? "*" : " ");
+}
+
 static int
 gk104_fifo_class_get(struct nvkm_fifo *base, int index,
 		     const struct nvkm_fifo_chan_oclass **psclass)
@@ -83,10 +143,13 @@ gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl)
 	}
 	nvkm_done(mem);
 
-	if (nvkm_memory_target(mem) == NVKM_MEM_TARGET_VRAM)
-		target = 0;
-	else
-		target = 3;
+	switch (nvkm_memory_target(mem)) {
+	case NVKM_MEM_TARGET_VRAM: target = 0; break;
+	case NVKM_MEM_TARGET_NCOH: target = 3; break;
+	default:
+		WARN_ON(1);
+		return;
+	}
 
 	nvkm_wr32(device, 0x002270, (nvkm_memory_addr(mem) >> 12) |
 				    (target << 28));
@@ -149,31 +212,137 @@ gk104_fifo_recover_work(struct work_struct *w)
 	nvkm_mask(device, 0x002630, runm, 0x00000000);
 }
 
+static void gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn);
+
 static void
-gk104_fifo_recover(struct gk104_fifo *fifo, struct nvkm_engine *engine,
-		   struct gk104_fifo_chan *chan)
+gk104_fifo_recover_runl(struct gk104_fifo *fifo, int runl)
 {
 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	u32 chid = chan->base.chid;
-	int engn;
+	const u32 runm = BIT(runl);
 
-	nvkm_error(subdev, "%s engine fault on channel %d, recovering...\n",
-		   nvkm_subdev_name[engine->subdev.index], chid);
 	assert_spin_locked(&fifo->base.lock);
+	if (fifo->recover.runm & runm)
+		return;
+	fifo->recover.runm |= runm;
 
-	nvkm_mask(device, 0x800004 + (chid * 0x08), 0x00000800, 0x00000800);
-	list_del_init(&chan->head);
-	chan->killed = true;
+	/* Block runlist to prevent channel assignment(s) from changing. */
+	nvkm_mask(device, 0x002630, runm, runm);
 
-	for (engn = 0; engn < fifo->engine_nr; engn++) {
-		if (fifo->engine[engn].engine == engine) {
-			fifo->recover.engm |= BIT(engn);
+	/* Schedule recovery. */
+	nvkm_warn(subdev, "runlist %d: scheduled for recovery\n", runl);
+	schedule_work(&fifo->recover.work);
+}
+
+static void
+gk104_fifo_recover_chan(struct nvkm_fifo *base, int chid)
+{
+	struct gk104_fifo *fifo = gk104_fifo(base);
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	const u32  stat = nvkm_rd32(device, 0x800004 + (chid * 0x08));
+	const u32  runl = (stat & 0x000f0000) >> 16;
+	const bool used = (stat & 0x00000001);
+	unsigned long engn, engm = fifo->runlist[runl].engm;
+	struct gk104_fifo_chan *chan;
+
+	assert_spin_locked(&fifo->base.lock);
+	if (!used)
+		return;
+
+	/* Lookup SW state for channel, and mark it as dead. */
+	list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
+		if (chan->base.chid == chid) {
+			list_del_init(&chan->head);
+			chan->killed = true;
+			nvkm_fifo_kevent(&fifo->base, chid);
 			break;
 		}
 	}
 
-	fifo->recover.runm |= BIT(chan->runl);
+	/* Disable channel. */
+	nvkm_wr32(device, 0x800004 + (chid * 0x08), stat | 0x00000800);
+	nvkm_warn(subdev, "channel %d: killed\n", chid);
+
+	/* Block channel assignments from changing during recovery. */
+	gk104_fifo_recover_runl(fifo, runl);
+
+	/* Schedule recovery for any engines the channel is on. */
+	for_each_set_bit(engn, &engm, fifo->engine_nr) {
+		struct gk104_fifo_engine_status status;
+		gk104_fifo_engine_status(fifo, engn, &status);
+		if (!status.chan || status.chan->id != chid)
+			continue;
+		gk104_fifo_recover_engn(fifo, engn);
+	}
+}
+
+static void
+gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn)
+{
+	struct nvkm_engine *engine = fifo->engine[engn].engine;
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	const u32 runl = fifo->engine[engn].runl;
+	const u32 engm = BIT(engn);
+	struct gk104_fifo_engine_status status;
+	int mmui = -1;
+
+	assert_spin_locked(&fifo->base.lock);
+	if (fifo->recover.engm & engm)
+		return;
+	fifo->recover.engm |= engm;
+
+	/* Block channel assignments from changing during recovery. */
+	gk104_fifo_recover_runl(fifo, runl);
+
+	/* Determine which channel (if any) is currently on the engine. */
+	gk104_fifo_engine_status(fifo, engn, &status);
+	if (status.chan) {
+		/* The channel is no longer viable, kill it. */
+		gk104_fifo_recover_chan(&fifo->base, status.chan->id);
+	}
+
+	/* Determine MMU fault ID for the engine, if we're not being
+	 * called from the fault handler already.
+	 */
+	if (!status.faulted && engine) {
+		mmui = nvkm_top_fault_id(device, engine->subdev.index);
+		if (mmui < 0) {
+			const struct nvkm_enum *en = fifo->func->fault.engine;
+			for (; en && en->name; en++) {
+				if (en->data2 == engine->subdev.index) {
+					mmui = en->value;
+					break;
+				}
+			}
+		}
+		WARN_ON(mmui < 0);
+	}
+
+	/* Trigger a MMU fault for the engine.
+	 *
+	 * No good idea why this is needed, but nvgpu does something similar,
+	 * and it makes recovery from CTXSW_TIMEOUT a lot more reliable.
+	 */
+	if (mmui >= 0) {
+		nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000100 | mmui);
+
+		/* Wait for fault to trigger. */
+		nvkm_msec(device, 2000,
+			gk104_fifo_engine_status(fifo, engn, &status);
+			if (status.faulted)
+				break;
+		);
+
+		/* Release MMU fault trigger, and ACK the fault. */
+		nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000000);
+		nvkm_wr32(device, 0x00259c, BIT(mmui));
+		nvkm_wr32(device, 0x002100, 0x10000000);
+	}
+
+	/* Schedule recovery. */
+	nvkm_warn(subdev, "engine %d: scheduled for recovery\n", engn);
 	schedule_work(&fifo->recover.work);
 }
 
@@ -211,34 +380,30 @@ static void
 gk104_fifo_intr_sched_ctxsw(struct gk104_fifo *fifo)
 {
 	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct gk104_fifo_chan *chan;
-	unsigned long flags;
+	unsigned long flags, engm = 0;
 	u32 engn;
 
+	/* We need to ACK the SCHED_ERROR here, and prevent it reasserting,
+	 * as MMU_FAULT cannot be triggered while it's pending.
+	 */
 	spin_lock_irqsave(&fifo->base.lock, flags);
+	nvkm_mask(device, 0x002140, 0x00000100, 0x00000000);
+	nvkm_wr32(device, 0x002100, 0x00000100);
+
 	for (engn = 0; engn < fifo->engine_nr; engn++) {
-		struct nvkm_engine *engine = fifo->engine[engn].engine;
-		int runl = fifo->engine[engn].runl;
-		u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x08));
-		u32 busy = (stat & 0x80000000);
-		u32 next = (stat & 0x0fff0000) >> 16;
-		u32 chsw = (stat & 0x00008000);
-		u32 save = (stat & 0x00004000);
-		u32 load = (stat & 0x00002000);
-		u32 prev = (stat & 0x00000fff);
-		u32 chid = load ? next : prev;
-		(void)save;
-
-		if (!busy || !chsw)
+		struct gk104_fifo_engine_status status;
+
+		gk104_fifo_engine_status(fifo, engn, &status);
+		if (!status.busy || !status.chsw)
 			continue;
 
-		list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
-			if (chan->base.chid == chid && engine) {
-				gk104_fifo_recover(fifo, engine, chan);
-				break;
-			}
-		}
+		engm |= BIT(engn);
 	}
+
+	for_each_set_bit(engn, &engm, fifo->engine_nr)
+		gk104_fifo_recover_engn(fifo, engn);
+
+	nvkm_mask(device, 0x002140, 0x00000100, 0x00000100);
 	spin_unlock_irqrestore(&fifo->base.lock, flags);
 }
 
@@ -301,6 +466,7 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 	struct nvkm_fifo_chan *chan;
 	unsigned long flags;
 	char gpcid[8] = "", en[16] = "";
+	int engn;
 
 	er = nvkm_enum_find(fifo->func->fault.reason, reason);
 	eu = nvkm_enum_find(fifo->func->fault.engine, unit);
@@ -342,7 +508,8 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 		snprintf(en, sizeof(en), "%s", eu->name);
 	}
 
-	chan = nvkm_fifo_chan_inst(&fifo->base, (u64)inst << 12, &flags);
+	spin_lock_irqsave(&fifo->base.lock, flags);
+	chan = nvkm_fifo_chan_inst_locked(&fifo->base, (u64)inst << 12);
 
 	nvkm_error(subdev,
 		   "%s fault at %010llx engine %02x [%s] client %02x [%s%s] "
@@ -353,9 +520,23 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 		   (u64)inst << 12,
 		   chan ? chan->object.client->name : "unknown");
 
-	if (engine && chan)
-		gk104_fifo_recover(fifo, engine, (void *)chan);
-	nvkm_fifo_chan_put(&fifo->base, flags, &chan);
+
+	/* Kill the channel that caused the fault. */
+	if (chan)
+		gk104_fifo_recover_chan(&fifo->base, chan->chid);
+
+	/* Channel recovery will probably have already done this for the
+	 * correct engine(s), but just in case we can't find the channel
+	 * information...
+	 */
+	for (engn = 0; engn < fifo->engine_nr && engine; engn++) {
+		if (fifo->engine[engn].engine == engine) {
+			gk104_fifo_recover_engn(fifo, engn);
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&fifo->base.lock, flags);
 }
 
 static const struct nvkm_bitfield gk104_fifo_pbdma_intr_0[] = {
@@ -716,6 +897,7 @@ gk104_fifo_ = {
 	.intr = gk104_fifo_intr,
 	.uevent_init = gk104_fifo_uevent_init,
 	.uevent_fini = gk104_fifo_uevent_fini,
+	.recover_chan = gk104_fifo_recover_chan,
 	.class_get = gk104_fifo_class_get,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c
index 12d964260a29..f9e0377d3d24 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c
@@ -32,6 +32,23 @@
 #include <nvif/cl906f.h>
 #include <nvif/unpack.h>
 
+int
+gf100_fifo_chan_ntfy(struct nvkm_fifo_chan *chan, u32 type,
+		     struct nvkm_event **pevent)
+{
+	switch (type) {
+	case NV906F_V0_NTFY_NON_STALL_INTERRUPT:
+		*pevent = &chan->fifo->uevent;
+		return 0;
+	case NV906F_V0_NTFY_KILLED:
+		*pevent = &chan->fifo->kevent;
+		return 0;
+	default:
+		break;
+	}
+	return -EINVAL;
+}
+
 static u32
 gf100_fifo_gpfifo_engine_addr(struct nvkm_engine *engine)
 {
@@ -184,7 +201,7 @@ gf100_fifo_gpfifo_func = {
 	.dtor = gf100_fifo_gpfifo_dtor,
 	.init = gf100_fifo_gpfifo_init,
 	.fini = gf100_fifo_gpfifo_fini,
-	.ntfy = g84_fifo_chan_ntfy,
+	.ntfy = gf100_fifo_chan_ntfy,
 	.engine_ctor = gf100_fifo_gpfifo_engine_ctor,
 	.engine_dtor = gf100_fifo_gpfifo_engine_dtor,
 	.engine_init = gf100_fifo_gpfifo_engine_init,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
index a2df4f3e7763..8abf6f8ef445 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
@@ -50,6 +50,7 @@ gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan)
 	) < 0) {
 		nvkm_error(subdev, "channel %d [%s] kick timeout\n",
 			   chan->base.chid, client->name);
+		nvkm_fifo_recover_chan(&fifo->base, chan->base.chid);
 		ret = -ETIMEDOUT;
 	}
 	mutex_unlock(&subdev->mutex);
@@ -213,7 +214,7 @@ gk104_fifo_gpfifo_func = {
 	.dtor = gk104_fifo_gpfifo_dtor,
 	.init = gk104_fifo_gpfifo_init,
 	.fini = gk104_fifo_gpfifo_fini,
-	.ntfy = g84_fifo_chan_ntfy,
+	.ntfy = gf100_fifo_chan_ntfy,
 	.engine_ctor = gk104_fifo_gpfifo_engine_ctor,
 	.engine_dtor = gk104_fifo_gpfifo_engine_dtor,
 	.engine_init = gk104_fifo_gpfifo_engine_init,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
index f6dfb37d9429..f889b13b5e41 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
@@ -6,6 +6,12 @@
 int nvkm_fifo_ctor(const struct nvkm_fifo_func *, struct nvkm_device *,
 		   int index, int nr, struct nvkm_fifo *);
 void nvkm_fifo_uevent(struct nvkm_fifo *);
+void nvkm_fifo_cevent(struct nvkm_fifo *);
+void nvkm_fifo_kevent(struct nvkm_fifo *, int chid);
+void nvkm_fifo_recover_chan(struct nvkm_fifo *, int chid);
+
+struct nvkm_fifo_chan *
+nvkm_fifo_chan_inst_locked(struct nvkm_fifo *, u64 inst);
 
 struct nvkm_fifo_chan_oclass;
 struct nvkm_fifo_func {
@@ -18,6 +24,7 @@ struct nvkm_fifo_func {
 	void (*start)(struct nvkm_fifo *, unsigned long *);
 	void (*uevent_init)(struct nvkm_fifo *);
 	void (*uevent_fini)(struct nvkm_fifo *);
+	void (*recover_chan)(struct nvkm_fifo *, int chid);
 	int (*class_get)(struct nvkm_fifo *, int index,
 			 const struct nvkm_fifo_chan_oclass **);
 	const struct nvkm_fifo_chan_oclass *chan[];
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
index 467065d1b4e6..cd8cf6f7024c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
@@ -25,6 +25,15 @@
 
 #include <engine/fifo.h>
 
+static bool
+nvkm_gr_chsw_load(struct nvkm_engine *engine)
+{
+	struct nvkm_gr *gr = nvkm_gr(engine);
+	if (gr->func->chsw_load)
+		return gr->func->chsw_load(gr);
+	return false;
+}
+
 static void
 nvkm_gr_tile(struct nvkm_engine *engine, int region, struct nvkm_fb_tile *tile)
 {
@@ -106,6 +115,15 @@ nvkm_gr_init(struct nvkm_engine *engine)
 	return gr->func->init(gr);
 }
 
+static int
+nvkm_gr_fini(struct nvkm_engine *engine, bool suspend)
+{
+	struct nvkm_gr *gr = nvkm_gr(engine);
+	if (gr->func->fini)
+		return gr->func->fini(gr, suspend);
+	return 0;
+}
+
 static void *
 nvkm_gr_dtor(struct nvkm_engine *engine)
 {
@@ -120,8 +138,10 @@ nvkm_gr = {
 	.dtor = nvkm_gr_dtor,
 	.oneinit = nvkm_gr_oneinit,
 	.init = nvkm_gr_init,
+	.fini = nvkm_gr_fini,
 	.intr = nvkm_gr_intr,
 	.tile = nvkm_gr_tile,
+	.chsw_load = nvkm_gr_chsw_load,
 	.fifo.cclass = nvkm_gr_cclass_new,
 	.fifo.sclass = nvkm_gr_oclass_get,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c
index ce913300539f..da1ba74682b4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c
@@ -25,6 +25,8 @@
 
 #include <subdev/timer.h>
 
+#include <nvif/class.h>
+
 static const struct nvkm_bitfield nv50_gr_status[] = {
 	{ 0x00000001, "BUSY" }, /* set when any bit is set */
 	{ 0x00000002, "DISPATCH" },
@@ -180,11 +182,11 @@ g84_gr = {
 	.tlb_flush = g84_gr_tlb_flush,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
-		{ -1, -1, 0x8297, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
+		{ -1, -1, G82_TESLA, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index f65a5b0a1a4d..f9acb8a944d2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -702,6 +702,22 @@ gf100_gr_pack_mmio[] = {
  * PGRAPH engine/subdev functions
  ******************************************************************************/
 
+static bool
+gf100_gr_chsw_load(struct nvkm_gr *base)
+{
+	struct gf100_gr *gr = gf100_gr(base);
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	/*
+	 * Which register and bit get sampled depends on whether external
+	 * firmware (gr->firmware) is in use.
+	 */
+	if (gr->firmware)
+		return (nvkm_rd32(device, 0x409808) & 0x00080000) != 0;
+
+	return (nvkm_rd32(device, 0x40981c) & 0x00000040) != 0;
+}
+
 int
 gf100_gr_rops(struct gf100_gr *gr)
 {
@@ -1136,7 +1152,7 @@ gf100_gr_trap_intr(struct gf100_gr *gr)
 	if (trap & 0x00000008) {
 		u32 stat = nvkm_rd32(device, 0x408030);
 
-		nvkm_snprintbf(error, sizeof(error), gf100_m2mf_error,
+		nvkm_snprintbf(error, sizeof(error), gf100_ccache_error,
 			       stat & 0x3fffffff);
 		nvkm_error(subdev, "CCACHE %08x [%s]\n", stat, error);
 		nvkm_wr32(device, 0x408030, 0xc0000000);
@@ -1391,26 +1407,11 @@ gf100_gr_intr(struct nvkm_gr *base)
 }
 
 static void
-gf100_gr_init_fw(struct gf100_gr *gr, u32 fuc_base,
+gf100_gr_init_fw(struct nvkm_falcon *falcon,
 		 struct gf100_gr_fuc *code, struct gf100_gr_fuc *data)
 {
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	int i;
-
-	nvkm_wr32(device, fuc_base + 0x01c0, 0x01000000);
-	for (i = 0; i < data->size / 4; i++)
-		nvkm_wr32(device, fuc_base + 0x01c4, data->data[i]);
-
-	nvkm_wr32(device, fuc_base + 0x0180, 0x01000000);
-	for (i = 0; i < code->size / 4; i++) {
-		if ((i & 0x3f) == 0)
-			nvkm_wr32(device, fuc_base + 0x0188, i >> 6);
-		nvkm_wr32(device, fuc_base + 0x0184, code->data[i]);
-	}
-
-	/* code must be padded to 0x40 words */
-	for (; i & 0x3f; i++)
-		nvkm_wr32(device, fuc_base + 0x0184, 0);
+	nvkm_falcon_load_dmem(falcon, data->data, 0x0, data->size, 0);
+	nvkm_falcon_load_imem(falcon, code->data, 0x0, code->size, 0, 0, false);
 }
 
 static void
@@ -1455,162 +1456,149 @@ gf100_gr_init_csdata(struct gf100_gr *gr,
 	nvkm_wr32(device, falcon + 0x01c4, star + 4);
 }
 
-int
-gf100_gr_init_ctxctl(struct gf100_gr *gr)
+/* Initialize context from an external (secure or not) firmware */
+static int
+gf100_gr_init_ctxctl_ext(struct gf100_gr *gr)
 {
-	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_secboot *sb = device->secboot;
-	int i;
 	int ret = 0;
 
-	if (gr->firmware) {
-		/* load fuc microcode */
-		nvkm_mc_unk260(device, 0);
-
-		/* securely-managed falcons must be reset using secure boot */
-		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
-			ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_FECS);
-		else
-			gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c,
-					 &gr->fuc409d);
-		if (ret)
-			return ret;
+	/* load fuc microcode */
+	nvkm_mc_unk260(device, 0);
 
-		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
-			ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_GPCCS);
-		else
-			gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac,
-					 &gr->fuc41ad);
-		if (ret)
-			return ret;
+	/* securely-managed falcons must be reset using secure boot */
+	if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
+		ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_FECS);
+	else
+		gf100_gr_init_fw(gr->fecs, &gr->fuc409c, &gr->fuc409d);
+	if (ret)
+		return ret;
 
-		nvkm_mc_unk260(device, 1);
-
-		/* start both of them running */
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x41a10c, 0x00000000);
-		nvkm_wr32(device, 0x40910c, 0x00000000);
-
-		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
-			nvkm_secboot_start(sb, NVKM_SECBOOT_FALCON_GPCCS);
-		else
-			nvkm_wr32(device, 0x41a100, 0x00000002);
-		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
-			nvkm_secboot_start(sb, NVKM_SECBOOT_FALCON_FECS);
-		else
-			nvkm_wr32(device, 0x409100, 0x00000002);
-		if (nvkm_msec(device, 2000,
-			if (nvkm_rd32(device, 0x409800) & 0x00000001)
-				break;
-		) < 0)
-			return -EBUSY;
+	if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
+		ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_GPCCS);
+	else
+		gf100_gr_init_fw(gr->gpccs, &gr->fuc41ac, &gr->fuc41ad);
+	if (ret)
+		return ret;
+
+	nvkm_mc_unk260(device, 1);
+
+	/* start both of them running */
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x41a10c, 0x00000000);
+	nvkm_wr32(device, 0x40910c, 0x00000000);
+
+	nvkm_falcon_start(gr->gpccs);
+	nvkm_falcon_start(gr->fecs);
 
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x409500, 0x7fffffff);
-		nvkm_wr32(device, 0x409504, 0x00000021);
+	if (nvkm_msec(device, 2000,
+		if (nvkm_rd32(device, 0x409800) & 0x00000001)
+			break;
+	) < 0)
+		return -EBUSY;
+
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x7fffffff);
+	nvkm_wr32(device, 0x409504, 0x00000021);
+
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x00000000);
+	nvkm_wr32(device, 0x409504, 0x00000010);
+	if (nvkm_msec(device, 2000,
+		if ((gr->size = nvkm_rd32(device, 0x409800)))
+			break;
+	) < 0)
+		return -EBUSY;
+
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x00000000);
+	nvkm_wr32(device, 0x409504, 0x00000016);
+	if (nvkm_msec(device, 2000,
+		if (nvkm_rd32(device, 0x409800))
+			break;
+	) < 0)
+		return -EBUSY;
+
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x00000000);
+	nvkm_wr32(device, 0x409504, 0x00000025);
+	if (nvkm_msec(device, 2000,
+		if (nvkm_rd32(device, 0x409800))
+			break;
+	) < 0)
+		return -EBUSY;
 
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x409500, 0x00000000);
-		nvkm_wr32(device, 0x409504, 0x00000010);
+	if (device->chipset >= 0xe0) {
+		nvkm_wr32(device, 0x409800, 0x00000000);
+		nvkm_wr32(device, 0x409500, 0x00000001);
+		nvkm_wr32(device, 0x409504, 0x00000030);
 		if (nvkm_msec(device, 2000,
-			if ((gr->size = nvkm_rd32(device, 0x409800)))
+			if (nvkm_rd32(device, 0x409800))
 				break;
 		) < 0)
 			return -EBUSY;
 
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x409500, 0x00000000);
-		nvkm_wr32(device, 0x409504, 0x00000016);
+		nvkm_wr32(device, 0x409810, 0xb00095c8);
+		nvkm_wr32(device, 0x409800, 0x00000000);
+		nvkm_wr32(device, 0x409500, 0x00000001);
+		nvkm_wr32(device, 0x409504, 0x00000031);
 		if (nvkm_msec(device, 2000,
 			if (nvkm_rd32(device, 0x409800))
 				break;
 		) < 0)
 			return -EBUSY;
 
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x409500, 0x00000000);
-		nvkm_wr32(device, 0x409504, 0x00000025);
+		nvkm_wr32(device, 0x409810, 0x00080420);
+		nvkm_wr32(device, 0x409800, 0x00000000);
+		nvkm_wr32(device, 0x409500, 0x00000001);
+		nvkm_wr32(device, 0x409504, 0x00000032);
 		if (nvkm_msec(device, 2000,
 			if (nvkm_rd32(device, 0x409800))
 				break;
 		) < 0)
 			return -EBUSY;
 
-		if (device->chipset >= 0xe0) {
-			nvkm_wr32(device, 0x409800, 0x00000000);
-			nvkm_wr32(device, 0x409500, 0x00000001);
-			nvkm_wr32(device, 0x409504, 0x00000030);
-			if (nvkm_msec(device, 2000,
-				if (nvkm_rd32(device, 0x409800))
-					break;
-			) < 0)
-				return -EBUSY;
-
-			nvkm_wr32(device, 0x409810, 0xb00095c8);
-			nvkm_wr32(device, 0x409800, 0x00000000);
-			nvkm_wr32(device, 0x409500, 0x00000001);
-			nvkm_wr32(device, 0x409504, 0x00000031);
-			if (nvkm_msec(device, 2000,
-				if (nvkm_rd32(device, 0x409800))
-					break;
-			) < 0)
-				return -EBUSY;
-
-			nvkm_wr32(device, 0x409810, 0x00080420);
-			nvkm_wr32(device, 0x409800, 0x00000000);
-			nvkm_wr32(device, 0x409500, 0x00000001);
-			nvkm_wr32(device, 0x409504, 0x00000032);
-			if (nvkm_msec(device, 2000,
-				if (nvkm_rd32(device, 0x409800))
-					break;
-			) < 0)
-				return -EBUSY;
+		nvkm_wr32(device, 0x409614, 0x00000070);
+		nvkm_wr32(device, 0x409614, 0x00000770);
+		nvkm_wr32(device, 0x40802c, 0x00000001);
+	}
 
-			nvkm_wr32(device, 0x409614, 0x00000070);
-			nvkm_wr32(device, 0x409614, 0x00000770);
-			nvkm_wr32(device, 0x40802c, 0x00000001);
+	if (gr->data == NULL) {
+		int ret = gf100_grctx_generate(gr);
+		if (ret) {
+			nvkm_error(subdev, "failed to construct context\n");
+			return ret;
 		}
+	}
 
-		if (gr->data == NULL) {
-			int ret = gf100_grctx_generate(gr);
-			if (ret) {
-				nvkm_error(subdev, "failed to construct context\n");
-				return ret;
-			}
-		}
+	return 0;
+}
+
+static int
+gf100_gr_init_ctxctl_int(struct gf100_gr *gr)
+{
+	const struct gf100_grctx_func *grctx = gr->func->grctx;
+	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
 
-		return 0;
-	} else
 	if (!gr->func->fecs.ucode) {
 		return -ENOSYS;
 	}
 
 	/* load HUB microcode */
 	nvkm_mc_unk260(device, 0);
-	nvkm_wr32(device, 0x4091c0, 0x01000000);
-	for (i = 0; i < gr->func->fecs.ucode->data.size / 4; i++)
-		nvkm_wr32(device, 0x4091c4, gr->func->fecs.ucode->data.data[i]);
-
-	nvkm_wr32(device, 0x409180, 0x01000000);
-	for (i = 0; i < gr->func->fecs.ucode->code.size / 4; i++) {
-		if ((i & 0x3f) == 0)
-			nvkm_wr32(device, 0x409188, i >> 6);
-		nvkm_wr32(device, 0x409184, gr->func->fecs.ucode->code.data[i]);
-	}
+	nvkm_falcon_load_dmem(gr->fecs, gr->func->fecs.ucode->data.data, 0x0,
+			      gr->func->fecs.ucode->data.size, 0);
+	nvkm_falcon_load_imem(gr->fecs, gr->func->fecs.ucode->code.data, 0x0,
+			      gr->func->fecs.ucode->code.size, 0, 0, false);
 
 	/* load GPC microcode */
-	nvkm_wr32(device, 0x41a1c0, 0x01000000);
-	for (i = 0; i < gr->func->gpccs.ucode->data.size / 4; i++)
-		nvkm_wr32(device, 0x41a1c4, gr->func->gpccs.ucode->data.data[i]);
-
-	nvkm_wr32(device, 0x41a180, 0x01000000);
-	for (i = 0; i < gr->func->gpccs.ucode->code.size / 4; i++) {
-		if ((i & 0x3f) == 0)
-			nvkm_wr32(device, 0x41a188, i >> 6);
-		nvkm_wr32(device, 0x41a184, gr->func->gpccs.ucode->code.data[i]);
-	}
+	nvkm_falcon_load_dmem(gr->gpccs, gr->func->gpccs.ucode->data.data, 0x0,
+			      gr->func->gpccs.ucode->data.size, 0);
+	nvkm_falcon_load_imem(gr->gpccs, gr->func->gpccs.ucode->code.data, 0x0,
+			      gr->func->gpccs.ucode->code.size, 0, 0, false);
 	nvkm_mc_unk260(device, 1);
 
 	/* load register lists */
@@ -1642,6 +1630,19 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
 	return 0;
 }
 
+int
+gf100_gr_init_ctxctl(struct gf100_gr *gr)
+{
+	/*
+	 * Dispatch to the external-firmware path when gr->firmware is set,
+	 * and to gf100_gr_init_ctxctl_int() otherwise.
+	 */
+	if (gr->firmware)
+		return gf100_gr_init_ctxctl_ext(gr);
+
+	return gf100_gr_init_ctxctl_int(gr);
+}
+
 static int
 gf100_gr_oneinit(struct nvkm_gr *base)
 {
@@ -1711,10 +1712,32 @@ static int
 gf100_gr_init_(struct nvkm_gr *base)
 {
 	struct gf100_gr *gr = gf100_gr(base);
+	struct nvkm_subdev *subdev = &base->engine.subdev;
+	int ret; /* signed: nvkm_falcon_get() reports failure as -EBUSY */
+
 	nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false);
+
+	ret = nvkm_falcon_get(gr->fecs, subdev);
+	if (ret)
+		return ret;
+
+	ret = nvkm_falcon_get(gr->gpccs, subdev);
+	if (ret)
+		return ret;
+
 	return gr->func->init(gr);
 }
 
+static int
+gf100_gr_fini_(struct nvkm_gr *base, bool suspend)
+{
+	struct gf100_gr *gr = gf100_gr(base);
+	/* give up ownership of both falcons: GPCCS first, then FECS */
+	nvkm_falcon_put(gr->gpccs, &gr->base.engine.subdev);
+	nvkm_falcon_put(gr->fecs, &gr->base.engine.subdev);
+	return 0;
+}
+
 void
 gf100_gr_dtor_fw(struct gf100_gr_fuc *fuc)
 {
@@ -1737,6 +1760,9 @@ gf100_gr_dtor(struct nvkm_gr *base)
 		gr->func->dtor(gr);
 	kfree(gr->data);
 
+	nvkm_falcon_del(&gr->gpccs);
+	nvkm_falcon_del(&gr->fecs);
+
 	gf100_gr_dtor_fw(&gr->fuc409c);
 	gf100_gr_dtor_fw(&gr->fuc409d);
 	gf100_gr_dtor_fw(&gr->fuc41ac);
@@ -1755,10 +1781,12 @@ gf100_gr_ = {
 	.dtor = gf100_gr_dtor,
 	.oneinit = gf100_gr_oneinit,
 	.init = gf100_gr_init_,
+	.fini = gf100_gr_fini_,
 	.intr = gf100_gr_intr,
 	.units = gf100_gr_units,
 	.chan_new = gf100_gr_chan_new,
 	.object_get = gf100_gr_object_get,
+	.chsw_load = gf100_gr_chsw_load,
 };
 
 int
@@ -1828,6 +1856,7 @@ int
 gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device,
 	      int index, struct gf100_gr *gr)
 {
+	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	int ret;
 
 	gr->func = func;
@@ -1840,7 +1869,11 @@ gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device,
 	if (ret)
 		return ret;
 
-	return 0;
+	ret = nvkm_falcon_v1_new(subdev, "FECS", 0x409000, &gr->fecs);
+	if (ret)
+		return ret;
+
+	return nvkm_falcon_v1_new(subdev, "GPCCS", 0x41a000, &gr->gpccs);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
index 268b8d60ff73..db6ee3b06841 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
@@ -29,6 +29,7 @@
 #include <core/gpuobj.h>
 #include <subdev/ltc.h>
 #include <subdev/mmu.h>
+#include <engine/falcon.h>
 
 #define GPC_MAX 32
 #define TPC_MAX_PER_GPC 8
@@ -75,6 +76,8 @@ struct gf100_gr {
 	const struct gf100_gr_func *func;
 	struct nvkm_gr base;
 
+	struct nvkm_falcon *fecs;
+	struct nvkm_falcon *gpccs;
 	struct gf100_gr_fuc fuc409c;
 	struct gf100_gr_fuc fuc409d;
 	struct gf100_gr_fuc fuc41ac;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c
index 2e68919f00b2..c711a55ce392 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c
@@ -23,6 +23,8 @@
  */
 #include "nv50.h"
 
+#include <nvif/class.h>
+
 static const struct nvkm_gr_func
 gt200_gr = {
 	.init = nv50_gr_init,
@@ -31,11 +33,11 @@ gt200_gr = {
 	.tlb_flush = g84_gr_tlb_flush,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
-		{ -1, -1, 0x8397, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
+		{ -1, -1, GT200_TESLA, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c
index 2bf7aac360cc..fa103df32ec7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c
@@ -23,6 +23,8 @@
  */
 #include "nv50.h"
 
+#include <nvif/class.h>
+
 static const struct nvkm_gr_func
 gt215_gr = {
 	.init = nv50_gr_init,
@@ -31,12 +33,12 @@ gt215_gr = {
 	.tlb_flush = g84_gr_tlb_flush,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
-		{ -1, -1, 0x8597, &nv50_gr_object },
-		{ -1, -1, 0x85c0, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
+		{ -1, -1, GT214_TESLA, &nv50_gr_object },
+		{ -1, -1, GT214_COMPUTE, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c
index 95d5219faf93..eb1a90644752 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c
@@ -23,6 +23,8 @@
  */
 #include "nv50.h"
 
+#include <nvif/class.h>
+
 static const struct nvkm_gr_func
 mcp79_gr = {
 	.init = nv50_gr_init,
@@ -30,11 +32,11 @@ mcp79_gr = {
 	.chan_new = nv50_gr_chan_new,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
-		{ -1, -1, 0x8397, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
+		{ -1, -1, GT200_TESLA, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c
index 027b58e5976b..c91eb56e9327 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c
@@ -23,6 +23,8 @@
  */
 #include "nv50.h"
 
+#include <nvif/class.h>
+
 static const struct nvkm_gr_func
 mcp89_gr = {
 	.init = nv50_gr_init,
@@ -31,12 +33,12 @@ mcp89_gr = {
 	.tlb_flush = g84_gr_tlb_flush,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
-		{ -1, -1, 0x85c0, &nv50_gr_object },
-		{ -1, -1, 0x8697, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
+		{ -1, -1, GT214_COMPUTE, &nv50_gr_object },
+		{ -1, -1, GT21A_TESLA, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c
index fca67de43f2b..df16ffda1749 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c
@@ -27,6 +27,8 @@
 #include <core/gpuobj.h>
 #include <engine/fifo.h>
 
+#include <nvif/class.h>
+
 u64
 nv50_gr_units(struct nvkm_gr *gr)
 {
@@ -778,11 +780,11 @@ nv50_gr = {
 	.chan_new = nv50_gr_chan_new,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x5097, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_TESLA, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h
index d8adcdf6985a..2a52d9f026ec 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h
@@ -15,6 +15,7 @@ struct nvkm_gr_func {
 	void *(*dtor)(struct nvkm_gr *);
 	int (*oneinit)(struct nvkm_gr *);
 	int (*init)(struct nvkm_gr *);
+	int (*fini)(struct nvkm_gr *, bool);
 	void (*intr)(struct nvkm_gr *);
 	void (*tile)(struct nvkm_gr *, int region, struct nvkm_fb_tile *);
 	int (*tlb_flush)(struct nvkm_gr *);
@@ -24,6 +25,7 @@ struct nvkm_gr_func {
 	/* Returns chipset-specific counts of units packed into an u64.
 	 */
 	u64 (*units)(struct nvkm_gr *);
+	bool (*chsw_load)(struct nvkm_gr *);
 	struct nvkm_sclass sclass[];
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild b/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild
new file mode 100644
index 000000000000..584863db9bfc
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild
@@ -0,0 +1,2 @@
+nvkm-y += nvkm/falcon/base.o
+nvkm-y += nvkm/falcon/v1.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c
new file mode 100644
index 000000000000..4852f313762f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <subdev/mc.h>
+
+void
+nvkm_falcon_load_imem(struct nvkm_falcon *falcon, void *data, u32 start,
+		      u32 size, u16 tag, u8 port, bool secure)
+{
+	if (secure && !falcon->secret) {
+		nvkm_warn(falcon->user,
+			  "writing with secure tag on a non-secure falcon!\n");
+		return;
+	}
+
+	falcon->func->load_imem(falcon, data, start, size, tag, port,
+				secure);
+}
+
+void
+nvkm_falcon_load_dmem(struct nvkm_falcon *falcon, void *data, u32 start,
+		      u32 size, u8 port)
+{
+	falcon->func->load_dmem(falcon, data, start, size, port);
+}
+
+void
+nvkm_falcon_read_dmem(struct nvkm_falcon *falcon, u32 start, u32 size, u8 port,
+		      void *data)
+{
+	falcon->func->read_dmem(falcon, start, size, port, data);
+}
+
+void
+nvkm_falcon_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *inst)
+{
+	if (!falcon->func->bind_context) {
+		nvkm_error(falcon->user,
+			   "Context binding not supported on this falcon!\n");
+		return;
+	}
+
+	falcon->func->bind_context(falcon, inst);
+}
+
+void
+nvkm_falcon_set_start_addr(struct nvkm_falcon *falcon, u32 start_addr)
+{
+	falcon->func->set_start_addr(falcon, start_addr);
+}
+
+void
+nvkm_falcon_start(struct nvkm_falcon *falcon)
+{
+	falcon->func->start(falcon);
+}
+
+int
+nvkm_falcon_enable(struct nvkm_falcon *falcon)
+{
+	const struct nvkm_subdev *owner = falcon->owner;
+	int ret;
+
+	/*
+	 * Enable the owner's unit through nvkm_mc_enable() first, then run
+	 * the falcon's own enable hook; undo the MC enable if the hook fails.
+	 */
+	nvkm_mc_enable(owner->device, owner->index);
+	ret = falcon->func->enable(falcon);
+	if (ret)
+		nvkm_mc_disable(owner->device, owner->index);
+	return ret;
+}
+
+void
+nvkm_falcon_disable(struct nvkm_falcon *falcon)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	enum nvkm_devidx id = falcon->owner->index;
+
+	/* already disabled, return or wait_idle will timeout */
+	if (!nvkm_mc_enabled(device, id))
+		return;
+
+	falcon->func->disable(falcon);
+
+	nvkm_mc_disable(device, id);
+}
+
+int
+nvkm_falcon_reset(struct nvkm_falcon *falcon)
+{
+	nvkm_falcon_disable(falcon);
+	return nvkm_falcon_enable(falcon);
+}
+
+int
+nvkm_falcon_wait_for_halt(struct nvkm_falcon *falcon, u32 ms)
+{
+	return falcon->func->wait_for_halt(falcon, ms);
+}
+
+int
+nvkm_falcon_clear_interrupt(struct nvkm_falcon *falcon, u32 mask)
+{
+	return falcon->func->clear_interrupt(falcon, mask);
+}
+
+void
+nvkm_falcon_put(struct nvkm_falcon *falcon, const struct nvkm_subdev *user)
+{
+	mutex_lock(&falcon->mutex);
+	if (falcon->user == user) {
+		nvkm_debug(falcon->user, "released %s falcon\n", falcon->name);
+		falcon->user = NULL;
+	}
+	mutex_unlock(&falcon->mutex);
+}
+
+int
+nvkm_falcon_get(struct nvkm_falcon *falcon, const struct nvkm_subdev *user)
+{
+	mutex_lock(&falcon->mutex);
+	if (falcon->user) {
+		nvkm_error(user, "%s falcon already acquired by %s!\n",
+			   falcon->name, nvkm_subdev_name[falcon->user->index]);
+		mutex_unlock(&falcon->mutex);
+		return -EBUSY;
+	}
+
+	nvkm_debug(user, "acquired %s falcon\n", falcon->name);
+	falcon->user = user;
+	mutex_unlock(&falcon->mutex);
+	return 0;
+}
+
+void
+nvkm_falcon_ctor(const struct nvkm_falcon_func *func,
+		 struct nvkm_subdev *subdev, const char *name, u32 addr,
+		 struct nvkm_falcon *falcon)
+{
+	u32 reg;
+
+	falcon->func = func;
+	falcon->owner = subdev;
+	falcon->name = name;
+	falcon->addr = addr;
+	mutex_init(&falcon->mutex);
+
+	reg = nvkm_falcon_rd32(falcon, 0x12c);
+	falcon->version = reg & 0xf;
+	falcon->secret = (reg >> 4) & 0x3;
+	falcon->code.ports = (reg >> 8) & 0xf;
+	falcon->data.ports = (reg >> 12) & 0xf;
+
+	reg = nvkm_falcon_rd32(falcon, 0x108);
+	falcon->code.limit = (reg & 0x1ff) << 8;
+	falcon->data.limit = (reg & 0x3fe00) >> 1;
+
+	reg = nvkm_falcon_rd32(falcon, 0xc08);
+	falcon->debug = (reg >> 20) & 0x1;
+}
+
+void
+nvkm_falcon_del(struct nvkm_falcon **pfalcon)
+{
+	struct nvkm_falcon *falcon = *pfalcon;
+	/* kfree(NULL) is a no-op, so free unconditionally and clear the handle */
+	kfree(falcon);
+	*pfalcon = NULL;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h b/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h
new file mode 100644
index 000000000000..97b56f759d0b
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h
@@ -0,0 +1,8 @@
+#ifndef __NVKM_FALCON_PRIV_H__
+#define __NVKM_FALCON_PRIV_H__
+#include <engine/falcon.h>
+
+void
+nvkm_falcon_ctor(const struct nvkm_falcon_func *, struct nvkm_subdev *,
+		 const char *, u32, struct nvkm_falcon *);
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c
new file mode 100644
index 000000000000..b537f111f39c
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <core/gpuobj.h>
+#include <core/memory.h>
+#include <subdev/timer.h>
+
+static void
+nvkm_falcon_v1_load_imem(struct nvkm_falcon *falcon, void *data, u32 start,
+			 u32 size, u16 tag, u8 port, bool secure)
+{
+	u8 rem = size % 4;
+	u32 reg;
+	int i;
+
+	size -= rem;
+
+	reg = start | BIT(24) | (secure ? BIT(28) : 0);
+	nvkm_falcon_wr32(falcon, 0x180 + (port * 16), reg);
+	for (i = 0; i < size / 4; i++) {
+		/* write new tag every 256B, through the same port as the index */
+		if ((i & 0x3f) == 0)
+			nvkm_falcon_wr32(falcon, 0x188 + (port * 16), tag++);
+		nvkm_falcon_wr32(falcon, 0x184 + (port * 16), ((u32 *)data)[i]);
+	}
+
+	/*
+	 * If size is not a multiple of 4, mask the last word to ensure garbage
+	 * does not get written
+	 */
+	if (rem) {
+		u32 extra = ((u32 *)data)[i] & (BIT(rem * 8) - 1);
+
+		/* write new tag every 256B */
+		if ((i & 0x3f) == 0)
+			nvkm_falcon_wr32(falcon, 0x188 + (port * 16), tag++);
+		nvkm_falcon_wr32(falcon, 0x184 + (port * 16), extra);
+		++i;
+	}
+
+	/* code must be padded to 0x40 words */
+	for (; i & 0x3f; i++)
+		nvkm_falcon_wr32(falcon, 0x184 + (port * 16), 0);
+}
+
+static void
+nvkm_falcon_v1_load_dmem(struct nvkm_falcon *falcon, void *data, u32 start,
+		      u32 size, u8 port)
+{
+	u8 rem = size % 4;
+	int i;
+
+	size -= rem;
+
+	nvkm_falcon_wr32(falcon, 0x1c0 + (port * 8), start | (0x1 << 24));
+	for (i = 0; i < size / 4; i++)
+		nvkm_falcon_wr32(falcon, 0x1c4 + (port * 8), ((u32 *)data)[i]);
+
+	/*
+	 * If size is not a multiple of 4, mask the last word to ensure garbage
+	 * does not get written
+	 */
+	if (rem) {
+		u32 extra = ((u32 *)data)[i] & (BIT(rem * 8) - 1);
+
+		nvkm_falcon_wr32(falcon, 0x1c4 + (port * 8), extra);
+	}
+}
+
+static void
+nvkm_falcon_v1_read_dmem(struct nvkm_falcon *falcon, u32 start, u32 size,
+			 u8 port, void *data)
+{
+	u8 rem = size % 4;
+	int i;
+
+	size -= rem;
+
+	nvkm_falcon_wr32(falcon, 0x1c0 + (port * 8), start | (0x1 << 25));
+	for (i = 0; i < size / 4; i++)
+		((u32 *)data)[i] = nvkm_falcon_rd32(falcon, 0x1c4 + (port * 8));
+
+	/*
+	 * If size is not a multiple of 4, read one more word and copy only its
+	 * low rem bytes, so that nothing is written past the end of data
+	 */
+	if (rem) {
+		u32 extra = nvkm_falcon_rd32(falcon, 0x1c4 + (port * 8));
+
+		for (i = size; i < size + rem; i++) {
+			((u8 *)data)[i] = (u8)(extra & 0xff);
+			extra >>= 8;
+		}
+	}
+}
+
+static void
+nvkm_falcon_v1_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *ctx)
+{
+	u32 inst_loc;
+
+	/* disable instance block binding */
+	if (ctx == NULL) {
+		nvkm_falcon_wr32(falcon, 0x10c, 0x0);
+		return;
+	}
+
+	nvkm_falcon_wr32(falcon, 0x10c, 0x1);
+
+	/* setup apertures - virtual */
+	nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_UCODE, 0x4);
+	nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_VIRT, 0x0);
+	/* setup apertures - physical */
+	nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_PHYS_VID, 0x4);
+	nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_PHYS_SYS_COH, 0x5);
+	nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_PHYS_SYS_NCOH, 0x6);
+
+	/* Set context */
+	switch (nvkm_memory_target(ctx->memory)) {
+	case NVKM_MEM_TARGET_VRAM: inst_loc = 0; break;
+	case NVKM_MEM_TARGET_NCOH: inst_loc = 3; break;
+	default:
+		WARN_ON(1);
+		return;
+	}
+
+	/* Enable context */
+	nvkm_falcon_mask(falcon, 0x048, 0x1, 0x1);
+	nvkm_falcon_wr32(falcon, 0x480,
+			 ((ctx->addr >> 12) & 0xfffffff) |
+			 (inst_loc << 28) | (1 << 30));
+}
+
+static void
+nvkm_falcon_v1_set_start_addr(struct nvkm_falcon *falcon, u32 start_addr)
+{
+	nvkm_falcon_wr32(falcon, 0x104, start_addr);
+}
+
+static void
+nvkm_falcon_v1_start(struct nvkm_falcon *falcon)
+{
+	u32 reg = nvkm_falcon_rd32(falcon, 0x100);
+
+	if (reg & BIT(6))
+		nvkm_falcon_wr32(falcon, 0x130, 0x2);
+	else
+		nvkm_falcon_wr32(falcon, 0x100, 0x2);
+}
+
+static int
+nvkm_falcon_v1_wait_for_halt(struct nvkm_falcon *falcon, u32 ms)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	int ret;
+
+	/*
+	 * A negative nvkm_wait_msec() result goes back to the caller as-is;
+	 * any other value is reported as 0.
+	 */
+	ret = nvkm_wait_msec(device, ms, falcon->addr + 0x100, 0x10, 0x10);
+	return ret < 0 ? ret : 0;
+}
+
+static int
+nvkm_falcon_v1_clear_interrupt(struct nvkm_falcon *falcon, u32 mask)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	int ret;
+
+	/* clear interrupt(s) */
+	nvkm_falcon_mask(falcon, 0x004, mask, mask);
+	/* wait until interrupts are cleared */
+	ret = nvkm_wait_msec(device, 10, falcon->addr + 0x008, mask, 0x0);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int
+falcon_v1_wait_idle(struct nvkm_falcon *falcon)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	int ret;
+
+	ret = nvkm_wait_msec(device, 10, falcon->addr + 0x04c, 0xffff, 0x0);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int
+nvkm_falcon_v1_enable(struct nvkm_falcon *falcon)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	int ret;
+
+	ret = nvkm_wait_msec(device, 10, falcon->addr + 0x10c, 0x6, 0x0);
+	if (ret < 0) {
+		nvkm_error(falcon->user, "Falcon mem scrubbing timeout\n");
+		return ret;
+	}
+
+	ret = falcon_v1_wait_idle(falcon);
+	if (ret)
+		return ret;
+
+	/* enable IRQs */
+	nvkm_falcon_wr32(falcon, 0x010, 0xff);
+
+	return 0;
+}
+
+static void
+nvkm_falcon_v1_disable(struct nvkm_falcon *falcon)
+{
+	/* disable IRQs and wait for any previous code to complete */
+	nvkm_falcon_wr32(falcon, 0x014, 0xff);
+	falcon_v1_wait_idle(falcon);
+}
+
+static const struct nvkm_falcon_func
+nvkm_falcon_v1 = {
+	.load_imem = nvkm_falcon_v1_load_imem,
+	.load_dmem = nvkm_falcon_v1_load_dmem,
+	.read_dmem = nvkm_falcon_v1_read_dmem,
+	.bind_context = nvkm_falcon_v1_bind_context,
+	.start = nvkm_falcon_v1_start,
+	.wait_for_halt = nvkm_falcon_v1_wait_for_halt,
+	.clear_interrupt = nvkm_falcon_v1_clear_interrupt,
+	.enable = nvkm_falcon_v1_enable,
+	.disable = nvkm_falcon_v1_disable,
+	.set_start_addr = nvkm_falcon_v1_set_start_addr,
+};
+
+int
+nvkm_falcon_v1_new(struct nvkm_subdev *owner, const char *name, u32 addr,
+		   struct nvkm_falcon **pfalcon)
+{
+	struct nvkm_falcon *falcon;
+	if (!(falcon = *pfalcon = kzalloc(sizeof(*falcon), GFP_KERNEL)))
+		return -ENOMEM;
+	nvkm_falcon_ctor(&nvkm_falcon_v1, owner, name, addr, falcon);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild
index be57220a2e01..6b4f1e06a38f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild
@@ -19,6 +19,7 @@ nvkm-y += nvkm/subdev/bios/pcir.o
 nvkm-y += nvkm/subdev/bios/perf.o
 nvkm-y += nvkm/subdev/bios/pll.o
 nvkm-y += nvkm/subdev/bios/pmu.o
+nvkm-y += nvkm/subdev/bios/power_budget.o
 nvkm-y += nvkm/subdev/bios/ramcfg.o
 nvkm-y += nvkm/subdev/bios/rammap.o
 nvkm-y += nvkm/subdev/bios/shadow.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/power_budget.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/power_budget.c
new file mode 100644
index 000000000000..617bfffce4ad
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/power_budget.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2016 Karol Herbst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Karol Herbst
+ */
+#include <subdev/bios.h>
+#include <subdev/bios/bit.h>
+#include <subdev/bios/power_budget.h>
+
+static u32
+nvbios_power_budget_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt,
+			  u8 *len)
+{
+	struct bit_entry bit_P;
+	u32 power_budget;
+
+	/* the u32 table pointer sits at 0x2c, so 'P' must span 0x30 bytes */
+	if (bit_entry(bios, 'P', &bit_P) || bit_P.version != 2 ||
+	    bit_P.length < 0x30)
+		return 0;
+	power_budget = nvbios_rd32(bios, bit_P.offset + 0x2c);
+	if (!power_budget)
+		return 0;
+
+	*ver = nvbios_rd08(bios, power_budget);
+	switch (*ver) {
+	case 0x20:
+	case 0x30:
+		*hdr = nvbios_rd08(bios, power_budget + 0x1);
+		*len = nvbios_rd08(bios, power_budget + 0x2);
+		*cnt = nvbios_rd08(bios, power_budget + 0x3);
+		return power_budget;
+	default:
+		break;
+	}
+	/* unknown table version: handled like a missing table */
+	return 0;
+}
+
+int
+nvbios_power_budget_header(struct nvkm_bios *bios,
+                           struct nvbios_power_budget *budget)
+{
+	u8 ver, hdr, cnt, len, cap_entry;
+	u32 header;
+
+	if (!bios || !budget)
+		return -EINVAL;
+
+	header = nvbios_power_budget_table(bios, &ver, &hdr, &cnt, &len);
+	if (!header || !cnt)
+		return -ENODEV;
+
+	/* the cap entry index lives at a version-dependent header offset */
+	switch (ver) {
+	case 0x20:
+		cap_entry = nvbios_rd08(bios, header + 0x9);
+		break;
+	case 0x30:
+		cap_entry = nvbios_rd08(bios, header + 0xa);
+		break;
+	default:
+		cap_entry = 0xff;
+	}
+
+	if (cap_entry >= cnt && cap_entry != 0xff) {
+		nvkm_warn(&bios->subdev,
+		          "invalid cap_entry in power budget table found\n");
+		budget->cap_entry = 0xff;
+		return -EINVAL;
+	}
+
+	budget->offset = header;
+	budget->ver = ver;
+	budget->hlen = hdr;
+	budget->elen = len;
+	budget->ecount = cnt;
+
+	budget->cap_entry = cap_entry;
+
+	return 0;
+}
+
+int
+nvbios_power_budget_entry(struct nvkm_bios *bios,
+                          struct nvbios_power_budget *budget,
+                          u8 idx, struct nvbios_power_budget_entry *entry)
+{
+	u32 entry_offset;
+
+	if (!bios || !budget || !budget->offset || idx >= budget->ecount
+		|| !entry)
+		return -EINVAL;
+	/* entries of elen bytes each follow the hlen-byte table header */
+	entry_offset = budget->offset + budget->hlen + idx * budget->elen;
+	/* below version 0x20 only one value is read, reused as max and avg */
+	if (budget->ver >= 0x20) {
+		entry->min_w = nvbios_rd32(bios, entry_offset + 0x2);
+		entry->avg_w = nvbios_rd32(bios, entry_offset + 0x6);
+		entry->max_w = nvbios_rd32(bios, entry_offset + 0xa);
+	} else {
+		entry->min_w = 0;
+		entry->max_w = nvbios_rd32(bios, entry_offset + 0x2);
+		entry->avg_w = entry->max_w;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c
index 5841f297973c..da1770e47490 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c
@@ -112,7 +112,7 @@ read_pll_src(struct nv50_clk *clk, u32 base)
 		M    = (coef & 0x000000ff) >> 0;
 		break;
 	default:
-		BUG_ON(1);
+		BUG();
 	}
 
 	if (M)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c
index c714b097719c..59362f8dee22 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c
@@ -50,7 +50,7 @@ nv50_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq)
 	ret = nv04_pll_calc(subdev, &info, freq, &N1, &M1, &N2, &M2, &P);
 	if (!ret) {
 		nvkm_error(subdev, "failed pll calculation\n");
-		return ret;
+		return -EINVAL;
 	}
 
 	switch (info.type) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c
index 093223d1df4f..6758da93a3a1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c
@@ -445,7 +445,7 @@ gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 {
 	struct nvkm_ltc *ltc = ram->fb->subdev.device->ltc;
 	struct nvkm_mm *mm = &ram->vram;
-	struct nvkm_mm_node *r;
+	struct nvkm_mm_node **node, *r;
 	struct nvkm_mem *mem;
 	int type = (memtype & 0x0ff);
 	int back = (memtype & 0x800);
@@ -462,7 +462,6 @@ gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 	if (!mem)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&mem->regions);
 	mem->size = size;
 
 	mutex_lock(&ram->fb->subdev.mutex);
@@ -478,6 +477,7 @@ gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 	}
 	mem->memtype = type;
 
+	node = &mem->mem;
 	do {
 		if (back)
 			ret = nvkm_mm_tail(mm, 0, 1, size, ncmin, align, &r);
@@ -489,13 +489,13 @@ gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 			return ret;
 		}
 
-		list_add_tail(&r->rl_entry, &mem->regions);
+		*node = r;
+		node = &r->next;
 		size -= r->length;
 	} while (size);
 	mutex_unlock(&ram->fb->subdev.mutex);
 
-	r = list_first_entry(&mem->regions, struct nvkm_mm_node, rl_entry);
-	mem->offset = (u64)r->offset << NVKM_RAM_MM_SHIFT;
+	mem->offset = (u64)mem->mem->offset << NVKM_RAM_MM_SHIFT;
 	*pmem = mem;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c
index 7904fa41acef..fb8a1239743d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c
@@ -989,7 +989,7 @@ gk104_pll_calc_hiclk(int target_khz, int crystal,
 		int *N1, int *fN1, int *M1, int *P1,
 		int *N2, int *M2, int *P2)
 {
-	int best_clk = 0, best_err = target_khz, p_ref, n_ref;
+	int best_err = target_khz, p_ref, n_ref;
 	bool upper = false;
 
 	*M1 = 1;
@@ -1010,7 +1010,6 @@ gk104_pll_calc_hiclk(int target_khz, int crystal,
 			/* we found a better combination */
 			if (cur_err < best_err) {
 				best_err = cur_err;
-				best_clk = cur_clk;
 				*N2 = cur_N;
 				*N1 = n_ref;
 				*P1 = p_ref;
@@ -1022,7 +1021,6 @@ gk104_pll_calc_hiclk(int target_khz, int crystal,
 				- target_khz;
 			if (cur_err < best_err) {
 				best_err = cur_err;
-				best_clk = cur_clk;
 				*N2 = cur_N;
 				*N1 = n_ref;
 				*P1 = p_ref;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c
index 0a0e44b75577..017a91de74a0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c
@@ -39,7 +39,7 @@ mcp77_ram_init(struct nvkm_ram *base)
 	u32 flush  = ((ram->base.size - (ram->poller_base + 0x40)) >> 5) - 1;
 
 	/* Enable NISO poller for various clients and set their associated
-	 * read address, only for MCP77/78 and MCP79/7A. (fd#25701)
+	 * read address, only for MCP77/78 and MCP79/7A. (fd#27501)
 	 */
 	nvkm_wr32(device, 0x100c18, dniso);
 	nvkm_mask(device, 0x100c14, 0x00000000, 0x00000001);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
index 87bde8ff2d6b..6549b0588309 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
@@ -496,15 +496,12 @@ nv50_ram_tidy(struct nvkm_ram *base)
 void
 __nv50_ram_put(struct nvkm_ram *ram, struct nvkm_mem *mem)
 {
-	struct nvkm_mm_node *this;
-
-	while (!list_empty(&mem->regions)) {
-		this = list_first_entry(&mem->regions, typeof(*this), rl_entry);
-
-		list_del(&this->rl_entry);
-		nvkm_mm_free(&ram->vram, &this);
+	struct nvkm_mm_node *next = mem->mem;
+	struct nvkm_mm_node *node;
+	while ((node = next)) {
+		next = node->next;
+		nvkm_mm_free(&ram->vram, &node);
 	}
-
 	nvkm_mm_free(&ram->tags, &mem->tag);
 }
 
@@ -530,7 +527,7 @@ nv50_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 {
 	struct nvkm_mm *heap = &ram->vram;
 	struct nvkm_mm *tags = &ram->tags;
-	struct nvkm_mm_node *r;
+	struct nvkm_mm_node **node, *r;
 	struct nvkm_mem *mem;
 	int comp = (memtype & 0x300) >> 8;
 	int type = (memtype & 0x07f);
@@ -559,11 +556,11 @@ nv50_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 			comp = 0;
 	}
 
-	INIT_LIST_HEAD(&mem->regions);
 	mem->memtype = (comp << 7) | type;
 	mem->size = max;
 
 	type = nv50_fb_memtype[type];
+	node = &mem->mem;
 	do {
 		if (back)
 			ret = nvkm_mm_tail(heap, 0, type, max, min, align, &r);
@@ -575,13 +572,13 @@ nv50_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 			return ret;
 		}
 
-		list_add_tail(&r->rl_entry, &mem->regions);
+		*node = r;
+		node = &r->next;
 		max -= r->length;
 	} while (max);
 	mutex_unlock(&ram->fb->subdev.mutex);
 
-	r = list_first_entry(&mem->regions, struct nvkm_mm_node, rl_entry);
-	mem->offset = (u64)r->offset << NVKM_RAM_MM_SHIFT;
+	mem->offset = (u64)mem->mem->offset << NVKM_RAM_MM_SHIFT;
 	*pmem = mem;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
index f0af2a381eea..fecfa6afcf54 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
@@ -26,6 +26,7 @@
 #include <subdev/bios.h>
 #include <subdev/bios/extdev.h>
 #include <subdev/bios/iccsense.h>
+#include <subdev/bios/power_budget.h>
 #include <subdev/i2c.h>
 
 static bool
@@ -216,10 +217,25 @@ nvkm_iccsense_oneinit(struct nvkm_subdev *subdev)
 {
 	struct nvkm_iccsense *iccsense = nvkm_iccsense(subdev);
 	struct nvkm_bios *bios = subdev->device->bios;
+	struct nvbios_power_budget budget;
 	struct nvbios_iccsense stbl;
-	int i;
+	int i, ret;
 
-	if (!bios || nvbios_iccsense_parse(bios, &stbl) || !stbl.nr_entry)
+	if (!bios)
+		return 0;
+
+	ret = nvbios_power_budget_header(bios, &budget);
+	if (!ret && budget.cap_entry != 0xff) {
+		struct nvbios_power_budget_entry entry;
+		ret = nvbios_power_budget_entry(bios, &budget,
+		                                budget.cap_entry, &entry);
+		if (!ret) {
+			iccsense->power_w_max  = entry.avg_w;
+			iccsense->power_w_crit = entry.max_w;
+		}
+	}
+
+	if (nvbios_iccsense_parse(bios, &stbl) || !stbl.nr_entry)
 		return 0;
 
 	iccsense->data_valid = true;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
index a6a7fa0d7679..9dec58ec3d9f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
@@ -116,7 +116,7 @@ struct gk20a_instmem {
 static enum nvkm_memory_target
 gk20a_instobj_target(struct nvkm_memory *memory)
 {
-	return NVKM_MEM_TARGET_HOST;
+	return NVKM_MEM_TARGET_NCOH;
 }
 
 static u64
@@ -305,11 +305,11 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory)
 	struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
 	struct gk20a_instmem *imem = node->base.imem;
 	struct device *dev = imem->base.subdev.device->dev;
-	struct nvkm_mm_node *r;
+	struct nvkm_mm_node *r = node->base.mem.mem;
 	unsigned long flags;
 	int i;
 
-	if (unlikely(list_empty(&node->base.mem.regions)))
+	if (unlikely(!r))
 		goto out;
 
 	spin_lock_irqsave(&imem->lock, flags);
@@ -320,9 +320,6 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory)
 
 	spin_unlock_irqrestore(&imem->lock, flags);
 
-	r = list_first_entry(&node->base.mem.regions, struct nvkm_mm_node,
-			     rl_entry);
-
 	/* clear IOMMU bit to unmap pages */
 	r->offset &= ~BIT(imem->iommu_bit - imem->iommu_pgshift);
 
@@ -404,10 +401,7 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align,
 	node->r.length = (npages << PAGE_SHIFT) >> 12;
 
 	node->base.mem.offset = node->handle;
-
-	INIT_LIST_HEAD(&node->base.mem.regions);
-	list_add_tail(&node->r.rl_entry, &node->base.mem.regions);
-
+	node->base.mem.mem = &node->r;
 	return 0;
 }
 
@@ -484,10 +478,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
 	r->offset |= BIT(imem->iommu_bit - imem->iommu_pgshift);
 
 	node->base.mem.offset = ((u64)r->offset) << imem->iommu_pgshift;
-
-	INIT_LIST_HEAD(&node->base.mem.regions);
-	list_add_tail(&r->rl_entry, &node->base.mem.regions);
-
+	node->base.mem.mem = r;
 	return 0;
 
 release_area:
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
index 6b25e25f9eba..09f669ac6630 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
@@ -161,6 +161,16 @@ nvkm_mc_enable(struct nvkm_device *device, enum nvkm_devidx devidx)
 	}
 }
 
+bool
+nvkm_mc_enabled(struct nvkm_device *device, enum nvkm_devidx devidx)
+{
+	u64 pmc_enable = nvkm_mc_reset_mask(device, false, devidx);
+
+	return (pmc_enable != 0) && /* empty mask: never enabled */
+	       ((nvkm_rd32(device, 0x000200) & pmc_enable) == pmc_enable);
+}
+
+
 static int
 nvkm_mc_fini(struct nvkm_subdev *subdev, bool suspend)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c
index 5df9669ea39c..d06ad2c372bf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c
@@ -31,7 +31,7 @@ nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node)
 {
 	struct nvkm_vm *vm = vma->vm;
 	struct nvkm_mmu *mmu = vm->mmu;
-	struct nvkm_mm_node *r;
+	struct nvkm_mm_node *r = node->mem;
 	int big = vma->node->type != mmu->func->spg_shift;
 	u32 offset = vma->node->offset + (delta >> 12);
 	u32 bits = vma->node->type - 12;
@@ -41,7 +41,7 @@ nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node)
 	u32 end, len;
 
 	delta = 0;
-	list_for_each_entry(r, &node->regions, rl_entry) {
+	while (r) {
 		u64 phys = (u64)r->offset << 12;
 		u32 num  = r->length >> bits;
 
@@ -65,7 +65,8 @@ nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node)
 
 			delta += (u64)len << vma->node->type;
 		}
-	}
+		r = r->next;
+	};
 
 	mmu->func->flush(vm);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
index 2a31b7d66a6d..87bf41cef0c6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
@@ -6,6 +6,7 @@ nvkm-y += nvkm/subdev/pci/nv40.o
 nvkm-y += nvkm/subdev/pci/nv46.o
 nvkm-y += nvkm/subdev/pci/nv4c.o
 nvkm-y += nvkm/subdev/pci/g84.o
+nvkm-y += nvkm/subdev/pci/g92.o
 nvkm-y += nvkm/subdev/pci/g94.o
 nvkm-y += nvkm/subdev/pci/gf100.o
 nvkm-y += nvkm/subdev/pci/gf106.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g92.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g92.c
new file mode 100644
index 000000000000..48874359d5f6
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g92.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "priv.h"
+
+int
+g92_pcie_version_supported(struct nvkm_pci *pci)
+{
+	if ((nvkm_pci_rd32(pci, 0x460) & 0x200) == 0x200)
+		return 2; /* bit 0x200 of register 0x460 is set */
+	return 1;
+}
+
+static const struct nvkm_pci_func
+g92_pci_func = {
+	.init = g84_pci_init,
+	.rd32 = nv40_pci_rd32,
+	.wr08 = nv40_pci_wr08,
+	.wr32 = nv40_pci_wr32,
+	.msi_rearm = nv46_pci_msi_rearm,
+
+	.pcie.init = g84_pcie_init,
+	.pcie.set_link = g84_pcie_set_link,
+
+	.pcie.max_speed = g84_pcie_max_speed,
+	.pcie.cur_speed = g84_pcie_cur_speed,
+
+	.pcie.set_version = g84_pcie_set_version,
+	.pcie.version = g84_pcie_version,
+	.pcie.version_supported = g92_pcie_version_supported,
+};
+
+int
+g92_pci_new(struct nvkm_device *device, int index, struct nvkm_pci **ppci)
+{
+	return nvkm_pci_new_(&g92_pci_func, device, index, ppci);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c
index 43444123bc04..09adb37a5664 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c
@@ -23,14 +23,6 @@
  */
 #include "priv.h"
 
-int
-g94_pcie_version_supported(struct nvkm_pci *pci)
-{
-	if ((nvkm_pci_rd32(pci, 0x460) & 0x200) == 0x200)
-		return 2;
-	return 1;
-}
-
 static const struct nvkm_pci_func
 g94_pci_func = {
 	.init = g84_pci_init,
@@ -47,7 +39,7 @@ g94_pci_func = {
 
 	.pcie.set_version = g84_pcie_set_version,
 	.pcie.version = g84_pcie_version,
-	.pcie.version_supported = g94_pcie_version_supported,
+	.pcie.version_supported = g92_pcie_version_supported,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c
index e30ea676baf6..00a5e7d3ee9d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c
@@ -92,7 +92,7 @@ gf100_pci_func = {
 
 	.pcie.set_version = gf100_pcie_set_version,
 	.pcie.version = gf100_pcie_version,
-	.pcie.version_supported = g94_pcie_version_supported,
+	.pcie.version_supported = g92_pcie_version_supported,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c
index c3b798c5c6dd..11bf419afe3f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c
@@ -39,7 +39,7 @@ gf106_pci_func = {
 
 	.pcie.set_version = gf100_pcie_set_version,
 	.pcie.version = gf100_pcie_version,
-	.pcie.version_supported = g94_pcie_version_supported,
+	.pcie.version_supported = g92_pcie_version_supported,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
index 23de3180aae5..86921ec962d6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
@@ -44,7 +44,7 @@ enum nvkm_pcie_speed g84_pcie_max_speed(struct nvkm_pci *);
 int g84_pcie_init(struct nvkm_pci *);
 int g84_pcie_set_link(struct nvkm_pci *, enum nvkm_pcie_speed, u8);
 
-int g94_pcie_version_supported(struct nvkm_pci *);
+int g92_pcie_version_supported(struct nvkm_pci *);
 
 void gf100_pcie_set_version(struct nvkm_pci *, u8);
 int gf100_pcie_version(struct nvkm_pci *);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild
index 51fb4bf94a44..ca57c1e491b0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild
@@ -8,5 +8,6 @@ nvkm-y += nvkm/subdev/pmu/gk110.o
 nvkm-y += nvkm/subdev/pmu/gk208.o
 nvkm-y += nvkm/subdev/pmu/gk20a.o
 nvkm-y += nvkm/subdev/pmu/gm107.o
+nvkm-y += nvkm/subdev/pmu/gm20b.o
 nvkm-y += nvkm/subdev/pmu/gp100.o
 nvkm-y += nvkm/subdev/pmu/gp102.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c
index e611ce80f8ef..a73f690eb4b5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c
@@ -116,6 +116,8 @@ nvkm_pmu_init(struct nvkm_subdev *subdev)
 static void *
 nvkm_pmu_dtor(struct nvkm_subdev *subdev)
 {
+	struct nvkm_pmu *pmu = nvkm_pmu(subdev);
+	nvkm_falcon_del(&pmu->falcon);
 	return nvkm_pmu(subdev);
 }
 
@@ -129,15 +131,22 @@ nvkm_pmu = {
 };
 
 int
+nvkm_pmu_ctor(const struct nvkm_pmu_func *func, struct nvkm_device *device,
+	      int index, struct nvkm_pmu *pmu)
+{
+	nvkm_subdev_ctor(&nvkm_pmu, device, index, &pmu->subdev);
+	pmu->func = func;
+	INIT_WORK(&pmu->recv.work, nvkm_pmu_recv);
+	init_waitqueue_head(&pmu->recv.wait);
+	return nvkm_falcon_v1_new(&pmu->subdev, "PMU", 0x10a000, &pmu->falcon);
+}
+
+int
 nvkm_pmu_new_(const struct nvkm_pmu_func *func, struct nvkm_device *device,
 	      int index, struct nvkm_pmu **ppmu)
 {
 	struct nvkm_pmu *pmu;
 	if (!(pmu = *ppmu = kzalloc(sizeof(*pmu), GFP_KERNEL)))
 		return -ENOMEM;
-	nvkm_subdev_ctor(&nvkm_pmu, device, index, &pmu->subdev);
-	pmu->func = func;
-	INIT_WORK(&pmu->recv.work, nvkm_pmu_recv);
-	init_waitqueue_head(&pmu->recv.wait);
-	return 0;
+	return nvkm_pmu_ctor(func, device, index, *ppmu);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c
index f996d90c9f0d..9ca0db796cbe 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c
@@ -19,7 +19,7 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
-#define gk20a_pmu(p) container_of((p), struct gk20a_pmu, base.subdev)
+#define gk20a_pmu(p) container_of((p), struct gk20a_pmu, base)
 #include "priv.h"
 
 #include <subdev/clk.h>
@@ -43,9 +43,8 @@ struct gk20a_pmu {
 };
 
 struct gk20a_pmu_dvfs_dev_status {
-	unsigned long total;
-	unsigned long busy;
-	int cur_state;
+	u32 total;
+	u32 busy;
 };
 
 static int
@@ -56,13 +55,12 @@ gk20a_pmu_dvfs_target(struct gk20a_pmu *pmu, int *state)
 	return nvkm_clk_astate(clk, *state, 0, false);
 }
 
-static int
+static void
 gk20a_pmu_dvfs_get_cur_state(struct gk20a_pmu *pmu, int *state)
 {
 	struct nvkm_clk *clk = pmu->base.subdev.device->clk;
 
 	*state = clk->pstate;
-	return 0;
 }
 
 static int
@@ -90,28 +88,26 @@ gk20a_pmu_dvfs_get_target_state(struct gk20a_pmu *pmu,
 
 	*state = level;
 
-	if (level == cur_level)
-		return 0;
-	else
-		return 1;
+	return (level != cur_level);
 }
 
-static int
+static void
 gk20a_pmu_dvfs_get_dev_status(struct gk20a_pmu *pmu,
 			      struct gk20a_pmu_dvfs_dev_status *status)
 {
-	struct nvkm_device *device = pmu->base.subdev.device;
-	status->busy = nvkm_rd32(device, 0x10a508 + (BUSY_SLOT * 0x10));
-	status->total= nvkm_rd32(device, 0x10a508 + (CLK_SLOT * 0x10));
-	return 0;
+	struct nvkm_falcon *falcon = pmu->base.falcon;
+
+	status->busy = nvkm_falcon_rd32(falcon, 0x508 + (BUSY_SLOT * 0x10));
+	status->total= nvkm_falcon_rd32(falcon, 0x508 + (CLK_SLOT * 0x10));
 }
 
 static void
 gk20a_pmu_dvfs_reset_dev_status(struct gk20a_pmu *pmu)
 {
-	struct nvkm_device *device = pmu->base.subdev.device;
-	nvkm_wr32(device, 0x10a508 + (BUSY_SLOT * 0x10), 0x80000000);
-	nvkm_wr32(device, 0x10a508 + (CLK_SLOT * 0x10), 0x80000000);
+	struct nvkm_falcon *falcon = pmu->base.falcon;
+
+	nvkm_falcon_wr32(falcon, 0x508 + (BUSY_SLOT * 0x10), 0x80000000);
+	nvkm_falcon_wr32(falcon, 0x508 + (CLK_SLOT * 0x10), 0x80000000);
 }
 
 static void
@@ -127,7 +123,7 @@ gk20a_pmu_dvfs_work(struct nvkm_alarm *alarm)
 	struct nvkm_timer *tmr = device->timer;
 	struct nvkm_volt *volt = device->volt;
 	u32 utilization = 0;
-	int state, ret;
+	int state;
 
 	/*
 	 * The PMU is initialized before CLK and VOLT, so we have to make sure the
@@ -136,11 +132,7 @@ gk20a_pmu_dvfs_work(struct nvkm_alarm *alarm)
 	if (!clk || !volt)
 		goto resched;
 
-	ret = gk20a_pmu_dvfs_get_dev_status(pmu, &status);
-	if (ret) {
-		nvkm_warn(subdev, "failed to get device status\n");
-		goto resched;
-	}
+	gk20a_pmu_dvfs_get_dev_status(pmu, &status);
 
 	if (status.total)
 		utilization = div_u64((u64)status.busy * 100, status.total);
@@ -150,11 +142,7 @@ gk20a_pmu_dvfs_work(struct nvkm_alarm *alarm)
 	nvkm_trace(subdev, "utilization = %d %%, avg_load = %d %%\n",
 		   utilization, data->avg_load);
 
-	ret = gk20a_pmu_dvfs_get_cur_state(pmu, &state);
-	if (ret) {
-		nvkm_warn(subdev, "failed to get current state\n");
-		goto resched;
-	}
+	gk20a_pmu_dvfs_get_cur_state(pmu, &state);
 
 	if (gk20a_pmu_dvfs_get_target_state(pmu, &state, data->avg_load)) {
 		nvkm_trace(subdev, "set new state to %d\n", state);
@@ -166,32 +154,36 @@ resched:
 	nvkm_timer_alarm(tmr, 100000000, alarm);
 }
 
-static int
-gk20a_pmu_fini(struct nvkm_subdev *subdev, bool suspend)
+static void
+gk20a_pmu_fini(struct nvkm_pmu *pmu)
 {
-	struct gk20a_pmu *pmu = gk20a_pmu(subdev);
-	nvkm_timer_alarm_cancel(subdev->device->timer, &pmu->alarm);
-	return 0;
-}
+	struct gk20a_pmu *gpmu = gk20a_pmu(pmu);
+	nvkm_timer_alarm_cancel(pmu->subdev.device->timer, &gpmu->alarm);
 
-static void *
-gk20a_pmu_dtor(struct nvkm_subdev *subdev)
-{
-	return gk20a_pmu(subdev);
+	nvkm_falcon_put(pmu->falcon, &pmu->subdev);
 }
 
 static int
-gk20a_pmu_init(struct nvkm_subdev *subdev)
+gk20a_pmu_init(struct nvkm_pmu *pmu)
 {
-	struct gk20a_pmu *pmu = gk20a_pmu(subdev);
-	struct nvkm_device *device = pmu->base.subdev.device;
+	struct gk20a_pmu *gpmu = gk20a_pmu(pmu);
+	struct nvkm_subdev *subdev = &pmu->subdev;
+	struct nvkm_device *device = pmu->subdev.device;
+	struct nvkm_falcon *falcon = pmu->falcon;
+	int ret;
+
+	ret = nvkm_falcon_get(falcon, subdev);
+	if (ret) {
+		nvkm_error(subdev, "cannot acquire %s falcon!\n", falcon->name);
+		return ret;
+	}
 
 	/* init pwr perf counter */
-	nvkm_wr32(device, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001);
-	nvkm_wr32(device, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002);
-	nvkm_wr32(device, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003);
+	nvkm_falcon_wr32(falcon, 0x504 + (BUSY_SLOT * 0x10), 0x00200001);
+	nvkm_falcon_wr32(falcon, 0x50c + (BUSY_SLOT * 0x10), 0x00000002);
+	nvkm_falcon_wr32(falcon, 0x50c + (CLK_SLOT * 0x10), 0x00000003);
 
-	nvkm_timer_alarm(device->timer, 2000000000, &pmu->alarm);
+	nvkm_timer_alarm(device->timer, 2000000000, &gpmu->alarm);
 	return 0;
 }
 
@@ -202,26 +194,26 @@ gk20a_dvfs_data= {
 	.p_smooth = 1,
 };
 
-static const struct nvkm_subdev_func
+static const struct nvkm_pmu_func
 gk20a_pmu = {
 	.init = gk20a_pmu_init,
 	.fini = gk20a_pmu_fini,
-	.dtor = gk20a_pmu_dtor,
+	.reset = gt215_pmu_reset,
 };
 
 int
 gk20a_pmu_new(struct nvkm_device *device, int index, struct nvkm_pmu **ppmu)
 {
-	static const struct nvkm_pmu_func func = {};
 	struct gk20a_pmu *pmu;
 
 	if (!(pmu = kzalloc(sizeof(*pmu), GFP_KERNEL)))
 		return -ENOMEM;
-	pmu->base.func = &func;
 	*ppmu = &pmu->base;
 
-	nvkm_subdev_ctor(&gk20a_pmu, device, index, &pmu->base.subdev);
+	nvkm_pmu_ctor(&gk20a_pmu, device, index, &pmu->base);
+
 	pmu->data = &gk20a_dvfs_data;
 	nvkm_alarm_init(&pmu->alarm, gk20a_pmu_dvfs_work);
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
new file mode 100644
index 000000000000..0b8a1cc4a0ee
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "priv.h"
+
+static const struct nvkm_pmu_func
+gm20b_pmu = {
+	.reset = gt215_pmu_reset,
+};
+
+int
+gm20b_pmu_new(struct nvkm_device *device, int index, struct nvkm_pmu **ppmu)
+{
+	return nvkm_pmu_new_(&gm20b_pmu, device, index, ppmu);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
index 2e2179a4ad17..096cba069f72 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
@@ -4,6 +4,8 @@
 #include <subdev/pmu.h>
 #include <subdev/pmu/fuc/os.h>
 
+int nvkm_pmu_ctor(const struct nvkm_pmu_func *, struct nvkm_device *,
+		  int index, struct nvkm_pmu *);
 int nvkm_pmu_new_(const struct nvkm_pmu_func *, struct nvkm_device *,
 		  int index, struct nvkm_pmu **);
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild
index b02b868a6589..5076d1500f47 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild
@@ -1,3 +1,7 @@
 nvkm-y += nvkm/subdev/secboot/base.o
+nvkm-y += nvkm/subdev/secboot/ls_ucode_gr.o
+nvkm-y += nvkm/subdev/secboot/acr.o
+nvkm-y += nvkm/subdev/secboot/acr_r352.o
+nvkm-y += nvkm/subdev/secboot/acr_r361.o
 nvkm-y += nvkm/subdev/secboot/gm200.o
 nvkm-y += nvkm/subdev/secboot/gm20b.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.c
new file mode 100644
index 000000000000..75dc06557877
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "acr.h"
+
+#include <core/firmware.h>
+
+/**
+ * nvkm_acr_load_firmware() - copy a firmware file into kernel memory
+ * @subdev:	subdev whose device the firmware is requested for; also used
+ *		for error reporting
+ * @name:	name of the firmware file to request
+ * @min_size:	smallest acceptable firmware size, in bytes
+ *
+ * The firmware handle is released again before returning in every case; the
+ * returned buffer is a kmemdup() copy that the caller has to kfree().
+ *
+ * Return: the copy of the firmware data, or an ERR_PTR() holding the error of
+ * nvkm_firmware_get(), -EINVAL if the file is smaller than @min_size, or
+ * -ENOMEM if the copy could not be allocated.
+ */
+void *
+nvkm_acr_load_firmware(const struct nvkm_subdev *subdev, const char *name,
+		       size_t min_size)
+{
+	const struct firmware *fw;
+	void *copy = ERR_PTR(-EINVAL);
+	int err;
+
+	err = nvkm_firmware_get(subdev->device, name, &fw);
+	if (err)
+		return ERR_PTR(err);
+
+	if (fw->size >= min_size) {
+		copy = kmemdup(fw->data, fw->size, GFP_KERNEL);
+		if (!copy)
+			copy = ERR_PTR(-ENOMEM);
+	} else {
+		nvkm_error(subdev, "%s is smaller than expected size %zu\n",
+			   name, min_size);
+	}
+
+	/* Single release point for all outcomes past a successful get */
+	nvkm_firmware_put(fw);
+
+	return copy;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h
new file mode 100644
index 000000000000..97795b342b6f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __NVKM_SECBOOT_ACR_H__
+#define __NVKM_SECBOOT_ACR_H__
+
+#include "priv.h"
+
+struct nvkm_acr;
+
+/**
+ * struct nvkm_acr_func - properties and functions specific to an ACR
+ *
+ * @dtor: release the ACR instance and everything it owns
+ * @oneinit: NOTE(review): presumably one-time setup for the given secboot
+ *	     device; no implementation is visible from here - confirm
+ * @fini: tear down the ACR state for the given secboot device; the bool is
+ *	  presumably a "suspend" flag - confirm against callers
+ * @load: make the ACR ready to run on the given secboot device; takes the
+ *	  blob to run and a 64-bit offset for it
+ * @reset: reset the specified falcon
+ * @start: start the specified falcon (assumed to have been reset)
+ */
+struct nvkm_acr_func {
+	void (*dtor)(struct nvkm_acr *);
+	int (*oneinit)(struct nvkm_acr *, struct nvkm_secboot *);
+	int (*fini)(struct nvkm_acr *, struct nvkm_secboot *, bool);
+	int (*load)(struct nvkm_acr *, struct nvkm_secboot *,
+		    struct nvkm_gpuobj *, u64);
+	int (*reset)(struct nvkm_acr *, struct nvkm_secboot *,
+		     enum nvkm_secboot_falcon);
+	int (*start)(struct nvkm_acr *, struct nvkm_secboot *,
+		     enum nvkm_secboot_falcon);
+};
+
+/**
+ * struct nvkm_acr - instance of an ACR
+ *
+ * @func: operations implemented by this ACR version
+ * @subdev: subdev used by the ACR code for firmware loading and logging
+ * @boot_falcon: ID of the falcon that will perform secure boot
+ * @managed_falcons: bitfield of falcons managed by this ACR
+ * @start_address: virtual start address of the HS bootloader
+ */
+struct nvkm_acr {
+	const struct nvkm_acr_func *func;
+	const struct nvkm_subdev *subdev;
+
+	enum nvkm_secboot_falcon boot_falcon;
+	unsigned long managed_falcons;
+	u32 start_address;
+};
+
+void *nvkm_acr_load_firmware(const struct nvkm_subdev *, const char *, size_t);
+
+struct nvkm_acr *acr_r352_new(unsigned long);
+struct nvkm_acr *acr_r361_new(unsigned long);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c
new file mode 100644
index 000000000000..1aa37ea18580
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c
@@ -0,0 +1,936 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "acr_r352.h"
+
+#include <core/gpuobj.h>
+#include <core/firmware.h>
+#include <engine/falcon.h>
+
+/**
+ * struct hsf_fw_header - HS firmware descriptor
+ * @sig_dbg_offset:	offset of the debug signature
+ * @sig_dbg_size:	size of the debug signature
+ * @sig_prod_offset:	offset of the production signature
+ * @sig_prod_size:	size of the production signature
+ * @patch_loc:		offset of the offset (sic) of where the signature is
+ * @patch_sig:		offset of the offset (sic) to add to sig_*_offset
+ * @hdr_offset:		offset of the load header (see struct hsf_load_header)
+ * @hdr_size:		size of above header
+ *
+ * This structure is embedded in the HS firmware image at the header_offset
+ * given by the struct fw_bin_header found at the start of the image.
+ */
+struct hsf_fw_header {
+	u32 sig_dbg_offset;
+	u32 sig_dbg_size;
+	u32 sig_prod_offset;
+	u32 sig_prod_size;
+	u32 patch_loc;
+	u32 patch_sig;
+	u32 hdr_offset;
+	u32 hdr_size;
+};
+
+/**
+ * struct acr_r352_flcn_bl_desc - DMEM bootloader descriptor
+ * @reserved:		not written by the descriptor generators in this file
+ * @signature:		16B signature for secure code. 0s if no secure code
+ * @ctx_dma:		DMA context to be used by BL while loading code/data
+ * @code_dma_base:	256B-aligned Physical FB Address where code is located
+ *			(falcon's $xcbase register). Holds the low 32 bits of
+ *			the address shifted right by 8.
+ * @non_sec_code_off:	offset from code_dma_base where the non-secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @non_sec_code_size:	the size of the nonSecure code part.
+ * @sec_code_off:	offset from code_dma_base where the secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @sec_code_size:	the size of the secure code part.
+ * @code_entry_point:	code entry point which will be invoked by BL after
+ *                      code is loaded.
+ * @data_dma_base:	256B aligned Physical FB Address where data is located.
+ *			(falcon's $xdbase register). Holds the low 32 bits of
+ *			the address shifted right by 8.
+ * @data_size:		size of data block. Should be multiple of 256B
+ * @code_dma_base1:	high 32 bits of the code address shifted right by 8
+ * @data_dma_base1:	high 32 bits of the data address shifted right by 8
+ *
+ * Structure used by the bootloader to load the rest of the code. This has
+ * to be filled by host and copied into DMEM at the offset provided by the
+ * bootloader descriptor (dmem_load_off of struct fw_bl_desc for the HS
+ * bootloader).
+ */
+struct acr_r352_flcn_bl_desc {
+	u32 reserved[4];
+	u32 signature[4];
+	u32 ctx_dma;
+	u32 code_dma_base;
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 sec_code_off;
+	u32 sec_code_size;
+	u32 code_entry_point;
+	u32 data_dma_base;
+	u32 data_size;
+	u32 code_dma_base1;
+	u32 data_dma_base1;
+};
+
+/**
+ * acr_r352_generate_flcn_bl_desc - generate generic BL descriptor for LS image
+ * @acr:	ACR instance (not consulted by this generator)
+ * @_img:	LS image the descriptor is generated for
+ * @wpr_addr:	address of the WPR region the image offsets are relative to
+ * @_desc:	output buffer, interpreted as a struct acr_r352_flcn_bl_desc
+ *
+ * Only the fields assigned below are written; every other field keeps
+ * whatever value the caller put into @_desc.
+ */
+static void
+acr_r352_generate_flcn_bl_desc(const struct nvkm_acr *acr,
+			       const struct ls_ucode_img *_img, u64 wpr_addr,
+			       void *_desc)
+{
+	const struct ls_ucode_img_desc *ucode = &_img->ucode_desc;
+	struct ls_ucode_img_r352 *img352 = ls_ucode_img_r352(_img);
+	struct acr_r352_flcn_bl_desc *bl = _desc;
+	u64 app_base, code, data;
+
+	/* Address of the application inside the WPR region */
+	app_base = wpr_addr + img352->lsb_header.ucode_off;
+	app_base += ucode->app_start_offset;
+
+	/* DMA bases are stored shifted right by 8, split in two 32-bit words */
+	code = (app_base + ucode->app_resident_code_offset) >> 8;
+	data = (app_base + ucode->app_resident_data_offset) >> 8;
+
+	bl->ctx_dma = FALCON_DMAIDX_UCODE;
+
+	bl->code_dma_base = lower_32_bits(code);
+	bl->code_dma_base1 = upper_32_bits(code);
+	bl->non_sec_code_off = ucode->app_resident_code_offset;
+	bl->non_sec_code_size = ucode->app_resident_code_size;
+	bl->code_entry_point = ucode->app_imem_entry;
+
+	bl->data_dma_base = lower_32_bits(data);
+	bl->data_dma_base1 = upper_32_bits(data);
+	bl->data_size = ucode->app_resident_data_size;
+}
+
+
+/**
+ * struct hsflcn_acr_desc - data section of the HS firmware
+ *
+ * This header is to be copied at the beginning of DMEM by the HS bootloader.
+ *
+ * @ucode_reserved_space:	reserved area whose first words hold the
+ *				signature of ACR ucode
+ * @wpr_region_id:	region ID holding the WPR header and its details
+ * @wpr_offset:		offset from the WPR region holding the wpr header
+ * @mmu_mem_range:	not written by the code in this file
+ * @regions:		region descriptors
+ * @ucode_blob_size:	size of LS blob
+ * @ucode_blob_base:	FB location of LS blob
+ * @vpr_desc:		not written by the code in this file
+ */
+struct hsflcn_acr_desc {
+	union {
+		u8 reserved_dmem[0x200];
+		u32 signatures[4];
+	} ucode_reserved_space;
+	u32 wpr_region_id;
+	u32 wpr_offset;
+	u32 mmu_mem_range;
+#define FLCN_ACR_MAX_REGIONS 2
+	struct {
+		u32 no_regions;
+		struct {
+			u32 start_addr;
+			u32 end_addr;
+			u32 region_id;
+			u32 read_mask;
+			u32 write_mask;
+			u32 client_mask;
+		} region_props[FLCN_ACR_MAX_REGIONS];
+	} regions;
+	u32 ucode_blob_size;
+	u64 ucode_blob_base __aligned(8);
+	struct {
+		u32 vpr_enabled;
+		u32 vpr_start;
+		u32 vpr_end;
+		u32 hdcp_policies;
+	} vpr_desc;
+};
+
+
+/*
+ * Low-secure blob creation
+ */
+
+/**
+ * acr_r352_ls_ucode_img_load() - create a ls_ucode_img and load it
+ * @acr:	ACR providing the per-falcon load functions
+ * @falcon_id:	falcon to load the LS firmware of
+ *
+ * Allocates an r352 image, has the falcon-specific loader fill it in and
+ * copies the loaded signature into the LSB header of the image.
+ *
+ * Return: the generic part of the new image, to be released by the caller
+ * (ucode_data, sig and the image itself), or an ERR_PTR() on failure. Nothing
+ * stays allocated when an error is returned.
+ */
+struct ls_ucode_img *
+acr_r352_ls_ucode_img_load(const struct acr_r352 *acr,
+			   enum nvkm_secboot_falcon falcon_id)
+{
+	const struct nvkm_subdev *subdev = acr->base.subdev;
+	struct ls_ucode_img_r352 *img;
+	int ret;
+
+	img = kzalloc(sizeof(*img), GFP_KERNEL);
+	if (!img)
+		return ERR_PTR(-ENOMEM);
+
+	img->base.falcon_id = falcon_id;
+
+	ret = acr->func->ls_func[falcon_id]->load(subdev, &img->base);
+	if (ret)
+		goto error;
+
+	/* Check that the signature size matches our expectations... */
+	if (img->base.sig_size != sizeof(img->lsb_header.signature)) {
+		nvkm_error(subdev, "invalid signature size for %s falcon!\n",
+			   nvkm_secboot_falcon_name[falcon_id]);
+		ret = -EINVAL;
+		goto error;
+	}
+
+	/* Copy signature to the right place */
+	memcpy(&img->lsb_header.signature, img->base.sig, img->base.sig_size);
+
+	/* not needed? the signature should already have the right value */
+	img->lsb_header.signature.falcon_id = falcon_id;
+
+	return &img->base;
+
+error:
+	/*
+	 * The image was zero-allocated, so buffers the loader did not get to
+	 * allocate are NULL and kfree() ignores them.
+	 */
+	kfree(img->base.ucode_data);
+	kfree(img->base.sig);
+	kfree(img);
+
+	return ERR_PTR(ret);
+}
+
+#define LSF_LSB_HEADER_ALIGN 256
+#define LSF_BL_DATA_ALIGN 256
+#define LSF_BL_DATA_SIZE_ALIGN 256
+#define LSF_BL_CODE_SIZE_ALIGN 256
+#define LSF_UCODE_DATA_ALIGN 4096
+
+/**
+ * acr_r352_ls_img_fill_headers - fill the WPR and LSB headers of an image
+ * @acr:	ACR to use
+ * @img:	image to generate for
+ * @offset:	offset in the WPR region where this image starts
+ *
+ * Allocate space in the WPR area from offset and write the WPR and LSB headers
+ * accordingly. Only the header copies embedded in @img are written here; space
+ * is reserved purely by advancing @offset past the LSB header, the ucode image
+ * and the BL descriptor, each aligned as required.
+ *
+ * Return: offset at the end of this image.
+ */
+static u32
+acr_r352_ls_img_fill_headers(struct acr_r352 *acr,
+			     struct ls_ucode_img_r352 *img, u32 offset)
+{
+	struct ls_ucode_img *_img = &img->base;
+	struct acr_r352_lsf_wpr_header *whdr = &img->wpr_header;
+	struct acr_r352_lsf_lsb_header *lhdr = &img->lsb_header;
+	struct ls_ucode_img_desc *desc = &_img->ucode_desc;
+	const struct acr_r352_ls_func *func =
+					    acr->func->ls_func[_img->falcon_id];
+
+	/* Fill WPR header */
+	whdr->falcon_id = _img->falcon_id;
+	whdr->bootstrap_owner = acr->base.boot_falcon;
+	whdr->status = LSF_IMAGE_STATUS_COPY;
+
+	/* Skip bootstrapping falcons started by someone else than ACR */
+	if (acr->lazy_bootstrap & BIT(_img->falcon_id))
+		whdr->lazy_bootstrap = 1;
+
+	/* Align, save off, and include an LSB header size */
+	offset = ALIGN(offset, LSF_LSB_HEADER_ALIGN);
+	whdr->lsb_offset = offset;
+	offset += sizeof(*lhdr);
+
+	/*
+	 * Align, save off, and include the original (static) ucode
+	 * image size
+	 */
+	offset = ALIGN(offset, LSF_UCODE_DATA_ALIGN);
+	lhdr->ucode_off = offset;
+	offset += _img->ucode_size;
+
+	/*
+	 * For falcons that use a boot loader (BL), we append a loader
+	 * desc structure on the end of the ucode image and consider
+	 * this the boot loader data. The host will then copy the loader
+	 * desc args to this space within the WPR region (before locking
+	 * down) and the HS bin will then copy them to DMEM 0 for the
+	 * loader.
+	 */
+	lhdr->bl_code_size = ALIGN(desc->bootloader_size,
+				   LSF_BL_CODE_SIZE_ALIGN);
+	lhdr->ucode_size = ALIGN(desc->app_resident_data_offset,
+				 LSF_BL_CODE_SIZE_ALIGN) + lhdr->bl_code_size;
+	/* data_size is what remains of app + BL once ucode_size is taken off */
+	lhdr->data_size = ALIGN(desc->app_size, LSF_BL_CODE_SIZE_ALIGN) +
+				lhdr->bl_code_size - lhdr->ucode_size;
+	/*
+	 * Though the BL is located at 0th offset of the image, the VA
+	 * is different to make sure that it doesn't collide the actual
+	 * OS VA range
+	 */
+	lhdr->bl_imem_off = desc->bootloader_imem_offset;
+	lhdr->app_code_off = desc->app_start_offset +
+			     desc->app_resident_code_offset;
+	lhdr->app_code_size = desc->app_resident_code_size;
+	lhdr->app_data_off = desc->app_start_offset +
+			     desc->app_resident_data_offset;
+	lhdr->app_data_size = desc->app_resident_data_size;
+
+	/* Per-falcon flags, plus a DMA context request for the boot falcon */
+	lhdr->flags = func->lhdr_flags;
+	if (_img->falcon_id == acr->base.boot_falcon)
+		lhdr->flags |= LSF_FLAG_DMACTL_REQ_CTX;
+
+	/* Align and save off BL descriptor size */
+	lhdr->bl_data_size = ALIGN(func->bl_desc_size, LSF_BL_DATA_SIZE_ALIGN);
+
+	/*
+	 * Align, save off, and include the additional BL data
+	 */
+	offset = ALIGN(offset, LSF_BL_DATA_ALIGN);
+	lhdr->bl_data_off = offset;
+	offset += lhdr->bl_data_size;
+
+	return offset;
+}
+
+/**
+ * acr_r352_ls_fill_headers - fill WPR and LSB headers of all managed images
+ * @acr:	ACR the images are managed by
+ * @imgs:	list of the images to lay out, linked through their base node
+ *
+ * The WPR region starts with a packed array of WPR headers, one per image
+ * plus one extra slot marking the end of the array. The secure falcon is
+ * expected to read that array with a single DMA transfer and cache it, hence
+ * the tight packing. The LSB headers, ucode images and BL descriptors of the
+ * individual images follow the array.
+ *
+ * Return: offset of the first byte after the last image.
+ */
+int
+acr_r352_ls_fill_headers(struct acr_r352 *acr, struct list_head *imgs)
+{
+	struct ls_ucode_img_r352 *img;
+	struct list_head *pos;
+	u32 nr_imgs = 0;
+	u32 wpr_end;
+
+	/* Number of images to manage */
+	list_for_each(pos, imgs)
+		nr_imgs++;
+
+	/* Header array, including its terminating entry */
+	wpr_end = sizeof(img->wpr_header) * (nr_imgs + 1);
+
+	/* Append the LSB struct and ucode of every managed falcon */
+	list_for_each_entry(img, imgs, base.node)
+		wpr_end = acr_r352_ls_img_fill_headers(acr, img, wpr_end);
+
+	return wpr_end;
+}
+
+/**
+ * acr_r352_ls_write_wpr - write the WPR blob contents
+ * @acr:	ACR providing the per-falcon BL descriptor generators
+ * @imgs:	images to write, with their headers already filled
+ * @wpr_blob:	GPU object receiving the headers, BL descriptors and ucode
+ * @wpr_addr:	address of the WPR region, handed to the descriptor generator
+ *
+ * The WPR headers are packed at the start of @wpr_blob in list order and are
+ * terminated by an entry holding NVKM_SECBOOT_FALCON_INVALID. Everything else
+ * is written at the offsets recorded in the headers of each image.
+ *
+ * NOTE(review): @wpr_addr is a u32 here while the caller in this file passes
+ * a u64 and the descriptor generator takes a u64 - looks like addresses above
+ * 4GiB get truncated; widening needs the prototype in the header changed too.
+ *
+ * Return: always 0.
+ */
+int
+acr_r352_ls_write_wpr(struct acr_r352 *acr, struct list_head *imgs,
+		      struct nvkm_gpuobj *wpr_blob, u32 wpr_addr)
+{
+	struct ls_ucode_img *_img;
+	u32 pos = 0;
+
+	nvkm_kmap(wpr_blob);
+
+	list_for_each_entry(_img, imgs, node) {
+		struct ls_ucode_img_r352 *img = ls_ucode_img_r352(_img);
+		const struct acr_r352_ls_func *ls_func =
+					    acr->func->ls_func[_img->falcon_id];
+		/* Scratch buffer sized for this falcon's BL descriptor */
+		u8 gdesc[ls_func->bl_desc_size];
+
+		nvkm_gpuobj_memcpy_to(wpr_blob, pos, &img->wpr_header,
+				      sizeof(img->wpr_header));
+
+		nvkm_gpuobj_memcpy_to(wpr_blob, img->wpr_header.lsb_offset,
+				     &img->lsb_header, sizeof(img->lsb_header));
+
+		/* Generate and write BL descriptor */
+		memset(gdesc, 0, ls_func->bl_desc_size);
+		ls_func->generate_bl_desc(&acr->base, _img, wpr_addr, gdesc);
+
+		nvkm_gpuobj_memcpy_to(wpr_blob, img->lsb_header.bl_data_off,
+				      gdesc, ls_func->bl_desc_size);
+
+		/* Copy ucode */
+		nvkm_gpuobj_memcpy_to(wpr_blob, img->lsb_header.ucode_off,
+				      _img->ucode_data, _img->ucode_size);
+
+		pos += sizeof(img->wpr_header);
+	}
+
+	/* Terminate the WPR header array */
+	nvkm_wo32(wpr_blob, pos, NVKM_SECBOOT_FALCON_INVALID);
+
+	nvkm_done(wpr_blob);
+
+	return 0;
+}
+
+/* Both size and address of WPR need to be 128K-aligned */
+#define WPR_ALIGNMENT	0x20000
+/**
+ * acr_r352_prepare_ls_blob() - prepare the LS blob
+ * @acr:	ACR to prepare the blob for; receives it in its ls_blob member
+ * @wpr_addr:	address of the WPR region, ignored if @wpr_size is 0
+ * @wpr_size:	size of the WPR region, or 0 if the region is not fixed and
+ *		should be made to fit the LS blob
+ *
+ * For each securely managed falcon, load the FW, signatures and bootloaders and
+ * prepare a ucode blob. Then, compute the offsets in the WPR region for each
+ * blob, and finally write the headers and ucode blobs into a GPU object that
+ * will be copied into the WPR region by the HS firmware.
+ *
+ * The loaded images are only needed while the blob is built and are always
+ * released before returning. The blob allocated here is released again on
+ * every failure that follows its allocation.
+ *
+ * Return: 0 on success, -ENOSPC if the blob does not fit a fixed WPR region,
+ * or the error of the failing step.
+ */
+static int
+acr_r352_prepare_ls_blob(struct acr_r352 *acr, u64 wpr_addr, u32 wpr_size)
+{
+	const struct nvkm_subdev *subdev = acr->base.subdev;
+	struct list_head imgs;
+	struct ls_ucode_img *img, *t;
+	unsigned long managed_falcons = acr->base.managed_falcons;
+	int managed_count = 0;
+	u32 image_wpr_size;
+	int falcon_id;
+	int ret;
+
+	INIT_LIST_HEAD(&imgs);
+
+	/* Load all LS blobs */
+	for_each_set_bit(falcon_id, &managed_falcons, NVKM_SECBOOT_FALCON_END) {
+		img = acr->func->ls_ucode_img_load(acr, falcon_id);
+		if (IS_ERR(img)) {
+			ret = PTR_ERR(img);
+			goto cleanup;
+		}
+
+		list_add_tail(&img->node, &imgs);
+		managed_count++;
+	}
+
+	/*
+	 * Fill the WPR and LSF headers with the right offsets and compute
+	 * required WPR size
+	 */
+	image_wpr_size = acr->func->ls_fill_headers(acr, &imgs);
+	image_wpr_size = ALIGN(image_wpr_size, WPR_ALIGNMENT);
+
+	/* Allocate GPU object that will contain the WPR region */
+	ret = nvkm_gpuobj_new(subdev->device, image_wpr_size, WPR_ALIGNMENT,
+			      false, NULL, &acr->ls_blob);
+	if (ret)
+		goto cleanup;
+
+	nvkm_debug(subdev, "%d managed LS falcons, WPR size is %d bytes\n",
+		    managed_count, image_wpr_size);
+
+	/* If WPR address and size are not fixed, set them to fit the LS blob */
+	if (wpr_size == 0) {
+		wpr_addr = acr->ls_blob->addr;
+		wpr_size = image_wpr_size;
+	/*
+	 * But if the WPR region is set by the bootloader, it is illegal for
+	 * the HS blob to be larger than this region.
+	 */
+	} else if (image_wpr_size > wpr_size) {
+		nvkm_error(subdev, "WPR region too small for FW blob!\n");
+		nvkm_error(subdev, "required: %dB\n", image_wpr_size);
+		nvkm_error(subdev, "available: %dB\n", wpr_size);
+		/*
+		 * The blob is useless: drop it so that a later attempt does
+		 * not overwrite the pointer of a still-allocated object.
+		 */
+		nvkm_gpuobj_del(&acr->ls_blob);
+		ret = -ENOSPC;
+		goto cleanup;
+	}
+
+	/* Write LS blob */
+	ret = acr->func->ls_write_wpr(acr, &imgs, acr->ls_blob, wpr_addr);
+	if (ret)
+		nvkm_gpuobj_del(&acr->ls_blob);
+
+cleanup:
+	/* The images have been copied into the blob (or are of no use) */
+	list_for_each_entry_safe(img, t, &imgs, node) {
+		kfree(img->ucode_data);
+		kfree(img->sig);
+		kfree(img);
+	}
+
+	return ret;
+}
+
+
+
+
+/**
+ * acr_r352_hsf_patch_signature() - patch HS blob with correct signature
+ * @sb:		secure boot instance; the debug flag of its boot falcon
+ *		selects between the debug and the production signature
+ * @acr_image:	HS firmware image, patched in place
+ *
+ * NOTE(review): struct hsf_fw_header documents patch_loc and patch_sig as
+ * "offset of the offset", yet both are used directly as offsets here -
+ * confirm against the firmware layout.
+ */
+static void
+acr_r352_hsf_patch_signature(struct nvkm_secboot *sb, void *acr_image)
+{
+	const struct fw_bin_header *bin = acr_image;
+	const struct hsf_fw_header *hsf = acr_image + bin->header_offset;
+	const bool debug = sb->boot_falcon->debug;
+	/* Falcon in debug or production mode? */
+	const u32 sig_offset = debug ? hsf->sig_dbg_offset :
+				       hsf->sig_prod_offset;
+	const u32 sig_size = debug ? hsf->sig_dbg_size : hsf->sig_prod_size;
+	void *dst = acr_image + bin->data_offset + hsf->patch_loc;
+	const void *src = acr_image + sig_offset + hsf->patch_sig;
+
+	/* Patch signature */
+	memcpy(dst, src, sig_size);
+}
+
+/*
+ * acr_r352_fixup_hs_desc() - patch the HS descriptor with the LS blob location
+ *
+ * If the WPR region is not fixed (sb->wpr_size is 0), region 0 of @desc is
+ * made to cover the LS blob of @acr. Otherwise the address and size of the LS
+ * blob are recorded so that it can be copied into the fixed region.
+ */
+static void
+acr_r352_fixup_hs_desc(struct acr_r352 *acr, struct nvkm_secboot *sb,
+		       struct hsflcn_acr_desc *desc)
+{
+	struct nvkm_gpuobj *ls_blob = acr->ls_blob;
+
+	/* WPR region information if WPR is not fixed */
+	if (sb->wpr_size == 0) {
+		/*
+		 * Keep the full width of the address until it has been
+		 * shifted: the descriptor stores addresses in 256B units, so
+		 * truncating to 32 bits first would corrupt any address at or
+		 * above 4GiB.
+		 */
+		u64 wpr_start = ls_blob->addr;
+		u64 wpr_end = wpr_start + ls_blob->size;
+
+		desc->wpr_region_id = 1;
+		desc->regions.no_regions = 2;
+		desc->regions.region_props[0].start_addr = wpr_start >> 8;
+		desc->regions.region_props[0].end_addr = wpr_end >> 8;
+		desc->regions.region_props[0].region_id = 1;
+		desc->regions.region_props[0].read_mask = 0xf;
+		desc->regions.region_props[0].write_mask = 0xc;
+		desc->regions.region_props[0].client_mask = 0x2;
+	} else {
+		desc->ucode_blob_base = ls_blob->addr;
+		desc->ucode_blob_size = ls_blob->size;
+	}
+}
+
+/*
+ * acr_r352_generate_hs_bl_desc() - fill the BL descriptor of a HS blob
+ *
+ * @hdr is the load header of the HS firmware, @_bl_desc the output buffer
+ * (interpreted as a struct acr_r352_flcn_bl_desc) and @offset the address the
+ * blob is mapped at. Only the low 32 bits of the DMA bases are written; fields
+ * not assigned here keep the caller's value.
+ */
+static void
+acr_r352_generate_hs_bl_desc(const struct hsf_load_header *hdr, void *_bl_desc,
+			     u64 offset)
+{
+	struct acr_r352_flcn_bl_desc *desc = _bl_desc;
+	/* DMA bases are expressed in 256B units */
+	const u64 code_base = offset >> 8;
+	const u64 data_base = (offset + hdr->data_dma_base) >> 8;
+
+	desc->ctx_dma = FALCON_DMAIDX_VIRT;
+	desc->code_entry_point = 0;
+
+	/* Code: non-secure part, then the secure part of the first app */
+	desc->code_dma_base = lower_32_bits(code_base);
+	desc->non_sec_code_off = hdr->non_sec_code_off;
+	desc->non_sec_code_size = hdr->non_sec_code_size;
+	desc->sec_code_off = hdr->app[0].sec_code_off;
+	desc->sec_code_size = hdr->app[0].sec_code_size;
+
+	/* Data */
+	desc->data_dma_base = lower_32_bits(data_base);
+	desc->data_size = hdr->data_size;
+}
+
+/**
+ * acr_r352_prepare_hs_blob - load and prepare a HS blob and BL descriptor
+ *
+ * @acr: ACR whose LS blob is described to the HS firmware when patching
+ * @sb: secure boot instance to prepare for
+ * @fw: name of the HS firmware to load
+ * @blob: pointer to gpuobj that will be allocated to receive the HS FW payload
+ * @load_header: receives a copy of the load header of the firmware, including
+ *		 the descriptors of its apps
+ * @patch: whether we should patch the HS descriptor (only for HS loaders)
+ *
+ * The firmware file is only kept in memory for the duration of this call; the
+ * signature (and, if requested, the descriptor) are patched in that temporary
+ * copy before its data section is written to the new GPU object.
+ *
+ * Return: 0 on success, -EINVAL if the firmware has more apps than
+ * ACR_R352_MAX_APPS, or the error of the failing load or allocation.
+ */
+static int
+acr_r352_prepare_hs_blob(struct acr_r352 *acr, struct nvkm_secboot *sb,
+			 const char *fw, struct nvkm_gpuobj **blob,
+			 struct hsf_load_header *load_header, bool patch)
+{
+	struct nvkm_subdev *subdev = &sb->subdev;
+	void *acr_image;
+	struct fw_bin_header *hsbin_hdr;
+	struct hsf_fw_header *fw_hdr;
+	struct hsf_load_header *load_hdr;
+	void *acr_data;
+	int ret;
+
+	acr_image = nvkm_acr_load_firmware(subdev, fw, 0);
+	if (IS_ERR(acr_image))
+		return PTR_ERR(acr_image);
+
+	hsbin_hdr = acr_image;
+	fw_hdr = acr_image + hsbin_hdr->header_offset;
+	load_hdr = acr_image + fw_hdr->hdr_offset;
+	acr_data = acr_image + hsbin_hdr->data_offset;
+
+	/* Patch signature */
+	acr_r352_hsf_patch_signature(sb, acr_image);
+
+	/* Patch descriptor with WPR information? */
+	if (patch) {
+		struct hsflcn_acr_desc *desc;
+
+		desc = acr_data + load_hdr->data_dma_base;
+		acr_r352_fixup_hs_desc(acr, sb, desc);
+	}
+
+	/* The copy below must not run past the app array of the destination */
+	if (load_hdr->num_apps > ACR_R352_MAX_APPS) {
+		nvkm_error(subdev, "more apps (%d) than supported (%d)!\n",
+			   load_hdr->num_apps, ACR_R352_MAX_APPS);
+		ret = -EINVAL;
+		goto cleanup;
+	}
+	memcpy(load_header, load_hdr, sizeof(*load_header) +
+			       (sizeof(load_hdr->app[0]) * load_hdr->num_apps));
+
+	/* Create ACR blob and copy HS data to it */
+	ret = nvkm_gpuobj_new(subdev->device, ALIGN(hsbin_hdr->data_size, 256),
+			      0x1000, false, NULL, blob);
+	if (ret)
+		goto cleanup;
+
+	nvkm_kmap(*blob);
+	nvkm_gpuobj_memcpy_to(*blob, 0, acr_data, hsbin_hdr->data_size);
+	nvkm_done(*blob);
+
+cleanup:
+	kfree(acr_image);
+
+	return ret;
+}
+
+/*
+ * acr_r352_prepare_hsbl_blob() - load the HS bootloader firmware
+ *
+ * Keeps a copy of the "acr/bl" firmware in acr->hsbl_blob and derives the
+ * start address of the bootloader from the start tag of its descriptor.
+ * On failure acr->hsbl_blob is left NULL and the load error is returned.
+ */
+static int
+acr_r352_prepare_hsbl_blob(struct acr_r352 *acr)
+{
+	const struct nvkm_subdev *subdev = acr->base.subdev;
+	const struct fw_bin_header *bin;
+	const struct fw_bl_desc *bl;
+	void *fw;
+
+	fw = nvkm_acr_load_firmware(subdev, "acr/bl", 0);
+	if (IS_ERR(fw)) {
+		acr->hsbl_blob = NULL;
+		return PTR_ERR(fw);
+	}
+	acr->hsbl_blob = fw;
+
+	/* The BL descriptor sits at header_offset inside the firmware */
+	bin = acr->hsbl_blob;
+	bl = acr->hsbl_blob + bin->header_offset;
+
+	/* virtual start address for boot vector */
+	acr->base.start_address = bl->start_tag << 8;
+
+	return 0;
+}
+
+/**
+ * acr_r352_load_blobs - load blobs common to all ACR V1 versions.
+ * @acr:	ACR to load the blobs for
+ * @sb:		secure boot instance providing the WPR address and size
+ *
+ * This includes the LS blob, HS ucode loading blob, and HS bootloader.
+ *
+ * The HS ucode unload blob is only used on dGPU if the WPR region is variable.
+ *
+ * HS blobs and the bootloader that are already present, e.g. because a
+ * previous attempt failed at a later step, are kept rather than loaded again.
+ * NOTE(review): the LS blob on the other hand is prepared anew on every such
+ * attempt.
+ *
+ * Return: 0 if all blobs are (or already were) loaded, the error of the
+ * failing step otherwise.
+ */
+int
+acr_r352_load_blobs(struct acr_r352 *acr, struct nvkm_secboot *sb)
+{
+	int ret;
+
+	/* Firmware already loaded? */
+	if (acr->firmware_ok)
+		return 0;
+
+	/* Load and prepare the managed falcon's firmwares */
+	ret = acr_r352_prepare_ls_blob(acr, sb->wpr_addr, sb->wpr_size);
+	if (ret)
+		return ret;
+
+	/* Load the HS firmware that will load the LS firmwares */
+	if (!acr->load_blob) {
+		ret = acr_r352_prepare_hs_blob(acr, sb, "acr/ucode_load",
+					       &acr->load_blob,
+					       &acr->load_bl_header, true);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * If the ACR region is dynamically programmed, we need an unload FW.
+	 * Do not load it twice: that would overwrite the pointer to the
+	 * previously allocated blob.
+	 */
+	if (sb->wpr_size == 0 && !acr->unload_blob) {
+		ret = acr_r352_prepare_hs_blob(acr, sb, "acr/ucode_unload",
+					       &acr->unload_blob,
+					       &acr->unload_bl_header, false);
+		if (ret)
+			return ret;
+	}
+
+	/* Load the HS firmware bootloader */
+	if (!acr->hsbl_blob) {
+		ret = acr_r352_prepare_hsbl_blob(acr);
+		if (ret)
+			return ret;
+	}
+
+	acr->firmware_ok = true;
+	nvkm_debug(&sb->subdev, "LS blob successfully created\n");
+
+	return 0;
+}
+
+/**
+ * acr_r352_load() - prepare HS falcon to run the specified blob, mapped
+ * at GPU address offset.
+ * @_acr:	ACR instance, must be an r352 ACR
+ * @sb:		secure boot instance whose boot falcon gets loaded
+ * @blob:	blob to run; has to be the load or the unload blob of @_acr
+ * @offset:	GPU address @blob is mapped at
+ *
+ * Uploads the data and code of the HS bootloader to the falcon, followed by a
+ * BL descriptor generated for @blob.
+ *
+ * Return: 0 on success, -EINVAL if @blob is not one of the blobs of @_acr.
+ */
+static int
+acr_r352_load(struct nvkm_acr *_acr, struct nvkm_secboot *sb,
+	      struct nvkm_gpuobj *blob, u64 offset)
+{
+	struct acr_r352 *acr = acr_r352(_acr);
+	struct nvkm_falcon *falcon = sb->boot_falcon;
+	struct fw_bin_header *hdr = acr->hsbl_blob;
+	struct fw_bl_desc *hsbl_desc = acr->hsbl_blob + hdr->header_offset;
+	void *blob_data = acr->hsbl_blob + hdr->data_offset;
+	void *hsbl_code = blob_data + hsbl_desc->code_off;
+	void *hsbl_data = blob_data + hsbl_desc->data_off;
+	u32 code_size = ALIGN(hsbl_desc->code_size, 256);
+	const struct hsf_load_header *load_hdr;
+	const u32 bl_desc_size = acr->func->hs_bl_desc_size;
+	/* Scratch buffer sized for this ACR version's BL descriptor */
+	u8 bl_desc[bl_desc_size];
+
+	/* Find the bootloader descriptor for our blob and copy it */
+	if (blob == acr->load_blob) {
+		load_hdr = &acr->load_bl_header;
+	} else if (blob == acr->unload_blob) {
+		load_hdr = &acr->unload_bl_header;
+	} else {
+		nvkm_error(_acr->subdev, "invalid secure boot blob!\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Copy HS bootloader data
+	 */
+	nvkm_falcon_load_dmem(falcon, hsbl_data, 0x0, hsbl_desc->data_size, 0);
+
+	/* Copy HS bootloader code to end of IMEM */
+	nvkm_falcon_load_imem(falcon, hsbl_code, falcon->code.limit - code_size,
+			      code_size, hsbl_desc->start_tag, 0, false);
+
+	/* Generate the BL header */
+	memset(bl_desc, 0, bl_desc_size);
+	acr->func->generate_hs_bl_desc(load_hdr, bl_desc, offset);
+
+	/*
+	 * Copy HS BL header where the HS descriptor expects it to be
+	 */
+	nvkm_falcon_load_dmem(falcon, bl_desc, hsbl_desc->dmem_load_off,
+			      bl_desc_size, 0);
+
+	return 0;
+}
+
+/*
+ * acr_r352_shutdown() - leave secure mode
+ *
+ * If a WPR region is currently set and an unload blob is available, the
+ * unload blob is run first; a failure to do so is returned and leaves the
+ * state untouched. Afterwards all falcons are marked NON_SECURE and the WPR
+ * region is recorded as not set.
+ */
+static int
+acr_r352_shutdown(struct acr_r352 *acr, struct nvkm_secboot *sb)
+{
+	int falcon;
+
+	/* Run the unload blob to unprotect the WPR region */
+	if (acr->unload_blob && sb->wpr_set) {
+		int err;
+
+		nvkm_debug(&sb->subdev, "running HS unload blob\n");
+		err = sb->func->run_blob(sb, acr->unload_blob);
+		if (err)
+			return err;
+		nvkm_debug(&sb->subdev, "HS unload blob completed\n");
+	}
+
+	falcon = 0;
+	while (falcon < NVKM_SECBOOT_FALCON_END) {
+		acr->falcon_state[falcon] = NON_SECURE;
+		falcon++;
+	}
+
+	sb->wpr_set = false;
+
+	return 0;
+}
+
+/*
+ * acr_r352_bootstrap() - run the HS load blob unless the WPR is already set
+ *
+ * Makes sure all blobs are loaded, runs the HS load blob and records the WPR
+ * region as set on success. The halt interrupt of the boot falcon is cleared
+ * after the run whether it succeeded or not.
+ */
+static int
+acr_r352_bootstrap(struct acr_r352 *acr, struct nvkm_secboot *sb)
+{
+	int err;
+
+	if (!sb->wpr_set) {
+		/* Make sure all blobs are ready */
+		err = acr_r352_load_blobs(acr, sb);
+		if (err)
+			return err;
+
+		nvkm_debug(&sb->subdev, "running HS load blob\n");
+		err = sb->func->run_blob(sb, acr->load_blob);
+		/* clear halt interrupt */
+		nvkm_falcon_clear_interrupt(sb->boot_falcon, 0x10);
+		if (err)
+			return err;
+		nvkm_debug(&sb->subdev, "HS load blob completed\n");
+
+		sb->wpr_set = true;
+	}
+
+	return 0;
+}
+
+/*
+ * acr_r352_reset() - execute secure boot from the prepared state
+ *
+ * Load the HS bootloader and ask the falcon to run it. This will in turn
+ * load the HS firmware and run it, so once the falcon stops all the managed
+ * falcons should have their LS firmware loaded and be ready to run.
+ *
+ * Returns 0 and marks @falcon as RESET on success. If leaving or re-entering
+ * secure mode fails, the error is returned and the state of @falcon is left
+ * unchanged.
+ */
+static int
+acr_r352_reset(struct nvkm_acr *_acr, struct nvkm_secboot *sb,
+	       enum nvkm_secboot_falcon falcon)
+{
+	struct acr_r352 *acr = acr_r352(_acr);
+	int ret;
+
+	/*
+	 * Dummy GM200 implementation: perform secure boot each time we are
+	 * called on FECS. Since only FECS and GPCCS are managed and started
+	 * together, this ought to be safe.
+	 *
+	 * Once we have proper PMU firmware and support, this will be changed
+	 * to a proper call to the PMU method.
+	 */
+	if (falcon != NVKM_SECBOOT_FALCON_FECS)
+		goto end;
+
+	ret = acr_r352_shutdown(acr, sb);
+	if (ret)
+		return ret;
+
+	/* A failed secure boot must not be reported as a successful reset */
+	ret = acr_r352_bootstrap(acr, sb);
+	if (ret)
+		return ret;
+
+end:
+	acr->falcon_state[falcon] = RESET;
+	return 0;
+}
+
+/*
+ * acr_r352_start() - start a managed falcon
+ *
+ * Writes 0x00000002 to offset 0x130 of the register block of @falcon and
+ * marks it as RUNNING. Only FECS and GPCCS are handled; any other falcon
+ * yields -EINVAL.
+ */
+static int
+acr_r352_start(struct nvkm_acr *_acr, struct nvkm_secboot *sb,
+		    enum nvkm_secboot_falcon falcon)
+{
+	const struct nvkm_subdev *subdev = &sb->subdev;
+	struct acr_r352 *acr = acr_r352(_acr);
+	int base;
+
+	/* Register block of the falcon to start */
+	if (falcon == NVKM_SECBOOT_FALCON_FECS) {
+		base = 0x409000;
+	} else if (falcon == NVKM_SECBOOT_FALCON_GPCCS) {
+		base = 0x41a000;
+	} else {
+		nvkm_error(subdev, "cannot start unhandled falcon!\n");
+		return -EINVAL;
+	}
+
+	nvkm_wr32(subdev->device, base + 0x130, 0x00000002);
+	acr->falcon_state[falcon] = RUNNING;
+
+	return 0;
+}
+
+/*
+ * acr_r352_fini() - fini hook of the r352 ACR
+ *
+ * Simply leaves secure mode through acr_r352_shutdown(); @suspend is not
+ * consulted.
+ */
+static int
+acr_r352_fini(struct nvkm_acr *_acr, struct nvkm_secboot *sb, bool suspend)
+{
+	return acr_r352_shutdown(acr_r352(_acr), sb);
+}
+
+/*
+ * acr_r352_dtor() - release an r352 ACR
+ *
+ * Deletes the unload, load and LS blobs, frees the in-memory copy of the HS
+ * bootloader and finally the ACR structure itself.
+ */
+static void
+acr_r352_dtor(struct nvkm_acr *_acr)
+{
+	struct acr_r352 *acr = acr_r352(_acr);
+
+	nvkm_gpuobj_del(&acr->unload_blob);
+
+	/* hsbl_blob is a plain kernel allocation, not a GPU object */
+	kfree(acr->hsbl_blob);
+	nvkm_gpuobj_del(&acr->load_blob);
+	nvkm_gpuobj_del(&acr->ls_blob);
+
+	kfree(acr);
+}
+
+/* LS functions for FECS: generic r352 BL descriptor, no extra LSB flags */
+const struct acr_r352_ls_func
+acr_r352_ls_fecs_func = {
+	.load = acr_ls_ucode_load_fecs,
+	.generate_bl_desc = acr_r352_generate_flcn_bl_desc,
+	.bl_desc_size = sizeof(struct acr_r352_flcn_bl_desc),
+};
+
+/* LS functions for GPCCS: same BL descriptor as FECS, plus a forced PRI load */
+const struct acr_r352_ls_func
+acr_r352_ls_gpccs_func = {
+	.load = acr_ls_ucode_load_gpccs,
+	.generate_bl_desc = acr_r352_generate_flcn_bl_desc,
+	.bl_desc_size = sizeof(struct acr_r352_flcn_bl_desc),
+	/* GPCCS will be loaded using PRI */
+	.lhdr_flags = LSF_FLAG_FORCE_PRIV_LOAD,
+};
+
+/*
+ * r352-specific hooks used by the code in this file: HS BL descriptor
+ * generation, LS image loading and WPR layout, and the per-falcon LS function
+ * tables. Falcons without an entry in ls_func have no LS support here.
+ */
+const struct acr_r352_func
+acr_r352_func = {
+	.generate_hs_bl_desc = acr_r352_generate_hs_bl_desc,
+	.hs_bl_desc_size = sizeof(struct acr_r352_flcn_bl_desc),
+	.ls_ucode_img_load = acr_r352_ls_ucode_img_load,
+	.ls_fill_headers = acr_r352_ls_fill_headers,
+	.ls_write_wpr = acr_r352_ls_write_wpr,
+	.ls_func = {
+		[NVKM_SECBOOT_FALCON_FECS] = &acr_r352_ls_fecs_func,
+		[NVKM_SECBOOT_FALCON_GPCCS] = &acr_r352_ls_gpccs_func,
+	},
+};
+
+/* Generic ACR operations of the r352 implementation; oneinit is not set */
+static const struct nvkm_acr_func
+acr_r352_base_func = {
+	.dtor = acr_r352_dtor,
+	.fini = acr_r352_fini,
+	.load = acr_r352_load,
+	.reset = acr_r352_reset,
+	.start = acr_r352_start,
+};
+
+/*
+ * acr_r352_new_() - allocate an r352-style ACR
+ *
+ * @func provides the version-specific hooks, @boot_falcon is the falcon that
+ * performs secure boot and @managed_falcons the bitfield of falcons to manage.
+ * The generic operations are always those of acr_r352_base_func.
+ *
+ * Returns the generic part of the new, zero-initialized ACR, or
+ * ERR_PTR(-ENOMEM) if it could not be allocated.
+ */
+struct nvkm_acr *
+acr_r352_new_(const struct acr_r352_func *func,
+	      enum nvkm_secboot_falcon boot_falcon,
+	      unsigned long managed_falcons)
+{
+	struct acr_r352 *r352 = kzalloc(sizeof(*r352), GFP_KERNEL);
+
+	if (!r352)
+		return ERR_PTR(-ENOMEM);
+
+	r352->func = func;
+	r352->base.func = &acr_r352_base_func;
+	r352->base.managed_falcons = managed_falcons;
+	r352->base.boot_falcon = boot_falcon;
+
+	return &r352->base;
+}
+
+/*
+ * acr_r352_new() - allocate an r352 ACR managing @managed_falcons
+ *
+ * Uses the acr_r352_func hooks with the PMU as boot falcon; the result of
+ * acr_r352_new_() is returned unchanged.
+ */
+struct nvkm_acr *
+acr_r352_new(unsigned long managed_falcons)
+{
+	return acr_r352_new_(&acr_r352_func, NVKM_SECBOOT_FALCON_PMU,
+			     managed_falcons);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h
new file mode 100644
index 000000000000..ad5923b0fd3c
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __NVKM_SECBOOT_ACR_R352_H__
+#define __NVKM_SECBOOT_ACR_R352_H__
+
+#include "acr.h"
+#include "ls_ucode.h"
+
+struct ls_ucode_img;
+
+#define ACR_R352_MAX_APPS 8
+
+/*
+ *
+ * LS blob structures
+ *
+ */
+
+/**
+ * struct acr_r352_lsf_lsb_header - LS firmware header
+ * @signature:		signature to verify the firmware against
+ * @ucode_off:		offset of the ucode blob in the WPR region. The ucode
+ *                      blob contains the bootloader, code and data of the
+ *                      LS falcon
+ * @ucode_size:		size of the ucode blob, including bootloader
+ * @data_size:		size of the ucode blob data
+ * @bl_code_size:	size of the bootloader code
+ * @bl_imem_off:	offset in imem of the bootloader
+ * @bl_data_off:	offset of the bootloader data in WPR region
+ * @bl_data_size:	size of the bootloader data
+ * @app_code_off:	offset of the app code relative to ucode_off
+ * @app_code_size:	size of the app code
+ * @app_data_off:	offset of the app data relative to ucode_off
+ * @app_data_size:	size of the app data
+ * @flags:		flags for the secure bootloader
+ *
+ * This structure is written into the WPR region for each managed falcon. Each
+ * instance is referenced by the lsb_offset member of the corresponding
+ * lsf_wpr_header.
+ */
+struct acr_r352_lsf_lsb_header {
+	/**
+	 * LS falcon signatures
+	 * @prd_keys:		signature to use in production mode
+	 * @dbg_keys:		signature to use in debug mode
+	 * @b_prd_present:	whether the production key is present
+	 * @b_dbg_present:	whether the debug key is present
+	 * @falcon_id:		ID of the falcon the ucode applies to
+	 */
+	struct {
+		u8 prd_keys[2][16];
+		u8 dbg_keys[2][16];
+		u32 b_prd_present;
+		u32 b_dbg_present;
+		u32 falcon_id;
+	} signature;
+	u32 ucode_off;
+	u32 ucode_size;
+	u32 data_size;
+	u32 bl_code_size;
+	u32 bl_imem_off;
+	u32 bl_data_off;
+	u32 bl_data_size;
+	u32 app_code_off;
+	u32 app_code_size;
+	u32 app_data_off;
+	u32 app_data_size;
+	u32 flags;
+#define LSF_FLAG_LOAD_CODE_AT_0		1
+#define LSF_FLAG_DMACTL_REQ_CTX		4
+#define LSF_FLAG_FORCE_PRIV_LOAD	8
+};
+
+/**
+ * struct acr_r352_lsf_wpr_header - LS blob WPR Header
+ * @falcon_id:		LS falcon ID
+ * @lsb_offset:		offset of the lsf_lsb_header in the WPR region
+ * @bootstrap_owner:	secure falcon in charge of bootstrapping the LS falcon
+ * @lazy_bootstrap:	skip bootstrapping by ACR
+ * @status:		bootstrapping status
+ *
+ * An array of these is written at the beginning of the WPR region, one for
+ * each managed falcon. The array is terminated by an instance whose falcon_id
+ * is LSF_FALCON_ID_INVALID.
+ */
+struct acr_r352_lsf_wpr_header {
+	u32 falcon_id;
+	u32 lsb_offset;
+	u32 bootstrap_owner;
+	u32 lazy_bootstrap;
+	u32 status;
+#define LSF_IMAGE_STATUS_NONE				0
+#define LSF_IMAGE_STATUS_COPY				1
+#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED		2
+#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED		3
+#define LSF_IMAGE_STATUS_VALIDATION_DONE		4
+#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED		5
+#define LSF_IMAGE_STATUS_BOOTSTRAP_READY		6
+};
+
+/**
+ * struct ls_ucode_img_r352 - ucode image augmented with r352 headers
+ */
+struct ls_ucode_img_r352 {
+	struct ls_ucode_img base;
+
+	struct acr_r352_lsf_wpr_header wpr_header;
+	struct acr_r352_lsf_lsb_header lsb_header;
+};
+#define ls_ucode_img_r352(i) container_of(i, struct ls_ucode_img_r352, base)
+
+
+/*
+ * HS blob structures
+ */
+
+struct hsf_load_header_app {
+	u32 sec_code_off;
+	u32 sec_code_size;
+};
+
+/**
+ * struct hsf_load_header - HS firmware load header
+ */
+struct hsf_load_header {
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 data_dma_base;
+	u32 data_size;
+	u32 num_apps;
+	struct hsf_load_header_app app[0];
+};
+
+/**
+ * struct acr_r352_ls_func - manages a single LS firmware
+ *
+ * @load: load the external firmware into a ls_ucode_img
+ * @generate_bl_desc: function called on a block of bl_desc_size to generate the
+ *		      proper bootloader descriptor for this LS firmware
+ * @bl_desc_size: size of the bootloader descriptor
+ * @lhdr_flags: LS flags
+ */
+struct acr_r352_ls_func {
+	int (*load)(const struct nvkm_subdev *, struct ls_ucode_img *);
+	void (*generate_bl_desc)(const struct nvkm_acr *,
+				 const struct ls_ucode_img *, u64, void *);
+	u32 bl_desc_size;
+	u32 lhdr_flags;
+};
+
+struct acr_r352;
+
+/**
+ * struct acr_r352_func - manages nuances between ACR versions
+ *
+ * @generate_hs_bl_desc: function called on a block of hs_bl_desc_size to
+ *			 generate the proper HS bootloader descriptor
+ * @hs_bl_desc_size: size of the HS bootloader descriptor
+ */
+struct acr_r352_func {
+	void (*generate_hs_bl_desc)(const struct hsf_load_header *, void *,
+				    u64);
+	u32 hs_bl_desc_size;
+
+	struct ls_ucode_img *(*ls_ucode_img_load)(const struct acr_r352 *,
+						  enum nvkm_secboot_falcon);
+	int (*ls_fill_headers)(struct acr_r352 *, struct list_head *);
+	int (*ls_write_wpr)(struct acr_r352 *, struct list_head *,
+			    struct nvkm_gpuobj *, u32);
+
+	const struct acr_r352_ls_func *ls_func[NVKM_SECBOOT_FALCON_END];
+};
+
+/**
+ * struct acr_r352 - ACR data for driver release 352 (and beyond)
+ */
+struct acr_r352 {
+	struct nvkm_acr base;
+	const struct acr_r352_func *func;
+
+	/*
+	 * HS FW - lock WPR region (dGPU only) and load LS FWs
+	 * on Tegra the HS FW copies the LS blob into the fixed WPR instead
+	 */
+	struct nvkm_gpuobj *load_blob;
+	struct {
+		struct hsf_load_header load_bl_header;
+		struct hsf_load_header_app __load_apps[ACR_R352_MAX_APPS];
+	};
+
+	/* HS FW - unlock WPR region (dGPU only) */
+	struct nvkm_gpuobj *unload_blob;
+	struct {
+		struct hsf_load_header unload_bl_header;
+		struct hsf_load_header_app __unload_apps[ACR_R352_MAX_APPS];
+	};
+
+	/* HS bootloader */
+	void *hsbl_blob;
+
+	/* LS FWs, to be loaded by the HS ACR */
+	struct nvkm_gpuobj *ls_blob;
+
+	/* Firmware already loaded? */
+	bool firmware_ok;
+
+	/* Falcons to lazy-bootstrap */
+	u32 lazy_bootstrap;
+
+	/* To keep track of the state of all managed falcons */
+	enum {
+		/* In non-secure state, no firmware loaded, no privileges */
+		NON_SECURE = 0,
+		/* In low-secure mode and ready to be started */
+		RESET,
+		/* In low-secure mode and running */
+		RUNNING,
+	} falcon_state[NVKM_SECBOOT_FALCON_END];
+};
+#define acr_r352(acr) container_of(acr, struct acr_r352, base)
+
+struct nvkm_acr *acr_r352_new_(const struct acr_r352_func *,
+			       enum nvkm_secboot_falcon, unsigned long);
+
+struct ls_ucode_img *acr_r352_ls_ucode_img_load(const struct acr_r352 *,
+						enum nvkm_secboot_falcon);
+int acr_r352_ls_fill_headers(struct acr_r352 *, struct list_head *);
+int acr_r352_ls_write_wpr(struct acr_r352 *, struct list_head *,
+			  struct nvkm_gpuobj *, u32);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c
new file mode 100644
index 000000000000..f0aff1d98474
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "acr_r352.h"
+
+#include <engine/falcon.h>
+
+/**
+ * struct acr_r361_flcn_bl_desc - DMEM bootloader descriptor
+ * @signature:		16B signature for secure code. 0s if no secure code
+ * @ctx_dma:		DMA context to be used by BL while loading code/data
+ * @code_dma_base:	256B-aligned Physical FB Address where code is located
+ *			(falcon's $xcbase register)
+ * @non_sec_code_off:	offset from code_dma_base where the non-secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @non_sec_code_size:	the size of the nonSecure code part.
+ * @sec_code_off:	offset from code_dma_base where the secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @sec_code_size:	the size of the secure code part, located at
+ *                      sec_code_off from code_dma_base.
+ * @code_entry_point:	code entry point which will be invoked by BL after
+ *                      code is loaded.
+ * @data_dma_base:	256B aligned Physical FB Address where data is located.
+ *			(falcon's $xdbase register)
+ * @data_size:		size of data block. Should be multiple of 256B
+ *
+ * Structure used by the bootloader to load the rest of the code. This has
+ * to be filled by host and copied into DMEM at offset provided in the
+ * hsflcn_bl_desc.bl_desc_dmem_load_off.
+ */
+struct acr_r361_flcn_bl_desc {
+	u32 reserved[4];
+	u32 signature[4];
+	u32 ctx_dma;
+	struct flcn_u64 code_dma_base;
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 sec_code_off;
+	u32 sec_code_size;
+	u32 code_entry_point;
+	struct flcn_u64 data_dma_base;
+	u32 data_size;
+};
+
+static void
+acr_r361_generate_flcn_bl_desc(const struct nvkm_acr *acr,
+			       const struct ls_ucode_img *_img, u64 wpr_addr,
+			       void *_desc)
+{
+	struct ls_ucode_img_r352 *img = ls_ucode_img_r352(_img);
+	struct acr_r361_flcn_bl_desc *desc = _desc;
+	const struct ls_ucode_img_desc *pdesc = &img->base.ucode_desc;
+	u64 base, addr_code, addr_data;
+
+	base = wpr_addr + img->lsb_header.ucode_off + pdesc->app_start_offset;
+	addr_code = base + pdesc->app_resident_code_offset;
+	addr_data = base + pdesc->app_resident_data_offset;
+
+	desc->ctx_dma = FALCON_DMAIDX_UCODE;
+	desc->code_dma_base = u64_to_flcn64(addr_code);
+	desc->non_sec_code_off = pdesc->app_resident_code_offset;
+	desc->non_sec_code_size = pdesc->app_resident_code_size;
+	desc->code_entry_point = pdesc->app_imem_entry;
+	desc->data_dma_base = u64_to_flcn64(addr_data);
+	desc->data_size = pdesc->app_resident_data_size;
+}
+
+static void
+acr_r361_generate_hs_bl_desc(const struct hsf_load_header *hdr, void *_bl_desc,
+			    u64 offset)
+{
+	struct acr_r361_flcn_bl_desc *bl_desc = _bl_desc;
+
+	bl_desc->ctx_dma = FALCON_DMAIDX_VIRT;
+	bl_desc->code_dma_base = u64_to_flcn64(offset);
+	bl_desc->non_sec_code_off = hdr->non_sec_code_off;
+	bl_desc->non_sec_code_size = hdr->non_sec_code_size;
+	bl_desc->sec_code_off = hdr->app[0].sec_code_off;
+	bl_desc->sec_code_size = hdr->app[0].sec_code_size;
+	bl_desc->code_entry_point = 0;
+	bl_desc->data_dma_base = u64_to_flcn64(offset + hdr->data_dma_base);
+	bl_desc->data_size = hdr->data_size;
+}
+
+const struct acr_r352_ls_func
+acr_r361_ls_fecs_func = {
+	.load = acr_ls_ucode_load_fecs,
+	.generate_bl_desc = acr_r361_generate_flcn_bl_desc,
+	.bl_desc_size = sizeof(struct acr_r361_flcn_bl_desc),
+};
+
+const struct acr_r352_ls_func
+acr_r361_ls_gpccs_func = {
+	.load = acr_ls_ucode_load_gpccs,
+	.generate_bl_desc = acr_r361_generate_flcn_bl_desc,
+	.bl_desc_size = sizeof(struct acr_r361_flcn_bl_desc),
+	/* GPCCS will be loaded using PRI */
+	.lhdr_flags = LSF_FLAG_FORCE_PRIV_LOAD,
+};
+
+const struct acr_r352_func
+acr_r361_func = {
+	.generate_hs_bl_desc = acr_r361_generate_hs_bl_desc,
+	.hs_bl_desc_size = sizeof(struct acr_r361_flcn_bl_desc),
+	.ls_ucode_img_load = acr_r352_ls_ucode_img_load,
+	.ls_fill_headers = acr_r352_ls_fill_headers,
+	.ls_write_wpr = acr_r352_ls_write_wpr,
+	.ls_func = {
+		[NVKM_SECBOOT_FALCON_FECS] = &acr_r361_ls_fecs_func,
+		[NVKM_SECBOOT_FALCON_GPCCS] = &acr_r361_ls_gpccs_func,
+	},
+};
+
+struct nvkm_acr *
+acr_r361_new(unsigned long managed_falcons)
+{
+	return acr_r352_new_(&acr_r361_func, NVKM_SECBOOT_FALCON_PMU,
+			     managed_falcons);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
index 314be2192b7d..27c9dfffb9a6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
@@ -19,184 +19,108 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
+
+/*
+ * Secure boot is the process by which NVIDIA-signed firmware is loaded into
+ * some of the falcons of a GPU. For production devices this is the only way
+ * for the firmware to access useful (but sensitive) registers.
+ *
+ * A Falcon microprocessor supporting advanced security modes can run in one of
+ * three modes:
+ *
+ * - Non-secure (NS). In this mode, functionality is similar to Falcon
+ *   architectures before security modes were introduced (pre-Maxwell), but
+ *   capability is restricted. In particular, certain registers may be
+ *   inaccessible for reads and/or writes, and physical memory access may be
+ *   disabled (on certain Falcon instances). This is the only possible mode that
+ *   can be used if you don't have microcode cryptographically signed by NVIDIA.
+ *
+ * - Heavy Secure (HS). In this mode, the microprocessor is a black box - it's
+ *   not possible to read or write any Falcon internal state or Falcon registers
+ *   from outside the Falcon (for example, from the host system). The only way
+ *   to enable this mode is by loading microcode that has been signed by NVIDIA.
+ *   (The loading process involves tagging the IMEM block as secure, writing the
+ *   signature into a Falcon register, and starting execution. The hardware will
+ *   validate the signature, and if valid, grant HS privileges.)
+ *
+ * - Light Secure (LS). In this mode, the microprocessor has more privileges
+ *   than NS but fewer than HS. Some of the microprocessor state is visible to
+ *   host software to ease debugging. The only way to enable this mode is by HS
+ *   microcode enabling LS mode. Some privileges available to HS mode are not
+ *   available here. LS mode is introduced in GM20x.
+ *
+ * Secure boot consists in temporarily switching a HS-capable falcon (typically
+ * PMU) into HS mode in order to validate the LS firmwares of managed falcons,
+ * load them, and switch managed falcons into LS mode. Once secure boot
+ * completes, no falcon remains in HS mode.
+ *
+ * Secure boot requires a write-protected memory region (WPR) which can only be
+ * written by the secure falcon. On dGPU, the driver sets up the WPR region in
+ * video memory. On Tegra, it is set up by the bootloader and its location and
+ * size written into memory controller registers.
+ *
+ * The secure boot process takes place as follows:
+ *
+ * 1) A LS blob is constructed that contains all the LS firmwares we want to
+ *    load, along with their signatures and bootloaders.
+ *
+ * 2) A HS blob (also called ACR) is created that contains the signed HS
+ *    firmware in charge of loading the LS firmwares into their respective
+ *    falcons.
+ *
+ * 3) The HS blob is loaded (via its own bootloader) and executed on the
+ *    HS-capable falcon. It authenticates itself, switches the secure falcon to
+ *    HS mode and sets up the WPR region around the LS blob (dGPU) or copies the
+ *    LS blob into the WPR region (Tegra).
+ *
+ * 4) The LS blob is now secure from all external tampering. The HS falcon
+ *    checks the signatures of the LS firmwares and, if valid, switches the
+ *    managed falcons to LS mode and makes them ready to run the LS firmware.
+ *
+ * 5) The managed falcons remain in LS mode and can be started.
+ *
+ */
+
 #include "priv.h"
+#include "acr.h"
 
 #include <subdev/mc.h>
 #include <subdev/timer.h>
+#include <subdev/pmu.h>
 
-static const char *
-managed_falcons_names[] = {
+const char *
+nvkm_secboot_falcon_name[] = {
 	[NVKM_SECBOOT_FALCON_PMU] = "PMU",
 	[NVKM_SECBOOT_FALCON_RESERVED] = "<reserved>",
 	[NVKM_SECBOOT_FALCON_FECS] = "FECS",
 	[NVKM_SECBOOT_FALCON_GPCCS] = "GPCCS",
 	[NVKM_SECBOOT_FALCON_END] = "<invalid>",
 };
-
-/*
- * Helper falcon functions
- */
-
-static int
-falcon_clear_halt_interrupt(struct nvkm_device *device, u32 base)
-{
-	int ret;
-
-	/* clear halt interrupt */
-	nvkm_mask(device, base + 0x004, 0x10, 0x10);
-	/* wait until halt interrupt is cleared */
-	ret = nvkm_wait_msec(device, 10, base + 0x008, 0x10, 0x0);
-	if (ret < 0)
-		return ret;
-
-	return 0;
-}
-
-static int
-falcon_wait_idle(struct nvkm_device *device, u32 base)
-{
-	int ret;
-
-	ret = nvkm_wait_msec(device, 10, base + 0x04c, 0xffff, 0x0);
-	if (ret < 0)
-		return ret;
-
-	return 0;
-}
-
-static int
-nvkm_secboot_falcon_enable(struct nvkm_secboot *sb)
-{
-	struct nvkm_device *device = sb->subdev.device;
-	int ret;
-
-	/* enable engine */
-	nvkm_mc_enable(device, sb->devidx);
-	ret = nvkm_wait_msec(device, 10, sb->base + 0x10c, 0x6, 0x0);
-	if (ret < 0) {
-		nvkm_error(&sb->subdev, "Falcon mem scrubbing timeout\n");
-		nvkm_mc_disable(device, sb->devidx);
-		return ret;
-	}
-
-	ret = falcon_wait_idle(device, sb->base);
-	if (ret)
-		return ret;
-
-	/* enable IRQs */
-	nvkm_wr32(device, sb->base + 0x010, 0xff);
-	nvkm_mc_intr_mask(device, sb->devidx, true);
-
-	return 0;
-}
-
-static int
-nvkm_secboot_falcon_disable(struct nvkm_secboot *sb)
-{
-	struct nvkm_device *device = sb->subdev.device;
-
-	/* disable IRQs and wait for any previous code to complete */
-	nvkm_mc_intr_mask(device, sb->devidx, false);
-	nvkm_wr32(device, sb->base + 0x014, 0xff);
-
-	falcon_wait_idle(device, sb->base);
-
-	/* disable engine */
-	nvkm_mc_disable(device, sb->devidx);
-
-	return 0;
-}
-
-int
-nvkm_secboot_falcon_reset(struct nvkm_secboot *sb)
-{
-	int ret;
-
-	ret = nvkm_secboot_falcon_disable(sb);
-	if (ret)
-		return ret;
-
-	ret = nvkm_secboot_falcon_enable(sb);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-/**
- * nvkm_secboot_falcon_run - run the falcon that will perform secure boot
- *
- * This function is to be called after all chip-specific preparations have
- * been completed. It will start the falcon to perform secure boot, wait for
- * it to halt, and report if an error occurred.
- */
-int
-nvkm_secboot_falcon_run(struct nvkm_secboot *sb)
-{
-	struct nvkm_device *device = sb->subdev.device;
-	int ret;
-
-	/* Start falcon */
-	nvkm_wr32(device, sb->base + 0x100, 0x2);
-
-	/* Wait for falcon halt */
-	ret = nvkm_wait_msec(device, 100, sb->base + 0x100, 0x10, 0x10);
-	if (ret < 0)
-		return ret;
-
-	/* If mailbox register contains an error code, then ACR has failed */
-	ret = nvkm_rd32(device, sb->base + 0x040);
-	if (ret) {
-		nvkm_error(&sb->subdev, "ACR boot failed, ret 0x%08x", ret);
-		falcon_clear_halt_interrupt(device, sb->base);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-
 /**
  * nvkm_secboot_reset() - reset specified falcon
  */
 int
-nvkm_secboot_reset(struct nvkm_secboot *sb, u32 falcon)
+nvkm_secboot_reset(struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon)
 {
 	/* Unmanaged falcon? */
-	if (!(BIT(falcon) & sb->func->managed_falcons)) {
+	if (!(BIT(falcon) & sb->acr->managed_falcons)) {
 		nvkm_error(&sb->subdev, "cannot reset unmanaged falcon!\n");
 		return -EINVAL;
 	}
 
-	return sb->func->reset(sb, falcon);
-}
-
-/**
- * nvkm_secboot_start() - start specified falcon
- */
-int
-nvkm_secboot_start(struct nvkm_secboot *sb, u32 falcon)
-{
-	/* Unmanaged falcon? */
-	if (!(BIT(falcon) & sb->func->managed_falcons)) {
-		nvkm_error(&sb->subdev, "cannot start unmanaged falcon!\n");
-		return -EINVAL;
-	}
-
-	return sb->func->start(sb, falcon);
+	return sb->acr->func->reset(sb->acr, sb, falcon);
 }
 
 /**
  * nvkm_secboot_is_managed() - check whether a given falcon is securely-managed
  */
 bool
-nvkm_secboot_is_managed(struct nvkm_secboot *secboot,
-			enum nvkm_secboot_falcon fid)
+nvkm_secboot_is_managed(struct nvkm_secboot *sb, enum nvkm_secboot_falcon fid)
 {
-	if (!secboot)
+	if (!sb)
 		return false;
 
-	return secboot->func->managed_falcons & BIT(fid);
+	return sb->acr->managed_falcons & BIT(fid);
 }
 
 static int
@@ -205,9 +129,19 @@ nvkm_secboot_oneinit(struct nvkm_subdev *subdev)
 	struct nvkm_secboot *sb = nvkm_secboot(subdev);
 	int ret = 0;
 
+	switch (sb->acr->boot_falcon) {
+	case NVKM_SECBOOT_FALCON_PMU:
+		sb->boot_falcon = subdev->device->pmu->falcon;
+		break;
+	default:
+		nvkm_error(subdev, "Unmanaged boot falcon %s!\n",
+			                nvkm_secboot_falcon_name[sb->acr->boot_falcon]);
+		return -EINVAL;
+	}
+
 	/* Call chip-specific init function */
-	if (sb->func->init)
-		ret = sb->func->init(sb);
+	if (sb->func->oneinit)
+		ret = sb->func->oneinit(sb);
 	if (ret) {
 		nvkm_error(subdev, "Secure Boot initialization failed: %d\n",
 			   ret);
@@ -249,7 +183,7 @@ nvkm_secboot = {
 };
 
 int
-nvkm_secboot_ctor(const struct nvkm_secboot_func *func,
+nvkm_secboot_ctor(const struct nvkm_secboot_func *func, struct nvkm_acr *acr,
 		  struct nvkm_device *device, int index,
 		  struct nvkm_secboot *sb)
 {
@@ -257,22 +191,14 @@ nvkm_secboot_ctor(const struct nvkm_secboot_func *func,
 
 	nvkm_subdev_ctor(&nvkm_secboot, device, index, &sb->subdev);
 	sb->func = func;
-
-	/* setup the performing falcon's base address and masks */
-	switch (func->boot_falcon) {
-	case NVKM_SECBOOT_FALCON_PMU:
-		sb->devidx = NVKM_SUBDEV_PMU;
-		sb->base = 0x10a000;
-		break;
-	default:
-		nvkm_error(&sb->subdev, "invalid secure boot falcon\n");
-		return -EINVAL;
-	};
+	sb->acr = acr;
+	acr->subdev = &sb->subdev;
 
 	nvkm_debug(&sb->subdev, "securely managed falcons:\n");
-	for_each_set_bit(fid, &sb->func->managed_falcons,
+	for_each_set_bit(fid, &sb->acr->managed_falcons,
 			 NVKM_SECBOOT_FALCON_END)
-		nvkm_debug(&sb->subdev, "- %s\n", managed_falcons_names[fid]);
+		nvkm_debug(&sb->subdev, "- %s\n",
+			   nvkm_secboot_falcon_name[fid]);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
index ec48e4ace37a..813c4eb0b25f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
@@ -20,1313 +20,84 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-/*
- * Secure boot is the process by which NVIDIA-signed firmware is loaded into
- * some of the falcons of a GPU. For production devices this is the only way
- * for the firmware to access useful (but sensitive) registers.
- *
- * A Falcon microprocessor supporting advanced security modes can run in one of
- * three modes:
- *
- * - Non-secure (NS). In this mode, functionality is similar to Falcon
- *   architectures before security modes were introduced (pre-Maxwell), but
- *   capability is restricted. In particular, certain registers may be
- *   inaccessible for reads and/or writes, and physical memory access may be
- *   disabled (on certain Falcon instances). This is the only possible mode that
- *   can be used if you don't have microcode cryptographically signed by NVIDIA.
- *
- * - Heavy Secure (HS). In this mode, the microprocessor is a black box - it's
- *   not possible to read or write any Falcon internal state or Falcon registers
- *   from outside the Falcon (for example, from the host system). The only way
- *   to enable this mode is by loading microcode that has been signed by NVIDIA.
- *   (The loading process involves tagging the IMEM block as secure, writing the
- *   signature into a Falcon register, and starting execution. The hardware will
- *   validate the signature, and if valid, grant HS privileges.)
- *
- * - Light Secure (LS). In this mode, the microprocessor has more privileges
- *   than NS but fewer than HS. Some of the microprocessor state is visible to
- *   host software to ease debugging. The only way to enable this mode is by HS
- *   microcode enabling LS mode. Some privileges available to HS mode are not
- *   available here. LS mode is introduced in GM20x.
- *
- * Secure boot consists in temporarily switching a HS-capable falcon (typically
- * PMU) into HS mode in order to validate the LS firmwares of managed falcons,
- * load them, and switch managed falcons into LS mode. Once secure boot
- * completes, no falcon remains in HS mode.
- *
- * Secure boot requires a write-protected memory region (WPR) which can only be
- * written by the secure falcon. On dGPU, the driver sets up the WPR region in
- * video memory. On Tegra, it is set up by the bootloader and its location and
- * size written into memory controller registers.
- *
- * The secure boot process takes place as follows:
- *
- * 1) A LS blob is constructed that contains all the LS firmwares we want to
- *    load, along with their signatures and bootloaders.
- *
- * 2) A HS blob (also called ACR) is created that contains the signed HS
- *    firmware in charge of loading the LS firmwares into their respective
- *    falcons.
- *
- * 3) The HS blob is loaded (via its own bootloader) and executed on the
- *    HS-capable falcon. It authenticates itself, switches the secure falcon to
- *    HS mode and setup the WPR region around the LS blob (dGPU) or copies the
- *    LS blob into the WPR region (Tegra).
- *
- * 4) The LS blob is now secure from all external tampering. The HS falcon
- *    checks the signatures of the LS firmwares and, if valid, switches the
- *    managed falcons to LS mode and makes them ready to run the LS firmware.
- *
- * 5) The managed falcons remain in LS mode and can be started.
- *
- */
 
-#include "priv.h"
+#include "acr.h"
+#include "gm200.h"
 
 #include <core/gpuobj.h>
-#include <core/firmware.h>
 #include <subdev/fb.h>
-
-enum {
-	FALCON_DMAIDX_UCODE		= 0,
-	FALCON_DMAIDX_VIRT		= 1,
-	FALCON_DMAIDX_PHYS_VID		= 2,
-	FALCON_DMAIDX_PHYS_SYS_COH	= 3,
-	FALCON_DMAIDX_PHYS_SYS_NCOH	= 4,
-};
-
-/**
- * struct fw_bin_header - header of firmware files
- * @bin_magic:		always 0x3b1d14f0
- * @bin_ver:		version of the bin format
- * @bin_size:		entire image size including this header
- * @header_offset:	offset of the firmware/bootloader header in the file
- * @data_offset:	offset of the firmware/bootloader payload in the file
- * @data_size:		size of the payload
- *
- * This header is located at the beginning of the HS firmware and HS bootloader
- * files, to describe where the headers and data can be found.
- */
-struct fw_bin_header {
-	u32 bin_magic;
-	u32 bin_ver;
-	u32 bin_size;
-	u32 header_offset;
-	u32 data_offset;
-	u32 data_size;
-};
-
-/**
- * struct fw_bl_desc - firmware bootloader descriptor
- * @start_tag:		starting tag of bootloader
- * @desc_dmem_load_off:	DMEM offset of flcn_bl_dmem_desc
- * @code_off:		offset of code section
- * @code_size:		size of code section
- * @data_off:		offset of data section
- * @data_size:		size of data section
- *
- * This structure is embedded in bootloader firmware files at to describe the
- * IMEM and DMEM layout expected by the bootloader.
- */
-struct fw_bl_desc {
-	u32 start_tag;
-	u32 dmem_load_off;
-	u32 code_off;
-	u32 code_size;
-	u32 data_off;
-	u32 data_size;
-};
-
-
-/*
- *
- * LS blob structures
- *
- */
-
-/**
- * struct lsf_ucode_desc - LS falcon signatures
- * @prd_keys:		signature to use when the GPU is in production mode
- * @dgb_keys:		signature to use when the GPU is in debug mode
- * @b_prd_present:	whether the production key is present
- * @b_dgb_present:	whether the debug key is present
- * @falcon_id:		ID of the falcon the ucode applies to
- *
- * Directly loaded from a signature file.
- */
-struct lsf_ucode_desc {
-	u8  prd_keys[2][16];
-	u8  dbg_keys[2][16];
-	u32 b_prd_present;
-	u32 b_dbg_present;
-	u32 falcon_id;
-};
-
-/**
- * struct lsf_lsb_header - LS firmware header
- * @signature:		signature to verify the firmware against
- * @ucode_off:		offset of the ucode blob in the WPR region. The ucode
- *                      blob contains the bootloader, code and data of the
- *                      LS falcon
- * @ucode_size:		size of the ucode blob, including bootloader
- * @data_size:		size of the ucode blob data
- * @bl_code_size:	size of the bootloader code
- * @bl_imem_off:	offset in imem of the bootloader
- * @bl_data_off:	offset of the bootloader data in WPR region
- * @bl_data_size:	size of the bootloader data
- * @app_code_off:	offset of the app code relative to ucode_off
- * @app_code_size:	size of the app code
- * @app_data_off:	offset of the app data relative to ucode_off
- * @app_data_size:	size of the app data
- * @flags:		flags for the secure bootloader
- *
- * This structure is written into the WPR region for each managed falcon. Each
- * instance is referenced by the lsb_offset member of the corresponding
- * lsf_wpr_header.
- */
-struct lsf_lsb_header {
-	struct lsf_ucode_desc signature;
-	u32 ucode_off;
-	u32 ucode_size;
-	u32 data_size;
-	u32 bl_code_size;
-	u32 bl_imem_off;
-	u32 bl_data_off;
-	u32 bl_data_size;
-	u32 app_code_off;
-	u32 app_code_size;
-	u32 app_data_off;
-	u32 app_data_size;
-	u32 flags;
-#define LSF_FLAG_LOAD_CODE_AT_0		1
-#define LSF_FLAG_DMACTL_REQ_CTX		4
-#define LSF_FLAG_FORCE_PRIV_LOAD	8
-};
-
-/**
- * struct lsf_wpr_header - LS blob WPR Header
- * @falcon_id:		LS falcon ID
- * @lsb_offset:		offset of the lsb_lsf_header in the WPR region
- * @bootstrap_owner:	secure falcon reponsible for bootstrapping the LS falcon
- * @lazy_bootstrap:	skip bootstrapping by ACR
- * @status:		bootstrapping status
- *
- * An array of these is written at the beginning of the WPR region, one for
- * each managed falcon. The array is terminated by an instance which falcon_id
- * is LSF_FALCON_ID_INVALID.
- */
-struct lsf_wpr_header {
-	u32  falcon_id;
-	u32  lsb_offset;
-	u32  bootstrap_owner;
-	u32  lazy_bootstrap;
-	u32  status;
-#define LSF_IMAGE_STATUS_NONE				0
-#define LSF_IMAGE_STATUS_COPY				1
-#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED		2
-#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED		3
-#define LSF_IMAGE_STATUS_VALIDATION_DONE		4
-#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED		5
-#define LSF_IMAGE_STATUS_BOOTSTRAP_READY		6
-};
-
-
-/**
- * struct ls_ucode_img_desc - descriptor of firmware image
- * @descriptor_size:		size of this descriptor
- * @image_size:			size of the whole image
- * @bootloader_start_offset:	start offset of the bootloader in ucode image
- * @bootloader_size:		size of the bootloader
- * @bootloader_imem_offset:	start off set of the bootloader in IMEM
- * @bootloader_entry_point:	entry point of the bootloader in IMEM
- * @app_start_offset:		start offset of the LS firmware
- * @app_size:			size of the LS firmware's code and data
- * @app_imem_offset:		offset of the app in IMEM
- * @app_imem_entry:		entry point of the app in IMEM
- * @app_dmem_offset:		offset of the data in DMEM
- * @app_resident_code_offset:	offset of app code from app_start_offset
- * @app_resident_code_size:	size of the code
- * @app_resident_data_offset:	offset of data from app_start_offset
- * @app_resident_data_size:	size of data
- *
- * A firmware image contains the code, data, and bootloader of a given LS
- * falcon in a single blob. This structure describes where everything is.
- *
- * This can be generated from a (bootloader, code, data) set if they have
- * been loaded separately, or come directly from a file.
- */
-struct ls_ucode_img_desc {
-	u32 descriptor_size;
-	u32 image_size;
-	u32 tools_version;
-	u32 app_version;
-	char date[64];
-	u32 bootloader_start_offset;
-	u32 bootloader_size;
-	u32 bootloader_imem_offset;
-	u32 bootloader_entry_point;
-	u32 app_start_offset;
-	u32 app_size;
-	u32 app_imem_offset;
-	u32 app_imem_entry;
-	u32 app_dmem_offset;
-	u32 app_resident_code_offset;
-	u32 app_resident_code_size;
-	u32 app_resident_data_offset;
-	u32 app_resident_data_size;
-	u32 nb_overlays;
-	struct {u32 start; u32 size; } load_ovl[64];
-	u32 compressed;
-};
-
-/**
- * struct ls_ucode_img - temporary storage for loaded LS firmwares
- * @node:		to link within lsf_ucode_mgr
- * @falcon_id:		ID of the falcon this LS firmware is for
- * @ucode_desc:		loaded or generated map of ucode_data
- * @ucode_header:	header of the firmware
- * @ucode_data:		firmware payload (code and data)
- * @ucode_size:		size in bytes of data in ucode_data
- * @wpr_header:		WPR header to be written to the LS blob
- * @lsb_header:		LSB header to be written to the LS blob
- *
- * Preparing the WPR LS blob requires information about all the LS firmwares
- * (size, etc) to be known. This structure contains all the data of one LS
- * firmware.
- */
-struct ls_ucode_img {
-	struct list_head node;
-	enum nvkm_secboot_falcon falcon_id;
-
-	struct ls_ucode_img_desc ucode_desc;
-	u32 *ucode_header;
-	u8 *ucode_data;
-	u32 ucode_size;
-
-	struct lsf_wpr_header wpr_header;
-	struct lsf_lsb_header lsb_header;
-};
-
-/**
- * struct ls_ucode_mgr - manager for all LS falcon firmwares
- * @count:	number of managed LS falcons
- * @wpr_size:	size of the required WPR region in bytes
- * @img_list:	linked list of lsf_ucode_img
- */
-struct ls_ucode_mgr {
-	u16 count;
-	u32 wpr_size;
-	struct list_head img_list;
-};
-
-
-/*
- *
- * HS blob structures
- *
- */
-
-/**
- * struct hsf_fw_header - HS firmware descriptor
- * @sig_dbg_offset:	offset of the debug signature
- * @sig_dbg_size:	size of the debug signature
- * @sig_prod_offset:	offset of the production signature
- * @sig_prod_size:	size of the production signature
- * @patch_loc:		offset of the offset (sic) of where the signature is
- * @patch_sig:		offset of the offset (sic) to add to sig_*_offset
- * @hdr_offset:		offset of the load header (see struct hs_load_header)
- * @hdr_size:		size of above header
- *
- * This structure is embedded in the HS firmware image at
- * hs_bin_hdr.header_offset.
- */
-struct hsf_fw_header {
-	u32 sig_dbg_offset;
-	u32 sig_dbg_size;
-	u32 sig_prod_offset;
-	u32 sig_prod_size;
-	u32 patch_loc;
-	u32 patch_sig;
-	u32 hdr_offset;
-	u32 hdr_size;
-};
-
-/**
- * struct hsf_load_header - HS firmware load header
- */
-struct hsf_load_header {
-	u32 non_sec_code_off;
-	u32 non_sec_code_size;
-	u32 data_dma_base;
-	u32 data_size;
-	u32 num_apps;
-	struct {
-		u32 sec_code_off;
-		u32 sec_code_size;
-	} app[0];
-};
-
-/**
- * Convenience function to duplicate a firmware file in memory and check that
- * it has the required minimum size.
- */
-static void *
-gm200_secboot_load_firmware(struct nvkm_subdev *subdev, const char *name,
-		    size_t min_size)
-{
-	const struct firmware *fw;
-	void *blob;
-	int ret;
-
-	ret = nvkm_firmware_get(subdev->device, name, &fw);
-	if (ret)
-		return ERR_PTR(ret);
-	if (fw->size < min_size) {
-		nvkm_error(subdev, "%s is smaller than expected size %zu\n",
-			   name, min_size);
-		nvkm_firmware_put(fw);
-		return ERR_PTR(-EINVAL);
-	}
-	blob = kmemdup(fw->data, fw->size, GFP_KERNEL);
-	nvkm_firmware_put(fw);
-	if (!blob)
-		return ERR_PTR(-ENOMEM);
-
-	return blob;
-}
-
-
-/*
- * Low-secure blob creation
- */
-
-#define BL_DESC_BLK_SIZE 256
-/**
- * Build a ucode image and descriptor from provided bootloader, code and data.
- *
- * @bl:		bootloader image, including 16-bytes descriptor
- * @code:	LS firmware code segment
- * @data:	LS firmware data segment
- * @desc:	ucode descriptor to be written
- *
- * Return: allocated ucode image with corresponding descriptor information. desc
- *         is also updated to contain the right offsets within returned image.
- */
-static void *
-ls_ucode_img_build(const struct firmware *bl, const struct firmware *code,
-		   const struct firmware *data, struct ls_ucode_img_desc *desc)
-{
-	struct fw_bin_header *bin_hdr = (void *)bl->data;
-	struct fw_bl_desc *bl_desc = (void *)bl->data + bin_hdr->header_offset;
-	void *bl_data = (void *)bl->data + bin_hdr->data_offset;
-	u32 pos = 0;
-	void *image;
-
-	desc->bootloader_start_offset = pos;
-	desc->bootloader_size = ALIGN(bl_desc->code_size, sizeof(u32));
-	desc->bootloader_imem_offset = bl_desc->start_tag * 256;
-	desc->bootloader_entry_point = bl_desc->start_tag * 256;
-
-	pos = ALIGN(pos + desc->bootloader_size, BL_DESC_BLK_SIZE);
-	desc->app_start_offset = pos;
-	desc->app_size = ALIGN(code->size, BL_DESC_BLK_SIZE) +
-			 ALIGN(data->size, BL_DESC_BLK_SIZE);
-	desc->app_imem_offset = 0;
-	desc->app_imem_entry = 0;
-	desc->app_dmem_offset = 0;
-	desc->app_resident_code_offset = 0;
-	desc->app_resident_code_size = ALIGN(code->size, BL_DESC_BLK_SIZE);
-
-	pos = ALIGN(pos + desc->app_resident_code_size, BL_DESC_BLK_SIZE);
-	desc->app_resident_data_offset = pos - desc->app_start_offset;
-	desc->app_resident_data_size = ALIGN(data->size, BL_DESC_BLK_SIZE);
-
-	desc->image_size = ALIGN(bl_desc->code_size, BL_DESC_BLK_SIZE) +
-			   desc->app_size;
-
-	image = kzalloc(desc->image_size, GFP_KERNEL);
-	if (!image)
-		return ERR_PTR(-ENOMEM);
-
-	memcpy(image + desc->bootloader_start_offset, bl_data,
-	       bl_desc->code_size);
-	memcpy(image + desc->app_start_offset, code->data, code->size);
-	memcpy(image + desc->app_start_offset + desc->app_resident_data_offset,
-	       data->data, data->size);
-
-	return image;
-}
-
-/**
- * ls_ucode_img_load_generic() - load and prepare a LS ucode image
- *
- * Load the LS microcode, bootloader and signature and pack them into a single
- * blob. Also generate the corresponding ucode descriptor.
- */
-static int
-ls_ucode_img_load_generic(struct nvkm_subdev *subdev,
-			  struct ls_ucode_img *img, const char *falcon_name,
-			  const u32 falcon_id)
-{
-	const struct firmware *bl, *code, *data;
-	struct lsf_ucode_desc *lsf_desc;
-	char f[64];
-	int ret;
-
-	img->ucode_header = NULL;
-
-	snprintf(f, sizeof(f), "gr/%s_bl", falcon_name);
-	ret = nvkm_firmware_get(subdev->device, f, &bl);
-	if (ret)
-		goto error;
-
-	snprintf(f, sizeof(f), "gr/%s_inst", falcon_name);
-	ret = nvkm_firmware_get(subdev->device, f, &code);
-	if (ret)
-		goto free_bl;
-
-	snprintf(f, sizeof(f), "gr/%s_data", falcon_name);
-	ret = nvkm_firmware_get(subdev->device, f, &data);
-	if (ret)
-		goto free_inst;
-
-	img->ucode_data = ls_ucode_img_build(bl, code, data,
-					     &img->ucode_desc);
-	if (IS_ERR(img->ucode_data)) {
-		ret = PTR_ERR(img->ucode_data);
-		goto free_data;
-	}
-	img->ucode_size = img->ucode_desc.image_size;
-
-	snprintf(f, sizeof(f), "gr/%s_sig", falcon_name);
-	lsf_desc = gm200_secboot_load_firmware(subdev, f, sizeof(*lsf_desc));
-	if (IS_ERR(lsf_desc)) {
-		ret = PTR_ERR(lsf_desc);
-		goto free_image;
-	}
-	/* not needed? the signature should already have the right value */
-	lsf_desc->falcon_id = falcon_id;
-	memcpy(&img->lsb_header.signature, lsf_desc, sizeof(*lsf_desc));
-	img->falcon_id = lsf_desc->falcon_id;
-	kfree(lsf_desc);
-
-	/* success path - only free requested firmware files */
-	goto free_data;
-
-free_image:
-	kfree(img->ucode_data);
-free_data:
-	nvkm_firmware_put(data);
-free_inst:
-	nvkm_firmware_put(code);
-free_bl:
-	nvkm_firmware_put(bl);
-error:
-	return ret;
-}
-
-typedef int (*lsf_load_func)(struct nvkm_subdev *, struct ls_ucode_img *);
-
-static int
-ls_ucode_img_load_fecs(struct nvkm_subdev *subdev, struct ls_ucode_img *img)
-{
-	return ls_ucode_img_load_generic(subdev, img, "fecs",
-					 NVKM_SECBOOT_FALCON_FECS);
-}
-
-static int
-ls_ucode_img_load_gpccs(struct nvkm_subdev *subdev, struct ls_ucode_img *img)
-{
-	return ls_ucode_img_load_generic(subdev, img, "gpccs",
-					 NVKM_SECBOOT_FALCON_GPCCS);
-}
-
-/**
- * ls_ucode_img_load() - create a lsf_ucode_img and load it
- */
-static struct ls_ucode_img *
-ls_ucode_img_load(struct nvkm_subdev *subdev, lsf_load_func load_func)
-{
-	struct ls_ucode_img *img;
-	int ret;
-
-	img = kzalloc(sizeof(*img), GFP_KERNEL);
-	if (!img)
-		return ERR_PTR(-ENOMEM);
-
-	ret = load_func(subdev, img);
-	if (ret) {
-		kfree(img);
-		return ERR_PTR(ret);
-	}
-
-	return img;
-}
-
-static const lsf_load_func lsf_load_funcs[] = {
-	[NVKM_SECBOOT_FALCON_END] = NULL, /* reserve enough space */
-	[NVKM_SECBOOT_FALCON_FECS] = ls_ucode_img_load_fecs,
-	[NVKM_SECBOOT_FALCON_GPCCS] = ls_ucode_img_load_gpccs,
-};
-
-/**
- * ls_ucode_img_populate_bl_desc() - populate a DMEM BL descriptor for LS image
- * @img:	ucode image to generate against
- * @desc:	descriptor to populate
- * @sb:		secure boot state to use for base addresses
- *
- * Populate the DMEM BL descriptor with the information contained in a
- * ls_ucode_desc.
- *
- */
-static void
-ls_ucode_img_populate_bl_desc(struct ls_ucode_img *img, u64 wpr_addr,
-			      struct gm200_flcn_bl_desc *desc)
-{
-	struct ls_ucode_img_desc *pdesc = &img->ucode_desc;
-	u64 addr_base;
-
-	addr_base = wpr_addr + img->lsb_header.ucode_off +
-		    pdesc->app_start_offset;
-
-	memset(desc, 0, sizeof(*desc));
-	desc->ctx_dma = FALCON_DMAIDX_UCODE;
-	desc->code_dma_base.lo = lower_32_bits(
-		(addr_base + pdesc->app_resident_code_offset));
-	desc->code_dma_base.hi = upper_32_bits(
-		(addr_base + pdesc->app_resident_code_offset));
-	desc->non_sec_code_size = pdesc->app_resident_code_size;
-	desc->data_dma_base.lo = lower_32_bits(
-		(addr_base + pdesc->app_resident_data_offset));
-	desc->data_dma_base.hi = upper_32_bits(
-		(addr_base + pdesc->app_resident_data_offset));
-	desc->data_size = pdesc->app_resident_data_size;
-	desc->code_entry_point = pdesc->app_imem_entry;
-}
-
-#define LSF_LSB_HEADER_ALIGN 256
-#define LSF_BL_DATA_ALIGN 256
-#define LSF_BL_DATA_SIZE_ALIGN 256
-#define LSF_BL_CODE_SIZE_ALIGN 256
-#define LSF_UCODE_DATA_ALIGN 4096
-
-/**
- * ls_ucode_img_fill_headers - fill the WPR and LSB headers of an image
- * @gsb:	secure boot device used
- * @img:	image to generate for
- * @offset:	offset in the WPR region where this image starts
- *
- * Allocate space in the WPR area from offset and write the WPR and LSB headers
- * accordingly.
- *
- * Return: offset at the end of this image.
- */
-static u32
-ls_ucode_img_fill_headers(struct gm200_secboot *gsb, struct ls_ucode_img *img,
-			  u32 offset)
-{
-	struct lsf_wpr_header *whdr = &img->wpr_header;
-	struct lsf_lsb_header *lhdr = &img->lsb_header;
-	struct ls_ucode_img_desc *desc = &img->ucode_desc;
-
-	if (img->ucode_header) {
-		nvkm_fatal(&gsb->base.subdev,
-			    "images withough loader are not supported yet!\n");
-		return offset;
-	}
-
-	/* Fill WPR header */
-	whdr->falcon_id = img->falcon_id;
-	whdr->bootstrap_owner = gsb->base.func->boot_falcon;
-	whdr->status = LSF_IMAGE_STATUS_COPY;
-
-	/* Align, save off, and include an LSB header size */
-	offset = ALIGN(offset, LSF_LSB_HEADER_ALIGN);
-	whdr->lsb_offset = offset;
-	offset += sizeof(struct lsf_lsb_header);
-
-	/*
-	 * Align, save off, and include the original (static) ucode
-	 * image size
-	 */
-	offset = ALIGN(offset, LSF_UCODE_DATA_ALIGN);
-	lhdr->ucode_off = offset;
-	offset += img->ucode_size;
-
-	/*
-	 * For falcons that use a boot loader (BL), we append a loader
-	 * desc structure on the end of the ucode image and consider
-	 * this the boot loader data. The host will then copy the loader
-	 * desc args to this space within the WPR region (before locking
-	 * down) and the HS bin will then copy them to DMEM 0 for the
-	 * loader.
-	 */
-	lhdr->bl_code_size = ALIGN(desc->bootloader_size,
-				   LSF_BL_CODE_SIZE_ALIGN);
-	lhdr->ucode_size = ALIGN(desc->app_resident_data_offset,
-				 LSF_BL_CODE_SIZE_ALIGN) + lhdr->bl_code_size;
-	lhdr->data_size = ALIGN(desc->app_size, LSF_BL_CODE_SIZE_ALIGN) +
-				lhdr->bl_code_size - lhdr->ucode_size;
-	/*
-	 * Though the BL is located at 0th offset of the image, the VA
-	 * is different to make sure that it doesn't collide the actual
-	 * OS VA range
-	 */
-	lhdr->bl_imem_off = desc->bootloader_imem_offset;
-	lhdr->app_code_off = desc->app_start_offset +
-			     desc->app_resident_code_offset;
-	lhdr->app_code_size = desc->app_resident_code_size;
-	lhdr->app_data_off = desc->app_start_offset +
-			     desc->app_resident_data_offset;
-	lhdr->app_data_size = desc->app_resident_data_size;
-
-	lhdr->flags = 0;
-	if (img->falcon_id == gsb->base.func->boot_falcon)
-		lhdr->flags = LSF_FLAG_DMACTL_REQ_CTX;
-
-	/* GPCCS will be loaded using PRI */
-	if (img->falcon_id == NVKM_SECBOOT_FALCON_GPCCS)
-		lhdr->flags |= LSF_FLAG_FORCE_PRIV_LOAD;
-
-	/* Align (size bloat) and save off BL descriptor size */
-	lhdr->bl_data_size = ALIGN(sizeof(struct gm200_flcn_bl_desc),
-				   LSF_BL_DATA_SIZE_ALIGN);
-	/*
-	 * Align, save off, and include the additional BL data
-	 */
-	offset = ALIGN(offset, LSF_BL_DATA_ALIGN);
-	lhdr->bl_data_off = offset;
-	offset += lhdr->bl_data_size;
-
-	return offset;
-}
-
-static void
-ls_ucode_mgr_init(struct ls_ucode_mgr *mgr)
-{
-	memset(mgr, 0, sizeof(*mgr));
-	INIT_LIST_HEAD(&mgr->img_list);
-}
-
-static void
-ls_ucode_mgr_cleanup(struct ls_ucode_mgr *mgr)
-{
-	struct ls_ucode_img *img, *t;
-
-	list_for_each_entry_safe(img, t, &mgr->img_list, node) {
-		kfree(img->ucode_data);
-		kfree(img->ucode_header);
-		kfree(img);
-	}
-}
-
-static void
-ls_ucode_mgr_add_img(struct ls_ucode_mgr *mgr, struct ls_ucode_img *img)
-{
-	mgr->count++;
-	list_add_tail(&img->node, &mgr->img_list);
-}
-
-/**
- * ls_ucode_mgr_fill_headers - fill WPR and LSB headers of all managed images
- */
-static void
-ls_ucode_mgr_fill_headers(struct gm200_secboot *gsb, struct ls_ucode_mgr *mgr)
-{
-	struct ls_ucode_img *img;
-	u32 offset;
-
-	/*
-	 * Start with an array of WPR headers at the base of the WPR.
-	 * The expectation here is that the secure falcon will do a single DMA
-	 * read of this array and cache it internally so it's ok to pack these.
-	 * Also, we add 1 to the falcon count to indicate the end of the array.
-	 */
-	offset = sizeof(struct lsf_wpr_header) * (mgr->count + 1);
-
-	/*
-	 * Walk the managed falcons, accounting for the LSB structs
-	 * as well as the ucode images.
-	 */
-	list_for_each_entry(img, &mgr->img_list, node) {
-		offset = ls_ucode_img_fill_headers(gsb, img, offset);
-	}
-
-	mgr->wpr_size = offset;
-}
-
-/**
- * ls_ucode_mgr_write_wpr - write the WPR blob contents
- */
-static int
-ls_ucode_mgr_write_wpr(struct gm200_secboot *gsb, struct ls_ucode_mgr *mgr,
-		       struct nvkm_gpuobj *wpr_blob)
-{
-	struct ls_ucode_img *img;
-	u32 pos = 0;
-
-	nvkm_kmap(wpr_blob);
-
-	list_for_each_entry(img, &mgr->img_list, node) {
-		nvkm_gpuobj_memcpy_to(wpr_blob, pos, &img->wpr_header,
-				      sizeof(img->wpr_header));
-
-		nvkm_gpuobj_memcpy_to(wpr_blob, img->wpr_header.lsb_offset,
-				     &img->lsb_header, sizeof(img->lsb_header));
-
-		/* Generate and write BL descriptor */
-		if (!img->ucode_header) {
-			u8 desc[gsb->func->bl_desc_size];
-			struct gm200_flcn_bl_desc gdesc;
-
-			ls_ucode_img_populate_bl_desc(img, gsb->wpr_addr,
-						      &gdesc);
-			gsb->func->fixup_bl_desc(&gdesc, &desc);
-			nvkm_gpuobj_memcpy_to(wpr_blob,
-					      img->lsb_header.bl_data_off,
-					      &desc, gsb->func->bl_desc_size);
-		}
-
-		/* Copy ucode */
-		nvkm_gpuobj_memcpy_to(wpr_blob, img->lsb_header.ucode_off,
-				      img->ucode_data, img->ucode_size);
-
-		pos += sizeof(img->wpr_header);
-	}
-
-	nvkm_wo32(wpr_blob, pos, NVKM_SECBOOT_FALCON_INVALID);
-
-	nvkm_done(wpr_blob);
-
-	return 0;
-}
-
-/* Both size and address of WPR need to be 128K-aligned */
-#define WPR_ALIGNMENT	0x20000
-/**
- * gm200_secboot_prepare_ls_blob() - prepare the LS blob
- *
- * For each securely managed falcon, load the FW, signatures and bootloaders and
- * prepare a ucode blob. Then, compute the offsets in the WPR region for each
- * blob, and finally write the headers and ucode blobs into a GPU object that
- * will be copied into the WPR region by the HS firmware.
- */
-static int
-gm200_secboot_prepare_ls_blob(struct gm200_secboot *gsb)
-{
-	struct nvkm_secboot *sb = &gsb->base;
-	struct nvkm_device *device = sb->subdev.device;
-	struct ls_ucode_mgr mgr;
-	int falcon_id;
-	int ret;
-
-	ls_ucode_mgr_init(&mgr);
-
-	/* Load all LS blobs */
-	for_each_set_bit(falcon_id, &gsb->base.func->managed_falcons,
-			 NVKM_SECBOOT_FALCON_END) {
-		struct ls_ucode_img *img;
-
-		img = ls_ucode_img_load(&sb->subdev, lsf_load_funcs[falcon_id]);
-
-		if (IS_ERR(img)) {
-			ret = PTR_ERR(img);
-			goto cleanup;
-		}
-		ls_ucode_mgr_add_img(&mgr, img);
-	}
-
-	/*
-	 * Fill the WPR and LSF headers with the right offsets and compute
-	 * required WPR size
-	 */
-	ls_ucode_mgr_fill_headers(gsb, &mgr);
-	mgr.wpr_size = ALIGN(mgr.wpr_size, WPR_ALIGNMENT);
-
-	/* Allocate GPU object that will contain the WPR region */
-	ret = nvkm_gpuobj_new(device, mgr.wpr_size, WPR_ALIGNMENT, false, NULL,
-			      &gsb->ls_blob);
-	if (ret)
-		goto cleanup;
-
-	nvkm_debug(&sb->subdev, "%d managed LS falcons, WPR size is %d bytes\n",
-		    mgr.count, mgr.wpr_size);
-
-	/* If WPR address and size are not fixed, set them to fit the LS blob */
-	if (!gsb->wpr_size) {
-		gsb->wpr_addr = gsb->ls_blob->addr;
-		gsb->wpr_size = gsb->ls_blob->size;
-	}
-
-	/* Write LS blob */
-	ret = ls_ucode_mgr_write_wpr(gsb, &mgr, gsb->ls_blob);
-	if (ret)
-		nvkm_gpuobj_del(&gsb->ls_blob);
-
-cleanup:
-	ls_ucode_mgr_cleanup(&mgr);
-
-	return ret;
-}
-
-/*
- * High-secure blob creation
- */
-
-/**
- * gm200_secboot_hsf_patch_signature() - patch HS blob with correct signature
- */
-static void
-gm200_secboot_hsf_patch_signature(struct gm200_secboot *gsb, void *acr_image)
-{
-	struct nvkm_secboot *sb = &gsb->base;
-	struct fw_bin_header *hsbin_hdr = acr_image;
-	struct hsf_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset;
-	void *hs_data = acr_image + hsbin_hdr->data_offset;
-	void *sig;
-	u32 sig_size;
-
-	/* Falcon in debug or production mode? */
-	if ((nvkm_rd32(sb->subdev.device, sb->base + 0xc08) >> 20) & 0x1) {
-		sig = acr_image + fw_hdr->sig_dbg_offset;
-		sig_size = fw_hdr->sig_dbg_size;
-	} else {
-		sig = acr_image + fw_hdr->sig_prod_offset;
-		sig_size = fw_hdr->sig_prod_size;
-	}
-
-	/* Patch signature */
-	memcpy(hs_data + fw_hdr->patch_loc, sig + fw_hdr->patch_sig, sig_size);
-}
-
-/**
- * gm200_secboot_populate_hsf_bl_desc() - populate BL descriptor for HS image
- */
-static void
-gm200_secboot_populate_hsf_bl_desc(void *acr_image,
-				   struct gm200_flcn_bl_desc *bl_desc)
-{
-	struct fw_bin_header *hsbin_hdr = acr_image;
-	struct hsf_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset;
-	struct hsf_load_header *load_hdr = acr_image + fw_hdr->hdr_offset;
-
-	/*
-	 * Descriptor for the bootloader that will load the ACR image into
-	 * IMEM/DMEM memory.
-	 */
-	fw_hdr = acr_image + hsbin_hdr->header_offset;
-	load_hdr = acr_image + fw_hdr->hdr_offset;
-	memset(bl_desc, 0, sizeof(*bl_desc));
-	bl_desc->ctx_dma = FALCON_DMAIDX_VIRT;
-	bl_desc->non_sec_code_off = load_hdr->non_sec_code_off;
-	bl_desc->non_sec_code_size = load_hdr->non_sec_code_size;
-	bl_desc->sec_code_off = load_hdr->app[0].sec_code_off;
-	bl_desc->sec_code_size = load_hdr->app[0].sec_code_size;
-	bl_desc->code_entry_point = 0;
-	/*
-	 * We need to set code_dma_base to the virtual address of the acr_blob,
-	 * and add this address to data_dma_base before writing it into DMEM
-	 */
-	bl_desc->code_dma_base.lo = 0;
-	bl_desc->data_dma_base.lo = load_hdr->data_dma_base;
-	bl_desc->data_size = load_hdr->data_size;
-}
-
-/**
- * gm200_secboot_prepare_hs_blob - load and prepare a HS blob and BL descriptor
- *
- * @gsb secure boot instance to prepare for
- * @fw name of the HS firmware to load
- * @blob pointer to gpuobj that will be allocated to receive the HS FW payload
- * @bl_desc pointer to the BL descriptor to write for this firmware
- * @patch whether we should patch the HS descriptor (only for HS loaders)
- */
-static int
-gm200_secboot_prepare_hs_blob(struct gm200_secboot *gsb, const char *fw,
-			      struct nvkm_gpuobj **blob,
-			      struct gm200_flcn_bl_desc *bl_desc, bool patch)
-{
-	struct nvkm_subdev *subdev = &gsb->base.subdev;
-	void *acr_image;
-	struct fw_bin_header *hsbin_hdr;
-	struct hsf_fw_header *fw_hdr;
-	void *acr_data;
-	struct hsf_load_header *load_hdr;
-	struct hsflcn_acr_desc *desc;
-	int ret;
-
-	acr_image = gm200_secboot_load_firmware(subdev, fw, 0);
-	if (IS_ERR(acr_image))
-		return PTR_ERR(acr_image);
-	hsbin_hdr = acr_image;
-
-	/* Patch signature */
-	gm200_secboot_hsf_patch_signature(gsb, acr_image);
-
-	acr_data = acr_image + hsbin_hdr->data_offset;
-
-	/* Patch descriptor? */
-	if (patch) {
-		fw_hdr = acr_image + hsbin_hdr->header_offset;
-		load_hdr = acr_image + fw_hdr->hdr_offset;
-		desc = acr_data + load_hdr->data_dma_base;
-		gsb->func->fixup_hs_desc(gsb, desc);
-	}
-
-	/* Generate HS BL descriptor */
-	gm200_secboot_populate_hsf_bl_desc(acr_image, bl_desc);
-
-	/* Create ACR blob and copy HS data to it */
-	ret = nvkm_gpuobj_new(subdev->device, ALIGN(hsbin_hdr->data_size, 256),
-			      0x1000, false, NULL, blob);
-	if (ret)
-		goto cleanup;
-
-	nvkm_kmap(*blob);
-	nvkm_gpuobj_memcpy_to(*blob, 0, acr_data, hsbin_hdr->data_size);
-	nvkm_done(*blob);
-
-cleanup:
-	kfree(acr_image);
-
-	return ret;
-}
-
-/*
- * High-secure bootloader blob creation
- */
-
-static int
-gm200_secboot_prepare_hsbl_blob(struct gm200_secboot *gsb)
-{
-	struct nvkm_subdev *subdev = &gsb->base.subdev;
-
-	gsb->hsbl_blob = gm200_secboot_load_firmware(subdev, "acr/bl", 0);
-	if (IS_ERR(gsb->hsbl_blob)) {
-		int ret = PTR_ERR(gsb->hsbl_blob);
-
-		gsb->hsbl_blob = NULL;
-		return ret;
-	}
-
-	return 0;
-}
+#include <engine/falcon.h>
+#include <subdev/mc.h>
 
 /**
- * gm20x_secboot_prepare_blobs - load blobs common to all GM20X GPUs.
+ * gm200_secboot_run_blob() - run the given high-secure blob
  *
- * This includes the LS blob, HS ucode loading blob, and HS bootloader.
- *
- * The HS ucode unload blob is only used on dGPU.
  */
 int
-gm20x_secboot_prepare_blobs(struct gm200_secboot *gsb)
-{
-	int ret;
-
-	/* Load and prepare the managed falcon's firmwares */
-	if (!gsb->ls_blob) {
-		ret = gm200_secboot_prepare_ls_blob(gsb);
-		if (ret)
-			return ret;
-	}
-
-	/* Load the HS firmware that will load the LS firmwares */
-	if (!gsb->acr_load_blob) {
-		ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_load",
-						&gsb->acr_load_blob,
-						&gsb->acr_load_bl_desc, true);
-		if (ret)
-			return ret;
-	}
-
-	/* Load the HS firmware bootloader */
-	if (!gsb->hsbl_blob) {
-		ret = gm200_secboot_prepare_hsbl_blob(gsb);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-static int
-gm200_secboot_prepare_blobs(struct gm200_secboot *gsb)
-{
-	int ret;
-
-	ret = gm20x_secboot_prepare_blobs(gsb);
-	if (ret)
-		return ret;
-
-	/* dGPU only: load the HS firmware that unprotects the WPR region */
-	if (!gsb->acr_unload_blob) {
-		ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_unload",
-					       &gsb->acr_unload_blob,
-					       &gsb->acr_unload_bl_desc, false);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-static int
-gm200_secboot_blobs_ready(struct gm200_secboot *gsb)
+gm200_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob)
 {
+	struct gm200_secboot *gsb = gm200_secboot(sb);
 	struct nvkm_subdev *subdev = &gsb->base.subdev;
+	struct nvkm_falcon *falcon = gsb->base.boot_falcon;
+	struct nvkm_vma vma;
 	int ret;
 
-	/* firmware already loaded, nothing to do... */
-	if (gsb->firmware_ok)
-		return 0;
-
-	ret = gsb->func->prepare_blobs(gsb);
-	if (ret) {
-		nvkm_error(subdev, "failed to load secure firmware\n");
-		return ret;
-	}
-
-	gsb->firmware_ok = true;
-
-	return 0;
-}
-
-
-/*
- * Secure Boot Execution
- */
-
-/**
- * gm200_secboot_load_hs_bl() - load HS bootloader into DMEM and IMEM
- */
-static void
-gm200_secboot_load_hs_bl(struct gm200_secboot *gsb, void *data, u32 data_size)
-{
-	struct nvkm_device *device = gsb->base.subdev.device;
-	struct fw_bin_header *hdr = gsb->hsbl_blob;
-	struct fw_bl_desc *hsbl_desc = gsb->hsbl_blob + hdr->header_offset;
-	void *blob_data = gsb->hsbl_blob + hdr->data_offset;
-	void *hsbl_code = blob_data + hsbl_desc->code_off;
-	void *hsbl_data = blob_data + hsbl_desc->data_off;
-	u32 code_size = ALIGN(hsbl_desc->code_size, 256);
-	const u32 base = gsb->base.base;
-	u32 blk;
-	u32 tag;
-	int i;
-
-	/*
-	 * Copy HS bootloader data
-	 */
-	nvkm_wr32(device, base + 0x1c0, (0x00000000 | (0x1 << 24)));
-	for (i = 0; i < hsbl_desc->data_size / 4; i++)
-		nvkm_wr32(device, base + 0x1c4, ((u32 *)hsbl_data)[i]);
-
-	/*
-	 * Copy HS bootloader interface structure where the HS descriptor
-	 * expects it to be
-	 */
-	nvkm_wr32(device, base + 0x1c0,
-		  (hsbl_desc->dmem_load_off | (0x1 << 24)));
-	for (i = 0; i < data_size / 4; i++)
-		nvkm_wr32(device, base + 0x1c4, ((u32 *)data)[i]);
-
-	/* Copy HS bootloader code to end of IMEM */
-	blk = (nvkm_rd32(device, base + 0x108) & 0x1ff) - (code_size >> 8);
-	tag = hsbl_desc->start_tag;
-	nvkm_wr32(device, base + 0x180, ((blk & 0xff) << 8) | (0x1 << 24));
-	for (i = 0; i < code_size / 4; i++) {
-		/* write new tag every 256B */
-		if ((i & 0x3f) == 0) {
-			nvkm_wr32(device, base + 0x188, tag & 0xffff);
-			tag++;
-		}
-		nvkm_wr32(device, base + 0x184, ((u32 *)hsbl_code)[i]);
-	}
-	nvkm_wr32(device, base + 0x188, 0);
-}
-
-/**
- * gm200_secboot_setup_falcon() - set up the secure falcon for secure boot
- */
-static int
-gm200_secboot_setup_falcon(struct gm200_secboot *gsb)
-{
-	struct nvkm_device *device = gsb->base.subdev.device;
-	struct fw_bin_header *hdr = gsb->hsbl_blob;
-	struct fw_bl_desc *hsbl_desc = gsb->hsbl_blob + hdr->header_offset;
-	/* virtual start address for boot vector */
-	u32 virt_addr = hsbl_desc->start_tag << 8;
-	const u32 base = gsb->base.base;
-	const u32 reg_base = base + 0xe00;
-	u32 inst_loc;
-	int ret;
-
-	ret = nvkm_secboot_falcon_reset(&gsb->base);
+	ret = nvkm_falcon_get(falcon, subdev);
 	if (ret)
 		return ret;
 
-	/* setup apertures - virtual */
-	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_UCODE), 0x4);
-	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_VIRT), 0x0);
-	/* setup apertures - physical */
-	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_VID), 0x4);
-	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_SYS_COH),
-		  0x4 | 0x1);
-	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_SYS_NCOH),
-		  0x4 | 0x2);
-
-	/* Set context */
-	if (nvkm_memory_target(gsb->inst->memory) == NVKM_MEM_TARGET_VRAM)
-		inst_loc = 0x0; /* FB */
-	else
-		inst_loc = 0x3; /* Non-coherent sysmem */
-
-	nvkm_mask(device, base + 0x048, 0x1, 0x1);
-	nvkm_wr32(device, base + 0x480,
-		  ((gsb->inst->addr >> 12) & 0xfffffff) |
-		  (inst_loc << 28) | (1 << 30));
-
-	/* Set boot vector to code's starting virtual address */
-	nvkm_wr32(device, base + 0x104, virt_addr);
-
-	return 0;
-}
-
-/**
- * gm200_secboot_run_hs_blob() - run the given high-secure blob
- */
-static int
-gm200_secboot_run_hs_blob(struct gm200_secboot *gsb, struct nvkm_gpuobj *blob,
-			  struct gm200_flcn_bl_desc *desc)
-{
-	struct nvkm_vma vma;
-	u64 vma_addr;
-	const u32 bl_desc_size = gsb->func->bl_desc_size;
-	u8 bl_desc[bl_desc_size];
-	int ret;
-
 	/* Map the HS firmware so the HS bootloader can see it */
 	ret = nvkm_gpuobj_map(blob, gsb->vm, NV_MEM_ACCESS_RW, &vma);
-	if (ret)
+	if (ret) {
+		nvkm_falcon_put(falcon, subdev);
 		return ret;
+	}
 
-	/* Add the mapping address to the DMA bases */
-	vma_addr = flcn64_to_u64(desc->code_dma_base) + vma.offset;
-	desc->code_dma_base.lo = lower_32_bits(vma_addr);
-	desc->code_dma_base.hi = upper_32_bits(vma_addr);
-	vma_addr = flcn64_to_u64(desc->data_dma_base) + vma.offset;
-	desc->data_dma_base.lo = lower_32_bits(vma_addr);
-	desc->data_dma_base.hi = upper_32_bits(vma_addr);
-
-	/* Fixup the BL header */
-	gsb->func->fixup_bl_desc(desc, &bl_desc);
-
-	/* Reset the falcon and make it ready to run the HS bootloader */
-	ret = gm200_secboot_setup_falcon(gsb);
+	/* Reset and set the falcon up */
+	ret = nvkm_falcon_reset(falcon);
 	if (ret)
-		goto done;
+		goto end;
+	nvkm_falcon_bind_context(falcon, gsb->inst);
 
 	/* Load the HS bootloader into the falcon's IMEM/DMEM */
-	gm200_secboot_load_hs_bl(gsb, &bl_desc, bl_desc_size);
-
-	/* Start the HS bootloader */
-	ret = nvkm_secboot_falcon_run(&gsb->base);
+	ret = sb->acr->func->load(sb->acr, &gsb->base, blob, vma.offset);
 	if (ret)
-		goto done;
-
-done:
-	/* Restore the original DMA addresses */
-	vma_addr = flcn64_to_u64(desc->code_dma_base) - vma.offset;
-	desc->code_dma_base.lo = lower_32_bits(vma_addr);
-	desc->code_dma_base.hi = upper_32_bits(vma_addr);
-	vma_addr = flcn64_to_u64(desc->data_dma_base) - vma.offset;
-	desc->data_dma_base.lo = lower_32_bits(vma_addr);
-	desc->data_dma_base.hi = upper_32_bits(vma_addr);
-
-	/* We don't need the ACR firmware anymore */
-	nvkm_gpuobj_unmap(&vma);
+		goto end;
 
-	return ret;
-}
+	/* Disable interrupts as we will poll for the HALT bit */
+	nvkm_mc_intr_mask(sb->subdev.device, falcon->owner->index, false);
 
-/*
- * gm200_secboot_reset() - execute secure boot from the prepared state
- *
- * Load the HS bootloader and ask the falcon to run it. This will in turn
- * load the HS firmware and run it, so once the falcon stops all the managed
- * falcons should have their LS firmware loaded and be ready to run.
- */
-int
-gm200_secboot_reset(struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon)
-{
-	struct gm200_secboot *gsb = gm200_secboot(sb);
-	int ret;
+	/* Set default error value in mailbox register */
+	nvkm_falcon_wr32(falcon, 0x040, 0xdeada5a5);
 
-	/* Make sure all blobs are ready */
-	ret = gm200_secboot_blobs_ready(gsb);
+	/* Start the HS bootloader */
+	nvkm_falcon_set_start_addr(falcon, sb->acr->start_address);
+	nvkm_falcon_start(falcon);
+	ret = nvkm_falcon_wait_for_halt(falcon, 100);
 	if (ret)
-		return ret;
-
-	/*
-	 * Dummy GM200 implementation: perform secure boot each time we are
-	 * called on FECS. Since only FECS and GPCCS are managed and started
-	 * together, this ought to be safe.
-	 *
-	 * Once we have proper PMU firmware and support, this will be changed
-	 * to a proper call to the PMU method.
-	 */
-	if (falcon != NVKM_SECBOOT_FALCON_FECS)
 		goto end;
 
-	/* If WPR is set and we have an unload blob, run it to unlock WPR */
-	if (gsb->acr_unload_blob &&
-	    gsb->falcon_state[NVKM_SECBOOT_FALCON_FECS] != NON_SECURE) {
-		ret = gm200_secboot_run_hs_blob(gsb, gsb->acr_unload_blob,
-						&gsb->acr_unload_bl_desc);
-		if (ret)
-			return ret;
+	/* If mailbox register contains an error code, then ACR has failed */
+	ret = nvkm_falcon_rd32(falcon, 0x040);
+	if (ret) {
+		nvkm_error(subdev, "ACR boot failed, ret 0x%08x\n", ret);
+		ret = -EINVAL;
+		goto end;
+	}
 
-	/* Reload all managed falcons */
-	ret = gm200_secboot_run_hs_blob(gsb, gsb->acr_load_blob,
-					&gsb->acr_load_bl_desc);
-	if (ret)
-		return ret;
-
 end:
-	gsb->falcon_state[falcon] = RESET;
-	return 0;
-}
+	/* Reenable interrupts */
+	nvkm_mc_intr_mask(sb->subdev.device, falcon->owner->index, true);
 
-int
-gm200_secboot_start(struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon)
-{
-	struct gm200_secboot *gsb = gm200_secboot(sb);
-	int base;
-
-	switch (falcon) {
-	case NVKM_SECBOOT_FALCON_FECS:
-		base = 0x409000;
-		break;
-	case NVKM_SECBOOT_FALCON_GPCCS:
-		base = 0x41a000;
-		break;
-	default:
-		nvkm_error(&sb->subdev, "cannot start unhandled falcon!\n");
-		return -EINVAL;
-	}
-
-	nvkm_wr32(sb->subdev.device, base + 0x130, 0x00000002);
-	gsb->falcon_state[falcon] = RUNNING;
+	/* We don't need the ACR firmware anymore */
+	nvkm_gpuobj_unmap(&vma);
+	nvkm_falcon_put(falcon, subdev);
 
-	return 0;
+	return ret;
 }
 
-
-
 int
-gm200_secboot_init(struct nvkm_secboot *sb)
+gm200_secboot_oneinit(struct nvkm_secboot *sb)
 {
 	struct gm200_secboot *gsb = gm200_secboot(sb);
 	struct nvkm_device *device = sb->subdev.device;
@@ -1361,24 +132,22 @@ gm200_secboot_init(struct nvkm_secboot *sb)
 	nvkm_wo32(gsb->inst, 0x20c, upper_32_bits(vm_area_len - 1));
 	nvkm_done(gsb->inst);
 
+	if (sb->acr->func->oneinit) {
+		ret = sb->acr->func->oneinit(sb->acr, sb);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 
-static int
+int
 gm200_secboot_fini(struct nvkm_secboot *sb, bool suspend)
 {
-	struct gm200_secboot *gsb = gm200_secboot(sb);
 	int ret = 0;
-	int i;
 
-	/* Run the unload blob to unprotect the WPR region */
-	if (gsb->acr_unload_blob &&
-	    gsb->falcon_state[NVKM_SECBOOT_FALCON_FECS] != NON_SECURE)
-		ret = gm200_secboot_run_hs_blob(gsb, gsb->acr_unload_blob,
-						&gsb->acr_unload_bl_desc);
-
-	for (i = 0; i < NVKM_SECBOOT_FALCON_END; i++)
-		gsb->falcon_state[i] = NON_SECURE;
+	if (sb->acr->func->fini)
+		ret = sb->acr->func->fini(sb->acr, sb, suspend);
 
 	return ret;
 }
@@ -1388,11 +157,7 @@ gm200_secboot_dtor(struct nvkm_secboot *sb)
 {
 	struct gm200_secboot *gsb = gm200_secboot(sb);
 
-	nvkm_gpuobj_del(&gsb->acr_unload_blob);
-
-	kfree(gsb->hsbl_blob);
-	nvkm_gpuobj_del(&gsb->acr_load_blob);
-	nvkm_gpuobj_del(&gsb->ls_blob);
+	sb->acr->func->dtor(sb->acr);
 
 	nvkm_vm_ref(NULL, &gsb->vm, gsb->pgd);
 	nvkm_gpuobj_del(&gsb->pgd);
@@ -1405,50 +170,9 @@ gm200_secboot_dtor(struct nvkm_secboot *sb)
 static const struct nvkm_secboot_func
 gm200_secboot = {
 	.dtor = gm200_secboot_dtor,
-	.init = gm200_secboot_init,
+	.oneinit = gm200_secboot_oneinit,
 	.fini = gm200_secboot_fini,
-	.reset = gm200_secboot_reset,
-	.start = gm200_secboot_start,
-	.managed_falcons = BIT(NVKM_SECBOOT_FALCON_FECS) |
-			   BIT(NVKM_SECBOOT_FALCON_GPCCS),
-	.boot_falcon = NVKM_SECBOOT_FALCON_PMU,
-};
-
-/**
- * gm200_fixup_bl_desc - just copy the BL descriptor
- *
- * Use the GM200 descriptor format by default.
- */
-static void
-gm200_secboot_fixup_bl_desc(const struct gm200_flcn_bl_desc *desc, void *ret)
-{
-	memcpy(ret, desc, sizeof(*desc));
-}
-
-static void
-gm200_secboot_fixup_hs_desc(struct gm200_secboot *gsb,
-			    struct hsflcn_acr_desc *desc)
-{
-	desc->ucode_blob_base = gsb->ls_blob->addr;
-	desc->ucode_blob_size = gsb->ls_blob->size;
-
-	desc->wpr_offset = 0;
-
-	/* WPR region information for the HS binary to set up */
-	desc->wpr_region_id = 1;
-	desc->regions.no_regions = 1;
-	desc->regions.region_props[0].region_id = 1;
-	desc->regions.region_props[0].start_addr = gsb->wpr_addr >> 8;
-	desc->regions.region_props[0].end_addr =
-		(gsb->wpr_addr + gsb->wpr_size) >> 8;
-}
-
-static const struct gm200_secboot_func
-gm200_secboot_func = {
-	.bl_desc_size = sizeof(struct gm200_flcn_bl_desc),
-	.fixup_bl_desc = gm200_secboot_fixup_bl_desc,
-	.fixup_hs_desc = gm200_secboot_fixup_hs_desc,
-	.prepare_blobs = gm200_secboot_prepare_blobs,
+	.run_blob = gm200_secboot_run_blob,
 };
 
 int
@@ -1457,6 +181,12 @@ gm200_secboot_new(struct nvkm_device *device, int index,
 {
 	int ret;
 	struct gm200_secboot *gsb;
+	struct nvkm_acr *acr;
+
+	acr = acr_r361_new(BIT(NVKM_SECBOOT_FALCON_FECS) |
+			   BIT(NVKM_SECBOOT_FALCON_GPCCS));
+	if (IS_ERR(acr))
+		return PTR_ERR(acr);
 
 	gsb = kzalloc(sizeof(*gsb), GFP_KERNEL);
 	if (!gsb) {
@@ -1465,15 +195,14 @@ gm200_secboot_new(struct nvkm_device *device, int index,
 	}
 	*psb = &gsb->base;
 
-	ret = nvkm_secboot_ctor(&gm200_secboot, device, index, &gsb->base);
+	ret = nvkm_secboot_ctor(&gm200_secboot, acr, device, index, &gsb->base);
 	if (ret)
 		return ret;
 
-	gsb->func = &gm200_secboot_func;
-
 	return 0;
 }
 
+
 MODULE_FIRMWARE("nvidia/gm200/acr/bl.bin");
 MODULE_FIRMWARE("nvidia/gm200/acr/ucode_load.bin");
 MODULE_FIRMWARE("nvidia/gm200/acr/ucode_unload.bin");
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h
new file mode 100644
index 000000000000..45adf1a3bc20
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __NVKM_SECBOOT_GM200_H__
+#define __NVKM_SECBOOT_GM200_H__
+
+#include "priv.h"
+
+struct gm200_secboot {
+	struct nvkm_secboot base;
+
+	/* Instance block & address space used for HS FW execution */
+	struct nvkm_gpuobj *inst;
+	struct nvkm_gpuobj *pgd;
+	struct nvkm_vm *vm;
+};
+#define gm200_secboot(sb) container_of(sb, struct gm200_secboot, base)
+
+int gm200_secboot_oneinit(struct nvkm_secboot *);
+int gm200_secboot_fini(struct nvkm_secboot *, bool);
+void *gm200_secboot_dtor(struct nvkm_secboot *);
+int gm200_secboot_run_blob(struct nvkm_secboot *, struct nvkm_gpuobj *);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
index d5395ebfe8d3..6707b8edc086 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
@@ -20,103 +20,8 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-#include "priv.h"
-
-#include <core/gpuobj.h>
-
-/*
- * The BL header format used by GM20B's firmware is slightly different
- * from the one of GM200. Fix the differences here.
- */
-struct gm20b_flcn_bl_desc {
-	u32 reserved[4];
-	u32 signature[4];
-	u32 ctx_dma;
-	u32 code_dma_base;
-	u32 non_sec_code_off;
-	u32 non_sec_code_size;
-	u32 sec_code_off;
-	u32 sec_code_size;
-	u32 code_entry_point;
-	u32 data_dma_base;
-	u32 data_size;
-};
-
-static int
-gm20b_secboot_prepare_blobs(struct gm200_secboot *gsb)
-{
-	struct nvkm_subdev *subdev = &gsb->base.subdev;
-	int acr_size;
-	int ret;
-
-	ret = gm20x_secboot_prepare_blobs(gsb);
-	if (ret)
-		return ret;
-
-	acr_size = gsb->acr_load_blob->size;
-	/*
-	 * On Tegra the WPR region is set by the bootloader. It is illegal for
-	 * the HS blob to be larger than this region.
-	 */
-	if (acr_size > gsb->wpr_size) {
-		nvkm_error(subdev, "WPR region too small for FW blob!\n");
-		nvkm_error(subdev, "required: %dB\n", acr_size);
-		nvkm_error(subdev, "WPR size: %dB\n", gsb->wpr_size);
-		return -ENOSPC;
-	}
-
-	return 0;
-}
-
-/**
- * gm20b_secboot_fixup_bl_desc - adapt BL descriptor to format used by GM20B FW
- *
- * There is only a slight format difference (DMA addresses being 32-bits and
- * 256B-aligned) to address.
- */
-static void
-gm20b_secboot_fixup_bl_desc(const struct gm200_flcn_bl_desc *desc, void *ret)
-{
-	struct gm20b_flcn_bl_desc *gdesc = ret;
-	u64 addr;
-
-	memcpy(gdesc->reserved, desc->reserved, sizeof(gdesc->reserved));
-	memcpy(gdesc->signature, desc->signature, sizeof(gdesc->signature));
-	gdesc->ctx_dma = desc->ctx_dma;
-	addr = desc->code_dma_base.hi;
-	addr <<= 32;
-	addr |= desc->code_dma_base.lo;
-	gdesc->code_dma_base = lower_32_bits(addr >> 8);
-	gdesc->non_sec_code_off = desc->non_sec_code_off;
-	gdesc->non_sec_code_size = desc->non_sec_code_size;
-	gdesc->sec_code_off = desc->sec_code_off;
-	gdesc->sec_code_size = desc->sec_code_size;
-	gdesc->code_entry_point = desc->code_entry_point;
-	addr = desc->data_dma_base.hi;
-	addr <<= 32;
-	addr |= desc->data_dma_base.lo;
-	gdesc->data_dma_base = lower_32_bits(addr >> 8);
-	gdesc->data_size = desc->data_size;
-}
-
-static void
-gm20b_secboot_fixup_hs_desc(struct gm200_secboot *gsb,
-			    struct hsflcn_acr_desc *desc)
-{
-	desc->ucode_blob_base = gsb->ls_blob->addr;
-	desc->ucode_blob_size = gsb->ls_blob->size;
-
-	desc->wpr_offset = 0;
-}
-
-static const struct gm200_secboot_func
-gm20b_secboot_func = {
-	.bl_desc_size = sizeof(struct gm20b_flcn_bl_desc),
-	.fixup_bl_desc = gm20b_secboot_fixup_bl_desc,
-	.fixup_hs_desc = gm20b_secboot_fixup_hs_desc,
-	.prepare_blobs = gm20b_secboot_prepare_blobs,
-};
-
+#include "acr.h"
+#include "gm200.h"
 
 #ifdef CONFIG_ARCH_TEGRA
 #define TEGRA_MC_BASE				0x70019000
@@ -144,15 +49,15 @@ gm20b_tegra_read_wpr(struct gm200_secboot *gsb)
 		nvkm_error(&sb->subdev, "Cannot map Tegra MC registers\n");
 		return PTR_ERR(mc);
 	}
-	gsb->wpr_addr = ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_0) |
+	sb->wpr_addr = ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_0) |
 	      ((u64)ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_HI_0) << 32);
-	gsb->wpr_size = ioread32_native(mc + MC_SECURITY_CARVEOUT2_SIZE_128K)
+	sb->wpr_size = ioread32_native(mc + MC_SECURITY_CARVEOUT2_SIZE_128K)
 		<< 17;
 	cfg = ioread32_native(mc + MC_SECURITY_CARVEOUT2_CFG0);
 	iounmap(mc);
 
 	/* Check that WPR settings are valid */
-	if (gsb->wpr_size == 0) {
+	if (sb->wpr_size == 0) {
 		nvkm_error(&sb->subdev, "WPR region is empty\n");
 		return -EINVAL;
 	}
@@ -174,7 +79,7 @@ gm20b_tegra_read_wpr(struct gm200_secboot *gsb)
 #endif
 
 static int
-gm20b_secboot_init(struct nvkm_secboot *sb)
+gm20b_secboot_oneinit(struct nvkm_secboot *sb)
 {
 	struct gm200_secboot *gsb = gm200_secboot(sb);
 	int ret;
@@ -183,17 +88,15 @@ gm20b_secboot_init(struct nvkm_secboot *sb)
 	if (ret)
 		return ret;
 
-	return gm200_secboot_init(sb);
+	return gm200_secboot_oneinit(sb);
 }
 
 static const struct nvkm_secboot_func
 gm20b_secboot = {
 	.dtor = gm200_secboot_dtor,
-	.init = gm20b_secboot_init,
-	.reset = gm200_secboot_reset,
-	.start = gm200_secboot_start,
-	.managed_falcons = BIT(NVKM_SECBOOT_FALCON_FECS),
-	.boot_falcon = NVKM_SECBOOT_FALCON_PMU,
+	.oneinit = gm20b_secboot_oneinit,
+	.fini = gm200_secboot_fini,
+	.run_blob = gm200_secboot_run_blob,
 };
 
 int
@@ -202,6 +105,11 @@ gm20b_secboot_new(struct nvkm_device *device, int index,
 {
 	int ret;
 	struct gm200_secboot *gsb;
+	struct nvkm_acr *acr;
+
+	acr = acr_r352_new(BIT(NVKM_SECBOOT_FALCON_FECS));
+	if (IS_ERR(acr))
+		return PTR_ERR(acr);
 
 	gsb = kzalloc(sizeof(*gsb), GFP_KERNEL);
 	if (!gsb) {
@@ -210,12 +118,10 @@ gm20b_secboot_new(struct nvkm_device *device, int index,
 	}
 	*psb = &gsb->base;
 
-	ret = nvkm_secboot_ctor(&gm20b_secboot, device, index, &gsb->base);
+	ret = nvkm_secboot_ctor(&gm20b_secboot, acr, device, index, &gsb->base);
 	if (ret)
 		return ret;
 
-	gsb->func = &gm20b_secboot_func;
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h
new file mode 100644
index 000000000000..00886cee57eb
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __NVKM_SECBOOT_LS_UCODE_H__
+#define __NVKM_SECBOOT_LS_UCODE_H__
+
+#include <core/os.h>
+#include <core/subdev.h>
+#include <subdev/secboot.h>
+
+
+/**
+ * struct ls_ucode_img_desc - descriptor of firmware image
+ * @descriptor_size:		size of this descriptor
+ * @image_size:			size of the whole image
+ * @bootloader_start_offset:	start offset of the bootloader in ucode image
+ * @bootloader_size:		size of the bootloader
+ * @bootloader_imem_offset:	start offset of the bootloader in IMEM
+ * @bootloader_entry_point:	entry point of the bootloader in IMEM
+ * @app_start_offset:		start offset of the LS firmware
+ * @app_size:			size of the LS firmware's code and data
+ * @app_imem_offset:		offset of the app in IMEM
+ * @app_imem_entry:		entry point of the app in IMEM
+ * @app_dmem_offset:		offset of the data in DMEM
+ * @app_resident_code_offset:	offset of app code from app_start_offset
+ * @app_resident_code_size:	size of the code
+ * @app_resident_data_offset:	offset of data from app_start_offset
+ * @app_resident_data_size:	size of data
+ *
+ * A firmware image contains the code, data, and bootloader of a given LS
+ * falcon in a single blob. This structure describes where everything is.
+ *
+ * This can be generated from a (bootloader, code, data) set if they have
+ * been loaded separately, or come directly from a file.
+ */
+struct ls_ucode_img_desc {
+	u32 descriptor_size;
+	u32 image_size;
+	u32 tools_version;
+	u32 app_version;
+	char date[64];
+	u32 bootloader_start_offset;
+	u32 bootloader_size;
+	u32 bootloader_imem_offset;
+	u32 bootloader_entry_point;
+	u32 app_start_offset;
+	u32 app_size;
+	u32 app_imem_offset;
+	u32 app_imem_entry;
+	u32 app_dmem_offset;
+	u32 app_resident_code_offset;
+	u32 app_resident_code_size;
+	u32 app_resident_data_offset;
+	u32 app_resident_data_size;
+	u32 nb_overlays;
+	struct {u32 start; u32 size; } load_ovl[64];
+	u32 compressed;
+};
+
+/**
+ * struct ls_ucode_img - temporary storage for loaded LS firmwares
+ * @node:		to link within lsf_ucode_mgr
+ * @falcon_id:		ID of the falcon this LS firmware is for
+ * @ucode_desc:		loaded or generated map of ucode_data
+ * @ucode_data:		firmware payload (code and data)
+ * @ucode_size:		size in bytes of data in ucode_data
+ * @sig:		signature for this firmware
+ * @sig_size:		size of the signature in bytes
+ *
+ * Preparing the WPR LS blob requires information about all the LS firmwares
+ * (size, etc) to be known. This structure contains all the data of one LS
+ * firmware.
+ */
+struct ls_ucode_img {
+	struct list_head node;
+	enum nvkm_secboot_falcon falcon_id;
+
+	struct ls_ucode_img_desc ucode_desc;
+	u8 *ucode_data;
+	u32 ucode_size;
+
+	u8 *sig;
+	u32 sig_size;
+};
+
+/**
+ * struct fw_bin_header - header of firmware files
+ * @bin_magic:		always 0x3b1d14f0
+ * @bin_ver:		version of the bin format
+ * @bin_size:		entire image size including this header
+ * @header_offset:	offset of the firmware/bootloader header in the file
+ * @data_offset:	offset of the firmware/bootloader payload in the file
+ * @data_size:		size of the payload
+ *
+ * This header is located at the beginning of the HS firmware and HS bootloader
+ * files, to describe where the headers and data can be found.
+ */
+struct fw_bin_header {
+	u32 bin_magic;
+	u32 bin_ver;
+	u32 bin_size;
+	u32 header_offset;
+	u32 data_offset;
+	u32 data_size;
+};
+
+/**
+ * struct fw_bl_desc - firmware bootloader descriptor
+ * @start_tag:		starting tag of bootloader
+ * @dmem_load_off:	DMEM offset of flcn_bl_dmem_desc
+ * @code_off:		offset of code section
+ * @code_size:		size of code section
+ * @data_off:		offset of data section
+ * @data_size:		size of data section
+ *
+ * This structure is embedded in bootloader firmware files to describe the
+ * IMEM and DMEM layout expected by the bootloader.
+ */
+struct fw_bl_desc {
+	u32 start_tag;
+	u32 dmem_load_off;
+	u32 code_off;
+	u32 code_size;
+	u32 data_off;
+	u32 data_size;
+};
+
+int acr_ls_ucode_load_fecs(const struct nvkm_subdev *, struct ls_ucode_img *);
+int acr_ls_ucode_load_gpccs(const struct nvkm_subdev *, struct ls_ucode_img *);
+
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c
new file mode 100644
index 000000000000..40a6df77bb8a
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "ls_ucode.h"
+#include "acr.h"
+
+#include <core/firmware.h>
+
+#define BL_DESC_BLK_SIZE 256
+/**
+ * ls_ucode_img_build() - build a ucode image from bootloader, code and data
+ *
+ * @bl:		bootloader image, including 16-bytes descriptor
+ * @code:	LS firmware code segment
+ * @data:	LS firmware data segment
+ * @desc:	ucode descriptor to be written
+ *
+ * Return: new ucode image, or ERR_PTR(-ENOMEM) if its allocation fails. desc
+ *         is also updated to contain the right offsets within returned image.
+ */
+static void *
+ls_ucode_img_build(const struct firmware *bl, const struct firmware *code,
+		   const struct firmware *data, struct ls_ucode_img_desc *desc)
+{
+	struct fw_bin_header *bin_hdr = (void *)bl->data;
+	struct fw_bl_desc *bl_desc = (void *)bl->data + bin_hdr->header_offset;
+	void *bl_data = (void *)bl->data + bin_hdr->data_offset;
+	u32 pos = 0;
+	void *image;
+
+	desc->bootloader_start_offset = pos;
+	desc->bootloader_size = ALIGN(bl_desc->code_size, sizeof(u32));
+	desc->bootloader_imem_offset = bl_desc->start_tag * 256;
+	desc->bootloader_entry_point = bl_desc->start_tag * 256;
+
+	pos = ALIGN(pos + desc->bootloader_size, BL_DESC_BLK_SIZE);
+	desc->app_start_offset = pos;
+	desc->app_size = ALIGN(code->size, BL_DESC_BLK_SIZE) +
+			 ALIGN(data->size, BL_DESC_BLK_SIZE);
+	desc->app_imem_offset = 0;
+	desc->app_imem_entry = 0;
+	desc->app_dmem_offset = 0;
+	desc->app_resident_code_offset = 0;
+	desc->app_resident_code_size = ALIGN(code->size, BL_DESC_BLK_SIZE);
+
+	pos = ALIGN(pos + desc->app_resident_code_size, BL_DESC_BLK_SIZE);
+	desc->app_resident_data_offset = pos - desc->app_start_offset;
+	desc->app_resident_data_size = ALIGN(data->size, BL_DESC_BLK_SIZE);
+
+	desc->image_size = ALIGN(bl_desc->code_size, BL_DESC_BLK_SIZE) +
+			   desc->app_size;
+
+	image = kzalloc(desc->image_size, GFP_KERNEL);
+	if (!image)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(image + desc->bootloader_start_offset, bl_data,
+	       bl_desc->code_size);
+	memcpy(image + desc->app_start_offset, code->data, code->size);
+	memcpy(image + desc->app_start_offset + desc->app_resident_data_offset,
+	       data->data, data->size);
+
+	return image;
+}
+
+/**
+ * ls_ucode_img_load_gr() - load and prepare a LS GR ucode image
+ *
+ * Load the bootloader, code, data and signature firmwares of @falcon_name: the
+ * signature is copied to img->sig, the rest is packed into img->ucode_data.
+ */
+static int
+ls_ucode_img_load_gr(const struct nvkm_subdev *subdev, struct ls_ucode_img *img,
+		     const char *falcon_name)
+{
+	const struct firmware *bl, *code, *data, *sig;
+	char f[64];
+	int ret;
+
+	snprintf(f, sizeof(f), "gr/%s_bl", falcon_name);
+	ret = nvkm_firmware_get(subdev->device, f, &bl);
+	if (ret)
+		goto error;
+
+	snprintf(f, sizeof(f), "gr/%s_inst", falcon_name);
+	ret = nvkm_firmware_get(subdev->device, f, &code);
+	if (ret)
+		goto free_bl;
+
+	snprintf(f, sizeof(f), "gr/%s_data", falcon_name);
+	ret = nvkm_firmware_get(subdev->device, f, &data);
+	if (ret)
+		goto free_inst;
+
+	snprintf(f, sizeof(f), "gr/%s_sig", falcon_name);
+	ret = nvkm_firmware_get(subdev->device, f, &sig);
+	if (ret)
+		goto free_data;
+	img->sig = kmemdup(sig->data, sig->size, GFP_KERNEL);
+	if (!img->sig) {
+		ret = -ENOMEM;
+		goto free_sig;
+	}
+	img->sig_size = sig->size;
+
+	img->ucode_data = ls_ucode_img_build(bl, code, data, &img->ucode_desc);
+	if (IS_ERR(img->ucode_data)) {
+		ret = PTR_ERR(img->ucode_data);
+		/* the sig firmware is held here, release it as well */
+		goto free_sig;
+	}
+	img->ucode_size = img->ucode_desc.image_size;
+
+free_sig:
+	nvkm_firmware_put(sig);
+free_data:
+	nvkm_firmware_put(data);
+free_inst:
+	nvkm_firmware_put(code);
+free_bl:
+	nvkm_firmware_put(bl);
+error:
+	return ret;
+}
+
+int
+acr_ls_ucode_load_fecs(const struct nvkm_subdev *subdev,
+		       struct ls_ucode_img *img)
+{
+	return ls_ucode_img_load_gr(subdev, img, "fecs");
+}
+
+int
+acr_ls_ucode_load_gpccs(const struct nvkm_subdev *subdev,
+			struct ls_ucode_img *img)
+{
+	return ls_ucode_img_load_gr(subdev, img, "gpccs");
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
index a9a8a0e1017e..936a65f5658c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
@@ -27,20 +27,16 @@
 #include <subdev/mmu.h>
 
 struct nvkm_secboot_func {
-	int (*init)(struct nvkm_secboot *);
+	int (*oneinit)(struct nvkm_secboot *);
 	int (*fini)(struct nvkm_secboot *, bool suspend);
 	void *(*dtor)(struct nvkm_secboot *);
-	int (*reset)(struct nvkm_secboot *, enum nvkm_secboot_falcon);
-	int (*start)(struct nvkm_secboot *, enum nvkm_secboot_falcon);
-
-	/* ID of the falcon that will perform secure boot */
-	enum nvkm_secboot_falcon boot_falcon;
-	/* Bit-mask of IDs of managed falcons */
-	unsigned long managed_falcons;
+	int (*run_blob)(struct nvkm_secboot *, struct nvkm_gpuobj *);
 };
 
-int nvkm_secboot_ctor(const struct nvkm_secboot_func *, struct nvkm_device *,
-		      int index, struct nvkm_secboot *);
+extern const char *nvkm_secboot_falcon_name[];
+
+int nvkm_secboot_ctor(const struct nvkm_secboot_func *, struct nvkm_acr *,
+		      struct nvkm_device *, int, struct nvkm_secboot *);
 int nvkm_secboot_falcon_reset(struct nvkm_secboot *);
 int nvkm_secboot_falcon_run(struct nvkm_secboot *);
 
@@ -48,187 +44,20 @@ struct flcn_u64 {
 	u32 lo;
 	u32 hi;
 };
+
 static inline u64 flcn64_to_u64(const struct flcn_u64 f)
 {
 	return ((u64)f.hi) << 32 | f.lo;
 }
 
-/**
- * struct gm200_flcn_bl_desc - DMEM bootloader descriptor
- * @signature:		16B signature for secure code. 0s if no secure code
- * @ctx_dma:		DMA context to be used by BL while loading code/data
- * @code_dma_base:	256B-aligned Physical FB Address where code is located
- *			(falcon's $xcbase register)
- * @non_sec_code_off:	offset from code_dma_base where the non-secure code is
- *                      located. The offset must be multiple of 256 to help perf
- * @non_sec_code_size:	the size of the nonSecure code part.
- * @sec_code_off:	offset from code_dma_base where the secure code is
- *                      located. The offset must be multiple of 256 to help perf
- * @sec_code_size:	offset from code_dma_base where the secure code is
- *                      located. The offset must be multiple of 256 to help perf
- * @code_entry_point:	code entry point which will be invoked by BL after
- *                      code is loaded.
- * @data_dma_base:	256B aligned Physical FB Address where data is located.
- *			(falcon's $xdbase register)
- * @data_size:		size of data block. Should be multiple of 256B
- *
- * Structure used by the bootloader to load the rest of the code. This has
- * to be filled by host and copied into DMEM at offset provided in the
- * hsflcn_bl_desc.bl_desc_dmem_load_off.
- */
-struct gm200_flcn_bl_desc {
-	u32 reserved[4];
-	u32 signature[4];
-	u32 ctx_dma;
-	struct flcn_u64 code_dma_base;
-	u32 non_sec_code_off;
-	u32 non_sec_code_size;
-	u32 sec_code_off;
-	u32 sec_code_size;
-	u32 code_entry_point;
-	struct flcn_u64 data_dma_base;
-	u32 data_size;
-};
-
-/**
- * struct hsflcn_acr_desc - data section of the HS firmware
- *
- * This header is to be copied at the beginning of DMEM by the HS bootloader.
- *
- * @signature:		signature of ACR ucode
- * @wpr_region_id:	region ID holding the WPR header and its details
- * @wpr_offset:		offset from the WPR region holding the wpr header
- * @regions:		region descriptors
- * @nonwpr_ucode_blob_size:	size of LS blob
- * @nonwpr_ucode_blob_start:	FB location of LS blob is
- */
-struct hsflcn_acr_desc {
-	union {
-		u8 reserved_dmem[0x200];
-		u32 signatures[4];
-	} ucode_reserved_space;
-	u32 wpr_region_id;
-	u32 wpr_offset;
-	u32 mmu_mem_range;
-#define FLCN_ACR_MAX_REGIONS 2
-	struct {
-		u32 no_regions;
-		struct {
-			u32 start_addr;
-			u32 end_addr;
-			u32 region_id;
-			u32 read_mask;
-			u32 write_mask;
-			u32 client_mask;
-		} region_props[FLCN_ACR_MAX_REGIONS];
-	} regions;
-	u32 ucode_blob_size;
-	u64 ucode_blob_base __aligned(8);
-	struct {
-		u32 vpr_enabled;
-		u32 vpr_start;
-		u32 vpr_end;
-		u32 hdcp_policies;
-	} vpr_desc;
-};
-
-/**
- * Contains the whole secure boot state, allowing it to be performed as needed
- * @wpr_addr:		physical address of the WPR region
- * @wpr_size:		size in bytes of the WPR region
- * @ls_blob:		LS blob of all the LS firmwares, signatures, bootloaders
- * @ls_blob_size:	size of the LS blob
- * @ls_blob_nb_regions:	number of LS firmwares that will be loaded
- * @acr_blob:		HS blob
- * @acr_blob_vma:	mapping of the HS blob into the secure falcon's VM
- * @acr_bl_desc:	bootloader descriptor of the HS blob
- * @hsbl_blob:		HS blob bootloader
- * @inst:		instance block for HS falcon
- * @pgd:		page directory for the HS falcon
- * @vm:			address space used by the HS falcon
- * @falcon_state:	current state of the managed falcons
- * @firmware_ok:	whether the firmware blobs have been created
- */
-struct gm200_secboot {
-	struct nvkm_secboot base;
-	const struct gm200_secboot_func *func;
-
-	/*
-	 * Address and size of the WPR region. On dGPU this will be the
-	 * address of the LS blob. On Tegra this is a fixed region set by the
-	 * bootloader
-	 */
-	u64 wpr_addr;
-	u32 wpr_size;
-
-	/*
-	 * HS FW - lock WPR region (dGPU only) and load LS FWs
-	 * on Tegra the HS FW copies the LS blob into the fixed WPR instead
-	 */
-	struct nvkm_gpuobj *acr_load_blob;
-	struct gm200_flcn_bl_desc acr_load_bl_desc;
-
-	/* HS FW - unlock WPR region (dGPU only) */
-	struct nvkm_gpuobj *acr_unload_blob;
-	struct gm200_flcn_bl_desc acr_unload_bl_desc;
-
-	/* HS bootloader */
-	void *hsbl_blob;
-
-	/* LS FWs, to be loaded by the HS ACR */
-	struct nvkm_gpuobj *ls_blob;
-
-	/* Instance block & address space used for HS FW execution */
-	struct nvkm_gpuobj *inst;
-	struct nvkm_gpuobj *pgd;
-	struct nvkm_vm *vm;
-
-	/* To keep track of the state of all managed falcons */
-	enum {
-		/* In non-secure state, no firmware loaded, no privileges*/
-		NON_SECURE = 0,
-		/* In low-secure mode and ready to be started */
-		RESET,
-		/* In low-secure mode and running */
-		RUNNING,
-	} falcon_state[NVKM_SECBOOT_FALCON_END];
-
-	bool firmware_ok;
-};
-#define gm200_secboot(sb) container_of(sb, struct gm200_secboot, base)
-
-/**
- * Contains functions we wish to abstract between GM200-like implementations
- * @bl_desc_size:	size of the BL descriptor used by this chip.
- * @fixup_bl_desc:	hook that generates the proper BL descriptor format from
- *			the generic GM200 format into a data array of size
- *			bl_desc_size
- * @fixup_hs_desc:	hook that twiddles the HS descriptor before it is used
- * @prepare_blobs:	prepares the various blobs needed for secure booting
- */
-struct gm200_secboot_func {
-	/*
-	 * Size of the bootloader descriptor for this chip. A block of this
-	 * size is allocated before booting a falcon and the fixup_bl_desc
-	 * callback is called on it
-	 */
-	u32 bl_desc_size;
-	void (*fixup_bl_desc)(const struct gm200_flcn_bl_desc *, void *);
-
-	/*
-	 * Chip-specific modifications of the HS descriptor can be done here.
-	 * On dGPU this is used to fill the information about the WPR region
-	 * we want the HS FW to set up.
-	 */
-	void (*fixup_hs_desc)(struct gm200_secboot *, struct hsflcn_acr_desc *);
-	int (*prepare_blobs)(struct gm200_secboot *);
-};
+static inline struct flcn_u64 u64_to_flcn64(u64 u)
+{
+	struct flcn_u64 ret;
 
-int gm200_secboot_init(struct nvkm_secboot *);
-void *gm200_secboot_dtor(struct nvkm_secboot *);
-int gm200_secboot_reset(struct nvkm_secboot *, u32);
-int gm200_secboot_start(struct nvkm_secboot *, u32);
+	ret.hi = upper_32_bits(u);
+	ret.lo = lower_32_bits(u);
 
-int gm20x_secboot_prepare_blobs(struct gm200_secboot *);
+	return ret;
+}
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
index 8894fee30cbc..df949fa7d05d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
@@ -64,10 +64,9 @@ nvkm_therm_update_trip(struct nvkm_therm *therm)
 }
 
 static int
-nvkm_therm_update_linear(struct nvkm_therm *therm)
+nvkm_therm_compute_linear_duty(struct nvkm_therm *therm, u8 linear_min_temp,
+                               u8 linear_max_temp)
 {
-	u8  linear_min_temp = therm->fan->bios.linear_min_temp;
-	u8  linear_max_temp = therm->fan->bios.linear_max_temp;
 	u8  temp = therm->func->temp_get(therm);
 	u16 duty;
 
@@ -85,6 +84,21 @@ nvkm_therm_update_linear(struct nvkm_therm *therm)
 	return duty;
 }
 
+static int
+nvkm_therm_update_linear(struct nvkm_therm *therm)
+{
+	u8  min = therm->fan->bios.linear_min_temp;
+	u8  max = therm->fan->bios.linear_max_temp;
+	return nvkm_therm_compute_linear_duty(therm, min, max);
+}
+
+static int
+nvkm_therm_update_linear_fallback(struct nvkm_therm *therm)
+{
+	u8 max = therm->bios_sensor.thrs_fan_boost.temp;
+	return nvkm_therm_compute_linear_duty(therm, 30, max);
+}
+
 static void
 nvkm_therm_update(struct nvkm_therm *therm, int mode)
 {
@@ -119,6 +133,8 @@ nvkm_therm_update(struct nvkm_therm *therm, int mode)
 		case NVBIOS_THERM_FAN_OTHER:
 			if (therm->cstate)
 				duty = therm->cstate;
+			else
+				duty = nvkm_therm_update_linear_fallback(therm);
 			poll = false;
 			break;
 		}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
index fe063d5728e2..67ada1d9a28c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
@@ -95,6 +95,20 @@ nvkm_top_intr(struct nvkm_device *device, u32 intr, u64 *psubdevs)
 	return intr & ~handled;
 }
 
+int
+nvkm_top_fault_id(struct nvkm_device *device, enum nvkm_devidx devidx)
+{
+	struct nvkm_top *top = device->top;
+	struct nvkm_top_device *info;
+
+	list_for_each_entry(info, &top->device, head) {
+		if (info->index == devidx && info->fault >= 0)
+			return info->fault;
+	}
+
+	return -ENOENT;
+}
+
 enum nvkm_devidx
 nvkm_top_fault(struct nvkm_device *device, int fault)
 {
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 06aaf79de8c8..89eb0422821c 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -668,6 +668,48 @@ static const struct panel_desc avic_tm070ddh03 = {
 	},
 };
 
+static const struct drm_display_mode boe_nv101wxmn51_modes[] = {
+	{
+		.clock = 71900,
+		.hdisplay = 1280,
+		.hsync_start = 1280 + 48,
+		.hsync_end = 1280 + 48 + 32,
+		.htotal = 1280 + 48 + 32 + 80,
+		.vdisplay = 800,
+		.vsync_start = 800 + 3,
+		.vsync_end = 800 + 3 + 5,
+		.vtotal = 800 + 3 + 5 + 24,
+		.vrefresh = 60,
+	},
+	{
+		.clock = 57500,
+		.hdisplay = 1280,
+		.hsync_start = 1280 + 48,
+		.hsync_end = 1280 + 48 + 32,
+		.htotal = 1280 + 48 + 32 + 80,
+		.vdisplay = 800,
+		.vsync_start = 800 + 3,
+		.vsync_end = 800 + 3 + 5,
+		.vtotal = 800 + 3 + 5 + 24,
+		.vrefresh = 48,
+	},
+};
+
+static const struct panel_desc boe_nv101wxmn51 = {
+	.modes = boe_nv101wxmn51_modes,
+	.num_modes = ARRAY_SIZE(boe_nv101wxmn51_modes),
+	.bpc = 8,
+	.size = {
+		.width = 217,
+		.height = 136,
+	},
+	.delay = {
+		.prepare = 210,
+		.enable = 50,
+		.unprepare = 160,
+	},
+};
+
 static const struct drm_display_mode chunghwa_claa070wp03xg_mode = {
 	.clock = 66770,
 	.hdisplay = 800,
@@ -760,6 +802,8 @@ static const struct panel_desc edt_et057090dhu = {
 		.width = 115,
 		.height = 86,
 	},
+	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_NEGEDGE,
 };
 
 static const struct drm_display_mode edt_etm0700g0dh6_mode = {
@@ -784,6 +828,8 @@ static const struct panel_desc edt_etm0700g0dh6 = {
 		.width = 152,
 		.height = 91,
 	},
+	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_NEGEDGE,
 };
 
 static const struct drm_display_mode foxlink_fl500wvr00_a0t_mode = {
@@ -1277,6 +1323,29 @@ static const struct panel_desc nec_nl4827hc19_05b = {
 	.bus_flags = DRM_BUS_FLAG_PIXDATA_POSEDGE,
 };
 
+static const struct drm_display_mode netron_dy_e231732_mode = {
+	.clock = 66000,
+	.hdisplay = 1024,
+	.hsync_start = 1024 + 160,
+	.hsync_end = 1024 + 160 + 70,
+	.htotal = 1024 + 160 + 70 + 90,
+	.vdisplay = 600,
+	.vsync_start = 600 + 127,
+	.vsync_end = 600 + 127 + 20,
+	.vtotal = 600 + 127 + 20 + 3,
+	.vrefresh = 60,
+};
+
+static const struct panel_desc netron_dy_e231732 = {
+	.modes = &netron_dy_e231732_mode,
+	.num_modes = 1,
+	.size = {
+		.width = 154,
+		.height = 87,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
+};
+
 static const struct drm_display_mode nvd_9128_mode = {
 	.clock = 29500,
 	.hdisplay = 800,
@@ -1632,6 +1701,30 @@ static const struct panel_desc starry_kr122ea0sra = {
 	},
 };
 
+static const struct display_timing tianma_tm070jdhg30_timing = {
+	.pixelclock = { 62600000, 68200000, 78100000 },
+	.hactive = { 1280, 1280, 1280 },
+	.hfront_porch = { 15, 64, 159 },
+	.hback_porch = { 5, 5, 5 },
+	.hsync_len = { 1, 1, 256 },
+	.vactive = { 800, 800, 800 },
+	.vfront_porch = { 3, 40, 99 },
+	.vback_porch = { 2, 2, 2 },
+	.vsync_len = { 1, 1, 128 },
+	.flags = DISPLAY_FLAGS_DE_HIGH,
+};
+
+static const struct panel_desc tianma_tm070jdhg30 = {
+	.timings = &tianma_tm070jdhg30_timing,
+	.num_timings = 1,
+	.bpc = 8,
+	.size = {
+		.width = 151,
+		.height = 95,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
+};
+
 static const struct drm_display_mode tpk_f07a_0102_mode = {
 	.clock = 33260,
 	.hdisplay = 800,
@@ -1748,6 +1841,9 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "avic,tm070ddh03",
 		.data = &avic_tm070ddh03,
 	}, {
+		.compatible = "boe,nv101wxmn51",
+		.data = &boe_nv101wxmn51,
+	}, {
 		.compatible = "chunghwa,claa070wp03xg",
 		.data = &chunghwa_claa070wp03xg,
 	}, {
@@ -1826,6 +1922,9 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "nec,nl4827hc19-05b",
 		.data = &nec_nl4827hc19_05b,
 	}, {
+		.compatible = "netron-dy,e231732",
+		.data = &netron_dy_e231732,
+	}, {
 		.compatible = "nvd,9128",
 		.data = &nvd_9128,
 	}, {
@@ -1868,6 +1967,9 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "starry,kr122ea0sra",
 		.data = &starry_kr122ea0sra,
 	}, {
+		.compatible = "tianma,tm070jdhg30",
+		.data = &tianma_tm070jdhg30,
+	}, {
 		.compatible = "tpk,f07a-0102",
 		.data = &tpk_f07a_0102,
 	}, {
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index af3bbe82fd48..956c425e639e 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -97,9 +97,10 @@
  *   2.46.0 - Add PFP_SYNC_ME support on evergreen
  *   2.47.0 - Add UVD_NO_OP register support
  *   2.48.0 - TA_CS_BC_BASE_ADDR allowed on SI
+ *   2.49.0 - DRM_RADEON_GEM_INFO ioctl returns correct vram_size/visible values
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	48
+#define KMS_DRIVER_MINOR	49
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 void radeon_driver_unload_kms(struct drm_device *dev);
@@ -366,11 +367,10 @@ static void
 radeon_pci_shutdown(struct pci_dev *pdev)
 {
 	/* if we are running in a VM, make sure the device
-	 * torn down properly on reboot/shutdown.
-	 * unfortunately we can't detect certain
-	 * hypervisors so just do this all the time.
+	 * torn down properly on reboot/shutdown
 	 */
-	radeon_pci_remove(pdev);
+	if (radeon_device_is_virtual())
+		radeon_pci_remove(pdev);
 }
 
 static int radeon_pmops_suspend(struct device *dev)
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 0bcffd8a7bd3..96683f5b2b1b 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -220,8 +220,8 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
 
 	man = &rdev->mman.bdev.man[TTM_PL_VRAM];
 
-	args->vram_size = rdev->mc.real_vram_size;
-	args->vram_visible = (u64)man->size << PAGE_SHIFT;
+	args->vram_size = (u64)man->size << PAGE_SHIFT;
+	args->vram_visible = rdev->mc.visible_vram_size;
 	args->vram_visible -= rdev->vram_pin_size;
 	args->gart_size = rdev->mc.gtt_size;
 	args->gart_size -= rdev->gart_pin_size;
diff --git a/drivers/gpu/drm/sti/sti_drv.c b/drivers/gpu/drm/sti/sti_drv.c
index e6c1646b9c53..20fc0fbfa849 100644
--- a/drivers/gpu/drm/sti/sti_drv.c
+++ b/drivers/gpu/drm/sti/sti_drv.c
@@ -117,15 +117,6 @@ err:
 	return ret;
 }
 
-static void sti_drm_dbg_cleanup(struct drm_minor *minor)
-{
-	drm_debugfs_remove_files(sti_drm_dbg_list,
-				 ARRAY_SIZE(sti_drm_dbg_list), minor);
-
-	drm_debugfs_remove_files((struct drm_info_list *)&sti_drm_fps_fops,
-				 1, minor);
-}
-
 static int sti_atomic_check(struct drm_device *dev,
 			    struct drm_atomic_state *state)
 {
diff --git a/drivers/gpu/drm/sti/sti_vtg.c b/drivers/gpu/drm/sti/sti_vtg.c
index 943bce56692e..2dcba1d3a122 100644
--- a/drivers/gpu/drm/sti/sti_vtg.c
+++ b/drivers/gpu/drm/sti/sti_vtg.c
@@ -389,7 +389,6 @@ static irqreturn_t vtg_irq(int irq, void *arg)
 static int vtg_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct device_node *np;
 	struct sti_vtg *vtg;
 	struct resource *res;
 	int ret;
diff --git a/drivers/gpu/drm/tinydrm/Kconfig b/drivers/gpu/drm/tinydrm/Kconfig
new file mode 100644
index 000000000000..3504c53846da
--- /dev/null
+++ b/drivers/gpu/drm/tinydrm/Kconfig
@@ -0,0 +1,21 @@
+menuconfig DRM_TINYDRM
+	tristate "Support for simple displays"
+	depends on DRM
+	select DRM_KMS_HELPER
+	select DRM_KMS_CMA_HELPER
+	select BACKLIGHT_LCD_SUPPORT
+	select BACKLIGHT_CLASS_DEVICE
+	help
+	  Choose this option if you have a tinydrm supported display.
+	  If M is selected the module will be called tinydrm.
+
+config TINYDRM_MIPI_DBI
+	tristate
+
+config TINYDRM_MI0283QT
+	tristate "DRM support for MI0283QT"
+	depends on DRM_TINYDRM && SPI
+	select TINYDRM_MIPI_DBI
+	help
+	  DRM driver for the Multi-Inno MI0283QT display panel
+	  If M is selected the module will be called mi0283qt.
diff --git a/drivers/gpu/drm/tinydrm/Makefile b/drivers/gpu/drm/tinydrm/Makefile
new file mode 100644
index 000000000000..7a3604cf4fc2
--- /dev/null
+++ b/drivers/gpu/drm/tinydrm/Makefile
@@ -0,0 +1,7 @@
+obj-$(CONFIG_DRM_TINYDRM)		+= core/
+
+# Controllers
+obj-$(CONFIG_TINYDRM_MIPI_DBI)		+= mipi-dbi.o
+
+# Displays
+obj-$(CONFIG_TINYDRM_MI0283QT)		+= mi0283qt.o
diff --git a/drivers/gpu/drm/tinydrm/core/Makefile b/drivers/gpu/drm/tinydrm/core/Makefile
new file mode 100644
index 000000000000..fb221e6f8885
--- /dev/null
+++ b/drivers/gpu/drm/tinydrm/core/Makefile
@@ -0,0 +1,3 @@
+tinydrm-y := tinydrm-core.o tinydrm-pipe.o tinydrm-helpers.o
+
+obj-$(CONFIG_DRM_TINYDRM) += tinydrm.o
diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-core.c b/drivers/gpu/drm/tinydrm/core/tinydrm-core.c
new file mode 100644
index 000000000000..6a257dd08ee0
--- /dev/null
+++ b/drivers/gpu/drm/tinydrm/core/tinydrm-core.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (C) 2016 Noralf Trønnes
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/tinydrm/tinydrm.h>
+#include <linux/device.h>
+#include <linux/dma-buf.h>
+
+/**
+ * DOC: overview
+ *
+ * This library provides driver helpers for very simple display hardware.
+ *
+ * It is based on &drm_simple_display_pipe coupled with a &drm_connector which
+ * has only one fixed &drm_display_mode. The framebuffers are backed by the
+ * cma helper and have support for framebuffer flushing (dirty).
+ * fbdev support is also included.
+ *
+ */
+
+/**
+ * DOC: core
+ *
+ * The driver allocates &tinydrm_device, initializes it using
+ * devm_tinydrm_init(), sets up the pipeline using tinydrm_display_pipe_init()
+ * and registers the DRM device using devm_tinydrm_register().
+ */
+
+/**
+ * tinydrm_lastclose - DRM lastclose helper
+ * @drm: DRM device
+ *
+ * Restores the fbdev mode through drm_fbdev_cma_restore_mode(). Intended to
+ * be installed as the &drm_driver->lastclose callback, so that fbdev is
+ * restored when drm_lastclose() is called on the last drm_release().
+ */
+void tinydrm_lastclose(struct drm_device *drm)
+{
+	struct tinydrm_device *tinydrm = drm->dev_private;
+
+	DRM_DEBUG_KMS("\n");
+	drm_fbdev_cma_restore_mode(tinydrm->fbdev_cma);
+}
+EXPORT_SYMBOL(tinydrm_lastclose);
+
+/**
+ * tinydrm_gem_cma_prime_import_sg_table - Produce a CMA GEM object from
+ *     another driver's scatter/gather table of pinned pages
+ * @drm: DRM device to import into
+ * @attach: DMA-BUF attachment
+ * @sgt: Scatter/gather table of pinned pages
+ *
+ * The DMA-BUF is first mapped with dma_buf_vmap(), then the table is imported
+ * with drm_gem_cma_prime_import_sg_table() and the mapping is stored as the
+ * kernel virtual address of the resulting CMA object. Drivers that need the
+ * virtual address should use this as their
+ * &drm_driver->gem_prime_import_sg_table callback, paired with
+ * tinydrm_gem_cma_free_object() which releases the mapping again.
+ *
+ * Returns:
+ * A pointer to a newly created GEM object, ERR_PTR(-ENOMEM) if the buffer
+ * could not be mapped, or the ERR_PTR-encoded error from the import.
+ */
+struct drm_gem_object *
+tinydrm_gem_cma_prime_import_sg_table(struct drm_device *drm,
+				      struct dma_buf_attachment *attach,
+				      struct sg_table *sgt)
+{
+	struct dma_buf *dmabuf = attach->dmabuf;
+	struct drm_gem_object *gem;
+	void *mapping;
+
+	/* Map first so that a vmap failure leaves nothing to undo. */
+	mapping = dma_buf_vmap(dmabuf);
+	if (!mapping) {
+		DRM_ERROR("Failed to vmap PRIME buffer\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	gem = drm_gem_cma_prime_import_sg_table(drm, attach, sgt);
+	if (!IS_ERR(gem)) {
+		to_drm_gem_cma_obj(gem)->vaddr = mapping;
+		return gem;
+	}
+
+	/* Import failed: drop the mapping and pass the ERR_PTR through. */
+	dma_buf_vunmap(dmabuf, mapping);
+	return gem;
+}
+EXPORT_SYMBOL(tinydrm_gem_cma_prime_import_sg_table);
+
+/**
+ * tinydrm_gem_cma_free_object - Free resources associated with a CMA GEM
+ *                               object
+ * @gem_obj: GEM object to free
+ *
+ * For imported PRIME buffers, the kernel virtual address that was set by
+ * tinydrm_gem_cma_prime_import_sg_table() is unmapped with dma_buf_vunmap()
+ * and cleared. The object is then handed to drm_gem_cma_free_object(), which
+ * frees the GEM object itself. Drivers can use this as their
+ * &drm_driver->gem_free_object callback.
+ */
+void tinydrm_gem_cma_free_object(struct drm_gem_object *gem_obj)
+{
+	struct dma_buf_attachment *attach = gem_obj->import_attach;
+
+	if (attach) {
+		struct drm_gem_cma_object *cma = to_drm_gem_cma_obj(gem_obj);
+
+		dma_buf_vunmap(attach->dmabuf, cma->vaddr);
+		cma->vaddr = NULL;
+	}
+	drm_gem_cma_free_object(gem_obj);
+}
+EXPORT_SYMBOL_GPL(tinydrm_gem_cma_free_object);
+
+const struct file_operations tinydrm_fops = {
+	.owner		= THIS_MODULE,
+	.open		= drm_open,
+	.release	= drm_release,
+	.unlocked_ioctl	= drm_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= drm_compat_ioctl,
+#endif
+	.poll		= drm_poll,
+	.read		= drm_read,
+	.llseek		= no_llseek,
+	.mmap		= drm_gem_cma_mmap,
+};
+EXPORT_SYMBOL(tinydrm_fops);
+
+static struct drm_framebuffer *
+tinydrm_fb_create(struct drm_device *drm, struct drm_file *file_priv,
+		  const struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct tinydrm_device *tdev = drm->dev_private;
+	const struct drm_framebuffer_funcs *funcs = tdev->fb_funcs;
+
+	return drm_fb_cma_create_with_funcs(drm, file_priv, mode_cmd, funcs);
+}
+
+static const struct drm_mode_config_funcs tinydrm_mode_config_funcs = {
+	.fb_create = tinydrm_fb_create,
+	.atomic_check = drm_atomic_helper_check,
+	.atomic_commit = drm_atomic_helper_commit,
+};
+
+static int tinydrm_init(struct device *parent, struct tinydrm_device *tdev,
+			const struct drm_framebuffer_funcs *fb_funcs,
+			struct drm_driver *driver)
+{
+	struct drm_device *ddev;
+
+	tdev->fb_funcs = fb_funcs;
+	mutex_init(&tdev->dirty_lock);
+
+	/*
+	 * drm_device is allocated separately instead of being embedded:
+	 * drm_dev_unref() frees the structure, which would prevent drivers
+	 * from allocating tinydrm_device with devm_kzalloc(). The devm_
+	 * functions provide for easy error handling.
+	 */
+	ddev = drm_dev_alloc(driver, parent);
+	if (IS_ERR(ddev))
+		return PTR_ERR(ddev);
+
+	ddev->dev_private = tdev;
+	tdev->drm = ddev;
+
+	drm_mode_config_init(ddev);
+	ddev->mode_config.funcs = &tinydrm_mode_config_funcs;
+	return 0;
+}
+
+static void tinydrm_fini(struct tinydrm_device *tdev)
+{
+	drm_mode_config_cleanup(tdev->drm);
+	mutex_destroy(&tdev->dirty_lock);
+	tdev->drm->dev_private = NULL;
+	drm_dev_unref(tdev->drm);
+}
+
+static void devm_tinydrm_release(void *data)
+{
+	tinydrm_fini(data);
+}
+
+/**
+ * devm_tinydrm_init - Initialize tinydrm device
+ * @parent: Parent device object
+ * @tdev: tinydrm device
+ * @fb_funcs: Framebuffer functions
+ * @driver: DRM driver
+ *
+ * This function initializes @tdev, the underlying DRM device and its
+ * mode_config. A devres action on @parent frees these resources again on
+ * driver detach using drm_mode_config_cleanup() and drm_dev_unref().
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int devm_tinydrm_init(struct device *parent, struct tinydrm_device *tdev,
+		      const struct drm_framebuffer_funcs *fb_funcs,
+		      struct drm_driver *driver)
+{
+	int err = tinydrm_init(parent, tdev, fb_funcs, driver);
+
+	if (err)
+		return err;
+
+	err = devm_add_action(parent, devm_tinydrm_release, tdev);
+	if (!err)
+		return 0;
+	/* No devres action was registered, so undo the init by hand. */
+	tinydrm_fini(tdev);
+	return err;
+}
+EXPORT_SYMBOL(devm_tinydrm_init);
+
+static int tinydrm_register(struct tinydrm_device *tdev)
+{
+	struct drm_device *drm = tdev->drm;
+	int depth = drm->mode_config.preferred_depth;
+	struct drm_fbdev_cma *cma;
+	int err;
+
+	err = drm_dev_register(drm, 0);
+	if (err)
+		return err;
+
+	/* fbdev is optional: a failure is logged, registration still succeeds */
+	cma = drm_fbdev_cma_init_with_funcs(drm, depth ? depth : 32,
+					    drm->mode_config.num_connector,
+					    tdev->fb_funcs);
+	if (!IS_ERR(cma))
+		tdev->fbdev_cma = cma;
+	else
+		DRM_ERROR("Failed to initialize fbdev: %ld\n", PTR_ERR(cma));
+	return 0;
+}
+
+static void tinydrm_unregister(struct tinydrm_device *tdev)
+{
+	struct drm_fbdev_cma *fbdev = tdev->fbdev_cma;
+
+	drm_crtc_force_disable_all(tdev->drm);
+	/* Hide fbdev from lastclose so that the pipeline stays disabled. */
+	tdev->fbdev_cma = NULL;
+	drm_dev_unregister(tdev->drm);
+	if (fbdev)
+		drm_fbdev_cma_fini(fbdev);
+}
+
+static void devm_tinydrm_register_release(void *data)
+{
+	tinydrm_unregister(data);
+}
+
+/**
+ * devm_tinydrm_register - Register tinydrm device
+ * @tdev: tinydrm device
+ *
+ * Registers the underlying DRM device and sets up fbdev emulation. On driver
+ * detach (devres) the display pipeline is disabled and both are unregistered
+ * again. A failed fbdev setup is only logged and does not fail registration.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int devm_tinydrm_register(struct tinydrm_device *tdev)
+{
+	struct device *parent = tdev->drm->dev;
+	int err = tinydrm_register(tdev);
+
+	if (err)
+		return err;
+
+	err = devm_add_action(parent, devm_tinydrm_register_release, tdev);
+	if (!err)
+		return 0;
+	/* No devres action was added, so unregister by hand. */
+	tinydrm_unregister(tdev);
+	return err;
+}
+EXPORT_SYMBOL(devm_tinydrm_register);
+
+/**
+ * tinydrm_shutdown - Shutdown tinydrm
+ * @tdev: tinydrm device
+ *
+ * Disables the display pipeline by calling drm_crtc_force_disable_all() on
+ * the DRM device. Used by drivers in their shutdown callback to turn off the
+ * display on machine shutdown and reboot.
+ */
+void tinydrm_shutdown(struct tinydrm_device *tdev)
+{
+	drm_crtc_force_disable_all(tdev->drm);
+}
+EXPORT_SYMBOL(tinydrm_shutdown);
+
+/**
+ * tinydrm_suspend - Suspend tinydrm
+ * @tdev: tinydrm device
+ *
+ * Used in driver PM operations. Suspends fbdev, then suspends DRM and keeps
+ * the returned atomic state in @tdev for tinydrm_resume(). Fails with -EINVAL
+ * if a suspend state is already stored.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int tinydrm_suspend(struct tinydrm_device *tdev)
+{
+	struct drm_atomic_state *saved;
+
+	if (tdev->suspend_state) {
+		DRM_ERROR("Failed to suspend: state already set\n");
+		return -EINVAL;
+	}
+
+	drm_fbdev_cma_set_suspend_unlocked(tdev->fbdev_cma, 1);
+	saved = drm_atomic_helper_suspend(tdev->drm);
+	if (!IS_ERR(saved)) {
+		tdev->suspend_state = saved;
+		return 0;
+	}
+
+	/* DRM suspend failed: resume fbdev again and report the error. */
+	drm_fbdev_cma_set_suspend_unlocked(tdev->fbdev_cma, 0);
+	return PTR_ERR(saved);
+}
+EXPORT_SYMBOL(tinydrm_suspend);
+
+/**
+ * tinydrm_resume - Resume tinydrm
+ * @tdev: tinydrm device
+ *
+ * Used in driver PM operations. Resumes DRM from the state stored by
+ * tinydrm_suspend() and, if that succeeds, resumes fbdev.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int tinydrm_resume(struct tinydrm_device *tdev)
+{
+	struct drm_atomic_state *saved = tdev->suspend_state;
+	int err;
+
+	if (!saved) {
+		DRM_ERROR("Failed to resume: state is not set\n");
+		return -EINVAL;
+	}
+
+	/* The stored pointer is cleared before resuming, even if that fails. */
+	tdev->suspend_state = NULL;
+	err = drm_atomic_helper_resume(tdev->drm, saved);
+	if (err) {
+		DRM_ERROR("Error resuming state: %d\n", err);
+		return err;
+	}
+
+	drm_fbdev_cma_set_suspend_unlocked(tdev->fbdev_cma, 0);
+
+	return 0;
+}
+EXPORT_SYMBOL(tinydrm_resume);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c b/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c
new file mode 100644
index 000000000000..3ccda6c1e159
--- /dev/null
+++ b/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c
@@ -0,0 +1,460 @@
+/*
+ * Copyright (C) 2016 Noralf Trønnes
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <drm/tinydrm/tinydrm.h>
+#include <drm/tinydrm/tinydrm-helpers.h>
+#include <linux/backlight.h>
+#include <linux/pm.h>
+#include <linux/spi/spi.h>
+#include <linux/swab.h>
+
+static unsigned int spi_max;
+module_param(spi_max, uint, 0400);
+MODULE_PARM_DESC(spi_max, "Set a lower SPI max transfer size");
+
+/**
+ * tinydrm_merge_clips - Merge clip rectangles
+ * @dst: Destination clip rectangle
+ * @src: Source clip rectangle(s)
+ * @num_clips: Number of @src clip rectangles
+ * @flags: Dirty fb ioctl flags
+ * @max_width: Maximum width of @dst
+ * @max_height: Maximum height of @dst
+ *
+ * Merges the @src clip rectangle(s) into @dst. Without @src clips, or if the
+ * merged clip is illegal, @dst is set to the full @max_width x @max_height.
+ *
+ * Returns:
+ * true if it's a full clip, false otherwise
+ */
+bool tinydrm_merge_clips(struct drm_clip_rect *dst,
+			 struct drm_clip_rect *src, unsigned int num_clips,
+			 unsigned int flags, u32 max_width, u32 max_height)
+{
+	unsigned int i;
+
+	if (!src || !num_clips) {
+		dst->x1 = 0;
+		dst->x2 = max_width;
+		dst->y1 = 0;
+		dst->y2 = max_height;
+		return true;
+	}
+
+	dst->x1 = ~0;
+	dst->y1 = ~0;
+	dst->x2 = 0;
+	dst->y2 = 0;
+
+	for (i = 0; i < num_clips; i++) {
+		if (flags & DRM_MODE_FB_DIRTY_ANNOTATE_COPY)
+			i++;
+		dst->x1 = min(dst->x1, src[i].x1);
+		dst->x2 = max(dst->x2, src[i].x2);
+		dst->y1 = min(dst->y1, src[i].y1);
+		dst->y2 = max(dst->y2, src[i].y2);
+	}
+
+	if (dst->x2 > max_width || dst->y2 > max_height ||
+	    dst->x1 >= dst->x2 || dst->y1 >= dst->y2) {
+		DRM_DEBUG_KMS("Illegal clip: x1=%u, x2=%u, y1=%u, y2=%u\n",
+			      dst->x1, dst->x2, dst->y1, dst->y2);
+		dst->x1 = 0;
+		dst->y1 = 0;
+		dst->x2 = max_width;
+		dst->y2 = max_height;
+	}
+
+	return (dst->x2 - dst->x1) == max_width &&
+	       (dst->y2 - dst->y1) == max_height;
+}
+EXPORT_SYMBOL(tinydrm_merge_clips);
+
+/**
+ * tinydrm_memcpy - Copy clip buffer
+ * @dst: Destination buffer, filled without padding between rows
+ * @vaddr: Source buffer, laid out with the pitch of @fb
+ * @fb: DRM framebuffer
+ * @clip: Clip rectangle area to copy
+ */
+void tinydrm_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
+		    struct drm_clip_rect *clip)
+{
+	unsigned int cpp = drm_format_plane_cpp(fb->format->format, 0);
+	unsigned int stride = fb->pitches[0];
+	size_t line_len = (clip->x2 - clip->x1) * cpp;
+	void *line = vaddr + (clip->y1 * stride) + (clip->x1 * cpp);
+	unsigned int row;
+
+	/* Pack the clip rows back to back in @dst, one row per iteration. */
+	for (row = clip->y1; row < clip->y2; row++, line += stride) {
+		memcpy(dst, line, line_len);
+		dst += line_len;
+	}
+}
+EXPORT_SYMBOL(tinydrm_memcpy);
+
+/**
+ * tinydrm_swab16 - Swap bytes into clip buffer
+ * @dst: RGB565 destination buffer (not written on allocation failure)
+ * @vaddr: RGB565 source buffer
+ * @fb: DRM framebuffer
+ * @clip: Clip rectangle area to copy
+ */
+void tinydrm_swab16(u16 *dst, void *vaddr, struct drm_framebuffer *fb,
+		    struct drm_clip_rect *clip)
+{
+	size_t line_len = (clip->x2 - clip->x1) * sizeof(u16);
+	unsigned int x, y;
+	u16 *line;
+
+	/*
+	 * The cma memory is write-combined so reads are uncached.
+	 * Bounce each line through a kmalloc'ed buffer before swapping.
+	 */
+	line = kmalloc(line_len, GFP_KERNEL);
+	if (!line)
+		return;
+
+	for (y = clip->y1; y < clip->y2; y++) {
+		const u16 *fb_line = vaddr + (y * fb->pitches[0]);
+		unsigned int i = 0;
+
+		memcpy(line, fb_line + clip->x1, line_len);
+		for (x = clip->x1; x < clip->x2; x++)
+			*dst++ = swab16(line[i++]);
+	}
+
+	kfree(line);
+}
+EXPORT_SYMBOL(tinydrm_swab16);
+
+/**
+ * tinydrm_xrgb8888_to_rgb565 - Convert XRGB8888 to RGB565 clip buffer
+ * @dst: RGB565 destination buffer (not written on allocation failure)
+ * @vaddr: XRGB8888 source buffer
+ * @fb: DRM framebuffer
+ * @clip: Clip rectangle area to copy
+ * @swap: Swap the two bytes of every RGB565 pixel
+ *
+ * Drivers can use this function for RGB565 devices that don't natively
+ * support XRGB8888.
+ */
+void tinydrm_xrgb8888_to_rgb565(u16 *dst, void *vaddr,
+				struct drm_framebuffer *fb,
+				struct drm_clip_rect *clip, bool swap)
+{
+	size_t line_len = (clip->x2 - clip->x1) * sizeof(u32);
+	unsigned int x, y;
+	u32 *line;
+
+	/* bounce buffer holding one clip line of XRGB8888 pixels */
+	line = kmalloc(line_len, GFP_KERNEL);
+	if (!line)
+		return;
+
+	for (y = clip->y1; y < clip->y2; y++) {
+		const u32 *fb_line = vaddr + (y * fb->pitches[0]);
+		const u32 *pix = line;
+
+		/* copy the line out of the framebuffer before converting */
+		memcpy(line, fb_line + clip->x1, line_len);
+
+		for (x = clip->x1; x < clip->x2; x++, pix++) {
+			/* keep the top 5/6/5 bits of red/green/blue */
+			u16 rgb565 = ((*pix & 0x00F80000) >> 8) |
+				     ((*pix & 0x0000FC00) >> 5) |
+				     ((*pix & 0x000000F8) >> 3);
+
+			*dst++ = swap ? swab16(rgb565) : rgb565;
+		}
+	}
+
+	kfree(line);
+}
+EXPORT_SYMBOL(tinydrm_xrgb8888_to_rgb565);
+
+/**
+ * tinydrm_of_find_backlight - Find backlight device in device-tree
+ * @dev: Device
+ *
+ * Resolves the phandle in the 'backlight' property of the @dev DT node and
+ * looks up the backlight device with of_find_backlight_by_node().
+ * If the device reports a brightness of zero, its brightness property is set
+ * to max_brightness.
+ *
+ * Returns:
+ * NULL if there's no backlight property.
+ * Error pointer -EPROBE_DEFER if the DT node is found, but no backlight device
+ * is found.
+ * If the backlight device is found, a pointer to the structure is returned.
+ */
+struct backlight_device *tinydrm_of_find_backlight(struct device *dev)
+{
+	struct backlight_device *bl;
+	struct device_node *node;
+
+	node = of_parse_phandle(dev->of_node, "backlight", 0);
+	if (!node)
+		return NULL;
+
+	/* The node reference is only needed for the lookup itself. */
+	bl = of_find_backlight_by_node(node);
+	of_node_put(node);
+	if (!bl)
+		return ERR_PTR(-EPROBE_DEFER);
+
+	if (bl->props.brightness)
+		return bl;
+
+	bl->props.brightness = bl->props.max_brightness;
+	DRM_DEBUG_KMS("Backlight brightness set to %d\n", bl->props.brightness);
+
+	return bl;
+}
+EXPORT_SYMBOL(tinydrm_of_find_backlight);
+
+/**
+ * tinydrm_enable_backlight - Enable backlight helper
+ * @backlight: Backlight device, may be NULL (nothing is done then)
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int tinydrm_enable_backlight(struct backlight_device *backlight)
+{
+	unsigned int prev;
+	int err;
+
+	/* A missing backlight is not an error. */
+	if (!backlight)
+		return 0;
+
+	prev = backlight->props.state;
+	backlight->props.state = prev & ~BL_CORE_FBBLANK;
+	DRM_DEBUG_KMS("Backlight state: 0x%x -> 0x%x\n", prev,
+		      backlight->props.state);
+	err = backlight_update_status(backlight);
+	if (err)
+		DRM_ERROR("Failed to enable backlight %d\n", err);
+
+	return err;
+}
+EXPORT_SYMBOL(tinydrm_enable_backlight);
+
+/**
+ * tinydrm_disable_backlight - Disable backlight helper
+ * @backlight: Backlight device, may be NULL (nothing is done then)
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int tinydrm_disable_backlight(struct backlight_device *backlight)
+{
+	unsigned int prev;
+	int err;
+
+	if (!backlight)
+		return 0;
+
+	prev = backlight->props.state;
+	backlight->props.state = prev | BL_CORE_FBBLANK;
+	DRM_DEBUG_KMS("Backlight state: 0x%x -> 0x%x\n", prev,
+		      backlight->props.state);
+	err = backlight_update_status(backlight);
+	if (err)
+		DRM_ERROR("Failed to disable backlight %d\n", err);
+
+	return err;
+}
+EXPORT_SYMBOL(tinydrm_disable_backlight);
+
+#if IS_ENABLED(CONFIG_SPI)
+
+/**
+ * tinydrm_spi_max_transfer_size - Determine max SPI transfer size
+ * @spi: SPI device
+ * @max_len: Maximum buffer size needed (optional, zero to ignore)
+ *
+ * This function takes the smallest of the SPI master limits, the optional
+ * @max_len and the module parameter spi_max (if set), then rounds the result
+ * down to a multiple of 4 with a minimum of 4 bytes.
+ *
+ * Returns:
+ * Maximum size for SPI transfers
+ */
+size_t tinydrm_spi_max_transfer_size(struct spi_device *spi, size_t max_len)
+{
+	size_t limit;
+
+	limit = min(spi_max_transfer_size(spi), spi->master->max_dma_len);
+	if (max_len)
+		limit = min(limit, max_len);
+	if (spi_max)
+		limit = min_t(size_t, limit, spi_max);
+
+	/* round down to a multiple of 4, but never go below 4 bytes */
+	limit &= ~0x3;
+
+	return limit < 4 ? 4 : limit;
+}
+EXPORT_SYMBOL(tinydrm_spi_max_transfer_size);
+
+/**
+ * tinydrm_spi_bpw_supported - Check if bits per word is supported
+ * @spi: SPI device
+ * @bpw: Bits per word
+ *
+ * 8 is always supported; other sizes are looked up in bits_per_word_mask.
+ *
+ * Returns:
+ * True if @bpw is supported, false otherwise.
+ */
+bool tinydrm_spi_bpw_supported(struct spi_device *spi, u8 bpw)
+{
+	u32 supported = spi->master->bits_per_word_mask;
+
+	if (bpw == 8)
+		return true;
+
+	if (supported)
+		return !!(supported & SPI_BPW_MASK(bpw));
+
+	/*
+	 * An empty mask gives no information about other word sizes.
+	 */
+	dev_warn_once(&spi->dev,
+		      "bits_per_word_mask not set, assume 8-bit only\n");
+
+	return false;
+}
+EXPORT_SYMBOL(tinydrm_spi_bpw_supported);
+
+static void
+tinydrm_dbg_spi_print(struct spi_device *spi, struct spi_transfer *tr,
+		      const void *buf, int idx, bool tx)
+{
+	u32 hz = tr->speed_hz ? tr->speed_hz : spi->max_speed_hz;
+	bool mhz = hz > 1000000;
+	char hex[3 * 32];
+
+	/* one 16 byte row is dumped; " ..." marks a longer buffer */
+	hex_dump_to_buffer(buf, tr->len, 16, DIV_ROUND_UP(tr->bits_per_word, 8),
+			   hex, sizeof(hex), false);
+	printk(KERN_DEBUG
+	       "    tr(%i): speed=%u%s, bpw=%i, len=%u, %s_buf=[%s%s]\n", idx,
+	       mhz ? hz / 1000000 : hz / 1000, mhz ? "MHz" : "kHz",
+	       tr->bits_per_word, tr->len, tx ? "tx" : "rx", hex,
+	       tr->len > 16 ? " ..." : "");
+}
+
+/*
+ * Print every transfer of @m; called through tinydrm_dbg_spi_message()
+ */
+void _tinydrm_dbg_spi_message(struct spi_device *spi, struct spi_message *m)
+{
+	struct spi_transfer *tr;
+	int n = 0;
+
+	/* tx and rx of one transfer are printed under the same index */
+	list_for_each_entry(tr, &m->transfers, transfer_list) {
+		if (tr->tx_buf)
+			tinydrm_dbg_spi_print(spi, tr, tr->tx_buf, n, true);
+		if (tr->rx_buf)
+			tinydrm_dbg_spi_print(spi, tr, tr->rx_buf, n, false);
+		n++;
+	}
+}
+EXPORT_SYMBOL(_tinydrm_dbg_spi_message);
+
+/**
+ * tinydrm_spi_transfer - SPI transfer helper
+ * @spi: SPI device
+ * @speed_hz: Override speed (optional)
+ * @header: Optional header transfer
+ * @bpw: Bits per word (8 or 16)
+ * @buf: Buffer to transfer
+ * @len: Buffer length
+ *
+ * This SPI transfer helper breaks up the transfer of @buf into chunks which
+ * the SPI master driver can handle. If the machine is Little Endian and the
+ * SPI master driver doesn't support 16 bits per word, it swaps the bytes and
+ * does an 8-bit transfer.
+ * If @header is set, it is prepended to each SPI message.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int tinydrm_spi_transfer(struct spi_device *spi, u32 speed_hz,
+			 struct spi_transfer *header, u8 bpw, const void *buf,
+			 size_t len)
+{
+	struct spi_transfer tr = {
+		.bits_per_word = bpw,
+		.speed_hz = speed_hz,
+	};
+	struct spi_message m;
+	u16 *swap_buf = NULL;
+	size_t max_chunk, chunk;
+	int ret = 0;
+
+	if (WARN_ON_ONCE(bpw != 8 && bpw != 16))
+		return -EINVAL;
+
+	max_chunk = tinydrm_spi_max_transfer_size(spi, 0);
+
+	if (drm_debug & DRM_UT_DRIVER)
+		pr_debug("[drm:%s] bpw=%u, max_chunk=%zu, transfers:\n",
+			 __func__, bpw, max_chunk);
+
+	if (bpw == 16 && !tinydrm_spi_bpw_supported(spi, 16)) {
+		tr.bits_per_word = 8;
+		if (tinydrm_machine_little_endian()) {
+			swap_buf = kmalloc(min(len, max_chunk), GFP_KERNEL);
+			if (!swap_buf)
+				return -ENOMEM;
+		}
+	}
+
+	spi_message_init(&m);
+	if (header)
+		spi_message_add_tail(header, &m);
+	spi_message_add_tail(&tr, &m);
+
+	while (len) {
+		chunk = min(len, max_chunk);
+		tr.tx_buf = buf;
+		tr.len = chunk;
+
+		if (swap_buf) {
+			const u16 *buf16 = buf;
+			unsigned int i;
+
+			for (i = 0; i < chunk / 2; i++)
+				swap_buf[i] = swab16(buf16[i]);
+
+			tr.tx_buf = swap_buf;
+		}
+
+		buf += chunk;
+		len -= chunk;
+
+		tinydrm_dbg_spi_message(spi, &m);
+		ret = spi_sync(spi, &m);
+		if (ret)
+			break;
+	}
+
+	/* Also reached on error so the swap buffer never leaks (NULL is ok) */
+	kfree(swap_buf);
+	return ret;
+}
+EXPORT_SYMBOL(tinydrm_spi_transfer);
+
+#endif /* CONFIG_SPI */
diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c b/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c
new file mode 100644
index 000000000000..ec43fb7ad9e4
--- /dev/null
+++ b/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright (C) 2016 Noralf Trønnes
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_modes.h>
+#include <drm/tinydrm/tinydrm.h>
+
+struct tinydrm_connector {
+	struct drm_connector base;
+	const struct drm_display_mode *mode;
+};
+
+static inline struct tinydrm_connector *
+to_tinydrm_connector(struct drm_connector *connector)
+{
+	return container_of(connector, struct tinydrm_connector, base);
+}
+
+static int tinydrm_connector_get_modes(struct drm_connector *connector)
+{
+	struct drm_display_info *info = &connector->display_info;
+	struct drm_display_mode *dup;
+
+	/* Add a duplicate of the connector's single fixed mode */
+	dup = drm_mode_duplicate(connector->dev,
+				 to_tinydrm_connector(connector)->mode);
+	if (dup == NULL) {
+		DRM_ERROR("Failed to duplicate mode\n");
+		return 0;
+	}
+	if (!dup->name[0])
+		drm_mode_set_name(dup);
+	dup->type = dup->type | DRM_MODE_TYPE_PREFERRED;
+	drm_mode_probed_add(connector, dup);
+
+	/* Only report a physical size when the mode carries one */
+	if (dup->width_mm != 0) {
+		info->width_mm = dup->width_mm;
+		info->height_mm = dup->height_mm;
+	}
+	return 1;
+}
+
+static const struct drm_connector_helper_funcs tinydrm_connector_hfuncs = {
+	.get_modes = tinydrm_connector_get_modes,
+	.best_encoder = drm_atomic_helper_best_encoder,
+};
+
+static enum drm_connector_status
+tinydrm_connector_detect(struct drm_connector *connector, bool force)
+{
+	/* Keep the stored status unless the DRM device has been unplugged */
+	bool gone = drm_device_is_unplugged(connector->dev);
+
+	return gone ? connector_status_disconnected : connector->status;
+}
+
+static void tinydrm_connector_destroy(struct drm_connector *connector)
+{
+	/* Clean up the embedded connector, then free the struct around it */
+	drm_connector_cleanup(connector);
+
+	kfree(to_tinydrm_connector(connector));
+}
+
+static const struct drm_connector_funcs tinydrm_connector_funcs = {
+	.dpms = drm_atomic_helper_connector_dpms,
+	.reset = drm_atomic_helper_connector_reset,
+	.detect = tinydrm_connector_detect,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = tinydrm_connector_destroy,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+struct drm_connector *
+tinydrm_connector_create(struct drm_device *drm,
+			 const struct drm_display_mode *mode,
+			 int connector_type)
+{
+	struct tinydrm_connector *tconn;
+	int err;
+
+	/* The container is freed again by tinydrm_connector_destroy() */
+	tconn = kzalloc(sizeof(struct tinydrm_connector), GFP_KERNEL);
+	if (tconn == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	tconn->mode = mode;
+
+	drm_connector_helper_add(&tconn->base, &tinydrm_connector_hfuncs);
+	err = drm_connector_init(drm, &tconn->base, &tinydrm_connector_funcs,
+				 connector_type);
+	if (err != 0) {
+		kfree(tconn);
+		return ERR_PTR(err);
+	}
+
+	/* Start out in the connected state */
+	tconn->base.status = connector_status_connected;
+
+	return &tconn->base;
+}
+
+/**
+ * tinydrm_display_pipe_update - Display pipe update helper
+ * @pipe: Simple display pipe
+ * @old_state: Old plane state
+ *
+ * This function does a full framebuffer flush if the plane framebuffer
+ * has changed. It also handles vblank events. Drivers can use this as their
+ * &drm_simple_display_pipe_funcs->update callback.
+ */
+void tinydrm_display_pipe_update(struct drm_simple_display_pipe *pipe,
+				 struct drm_plane_state *old_state)
+{
+	struct drm_crtc *crtc = &pipe_to_tinydrm(pipe)->pipe.crtc;
+	struct drm_framebuffer *new_fb = pipe->plane.state->fb;
+
+	/* Flush the whole framebuffer, but only when it was replaced */
+	if (new_fb != NULL && new_fb != old_state->fb) {
+		pipe->plane.fb = new_fb;
+		if (new_fb->funcs->dirty != NULL)
+			new_fb->funcs->dirty(new_fb, NULL, 0, 0, NULL, 0);
+	}
+
+	if (crtc->state->event == NULL)
+		return;
+	spin_lock_irq(&crtc->dev->event_lock);
+	drm_crtc_send_vblank_event(crtc, crtc->state->event);
+	spin_unlock_irq(&crtc->dev->event_lock);
+	crtc->state->event = NULL;
+}
+EXPORT_SYMBOL(tinydrm_display_pipe_update);
+
+/**
+ * tinydrm_display_pipe_prepare_fb - Display pipe prepare_fb helper
+ * @pipe: Simple display pipe
+ * @plane_state: Plane state
+ *
+ * This function uses drm_fb_cma_prepare_fb() to check if the plane FB has a
+ * dma-buf attached, extracts the exclusive fence and attaches it to plane
+ * state for the atomic helper to wait on. Drivers can use this as their
+ * &drm_simple_display_pipe_funcs->prepare_fb callback.
+ */
+int tinydrm_display_pipe_prepare_fb(struct drm_simple_display_pipe *pipe,
+				    struct drm_plane_state *plane_state)
+{
+	return drm_fb_cma_prepare_fb(&pipe->plane, plane_state);
+}
+EXPORT_SYMBOL(tinydrm_display_pipe_prepare_fb);
+
+static int tinydrm_rotate_mode(struct drm_display_mode *mode,
+			       unsigned int rotation)
+{
+	/* Only 0, 90, 180 and 270 degrees are accepted */
+	if (rotation > 270 || rotation % 90 != 0)
+		return -EINVAL;
+	/* Odd multiples of 90 exchange the horizontal and vertical fields */
+	if (rotation % 180 != 0) {
+		swap(mode->hdisplay, mode->vdisplay);
+		swap(mode->hsync_start, mode->vsync_start);
+		swap(mode->hsync_end, mode->vsync_end);
+		swap(mode->htotal, mode->vtotal);
+		swap(mode->width_mm, mode->height_mm);
+	}
+	return 0;
+}
+
+/**
+ * tinydrm_display_pipe_init - Initialize display pipe
+ * @tdev: tinydrm device
+ * @funcs: Display pipe functions
+ * @connector_type: Connector type
+ * @formats: Array of supported formats (DRM_FORMAT\_\*)
+ * @format_count: Number of elements in @formats
+ * @mode: Supported mode
+ * @rotation: Initial @mode rotation in degrees Counter Clock Wise
+ *
+ * This function sets up a &drm_simple_display_pipe with a &drm_connector that
+ * has one fixed &drm_display_mode which is rotated according to @rotation.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int
+tinydrm_display_pipe_init(struct tinydrm_device *tdev,
+			  const struct drm_simple_display_pipe_funcs *funcs,
+			  int connector_type,
+			  const uint32_t *formats,
+			  unsigned int format_count,
+			  const struct drm_display_mode *mode,
+			  unsigned int rotation)
+{
+	struct drm_mode_config *config = &tdev->drm->mode_config;
+	struct drm_display_mode *rotated;
+	struct drm_connector *conn;
+
+	/*
+	 * Rotate a device-managed private copy so that the caller's @mode
+	 * is left untouched.
+	 */
+	rotated = devm_kmalloc(tdev->drm->dev, sizeof(*rotated), GFP_KERNEL);
+	if (rotated == NULL)
+		return -ENOMEM;
+
+	*rotated = *mode;
+	if (tinydrm_rotate_mode(rotated, rotation) != 0) {
+		DRM_ERROR("Illegal rotation value %u\n", rotation);
+		return -EINVAL;
+	}
+
+	/* Pin the mode_config limits to the (rotated) resolution */
+	config->min_width = rotated->hdisplay;
+	config->max_width = rotated->hdisplay;
+	config->min_height = rotated->vdisplay;
+	config->max_height = rotated->vdisplay;
+
+	conn = tinydrm_connector_create(tdev->drm, rotated, connector_type);
+	if (IS_ERR(conn))
+		return PTR_ERR(conn);
+
+	/* Zero or a negative error code is passed straight to the caller */
+	return drm_simple_display_pipe_init(tdev->drm, &tdev->pipe, funcs,
+					    formats, format_count, conn);
+}
+EXPORT_SYMBOL(tinydrm_display_pipe_init);
diff --git a/drivers/gpu/drm/tinydrm/mi0283qt.c b/drivers/gpu/drm/tinydrm/mi0283qt.c
new file mode 100644
index 000000000000..b29fe86158f7
--- /dev/null
+++ b/drivers/gpu/drm/tinydrm/mi0283qt.c
@@ -0,0 +1,279 @@
+/*
+ * DRM driver for Multi-Inno MI0283QT panels
+ *
+ * Copyright 2016 Noralf Trønnes
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <drm/tinydrm/ili9341.h>
+#include <drm/tinydrm/mipi-dbi.h>
+#include <drm/tinydrm/tinydrm-helpers.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/property.h>
+#include <linux/regulator/consumer.h>
+#include <linux/spi/spi.h>
+#include <video/mipi_display.h>
+
+static int mi0283qt_init(struct mipi_dbi *mipi)
+{
+	struct tinydrm_device *tdev = &mipi->tinydrm;
+	struct device *dev = tdev->drm->dev;
+	u8 addr_mode;
+	int ret;
+
+	DRM_DEBUG_KMS("\n");
+	/* Power up first; mi0283qt_fini() disables the regulator again */
+	ret = regulator_enable(mipi->regulator);
+	if (ret) {
+		dev_err(dev, "Failed to enable regulator %d\n", ret);
+		return ret;
+	}
+
+	/* Avoid flicker by skipping setup if the bootloader has done it */
+	if (mipi_dbi_display_is_on(mipi))
+		return 0;
+
+	mipi_dbi_hw_reset(mipi);
+	ret = mipi_dbi_command(mipi, MIPI_DCS_SOFT_RESET);
+	if (ret) {
+		dev_err(dev, "Error sending command %d\n", ret);
+		regulator_disable(mipi->regulator);
+		return ret;
+	}
+
+	msleep(20);
+
+	mipi_dbi_command(mipi, MIPI_DCS_SET_DISPLAY_OFF);
+
+	mipi_dbi_command(mipi, ILI9341_PWCTRLB, 0x00, 0x83, 0x30);
+	mipi_dbi_command(mipi, ILI9341_PWRSEQ, 0x64, 0x03, 0x12, 0x81);
+	mipi_dbi_command(mipi, ILI9341_DTCTRLA, 0x85, 0x01, 0x79);
+	mipi_dbi_command(mipi, ILI9341_PWCTRLA, 0x39, 0x2c, 0x00, 0x34, 0x02);
+	mipi_dbi_command(mipi, ILI9341_PUMPCTRL, 0x20);
+	mipi_dbi_command(mipi, ILI9341_DTCTRLB, 0x00, 0x00);
+
+	/* Power Control */
+	mipi_dbi_command(mipi, ILI9341_PWCTRL1, 0x26);
+	mipi_dbi_command(mipi, ILI9341_PWCTRL2, 0x11);
+	/* VCOM */
+	mipi_dbi_command(mipi, ILI9341_VMCTRL1, 0x35, 0x3e);
+	mipi_dbi_command(mipi, ILI9341_VMCTRL2, 0xbe);
+
+	/* Pixel Format, then Memory Access Control (depends on rotation) */
+	mipi_dbi_command(mipi, MIPI_DCS_SET_PIXEL_FORMAT, 0x55);
+
+	switch (mipi->rotation) {
+	default:
+		addr_mode = ILI9341_MADCTL_MV | ILI9341_MADCTL_MY |
+			    ILI9341_MADCTL_MX;
+		break;
+	case 90:
+		addr_mode = ILI9341_MADCTL_MY;
+		break;
+	case 180:
+		addr_mode = ILI9341_MADCTL_MV;
+		break;
+	case 270:
+		addr_mode = ILI9341_MADCTL_MX;
+		break;
+	}
+	addr_mode |= ILI9341_MADCTL_BGR;
+	mipi_dbi_command(mipi, MIPI_DCS_SET_ADDRESS_MODE, addr_mode);
+
+	/* Frame Rate */
+	mipi_dbi_command(mipi, ILI9341_FRMCTR1, 0x00, 0x1b);
+
+	/* Gamma */
+	mipi_dbi_command(mipi, ILI9341_EN3GAM, 0x08);
+	mipi_dbi_command(mipi, MIPI_DCS_SET_GAMMA_CURVE, 0x01);
+	mipi_dbi_command(mipi, ILI9341_PGAMCTRL,
+		       0x1f, 0x1a, 0x18, 0x0a, 0x0f, 0x06, 0x45, 0x87,
+		       0x32, 0x0a, 0x07, 0x02, 0x07, 0x05, 0x00);
+	mipi_dbi_command(mipi, ILI9341_NGAMCTRL,
+		       0x00, 0x25, 0x27, 0x05, 0x10, 0x09, 0x3a, 0x78,
+		       0x4d, 0x05, 0x18, 0x0d, 0x38, 0x3a, 0x1f);
+
+	/* DDRAM */
+	mipi_dbi_command(mipi, ILI9341_ETMOD, 0x07);
+
+	/* Display */
+	mipi_dbi_command(mipi, ILI9341_DISCTRL, 0x0a, 0x82, 0x27, 0x00);
+	mipi_dbi_command(mipi, MIPI_DCS_EXIT_SLEEP_MODE);
+	msleep(100);
+
+	mipi_dbi_command(mipi, MIPI_DCS_SET_DISPLAY_ON);
+	msleep(100);
+
+	return 0;
+}
+
+static void mi0283qt_fini(void *data)
+{
+	struct mipi_dbi *mipi = data;
+
+	DRM_DEBUG_KMS("\n");
+	regulator_disable(mipi->regulator);
+}
+
+static const struct drm_simple_display_pipe_funcs mi0283qt_pipe_funcs = {
+	.enable = mipi_dbi_pipe_enable,
+	.disable = mipi_dbi_pipe_disable,
+	.update = tinydrm_display_pipe_update,
+	.prepare_fb = tinydrm_display_pipe_prepare_fb,
+};
+
+static const struct drm_display_mode mi0283qt_mode = {
+	TINYDRM_MODE(320, 240, 58, 43),
+};
+
+static struct drm_driver mi0283qt_driver = {
+	.driver_features	= DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME |
+				  DRIVER_ATOMIC,
+	TINYDRM_GEM_DRIVER_OPS,
+	.lastclose		= tinydrm_lastclose,
+	.debugfs_init		= mipi_dbi_debugfs_init,
+	.name			= "mi0283qt",
+	.desc			= "Multi-Inno MI0283QT",
+	.date			= "20160614",
+	.major			= 1,
+	.minor			= 0,
+};
+
+static const struct of_device_id mi0283qt_of_match[] = {
+	{ .compatible = "multi-inno,mi0283qt" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mi0283qt_of_match);
+
+static const struct spi_device_id mi0283qt_id[] = {
+	{ "mi0283qt", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(spi, mi0283qt_id);
+
+static int mi0283qt_probe(struct spi_device *spi)
+{
+	struct device *dev = &spi->dev;
+	struct gpio_desc *dc_gpio;
+	struct mipi_dbi *mipi;
+	u32 rotation = 0;
+	int err;
+
+	mipi = devm_kzalloc(dev, sizeof(*mipi), GFP_KERNEL);
+	if (mipi == NULL)
+		return -ENOMEM;
+
+	mipi->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
+	if (IS_ERR(mipi->reset)) {
+		dev_err(dev, "Failed to get gpio 'reset'\n");
+		return PTR_ERR(mipi->reset);
+	}
+
+	dc_gpio = devm_gpiod_get_optional(dev, "dc", GPIOD_OUT_LOW);
+	if (IS_ERR(dc_gpio)) {
+		dev_err(dev, "Failed to get gpio 'dc'\n");
+		return PTR_ERR(dc_gpio);
+	}
+
+	mipi->regulator = devm_regulator_get(dev, "power");
+	if (IS_ERR(mipi->regulator))
+		return PTR_ERR(mipi->regulator);
+
+	mipi->backlight = tinydrm_of_find_backlight(dev);
+	if (IS_ERR(mipi->backlight))
+		return PTR_ERR(mipi->backlight);
+
+	/* Return value is ignored: rotation was initialized to 0 above */
+	device_property_read_u32(dev, "rotation", &rotation);
+
+	err = mipi_dbi_spi_init(spi, mipi, dc_gpio, &mi0283qt_pipe_funcs,
+				&mi0283qt_driver, &mi0283qt_mode, rotation);
+	if (err != 0)
+		return err;
+
+	err = mi0283qt_init(mipi);
+	if (err != 0)
+		return err;
+
+	/* use devres to fini after drm unregister (drv->remove is before) */
+	err = devm_add_action(dev, mi0283qt_fini, mipi);
+	if (err != 0) {
+		/* The action was not registered, so undo the init by hand */
+		mi0283qt_fini(mipi);
+		return err;
+	}
+
+	err = devm_tinydrm_register(&mipi->tinydrm);
+	if (err != 0)
+		return err;
+
+	/* The shutdown and PM callbacks fetch mipi through drvdata */
+	spi_set_drvdata(spi, mipi);
+
+	DRM_DEBUG_DRIVER("Initialized %s:%s @%uMHz on minor %d\n",
+			 mipi->tinydrm.drm->driver->name, dev_name(dev),
+			 spi->max_speed_hz / 1000000,
+			 mipi->tinydrm.drm->primary->index);
+
+	return 0;
+}
+
+static void mi0283qt_shutdown(struct spi_device *spi)
+{
+	struct mipi_dbi *mipi = spi_get_drvdata(spi);
+
+	tinydrm_shutdown(&mipi->tinydrm);
+}
+
+static int __maybe_unused mi0283qt_pm_suspend(struct device *dev)
+{
+	struct mipi_dbi *mipi = dev_get_drvdata(dev);
+	int err = tinydrm_suspend(&mipi->tinydrm);
+
+	/* Leave the panel powered if tinydrm_suspend() failed */
+	if (err != 0)
+		return err;
+
+	/* mi0283qt_fini() disables the regulator */
+	mi0283qt_fini(mipi);
+	return 0;
+}
+
+static int __maybe_unused mi0283qt_pm_resume(struct device *dev)
+{
+	struct mipi_dbi *mipi = dev_get_drvdata(dev);
+	int err;
+
+	/* Bring the panel back up before handing over to tinydrm_resume() */
+	err = mi0283qt_init(mipi);
+	if (err == 0)
+		err = tinydrm_resume(&mipi->tinydrm);
+	return err;
+}
+
+static const struct dev_pm_ops mi0283qt_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(mi0283qt_pm_suspend, mi0283qt_pm_resume)
+};
+
+static struct spi_driver mi0283qt_spi_driver = {
+	.driver = {
+		.name = "mi0283qt",
+		.owner = THIS_MODULE,
+		.of_match_table = mi0283qt_of_match,
+		.pm = &mi0283qt_pm_ops,
+	},
+	.id_table = mi0283qt_id,
+	.probe = mi0283qt_probe,
+	.shutdown = mi0283qt_shutdown,
+};
+module_spi_driver(mi0283qt_spi_driver);
+
+MODULE_DESCRIPTION("Multi-Inno MI0283QT DRM driver");
+MODULE_AUTHOR("Noralf Trønnes");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/tinydrm/mipi-dbi.c b/drivers/gpu/drm/tinydrm/mipi-dbi.c
new file mode 100644
index 000000000000..07d49ba78d8e
--- /dev/null
+++ b/drivers/gpu/drm/tinydrm/mipi-dbi.c
@@ -0,0 +1,1005 @@
+/*
+ * MIPI Display Bus Interface (DBI) LCD controller support
+ *
+ * Copyright 2016 Noralf Trønnes
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <drm/tinydrm/mipi-dbi.h>
+#include <drm/tinydrm/tinydrm-helpers.h>
+#include <linux/debugfs.h>
+#include <linux/dma-buf.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/regulator/consumer.h>
+#include <linux/spi/spi.h>
+#include <video/mipi_display.h>
+
+#define MIPI_DBI_MAX_SPI_READ_SPEED 2000000 /* 2MHz */
+
+#define DCS_POWER_MODE_DISPLAY			BIT(2)
+#define DCS_POWER_MODE_DISPLAY_NORMAL_MODE	BIT(3)
+#define DCS_POWER_MODE_SLEEP_MODE		BIT(4)
+#define DCS_POWER_MODE_PARTIAL_MODE		BIT(5)
+#define DCS_POWER_MODE_IDLE_MODE		BIT(6)
+#define DCS_POWER_MODE_RESERVED_MASK		(BIT(0) | BIT(1) | BIT(7))
+
+/**
+ * DOC: overview
+ *
+ * This library provides helpers for MIPI Display Bus Interface (DBI)
+ * compatible display controllers.
+ *
+ * Many controllers for tiny lcd displays are MIPI compliant and can use this
+ * library. If a controller uses registers 0x2A and 0x2B to set the area to
+ * update and uses register 0x2C to write to frame memory, it is most likely
+ * MIPI compliant.
+ *
+ * Only MIPI Type 1 displays are supported since a full frame memory is needed.
+ *
+ * There are 3 MIPI DBI implementation types:
+ *
+ * A. Motorola 6800 type parallel bus
+ *
+ * B. Intel 8080 type parallel bus
+ *
+ * C. SPI type with 3 options:
+ *
+ *    1. 9-bit with the Data/Command signal as the ninth bit
+ *    2. Same as above except it's sent as 16 bits
+ *    3. 8-bit with the Data/Command signal as a separate D/CX pin
+ *
+ * Currently mipi_dbi only supports Type C options 1 and 3 with
+ * mipi_dbi_spi_init().
+ */
+
+#define MIPI_DBI_DEBUG_COMMAND(cmd, data, len) \
+({ \
+	if (!len) \
+		DRM_DEBUG_DRIVER("cmd=%02x\n", cmd); \
+	else if (len <= 32) \
+		DRM_DEBUG_DRIVER("cmd=%02x, par=%*ph\n", cmd, len, data); \
+	else \
+		DRM_DEBUG_DRIVER("cmd=%02x, len=%zu\n", cmd, len); \
+})
+
+static const u8 mipi_dbi_dcs_read_commands[] = {
+	MIPI_DCS_GET_DISPLAY_ID,
+	MIPI_DCS_GET_RED_CHANNEL,
+	MIPI_DCS_GET_GREEN_CHANNEL,
+	MIPI_DCS_GET_BLUE_CHANNEL,
+	MIPI_DCS_GET_DISPLAY_STATUS,
+	MIPI_DCS_GET_POWER_MODE,
+	MIPI_DCS_GET_ADDRESS_MODE,
+	MIPI_DCS_GET_PIXEL_FORMAT,
+	MIPI_DCS_GET_DISPLAY_MODE,
+	MIPI_DCS_GET_SIGNAL_MODE,
+	MIPI_DCS_GET_DIAGNOSTIC_RESULT,
+	MIPI_DCS_READ_MEMORY_START,
+	MIPI_DCS_READ_MEMORY_CONTINUE,
+	MIPI_DCS_GET_SCANLINE,
+	MIPI_DCS_GET_DISPLAY_BRIGHTNESS,
+	MIPI_DCS_GET_CONTROL_DISPLAY,
+	MIPI_DCS_GET_POWER_SAVE,
+	MIPI_DCS_GET_CABC_MIN_BRIGHTNESS,
+	MIPI_DCS_READ_DDB_START,
+	MIPI_DCS_READ_DDB_CONTINUE,
+	0, /* sentinel */
+};
+
+static bool mipi_dbi_command_is_read(struct mipi_dbi *mipi, u8 cmd)
+{
+	const u8 *entry = mipi->read_commands;
+	unsigned int left;
+
+	if (!entry)
+		return false;
+
+	/* The table is zero terminated; never look at more than 255 entries */
+	for (left = 0xff; left && *entry; left--, entry++) {
+		if (*entry == cmd)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * mipi_dbi_command_read - MIPI DCS read command
+ * @mipi: MIPI structure
+ * @cmd: Command
+ * @val: Value read
+ *
+ * Send MIPI DCS read command to the controller.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int mipi_dbi_command_read(struct mipi_dbi *mipi, u8 cmd, u8 *val)
+{
+	/* Without a table of readable commands nothing can be read */
+	if (mipi->read_commands == NULL)
+		return -EACCES;
+	/* Only commands listed in that table may be issued as reads */
+	if (!mipi_dbi_command_is_read(mipi, cmd))
+		return -EINVAL;
+	return mipi_dbi_command_buf(mipi, cmd, val, 1);
+}
+EXPORT_SYMBOL(mipi_dbi_command_read);
+
+/**
+ * mipi_dbi_command_buf - MIPI DCS command with parameter(s) in an array
+ * @mipi: MIPI structure
+ * @cmd: Command
+ * @data: Parameter buffer
+ * @len: Buffer length
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int mipi_dbi_command_buf(struct mipi_dbi *mipi, u8 cmd, u8 *data, size_t len)
+{
+	int status;
+
+	/* cmdlock is held for the whole duration of the command callback */
+	mutex_lock(&mipi->cmdlock);
+	status = mipi->command(mipi, cmd, data, len);
+	mutex_unlock(&mipi->cmdlock);
+	return status;
+}
+EXPORT_SYMBOL(mipi_dbi_command_buf);
+
+static int mipi_dbi_buf_copy(void *dst, struct drm_framebuffer *fb,
+				struct drm_clip_rect *clip, bool swap)
+{
+	struct drm_gem_cma_object *cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
+	struct dma_buf_attachment *import_attach = cma_obj->base.import_attach;
+	struct drm_format_name_buf format_name;
+	void *src = cma_obj->vaddr;
+	int ret = 0, end_ret = 0;
+
+	if (import_attach)
+		ret = dma_buf_begin_cpu_access(import_attach->dmabuf,
+					       DMA_FROM_DEVICE);
+	if (ret)
+		return ret;
+
+	switch (fb->format->format) {
+	case DRM_FORMAT_RGB565:
+		if (swap)
+			tinydrm_swab16(dst, src, fb, clip);
+		else
+			tinydrm_memcpy(dst, src, fb, clip);
+		break;
+	case DRM_FORMAT_XRGB8888:
+		tinydrm_xrgb8888_to_rgb565(dst, src, fb, clip, swap);
+		break;
+	default:
+		dev_err_once(fb->dev->dev, "Format is not supported: %s\n",
+			     drm_get_format_name(fb->format->format,
+						 &format_name));
+		ret = -EINVAL;
+	}
+
+	/* Always end the CPU access that was begun; report the first error */
+	if (import_attach)
+		end_ret = dma_buf_end_cpu_access(import_attach->dmabuf,
+						 DMA_FROM_DEVICE);
+	return ret ? ret : end_ret;
+}
+
+static int mipi_dbi_fb_dirty(struct drm_framebuffer *fb,
+			     struct drm_file *file_priv,
+			     unsigned int flags, unsigned int color,
+			     struct drm_clip_rect *clips,
+			     unsigned int num_clips)
+{
+	struct drm_gem_cma_object *cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
+	struct tinydrm_device *tdev = fb->dev->dev_private;
+	struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev);
+	bool swap = mipi->swap_bytes;
+	struct drm_clip_rect clip;
+	int ret = 0;
+	bool full;
+	void *tr;
+
+	mutex_lock(&tdev->dirty_lock);
+
+	if (!mipi->enabled)
+		goto out_unlock;
+
+	/* fbdev can flush even when we're not interested */
+	if (tdev->pipe.plane.fb != fb)
+		goto out_unlock;
+
+	full = tinydrm_merge_clips(&clip, clips, num_clips, flags,
+				   fb->width, fb->height);
+
+	DRM_DEBUG("Flushing [FB:%d] x1=%u, x2=%u, y1=%u, y2=%u\n", fb->base.id,
+		  clip.x1, clip.x2, clip.y1, clip.y2);
+	/* Go through tx_buf unless the framebuffer can be sent as it is */
+	if (!mipi->dc || !full || swap ||
+	    fb->format->format == DRM_FORMAT_XRGB8888) {
+		tr = mipi->tx_buf;
+		ret = mipi_dbi_buf_copy(mipi->tx_buf, fb, &clip, swap);
+		if (ret)
+			goto out_unlock;
+	} else {
+		tr = cma_obj->vaddr;
+	}
+	/* The window end is inclusive: both of its bytes come from x2/y2 - 1 */
+	mipi_dbi_command(mipi, MIPI_DCS_SET_COLUMN_ADDRESS,
+			 (clip.x1 >> 8) & 0xFF, clip.x1 & 0xFF,
+			 ((clip.x2 - 1) >> 8) & 0xFF, (clip.x2 - 1) & 0xFF);
+	mipi_dbi_command(mipi, MIPI_DCS_SET_PAGE_ADDRESS,
+			 (clip.y1 >> 8) & 0xFF, clip.y1 & 0xFF,
+			 ((clip.y2 - 1) >> 8) & 0xFF, (clip.y2 - 1) & 0xFF);
+
+	ret = mipi_dbi_command_buf(mipi, MIPI_DCS_WRITE_MEMORY_START, tr,
+				(clip.x2 - clip.x1) * (clip.y2 - clip.y1) * 2);
+
+out_unlock:
+	mutex_unlock(&tdev->dirty_lock);
+
+	if (ret)
+		dev_err_once(fb->dev->dev, "Failed to update display %d\n",
+			     ret);
+
+	return ret;
+}
+
+static const struct drm_framebuffer_funcs mipi_dbi_fb_funcs = {
+	.destroy	= drm_fb_cma_destroy,
+	.create_handle	= drm_fb_cma_create_handle,
+	.dirty		= mipi_dbi_fb_dirty,
+};
+
+/**
+ * mipi_dbi_pipe_enable - MIPI DBI pipe enable helper
+ * @pipe: Display pipe
+ * @crtc_state: CRTC state
+ *
+ * This function enables backlight. Drivers can use this as their
+ * &drm_simple_display_pipe_funcs->enable callback.
+ */
+void mipi_dbi_pipe_enable(struct drm_simple_display_pipe *pipe,
+			  struct drm_crtc_state *crtc_state)
+{
+	struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(pipe_to_tinydrm(pipe));
+	struct drm_framebuffer *cur_fb = pipe->plane.fb;
+
+	DRM_DEBUG_KMS("\n");
+
+	/* mipi_dbi_fb_dirty() bails out while ->enabled is false */
+	mipi->enabled = true;
+	if (cur_fb != NULL)
+		cur_fb->funcs->dirty(cur_fb, NULL, 0, 0, NULL, 0);
+
+	tinydrm_enable_backlight(mipi->backlight);
+}
+EXPORT_SYMBOL(mipi_dbi_pipe_enable);
+
+static void mipi_dbi_blank(struct mipi_dbi *mipi)
+{
+	struct drm_device *drm = mipi->tinydrm.drm;
+	u16 height = drm->mode_config.min_height;
+	u16 width = drm->mode_config.min_width;
+	size_t len = width * height * 2;
+
+	memset(mipi->tx_buf, 0, len);
+	/* Window end coordinates are inclusive: both bytes come from size - 1 */
+	mipi_dbi_command(mipi, MIPI_DCS_SET_COLUMN_ADDRESS, 0, 0,
+			 ((width - 1) >> 8) & 0xFF, (width - 1) & 0xFF);
+	mipi_dbi_command(mipi, MIPI_DCS_SET_PAGE_ADDRESS, 0, 0,
+			 ((height - 1) >> 8) & 0xFF, (height - 1) & 0xFF);
+	mipi_dbi_command_buf(mipi, MIPI_DCS_WRITE_MEMORY_START,
+			     (u8 *)mipi->tx_buf, len);
+}
+
+/**
+ * mipi_dbi_pipe_disable - MIPI DBI pipe disable helper
+ * @pipe: Display pipe
+ *
+ * This function disables backlight if present or if not the
+ * display memory is blanked. Drivers can use this as their
+ * &drm_simple_display_pipe_funcs->disable callback.
+ */
+void mipi_dbi_pipe_disable(struct drm_simple_display_pipe *pipe)
+{
+	struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(pipe_to_tinydrm(pipe));
+
+	DRM_DEBUG_KMS("\n");
+
+	mipi->enabled = false;
+	/* Without a backlight to switch off, clear the display memory instead */
+	if (!mipi->backlight) {
+		mipi_dbi_blank(mipi);
+		return;
+	}
+	tinydrm_disable_backlight(mipi->backlight);
+}
+EXPORT_SYMBOL(mipi_dbi_pipe_disable);
+
+static const uint32_t mipi_dbi_formats[] = {
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_XRGB8888,
+};
+
+/**
+ * mipi_dbi_init - MIPI DBI initialization
+ * @dev: Parent device
+ * @mipi: &mipi_dbi structure to initialize
+ * @pipe_funcs: Display pipe functions
+ * @driver: DRM driver
+ * @mode: Display mode
+ * @rotation: Initial rotation in degrees Counter Clock Wise
+ *
+ * This function initializes a &mipi_dbi structure and its underlying
+ * &tinydrm_device. It also sets up the display pipeline.
+ *
+ * Supported formats: Native RGB565 and emulated XRGB8888.
+ *
+ * Objects created by this function will be automatically freed on driver
+ * detach (devres).
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int mipi_dbi_init(struct device *dev, struct mipi_dbi *mipi,
+		  const struct drm_simple_display_pipe_funcs *pipe_funcs,
+		  struct drm_driver *driver,
+		  const struct drm_display_mode *mode, unsigned int rotation)
+{
+	size_t tx_size = mode->vdisplay * mode->hdisplay * sizeof(u16);
+	struct tinydrm_device *tdev = &mipi->tinydrm;
+	int err;
+
+	/* The command callback has to be set by the caller beforehand */
+	if (mipi->command == NULL)
+		return -EINVAL;
+
+	mutex_init(&mipi->cmdlock);
+	/* Room for one full frame of 16-bit pixels */
+	mipi->tx_buf = devm_kmalloc(dev, tx_size, GFP_KERNEL);
+	if (mipi->tx_buf == NULL)
+		return -ENOMEM;
+
+	err = devm_tinydrm_init(dev, tdev, &mipi_dbi_fb_funcs, driver);
+	if (err != 0)
+		return err;
+
+	/* TODO: Maybe add DRM_MODE_CONNECTOR_SPI */
+	err = tinydrm_display_pipe_init(tdev, pipe_funcs,
+					DRM_MODE_CONNECTOR_VIRTUAL,
+					mipi_dbi_formats,
+					ARRAY_SIZE(mipi_dbi_formats),
+					mode, rotation);
+	if (err != 0)
+		return err;
+
+	tdev->drm->mode_config.preferred_depth = 16;
+	mipi->rotation = rotation;
+	drm_mode_config_reset(tdev->drm);
+
+	DRM_DEBUG_KMS("preferred_depth=%u, rotation = %u\n",
+		      tdev->drm->mode_config.preferred_depth, rotation);
+
+	return 0;
+}
+EXPORT_SYMBOL(mipi_dbi_init);
+
+/**
+ * mipi_dbi_hw_reset - Hardware reset of controller
+ * @mipi: MIPI DBI structure
+ *
+ * Reset controller if the &mipi_dbi->reset gpio is set.
+ */
+void mipi_dbi_hw_reset(struct mipi_dbi *mipi)
+{
+	/* Nothing to do without a reset gpio; else 0 for 20 ms, 1 for 120 ms */
+	if (mipi->reset) {
+		gpiod_set_value_cansleep(mipi->reset, 0);
+		msleep(20);
+		gpiod_set_value_cansleep(mipi->reset, 1);
+		msleep(120);
+	}
+}
+EXPORT_SYMBOL(mipi_dbi_hw_reset);
+
+/**
+ * mipi_dbi_display_is_on - Check if display is on
+ * @mipi: MIPI DBI structure
+ *
+ * This function checks the Power Mode register (if readable) to see if
+ * display output is turned on. This can be used to see if the bootloader
+ * has already turned on the display avoiding flicker when the pipeline is
+ * enabled.
+ *
+ * Returns:
+ * true if the display can be verified to be on, false otherwise.
+ */
+bool mipi_dbi_display_is_on(struct mipi_dbi *mipi)
+{
+	const u8 on_mask = DCS_POWER_MODE_DISPLAY |
+			   DCS_POWER_MODE_DISPLAY_NORMAL_MODE |
+			   DCS_POWER_MODE_SLEEP_MODE;
+	u8 power_mode;
+
+	if (mipi_dbi_command_read(mipi, MIPI_DCS_GET_POWER_MODE, &power_mode))
+		return false;
+
+	/* Ignore reserved bits; every other bit has to match exactly */
+	if ((power_mode & ~DCS_POWER_MODE_RESERVED_MASK) != on_mask)
+		return false;
+
+	DRM_DEBUG_DRIVER("Display is ON\n");
+	return true;
+}
+EXPORT_SYMBOL(mipi_dbi_display_is_on);
+
+#if IS_ENABLED(CONFIG_SPI)
+
+/*
+ * Many controllers have a max speed of 10MHz, but can be pushed way beyond
+ * that. Increase reliability by running pixel data at max speed and the rest
+ * at 10MHz, preventing transfer glitches from messing up the init settings.
+ */
+static u32 mipi_dbi_spi_cmd_max_speed(struct spi_device *spi, size_t len)
+{
+	/* Up to 64 bytes: cap at 10MHz; longer transfers: 0 = use default */
+	if (len <= 64)
+		return min_t(u32, 10000000, spi->max_speed_hz);
+	return 0;
+}
+
+/*
+ * MIPI DBI Type C Option 1
+ *
+ * If the SPI controller doesn't have 9 bits per word support,
+ * use blocks of 9 bytes to send 8x 9-bit words using a 8-bit SPI transfer.
+ * Pad partial blocks with MIPI_DCS_NOP (zero).
+ * This is how the D/C bit (x) is added:
+ *     x7654321
+ *     0x765432
+ *     10x76543
+ *     210x7654
+ *     3210x765
+ *     43210x76
+ *     543210x7
+ *     6543210x
+ *     76543210
+ */
+
+static int mipi_dbi_spi1e_transfer(struct mipi_dbi *mipi, int dc,
+				   const void *buf, size_t len,
+				   unsigned int bpw)
+{
+	bool swap_bytes = (bpw == 16 && tinydrm_machine_little_endian());
+	size_t chunk, max_chunk = mipi->tx_buf9_len;
+	struct spi_device *spi = mipi->spi;
+	struct spi_transfer tr = {
+		.tx_buf = mipi->tx_buf9,
+		.bits_per_word = 8,
+	};
+	struct spi_message m;
+	const u8 *src = buf;
+	int i, ret;
+	u8 *dst;
+
+	if (drm_debug & DRM_UT_DRIVER)
+		pr_debug("[drm:%s] dc=%d, max_chunk=%zu, transfers:\n",
+			 __func__, dc, max_chunk);
+
+	tr.speed_hz = mipi_dbi_spi_cmd_max_speed(spi, len);
+	spi_message_init_with_transfers(&m, &tr, 1);
+
+	if (!dc) {
+		if (WARN_ON_ONCE(len != 1))
+			return -EINVAL;
+
+		/* Command: pad no-op's (zeroes) at beginning of block */
+		dst = mipi->tx_buf9;
+		memset(dst, 0, 9);
+		dst[8] = *src;
+		tr.len = 9;
+
+		tinydrm_dbg_spi_message(spi, &m);
+
+		return spi_sync(spi, &m);
+	}
+
+	/* max with room for adding one bit per byte */
+	max_chunk = max_chunk / 9 * 8;
+	/* but no bigger than len */
+	max_chunk = min(max_chunk, len);
+	/* 8 byte blocks */
+	max_chunk = max_t(size_t, 8, max_chunk & ~0x7);
+
+	while (len) {
+		size_t added = 0;
+
+		chunk = min(len, max_chunk);
+		len -= chunk;
+		dst = mipi->tx_buf9;
+
+		if (chunk < 8) {
+			u8 val, carry = 0;
+
+			/* Data: pad no-op's (zeroes) at end of block */
+			memset(dst, 0, 9);
+
+			if (swap_bytes) {
+				for (i = 1; i < (chunk + 1); i++) {
+					val = src[1];
+					*dst++ = carry | BIT(8 - i) | (val >> i);
+					carry = val << (8 - i);
+					i++;
+					val = src[0];
+					*dst++ = carry | BIT(8 - i) | (val >> i);
+					carry = val << (8 - i);
+					src += 2;
+				}
+				*dst++ = carry;
+			} else {
+				for (i = 1; i < (chunk + 1); i++) {
+					val = *src++;
+					*dst++ = carry | BIT(8 - i) | (val >> i);
+					carry = val << (8 - i);
+				}
+				*dst++ = carry;
+			}
+
+			chunk = 8;
+			added = 1;
+		} else {
+			for (i = 0; i < chunk; i += 8) {
+				if (swap_bytes) {
+					*dst++ =                 BIT(7) | (src[1] >> 1);
+					*dst++ = (src[1] << 7) | BIT(6) | (src[0] >> 2);
+					*dst++ = (src[0] << 6) | BIT(5) | (src[3] >> 3);
+					*dst++ = (src[3] << 5) | BIT(4) | (src[2] >> 4);
+					*dst++ = (src[2] << 4) | BIT(3) | (src[5] >> 5);
+					*dst++ = (src[5] << 3) | BIT(2) | (src[4] >> 6);
+					*dst++ = (src[4] << 2) | BIT(1) | (src[7] >> 7);
+					*dst++ = (src[7] << 1) | BIT(0);
+					*dst++ = src[6];
+				} else {
+					*dst++ =                 BIT(7) | (src[0] >> 1);
+					*dst++ = (src[0] << 7) | BIT(6) | (src[1] >> 2);
+					*dst++ = (src[1] << 6) | BIT(5) | (src[2] >> 3);
+					*dst++ = (src[2] << 5) | BIT(4) | (src[3] >> 4);
+					*dst++ = (src[3] << 4) | BIT(3) | (src[4] >> 5);
+					*dst++ = (src[4] << 3) | BIT(2) | (src[5] >> 6);
+					*dst++ = (src[5] << 2) | BIT(1) | (src[6] >> 7);
+					*dst++ = (src[6] << 1) | BIT(0);
+					*dst++ = src[7];
+				}
+
+				src += 8;
+				added++;
+			}
+		}
+
+		tr.len = chunk + added;
+
+		tinydrm_dbg_spi_message(spi, &m);
+		ret = spi_sync(spi, &m);
+		if (ret)
+			return ret;
+	};
+
+	return 0;
+}
+
+static int mipi_dbi_spi1_transfer(struct mipi_dbi *mipi, int dc,
+				  const void *buf, size_t len,
+				  unsigned int bpw)
+{
+	struct spi_transfer tr = { .bits_per_word = 9 };
+	struct spi_device *spi = mipi->spi;
+	const u16 *src16 = buf;
+	const u8 *src8 = buf;
+	struct spi_message m;
+	size_t max_chunk;
+	u16 *dst16;
+	int ret;
+
+	if (!tinydrm_spi_bpw_supported(spi, 9))
+		return mipi_dbi_spi1e_transfer(mipi, dc, buf, len, bpw);
+
+	tr.speed_hz = mipi_dbi_spi_cmd_max_speed(spi, len);
+	max_chunk = mipi->tx_buf9_len;
+	dst16 = mipi->tx_buf9;
+
+	if (drm_debug & DRM_UT_DRIVER)
+		pr_debug("[drm:%s] dc=%d, max_chunk=%zu, transfers:\n",
+			 __func__, dc, max_chunk);
+
+	/* Chunks are counted in source bytes, each byte fills one u16 word */
+	max_chunk = min(max_chunk / 2, len);
+
+	spi_message_init_with_transfers(&m, &tr, 1);
+	tr.tx_buf = dst16;
+
+	while (len) {
+		size_t chunk = min(len, max_chunk);
+		unsigned int i;
+
+		if (bpw == 16 && tinydrm_machine_little_endian()) {
+			/* NOTE(review): assumes an even chunk size - confirm */
+			for (i = 0; i < chunk; i += 2) {
+				dst16[i]     = *src16 >> 8;
+				dst16[i + 1] = *src16++ & 0xFF;
+				if (dc) {
+					dst16[i]     |= 0x0100;
+					dst16[i + 1] |= 0x0100;
+				}
+			}
+		} else {
+			for (i = 0; i < chunk; i++) {
+				dst16[i] = *src8++;
+				if (dc)
+					dst16[i] |= 0x0100;
+			}
+		}
+
+		tr.len = chunk * 2;	/* in bytes: one u16 per 9-bit word */
+		len -= chunk;
+
+		tinydrm_dbg_spi_message(spi, &m);
+		ret = spi_sync(spi, &m);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int mipi_dbi_typec1_command(struct mipi_dbi *mipi, u8 cmd,
+				   u8 *parameters, size_t num)
+{
+	unsigned int bpw = (cmd == MIPI_DCS_WRITE_MEMORY_START) ? 16 : 8;
+	int ret;
+
+	if (mipi_dbi_command_is_read(mipi, cmd))
+		return -ENOTSUPP; /* read commands are rejected here */
+
+	MIPI_DBI_DEBUG_COMMAND(cmd, parameters, num);
+
+	ret = mipi_dbi_spi1_transfer(mipi, 0, &cmd, 1, 8); /* dc=0: command */
+	if (ret || !num)
+		return ret; /* error, or no parameters to send */
+
+	return mipi_dbi_spi1_transfer(mipi, 1, parameters, num, bpw); /* dc=1 */
+}
+
+/* MIPI DBI Type C Option 3 */
+
+static int mipi_dbi_typec3_command_read(struct mipi_dbi *mipi, u8 cmd,
+					u8 *data, size_t len)
+{
+	struct spi_device *spi = mipi->spi;
+	u32 speed_hz = min_t(u32, MIPI_DBI_MAX_SPI_READ_SPEED,
+			     spi->max_speed_hz / 2);
+	struct spi_transfer tr[2] = {
+		{
+			.speed_hz = speed_hz,
+			.tx_buf = &cmd, /* first transfer: the command byte */
+			.len = 1,
+		}, {
+			.speed_hz = speed_hz,
+			.len = len, /* second transfer: the reply */
+		},
+	};
+	struct spi_message m;
+	u8 *buf;
+	int ret;
+
+	if (!len)
+		return -EINVAL;
+
+	/*
+	 * Support non-standard 24-bit and 32-bit Nokia read commands which
+	 * start with a dummy clock, so we need to read an extra byte.
+	 */
+	if (cmd == MIPI_DCS_GET_DISPLAY_ID ||
+	    cmd == MIPI_DCS_GET_DISPLAY_STATUS) {
+		if (!(len == 3 || len == 4))
+			return -EINVAL;
+
+		tr[1].len = len + 1;
+	}
+
+	buf = kmalloc(tr[1].len, GFP_KERNEL); /* bounce buffer for the reply */
+	if (!buf)
+		return -ENOMEM;
+
+	tr[1].rx_buf = buf;
+	gpiod_set_value_cansleep(mipi->dc, 0); /* D/C low for the whole message */
+
+	spi_message_init_with_transfers(&m, tr, ARRAY_SIZE(tr));
+	ret = spi_sync(spi, &m);
+	if (ret)
+		goto err_free;
+
+	tinydrm_dbg_spi_message(spi, &m); /* logged after the sync completes */
+
+	if (tr[1].len == len) {
+		memcpy(data, buf, len);
+	} else {
+		unsigned int i;
+
+		for (i = 0; i < len; i++) /* shift the stream left by one bit */
+			data[i] = (buf[i] << 1) | !!(buf[i + 1] & BIT(7));
+	}
+
+	MIPI_DBI_DEBUG_COMMAND(cmd, data, len);
+
+err_free:
+	kfree(buf);
+
+	return ret; /* 0 on success, spi_sync() error otherwise */
+}
+
+static int mipi_dbi_typec3_command(struct mipi_dbi *mipi, u8 cmd,
+				   u8 *par, size_t num)
+{
+	struct spi_device *spi = mipi->spi;
+	unsigned int bpw = 8;
+	u32 speed_hz;
+	int ret;
+
+	if (mipi_dbi_command_is_read(mipi, cmd)) /* reads take their own path */
+		return mipi_dbi_typec3_command_read(mipi, cmd, par, num);
+
+	MIPI_DBI_DEBUG_COMMAND(cmd, par, num);
+
+	gpiod_set_value_cansleep(mipi->dc, 0); /* D/C low for the command byte */
+	speed_hz = mipi_dbi_spi_cmd_max_speed(spi, 1);
+	ret = tinydrm_spi_transfer(spi, speed_hz, NULL, 8, &cmd, 1);
+	if (ret || !num)
+		return ret; /* error, or no parameters to send */
+
+	if (cmd == MIPI_DCS_WRITE_MEMORY_START && !mipi->swap_bytes)
+		bpw = 16; /* memory write uses 16-bit words unless swapping */
+
+	gpiod_set_value_cansleep(mipi->dc, 1); /* D/C high for the parameters */
+	speed_hz = mipi_dbi_spi_cmd_max_speed(spi, num);
+
+	return tinydrm_spi_transfer(spi, speed_hz, NULL, bpw, par, num);
+}
+
+/**
+ * mipi_dbi_spi_init - Initialize MIPI DBI SPI interfaced controller
+ * @spi: SPI device
+ * @dc: D/C gpio (optional)
+ * @mipi: &mipi_dbi structure to initialize
+ * @pipe_funcs: Display pipe functions
+ * @driver: DRM driver
+ * @mode: Display mode
+ * @rotation: Initial rotation in degrees counter-clockwise
+ *
+ * This function sets &mipi_dbi->command, enables &mipi->read_commands for the
+ * usual read commands and initializes @mipi using mipi_dbi_init().
+ *
+ * If @dc is set, a Type C Option 3 interface is assumed, if not
+ * Type C Option 1.
+ *
+ * If the SPI master driver doesn't support the necessary bits per word,
+ * the following transformation is used:
+ *
+ * - 9-bit: reorder buffer as 9x 8-bit words, padded with no-op command.
+ * - 16-bit: if big endian send as 8-bit, if little endian swap bytes
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int mipi_dbi_spi_init(struct spi_device *spi, struct mipi_dbi *mipi,
+		      struct gpio_desc *dc,
+		      const struct drm_simple_display_pipe_funcs *pipe_funcs,
+		      struct drm_driver *driver,
+		      const struct drm_display_mode *mode,
+		      unsigned int rotation)
+{
+	size_t tx_size = tinydrm_spi_max_transfer_size(spi, 0);
+	struct device *dev = &spi->dev;
+	int ret;
+
+	if (tx_size < 16) {
+		DRM_ERROR("SPI transmit buffer too small: %zu\n", tx_size);
+		return -EINVAL;
+	}
+
+	/*
+	 * Even though it's not the SPI device that does DMA (the master does),
+	 * the dma mask is necessary for the dma_alloc_wc() in
+	 * drm_gem_cma_create(). The dma_addr returned will be a physical
+	 * address which might be different from the bus address, but this is
+	 * not a problem since the address will not be used.
+	 * The virtual address is used in the transfer and the SPI core
+	 * re-maps it on the SPI master device using the DMA streaming API
+	 * (spi_map_buf()).
+	 */
+	if (!dev->coherent_dma_mask) {
+		ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32));
+		if (ret) {
+			dev_warn(dev, "Failed to set dma mask %d\n", ret);
+			return ret;
+		}
+	}
+
+	mipi->spi = spi;
+	mipi->read_commands = mipi_dbi_dcs_read_commands;
+
+	if (dc) { /* D/C gpio present: Type C Option 3 */
+		mipi->command = mipi_dbi_typec3_command;
+		mipi->dc = dc;
+		if (tinydrm_machine_little_endian() &&
+		    !tinydrm_spi_bpw_supported(spi, 16))
+			mipi->swap_bytes = true;
+	} else { /* no D/C gpio: Type C Option 1 */
+		mipi->command = mipi_dbi_typec1_command;
+		mipi->tx_buf9_len = tx_size;
+		mipi->tx_buf9 = devm_kmalloc(dev, tx_size, GFP_KERNEL);
+		if (!mipi->tx_buf9)
+			return -ENOMEM;
+	}
+
+	return mipi_dbi_init(dev, mipi, pipe_funcs, driver, mode, rotation);
+}
+EXPORT_SYMBOL(mipi_dbi_spi_init);
+
+#endif /* CONFIG_SPI */
+
+#ifdef CONFIG_DEBUG_FS
+
+static ssize_t mipi_dbi_debugfs_command_write(struct file *file,
+					      const char __user *ubuf,
+					      size_t count, loff_t *ppos)
+{
+	struct seq_file *m = file->private_data;
+	struct mipi_dbi *mipi = m->private;
+	u8 val, cmd, parameters[64];
+	char *buf, *pos, *token;
+	unsigned int i;
+	int ret;
+
+	buf = memdup_user_nul(ubuf, count);
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
+
+	/* strip trailing whitespace; buf[0] is kept, count == 0 is a no-op */
+	for (i = count; i > 1; i--)
+		if (isspace(buf[i - 1]))
+			buf[i - 1] = '\0';
+		else
+			break;
+	i = 0; /* from here on: number of parameters parsed */
+	pos = buf;
+	while (pos) {
+		token = strsep(&pos, " "); /* space-separated hex bytes */
+		if (!token) {
+			ret = -EINVAL;
+			goto err_free;
+		}
+
+		ret = kstrtou8(token, 16, &val);
+		if (ret < 0)
+			goto err_free;
+
+		if (token == buf) /* the first token is the command */
+			cmd = val;
+		else
+			parameters[i++] = val;
+
+		if (i == 64) { /* in effect at most 63 parameters */
+			ret = -E2BIG;
+			goto err_free;
+		}
+	}
+
+	ret = mipi_dbi_command_buf(mipi, cmd, parameters, i);
+
+err_free:
+	kfree(buf);
+
+	return ret < 0 ? ret : count; /* whole write consumed on success */
+}
+
+static int mipi_dbi_debugfs_command_show(struct seq_file *m, void *unused)
+{
+	struct mipi_dbi *mipi = m->private;
+	u8 cmd, val[4];
+	size_t len, i;
+	int ret;
+
+	for (cmd = 0; cmd < 255; cmd++) { /* u8 counter: 0xff is not probed */
+		if (!mipi_dbi_command_is_read(mipi, cmd))
+			continue;
+
+		switch (cmd) { /* reply length in bytes, at most sizeof(val) */
+		case MIPI_DCS_READ_MEMORY_START:
+		case MIPI_DCS_READ_MEMORY_CONTINUE:
+			len = 2;
+			break;
+		case MIPI_DCS_GET_DISPLAY_ID:
+			len = 3;
+			break;
+		case MIPI_DCS_GET_DISPLAY_STATUS:
+			len = 4;
+			break;
+		default:
+			len = 1;
+			break;
+		}
+
+		seq_printf(m, "%02x: ", cmd);
+		ret = mipi_dbi_command_buf(mipi, cmd, val, len);
+		if (ret) {
+			seq_puts(m, "XX\n"); /* failed read: keep going */
+			continue;
+		}
+
+		for (i = 0; i < len; i++)
+			seq_printf(m, "%02x", val[i]);
+		seq_puts(m, "\n");
+	}
+
+	return 0; /* individual read failures are not reported as errors */
+}
+
+static int mipi_dbi_debugfs_command_open(struct inode *inode,
+					 struct file *file)
+{
+	return single_open(file, mipi_dbi_debugfs_command_show,
+			   inode->i_private); /* read back as m->private */
+}
+
+static const struct file_operations mipi_dbi_debugfs_command_fops = {
+	.owner = THIS_MODULE,
+	.open = mipi_dbi_debugfs_command_open,
+	.read = seq_read, /* output comes from ..._command_show() */
+	.llseek = seq_lseek,
+	.release = single_release, /* pairs with single_open() in .open */
+	.write = mipi_dbi_debugfs_command_write,
+};
+
+static const struct drm_info_list mipi_dbi_debugfs_list[] = {
+	{ "fb",   drm_fb_cma_debugfs_show, 0 }, /* registered by ..._debugfs_init() */
+};
+
+/**
+ * mipi_dbi_debugfs_init - Create debugfs entries
+ * @minor: DRM minor
+ *
+ * This function creates a 'command' debugfs file for sending commands to the
+ * controller or getting the read command values, and also the 'fb' file.
+ * Drivers can use this as their &drm_driver->debugfs_init callback.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int mipi_dbi_debugfs_init(struct drm_minor *minor)
+{
+	struct tinydrm_device *tdev = minor->dev->dev_private;
+	struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev);
+	umode_t mode = S_IFREG | S_IWUSR; /* always writable by the owner */
+
+	if (mipi->read_commands)
+		mode |= S_IRUGO; /* readable only when reads are enabled */
+	debugfs_create_file("command", mode, minor->debugfs_root, mipi,
+			    &mipi_dbi_debugfs_command_fops); /* result unchecked */
+
+	return drm_debugfs_create_files(mipi_dbi_debugfs_list,
+					ARRAY_SIZE(mipi_dbi_debugfs_list),
+					minor->debugfs_root, minor);
+}
+EXPORT_SYMBOL(mipi_dbi_debugfs_init);
+
+#endif
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index a0cd4ea15f07..0c06844af445 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -843,7 +843,7 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc,
 
 	}
 
-	__drm_atomic_helper_crtc_destroy_state(state);
+	drm_atomic_helper_crtc_destroy_state(crtc, state);
 }
 
 static const struct drm_crtc_funcs vc4_crtc_funcs = {
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index db920771bfb5..ab3016982466 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -594,12 +594,14 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
 					  args->shader_rec_count);
 	struct vc4_bo *bo;
 
-	if (uniforms_offset < shader_rec_offset ||
+	if (shader_rec_offset < args->bin_cl_size ||
+	    uniforms_offset < shader_rec_offset ||
 	    exec_size < uniforms_offset ||
 	    args->shader_rec_count >= (UINT_MAX /
 					  sizeof(struct vc4_shader_state)) ||
 	    temp_size < exec_size) {
 		DRM_ERROR("overflow in exec arguments\n");
+		ret = -EINVAL;
 		goto fail;
 	}
 
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index c1f06897136b..f7a229df572d 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -858,7 +858,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
 		}
 	}
 	plane = &vc4_plane->base;
-	ret = drm_universal_plane_init(dev, plane, 0xff,
+	ret = drm_universal_plane_init(dev, plane, 0,
 				       &vc4_plane_funcs,
 				       formats, num_formats,
 				       type, NULL);
diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c
index 08886a309757..5cdd003605f5 100644
--- a/drivers/gpu/drm/vc4/vc4_render_cl.c
+++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
@@ -461,7 +461,7 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
 		}
 
 		ret = vc4_full_res_bounds_check(exec, *obj, surf);
-		if (!ret)
+		if (ret)
 			return ret;
 
 		return 0;
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index c27858ae0552..eeb021fe6410 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -399,6 +399,7 @@ static int host1x_device_add(struct host1x *host1x,
 	dev_set_name(&device->dev, "%s", driver->driver.name);
 	of_dma_configure(&device->dev, host1x->dev->of_node);
 	device->dev.release = host1x_device_release;
+	device->dev.of_node = host1x->dev->of_node;
 	device->dev.bus = &host1x_bus_type;
 	device->dev.parent = host1x->dev;
 
diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index 97218af4fe75..8368e6f766ee 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -1238,12 +1238,6 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base)
 			platform_device_put(pdev);
 			goto err_register;
 		}
-
-		/*
-		 * Set of_node only after calling platform_device_add. Otherwise
-		 * the platform:imx-ipuv3-crtc modalias won't be used.
-		 */
-		pdev->dev.of_node = of_node;
 	}
 
 	return 0;
diff --git a/drivers/gpu/ipu-v3/ipu-csi.c b/drivers/gpu/ipu-v3/ipu-csi.c
index 63c7292f427a..24e12b87a0cb 100644
--- a/drivers/gpu/ipu-v3/ipu-csi.c
+++ b/drivers/gpu/ipu-v3/ipu-csi.c
@@ -544,6 +544,7 @@ void ipu_csi_set_downsize(struct ipu_csi *csi, bool horiz, bool vert)
 
 	spin_unlock_irqrestore(&csi->lock, flags);
 }
+EXPORT_SYMBOL_GPL(ipu_csi_set_downsize);
 
 void ipu_csi_set_test_generator(struct ipu_csi *csi, bool active,
 				u32 r_value, u32 g_value, u32 b_value,