diff options
author | John Brooks <john@fastquake.com> | 2022-08-09 17:15:33 -0400 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2022-08-23 19:01:18 +0000 |
commit | 98ba1e0d817e0354aad5d82eb9a2dc4cce33540f (patch) | |
tree | a7ce6d9087b427fb0ac04908183e0f38f4f56cc2 | |
parent | 35f053ba8cd11a16077a79ebf5d3952f6112b096 (diff) | |
download | mesa-98ba1e0d817e0354aad5d82eb9a2dc4cce33540f.tar.gz mesa-98ba1e0d817e0354aad5d82eb9a2dc4cce33540f.tar.bz2 mesa-98ba1e0d817e0354aad5d82eb9a2dc4cce33540f.zip |
radv: Fix mipmap views on GFX10+
As explained in the previous commit, GFX9+ has issues with addressing
mipmaps in block-compressed images. In the case of copy commands, we fix
this by doing an extra copy for the missing blocks.
For GFX10, the mipmap layout in memory allows us to do better than that. We
can change the base level of the descriptor to one level bigger than the
requested level and adjust the extent and address to match. This is done by
ComputeNonBlockCompressedView in addrlib. Thus on GFX10 we can skip the
fixup copy workaround, and this will also fix cases outside of explicit
copy commands.
Signed-off-by: John Brooks <john@fastquake.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Acked-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17970>
-rw-r--r-- | src/amd/common/ac_surface.c | 47 | ||||
-rw-r--r-- | src/amd/common/ac_surface.h | 15 | ||||
-rw-r--r-- | src/amd/vulkan/radv_image.c | 77 | ||||
-rw-r--r-- | src/amd/vulkan/radv_meta_bufimage.c | 9 |
4 files changed, 128 insertions, 20 deletions
diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index 2ef131c90c9..e00716e04ec 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -3040,6 +3040,53 @@ ac_surface_addr_from_coord(struct ac_addrlib *addrlib, const struct radeon_info return output.addr; } +void +ac_surface_compute_nbc_view(struct ac_addrlib *addrlib, const struct radeon_info *info, + const struct radeon_surf *surf, const struct ac_surf_info *surf_info, + unsigned level, unsigned layer, struct ac_surf_nbc_view *out) +{ + /* Only implemented for GFX10+ */ + assert(info->gfx_level >= GFX10); + + ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT input = {0}; + input.size = sizeof(ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT); + input.swizzleMode = surf->u.gfx9.swizzle_mode; + input.resourceType = surf->u.gfx9.resource_type; + switch (surf->bpe) { + case 8: + input.format = ADDR_FMT_BC1; + break; + case 16: + input.format = ADDR_FMT_BC3; + break; + default: + assert(0); + } + input.width = surf_info->width; + input.height = surf_info->height; + input.numSlices = surf_info->array_size; + input.numMipLevels = surf_info->levels; + input.pipeBankXor = surf->tile_swizzle; + input.slice = layer; + input.mipId = level; + + ADDR_E_RETURNCODE res; + ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT output = {0}; + output.size = sizeof(ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT); + res = Addr2ComputeNonBlockCompressedView(addrlib->handle, &input, &output); + if (res == ADDR_OK) { + out->base_address_offset = output.offset; + out->tile_swizzle = output.pipeBankXor; + out->width = output.unalignedWidth; + out->height = output.unalignedHeight; + out->max_mip = output.numMipLevels; + out->level = output.mipId; + out->valid = true; + } else { + out->valid = false; + } +} + void ac_surface_print_info(FILE *out, const struct radeon_info *info, const struct radeon_surf *surf) { diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h index 60ebcc5719c..48a8c4c6628 100644 --- 
a/src/amd/common/ac_surface.h +++ b/src/amd/common/ac_surface.h @@ -415,6 +415,17 @@ struct ac_surf_config { unsigned is_cube : 1; }; +/* Output parameters for ac_surface_compute_nbc_view */ +struct ac_surf_nbc_view { + bool valid; + uint32_t width; + uint32_t height; + uint32_t level; + uint32_t max_mip; /* Used for max_mip in the resource descriptor */ + uint8_t tile_swizzle; + uint64_t base_address_offset; +}; + struct ac_addrlib *ac_addrlib_create(const struct radeon_info *info, uint64_t *max_alignment); void ac_addrlib_destroy(struct ac_addrlib *addrlib); void *ac_addrlib_get_handle(struct ac_addrlib *addrlib); @@ -474,6 +485,10 @@ uint64_t ac_surface_addr_from_coord(struct ac_addrlib *addrlib, const struct rad const struct radeon_surf *surf, const struct ac_surf_info *surf_info, unsigned level, unsigned x, unsigned y, unsigned layer, bool is_3d); +void ac_surface_compute_nbc_view(struct ac_addrlib *addrlib, const struct radeon_info *info, + const struct radeon_surf *surf, + const struct ac_surf_info *surf_info, unsigned level, + unsigned layer, struct ac_surf_nbc_view *out); void ac_surface_print_info(FILE *out, const struct radeon_info *info, const struct radeon_surf *surf); diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 58af590c482..3f1e0c4e69b 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -764,12 +764,14 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *im const struct legacy_surf_level *base_level_info, unsigned plane_id, unsigned base_level, unsigned first_level, unsigned block_width, bool is_stencil, bool is_storage_image, bool disable_compression, - bool enable_write_compression, uint32_t *state) + bool enable_write_compression, uint32_t *state, + const struct ac_surf_nbc_view *nbc_view) { struct radv_image_plane *plane = &image->planes[plane_id]; struct radv_image_binding *binding = image->disjoint ? 
&image->bindings[plane_id] : &image->bindings[0]; uint64_t gpu_address = binding->bo ? radv_buffer_get_va(binding->bo) + binding->offset : 0; uint64_t va = gpu_address; + uint8_t swizzle = plane->surface.tile_swizzle; enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level; uint64_t meta_va = 0; if (gfx_level >= GFX9) { @@ -777,12 +779,16 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *im va += plane->surface.u.gfx9.zs.stencil_offset; else va += plane->surface.u.gfx9.surf_offset; + if (nbc_view && nbc_view->valid) { + va += nbc_view->base_address_offset; + swizzle = nbc_view->tile_swizzle; + } } else va += (uint64_t)base_level_info->offset_256B * 256; state[0] = va >> 8; if (gfx_level >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D) - state[0] |= plane->surface.tile_swizzle; + state[0] |= swizzle; state[1] &= C_008F14_BASE_ADDRESS_HI; state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40); @@ -953,7 +959,8 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima unsigned last_level, unsigned first_layer, unsigned last_layer, unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state, uint32_t *fmask_state, - VkImageCreateFlags img_create_flags) + VkImageCreateFlags img_create_flags, + const struct ac_surf_nbc_view *nbc_view) { const struct util_format_description *desc; enum pipe_swizzle swizzle[4]; @@ -1032,6 +1039,8 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima unsigned max_mip = image->info.samples > 1 ? 
util_logbase2(image->info.samples) : image->info.levels - 1; + if (nbc_view && nbc_view->valid) + max_mip = nbc_view->max_mip; if (device->physical_device->rad_info.gfx_level >= GFX11) { state[1] |= S_00A004_MAX_MIP(max_mip); @@ -1326,12 +1335,13 @@ radv_make_texture_descriptor(struct radv_device *device, struct radv_image *imag const VkComponentMapping *mapping, unsigned first_level, unsigned last_level, unsigned first_layer, unsigned last_layer, unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state, - uint32_t *fmask_state, VkImageCreateFlags img_create_flags) + uint32_t *fmask_state, VkImageCreateFlags img_create_flags, + const struct ac_surf_nbc_view *nbc_view) { if (device->physical_device->rad_info.gfx_level >= GFX10) { gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping, first_level, last_level, first_layer, last_layer, width, height, - depth, min_lod, state, fmask_state, img_create_flags); + depth, min_lod, state, fmask_state, img_create_flags, nbc_view); } else { si_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping, first_level, last_level, first_layer, last_layer, width, height, @@ -1351,11 +1361,11 @@ radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image, radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->vk.image_type, image->vk.format, &fixedmapping, 0, image->info.levels - 1, 0, image->info.array_size - 1, image->info.width, image->info.height, - image->info.depth, 0.0f, desc, NULL, 0); + image->info.depth, 0.0f, desc, NULL, 0, NULL); si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0, image->planes[0].surface.blk_w, false, false, false, false, - desc); + desc, NULL); ac_surface_get_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface, image->info.levels, desc, &md->size_metadata, md->metadata); @@ -1926,13 +1936,27 @@ 
radv_image_create(VkDevice _device, const struct radv_image_create_info *create_ return VK_SUCCESS; } +static inline void +compute_non_block_compressed_view(const struct radv_device *device, + const struct radv_image_view *iview, + struct ac_surf_nbc_view *nbc_view) +{ + const struct radv_image *image = iview->image; + const struct radeon_surf *surf = &image->planes[0].surface; + struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws); + + ac_surface_compute_nbc_view(addrlib, &device->physical_device->rad_info, surf, &image->info, + iview->vk.base_mip_level, iview->vk.base_array_layer, nbc_view); +} + static void radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_device *device, VkFormat vk_format, const VkComponentMapping *components, float min_lod, bool is_storage_image, bool disable_compression, bool enable_compression, unsigned plane_id, - unsigned descriptor_plane_id, VkImageCreateFlags img_create_flags) + unsigned descriptor_plane_id, VkImageCreateFlags img_create_flags, + const struct ac_surf_nbc_view *nbc_view) { struct radv_image *image = iview->image; struct radv_image_plane *plane = &image->planes[plane_id]; @@ -1952,8 +1976,15 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format); - if (device->physical_device->rad_info.gfx_level >= GFX9) + if (device->physical_device->rad_info.gfx_level >= GFX9) { hw_level = iview->vk.base_mip_level; + if (nbc_view->valid) { + hw_level = nbc_view->level; + iview->extent.width = nbc_view->width; + iview->extent.height = nbc_view->height; + } + } + radv_make_texture_descriptor( device, image, is_storage_image, iview->vk.view_type, vk_format, components, hw_level, hw_level + iview->vk.level_count - 1, iview->vk.base_array_layer, @@ -1962,7 +1993,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic 
vk_format_get_plane_height(image->vk.format, plane_id, iview->extent.height), iview->extent.depth, min_lod, descriptor->plane_descriptors[descriptor_plane_id], descriptor_plane_id || is_storage_image ? NULL : descriptor->fmask_descriptor, - img_create_flags); + img_create_flags, nbc_view); const struct legacy_surf_level *base_level_info = NULL; if (device->physical_device->rad_info.gfx_level <= GFX9) { @@ -1978,7 +2009,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic si_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, iview->vk.base_mip_level, iview->vk.base_mip_level, blk_w, is_stencil, is_storage_image, disable_compression, enable_write_compression, - descriptor->plane_descriptors[descriptor_plane_id]); + descriptor->plane_descriptors[descriptor_plane_id], nbc_view); } static unsigned @@ -2057,6 +2088,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device, const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; uint32_t plane_count = 1; float min_lod = 0.0f; + struct ac_surf_nbc_view nbc_view = {0}; const struct VkImageViewMinLodCreateInfoEXT *min_lod_info = vk_find_struct_const(pCreateInfo->pNext, IMAGE_VIEW_MIN_LOD_CREATE_INFO_EXT); @@ -2179,13 +2211,22 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device, lvl_width = round_up_u32(lvl_width * view_bw, img_bw); lvl_height = round_up_u32(lvl_height * view_bh, img_bh); - lvl_width <<= range->baseMipLevel; - lvl_height <<= range->baseMipLevel; - - iview->extent.width = CLAMP(lvl_width, iview->extent.width, + iview->extent.width = CLAMP(lvl_width << range->baseMipLevel, iview->extent.width, iview->image->planes[0].surface.u.gfx9.base_mip_width); - iview->extent.height = CLAMP(lvl_height, iview->extent.height, + iview->extent.height = CLAMP(lvl_height << range->baseMipLevel, iview->extent.height, iview->image->planes[0].surface.u.gfx9.base_mip_height); + + /* If the hardware-computed 
extent is still too small, on GFX10 + * we can attempt another workaround provided by addrlib that + * changes the descriptor's base level, and adjusts the address and + * extents accordingly. + */ + if (device->physical_device->rad_info.gfx_level >= GFX10 && + (radv_minify(iview->extent.width, range->baseMipLevel) < lvl_width || + radv_minify(iview->extent.height, range->baseMipLevel) < lvl_height) && + iview->vk.layer_count == 1) { + compute_non_block_compressed_view(device, iview, &nbc_view); + } } } } @@ -2199,10 +2240,10 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device, VkFormat format = vk_format_get_plane_format(iview->vk.view_format, i); radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, min_lod, false, disable_compression, enable_compression, iview->plane_id + i, - i, img_create_flags); + i, img_create_flags, &nbc_view); radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, min_lod, true, disable_compression, enable_compression, iview->plane_id + i, - i, img_create_flags); + i, img_create_flags, &nbc_view); } } diff --git a/src/amd/vulkan/radv_meta_bufimage.c b/src/amd/vulkan/radv_meta_bufimage.c index 4fd4a6f6267..3ee22cd0020 100644 --- a/src/amd/vulkan/radv_meta_bufimage.c +++ b/src/amd/vulkan/radv_meta_bufimage.c @@ -1308,6 +1308,9 @@ create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_buffe * radv_image_view_init). Some texels are unaddressable and cannot be copied * to/from by a compute shader. Here we will perform a buffer copy to copy the * texels that the hardware missed. + * + * GFX10 will not use this workaround because it can be fixed by adjusting its + * image view descriptors instead. 
*/ static void fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, @@ -1322,8 +1325,10 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radeon_info *rad_info = &device->physical_device->rad_info; struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws); - if (rad_info->gfx_level < GFX9 || image->vk.mip_levels == 1 || - !vk_format_is_block_compressed(image->vk.format)) + /* GFX10 will use a different workaround unless this is not a 2D image */ + if (rad_info->gfx_level < GFX9 || + (rad_info->gfx_level >= GFX10 && image->vk.image_type == VK_IMAGE_TYPE_2D) || + image->vk.mip_levels == 1 || !vk_format_is_block_compressed(image->vk.format)) return; /* The physical extent of the base mip */ |