summaryrefslogtreecommitdiff
path: root/src/broadcom
diff options
context:
space:
mode:
author Iago Toral Quiroga <itoral@igalia.com> 2021-11-12 10:35:59 +0100
committer Marge Bot <emma+marge@anholt.net> 2023-10-13 22:37:43 +0000
commit 9e90d955087dfa6b7487940f58d3613ac4082894 (patch)
tree 5e5c833b75ce28169b854e0f5ac1b6267ec21486 /src/broadcom
parent 904519d245ea6ce44dcd86459f36d749cb0d722e (diff)
download mesa-9e90d955087dfa6b7487940f58d3613ac4082894.tar.gz
mesa-9e90d955087dfa6b7487940f58d3613ac4082894.tar.bz2
mesa-9e90d955087dfa6b7487940f58d3613ac4082894.zip
v3d,v3dv: support up to 8 render targets in v7.1+
Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25450>
Diffstat (limited to 'src/broadcom')
-rw-r--r-- src/broadcom/common/v3d_limits.h 3
-rw-r--r-- src/broadcom/common/v3d_util.c 49
-rw-r--r-- src/broadcom/common/v3d_util.h 6
-rw-r--r-- src/broadcom/compiler/nir_to_vir.c 10
-rw-r--r-- src/broadcom/vulkan/v3dv_cmd_buffer.c 5
-rw-r--r-- src/broadcom/vulkan/v3dv_device.c 6
-rw-r--r-- src/broadcom/vulkan/v3dv_limits.h 2
-rw-r--r-- src/broadcom/vulkan/v3dv_meta_clear.c 8
-rw-r--r-- src/broadcom/vulkan/v3dv_pass.c 6
-rw-r--r-- src/broadcom/vulkan/v3dv_pipeline.c 4
-rw-r--r-- src/broadcom/vulkan/v3dvx_cmd_buffer.c 7
-rw-r--r-- src/broadcom/vulkan/v3dvx_device.c 1
12 files changed, 81 insertions, 26 deletions
diff --git a/src/broadcom/common/v3d_limits.h b/src/broadcom/common/v3d_limits.h
index 46f38bd7484..354c8784914 100644
--- a/src/broadcom/common/v3d_limits.h
+++ b/src/broadcom/common/v3d_limits.h
@@ -42,7 +42,8 @@
#define V3D_MAX_SAMPLES 4
-#define V3D_MAX_DRAW_BUFFERS 4
+#define V3D_MAX_DRAW_BUFFERS 8
+#define V3D_MAX_RENDER_TARGETS(ver) (ver < 71 ? 4 : 8)
#define V3D_MAX_POINT_SIZE 512.0f
#define V3D_MAX_LINE_WIDTH 32
diff --git a/src/broadcom/common/v3d_util.c b/src/broadcom/common/v3d_util.c
index 26f5c6b336f..209a5eceaa1 100644
--- a/src/broadcom/common/v3d_util.c
+++ b/src/broadcom/common/v3d_util.c
@@ -88,8 +88,10 @@ v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo,
}
void
-v3d_choose_tile_size(uint32_t color_attachment_count, uint32_t max_color_bpp,
- bool msaa, bool double_buffer,
+v3d_choose_tile_size(const struct v3d_device_info *devinfo,
+ uint32_t color_attachment_count,
+ uint32_t max_color_bpp, bool msaa,
+ bool double_buffer,
uint32_t *width, uint32_t *height)
{
static const uint8_t tile_sizes[] = {
@@ -103,7 +105,9 @@ v3d_choose_tile_size(uint32_t color_attachment_count, uint32_t max_color_bpp,
};
uint32_t idx = 0;
- if (color_attachment_count > 2)
+ if (color_attachment_count > 4)
+ idx += 3;
+ else if (color_attachment_count > 2)
idx += 2;
else if (color_attachment_count > 1)
idx += 1;
@@ -117,6 +121,45 @@ v3d_choose_tile_size(uint32_t color_attachment_count, uint32_t max_color_bpp,
idx += max_color_bpp;
+ if (devinfo->ver >= 71) {
+ /* In V3D 7.x the TLB has an auxiliary buffer of 8KB that will be
+ * automatically used for depth instead of the main 16KB depth TLB buffer
+ * when the depth tile fits in the auxiliary buffer, allowing the hardware
+ * to allocate the 16KB from the main depth TLB to the color TLB. If
+ * we can do that, then we are effectively doubling the memory we have
+ * for color and we can increase our tile dimensions by a factor of 2
+ * (reduce idx by 1).
+ *
+ * If we have computed a tile size that would be smaller than the minimum
+ * of 8x8, then it is certain that depth will fit in the aux depth TLB
+ * (even in MSAA mode).
+ *
+ * Otherwise, we need to check if we can fit depth in the aux TLB buffer
+ * using a larger tile size.
+ *
+ * FIXME: the docs state that depth TLB memory can be used for color
+ * if depth testing is not used by setting the 'depth disable' bit in the
+ * rendering configuration. However, this comes with a requirement that
+ * occlusion queries must not be active. We need to clarify if this means
+ * active at the point at which we emit a tile rendering configuration
+ * item, meaning that we have a query spanning a full render pass
+ * (this is something we can tell before we emit the rendering
+ * configuration item) or active in the subpass for which we are enabling
+ * the bit (which we can't tell until later, when we record commands for
+ * the subpass). If it is the latter, then we cannot use this feature.
+ */
+ if (idx >= ARRAY_SIZE(tile_sizes) / 2) {
+ idx--;
+ } else if (idx > 0) {
+ /* Depth is always 32bpp (4x32bpp for 4x MSAA) */
+ uint32_t depth_bpp = !msaa ? 4 : 16;
+ uint32_t tile_w = tile_sizes[(idx - 1) * 2];
+ uint32_t tile_h = tile_sizes[(idx - 1) * 2 + 1];
+ if (tile_w * tile_h * depth_bpp <= 8192)
+ idx--;
+ }
+ }
+
assert(idx < ARRAY_SIZE(tile_sizes) / 2);
*width = tile_sizes[idx * 2];
diff --git a/src/broadcom/common/v3d_util.h b/src/broadcom/common/v3d_util.h
index f6197069b9a..ade5a0bb152 100644
--- a/src/broadcom/common/v3d_util.h
+++ b/src/broadcom/common/v3d_util.h
@@ -38,8 +38,10 @@ v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo,
uint32_t wg_size);
void
-v3d_choose_tile_size(uint32_t color_attachment_count, uint32_t max_color_bpp,
- bool msaa, bool double_buffer,
+v3d_choose_tile_size(const struct v3d_device_info *devinfo,
+ uint32_t color_attachment_count,
+ uint32_t max_color_bpp, bool msaa,
+ bool double_buffer,
uint32_t *width, uint32_t *height);
uint32_t
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 27a45e452ca..220c864a056 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -2417,15 +2417,17 @@ ntq_setup_outputs(struct v3d_compile *c)
switch (var->data.location) {
case FRAG_RESULT_COLOR:
- c->output_color_var[0] = var;
- c->output_color_var[1] = var;
- c->output_color_var[2] = var;
- c->output_color_var[3] = var;
+ for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++)
+ c->output_color_var[i] = var;
break;
case FRAG_RESULT_DATA0:
case FRAG_RESULT_DATA1:
case FRAG_RESULT_DATA2:
case FRAG_RESULT_DATA3:
+ case FRAG_RESULT_DATA4:
+ case FRAG_RESULT_DATA5:
+ case FRAG_RESULT_DATA6:
+ case FRAG_RESULT_DATA7:
c->output_color_var[var->data.location -
FRAG_RESULT_DATA0] = var;
break;
diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c
index 440dfdc5f2f..87b31e42d5f 100644
--- a/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -366,7 +366,8 @@ job_compute_frame_tiling(struct v3dv_job *job,
/* Double-buffer is incompatible with MSAA */
assert(!tiling->msaa || !tiling->double_buffer);
- v3d_choose_tile_size(render_target_count, max_internal_bpp,
+ v3d_choose_tile_size(&job->device->devinfo,
+ render_target_count, max_internal_bpp,
tiling->msaa, tiling->double_buffer,
&tiling->tile_width, &tiling->tile_height);
@@ -1375,7 +1376,7 @@ cmd_buffer_emit_subpass_clears(struct v3dv_cmd_buffer *cmd_buffer)
}
uint32_t att_count = 0;
- VkClearAttachment atts[V3D_MAX_DRAW_BUFFERS + 1]; /* 4 color + D/S */
+ VkClearAttachment atts[V3D_MAX_DRAW_BUFFERS + 1]; /* +1 for D/S */
/* We only need to emit subpass clears as draw calls for color attachments
* if the render area is not aligned to tile boundaries.
diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c
index d2e098a5f78..f67261ff087 100644
--- a/src/broadcom/vulkan/v3dv_device.c
+++ b/src/broadcom/vulkan/v3dv_device.c
@@ -1335,6 +1335,8 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
const VkSampleCountFlags supported_sample_counts =
VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
+ const uint8_t max_rts = V3D_MAX_RENDER_TARGETS(pdevice->devinfo.ver);
+
struct timespec clock_res;
clock_getres(CLOCK_MONOTONIC, &clock_res);
const float timestamp_period =
@@ -1405,7 +1407,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
.maxFragmentInputComponents = max_varying_components,
.maxFragmentOutputAttachments = 4,
.maxFragmentDualSrcAttachments = 0,
- .maxFragmentCombinedOutputResources = MAX_RENDER_TARGETS +
+ .maxFragmentCombinedOutputResources = max_rts +
MAX_STORAGE_BUFFERS +
MAX_STORAGE_IMAGES,
@@ -1445,7 +1447,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
.framebufferDepthSampleCounts = supported_sample_counts,
.framebufferStencilSampleCounts = supported_sample_counts,
.framebufferNoAttachmentsSampleCounts = supported_sample_counts,
- .maxColorAttachments = MAX_RENDER_TARGETS,
+ .maxColorAttachments = max_rts,
.sampledImageColorSampleCounts = supported_sample_counts,
.sampledImageIntegerSampleCounts = supported_sample_counts,
.sampledImageDepthSampleCounts = supported_sample_counts,
diff --git a/src/broadcom/vulkan/v3dv_limits.h b/src/broadcom/vulkan/v3dv_limits.h
index 60b3ab2eafe..4df172e6bf3 100644
--- a/src/broadcom/vulkan/v3dv_limits.h
+++ b/src/broadcom/vulkan/v3dv_limits.h
@@ -50,8 +50,6 @@
#define MAX_DYNAMIC_BUFFERS (MAX_DYNAMIC_UNIFORM_BUFFERS + \
MAX_DYNAMIC_STORAGE_BUFFERS)
-#define MAX_RENDER_TARGETS 4
-
#define MAX_MULTIVIEW_VIEW_COUNT 16
/* These are tunable parameters in the HW design, but all the V3D
diff --git a/src/broadcom/vulkan/v3dv_meta_clear.c b/src/broadcom/vulkan/v3dv_meta_clear.c
index c68d7302c4d..e46899ca8a8 100644
--- a/src/broadcom/vulkan/v3dv_meta_clear.c
+++ b/src/broadcom/vulkan/v3dv_meta_clear.c
@@ -747,7 +747,7 @@ get_color_clear_pipeline_cache_key(uint32_t rt_idx,
uint32_t bit_offset = 0;
key |= rt_idx;
- bit_offset += 2;
+ bit_offset += 3;
key |= ((uint64_t) format) << bit_offset;
bit_offset += 32;
@@ -1189,9 +1189,11 @@ v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- /* We can only clear attachments in the current subpass */
- assert(attachmentCount <= 5); /* 4 color + D/S */
+ /* We can have at most max_color_RTs + 1 D/S attachments */
+ assert(attachmentCount <=
+ V3D_MAX_RENDER_TARGETS(cmd_buffer->device->devinfo.ver) + 1);
+ /* We can only clear attachments in the current subpass */
struct v3dv_render_pass *pass = cmd_buffer->state.pass;
assert(cmd_buffer->state.subpass_idx < pass->subpass_count);
diff --git a/src/broadcom/vulkan/v3dv_pass.c b/src/broadcom/vulkan/v3dv_pass.c
index 3e82c15df88..7f2e2bbc710 100644
--- a/src/broadcom/vulkan/v3dv_pass.c
+++ b/src/broadcom/vulkan/v3dv_pass.c
@@ -322,11 +322,11 @@ subpass_get_granularity(struct v3dv_device *device,
/* Granularity is defined by the tile size */
assert(subpass_idx < pass->subpass_count);
struct v3dv_subpass *subpass = &pass->subpasses[subpass_idx];
- const uint32_t color_attachment_count = subpass->color_count;
+ const uint32_t color_count = subpass->color_count;
bool msaa = false;
uint32_t max_bpp = 0;
- for (uint32_t i = 0; i < color_attachment_count; i++) {
+ for (uint32_t i = 0; i < color_count; i++) {
uint32_t attachment_idx = subpass->color_attachments[i].attachment;
if (attachment_idx == VK_ATTACHMENT_UNUSED)
continue;
@@ -349,7 +349,7 @@ subpass_get_granularity(struct v3dv_device *device,
* heuristics so we choose a conservative granularity here, with it disabled.
*/
uint32_t width, height;
- v3d_choose_tile_size(color_attachment_count, max_bpp, msaa,
+ v3d_choose_tile_size(&device->devinfo, color_count, max_bpp, msaa,
false /* double-buffer */, &width, &height);
*granularity = (VkExtent2D) {
.width = width,
diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
index d3e307cacb2..df2131d75c6 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@@ -2657,6 +2657,7 @@ pipeline_init_dynamic_state(
const VkPipelineColorWriteCreateInfoEXT *pColorWriteState)
{
/* Initialize to default values */
+ const struct v3d_device_info *devinfo = &pipeline->device->devinfo;
struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;
memset(dynamic, 0, sizeof(*dynamic));
dynamic->stencil_compare_mask.front = ~0;
@@ -2664,7 +2665,8 @@ pipeline_init_dynamic_state(
dynamic->stencil_write_mask.front = ~0;
dynamic->stencil_write_mask.back = ~0;
dynamic->line_width = 1.0f;
- dynamic->color_write_enable = (1ull << (4 * V3D_MAX_DRAW_BUFFERS)) - 1;
+ dynamic->color_write_enable =
+ (1ull << (4 * V3D_MAX_RENDER_TARGETS(devinfo->ver))) - 1;
/* Create a mask of enabled dynamic states */
uint32_t dynamic_states = 0;
diff --git a/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
index a2011ef1b5e..b05c5f77428 100644
--- a/src/broadcom/vulkan/v3dvx_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dvx_cmd_buffer.c
@@ -1550,10 +1550,13 @@ v3dX(cmd_buffer_emit_blend)(struct v3dv_cmd_buffer *cmd_buffer)
struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
assert(pipeline);
+ const struct v3d_device_info *devinfo = &cmd_buffer->device->devinfo;
+ const uint32_t max_color_rts = V3D_MAX_RENDER_TARGETS(devinfo->ver);
+
const uint32_t blend_packets_size =
cl_packet_length(BLEND_ENABLES) +
cl_packet_length(BLEND_CONSTANT_COLOR) +
- cl_packet_length(BLEND_CFG) * V3D_MAX_DRAW_BUFFERS;
+ cl_packet_length(BLEND_CFG) * max_color_rts;
v3dv_cl_ensure_space_with_branch(&job->bcl, blend_packets_size);
v3dv_return_if_oom(cmd_buffer, NULL);
@@ -1565,7 +1568,7 @@ v3dX(cmd_buffer_emit_blend)(struct v3dv_cmd_buffer *cmd_buffer)
}
}
- for (uint32_t i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
+ for (uint32_t i = 0; i < max_color_rts; i++) {
if (pipeline->blend.enables & (1 << i))
cl_emit_prepacked(&job->bcl, &pipeline->blend.cfg[i]);
}
diff --git a/src/broadcom/vulkan/v3dvx_device.c b/src/broadcom/vulkan/v3dvx_device.c
index 72daefadb08..4d17a2691a5 100644
--- a/src/broadcom/vulkan/v3dvx_device.c
+++ b/src/broadcom/vulkan/v3dvx_device.c
@@ -49,7 +49,6 @@ vk_to_v3d_compare_func[] = {
[VK_COMPARE_OP_ALWAYS] = V3D_COMPARE_FUNC_ALWAYS,
};
-
static union pipe_color_union encode_border_color(
const VkSamplerCustomBorderColorCreateInfoEXT *bc_info)
{