author     Jason Ekstrand <jason.ekstrand@intel.com>    2018-08-07 15:47:54 -0700
committer  Jason Ekstrand <jason.ekstrand@intel.com>    2018-08-17 10:50:28 -0500
commit     d9ea015ced39bab7fb32cc772307c4fb647403bd (patch)
tree       00af3fbac7c188a801f5eee67ae3a6d8eb5284b7 /src/intel
parent     f210a5f4bb868ad5ae70c5fb3912f3a241305666 (diff)
anv/pipeline: Lower pipeline layouts etc. after linking
This allows us to use the link-optimized shader for determining binding
table layouts and, more importantly, URB layouts. For apps running on
DXVK, this is extremely important as DXVK likes to declare max-size
inputs and outputs, and this lets us massively shrink our URB space
requirements.
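Roughly speaking, each stage is now compiled to NIR first, linked and optimized across stages, and only then has the pipeline layout applied. The following is a condensed, hypothetical outline of the reordered flow in anv_pipeline_compile_graphics() for illustration only; the real control flow is in the diff below:

   /* Sketch of the reordered per-pipeline flow:
    *
    *   1. anv_shader_compile_to_nir()  - per stage, SPIR-V -> NIR,
    *                                     no pipeline layout applied yet
    *   2. cross-stage link + optimize  - unused inputs/outputs are removed
    *   3. anv_pipeline_lower_nir()     - apply the pipeline layout and
    *                                     build the bind map per stage
    *   4. back-end compile             - binding tables and URB layouts are
    *                                     computed from the shrunken I/O
    */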
VkPipeline-db results (Batman pipelines only) on KBL:
total instructions in shared programs: 820403 -> 790008 (-3.70%)
instructions in affected programs: 273759 -> 243364 (-11.10%)
helped: 622
HURT: 42
total spills in shared programs: 8449 -> 5212 (-38.31%)
spills in affected programs: 3427 -> 190 (-94.46%)
helped: 607
HURT: 2
total fills in shared programs: 11638 -> 6067 (-47.87%)
fills in affected programs: 5879 -> 308 (-94.76%)
helped: 606
HURT: 3
Looking at shaders by hand, this makes the URB between the TCS and TES go from
containing 32 per-vertex varyings per tessellation shader pair to a more
reasonable 8-12. For a 3-vertex patch, that's at least half the URB
space no matter how big the patch section is.
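As a rough back-of-the-envelope illustration (assuming one vec4 slot of 16 bytes per varying and ignoring the per-patch section and any allocation granularity, so the numbers are only indicative):

   before: 32 varyings * 16 B * 3 vertices = 1536 B of per-vertex data per patch
   after:  12 varyings * 16 B * 3 vertices =  576 B   (8 varyings: 384 B)

i.e. the per-vertex portion of the URB shrinks by roughly 60-75%.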
Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
Diffstat (limited to 'src/intel')
-rw-r--r--   src/intel/vulkan/anv_pipeline.c | 58
1 file changed, 28 insertions, 30 deletions
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index ef1a80d2024..0fe0c7e296e 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -472,24 +472,17 @@ anv_pipeline_hash_compute(struct anv_pipeline *pipeline,
    _mesa_sha1_final(&ctx, sha1_out);
 }
 
-static nir_shader *
-anv_pipeline_compile(struct anv_pipeline *pipeline,
-                     void *mem_ctx,
-                     struct anv_pipeline_layout *layout,
-                     struct anv_pipeline_stage *stage,
-                     struct brw_stage_prog_data *prog_data,
-                     struct anv_pipeline_bind_map *map)
+static void
+anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
+                       void *mem_ctx,
+                       struct anv_pipeline_stage *stage,
+                       struct anv_pipeline_layout *layout)
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
 
-   nir_shader *nir = anv_shader_compile_to_nir(pipeline, mem_ctx,
-                                               stage->module,
-                                               stage->entrypoint,
-                                               stage->stage,
-                                               stage->spec_info);
-   if (nir == NULL)
-      return NULL;
+   struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
+   nir_shader *nir = stage->nir;
 
    NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
 
@@ -531,15 +524,17 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
       pipeline->needs_data_cache = true;
 
    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
-   if (layout)
-      anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data, map);
+   if (layout) {
+      anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data,
+                                    &stage->bind_map);
+   }
 
    if (nir->info.stage != MESA_SHADER_COMPUTE)
       brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
 
    assert(nir->num_uniforms == prog_data->nr_params * 4);
 
-   return nir;
+   stage->nir = nir;
 }
 
 static void
@@ -807,16 +802,12 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler,
    stage->key.wm.color_outputs_valid = (1 << num_rts) - 1;
    assert(num_rts <= max_rt);
 
-   assert(stage->bind_map.surface_count + num_rts <= 256);
-   memmove(stage->bind_map.surface_to_descriptor + num_rts,
-           stage->bind_map.surface_to_descriptor,
-           stage->bind_map.surface_count *
-           sizeof(*stage->bind_map.surface_to_descriptor));
+   assert(stage->bind_map.surface_count == 0);
    typed_memcpy(stage->bind_map.surface_to_descriptor,
                 rt_bindings, num_rts);
    stage->bind_map.surface_count += num_rts;
 
-   anv_fill_binding_table(&stage->prog_data.wm.base, num_rts);
+   anv_fill_binding_table(&stage->prog_data.wm.base, 0);
 }
 
 static const unsigned *
@@ -976,10 +967,11 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
          .sampler_to_descriptor = stages[s].sampler_to_descriptor
       };
 
-      stages[s].nir = anv_pipeline_compile(pipeline, pipeline_ctx, layout,
-                                           &stages[s],
-                                           &stages[s].prog_data.base,
-                                           &stages[s].bind_map);
+      stages[s].nir = anv_shader_compile_to_nir(pipeline, pipeline_ctx,
+                                                stages[s].module,
+                                                stages[s].entrypoint,
+                                                stages[s].stage,
+                                                stages[s].spec_info);
       if (stages[s].nir == NULL) {
          result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
         goto fail;
@@ -1022,6 +1014,8 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
 
       void *stage_ctx = ralloc_context(NULL);
 
+      anv_pipeline_lower_nir(pipeline, stage_ctx, &stages[s], layout);
+
       const unsigned *code;
       switch (s) {
       case MESA_SHADER_VERTEX:
@@ -1141,14 +1135,18 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
 
    void *mem_ctx = ralloc_context(NULL);
 
-   stage.nir = anv_pipeline_compile(pipeline, mem_ctx, layout, &stage,
-                                    &stage.prog_data.base,
-                                    &stage.bind_map);
+   stage.nir = anv_shader_compile_to_nir(pipeline, mem_ctx,
+                                         stage.module,
+                                         stage.entrypoint,
+                                         stage.stage,
+                                         stage.spec_info);
    if (stage.nir == NULL) {
       ralloc_free(mem_ctx);
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
    }
 
+   anv_pipeline_lower_nir(pipeline, mem_ctx, &stage, layout);
+
    NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id,
              &stage.prog_data.cs);