summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Konstantin Seurer <konstantin.seurer@gmail.com> 2023-12-27 15:19:12 +0100
committer Eric Engestrom <eric@engestrom.ch> 2024-01-09 19:37:45 +0000
commit a3bcb524829427b23e7363f8f73172155ab57ba0 (patch)
tree e45ab9f4127ab9165a008f485eb3c8df1beadc6f
parent e7dd2637a5b02cc3a5cdd5e1503c68f1c10b5b37 (diff)
download mesa-a3bcb524829427b23e7363f8f73172155ab57ba0.tar.gz
mesa-a3bcb524829427b23e7363f8f73172155ab57ba0.tar.bz2
mesa-a3bcb524829427b23e7363f8f73172155ab57ba0.zip
vtn: Remove transpose(m0)*m1 fast path
This is broken for games that rely on invariant geometry since the usage of
matrices can affect how gl_Position is computed. The fdot fast path relied on
if and how fdot is lowered for correctness.

Totals from 6578 (7.73% of 85071) affected shaders:
MaxWaves: 147190 -> 147170 (-0.01%)
Instrs: 4451406 -> 4438140 (-0.30%); split: -0.31%, +0.01%
CodeSize: 23553020 -> 23541772 (-0.05%); split: -0.07%, +0.03%
VGPRs: 302304 -> 302328 (+0.01%)
SpillSGPRs: 1309 -> 1329 (+1.53%)
Latency: 22509985 -> 22177164 (-1.48%); split: -1.48%, +0.00%
InvThroughput: 4862795 -> 4842951 (-0.41%); split: -0.41%, +0.01%
VClause: 85035 -> 84998 (-0.04%); split: -0.06%, +0.02%
SClause: 131008 -> 131055 (+0.04%); split: -0.02%, +0.05%
Copies: 298935 -> 298060 (-0.29%); split: -0.71%, +0.41%
PreSGPRs: 266833 -> 267292 (+0.17%); split: -0.85%, +1.03%
PreVGPRs: 249511 -> 249601 (+0.04%)

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9562
cc: mesa-stable
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26821>
(cherry picked from commit 4d02543853eb86c6c3cb6dd2a84e9a673b44001f)
-rw-r--r-- .pick_status.json 2
-rw-r--r-- src/compiler/spirv/vtn_alu.c 40
2 files changed, 10 insertions, 32 deletions
diff --git a/.pick_status.json b/.pick_status.json
index ceaef0e523e..8f04f1575dc 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -544,7 +544,7 @@
"description": "vtn: Remove transpose(m0)*m1 fast path",
"nominated": true,
"nomination_type": 0,
- "resolution": 0,
+ "resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null
diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c
index 04d71ae6eb2..a6b327f6a02 100644
--- a/src/compiler/spirv/vtn_alu.c
+++ b/src/compiler/spirv/vtn_alu.c
@@ -94,38 +94,16 @@ matrix_multiply(struct vtn_builder *b,
transpose_result = true;
}
- if (src0_transpose && !src1_transpose &&
- glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) {
- /* We already have the rows of src0 and the columns of src1 available,
- * so we can just take the dot product of each row with each column to
- * get the result.
- */
-
- for (unsigned i = 0; i < src1_columns; i++) {
- nir_def *vec_src[4];
- for (unsigned j = 0; j < src0_rows; j++) {
- vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def,
- src1->elems[i]->def);
- }
- dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows);
- }
- } else {
- /* We don't handle the case where src1 is transposed but not src0, since
- * the general case only uses individual components of src1 so the
- * optimizer should chew through the transpose we emitted for src1.
- */
-
- for (unsigned i = 0; i < src1_columns; i++) {
- /* dest[i] = sum(src0[j] * src1[i][j] for all j) */
+ for (unsigned i = 0; i < src1_columns; i++) {
+ /* dest[i] = sum(src0[j] * src1[i][j] for all j) */
+ dest->elems[i]->def =
+ nir_fmul(&b->nb, src0->elems[src0_columns - 1]->def,
+ nir_channel(&b->nb, src1->elems[i]->def, src0_columns - 1));
+ for (int j = src0_columns - 2; j >= 0; j--) {
dest->elems[i]->def =
- nir_fmul(&b->nb, src0->elems[src0_columns - 1]->def,
- nir_channel(&b->nb, src1->elems[i]->def, src0_columns - 1));
- for (int j = src0_columns - 2; j >= 0; j--) {
- dest->elems[i]->def =
- nir_ffma(&b->nb, src0->elems[j]->def,
- nir_channel(&b->nb, src1->elems[i]->def, j),
- dest->elems[i]->def);
- }
+ nir_ffma(&b->nb, src0->elems[j]->def,
+ nir_channel(&b->nb, src1->elems[i]->def, j),
+ dest->elems[i]->def);
}
}