-
Notifications
You must be signed in to change notification settings - Fork 1.3k
fix(naga): zero initialize loop-local var declarations in each iteration
#9592
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
ruihe774
wants to merge
2
commits into
gfx-rs:trunk
Choose a base branch
from
ruihe774:fix-9489
base: trunk
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+1,316
−0
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| targets = "SPIRV | METAL | GLSL | HLSL | WGSL | IR" | ||
|
|
||
| [msl.per_entry_point_map.main] | ||
| resources = [ | ||
| { bind_target = { buffer = 0, mutable = false }, resource_binding = { group = 0, binding = 0 } }, | ||
| { bind_target = { buffer = 1, mutable = true }, resource_binding = { group = 0, binding = 1 } }, | ||
| ] | ||
| sizes_buffer = 24 | ||
|
|
||
| [glsl] | ||
| version.Desktop = 430 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| // #9489: A `var` declaration inside a loop body without an explicit | ||
| // initializer must be re-zero-initialized on every iteration, just like | ||
| // one with an explicit initializer. Naga hoists all local variables to | ||
| // function scope and zero-initializes them once at function entry, so the | ||
| // per-iteration reset has to be lowered to an explicit store in the loop | ||
| // body (the same mechanism already used for explicit initializers). | ||
| // | ||
| // Without the fix, `acc_noinit` accumulates across iterations of the outer | ||
| // loop (a running prefix sum) while `acc_init` does not, even though the | ||
| // two are semantically equivalent per the WGSL spec. | ||
|
|
||
| @group(0) @binding(0) var<storage, read> input: array<f32, 64>; | ||
| @group(0) @binding(1) var<storage, read_write> output: array<f32, 8>; | ||
|
|
||
| @compute @workgroup_size(1) | ||
| fn main() { | ||
| for (var t = 0u; t < 4u; t++) { | ||
| var acc_noinit: vec4<f32>; // no explicit initializer | ||
| var acc_init: vec4<f32> = vec4<f32>(); // explicit initializer | ||
|
|
||
| for (var d = 0u; d < 16u; d++) { | ||
| let v = vec4<f32>(input[t * 16u + d]); | ||
| acc_noinit += v; | ||
| acc_init += v; | ||
| } | ||
|
|
||
| output[t * 2u] = acc_noinit.x; | ||
| output[t * 2u + 1u] = acc_init.x; | ||
| } | ||
| } |
65 changes: 65 additions & 0 deletions
65
naga/tests/out/glsl/wgsl-9489-loop-local-var-init.main.Compute.glsl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,65 @@ | ||
| #version 430 core | ||
| #extension GL_ARB_compute_shader : require | ||
| #extension GL_ARB_shader_storage_buffer_object : require | ||
| layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; | ||
|
|
||
| layout(std430) readonly buffer type_1_block_0Compute { float _group_0_binding_0_cs[64]; }; | ||
|
|
||
| layout(std430) buffer type_2_block_1Compute { float _group_0_binding_1_cs[8]; }; | ||
|
|
||
|
|
||
| void main() { | ||
| uint t = 0u; | ||
| vec4 acc_noinit = vec4(0.0); | ||
| vec4 acc_init = vec4(0.0); | ||
| uint d = 0u; | ||
| bool loop_init = true; | ||
| while(true) { | ||
| if (!loop_init) { | ||
| uint _e47 = t; | ||
| t = (_e47 + 1u); | ||
| } | ||
| loop_init = false; | ||
| uint _e2 = t; | ||
| if ((_e2 < 4u)) { | ||
| } else { | ||
| break; | ||
| } | ||
| { | ||
| acc_noinit = vec4(0.0); | ||
| acc_init = vec4(0.0); | ||
| d = 0u; | ||
| bool loop_init_1 = true; | ||
| while(true) { | ||
| if (!loop_init_1) { | ||
| uint _e28 = d; | ||
| d = (_e28 + 1u); | ||
| } | ||
| loop_init_1 = false; | ||
| uint _e11 = d; | ||
| if ((_e11 < 16u)) { | ||
| } else { | ||
| break; | ||
| } | ||
| { | ||
| uint _e15 = t; | ||
| uint _e18 = d; | ||
| float _e21 = _group_0_binding_0_cs[((_e15 * 16u) + _e18)]; | ||
| vec4 v = vec4(_e21); | ||
| vec4 _e23 = acc_noinit; | ||
| acc_noinit = (_e23 + v); | ||
| vec4 _e25 = acc_init; | ||
| acc_init = (_e25 + v); | ||
| } | ||
| } | ||
| uint _e31 = t; | ||
| float _e36 = acc_noinit.x; | ||
| _group_0_binding_1_cs[(_e31 * 2u)] = _e36; | ||
| uint _e38 = t; | ||
| float _e45 = acc_init.x; | ||
| _group_0_binding_1_cs[((_e38 * 2u) + 1u)] = _e45; | ||
| } | ||
| } | ||
| return; | ||
| } | ||
|
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,70 @@ | ||
| ByteAddressBuffer input : register(t0); | ||
| RWByteAddressBuffer output : register(u1); | ||
|
|
||
| float4 ZeroValuefloat4() { | ||
| return (float4)0; | ||
| } | ||
|
|
||
| [numthreads(1, 1, 1)] | ||
| void main() | ||
| { | ||
| uint t = 0u; | ||
| float4 acc_noinit = (float4)0; | ||
| float4 acc_init = (float4)0; | ||
| uint d = (uint)0; | ||
|
|
||
| uint2 loop_bound = uint2(4294967295u, 4294967295u); | ||
| bool loop_init = true; | ||
| while(true) { | ||
| if (all(loop_bound == uint2(0u, 0u))) { break; } | ||
| loop_bound -= uint2(loop_bound.y == 0u, 1u); | ||
| if (!loop_init) { | ||
| uint _e47 = t; | ||
| t = (_e47 + 1u); | ||
| } | ||
| loop_init = false; | ||
| uint _e2 = t; | ||
| if ((_e2 < 4u)) { | ||
| } else { | ||
| break; | ||
| } | ||
| { | ||
| acc_noinit = ZeroValuefloat4(); | ||
| acc_init = ZeroValuefloat4(); | ||
| d = 0u; | ||
| uint2 loop_bound_1 = uint2(4294967295u, 4294967295u); | ||
| bool loop_init_1 = true; | ||
| while(true) { | ||
| if (all(loop_bound_1 == uint2(0u, 0u))) { break; } | ||
| loop_bound_1 -= uint2(loop_bound_1.y == 0u, 1u); | ||
| if (!loop_init_1) { | ||
| uint _e28 = d; | ||
| d = (_e28 + 1u); | ||
| } | ||
| loop_init_1 = false; | ||
| uint _e11 = d; | ||
| if ((_e11 < 16u)) { | ||
| } else { | ||
| break; | ||
| } | ||
| { | ||
| uint _e15 = t; | ||
| uint _e18 = d; | ||
| float _e21 = asfloat(input.Load(((_e15 * 16u) + _e18)*4)); | ||
| float4 v = (_e21).xxxx; | ||
| float4 _e23 = acc_noinit; | ||
| acc_noinit = (_e23 + v); | ||
| float4 _e25 = acc_init; | ||
| acc_init = (_e25 + v); | ||
| } | ||
| } | ||
| uint _e31 = t; | ||
| float _e36 = acc_noinit.x; | ||
| output.Store((_e31 * 2u)*4, asuint(_e36)); | ||
| uint _e38 = t; | ||
| float _e45 = acc_init.x; | ||
| output.Store(((_e38 * 2u) + 1u)*4, asuint(_e45)); | ||
| } | ||
| } | ||
| return; | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| ( | ||
| vertex:[ | ||
| ], | ||
| fragment:[ | ||
| ], | ||
| compute:[ | ||
| ( | ||
| entry_point:"main", | ||
| target_profile:"cs_5_1", | ||
| ), | ||
| ], | ||
| task:[ | ||
| ], | ||
| mesh:[ | ||
| ], | ||
| ) |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The contract of the IR is that variables without an initializer get zero initialized. While this fixes the issue, the problem is in the backends not in the frontend. Can you see if you can fix it in the backends instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not an easy way to fix this issue in the backends. There is no per-block/per-iteration lifetime for locals in the IR. The backend has no signal to re-zero.