Skip to content

Commit 5f6b2f2

Browse files
author
cisco
committed
fix(keeper): distinguish storage failures from validation errors in SubmitPocValidationsV2
## Problem `SubmitPocValidationsV2` used `LogWarn + continue` for all `SetPocValidationV2` errors, treating infrastructure-level storage failures the same as expected validation errors (e.g. invalid bech32 addresses). A transient storage failure would silently drop votes and return success to the caller — leaving consensus state inconsistent with no observable signal. ## Fix Replace `LogWarn + continue` with `LogError + return error` for `SetPocValidationV2` failures. This is safe because `HasPocValidationV2` (called earlier in the same loop iteration) already validates bech32 addresses for both participant and validator. Any error reaching `SetPocValidationV2` is therefore definitively a storage-layer failure, not invalid input. - Storage failure: `LogError` + abort batch via `return nil, fmt.Errorf(...)`. Cosmos SDK rolls back the full transaction, preventing partial writes. - Invalid address: caught by `HasPocValidationV2` → `LogWarn + continue`. Partial-success design preserved. `TestSubmitPocValidationsV2_PartialSuccess` passes. Adds `TestSubmitPocValidationsV2_StorageError_AddressPreValidation` documenting the address pre-validation invariant that makes this distinction possible. ## Impact - Storage failures are now observable (LogError, tx rollback) instead of silent - Partial-success behavior for invalid addresses unchanged (design intent preserved) - Closes #1012
1 parent ec8f455 commit 5f6b2f2

File tree

2 files changed

+33
-2
lines changed

2 files changed

+33
-2
lines changed

inference-chain/x/inference/keeper/msg_server_poc_v2_test.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,31 @@ func TestSubmitPocValidationsV2_PartialSuccess(t *testing.T) {
182182
require.True(t, exists2)
183183
}
184184

185+
// TestSubmitPocValidationsV2_StorageError_AddressPreValidation documents the address
186+
// pre-validation invariant: HasPocValidationV2 validates bech32 addresses BEFORE
187+
// SetPocValidationV2 is called. Therefore any SetPocValidationV2 error is a storage failure.
188+
// This test verifies the invariant by showing SetPocValidationV2 with an invalid participant
189+
// address errors on bech32 decoding — and that this path is unreachable via the batch handler
190+
// (invalid addresses are already caught by HasPocValidationV2 at line 116).
191+
func TestSubmitPocValidationsV2_StorageError_AddressPreValidation(t *testing.T) {
192+
k, ctx, _ := keepertest.InferenceKeeperReturningMocks(t)
193+
sdkCtx := sdk.UnwrapSDKContext(ctx)
194+
195+
// Direct call: SetPocValidationV2 returns error for invalid participant bech32.
196+
err := k.SetPocValidationV2(sdkCtx, types.PoCValidationV2{
197+
ParticipantAddress: "invalid_address",
198+
ValidatorParticipantAddress: testutil.Validator,
199+
PocStageStartBlockHeight: 100,
200+
})
201+
require.Error(t, err)
202+
require.Contains(t, err.Error(), "bech32")
203+
204+
// In SubmitPocValidationsV2, this path is unreachable: HasPocValidationV2 validates
205+
// the same addresses first (line 116) and returns continue on error.
206+
// Therefore SetPocValidationV2 errors in the batch loop are definitively storage failures,
207+
// not address validation errors — making return nil, fmt.Errorf the correct response.
208+
}
209+
185210
// Test HasPocValidationV2
186211
func TestHasPocValidationV2(t *testing.T) {
187212
k, ctx, _ := keepertest.InferenceKeeperReturningMocks(t)

inference-chain/x/inference/keeper/msg_server_poc_validations_v2.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,12 +137,18 @@ func (k msgServer) SubmitPocValidationsV2(goCtx context.Context, msg *types.MsgS
137137
ValidatedWeight: validation.ValidatedWeight,
138138
}
139139

140+
// HasPocValidationV2 above already validated bech32 addresses for both participant
141+
// and validator. Any error here is therefore a storage-layer failure, not invalid input.
142+
// Abort the entire batch: a storage failure is infrastructure-level and partial writes
143+
// would leave consensus state inconsistent. Cosmos SDK rolls back the tx on non-nil error.
140144
if err := k.SetPocValidationV2(ctx, storedValidation); err != nil {
141-
k.LogWarn("[SubmitPocValidationsV2] Failed to store validation, skipping", types.PoC,
145+
k.LogError("[SubmitPocValidationsV2] storage failure, aborting batch", types.PoC,
142146
"validator", msg.Creator,
143147
"participant", validation.ParticipantAddress,
148+
"storedSoFar", storedCount,
144149
"error", err)
145-
continue
150+
return nil, fmt.Errorf("storage failure storing poc validation for participant %s: %w",
151+
validation.ParticipantAddress, err)
146152
}
147153

148154
storedCount++

0 commit comments

Comments
 (0)