diff --git a/core/trino-main/src/main/java/io/trino/SystemSessionProperties.java b/core/trino-main/src/main/java/io/trino/SystemSessionProperties.java index f77bb636e4bf..2e720e844e9e 100644 --- a/core/trino-main/src/main/java/io/trino/SystemSessionProperties.java +++ b/core/trino-main/src/main/java/io/trino/SystemSessionProperties.java @@ -794,6 +794,11 @@ public SystemSessionProperties( "Retry policy", RetryPolicy.class, queryManagerConfig.getRetryPolicy(), + value -> { + if (!queryManagerConfig.getAllowedRetryPolicies().contains(value)) { + throw new TrinoException(INVALID_SESSION_PROPERTY, format("Retry policy %s not allowed. Must be one of %s", value, queryManagerConfig.getAllowedRetryPolicies())); + } + }, true), integerProperty( QUERY_RETRY_ATTEMPTS, diff --git a/core/trino-main/src/main/java/io/trino/execution/QueryManagerConfig.java b/core/trino-main/src/main/java/io/trino/execution/QueryManagerConfig.java index 911310064dbe..0d057c21b9f6 100644 --- a/core/trino-main/src/main/java/io/trino/execution/QueryManagerConfig.java +++ b/core/trino-main/src/main/java/io/trino/execution/QueryManagerConfig.java @@ -13,6 +13,7 @@ */ package io.trino.execution; +import com.google.common.collect.ImmutableSet; import io.airlift.configuration.Config; import io.airlift.configuration.ConfigDescription; import io.airlift.configuration.DefunctConfig; @@ -22,12 +23,15 @@ import io.airlift.units.MinDataSize; import io.airlift.units.MinDuration; import io.trino.operator.RetryPolicy; +import jakarta.validation.constraints.AssertTrue; import jakarta.validation.constraints.DecimalMin; import jakarta.validation.constraints.Max; import jakarta.validation.constraints.Min; import jakarta.validation.constraints.NotNull; +import java.util.EnumSet; import java.util.Optional; +import java.util.Set; import java.util.concurrent.TimeUnit; import static com.google.common.base.Preconditions.checkArgument; @@ -104,6 +108,8 @@ public class QueryManagerConfig private Duration requiredWorkersMaxWait = new Duration(5, TimeUnit.MINUTES); private RetryPolicy retryPolicy = RetryPolicy.NONE; + private Set allowedRetryPolicies = EnumSet.allOf(RetryPolicy.class); + private int queryRetryAttempts = 4; private int taskRetryAttemptsPerTask = 4; private Duration retryInitialDelay = new Duration(10, SECONDS); @@ -613,6 +619,25 @@ public QueryManagerConfig setRetryPolicy(RetryPolicy retryPolicy) return this; } + public Set getAllowedRetryPolicies() + { + return allowedRetryPolicies; + } + + @Config("retry-policy.allowed") + @ConfigDescription("Retry policies that are allowed to be used") + public QueryManagerConfig setAllowedRetryPolicies(Set allowedRetryPolicies) + { + this.allowedRetryPolicies = ImmutableSet.copyOf(allowedRetryPolicies); + return this; + } + + @AssertTrue(message = "Selected retry policy not present in retry-policy.allowed list") + public boolean isRetryPolicyAllowed() + { + return allowedRetryPolicies.contains(retryPolicy); + } + @Min(0) public int getQueryRetryAttempts() { diff --git a/core/trino-main/src/test/java/io/trino/execution/TestQueryManagerConfig.java b/core/trino-main/src/test/java/io/trino/execution/TestQueryManagerConfig.java index f6e428917b76..f75b569e3e57 100644 --- a/core/trino-main/src/test/java/io/trino/execution/TestQueryManagerConfig.java +++ b/core/trino-main/src/test/java/io/trino/execution/TestQueryManagerConfig.java @@ -17,13 +17,16 @@ import io.airlift.units.DataSize; import io.airlift.units.Duration; import io.trino.operator.RetryPolicy; +import jakarta.validation.constraints.AssertTrue; import org.junit.jupiter.api.Test; +import java.util.EnumSet; import java.util.Map; import static io.airlift.configuration.testing.ConfigAssertions.assertFullMapping; import static io.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults; import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults; +import static io.airlift.testing.ValidationAssertions.assertFailsValidation; import static io.airlift.units.DataSize.Unit.GIGABYTE; import static io.airlift.units.DataSize.Unit.KILOBYTE; import static io.airlift.units.DataSize.Unit.MEGABYTE; @@ -76,6 +79,7 @@ public void testDefaults() .setRequiredWorkers(1) .setRequiredWorkersMaxWait(new Duration(5, MINUTES)) .setRetryPolicy(RetryPolicy.NONE) + .setAllowedRetryPolicies(EnumSet.allOf(RetryPolicy.class)) .setQueryRetryAttempts(4) .setTaskRetryAttemptsPerTask(4) .setRetryInitialDelay(new Duration(10, SECONDS)) @@ -160,6 +164,7 @@ public void testExplicitPropertyMappings() .put("query-manager.required-workers", "333") .put("query-manager.required-workers-max-wait", "33m") .put("retry-policy", "QUERY") + .put("retry-policy.allowed", "QUERY,TASK") .put("query-retry-attempts", "0") .put("task-retry-attempts-per-task", "9") .put("retry-initial-delay", "1m") @@ -241,6 +246,7 @@ public void testExplicitPropertyMappings() .setRequiredWorkers(333) .setRequiredWorkersMaxWait(new Duration(33, MINUTES)) .setRetryPolicy(RetryPolicy.QUERY) + .setAllowedRetryPolicies(EnumSet.of(RetryPolicy.QUERY, RetryPolicy.TASK)) .setQueryRetryAttempts(0) .setTaskRetryAttemptsPerTask(9) .setRetryInitialDelay(new Duration(1, MINUTES)) @@ -290,4 +296,16 @@ public void testExplicitPropertyMappings() assertFullMapping(properties, expected); } + + @Test + public void testAllowedRetryPoliciesValidation() + { + assertFailsValidation( + new QueryManagerConfig() + .setAllowedRetryPolicies(EnumSet.of(RetryPolicy.NONE, RetryPolicy.TASK)) + .setRetryPolicy(RetryPolicy.QUERY), + "retryPolicyAllowed", + "Selected retry policy not present in retry-policy.allowed list", + AssertTrue.class); + } } diff --git a/docs/src/main/sphinx/admin/fault-tolerant-execution.md b/docs/src/main/sphinx/admin/fault-tolerant-execution.md index 4f80e97763e5..7ae39dd09d56 100644 --- a/docs/src/main/sphinx/admin/fault-tolerant-execution.md +++ b/docs/src/main/sphinx/admin/fault-tolerant-execution.md @@ -71,6 +71,11 @@ execution on a Trino cluster: fault-tolerant execution and typically only to deactivate with `NONE`, since switching between modes on a cluster is not tested. - `NONE` +* - `retry-policy.allowed` + - List of retry policies that are allowed to be configured for a cluster. + This property is used to prevent a user from configuring a retry policy that + is not meant to be used on the given cluster. + - `NONE`, `QUERY`, `TASK` * - `exchange.deduplication-buffer-size` - [Data size](prop-type-data-size) of the coordinator's in-memory buffer used by fault-tolerant execution to store output of query