diff --git a/runners/core-java/build.gradle b/runners/core-java/build.gradle index 9f24ce39b974..403cf4f2bc5a 100644 --- a/runners/core-java/build.gradle +++ b/runners/core-java/build.gradle @@ -42,6 +42,7 @@ dependencies { implementation project(path: ":model:pipeline", configuration: "shadow") implementation project(path: ":sdks:java:core", configuration: "shadow") implementation project(path: ":model:job-management", configuration: "shadow") + implementation project(path: ":model:fn-execution", configuration: "shadow") implementation library.java.vendored_guava_32_1_2_jre implementation library.java.joda_time implementation library.java.vendored_grpc_1_69_0 diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/CombinedMetadata.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/CombinedMetadata.java new file mode 100644 index 000000000000..bdce67f6bad5 --- /dev/null +++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/CombinedMetadata.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.core; + +import com.google.auto.value.AutoValue; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import org.apache.beam.model.fnexecution.v1.BeamFnApi; +import org.apache.beam.sdk.coders.AtomicCoder; +import org.apache.beam.sdk.values.CausedByDrain; + +/** + * Encapsulates metadata that propagates with elements in the pipeline. + * + *

This metadata is sent along with elements. It currently includes fields like {@link + * CausedByDrain}, and is designed to be extensible to support future metadata fields such as + * OpenTelemetry context or CDC (Change Data Capture) kind. + * + *

The purpose of this class is to group targeted metadata fields together. This makes it easier + * to define combination strategies (e.g., when accumulating state in {@code ReduceFnRunner}) when + * multiple elements are merged or grouped, without having to extend method signatures or state + * handling for every new metadata field. + */ +@AutoValue +public abstract class CombinedMetadata { + public abstract CausedByDrain causedByDrain(); + + public static CombinedMetadata create(CausedByDrain causedByDrain) { + return new AutoValue_CombinedMetadata(causedByDrain); + } + + public static CombinedMetadata createDefault() { + return create(CausedByDrain.NORMAL); + } + + public static class Coder extends AtomicCoder { + private static final Coder INSTANCE = new Coder(); + + public static Coder of() { + return INSTANCE; + } + + @Override + public void encode(CombinedMetadata value, OutputStream outStream) throws IOException { + BeamFnApi.Elements.ElementMetadata proto = + BeamFnApi.Elements.ElementMetadata.newBuilder() + .setDrain( + value.causedByDrain() == CausedByDrain.CAUSED_BY_DRAIN + ? BeamFnApi.Elements.DrainMode.Enum.DRAINING + : BeamFnApi.Elements.DrainMode.Enum.NOT_DRAINING) + .build(); + proto.writeDelimitedTo(outStream); + } + + @Override + public CombinedMetadata decode(InputStream inStream) throws IOException { + BeamFnApi.Elements.ElementMetadata proto = + BeamFnApi.Elements.ElementMetadata.parseDelimitedFrom(inStream); + if (proto == null) { + return CombinedMetadata.createDefault(); + } + + CausedByDrain causedByDrain = + proto.getDrain() == BeamFnApi.Elements.DrainMode.Enum.DRAINING + ? CausedByDrain.CAUSED_BY_DRAIN + : CausedByDrain.NORMAL; + + return CombinedMetadata.create(causedByDrain); + } + } +} diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/CombinedMetadataCombiner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/CombinedMetadataCombiner.java new file mode 100644 index 000000000000..a2f3f26520ef --- /dev/null +++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/CombinedMetadataCombiner.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.core; + +import org.apache.beam.sdk.transforms.Combine.CombineFn; +import org.apache.beam.sdk.values.CausedByDrain; + +/** Combiner for CombinedMetadata. */ +class CombinedMetadataCombiner + extends CombineFn { + private static final CombinedMetadataCombiner INSTANCE = new CombinedMetadataCombiner(); + + public static CombinedMetadataCombiner of() { + return INSTANCE; + } + + @Override + public CombinedMetadata createAccumulator() { + return CombinedMetadata.create(CausedByDrainCombiner.of().createAccumulator()); + } + + @Override + public CombinedMetadata addInput(CombinedMetadata accumulator, CombinedMetadata input) { + return CombinedMetadata.create( + CausedByDrainCombiner.of().addInput(accumulator.causedByDrain(), input.causedByDrain())); + } + + @Override + public CombinedMetadata mergeAccumulators(Iterable accumulators) { + CombinedMetadata result = createAccumulator(); + for (CombinedMetadata accum : accumulators) { + result = addInput(result, accum); + } + return result; + } + + @Override + public CombinedMetadata extractOutput(CombinedMetadata accumulator) { + return accumulator; + } + + /** Combiner for CausedByDrain metadata. */ + static class CausedByDrainCombiner implements MetadataCombiner { + private static final CausedByDrainCombiner INSTANCE = new CausedByDrainCombiner(); + + public static CausedByDrainCombiner of() { + return INSTANCE; + } + + @Override + public CausedByDrain createAccumulator() { + return CausedByDrain.NORMAL; + } + + @Override + public CausedByDrain addInput(CausedByDrain current, CausedByDrain input) { + if (current == CausedByDrain.CAUSED_BY_DRAIN || input == CausedByDrain.CAUSED_BY_DRAIN) { + return CausedByDrain.CAUSED_BY_DRAIN; + } + return CausedByDrain.NORMAL; + } + } +} diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/LateDataDroppingDoFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/LateDataDroppingDoFnRunner.java index f5587b46598a..b206e66dcf3e 100644 --- a/runners/core-java/src/main/java/org/apache/beam/runners/core/LateDataDroppingDoFnRunner.java +++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/LateDataDroppingDoFnRunner.java @@ -145,7 +145,13 @@ public Iterable> filter( } else { nonLateElements.add( WindowedValues.of( - element.getValue(), element.getTimestamp(), window, element.getPaneInfo())); + element.getValue(), + element.getTimestamp(), + window, + element.getPaneInfo(), + element.getRecordId(), + element.getRecordOffset(), + element.causedByDrain())); } } } diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/MetadataCombiner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/MetadataCombiner.java new file mode 100644 index 000000000000..55884a8e43a8 --- /dev/null +++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/MetadataCombiner.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.core; + +/** Interface for combining pipeline metadata. */ +interface MetadataCombiner { + T createAccumulator(); + + T addInput(T accumulator, T input); +} diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/ReduceFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/ReduceFnRunner.java index 0721ddc4685e..1ae0c52f853a 100644 --- a/runners/core-java/src/main/java/org/apache/beam/runners/core/ReduceFnRunner.java +++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/ReduceFnRunner.java @@ -38,6 +38,7 @@ import org.apache.beam.sdk.metrics.Counter; import org.apache.beam.sdk.metrics.Metrics; import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.state.CombiningState; import org.apache.beam.sdk.state.TimeDomain; import org.apache.beam.sdk.transforms.windowing.BoundedWindow; import org.apache.beam.sdk.transforms.windowing.PaneInfo; @@ -107,6 +108,12 @@ public class ReduceFnRunner { *

  • It uses discarding or accumulation mode according to the {@link WindowingStrategy}. * */ + static final StateTag> + METADATA_TAG = + StateTags.makeSystemTagInternal( + StateTags.combiningValue( + "combinedMetadata", CombinedMetadata.Coder.of(), CombinedMetadataCombiner.of())); + private final WindowingStrategy windowingStrategy; private final WindowedValueReceiver> outputter; @@ -376,7 +383,7 @@ public void processElements(Iterable> values) throws Excep emit( contextFactory.base(window, StateStyle.DIRECT), contextFactory.base(window, StateStyle.RENAMED), - CausedByDrain.NORMAL); + CombinedMetadata.createDefault()); } // We're all done with merging and emitting elements so can compress the activeWindow state. @@ -590,6 +597,7 @@ private void processElement(Map windowToMergeResult, WindowedValue value.getTimestamp(), StateStyle.DIRECT, value.causedByDrain()); + if (triggerRunner.isClosed(directContext.state())) { // This window has already been closed. droppedDueToClosedWindow.inc(); @@ -604,6 +612,11 @@ private void processElement(Map windowToMergeResult, WindowedValue continue; } + CombinedMetadata metadata = CombinedMetadata.create(value.causedByDrain()); + if (!metadata.equals(CombinedMetadata.createDefault())) { + directContext.state().access(METADATA_TAG).add(metadata); + } + activeWindows.ensureWindowIsActive(window); ReduceFn.ProcessValueContext renamedContext = contextFactory.forValue( @@ -649,15 +662,15 @@ private class WindowActivation { // garbage collect the window. We'll consider any timer at or after the // end-of-window time to be a signal to garbage collect. public final boolean isGarbageCollection; - public final CausedByDrain causedByDrain; + public final CombinedMetadata combinedMetadata; WindowActivation( ReduceFn.Context directContext, ReduceFn.Context renamedContext, - CausedByDrain causedByDrain) { + CombinedMetadata combinedMetadata) { this.directContext = directContext; this.renamedContext = renamedContext; - this.causedByDrain = causedByDrain; + this.combinedMetadata = combinedMetadata; W window = directContext.window(); // The output watermark is before the end of the window if it is either unknown @@ -742,7 +755,8 @@ public void onTimers(Iterable timers) throws Exception { ReduceFn.Context renamedContext = contextFactory.base(window, StateStyle.RENAMED); WindowActivation windowActivation = - new WindowActivation(directContext, renamedContext, timer.causedByDrain()); + new WindowActivation( + directContext, renamedContext, CombinedMetadata.create(timer.causedByDrain())); windowActivations.put(window, windowActivation); // Perform prefetching of state to determine if the trigger should fire. @@ -778,7 +792,7 @@ public void onTimers(Iterable timers) throws Exception { directContext.window(), timerInternals.currentInputWatermarkTime(), timerInternals.currentOutputWatermarkTime(), - windowActivation.causedByDrain); + windowActivation.combinedMetadata.causedByDrain()); boolean windowIsActiveAndOpen = windowActivation.windowIsActiveAndOpen(); if (windowIsActiveAndOpen) { @@ -792,7 +806,7 @@ public void onTimers(Iterable timers) throws Exception { renamedContext, true /* isFinished */, windowActivation.isEndOfWindow, - windowActivation.causedByDrain); + windowActivation.combinedMetadata); checkState(newHold == null, "Hold placed at %s despite isFinished being true.", newHold); } @@ -810,7 +824,7 @@ public void onTimers(Iterable timers) throws Exception { if (windowActivation.windowIsActiveAndOpen() && triggerRunner.shouldFire( directContext.window(), directContext.timers(), directContext.state())) { - emit(directContext, renamedContext, windowActivation.causedByDrain); + emit(directContext, renamedContext, windowActivation.combinedMetadata); } if (windowActivation.isEndOfWindow) { @@ -874,6 +888,7 @@ private void clearAllState( triggerRunner.clearState( directContext.window(), directContext.timers(), directContext.state()); paneInfoTracker.clear(directContext.state()); + directContext.state().access(METADATA_TAG).clear(); } else { // If !windowIsActiveAndOpen then !activeWindows.isActive (1) or triggerRunner.isClosed (2). // For (1), if !activeWindows.isActive then the window must be merging and has been @@ -934,8 +949,9 @@ private void prefetchEmit( private void emit( ReduceFn.Context directContext, ReduceFn.Context renamedContext, - CausedByDrain causedByDrain) + CombinedMetadata metadata) throws Exception { + checkState( triggerRunner.shouldFire( directContext.window(), directContext.timers(), directContext.state())); @@ -950,13 +966,14 @@ private void emit( // Run onTrigger to produce the actual pane contents. // As a side effect it will clear all element holds, but not necessarily any // end-of-window or garbage collection holds. - onTrigger(directContext, renamedContext, isFinished, false /*isEndOfWindow*/, causedByDrain); + onTrigger(directContext, renamedContext, isFinished, false /*isEndOfWindow*/, metadata); // Now that we've triggered, the pane is empty. nonEmptyPanes.clearPane(renamedContext.state()); // Cleanup buffered data if appropriate if (shouldDiscard) { + directContext.state().access(METADATA_TAG).clear(); // Cleanup flavor C: The user does not want any buffered data to persist between panes. reduceFn.clearState(renamedContext); } @@ -1002,6 +1019,7 @@ private void prefetchOnTrigger( /** * Run the {@link ReduceFn#onTrigger} method and produce any necessary output. * + * @param metadata from timer or default * @return output watermark hold added, or {@literal null} if none. */ private @Nullable Instant onTrigger( @@ -1009,8 +1027,19 @@ private void prefetchOnTrigger( ReduceFn.Context renamedContext, final boolean isFinished, boolean isEndOfWindow, - CausedByDrain causedByDrain) + CombinedMetadata metadata) throws Exception { + CombiningState metadataState = + directContext.state().access(METADATA_TAG); + CombinedMetadata aggregatedMetadata = metadataState.read(); + CombinedMetadata fullyAggregatedMetadata = + CombinedMetadataCombiner.of().addInput(aggregatedMetadata, metadata); + final CausedByDrain aggregatedCausedByDrain = fullyAggregatedMetadata.causedByDrain(); + if (isFinished) { + metadataState.clear(); + } else if (!metadata.equals(CombinedMetadata.createDefault())) { + metadataState.add(metadata); + } // Extract the window hold, and as a side effect clear it. final WatermarkHold.OldAndNewHolds pair = watermarkHold.extractAndRelease(renamedContext, isFinished).read(); @@ -1081,12 +1110,12 @@ private void prefetchOnTrigger( .setValue(KV.of(key, toOutput)) .setTimestamp(outputTimestamp) .setWindows(windows) - .setCausedByDrain(causedByDrain) + .setCausedByDrain(aggregatedCausedByDrain) .setPaneInfo(paneInfo) .setReceiver(outputter) .output(); }, - causedByDrain); + aggregatedCausedByDrain); reduceFn.onTrigger(renamedTriggerContext); } diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/CombinedMetadataTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/CombinedMetadataTest.java new file mode 100644 index 000000000000..29d3efe53b03 --- /dev/null +++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/CombinedMetadataTest.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.core; + +import static org.junit.Assert.assertEquals; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import org.apache.beam.sdk.values.CausedByDrain; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for {@link CombinedMetadata}. */ +@RunWith(JUnit4.class) +public class CombinedMetadataTest { + + @Test + public void testCoderEncodeDecode() throws Exception { + CombinedMetadata metadata = CombinedMetadata.create(CausedByDrain.CAUSED_BY_DRAIN); + CombinedMetadata.Coder coder = CombinedMetadata.Coder.of(); + + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + coder.encode(metadata, outStream); + + ByteArrayInputStream inStream = new ByteArrayInputStream(outStream.toByteArray()); + CombinedMetadata decoded = coder.decode(inStream); + + assertEquals(metadata, decoded); + } + + @Test + public void testCoderDecodeEOF() throws Exception { + CombinedMetadata.Coder coder = CombinedMetadata.Coder.of(); + + // Stream with no data (EOF immediately) + ByteArrayInputStream inStream = new ByteArrayInputStream(new byte[0]); + CombinedMetadata decoded = coder.decode(inStream); + + assertEquals(CombinedMetadata.createDefault(), decoded); + } + + @Test + public void testCoderDecodeEmptyMessage() throws Exception { + CombinedMetadata.Coder coder = CombinedMetadata.Coder.of(); + + // Stream with a 0-length delimited message + ByteArrayInputStream inStream = new ByteArrayInputStream(new byte[] {0}); + CombinedMetadata decoded = coder.decode(inStream); + + // ElementMetadata.parseDelimitedFrom(0-length) should yield default proto with NOT_DRAINING + // which translates to CausedByDrain.NORMAL, which is the default! + assertEquals(CombinedMetadata.createDefault(), decoded); + } +} diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/ReduceFnTester.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/ReduceFnTester.java index 43b6a3cb0cb0..2326a1c77d38 100644 --- a/runners/core-java/src/test/java/org/apache/beam/runners/core/ReduceFnTester.java +++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/ReduceFnTester.java @@ -318,7 +318,7 @@ public boolean hasNoActiveWindows() { public final void assertHasOnlyGlobalAndFinishedSetsFor(W... expectedWindows) { assertHasOnlyGlobalAndAllowedTags( ImmutableSet.copyOf(expectedWindows), - ImmutableSet.of(TriggerStateMachineRunner.FINISHED_BITS_TAG)); + ImmutableSet.of(TriggerStateMachineRunner.FINISHED_BITS_TAG, ReduceFnRunner.METADATA_TAG)); } @SafeVarargs @@ -331,7 +331,8 @@ public final void assertHasOnlyGlobalAndStateFor(W... expectedWindows) { PaneInfoTracker.PANE_INFO_TAG, WatermarkHold.watermarkHoldTagForTimestampCombiner( objectStrategy.getTimestampCombiner()), - WatermarkHold.EXTRA_HOLD_TAG)); + WatermarkHold.EXTRA_HOLD_TAG, + ReduceFnRunner.METADATA_TAG)); } @SafeVarargs diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WindowDoFnOperatorTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WindowDoFnOperatorTest.java index 3bee828f23dd..6287ad119ac2 100644 --- a/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WindowDoFnOperatorTest.java +++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WindowDoFnOperatorTest.java @@ -156,6 +156,11 @@ public void testTimerCleanupOfPendingTimerList() throws Exception { // Note that the following is 1 because the state is key-partitioned assertThat(Iterables.size(timerInternals.pendingTimersById.keys()), is(1)); + // Expected 6 state entries: + // - 2 entries for user buffer state ("buf") - one per key + // - 2 entries for watermark hold state ("hold") - one per key + // - 2 entries for non-empty panes count state ("count") - one per key + // - additional 2 entries for "combinedMetadata" state if non default metadata will be added assertThat(testHarness.numKeyedStateEntries(), is(6)); // close bundle testHarness.setProcessingTime( @@ -169,6 +174,11 @@ public void testTimerCleanupOfPendingTimerList() throws Exception { // Note that the following is zero because we only the first key is active assertThat(Iterables.size(timerInternals.pendingTimersById.keys()), is(0)); + // Expected 4 state entries remaining for the second key (which is still active): + // - 1 entry for user buffer state ("buf") + // - 1 entry for watermark hold state ("hold") + // - 1 entry for non-empty panes count state ("count") + // - 1 entry for "combinedMetadata" state if non default metadata will be added assertThat(testHarness.numKeyedStateEntries(), is(3)); // close bundle diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillSink.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillSink.java index b07dc670e326..3ab46f0ddb42 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillSink.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillSink.java @@ -39,6 +39,7 @@ import org.apache.beam.sdk.transforms.windowing.PaneInfo; import org.apache.beam.sdk.transforms.windowing.PaneInfo.PaneInfoCoder; import org.apache.beam.sdk.util.ByteStringOutputStream; +import org.apache.beam.sdk.values.CausedByDrain; import org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.ValueWithRecordId; import org.apache.beam.sdk.values.ValueWithRecordId.ValueWithRecordIdCoder; @@ -220,7 +221,12 @@ public long add(WindowedValue data) throws IOException { ByteString id = ByteString.EMPTY; // todo #33176 specify additional metadata in the future BeamFnApi.Elements.ElementMetadata additionalMetadata = - BeamFnApi.Elements.ElementMetadata.newBuilder().build(); + BeamFnApi.Elements.ElementMetadata.newBuilder() + .setDrain( + data.causedByDrain() == CausedByDrain.CAUSED_BY_DRAIN + ? BeamFnApi.Elements.DrainMode.Enum.DRAINING + : BeamFnApi.Elements.DrainMode.Enum.NOT_DRAINING) + .build(); ByteString metadata = encodeMetadata( stream, windowsCoder, data.getWindows(), data.getPaneInfo(), additionalMetadata); diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java index cff5ba4a2fc4..d8a1d1b90d47 100644 --- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java @@ -1807,6 +1807,7 @@ public void testMergeWindows() throws Exception { String timerTagPrefix = "/s" + window + "+0"; ByteString bufferTag = ByteString.copyFromUtf8(window + "+ubuf"); ByteString paneInfoTag = ByteString.copyFromUtf8(window + "+upaneInfo"); + ByteString combinedMetadataTag = ByteString.copyFromUtf8(window + "+ucombinedMetadata"); String watermarkDataHoldTag = window + "+uhold"; String watermarkExtraHoldTag = window + "+uextra"; String stateFamily = "MergeWindows"; @@ -1836,10 +1837,10 @@ public void testMergeWindows() throws Exception { // Set timer verifyTimers(actualOutput, buildWatermarkTimer(timerTagPrefix, 999)); - + // no combined metadata as it's default assertThat( actualOutput.getBagUpdatesList(), - Matchers.contains( + Matchers.containsInAnyOrder( Matchers.equalTo( Windmill.TagBag.newBuilder() .setTag(bufferTag) @@ -1915,6 +1916,13 @@ public void testMergeWindows() throws Exception { .getValueBuilder() .setTimestamp(0) .setData(ByteString.EMPTY); + dataBuilder + .addBagsBuilder() + .setTag(combinedMetadataTag) + .setStateFamily(stateFamily) + // 0x02: Protobuf Delimited Length (Payload is 2 bytes), 0x08: Protobuf Tag + // (Field #1), 0x01: Protobuf Value (Enum 1 = NOT_DRAINING). + .addValues(ByteString.copyFrom(new byte[] {0x02, 0x08, 0x01})); server.whenGetDataCalled().thenReturn(dataResponse.build()); expectedBytesRead += dataBuilder.build().getSerializedSize(); @@ -1960,12 +1968,18 @@ public void testMergeWindows() throws Exception { assertThat( "" + actualOutput.getBagUpdatesList(), actualOutput.getBagUpdatesList(), - Matchers.contains( + Matchers.containsInAnyOrder( Matchers.equalTo( Windmill.TagBag.newBuilder() .setTag(bufferTag) .setStateFamily(stateFamily) .setDeleteAll(true) + .build()), + Matchers.equalTo( + Windmill.TagBag.newBuilder() + .setTag(combinedMetadataTag) + .setStateFamily(stateFamily) + .setDeleteAll(true) .build()))); verifyHolds( @@ -2097,6 +2111,7 @@ public void testMergeWindowsCaching() throws Exception { String timerTagPrefix = "/s" + window + "+0"; ByteString bufferTag = ByteString.copyFromUtf8(window + "+ubuf"); ByteString paneInfoTag = ByteString.copyFromUtf8(window + "+upaneInfo"); + ByteString combinedMetadataTag = ByteString.copyFromUtf8(window + "+ucombinedMetadata"); String watermarkDataHoldTag = window + "+uhold"; String watermarkExtraHoldTag = window + "+uextra"; String stateFamily = "MergeWindows"; @@ -2127,9 +2142,10 @@ public void testMergeWindowsCaching() throws Exception { // Set timer verifyTimers(actualOutput, buildWatermarkTimer(timerTagPrefix, 999)); + // no combinedMetadataTag as it is default assertThat( actualOutput.getBagUpdatesList(), - Matchers.contains( + Matchers.containsInAnyOrder( Matchers.equalTo( Windmill.TagBag.newBuilder() .setTag(bufferTag) @@ -2205,6 +2221,11 @@ public void testMergeWindowsCaching() throws Exception { .getValueBuilder() .setTimestamp(0) .setData(ByteString.EMPTY); + dataBuilder + .addBagsBuilder() + .setTag(combinedMetadataTag) + .setStateFamily(stateFamily) + .addValues(ByteString.copyFrom(new byte[] {0x02, 0x08, 0x01})); server.whenGetDataCalled().thenReturn(dataResponse.build()); expectedBytesRead += dataBuilder.build().getSerializedSize(); @@ -2250,12 +2271,18 @@ public void testMergeWindowsCaching() throws Exception { assertThat( "" + actualOutput.getBagUpdatesList(), actualOutput.getBagUpdatesList(), - Matchers.contains( + Matchers.containsInAnyOrder( Matchers.equalTo( Windmill.TagBag.newBuilder() .setTag(bufferTag) .setStateFamily(stateFamily) .setDeleteAll(true) + .build()), + Matchers.equalTo( + Windmill.TagBag.newBuilder() + .setTag(combinedMetadataTag) + .setStateFamily(stateFamily) + .setDeleteAll(true) .build()))); verifyHolds( diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/values/WindowedValues.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/values/WindowedValues.java index ba2720f5e39b..70168024762e 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/values/WindowedValues.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/values/WindowedValues.java @@ -866,6 +866,10 @@ public static void setMetadataSupported() { metadataSupported = true; } + public static void setMetadataNotSupported() { + metadataSupported = false; + } + public static boolean isMetadataSupported() { return metadataSupported; } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/MetadataPropagationTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/MetadataPropagationTest.java index a2ff99905f6c..cba13ecb6bf2 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/MetadataPropagationTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/MetadataPropagationTest.java @@ -17,13 +17,23 @@ */ package org.apache.beam.sdk.transforms; +import org.apache.beam.sdk.coders.BooleanCoder; import org.apache.beam.sdk.testing.NeedsRunner; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.testing.TestStream; +import org.apache.beam.sdk.testing.ValidatesRunner; +import org.apache.beam.sdk.transforms.windowing.FixedWindows; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow; +import org.apache.beam.sdk.transforms.windowing.Window; import org.apache.beam.sdk.values.CausedByDrain; +import org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.TimestampedValue; import org.apache.beam.sdk.values.WindowedValues; -import org.junit.Ignore; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.After; import org.junit.Rule; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -36,10 +46,14 @@ public class MetadataPropagationTest { /** Tests for metadata propagation. */ @Rule public final transient TestPipeline pipeline = TestPipeline.create(); - static class CausedByDrainSettingDoFn extends DoFn { + static class CausedByDrainSettingDoFn extends DoFn { @ProcessElement - public void process(OutputReceiver r) { - r.builder("value").setCausedByDrain(CausedByDrain.CAUSED_BY_DRAIN).output(); + public void process(@Element Boolean isDrain, OutputReceiver r) { + if (isDrain) { + r.builder("value").setCausedByDrain(CausedByDrain.CAUSED_BY_DRAIN).output(); + } else { + r.builder("value").setCausedByDrain(CausedByDrain.NORMAL).output(); + } } } @@ -52,12 +66,11 @@ public void process(ProcessContext pc, OutputReceiver r) { @Test @Category(NeedsRunner.class) - @Ignore public void testMetadataPropagationAcrossShuffleParameter() { WindowedValues.WindowedValueCoder.setMetadataSupported(); PCollection results = pipeline - .apply(Create.of(1)) + .apply(Create.of(true)) .apply(ParDo.of(new CausedByDrainSettingDoFn())) .apply(Redistribute.arbitrarily()) .apply(ParDo.of(new CausedByDrainExtractingDoFn())); @@ -68,11 +81,11 @@ public void testMetadataPropagationAcrossShuffleParameter() { } @Test - @Category(NeedsRunner.class) + @Category({ValidatesRunner.class, NeedsRunner.class}) public void testMetadataPropagationParameter() { PCollection results = pipeline - .apply(Create.of(1)) + .apply(Create.of(true)) .apply(ParDo.of(new CausedByDrainSettingDoFn())) .apply(ParDo.of(new CausedByDrainExtractingDoFn())); @@ -80,4 +93,70 @@ public void testMetadataPropagationParameter() { pipeline.run(); } + + static class CausedByDrainExtracingFromGBKDoFn + extends DoFn>, String> { + @ProcessElement + public void process(ProcessContext pc, OutputReceiver r) { + r.output(pc.causedByDrain().toString()); + } + } + + /** + * Tests metadata propagation across GroupByKey. Note: This test works only with DirectRunner and + * runners that support metadata propagation (e.g. via a flag to enable metadata encoding in + * coders). It fails on portable runners like Prism because they do not have implementation for + * metadata propagation, leading to coder mismatches. + */ + @Test + @Category(NeedsRunner.class) + public void testMetadataPropagationAcrossGBK() { + WindowedValues.WindowedValueCoder.setMetadataSupported(); + Instant baseTime = new Instant(0); + TestStream stream = + TestStream.create(BooleanCoder.of()) + .advanceWatermarkTo(baseTime) + .addElements(TimestampedValue.of(false, baseTime.plus(Duration.standardSeconds(10)))) + .advanceWatermarkTo(baseTime.plus(Duration.standardMinutes(1))) + .addElements( + TimestampedValue.of(false, baseTime.plus(Duration.standardSeconds(71))), + TimestampedValue.of(true, baseTime.plus(Duration.standardSeconds(72)))) + .advanceWatermarkTo(baseTime.plus(Duration.standardMinutes(2))) + .addElements( + TimestampedValue.of(false, baseTime.plus(Duration.standardSeconds(130))), + TimestampedValue.of(true, baseTime.plus(Duration.standardSeconds(131))), // drain + TimestampedValue.of(false, baseTime.plus(Duration.standardSeconds(132)))) + .advanceWatermarkTo(baseTime.plus(Duration.standardMinutes(3))) + .addElements( + TimestampedValue.of(false, baseTime.plus(Duration.standardSeconds(181)))) // normal + .advanceWatermarkTo(baseTime.plus(Duration.standardMinutes(4))) + .advanceWatermarkToInfinity(); + + Duration windowDuration = Duration.standardMinutes(1); + IntervalWindow window1 = new IntervalWindow(baseTime, windowDuration); + IntervalWindow window2 = new IntervalWindow(window1.end(), windowDuration); + IntervalWindow window3 = new IntervalWindow(window2.end(), windowDuration); + IntervalWindow window4 = new IntervalWindow(window3.end(), windowDuration); + + PCollection results = + pipeline + .apply(stream) + .apply(ParDo.of(new CausedByDrainSettingDoFn())) + .apply(WithKeys.of("1")) + .apply(Window.into(FixedWindows.of(windowDuration))) + .apply(GroupByKey.create()) + .apply(ParDo.of(new CausedByDrainExtracingFromGBKDoFn())); + + PAssert.that(results).inWindow(window1).containsInAnyOrder("NORMAL"); + PAssert.that(results).inWindow(window2).containsInAnyOrder("CAUSED_BY_DRAIN"); + PAssert.that(results).inWindow(window3).containsInAnyOrder("CAUSED_BY_DRAIN"); + PAssert.that(results).inWindow(window4).containsInAnyOrder("NORMAL"); + + pipeline.run(); + } + + @After + public void tearDown() { + WindowedValues.WindowedValueCoder.setMetadataNotSupported(); + } }