提交 6bd5714d 编写于 作者: A Aljoscha Krettek

[FLINK-3121] Emit Final Watermark in Kafka Source

Kafka sources that don't read from any partition never emit a watermark,
thereby blocking the progress of event-time in downstream operations.
This changes the Kafka Source to emit a Long.MAX_VALUE watermark if it
knows that it will never receive data.

This also changes the Timestamp Extraction operator to react to a
Long.MAX_VALUE watermark by itself emitting a Long.MAX_VALUE watermark.
上级 4b648870
......@@ -32,6 +32,7 @@ import org.apache.flink.streaming.api.checkpoint.CheckpointNotifier;
import org.apache.flink.streaming.api.checkpoint.CheckpointedAsynchronously;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
import org.apache.flink.streaming.api.operators.StreamingRuntimeContext;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.connectors.kafka.internals.Fetcher;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;
import org.apache.flink.streaming.connectors.kafka.internals.LegacyFetcher;
......@@ -434,7 +435,12 @@ public class FlinkKafkaConsumer<T> extends RichParallelSourceFunction<T>
}
}
else {
// this source never completes
// this source never completes, so emit a Long.MAX_VALUE watermark
// to not block watermark forwarding
if (getRuntimeContext().getExecutionConfig().areTimestampsEnabled()) {
sourceContext.emitWatermark(new Watermark(Long.MAX_VALUE));
}
final Object waitLock = new Object();
while (running) {
// wait until we are canceled
......
......@@ -60,6 +60,12 @@ public class StreamSource<T> extends AbstractUdfStreamOperator<T, SourceFunction
// This will mostly emit a final +Inf Watermark to make the Watermark logic work
// when some sources finish before others do
ctx.close();
if (executionConfig.areTimestampsEnabled()) {
synchronized (lockingObject) {
output.emitWatermark(new Watermark(Long.MAX_VALUE));
}
}
}
public void cancel() {
......@@ -296,14 +302,6 @@ public class StreamSource<T> extends AbstractUdfStreamOperator<T, SourceFunction
}
@Override
public void close() {
// emit one last +Inf watermark to make downstream watermark processing work
// when some sources close early
synchronized (lockingObject) {
if (watermarkMultiplexingEnabled) {
output.emitWatermark(new Watermark(Long.MAX_VALUE));
}
}
}
public void close() {}
}
}
......@@ -56,14 +56,6 @@ public class ExtractTimestampsOperator<T>
currentWatermark = Long.MIN_VALUE;
}
@Override
public void close() throws Exception {
super.close();
// emit a final +Inf watermark, just like the sources
output.emitWatermark(new Watermark(Long.MAX_VALUE));
}
@Override
public void processElement(StreamRecord<T> element) throws Exception {
long newTimestamp = userFunction.extractTimestamp(element.getValue(), element.getTimestamp());
......@@ -90,6 +82,11 @@ public class ExtractTimestampsOperator<T>
@Override
public void processWatermark(Watermark mark) throws Exception {
// This operator is basically a watermark source itself, so incoming watermarks
// are dropped, with one exception:
// if we receive a Long.MAX_VALUE watermark we forward it since it is used
// to signal the end of input and to not block watermark progress downstream.
// The comparison against currentWatermark makes the guard fail once
// currentWatermark has been set to Long.MAX_VALUE, so a repeated
// Long.MAX_VALUE watermark is not forwarded a second time.
if (mark.getTimestamp() == Long.MAX_VALUE && mark.getTimestamp() > currentWatermark) {
currentWatermark = Long.MAX_VALUE;
output.emitWatermark(mark);
}
}
}
......@@ -447,6 +447,68 @@ public class TimestampITCase {
}
}
/**
 * This test verifies that the timestamp extractor forwards Long.MAX_VALUE watermarks.
 *
 * <p>The source emits regular watermarks followed by two Long.MAX_VALUE watermarks.
 * The test then asserts that exactly one watermark was recorded in
 * {@code CustomOperator.finalWatermarks[0]} and that its timestamp is Long.MAX_VALUE.
 */
@Test
public void testTimestampExtractorWithLongMaxWatermarkFromSource() throws Exception {
final int NUM_ELEMENTS = 10;
StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
env.setParallelism(2);
env.getConfig().disableSysoutLogging();
env.getConfig().enableTimestamps();
env.getConfig().setAutoWatermarkInterval(1);
DataStream<Integer> source1 = env.addSource(new EventTimeSourceFunction<Integer>() {
@Override
public void run(SourceContext<Integer> ctx) throws Exception {
int index = 0;
// each iteration emits two timestamped elements followed by a regular watermark
while (index < NUM_ELEMENTS) {
ctx.collectWithTimestamp(index, index);
ctx.collectWithTimestamp(index - 1, index - 1);
index++;
ctx.emitWatermark(new Watermark(index-2));
}
// emit the final Long.MAX_VALUE watermark, do it twice and verify that
// we only see one in the result
ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
}
@Override
public void cancel() {
// nothing to do: run() returns on its own after the bounded loop
}
});
source1.assignTimestamps(new TimestampExtractor<Integer>() {
@Override
public long extractTimestamp(Integer element, long currentTimestamp) {
// the element value itself is used as its timestamp
return element;
}
@Override
public long extractWatermark(Integer element, long currentTimestamp) {
// NOTE(review): presumably Long.MIN_VALUE means "no per-element watermark" - confirm
return Long.MIN_VALUE;
}
@Override
public long getCurrentWatermark() {
// NOTE(review): presumably Long.MIN_VALUE keeps the extractor from advancing
// the watermark by itself, leaving only the forwarded one - confirm
return Long.MIN_VALUE;
}
})
.transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true));
env.execute();
// exactly one watermark must have been recorded, and it must be Long.MAX_VALUE
Assert.assertTrue(CustomOperator.finalWatermarks[0].size() == 1);
Assert.assertTrue(CustomOperator.finalWatermarks[0].get(0).getTimestamp() == Long.MAX_VALUE);
}
/**
* This tests whether the program throws an exception when an event-time source tries
* to emit without timestamp.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册