Commit 26c6447e authored by Sachin Goel, committed by Maximilian Michels

[FLINK-1819][core] allow access to RuntimeContext from Input and OutputFormats

1. Allow access to Runtime Context from I/O formats.
2. Make all existing I/O formats context aware.

This closes #966.
Parent 7cc85c7d
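For orientation, the sketch below is not part of the commit (the class name is illustrative): it shows what the change enables in user code. Because GenericInputFormat now extends RichInputFormat, a subclass can query the RuntimeContext that the runtime injects via setRuntimeContext() before the format's life cycle methods run.

import java.io.IOException;

import org.apache.flink.api.common.io.GenericInputFormat;

// Illustrative sketch only: a source that reports which parallel subtask
// it runs in, using the RuntimeContext introduced by this commit.
public class SubtaskAwareInputFormat extends GenericInputFormat<String> {
	private static final long serialVersionUID = 1L;

	private boolean emitted = false;

	@Override
	public boolean reachedEnd() throws IOException {
		return emitted;
	}

	@Override
	public String nextRecord(String reuse) throws IOException {
		emitted = true;
		// RuntimeContext access, newly possible with this commit
		return "subtask " + getRuntimeContext().getIndexOfThisSubtask()
				+ " of " + getRuntimeContext().getNumberOfParallelSubtasks();
	}
}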
......@@ -46,7 +46,7 @@ import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
/**
* The base class for {@link InputFormat}s that read from files. For specific input types the
* The base class for {@link RichInputFormat}s that read from files. For specific input types the
* {@link #nextRecord(Object)} and {@link #reachedEnd()} methods need to be implemented.
* Additionally, one may override {@link #open(FileInputSplit)} and {@link #close()} to
* change the life cycle behavior.
......@@ -54,7 +54,7 @@ import org.apache.flink.core.fs.Path;
* <p>After the {@link #open(FileInputSplit)} method completed, the file input data is available
* from the {@link #stream} field.</p>
*/
public abstract class FileInputFormat<OT> implements InputFormat<OT, FileInputSplit> {
public abstract class FileInputFormat<OT> extends RichInputFormat<OT, FileInputSplit> {
// -------------------------------------- Constants -------------------------------------------
......
......@@ -33,10 +33,11 @@ import org.apache.flink.core.fs.Path;
import org.apache.flink.core.fs.FileSystem.WriteMode;
/**
* The abstract base class for all output formats that are file based. Contains the logic to open/close the target
* The abstract base class for all Rich output formats that are file based. Contains the logic to
* open/close the target
* file streams.
*/
public abstract class FileOutputFormat<IT> implements OutputFormat<IT>, InitializeOnMaster, CleanupWhenUnsuccessful {
public abstract class FileOutputFormat<IT> extends RichOutputFormat<IT> implements InitializeOnMaster, CleanupWhenUnsuccessful {
private static final long serialVersionUID = 1L;
......
......@@ -26,9 +26,9 @@ import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.io.GenericInputSplit;
/**
* Generic base class for all inputs that are not based on files.
* Generic base class for all Rich inputs that are not based on files.
*/
public abstract class GenericInputFormat<OT> implements InputFormat<OT, GenericInputSplit> {
public abstract class GenericInputFormat<OT> extends RichInputFormat<OT, GenericInputSplit> {
private static final long serialVersionUID = 1L;
......
......@@ -19,6 +19,7 @@
package org.apache.flink.api.common.io;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.common.io.statistics.BaseStatistics;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.io.InputSplit;
......@@ -52,6 +53,7 @@ import java.io.IOException;
* @param <S> The InputSplit type of the wrapped InputFormat.
*
* @see org.apache.flink.api.common.io.InputFormat
* @see org.apache.flink.api.common.io.RichInputFormat
* @see org.apache.flink.api.common.operators.base.JoinOperatorBase
* @see org.apache.flink.api.common.operators.base.CrossOperatorBase
* @see org.apache.flink.api.common.operators.base.MapOperatorBase
......@@ -59,7 +61,7 @@ import java.io.IOException;
* @see org.apache.flink.api.common.operators.base.FilterOperatorBase
* @see org.apache.flink.api.common.operators.base.MapPartitionOperatorBase
*/
public final class ReplicatingInputFormat<OT, S extends InputSplit> implements InputFormat<OT, S> {
public final class ReplicatingInputFormat<OT, S extends InputSplit> extends RichInputFormat<OT, S> {
private static final long serialVersionUID = 1L;
......@@ -112,4 +114,20 @@ public final class ReplicatingInputFormat<OT, S extends InputSplit> implements I
public void close() throws IOException {
this.replicatedIF.close();
}
	@Override
	public void setRuntimeContext(RuntimeContext context) {
		if (this.replicatedIF instanceof RichInputFormat) {
			((RichInputFormat) this.replicatedIF).setRuntimeContext(context);
		}
	}

	@Override
	public RuntimeContext getRuntimeContext() {
		if (this.replicatedIF instanceof RichInputFormat) {
			return ((RichInputFormat) this.replicatedIF).getRuntimeContext();
		} else {
			throw new RuntimeException("The underlying input format to this ReplicatingInputFormat isn't context aware");
		}
	}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.io;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.core.io.InputSplit;
/**
* An abstract stub implementation for Rich input formats.
* Rich formats have access to their runtime execution context via {@link #getRuntimeContext()}.
*/
public abstract class RichInputFormat<OT, T extends InputSplit> implements InputFormat<OT, T> {

	private static final long serialVersionUID = 1L;

	// --------------------------------------------------------------------------------------------
	//  Runtime context access
	// --------------------------------------------------------------------------------------------

	private transient RuntimeContext runtimeContext;

	public void setRuntimeContext(RuntimeContext t) {
		this.runtimeContext = t;
	}

	public RuntimeContext getRuntimeContext() {
		if (this.runtimeContext != null) {
			return this.runtimeContext;
		} else {
			throw new IllegalStateException("The runtime context has not been initialized yet. Try accessing " +
					"it in one of the other life cycle methods.");
		}
	}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.io;
import org.apache.flink.api.common.functions.RuntimeContext;
/**
* An abstract stub implementation for Rich output formats.
* Rich formats have access to their runtime execution context via {@link #getRuntimeContext()}.
*/
public abstract class RichOutputFormat<IT> implements OutputFormat<IT> {

	private static final long serialVersionUID = 1L;

	// --------------------------------------------------------------------------------------------
	//  Runtime context access
	// --------------------------------------------------------------------------------------------

	private transient RuntimeContext runtimeContext;

	public void setRuntimeContext(RuntimeContext t) {
		this.runtimeContext = t;
	}

	public RuntimeContext getRuntimeContext() {
		if (this.runtimeContext != null) {
			return this.runtimeContext;
		} else {
			throw new IllegalStateException("The runtime context has not been initialized yet. Try accessing " +
					"it in one of the other life cycle methods.");
		}
	}
}
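As a usage sketch (illustrative only, not part of the commit; the class name and the accumulator key "records-written" are made up), a custom sink built on the new base class can register an accumulator through the injected context in open():

import org.apache.flink.api.common.accumulators.LongCounter;
import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.configuration.Configuration;

// Illustrative sketch only: counts written records via an accumulator
// obtained from the RuntimeContext that the runtime injects.
public class CountingOutputFormat<T> extends RichOutputFormat<T> {
	private static final long serialVersionUID = 1L;

	private final LongCounter counter = new LongCounter();

	@Override
	public void configure(Configuration parameters) {}

	@Override
	public void open(int taskNumber, int numTasks) {
		// The context is available here because setRuntimeContext() runs before open()
		getRuntimeContext().addAccumulator("records-written", counter);
	}

	@Override
	public void writeRecord(T record) {
		counter.add(1L);
	}

	@Override
	public void close() {}
}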
......@@ -28,8 +28,7 @@ import org.apache.flink.core.memory.DataOutputView;
*
* @see SerializedInputFormat
*/
public class SerializedOutputFormat<T extends IOReadableWritable> extends
BinaryOutputFormat<T> {
public class SerializedOutputFormat<T extends IOReadableWritable> extends BinaryOutputFormat<T> {
private static final long serialVersionUID = 1L;
......
......@@ -40,6 +40,8 @@ import org.apache.flink.api.common.aggregators.ConvergenceCriterion;
import org.apache.flink.api.common.functions.IterationRuntimeContext;
import org.apache.flink.api.common.functions.RichFunction;
import org.apache.flink.api.common.functions.util.RuntimeUDFContext;
import org.apache.flink.api.common.io.RichInputFormat;
import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.api.common.operators.base.BulkIterationBase;
import org.apache.flink.api.common.operators.base.BulkIterationBase.PartialSolutionPlaceHolder;
import org.apache.flink.api.common.operators.base.DeltaIterationBase;
......@@ -128,10 +130,10 @@ public class CollectionExecutor {
result = executeBinaryOperator((DualInputOperator<?, ?, ?, ?>) operator, superStep);
}
else if (operator instanceof GenericDataSourceBase) {
result = executeDataSource((GenericDataSourceBase<?, ?>) operator);
result = executeDataSource((GenericDataSourceBase<?, ?>) operator, superStep);
}
else if (operator instanceof GenericDataSinkBase) {
executeDataSink((GenericDataSinkBase<?>) operator);
executeDataSink((GenericDataSinkBase<?>) operator, superStep);
result = Collections.emptyList();
}
else {
......@@ -148,7 +150,7 @@ public class CollectionExecutor {
// Operator class specific execution methods
// --------------------------------------------------------------------------------------------
private <IN> void executeDataSink(GenericDataSinkBase<?> sink) throws Exception {
private <IN> void executeDataSink(GenericDataSinkBase<?> sink, int superStep) throws Exception {
Operator<?> inputOp = sink.getInput();
if (inputOp == null) {
throw new InvalidProgramException("The data sink " + sink.getName() + " has no input.");
......@@ -160,13 +162,31 @@ public class CollectionExecutor {
@SuppressWarnings("unchecked")
GenericDataSinkBase<IN> typedSink = (GenericDataSinkBase<IN>) sink;
typedSink.executeOnCollections(input, executionConfig);
		// build the runtime context and compute broadcast variables, if necessary
		RuntimeUDFContext ctx;
		if (RichOutputFormat.class.isAssignableFrom(typedSink.getUserCodeWrapper().getUserCodeClass())) {
			ctx = superStep == 0 ? new RuntimeUDFContext(typedSink.getName(), 1, 0, getClass().getClassLoader(), executionConfig, accumulators) :
					new IterationRuntimeUDFContext(typedSink.getName(), 1, 0, classLoader, executionConfig, accumulators);
		} else {
			ctx = null;
		}

		typedSink.executeOnCollections(input, ctx, executionConfig);
}
private <OUT> List<OUT> executeDataSource(GenericDataSourceBase<?, ?> source) throws Exception {
private <OUT> List<OUT> executeDataSource(GenericDataSourceBase<?, ?> source, int superStep)
throws Exception {
@SuppressWarnings("unchecked")
GenericDataSourceBase<OUT, ?> typedSource = (GenericDataSourceBase<OUT, ?>) source;
return typedSource.executeOnCollections(executionConfig);
		// build the runtime context and compute broadcast variables, if necessary
		RuntimeUDFContext ctx;
		if (RichInputFormat.class.isAssignableFrom(typedSource.getUserCodeWrapper().getUserCodeClass())) {
			ctx = superStep == 0 ? new RuntimeUDFContext(source.getName(), 1, 0, getClass().getClassLoader(), executionConfig, accumulators) :
					new IterationRuntimeUDFContext(source.getName(), 1, 0, classLoader, executionConfig, accumulators);
		} else {
			ctx = null;
		}

		return typedSource.executeOnCollections(ctx, executionConfig);
}
private <IN, OUT> List<OUT> executeUnaryOperator(SingleInputOperator<?, ?, ?> operator, int superStep) throws Exception {
......
......@@ -19,15 +19,17 @@
package org.apache.flink.api.common.operators;
import java.util.List;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.distributions.DataDistribution;
import org.apache.flink.api.common.io.FinalizeOnMaster;
import org.apache.flink.api.common.io.InitializeOnMaster;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.common.io.OutputFormat;
import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.api.common.io.InitializeOnMaster;
import org.apache.flink.api.common.io.FinalizeOnMaster;
import org.apache.flink.api.common.operators.util.UserCodeObjectWrapper;
import org.apache.flink.api.common.operators.util.UserCodeWrapper;
import org.apache.flink.api.common.typeinfo.AtomicType;
......@@ -296,11 +298,11 @@ public class GenericDataSinkBase<IN> extends Operator<Nothing> {
visitor.postVisit(this);
}
}
// --------------------------------------------------------------------------------------------
@SuppressWarnings("unchecked")
protected void executeOnCollections(List<IN> inputData, ExecutionConfig executionConfig) throws Exception {
protected void executeOnCollections(List<IN> inputData, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
OutputFormat<IN> format = this.formatWrapper.getUserCodeObject();
TypeInformation<IN> inputType = getInput().getOperatorInfo().getOutputType();
......@@ -328,9 +330,11 @@ public class GenericDataSinkBase<IN> extends Operator<Nothing> {
		if (format instanceof InitializeOnMaster) {
			((InitializeOnMaster) format).initializeGlobal(1);
		}

		format.configure(this.parameters);

		if (format instanceof RichOutputFormat) {
			((RichOutputFormat) format).setRuntimeContext(ctx);
		}

		format.open(0, 1);
		for (IN element : inputData) {
			format.writeRecord(element);
......
......@@ -24,7 +24,9 @@ import java.util.List;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.functions.Partitioner;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.api.common.io.RichInputFormat;
import org.apache.flink.api.common.operators.util.UserCodeClassWrapper;
import org.apache.flink.api.common.operators.util.UserCodeObjectWrapper;
import org.apache.flink.api.common.operators.util.UserCodeWrapper;
......@@ -200,14 +202,18 @@ public class GenericDataSourceBase<OUT, T extends InputFormat<OUT, ?>> extends O
visitor.postVisit(this);
}
}
// --------------------------------------------------------------------------------------------
protected List<OUT> executeOnCollections(ExecutionConfig executionConfig) throws Exception {
protected List<OUT> executeOnCollections(RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
@SuppressWarnings("unchecked")
InputFormat<OUT, InputSplit> inputFormat = (InputFormat<OUT, InputSplit>) this.formatWrapper.getUserCodeObject();
		inputFormat.configure(this.parameters);

		if (inputFormat instanceof RichInputFormat) {
			((RichInputFormat) inputFormat).setRuntimeContext(ctx);
		}

		List<OUT> result = new ArrayList<OUT>();

		// splits
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.io;
import java.util.HashMap;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.accumulators.Accumulator;
import org.apache.flink.api.common.functions.util.RuntimeUDFContext;
import org.apache.flink.types.Value;
import org.junit.Assert;
import org.junit.Test;
/**
 * Tests runtime context access from inside a RichInputFormat class.
 */
public class RichInputFormatTest {

	@Test
	public void testCheckRuntimeContextAccess() {
		final SerializedInputFormat<Value> inputFormat = new SerializedInputFormat<Value>();
		inputFormat.setRuntimeContext(new RuntimeUDFContext("test name", 3, 1,
				getClass().getClassLoader(), new ExecutionConfig(), new HashMap<String, Accumulator<?, ?>>()));

		Assert.assertEquals(1, inputFormat.getRuntimeContext().getIndexOfThisSubtask());
		Assert.assertEquals(3, inputFormat.getRuntimeContext().getNumberOfParallelSubtasks());
	}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.io;
import java.util.HashMap;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.accumulators.Accumulator;
import org.apache.flink.api.common.functions.util.RuntimeUDFContext;
import org.apache.flink.types.Value;
import org.junit.Assert;
import org.junit.Test;
/**
 * Tests runtime context access from inside a RichOutputFormat class.
 */
public class RichOutputFormatTest {

	@Test
	public void testCheckRuntimeContextAccess() {
		final SerializedOutputFormat<Value> outputFormat = new SerializedOutputFormat<Value>();
		outputFormat.setRuntimeContext(new RuntimeUDFContext("test name", 3, 1,
				getClass().getClassLoader(), new ExecutionConfig(), new HashMap<String, Accumulator<?, ?>>()));

		Assert.assertEquals(1, outputFormat.getRuntimeContext().getIndexOfThisSubtask());
		Assert.assertEquals(3, outputFormat.getRuntimeContext().getNumberOfParallelSubtasks());
	}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.operators;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.accumulators.Accumulator;
import org.apache.flink.api.common.functions.util.RuntimeUDFContext;
import org.apache.flink.api.common.operators.util.TestIOData;
import org.apache.flink.api.common.operators.util.TestNonRichOutputFormat;
import org.apache.flink.api.common.operators.util.TestNonRichInputFormat;
import org.apache.flink.api.common.operators.util.TestRichOutputFormat;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.types.Nothing;
import org.junit.Test;
import java.util.HashMap;
import static java.util.Arrays.asList;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
/**
* Checks the GenericDataSinkBase operator for both Rich and non-Rich output formats.
*/
@SuppressWarnings("serial")
public class GenericDataSinkBaseTest implements java.io.Serializable {

	private static TestNonRichInputFormat in = new TestNonRichInputFormat();

	GenericDataSourceBase<String, TestNonRichInputFormat> source =
			new GenericDataSourceBase<String, TestNonRichInputFormat>(
					in, new OperatorInformation<String>(BasicTypeInfo.STRING_TYPE_INFO), "testSource");

	@Test
	public void testDataSinkPlain() {
		try {
			TestNonRichOutputFormat out = new TestNonRichOutputFormat();
			GenericDataSinkBase<String> sink = new GenericDataSinkBase<String>(
					out,
					new UnaryOperatorInformation<String, Nothing>(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.getInfoFor(Nothing.class)),
					"test_sink");
			sink.setInput(source);

			ExecutionConfig executionConfig = new ExecutionConfig();
			executionConfig.disableObjectReuse();
			in.reset();
			sink.executeOnCollections(asList(TestIOData.NAMES), null, executionConfig);
			assertEquals(asList(TestIOData.NAMES), out.output);

			executionConfig.enableObjectReuse();
			out.clear();
			in.reset();
			sink.executeOnCollections(asList(TestIOData.NAMES), null, executionConfig);
			assertEquals(asList(TestIOData.NAMES), out.output);
		}
		catch (Exception e) {
			e.printStackTrace();
			fail(e.getMessage());
		}
	}

	@Test
	public void testDataSinkWithRuntimeContext() {
		try {
			TestRichOutputFormat out = new TestRichOutputFormat();
			GenericDataSinkBase<String> sink = new GenericDataSinkBase<String>(
					out,
					new UnaryOperatorInformation<String, Nothing>(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.getInfoFor(Nothing.class)),
					"test_sink");
			sink.setInput(source);

			ExecutionConfig executionConfig = new ExecutionConfig();
			final HashMap<String, Accumulator<?, ?>> accumulatorMap = new HashMap<String, Accumulator<?, ?>>();
			executionConfig.disableObjectReuse();
			in.reset();
			sink.executeOnCollections(asList(TestIOData.NAMES), new RuntimeUDFContext("test_sink", 1, 0, null, executionConfig, accumulatorMap), executionConfig);
			assertEquals(asList(TestIOData.RICH_NAMES), out.output);

			executionConfig.enableObjectReuse();
			out.clear();
			in.reset();
			sink.executeOnCollections(asList(TestIOData.NAMES), new RuntimeUDFContext("test_sink", 1, 0, null, executionConfig, accumulatorMap), executionConfig);
			assertEquals(asList(TestIOData.RICH_NAMES), out.output);
		} catch (Exception e) {
			e.printStackTrace();
			fail(e.getMessage());
		}
	}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.operators;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.accumulators.Accumulator;
import org.apache.flink.api.common.functions.util.RuntimeUDFContext;
import org.apache.flink.api.common.operators.util.TestIOData;
import org.apache.flink.api.common.operators.util.TestNonRichInputFormat;
import org.apache.flink.api.common.operators.util.TestRichInputFormat;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.junit.Test;
import java.util.HashMap;
import java.util.List;
import static java.util.Arrays.asList;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
/**
* Checks the GenericDataSourceBase operator for both Rich and non-Rich input formats.
*/
@SuppressWarnings("serial")
public class GenericDataSourceBaseTest implements java.io.Serializable {

	@Test
	public void testDataSourcePlain() {
		try {
			TestNonRichInputFormat in = new TestNonRichInputFormat();
			GenericDataSourceBase<String, TestNonRichInputFormat> source =
					new GenericDataSourceBase<String, TestNonRichInputFormat>(
							in, new OperatorInformation<String>(BasicTypeInfo.STRING_TYPE_INFO), "testSource");

			ExecutionConfig executionConfig = new ExecutionConfig();
			executionConfig.disableObjectReuse();
			List<String> resultMutableSafe = source.executeOnCollections(null, executionConfig);

			in.reset();
			executionConfig.enableObjectReuse();
			List<String> resultRegular = source.executeOnCollections(null, executionConfig);

			assertEquals(asList(TestIOData.NAMES), resultMutableSafe);
			assertEquals(asList(TestIOData.NAMES), resultRegular);
		}
		catch (Exception e) {
			e.printStackTrace();
			fail(e.getMessage());
		}
	}

	@Test
	public void testDataSourceWithRuntimeContext() {
		try {
			TestRichInputFormat in = new TestRichInputFormat();
			GenericDataSourceBase<String, TestRichInputFormat> source =
					new GenericDataSourceBase<String, TestRichInputFormat>(
							in, new OperatorInformation<String>(BasicTypeInfo.STRING_TYPE_INFO), "testSource");

			final HashMap<String, Accumulator<?, ?>> accumulatorMap = new HashMap<String, Accumulator<?, ?>>();
			ExecutionConfig executionConfig = new ExecutionConfig();
			executionConfig.disableObjectReuse();
			List<String> resultMutableSafe = source.executeOnCollections(new RuntimeUDFContext("test_source", 1, 0, null, executionConfig, accumulatorMap), executionConfig);

			in.reset();
			executionConfig.enableObjectReuse();
			List<String> resultRegular = source.executeOnCollections(new RuntimeUDFContext("test_source", 1, 0, null, executionConfig, accumulatorMap), executionConfig);

			assertEquals(asList(TestIOData.RICH_NAMES), resultMutableSafe);
			assertEquals(asList(TestIOData.RICH_NAMES), resultRegular);
		} catch (Exception e) {
			e.printStackTrace();
			fail(e.getMessage());
		}
	}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.operators.util;
import java.io.IOException;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.api.common.io.NonParallelInput;
import org.apache.flink.api.common.io.DefaultInputSplitAssigner;
import org.apache.flink.api.common.io.statistics.BaseStatistics;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.io.GenericInputSplit;
/**
* Generic base class for all inputs that are not based on files.
* This is copied from {@link org.apache.flink.api.common.io.GenericInputFormat}.
* This class does not provide access to the RuntimeContext.
*/
public abstract class NonRichGenericInputFormat<OT> implements InputFormat<OT, GenericInputSplit> {

	private static final long serialVersionUID = 1L;

	/**
	 * The partition of this split.
	 */
	protected int partitionNumber;

	// --------------------------------------------------------------------------------------------

	@Override
	public void configure(Configuration parameters) {
		// nothing by default
	}

	@Override
	public BaseStatistics getStatistics(BaseStatistics cachedStatistics) throws IOException {
		// no statistics available, by default
		return cachedStatistics;
	}

	@Override
	public GenericInputSplit[] createInputSplits(int numSplits) throws IOException {
		if (numSplits < 1) {
			throw new IllegalArgumentException("Number of input splits has to be at least 1.");
		}

		numSplits = (this instanceof NonParallelInput) ? 1 : numSplits;
		GenericInputSplit[] splits = new GenericInputSplit[numSplits];
		for (int i = 0; i < splits.length; i++) {
			splits[i] = new GenericInputSplit(i, numSplits);
		}
		return splits;
	}

	@Override
	public DefaultInputSplitAssigner getInputSplitAssigner(GenericInputSplit[] splits) {
		return new DefaultInputSplitAssigner(splits);
	}

	// --------------------------------------------------------------------------------------------

	@Override
	public void open(GenericInputSplit split) throws IOException {
		this.partitionNumber = split.getSplitNumber();
	}

	@Override
	public void close() throws IOException {}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.operators.util;
public class TestIOData {
	public static final String[] NAMES = { "Peter", "Bob", "Liddy", "Alexander", "Stan" };
	public static final String[] RICH_NAMES = { "Peter01", "Bob01", "Liddy01", "Alexander01", "Stan01" };
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.operators.util;
import org.apache.flink.api.common.io.NonParallelInput;
import java.io.IOException;
/**
 * Test non-rich input format that emits just five elements.
 */
public class TestNonRichInputFormat extends NonRichGenericInputFormat<String> implements NonParallelInput {

	private static final long serialVersionUID = 1L;

	private static final int NUM = 5;
	private static final String[] NAMES = TestIOData.NAMES;

	private int count = 0;

	@Override
	public boolean reachedEnd() throws IOException {
		return count >= NUM;
	}

	@Override
	public String nextRecord(String reuse) throws IOException {
		count++;
		return NAMES[count - 1];
	}

	public void reset() {
		count = 0;
	}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.operators.util;
import java.util.LinkedList;
import java.util.List;
import org.apache.flink.api.common.io.OutputFormat;
import org.apache.flink.configuration.Configuration;
/**
 * Non-rich test output format that stores everything in a list.
 */
public class TestNonRichOutputFormat implements OutputFormat<String> {

	public List<String> output = new LinkedList<String>();

	@Override
	public void configure(Configuration parameters) {}

	@Override
	public void open(int a, int b) {}

	@Override
	public void close() {}

	@Override
	public void writeRecord(String record) {
		output.add(record);
	}

	public void clear() {
		output.clear();
	}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.operators.util;
import java.io.IOException;
import org.apache.flink.api.common.io.GenericInputFormat;
import org.apache.flink.api.common.io.NonParallelInput;
/**
 * Same as the non-rich test input format, except that it provides access to the runtime context.
 */
public class TestRichInputFormat extends GenericInputFormat<String> implements NonParallelInput {

	private static final long serialVersionUID = 1L;

	private static final int NUM = 5;
	private static final String[] NAMES = TestIOData.NAMES;

	private int count = 0;

	@Override
	public boolean reachedEnd() throws IOException {
		return count >= NUM;
	}

	@Override
	public String nextRecord(String reuse) throws IOException {
		count++;
		return NAMES[count - 1] + getRuntimeContext().getIndexOfThisSubtask() + "" +
				getRuntimeContext().getNumberOfParallelSubtasks();
	}

	public void reset() {
		count = 0;
	}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.operators.util;
import java.util.LinkedList;
import java.util.List;
import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.configuration.Configuration;
/**
 * Same as the non-rich test output format, except that it provides access to the runtime context.
 */
public class TestRichOutputFormat extends RichOutputFormat<String> {

	public List<String> output = new LinkedList<String>();

	@Override
	public void configure(Configuration parameters) {}

	@Override
	public void open(int a, int b) {}

	@Override
	public void close() {}

	@Override
	public void writeRecord(String record) {
		output.add(record + getRuntimeContext().getIndexOfThisSubtask() + "" +
				getRuntimeContext().getNumberOfParallelSubtasks());
	}

	public void clear() {
		output.clear();
	}
}
......@@ -20,7 +20,7 @@
package org.apache.flink.api.java.hadoop.mapred;
import org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.api.common.io.RichInputFormat;
import org.apache.flink.api.common.io.LocatableInputSplitAssigner;
import org.apache.flink.api.common.io.statistics.BaseStatistics;
import org.apache.flink.api.java.hadoop.mapred.utils.HadoopUtils;
......@@ -45,7 +45,7 @@ import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
public abstract class HadoopInputFormatBase<K, V, T> implements InputFormat<T, HadoopInputSplit> {
public abstract class HadoopInputFormatBase<K, V, T> extends RichInputFormat<T, HadoopInputSplit> {
private static final long serialVersionUID = 1L;
......
......@@ -20,7 +20,7 @@
package org.apache.flink.api.java.hadoop.mapred;
import org.apache.flink.api.common.io.FinalizeOnMaster;
import org.apache.flink.api.common.io.OutputFormat;
import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.api.java.hadoop.mapred.utils.HadoopUtils;
import org.apache.flink.api.java.hadoop.mapred.wrapper.HadoopDummyProgressable;
import org.apache.flink.api.java.hadoop.mapred.wrapper.HadoopDummyReporter;
......@@ -41,7 +41,7 @@ import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
public abstract class HadoopOutputFormatBase<K, V, T> implements OutputFormat<T>, FinalizeOnMaster {
public abstract class HadoopOutputFormatBase<K, V, T> extends RichOutputFormat<T> implements FinalizeOnMaster {
private static final long serialVersionUID = 1L;
......
......@@ -19,7 +19,7 @@
package org.apache.flink.api.java.hadoop.mapreduce;
import org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.api.common.io.RichInputFormat;
import org.apache.flink.api.common.io.LocatableInputSplitAssigner;
import org.apache.flink.api.common.io.statistics.BaseStatistics;
import org.apache.flink.api.java.hadoop.mapreduce.utils.HadoopUtils;
......@@ -48,7 +48,7 @@ import java.util.List;
import static com.google.common.base.Preconditions.checkNotNull;
public abstract class HadoopInputFormatBase<K, V, T> implements InputFormat<T, HadoopInputSplit> {
public abstract class HadoopInputFormatBase<K, V, T> extends RichInputFormat<T, HadoopInputSplit> {
private static final long serialVersionUID = 1L;
......
......@@ -19,7 +19,7 @@
package org.apache.flink.api.java.hadoop.mapreduce;
import org.apache.flink.api.common.io.FinalizeOnMaster;
import org.apache.flink.api.common.io.OutputFormat;
import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.api.java.hadoop.mapreduce.utils.HadoopUtils;
import org.apache.flink.configuration.Configuration;
import org.apache.hadoop.conf.Configurable;
......@@ -38,7 +38,7 @@ import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
public abstract class HadoopOutputFormatBase<K, V, T> implements OutputFormat<T>, FinalizeOnMaster {
public abstract class HadoopOutputFormatBase<K, V, T> extends RichOutputFormat<T> implements FinalizeOnMaster {
private static final long serialVersionUID = 1L;
......
......@@ -26,7 +26,7 @@ import java.util.HashMap;
import java.util.Map;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.io.OutputFormat;
import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.java.typeutils.InputTypeConfigurable;
......@@ -35,7 +35,7 @@ import org.apache.flink.configuration.Configuration;
/**
* An output format that writes records into a collection.
*/
public class LocalCollectionOutputFormat<T> implements OutputFormat<T>, InputTypeConfigurable {
public class LocalCollectionOutputFormat<T> extends RichOutputFormat<T> implements InputTypeConfigurable {
private static final long serialVersionUID = 1L;
......
......@@ -20,11 +20,11 @@ package org.apache.flink.api.java.io;
import java.io.PrintStream;
import org.apache.flink.api.common.io.OutputFormat;
import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.configuration.Configuration;
public class PrintingOutputFormat<T> implements OutputFormat<T> {
public class PrintingOutputFormat<T> extends RichOutputFormat<T> {
private static final long serialVersionUID = 1L;
......
......@@ -21,6 +21,7 @@ package org.apache.flink.runtime.operators;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.io.CleanupWhenUnsuccessful;
import org.apache.flink.api.common.io.OutputFormat;
import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.api.common.typeutils.TypeComparatorFactory;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.TypeSerializerFactory;
......@@ -28,6 +29,7 @@ import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.io.IOReadableWritable;
import org.apache.flink.runtime.accumulators.AccumulatorRegistry;
import org.apache.flink.runtime.execution.CancelTaskException;
import org.apache.flink.runtime.execution.Environment;
import org.apache.flink.runtime.io.network.api.reader.MutableReader;
import org.apache.flink.runtime.io.network.api.reader.MutableRecordReader;
import org.apache.flink.runtime.io.network.partition.consumer.UnionInputGate;
......@@ -35,6 +37,7 @@ import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;
import org.apache.flink.runtime.operators.chaining.ExceptionInChainedStubException;
import org.apache.flink.runtime.operators.sort.UnilateralSortMerger;
import org.apache.flink.runtime.operators.util.CloseableInputProvider;
import org.apache.flink.runtime.operators.util.DistributedRuntimeUDFContext;
import org.apache.flink.runtime.operators.util.ReaderIterator;
import org.apache.flink.runtime.operators.util.TaskConfig;
import org.apache.flink.runtime.plugable.DeserializationDelegate;
......@@ -109,6 +112,13 @@ public class DataSinkTask<IT> extends AbstractInvokable {
LOG.debug(getLogString("Starting data sink operator"));
}
		if (RichOutputFormat.class.isAssignableFrom(this.format.getClass())) {
			((RichOutputFormat) this.format).setRuntimeContext(createRuntimeContext());
			if (LOG.isDebugEnabled()) {
				LOG.debug(getLogString("Rich Sink detected. Initializing runtime context."));
			}
		}
ExecutionConfig executionConfig;
try {
ExecutionConfig c = (ExecutionConfig) InstantiationUtil.readObjectFromConfig(
......@@ -393,4 +403,12 @@ public class DataSinkTask<IT> extends AbstractInvokable {
private String getLogString(String message) {
return RegularPactTask.constructLogString(message, this.getEnvironment().getTaskName(), this);
}
	public DistributedRuntimeUDFContext createRuntimeContext() {
		Environment env = getEnvironment();
		return new DistributedRuntimeUDFContext(env.getTaskName(), env.getNumberOfSubtasks(),
				env.getIndexInSubtaskGroup(), getUserCodeClassLoader(), getExecutionConfig(),
				env.getDistributedCacheEntries(), env.getAccumulatorRegistry().getUserMap());
	}
}
......@@ -19,19 +19,21 @@
package org.apache.flink.runtime.operators;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.accumulators.Accumulator;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.api.common.io.RichInputFormat;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.TypeSerializerFactory;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.io.InputSplit;
import org.apache.flink.runtime.accumulators.AccumulatorRegistry;
import org.apache.flink.runtime.execution.CancelTaskException;
import org.apache.flink.runtime.execution.Environment;
import org.apache.flink.runtime.io.network.api.writer.RecordWriter;
import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;
import org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider;
import org.apache.flink.runtime.operators.chaining.ChainedDriver;
import org.apache.flink.runtime.operators.chaining.ExceptionInChainedStubException;
import org.apache.flink.runtime.operators.util.DistributedRuntimeUDFContext;
import org.apache.flink.runtime.operators.util.TaskConfig;
import org.apache.flink.util.Collector;
import org.apache.flink.util.InstantiationUtil;
......@@ -42,7 +44,6 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
/**
......@@ -103,6 +104,13 @@ public class DataSourceTask<OT> extends AbstractInvokable {
LOG.debug(getLogString("Starting data source operator"));
}
		if (RichInputFormat.class.isAssignableFrom(this.format.getClass())) {
			((RichInputFormat) this.format).setRuntimeContext(createRuntimeContext());
			if (LOG.isDebugEnabled()) {
				LOG.debug(getLogString("Rich Source detected. Initializing runtime context."));
			}
		}
ExecutionConfig executionConfig;
try {
ExecutionConfig c = (ExecutionConfig) InstantiationUtil.readObjectFromConfig(
......@@ -295,10 +303,8 @@ public class DataSourceTask<OT> extends AbstractInvokable {
final AccumulatorRegistry accumulatorRegistry = getEnvironment().getAccumulatorRegistry();
final AccumulatorRegistry.Reporter reporter = accumulatorRegistry.getReadWriteReporter();
Map<String, Accumulator<?, ?>> accumulatorMap = accumulatorRegistry.getUserMap();
this.output = RegularPactTask.initOutputs(this, cl, this.config, this.chainedTasks, this.eventualOutputs,
getExecutionConfig(), reporter, accumulatorMap);
getExecutionConfig(), reporter, getEnvironment().getAccumulatorRegistry().getUserMap());
}
// ------------------------------------------------------------------------
......@@ -377,4 +383,12 @@ public class DataSourceTask<OT> extends AbstractInvokable {
}
};
}
	public DistributedRuntimeUDFContext createRuntimeContext() {
		Environment env = getEnvironment();
		return new DistributedRuntimeUDFContext(env.getTaskName(), env.getNumberOfSubtasks(),
				env.getIndexInSubtaskGroup(), getUserCodeClassLoader(), getExecutionConfig(),
				env.getDistributedCacheEntries(), env.getAccumulatorRegistry().getUserMap());
	}
}
......@@ -24,6 +24,7 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.api.common.io.RichInputFormat;
import org.apache.flink.api.common.io.LocatableInputSplitAssigner;
import org.apache.flink.api.common.io.statistics.BaseStatistics;
import org.apache.flink.api.java.tuple.Tuple;
......@@ -43,7 +44,7 @@ import org.slf4j.LoggerFactory;
* {@link InputFormat} subclass that wraps the access for HTables.
*
*/
public abstract class TableInputFormat<T extends Tuple> implements InputFormat<T, TableInputSplit>{
public abstract class TableInputFormat<T extends Tuple> extends RichInputFormat<T, TableInputSplit>{
private static final long serialVersionUID = 1L;
......
......@@ -18,7 +18,7 @@
package org.apache.flink.hcatalog;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.api.common.io.RichInputFormat;
import org.apache.flink.api.common.io.LocatableInputSplitAssigner;
import org.apache.flink.api.common.io.statistics.BaseStatistics;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
......@@ -63,7 +63,7 @@ import java.util.Map;
*
* @param <T>
*/
public abstract class HCatInputFormatBase<T> implements InputFormat<T, HadoopInputSplit>, ResultTypeQueryable<T> {
public abstract class HCatInputFormatBase<T> extends RichInputFormat<T, HadoopInputSplit> implements ResultTypeQueryable<T> {
private static final long serialVersionUID = 1L;
......
......@@ -31,7 +31,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.flink.api.common.io.DefaultInputSplitAssigner;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.api.common.io.RichInputFormat;
import org.apache.flink.api.common.io.statistics.BaseStatistics;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.configuration.Configuration;
......@@ -48,7 +48,7 @@ import org.apache.flink.types.NullValue;
* @see Tuple
* @see DriverManager
*/
public class JDBCInputFormat<OUT extends Tuple> implements InputFormat<OUT, InputSplit>, NonParallelInput {
public class JDBCInputFormat<OUT extends Tuple> extends RichInputFormat<OUT, InputSplit> implements NonParallelInput {
private static final long serialVersionUID = 1L;
private static final Logger LOG = LoggerFactory.getLogger(JDBCInputFormat.class);
......
......@@ -26,7 +26,7 @@ import java.sql.SQLException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.flink.api.common.io.OutputFormat;
import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.configuration.Configuration;
......@@ -38,7 +38,7 @@ import org.apache.flink.configuration.Configuration;
* @see Tuple
* @see DriverManager
*/
public class JDBCOutputFormat<OUT extends Tuple> implements OutputFormat<OUT> {
public class JDBCOutputFormat<OUT extends Tuple> extends RichOutputFormat<OUT> {
private static final long serialVersionUID = 1L;
@SuppressWarnings("unused")
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.test.io;
import org.apache.flink.api.common.operators.util.TestNonRichInputFormat;
import org.apache.flink.api.common.operators.util.TestNonRichOutputFormat;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.test.util.JavaProgramTestBase;
import static org.junit.Assert.fail;
/**
 * Tests that non-rich DataSource and DataSink input/output formats are still used correctly at runtime.
 */
public class InputOutputITCase extends JavaProgramTestBase {

	@Override
	protected void testProgram() throws Exception {
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		TestNonRichOutputFormat output = new TestNonRichOutputFormat();
		env.createInput(new TestNonRichInputFormat()).output(output);
		try {
			env.execute();
		} catch (Exception e) {
			// we didn't break anything by making everything rich
			e.printStackTrace();
			fail(e.getMessage());
		}
	}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.test.io;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.accumulators.LongCounter;
import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.FileInputSplit;
import org.apache.flink.core.fs.Path;
import org.apache.flink.test.util.JavaProgramTestBase;
import java.io.IOException;
import java.util.concurrent.ConcurrentLinkedQueue;
import static org.junit.Assert.assertEquals;
/**
 * Tests that rich DataSource and DataSink input/output formats can access their RuntimeContext,
 * verified by checking accumulator values.
 */
public class RichInputOutputITCase extends JavaProgramTestBase {

	private String inputPath;
	private static ConcurrentLinkedQueue<Integer> readCalls;
	private static ConcurrentLinkedQueue<Integer> writeCalls;

	@Override
	protected void preSubmit() throws Exception {
		inputPath = createTempFile("input", "ab\n"
				+ "cd\n"
				+ "ef\n");
	}

	@Override
	protected void testProgram() throws Exception {
		// test verifying the number of records read and written vs the accumulator counts
		readCalls = new ConcurrentLinkedQueue<Integer>();
		writeCalls = new ConcurrentLinkedQueue<Integer>();

		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.createInput(new TestInputFormat(new Path(inputPath))).output(new TestOutputFormat());

		JobExecutionResult result = env.execute();
		Object a = result.getAllAccumulatorResults().get("DATA_SOURCE_ACCUMULATOR");
		Object b = result.getAllAccumulatorResults().get("DATA_SINK_ACCUMULATOR");
		long recordsRead = (Long) a;
		long recordsWritten = (Long) b;
		assertEquals(recordsRead, readCalls.size());
		assertEquals(recordsWritten, writeCalls.size());
	}

	private static final class TestInputFormat extends TextInputFormat {
		private static final long serialVersionUID = 1L;

		private LongCounter counter = new LongCounter();

		public TestInputFormat(Path filePath) {
			super(filePath);
		}

		@Override
		public void open(FileInputSplit split) throws IOException {
			try {
				getRuntimeContext().addAccumulator("DATA_SOURCE_ACCUMULATOR", counter);
			} catch (UnsupportedOperationException e) {
				// the accumulator is already added
			}
			super.open(split);
		}

		@Override
		public String nextRecord(String reuse) throws IOException {
			readCalls.add(1);
			counter.add(1);
			return super.nextRecord(reuse);
		}
	}

	private static final class TestOutputFormat extends RichOutputFormat<String> {
		private LongCounter counter = new LongCounter();

		@Override
		public void configure(Configuration parameters) {}

		@Override
		public void open(int a, int b) {
			try {
				getRuntimeContext().addAccumulator("DATA_SINK_ACCUMULATOR", counter);
			} catch (UnsupportedOperationException e) {
				// the accumulator is already added
			}
		}

		@Override
		public void close() throws IOException {}

		@Override
		public void writeRecord(String record) {
			writeCalls.add(1);
			counter.add(1);
		}
	}
}