Commit 84759dad — authored by: G gyfora, committed by: Stephan Ewen

[streaming] added support for batch partitioning to jobgraphbuilder

Parent commit: 7e7051b3
......@@ -56,6 +56,8 @@ public class DataStream<T extends Tuple> {
initConnections();
}
//TODO: create copy method (or constructor) and copy datastream at every operator
private void initConnections() {
connectIDs = new ArrayList<String>();
......@@ -82,7 +84,6 @@ public class DataStream<T extends Tuple> {
for (int i = 0; i < batchSizes.size(); i++) {
batchSizes.set(i, batchSize);
}
context.setBatchSize(this);
return this;
}
......
......@@ -306,7 +306,8 @@ public class JobGraphBuilder {
/**
 * Stores the emission batch size for the given component in that
 * component's vertex {@code Configuration}.
 *
 * @param componentName
 *            name of the component (vertex) to configure
 * @param batchSize
 *            number of records to batch together per emission
 */
public void setBatchSize(String componentName, int batchSize) {
Configuration config = components.get(componentName).getConfiguration();
// NOTE(review): this span is a rendered diff, so both the old plain
// "batchSize" key and the new per-output "batchSize_<n>" key appear
// here — confirm against the repository which key(s) the final
// revision actually keeps.
config.setInteger("batchSize", batchSize);
// Key is indexed by the most recently added forward connection
// (count - 1), so each output edge can carry its own batch size.
config.setInteger("batchSize_"
+ (components.get(componentName).getNumberOfForwardConnections() - 1), batchSize);
}
/**
......
......@@ -91,6 +91,8 @@ public class StreamExecutionEnvironment {
}
}
this.setBatchSize(inputStream);
}
public <T extends Tuple, R extends Tuple> DataStream<R> addFunction(String functionName,
......
......@@ -18,6 +18,7 @@ package eu.stratosphere.streaming.api.streamcomponent;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.ArrayList;
import java.util.ConcurrentModificationException;
import java.util.List;
......@@ -75,6 +76,12 @@ public final class StreamComponentHelper<T extends AbstractInvokable> {
private SerializationDelegate<Tuple> outSerializationDelegate = null;
public StreamCollector<Tuple> collector;
private List<Integer> batchsizes_s = new ArrayList<Integer>();
private List<Integer> batchsizes_f = new ArrayList<Integer>();
private int keyPosition = 0;
private List<RecordWriter<StreamRecord>> outputs_s = new ArrayList<RecordWriter<StreamRecord>>();
private List<RecordWriter<StreamRecord>> outputs_f = new ArrayList<RecordWriter<StreamRecord>>();
public static int newComponent() {
numComponents++;
......@@ -109,6 +116,7 @@ public final class StreamComponentHelper<T extends AbstractInvokable> {
List<RecordWriter<StreamRecord>> outputs) {
int batchSize = taskConfiguration.getInteger("batchSize", 1);
long batchTimeout = taskConfiguration.getLong("batchTimeout", 1000);
collector = new StreamCollector<Tuple>(batchSize, batchTimeout, id,
outSerializationDelegate, outputs);
......@@ -208,10 +216,11 @@ public final class StreamComponentHelper<T extends AbstractInvokable> {
List<ChannelSelector<StreamRecord>> partitioners) throws StreamComponentException {
int numberOfOutputs = taskConfiguration.getInteger("numberOfOutputs", 0);
for (int i = 0; i < numberOfOutputs; i++) {
setPartitioner(taskConfiguration, i, partitioners);
}
for (ChannelSelector<StreamRecord> outputPartitioner : partitioners) {
ChannelSelector<StreamRecord> outputPartitioner = partitioners.get(i);
if (taskBase instanceof StreamTask) {
outputs.add(new RecordWriter<StreamRecord>((StreamTask) taskBase,
StreamRecord.class, outputPartitioner));
......@@ -221,6 +230,11 @@ public final class StreamComponentHelper<T extends AbstractInvokable> {
} else {
throw new StreamComponentException("Nonsupported object passed to setConfigOutputs");
}
if (outputs_f.size() < batchsizes_f.size()) {
outputs_f.add(outputs.get(i));
} else {
outputs_s.add(outputs.get(i));
}
}
}
......@@ -321,14 +335,18 @@ public final class StreamComponentHelper<T extends AbstractInvokable> {
Class<? extends ChannelSelector<StreamRecord>> partitioner = taskConfiguration.getClass(
"partitionerClass_" + nrOutput, DefaultPartitioner.class, ChannelSelector.class);
Integer batchSize = taskConfiguration.getInteger("batchSize_" + nrOutput, 1);
try {
if (partitioner.equals(FieldsPartitioner.class)) {
int keyPosition = taskConfiguration
.getInteger("partitionerIntParam_" + nrOutput, 1);
batchsizes_f.add(batchSize);
// TODO:force one partitioning field
keyPosition = taskConfiguration.getInteger("partitionerIntParam_" + nrOutput, 1);
partitioners.add(partitioner.getConstructor(int.class).newInstance(keyPosition));
} else {
batchsizes_s.add(batchSize);
partitioners.add(partitioner.newInstance());
}
if (log.isTraceEnabled()) {
......
package eu.stratosphere.streaming.util;
import java.io.IOException;
import java.io.File;
/**
 * Utility for fetching test data files used by the streaming tests.
 */
public class TestDataUtil {

	/**
	 * Downloads the given file from the test-data repository via {@code wget}
	 * unless a local copy already exists in the working directory. Blocks
	 * until the download process finishes so callers can read the file
	 * immediately afterwards.
	 *
	 * @param fileName
	 *            name of the test data file to fetch
	 */
	public static void downloadIfNotExists(String fileName) {
		String testDataDir = "";
		File file = new File(testDataDir + fileName);
		String testRepoUrl = "info.ilab.sztaki.hu/~mbalassi/flink-streaming/testdata/";

		if (file.exists()) {
			System.out.println(fileName + " already exists");
			return;
		}

		System.out.println("downloading " + fileName);
		try {
			// ProcessBuilder with an explicit argument list avoids the
			// whitespace word-splitting pitfalls of Runtime.exec(String).
			ProcessBuilder pb = new ProcessBuilder("wget", "-O",
					testDataDir + fileName, testRepoUrl + fileName);
			// Forward wget's output/errors instead of silently discarding
			// them (an unread stream can also block the child process).
			pb.inheritIO();
			System.out.println(String.join(" ", pb.command()));
			// Wait for completion: the original fire-and-forget exec()
			// returned before the file was fully written.
			int exitCode = pb.start().waitFor();
			if (exitCode != 0) {
				System.out.println("download of " + fileName
						+ " failed (exit code " + exitCode + ")");
			}
		} catch (IOException e) {
			e.printStackTrace();
		} catch (InterruptedException e) {
			// Restore the interrupt flag rather than swallowing it.
			Thread.currentThread().interrupt();
		}
	}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册