Commit 84759dad — authored by: G gyfora, committed by: Stephan Ewen

[streaming] added support for batch partitioning to jobgraphbuilder

Parent commit: 7e7051b3
......@@ -56,6 +56,8 @@ public class DataStream<T extends Tuple> {
initConnections();
}
//TODO: create copy method (or constructor) and copy datastream at every operator
private void initConnections() {
connectIDs = new ArrayList<String>();
......@@ -82,7 +84,6 @@ public class DataStream<T extends Tuple> {
for (int i = 0; i < batchSizes.size(); i++) {
batchSizes.set(i, batchSize);
}
context.setBatchSize(this);
return this;
}
......
......@@ -306,7 +306,8 @@ public class JobGraphBuilder {
/**
 * Stores the emission batch size for the given component in that
 * component's vertex {@code Configuration}.
 *
 * @param componentName
 *            name of the component (vertex) to configure
 * @param batchSize
 *            number of records to batch together per emission
 */
public void setBatchSize(String componentName, int batchSize) {
Configuration config = components.get(componentName).getConfiguration();
// NOTE(review): this span is a rendered diff, so both the old plain
// "batchSize" key and the new per-output "batchSize_<n>" key appear
// here — confirm against the repository which key(s) the final
// revision actually keeps.
config.setInteger("batchSize", batchSize);
// Key is indexed by the most recently added forward connection
// (count - 1), so each output edge can carry its own batch size.
config.setInteger("batchSize_"
+ (components.get(componentName).getNumberOfForwardConnections() - 1), batchSize);
}
/**
......
......@@ -91,6 +91,8 @@ public class StreamExecutionEnvironment {
}
}
this.setBatchSize(inputStream);
}
public <T extends Tuple, R extends Tuple> DataStream<R> addFunction(String functionName,
......
......@@ -18,6 +18,7 @@ package eu.stratosphere.streaming.api.streamcomponent;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.ArrayList;
import java.util.ConcurrentModificationException;
import java.util.List;
......@@ -75,6 +76,12 @@ public final class StreamComponentHelper<T extends AbstractInvokable> {
private SerializationDelegate<Tuple> outSerializationDelegate = null;
public StreamCollector<Tuple> collector;
private List<Integer> batchsizes_s = new ArrayList<Integer>();
private List<Integer> batchsizes_f = new ArrayList<Integer>();
private int keyPosition = 0;
private List<RecordWriter<StreamRecord>> outputs_s = new ArrayList<RecordWriter<StreamRecord>>();
private List<RecordWriter<StreamRecord>> outputs_f = new ArrayList<RecordWriter<StreamRecord>>();
public static int newComponent() {
numComponents++;
......@@ -109,6 +116,7 @@ public final class StreamComponentHelper<T extends AbstractInvokable> {
List<RecordWriter<StreamRecord>> outputs) {
int batchSize = taskConfiguration.getInteger("batchSize", 1);
long batchTimeout = taskConfiguration.getLong("batchTimeout", 1000);
collector = new StreamCollector<Tuple>(batchSize, batchTimeout, id,
outSerializationDelegate, outputs);
......@@ -208,10 +216,11 @@ public final class StreamComponentHelper<T extends AbstractInvokable> {
List<ChannelSelector<StreamRecord>> partitioners) throws StreamComponentException {
int numberOfOutputs = taskConfiguration.getInteger("numberOfOutputs", 0);
for (int i = 0; i < numberOfOutputs; i++) {
setPartitioner(taskConfiguration, i, partitioners);
}
for (ChannelSelector<StreamRecord> outputPartitioner : partitioners) {
ChannelSelector<StreamRecord> outputPartitioner = partitioners.get(i);
if (taskBase instanceof StreamTask) {
outputs.add(new RecordWriter<StreamRecord>((StreamTask) taskBase,
StreamRecord.class, outputPartitioner));
......@@ -221,6 +230,11 @@ public final class StreamComponentHelper<T extends AbstractInvokable> {
} else {
throw new StreamComponentException("Nonsupported object passed to setConfigOutputs");
}
if (outputs_f.size() < batchsizes_f.size()) {
outputs_f.add(outputs.get(i));
} else {
outputs_s.add(outputs.get(i));
}
}
}
......@@ -321,14 +335,18 @@ public final class StreamComponentHelper<T extends AbstractInvokable> {
Class<? extends ChannelSelector<StreamRecord>> partitioner = taskConfiguration.getClass(
"partitionerClass_" + nrOutput, DefaultPartitioner.class, ChannelSelector.class);
Integer batchSize = taskConfiguration.getInteger("batchSize_" + nrOutput, 1);
try {
if (partitioner.equals(FieldsPartitioner.class)) {
int keyPosition = taskConfiguration
.getInteger("partitionerIntParam_" + nrOutput, 1);
batchsizes_f.add(batchSize);
// TODO:force one partitioning field
keyPosition = taskConfiguration.getInteger("partitionerIntParam_" + nrOutput, 1);
partitioners.add(partitioner.getConstructor(int.class).newInstance(keyPosition));
} else {
batchsizes_s.add(batchSize);
partitioners.add(partitioner.newInstance());
}
if (log.isTraceEnabled()) {
......
package eu.stratosphere.streaming.util;
import java.io.IOException;
import java.io.File;
/**
 * Utility for fetching test data files used by the streaming tests.
 */
public class TestDataUtil {

	/**
	 * Downloads the given file from the test-data repository via {@code wget}
	 * unless a local copy already exists in the working directory. Blocks
	 * until the download process finishes so callers can read the file
	 * immediately afterwards.
	 *
	 * @param fileName
	 *            name of the test data file to fetch
	 */
	public static void downloadIfNotExists(String fileName) {
		String testDataDir = "";
		File file = new File(testDataDir + fileName);
		String testRepoUrl = "info.ilab.sztaki.hu/~mbalassi/flink-streaming/testdata/";

		if (file.exists()) {
			System.out.println(fileName + " already exists");
			return;
		}

		System.out.println("downloading " + fileName);
		try {
			// ProcessBuilder with an explicit argument list avoids the
			// whitespace word-splitting pitfalls of Runtime.exec(String).
			ProcessBuilder pb = new ProcessBuilder("wget", "-O",
					testDataDir + fileName, testRepoUrl + fileName);
			// Forward wget's output/errors instead of silently discarding
			// them (an unread stream can also block the child process).
			pb.inheritIO();
			System.out.println(String.join(" ", pb.command()));
			// Wait for completion: the original fire-and-forget exec()
			// returned before the file was fully written.
			int exitCode = pb.start().waitFor();
			if (exitCode != 0) {
				System.out.println("download of " + fileName
						+ " failed (exit code " + exitCode + ")");
			}
		} catch (IOException e) {
			e.printStackTrace();
		} catch (InterruptedException e) {
			// Restore the interrupt flag rather than swallowing it.
			Thread.currentThread().interrupt();
		}
	}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册