Commit f0c57317 authored by: J jfeher Committed by: Stephan Ewen

[streaming] RMQ source for multiple types

Parent: df9dac28
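The RabbitMQ source itself is not visible in the hunks below, but the commit adds the com.rabbitmq:amqp-client dependency to the pom, and the streaming API's UserSourceInvokable appears further down. As a rough orientation only, a source supporting multiple tuple types could look like the following sketch; the class name RMQSource and the deserialize hook are illustrative assumptions, not code from this commit, and it assumes UserSourceInvokable exposes invoke(Collector) as used by StreamSource below.

import com.rabbitmq.client.Channel;
import com.rabbitmq.client.Connection;
import com.rabbitmq.client.ConnectionFactory;
import com.rabbitmq.client.QueueingConsumer;

import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.streaming.api.invokable.UserSourceInvokable;
import eu.stratosphere.util.Collector;

public abstract class RMQSource<OUT extends Tuple> extends UserSourceInvokable<OUT> {
    private static final long serialVersionUID = 1L;

    private final String hostName;
    private final String queueName;

    public RMQSource(String hostName, String queueName) {
        this.hostName = hostName;
        this.queueName = queueName;
    }

    // Subclasses turn the raw message body into a tuple of the desired type;
    // this hook is what would make one source class usable for multiple types.
    protected abstract OUT deserialize(byte[] body);

    @Override
    public void invoke(Collector<OUT> collector) throws Exception {
        ConnectionFactory factory = new ConnectionFactory();
        factory.setHost(hostName);
        Connection connection = factory.newConnection();
        Channel channel = connection.createChannel();
        channel.queueDeclare(queueName, false, false, false, null);
        QueueingConsumer consumer = new QueueingConsumer(channel);
        channel.basicConsume(queueName, true, consumer);

        // Consume indefinitely, emitting one tuple per message.
        while (true) {
            QueueingConsumer.Delivery delivery = consumer.nextDelivery();
            collector.collect(deserialize(delivery.getBody()));
        }
    }
}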
@@ -5,42 +5,18 @@
<modelVersion>4.0.0</modelVersion>
<groupId>eu.stratosphere</groupId>
<version>0.3-SNAPSHOT</version>
<version>0.2-SNAPSHOT</version>
<artifactId>stratosphere-streaming</artifactId>
<name>stratosphere-streaming</name>
<packaging>pom</packaging>
<url>http://github.com/stratosphere/stratosphere-streaming</url>
<inceptionYear>2014</inceptionYear>
<licenses>
<license>
<name>The Apache Software License, Version 2.0</name>
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
<distribution>stratosphere</distribution>
</license>
</licenses>
<scm>
<url>https://github.com/stratosphere/stratosphere-streaming</url>
<connection>scm:git:git@github.com:stratosphere/stratosphere-streaming.git</connection>
<developerConnection>scm:git:git@github.com:stratosphere/stratosphere-streaming.git</developerConnection>
</scm>
<developers>
</developers>
<modules>
<module>stratosphere-streaming-core</module>
<module>stratosphere-streaming-examples</module>
<module>stratosphere-streaming-addons</module>
</modules>
<packaging>jar</packaging>
<properties>
<stratosphere.version>0.6-SNAPSHOT</stratosphere.version>
<stratosphere.version>0.5</stratosphere.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
</properties>
<repositories>
<repository>
<id>dms-repo</id>
@@ -49,54 +25,47 @@
<snapshots><enabled>true</enabled></snapshots>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>eu.stratosphere</groupId>
<artifactId>stratosphere-core</artifactId>
<version>${stratosphere.version}</version>
<type>jar</type>
<version>${stratosphere.version}</version>
</dependency>
<dependency>
<groupId>eu.stratosphere</groupId>
<artifactId>stratosphere-tests</artifactId>
<version>${stratosphere.version}</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>eu.stratosphere</groupId>
<artifactId>stratosphere-compiler</artifactId>
<version>${stratosphere.version}</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>eu.stratosphere</groupId>
<artifactId>stratosphere-runtime</artifactId>
<version>${stratosphere.version}</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>eu.stratosphere</groupId>
<artifactId>stratosphere-clients</artifactId>
<version>${stratosphere.version}</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>eu.stratosphere</groupId>
<artifactId>stratosphere-java</artifactId>
<version>${stratosphere.version}</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.7</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.3.2</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
@@ -109,14 +78,27 @@
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.16</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>com.rabbitmq</groupId>
<artifactId>amqp-client</artifactId>
<version>3.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.10</artifactId>
<version>0.8.0</version>
</dependency>
<dependency>
<groupId>org.jblas</groupId>
<artifactId>jblas</artifactId>
<version>1.2.3</version>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.8.5</version>
<scope>test</scope>
<type>jar</type>
</dependency>
</dependencies>
......
@@ -12,12 +12,13 @@
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.streaming.api.invokable;
package eu.stratosphere.streaming.api;
import java.util.Iterator;
import eu.stratosphere.api.java.functions.GroupReduceFunction;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.streaming.api.invokable.UserTaskInvokable;
import eu.stratosphere.streaming.api.streamrecord.StreamRecord;
import eu.stratosphere.util.Collector;
@@ -31,7 +32,6 @@ public class BatchReduceInvokable<IN extends Tuple, OUT extends Tuple> extends U
@Override
public void invoke(StreamRecord record, Collector<OUT> collector) throws Exception {
@SuppressWarnings("unchecked")
Iterator<IN> iterator = (Iterator<IN>) record.getBatchIterable().iterator();
reducer.reduce(iterator, collector);
}
......
@@ -24,11 +24,6 @@ import eu.stratosphere.api.java.functions.GroupReduceFunction;
import eu.stratosphere.api.java.functions.MapFunction;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.streaming.api.StreamExecutionEnvironment.ConnectionType;
import eu.stratosphere.streaming.api.function.SinkFunction;
import eu.stratosphere.streaming.api.invokable.BatchReduceInvokable;
import eu.stratosphere.streaming.api.invokable.FilterInvokable;
import eu.stratosphere.streaming.api.invokable.FlatMapInvokable;
import eu.stratosphere.streaming.api.invokable.MapInvokable;
import eu.stratosphere.types.TypeInformation;
public class DataStream<T extends Tuple> {
@@ -198,29 +193,6 @@ public class DataStream<T extends Tuple> {
return returnStream;
}
/**
* Applies a Map transformation on a DataStream. The transformation calls a
* MapFunction for each element of the DataStream. Each MapFunction call
* returns exactly one element.
*
* @param mapper
* The MapFunction that is called for each element of the
* DataStream.
* @param parallelism
* The number of threads the function runs on.
* @param <R>
* output type
* @return The transformed DataStream.
*/
public <R extends Tuple> DataStream<R> map(MapFunction<T, R> mapper, int parallelism) {
return environment.addFunction("map", this.copy(), mapper, new MapInvokable<T, R>(mapper),
parallelism);
}
public <R extends Tuple> DataStream<R> map(MapFunction<T, R> mapper) {
return map(mapper, 1);
}
/**
* Applies a FlatMap transformation on a DataStream. The transformation
* calls a FlatMapFunction for each element of the DataStream. Each
@@ -241,30 +213,23 @@ public class DataStream<T extends Tuple> {
new FlatMapInvokable<T, R>(flatMapper), parallelism);
}
public <R extends Tuple> DataStream<R> flatMap(FlatMapFunction<T, R> flatMapper) {
return flatMap(flatMapper, 1);
}
/**
* Applies a Filter transformation on a DataStream. The transformation calls
* a FilterFunction for each element of the DataStream and retains only
* those elements for which the function returns true. Elements for which the
* function returns false are filtered out.
* Applies a Map transformation on a DataStream. The transformation calls a
* MapFunction for each element of the DataStream. Each MapFunction call
* returns exactly one element.
*
* @param filter
* The FilterFunction that is called for each element of the
* DataStream.
* @param mapper
* The MapFunction that is called for each element of the
* DataStream.
* @param parallelism
* The number of threads the function runs on.
* @return The filtered DataStream.
* @param <R>
* output type
* @return The transformed DataStream.
*/
public DataStream<T> filter(FilterFunction<T> filter, int parallelism) {
return environment.addFunction("filter", this.copy(), filter,
new FilterInvokable<T>(filter), parallelism);
}
public DataStream<T> filter(FilterFunction<T> filter) {
return filter(filter, 1);
public <R extends Tuple> DataStream<R> map(MapFunction<T, R> mapper, int parallelism) {
return environment.addFunction("map", this.copy(), mapper, new MapInvokable<T, R>(mapper),
parallelism);
}
/**
@@ -290,9 +255,22 @@
new BatchReduceInvokable<T, R>(reducer), parallelism);
}
public <R extends Tuple> DataStream<R> batchReduce(GroupReduceFunction<T, R> reducer,
int batchSize) {
return batchReduce(reducer, batchSize, 1);
/**
* Applies a Filter transformation on a DataStream. The transformation calls
* a FilterFunction for each element of the DataStream and retains only
* those elements for which the function returns true. Elements for which the
* function returns false are filtered out.
*
* @param filter
* The FilterFunction that is called for each element of the
* DataStream.
* @param parallelism
* The number of threads the function runs on.
* @return The filtered DataStream.
*/
public DataStream<T> filter(FilterFunction<T> filter, int parallelism) {
return environment.addFunction("filter", this.copy(), filter,
new FilterInvokable<T>(filter), parallelism);
}
/**
......
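For orientation, a hedged usage sketch of the map and filter signatures above. Here words stands for an assumed DataStream<Tuple1<String>> obtained elsewhere; nothing in this snippet is taken verbatim from the commit.

import eu.stratosphere.api.java.functions.FilterFunction;
import eu.stratosphere.api.java.functions.MapFunction;
import eu.stratosphere.api.java.tuple.Tuple1;

// Map each word to its length, running the mapper on 2 parallel threads.
DataStream<Tuple1<Integer>> lengths = words.map(
        new MapFunction<Tuple1<String>, Tuple1<Integer>>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Tuple1<Integer> map(Tuple1<String> value) {
                return new Tuple1<Integer>(value.f0.length());
            }
        }, 2);

// Retain only non-empty words, on a single thread.
DataStream<Tuple1<Integer>> nonEmpty = lengths.filter(
        new FilterFunction<Tuple1<Integer>>() {
            private static final long serialVersionUID = 1L;

            @Override
            public boolean filter(Tuple1<Integer> value) {
                return value.f0 > 0;
            }
        }, 1);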
@@ -12,7 +12,7 @@
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.streaming.api.function;
package eu.stratosphere.streaming.api;
import java.io.BufferedReader;
import java.io.FileReader;
......
@@ -13,7 +13,7 @@
*
**********************************************************************************************************************/
package eu.stratosphere.streaming.api.function;
package eu.stratosphere.streaming.api;
import java.io.BufferedReader;
import java.io.FileReader;
......
@@ -12,27 +12,24 @@
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.streaming.api.invokable;
package eu.stratosphere.streaming.api;
import eu.stratosphere.api.java.functions.FilterFunction;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.streaming.api.invokable.UserTaskInvokable;
import eu.stratosphere.streaming.api.streamrecord.StreamRecord;
import eu.stratosphere.util.Collector;
public class FilterInvokable<IN extends Tuple> extends UserTaskInvokable<IN, IN> {
private static final long serialVersionUID = 1L;
public class FilterInvokable<IN extends Tuple> extends UserTaskInvokable<IN, IN> {
FilterFunction<IN> filterFunction;
public FilterInvokable(FilterFunction<IN> filterFunction) {
this.filterFunction = filterFunction;
}
@Override
public void invoke(StreamRecord record, Collector<IN> collector) throws Exception {
for (int i = 0; i < record.getBatchSize(); i++) {
@SuppressWarnings("unchecked")
IN tuple = (IN) record.getTuple(i);
if (filterFunction.filter(tuple)) {
collector.collect(tuple);
......
@@ -13,10 +13,11 @@
*
**********************************************************************************************************************/
package eu.stratosphere.streaming.api.invokable;
package eu.stratosphere.streaming.api;
import eu.stratosphere.api.java.functions.FlatMapFunction;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.streaming.api.invokable.UserTaskInvokable;
import eu.stratosphere.streaming.api.streamrecord.StreamRecord;
import eu.stratosphere.util.Collector;
......
@@ -28,6 +28,8 @@ import org.apache.commons.logging.LogFactory;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.nephele.io.ChannelSelector;
import eu.stratosphere.nephele.io.channels.ChannelType;
import eu.stratosphere.nephele.jobgraph.AbstractJobVertex;
import eu.stratosphere.nephele.jobgraph.JobGraph;
import eu.stratosphere.nephele.jobgraph.JobGraphDefinitionException;
@@ -35,8 +37,6 @@ import eu.stratosphere.nephele.jobgraph.JobInputVertex;
import eu.stratosphere.nephele.jobgraph.JobOutputVertex;
import eu.stratosphere.nephele.jobgraph.JobTaskVertex;
import eu.stratosphere.pact.runtime.task.util.TaskConfig;
import eu.stratosphere.runtime.io.api.ChannelSelector;
import eu.stratosphere.runtime.io.channels.ChannelType;
import eu.stratosphere.streaming.api.invokable.UserSinkInvokable;
import eu.stratosphere.streaming.api.invokable.UserSourceInvokable;
import eu.stratosphere.streaming.api.invokable.UserTaskInvokable;
@@ -63,8 +63,8 @@ public class JobGraphBuilder {
protected String maxParallelismVertexName;
protected int maxParallelism;
protected FaultToleranceType faultToleranceType;
private int batchSize = 1;
private long batchTimeout = 1000;
private int batchSize;
private long batchTimeout;
/**
* Creates a new JobGraph with the given name
@@ -87,12 +87,24 @@
this.faultToleranceType = faultToleranceType;
}
public void setDefaultBatchSize(int batchSize) {
this.batchSize = batchSize;
}
/**
* Creates a new JobGraph with the given parameters
*
* @param jobGraphName
* Name of the JobGraph
* @param faultToleranceType
* Type of fault tolerance
* @param defaultBatchSize
* Default number of records to send at one emit
* @param defaultBatchTimeoutMillis
* Default timeout in milliseconds after which a partial batch is emitted
*/
public void setBatchTimeout(int timeout) {
this.batchTimeout = timeout;
public JobGraphBuilder(String jobGraphName, FaultToleranceType faultToleranceType,
int defaultBatchSize, long defaultBatchTimeoutMillis) {
this(jobGraphName, faultToleranceType);
this.batchSize = defaultBatchSize;
this.batchTimeout = defaultBatchTimeoutMillis;
}
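A hedged construction sketch for the four-argument constructor above; the job name and values are invented, and FaultToleranceType.NONE is taken from the enum referenced elsewhere in this commit.

// Batch 10 records per emit by default, flushing partial batches after 1000 ms.
JobGraphBuilder graphBuilder = new JobGraphBuilder("streamingJob",
        FaultToleranceType.NONE, 10, 1000L);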
/**
@@ -108,16 +120,18 @@
* Serialized udf
* @param parallelism
* Number of parallel instances created
* @param subtasksPerInstance
* Number of parallel instances on one task manager
*/
public void setSource(String sourceName, UserSourceInvokable<? extends Tuple> InvokableObject,
String operatorName, byte[] serializedFunction, int parallelism) {
String operatorName, byte[] serializedFunction, int parallelism, int subtasksPerInstance) {
final JobInputVertex source = new JobInputVertex(sourceName, jobGraph);
source.setInvokableClass(StreamSource.class);
source.setInputClass(StreamSource.class);
setComponent(sourceName, source, InvokableObject, operatorName, serializedFunction,
parallelism);
parallelism, subtasksPerInstance);
if (log.isDebugEnabled()) {
log.debug("SOURCE: " + sourceName);
@@ -137,15 +151,17 @@
* Serialized udf
* @param parallelism
* Number of parallel instances created
* @param subtasksPerInstance
* Number of parallel instances on one task manager
*/
public void setTask(String taskName,
UserTaskInvokable<? extends Tuple, ? extends Tuple> TaskInvokableObject,
String operatorName, byte[] serializedFunction, int parallelism) {
String operatorName, byte[] serializedFunction, int parallelism, int subtasksPerInstance) {
final JobTaskVertex task = new JobTaskVertex(taskName, jobGraph);
task.setInvokableClass(StreamTask.class);
task.setTaskClass(StreamTask.class);
setComponent(taskName, task, TaskInvokableObject, operatorName, serializedFunction,
parallelism);
parallelism, subtasksPerInstance);
if (log.isDebugEnabled()) {
log.debug("TASK: " + taskName);
@@ -165,13 +181,16 @@
* Serialized udf
* @param parallelism
* Number of parallel instances created
* @param subtasksPerInstance
* Number of parallel instances on one task manager
*/
public void setSink(String sinkName, UserSinkInvokable<? extends Tuple> InvokableObject,
String operatorName, byte[] serializedFunction, int parallelism) {
String operatorName, byte[] serializedFunction, int parallelism, int subtasksPerInstance) {
final JobOutputVertex sink = new JobOutputVertex(sinkName, jobGraph);
sink.setInvokableClass(StreamSink.class);
setComponent(sinkName, sink, InvokableObject, operatorName, serializedFunction, parallelism);
sink.setOutputClass(StreamSink.class);
setComponent(sinkName, sink, InvokableObject, operatorName, serializedFunction,
parallelism, subtasksPerInstance);
if (log.isDebugEnabled()) {
log.debug("SINK: " + sinkName);
@@ -199,11 +218,10 @@
*/
private void setComponent(String componentName, AbstractJobVertex component,
Serializable InvokableObject, String operatorName, byte[] serializedFunction,
int parallelism) {
int parallelism, int subtasksPerInstance) {
component.setNumberOfSubtasks(parallelism);
// TODO remove all NumberOfSubtasks setting
// component.setNumberOfSubtasksPerInstance(subtasksPerInstance);
component.setNumberOfSubtasksPerInstance(subtasksPerInstance);
if (parallelism > maxParallelism) {
maxParallelism = parallelism;
@@ -224,6 +242,20 @@
numberOfInstances.put(componentName, parallelism);
}
/**
* Sets the number of tuples batched together for higher throughput
*
* @param componentName
* Name of the component
* @param batchSize
* Number of tuples batched together
*/
public void setBatchSize(String componentName, int batchSize) {
Configuration config = components.get(componentName).getConfiguration();
config.setInteger("batchSize_"
+ (components.get(componentName).getNumberOfForwardConnections() - 1), batchSize);
}
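Since setBatchSize keys the value by the component's most recent forward connection (getNumberOfForwardConnections() - 1), it is presumably meant to be called right after the corresponding connect call. A hedged sketch with invented component names:

// After the first outgoing connection of "source", this stores the entry
// "batchSize_0" = 100 in the source vertex's configuration.
graphBuilder.fieldsConnect("source", "counter", 0);
graphBuilder.setBatchSize("source", 100);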
/**
* Adds serialized invokable object to the JobVertex configuration
*
@@ -234,36 +266,105 @@
* be added
*/
private void addSerializedObject(Serializable InvokableObject, Configuration config) {
ByteArrayOutputStream baos = null;
ObjectOutputStream oos = null;
try {
baos = new ByteArrayOutputStream();
oos = new ObjectOutputStream(baos);
oos.writeObject(InvokableObject);
config.setBytes("serializedudf", baos.toByteArray());
} catch (Exception e) {
e.printStackTrace();
System.out.println("Serialization error " + InvokableObject.getClass());
}
}
/**
* Sets the number of tuples batched together for higher throughput
* Copies the serialized udf operator from one component to another; used with some sinks.
*
* @param componentName
* Name of the component
* @param batchSize
* Number of tuples batched together
* @param from
* Name of the component to copy the operator from
* @param to
* Name of the component to copy the operator to
*/
public void setBatchSize(String componentName, int batchSize) {
Configuration config = components.get(componentName).getConfiguration();
config.setInteger("batchSize_"
+ (components.get(componentName).getNumberOfForwardConnections() - 1), batchSize);
public void setBytesFrom(String from, String to) {
Configuration fromConfig = components.get(from).getConfiguration();
Configuration toConfig = components.get(to).getConfiguration();
toConfig.setString("operatorName", fromConfig.getString("operatorName", null));
toConfig.setBytes("operator", fromConfig.getBytes("operator", null));
}
/**
* Connects two JobGraph components with the given names, partitioning and
* channel type
*
* @param upStreamComponentName
* Name of the upstream component that will emit the tuples
* @param downStreamComponentName
* Name of the downstream component that will receive the tuples
* @param PartitionerClass
* Class of the partitioner
*/
private void connect(String upStreamComponentName, String downStreamComponentName,
Class<? extends ChannelSelector<StreamRecord>> PartitionerClass) {
AbstractJobVertex upStreamComponent = components.get(upStreamComponentName);
AbstractJobVertex downStreamComponent = components.get(downStreamComponentName);
try {
upStreamComponent.connectTo(downStreamComponent, ChannelType.NETWORK);
Configuration config = new TaskConfig(upStreamComponent.getConfiguration())
.getConfiguration();
config.setClass(
"partitionerClass_" + (upStreamComponent.getNumberOfForwardConnections() - 1),
PartitionerClass);
if (log.isDebugEnabled()) {
log.debug("CONNECTED: " + PartitionerClass.getSimpleName() + " - "
+ upStreamComponentName + " -> " + downStreamComponentName);
}
} catch (JobGraphDefinitionException e) {
if (log.isErrorEnabled()) {
log.error("Cannot connect components with " + PartitionerClass.getSimpleName()
+ " : " + upStreamComponentName + " -> " + downStreamComponentName, e);
}
}
}
/**
* Sets instance sharing between the given components
*
* @param component1
* Share will be called on this component
* @param component2
* Share will be called to this component
*/
public void setInstanceSharing(String component1, String component2) {
AbstractJobVertex c1 = components.get(component1);
AbstractJobVertex c2 = components.get(component2);
c1.setVertexToShareInstancesWith(c2);
}
/**
* Sets all components to share with the one with highest parallelism
*/
private void setAutomaticInstanceSharing() {
AbstractJobVertex maxParallelismVertex = components.get(maxParallelismVertexName);
for (String componentName : components.keySet()) {
if (componentName != maxParallelismVertexName) {
components.get(componentName).setVertexToShareInstancesWith(maxParallelismVertex);
}
}
}
/**
@@ -301,28 +402,28 @@
*/
public void fieldsConnect(String upStreamComponentName, String downStreamComponentName,
int keyPosition) {
AbstractJobVertex upStreamComponent = components.get(upStreamComponentName);
AbstractJobVertex downStreamComponent = components.get(downStreamComponentName);
try {
upStreamComponent.connectTo(downStreamComponent, ChannelType.NETWORK);
Configuration config = new TaskConfig(upStreamComponent.getConfiguration())
.getConfiguration();
config.setClass(
"partitionerClass_" + (upStreamComponent.getNumberOfForwardConnections() - 1),
FieldsPartitioner.class);
config.setInteger(
"partitionerIntParam_"
+ (upStreamComponent.getNumberOfForwardConnections() - 1), keyPosition);
config.setInteger("numOfOutputs_"
+ (upStreamComponent.getNumberOfForwardConnections() - 1),
numberOfInstances.get(downStreamComponentName));
addOutputChannels(upStreamComponentName, 1);
if (log.isDebugEnabled()) {
log.debug("CONNECTED: FIELD PARTITIONING - " + upStreamComponentName + " -> "
@@ -336,7 +437,7 @@
}
log.info("Fieldsconnected " + upStreamComponentName + " to " + downStreamComponentName
+ " on " + keyPosition);
}
/**
@@ -354,7 +455,7 @@
connect(upStreamComponentName, downStreamComponentName, GlobalPartitioner.class);
addOutputChannels(upStreamComponentName, 1);
log.info("Globalconnected: " + upStreamComponentName + " to " + downStreamComponentName);
}
/**
@@ -374,89 +475,6 @@
log.info("Shuffleconnected: " + upStreamComponentName + " to " + downStreamComponentName);
}
/**
* Connects two JobGraph components with the given names, partitioning and
* channel type
*
* @param upStreamComponentName
* Name of the upstream component that will emit the tuples
* @param downStreamComponentName
* Name of the downstream component that will receive the tuples
* @param PartitionerClass
* Class of the partitioner
*/
private void connect(String upStreamComponentName, String downStreamComponentName,
Class<? extends ChannelSelector<StreamRecord>> PartitionerClass) {
AbstractJobVertex upStreamComponent = components.get(upStreamComponentName);
AbstractJobVertex downStreamComponent = components.get(downStreamComponentName);
try {
upStreamComponent.connectTo(downStreamComponent, ChannelType.NETWORK);
Configuration config = new TaskConfig(upStreamComponent.getConfiguration())
.getConfiguration();
config.setClass(
"partitionerClass_" + (upStreamComponent.getNumberOfForwardConnections() - 1),
PartitionerClass);
if (log.isDebugEnabled()) {
log.debug("CONNECTED: " + PartitionerClass.getSimpleName() + " - "
+ upStreamComponentName + " -> " + downStreamComponentName);
}
} catch (JobGraphDefinitionException e) {
if (log.isErrorEnabled()) {
log.error("Cannot connect components with " + PartitionerClass.getSimpleName()
+ " : " + upStreamComponentName + " -> " + downStreamComponentName, e);
}
}
}
/**
* Copies the serialized udf operator from one component to another; used with some sinks.
*
* @param from
* Name of the component to copy the operator from
* @param to
* Name of the component to copy the operator to
*/
public void setBytesFrom(String from, String to) {
Configuration fromConfig = components.get(from).getConfiguration();
Configuration toConfig = components.get(to).getConfiguration();
toConfig.setString("operatorName", fromConfig.getString("operatorName", null));
toConfig.setBytes("operator", fromConfig.getBytes("operator", null));
}
/**
* Sets instance sharing between the given components
*
* @param component1
* Share will be called on this component
* @param component2
* Share will be called to this component
*/
public void setInstanceSharing(String component1, String component2) {
AbstractJobVertex c1 = components.get(component1);
AbstractJobVertex c2 = components.get(component2);
c1.setVertexToShareInstancesWith(c2);
}
/**
* Sets all components to share with the one with highest parallelism
*/
private void setAutomaticInstanceSharing() {
AbstractJobVertex maxParallelismVertex = components.get(maxParallelismVertexName);
for (String componentName : components.keySet()) {
if (componentName != maxParallelismVertexName) {
components.get(componentName).setVertexToShareInstancesWith(maxParallelismVertex);
}
}
}
/**
* Sets the number of instances for a given component, used for fault
* tolerance purposes
......
@@ -13,10 +13,11 @@
*
**********************************************************************************************************************/
package eu.stratosphere.streaming.api.invokable;
package eu.stratosphere.streaming.api;
import eu.stratosphere.api.java.functions.MapFunction;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.streaming.api.invokable.UserTaskInvokable;
import eu.stratosphere.streaming.api.streamrecord.StreamRecord;
import eu.stratosphere.util.Collector;
......
@@ -13,7 +13,7 @@
*
**********************************************************************************************************************/
package eu.stratosphere.streaming.api.function;
package eu.stratosphere.streaming.api;
import java.io.Serializable;
......
@@ -13,10 +13,10 @@
*
**********************************************************************************************************************/
package eu.stratosphere.streaming.api.invokable;
package eu.stratosphere.streaming.api;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.streaming.api.function.SinkFunction;
import eu.stratosphere.streaming.api.invokable.UserSinkInvokable;
import eu.stratosphere.streaming.api.streamrecord.StreamRecord;
import eu.stratosphere.util.Collector;
......
@@ -13,7 +13,7 @@
*
**********************************************************************************************************************/
package eu.stratosphere.streaming.api.function;
package eu.stratosphere.streaming.api;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.streaming.api.invokable.UserSourceInvokable;
......
@@ -13,11 +13,13 @@
*
**********************************************************************************************************************/
package eu.stratosphere.streaming.api.streamrecord;
package eu.stratosphere.streaming.api;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.nephele.io.RecordWriter;
import eu.stratosphere.pact.runtime.plugable.SerializationDelegate;
import eu.stratosphere.runtime.io.api.RecordWriter;
import eu.stratosphere.streaming.api.streamrecord.ArrayStreamRecord;
import eu.stratosphere.streaming.api.streamrecord.StreamRecord;
import eu.stratosphere.util.Collector;
public class StreamCollector<T extends Tuple> implements Collector<T> {
......
@@ -13,14 +13,15 @@
*
**********************************************************************************************************************/
package eu.stratosphere.streaming.api.streamrecord;
package eu.stratosphere.streaming.api;
import java.util.ArrayList;
import java.util.List;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.nephele.io.RecordWriter;
import eu.stratosphere.pact.runtime.plugable.SerializationDelegate;
import eu.stratosphere.runtime.io.api.RecordWriter;
import eu.stratosphere.streaming.api.streamrecord.StreamRecord;
import eu.stratosphere.util.Collector;
public class StreamCollectorManager<T extends Tuple> implements Collector<T> {
@@ -53,7 +54,6 @@ public class StreamCollectorManager<T extends Tuple> implements Collector<T> {
}
for (int i = 0; i < batchSizesOfPartitioned.size(); i++) {
@SuppressWarnings("unchecked")
StreamCollector<Tuple>[] collectors = new StreamCollector[parallelismOfOutput.get(i)];
for (int j = 0; j < collectors.length; j++) {
collectors[j] = new StreamCollector<Tuple>(batchSizesOfPartitioned.get(i),
......
@@ -17,7 +17,7 @@ package eu.stratosphere.streaming.api.invokable;
import java.io.Serializable;
public abstract class StreamComponentInvokable implements Serializable {
public abstract class StreamComponent implements Serializable {
private static final long serialVersionUID = 1L;
......
@@ -20,10 +20,7 @@ import eu.stratosphere.streaming.api.streamrecord.StreamRecord;
import eu.stratosphere.util.Collector;
public abstract class StreamRecordInvokable<IN extends Tuple, OUT extends Tuple> extends
StreamComponentInvokable {
private static final long serialVersionUID = 1L;
StreamComponent {
public abstract void invoke(StreamRecord record, Collector<OUT> collector)
throws Exception;
}
@@ -20,7 +20,7 @@ import java.io.Serializable;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.util.Collector;
public abstract class UserSourceInvokable<OUT extends Tuple> extends StreamComponentInvokable implements
public abstract class UserSourceInvokable<OUT extends Tuple> extends StreamComponent implements
Serializable {
private static final long serialVersionUID = 1L;
......
@@ -16,8 +16,10 @@
package eu.stratosphere.streaming.api.streamcomponent;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.ArrayList;
import java.util.ConcurrentModificationException;
import java.util.List;
import org.apache.commons.logging.Log;
@@ -33,26 +35,39 @@ import eu.stratosphere.api.java.typeutils.TupleTypeInfo;
import eu.stratosphere.api.java.typeutils.TypeExtractor;
import eu.stratosphere.api.java.typeutils.runtime.TupleSerializer;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.nephele.event.task.AbstractTaskEvent;
import eu.stratosphere.nephele.event.task.EventListener;
import eu.stratosphere.nephele.io.AbstractRecordReader;
import eu.stratosphere.nephele.io.ChannelSelector;
import eu.stratosphere.nephele.io.MutableRecordReader;
import eu.stratosphere.nephele.io.RecordWriter;
import eu.stratosphere.nephele.template.AbstractInvokable;
import eu.stratosphere.pact.runtime.plugable.DeserializationDelegate;
import eu.stratosphere.pact.runtime.plugable.SerializationDelegate;
import eu.stratosphere.runtime.io.api.AbstractRecordReader;
import eu.stratosphere.runtime.io.api.ChannelSelector;
import eu.stratosphere.runtime.io.api.MutableRecordReader;
import eu.stratosphere.runtime.io.api.RecordWriter;
import eu.stratosphere.streaming.api.function.SinkFunction;
import eu.stratosphere.streaming.api.invokable.StreamComponentInvokable;
import eu.stratosphere.streaming.api.SinkFunction;
import eu.stratosphere.streaming.api.StreamCollectorManager;
import eu.stratosphere.streaming.api.invokable.DefaultSinkInvokable;
import eu.stratosphere.streaming.api.invokable.DefaultSourceInvokable;
import eu.stratosphere.streaming.api.invokable.DefaultTaskInvokable;
import eu.stratosphere.streaming.api.invokable.StreamComponent;
import eu.stratosphere.streaming.api.invokable.StreamRecordInvokable;
import eu.stratosphere.streaming.api.invokable.UserSinkInvokable;
import eu.stratosphere.streaming.api.invokable.UserSourceInvokable;
import eu.stratosphere.streaming.api.invokable.UserTaskInvokable;
import eu.stratosphere.streaming.api.streamrecord.ArrayStreamRecord;
import eu.stratosphere.streaming.api.streamrecord.StreamCollectorManager;
import eu.stratosphere.streaming.api.streamrecord.StreamRecord;
import eu.stratosphere.streaming.faulttolerance.AckEvent;
import eu.stratosphere.streaming.faulttolerance.AckEventListener;
import eu.stratosphere.streaming.faulttolerance.FailEvent;
import eu.stratosphere.streaming.faulttolerance.FailEventListener;
import eu.stratosphere.streaming.faulttolerance.FaultToleranceUtil;
import eu.stratosphere.streaming.partitioner.DefaultPartitioner;
import eu.stratosphere.streaming.partitioner.FieldsPartitioner;
import eu.stratosphere.util.Collector;
public abstract class AbstractStreamComponent extends AbstractInvokable {
private final Log log = LogFactory.getLog(AbstractStreamComponent.class);
public final class StreamComponentHelper<T extends AbstractInvokable> {
private static final Log log = LogFactory.getLog(StreamComponentHelper.class);
private static int numComponents = 0;
private TupleTypeInfo<Tuple> inTupleTypeInfo = null;
private TupleSerializer<Tuple> inTupleSerializer = null;
@@ -62,6 +77,7 @@ public abstract class AbstractStreamComponent extends AbstractInvokable {
private TupleSerializer<Tuple> outTupleSerializer = null;
private SerializationDelegate<Tuple> outSerializationDelegate = null;
public Collector<Tuple> collector;
private List<Integer> batchSizesNotPartitioned = new ArrayList<Integer>();
private List<Integer> batchSizesPartitioned = new ArrayList<Integer>();
private List<Integer> numOfOutputsPartitioned = new ArrayList<Integer>();
@@ -70,34 +86,49 @@ public abstract class AbstractStreamComponent extends AbstractInvokable {
private List<RecordWriter<StreamRecord>> outputsNotPartitioned = new ArrayList<RecordWriter<StreamRecord>>();
private List<RecordWriter<StreamRecord>> outputsPartitioned = new ArrayList<RecordWriter<StreamRecord>>();
protected Configuration configuration;
protected Collector<Tuple> collector;
protected int instanceID;
protected String name;
private static int numComponents = 0;
protected static int newComponent() {
public static int newComponent() {
numComponents++;
return numComponents;
}
protected void initialize() {
configuration = getTaskConfiguration();
name = configuration.getString("componentName", "MISSING_COMPONENT_NAME");
public void setAckListener(FaultToleranceUtil recordBuffer, int sourceInstanceID,
List<RecordWriter<StreamRecord>> outputs) {
EventListener[] ackListeners = new EventListener[outputs.size()];
for (int i = 0; i < outputs.size(); i++) {
ackListeners[i] = new AckEventListener(sourceInstanceID, recordBuffer, i);
outputs.get(i).subscribeToEvent(ackListeners[i], AckEvent.class);
}
}
protected Collector<Tuple> setCollector(List<RecordWriter<StreamRecord>> outputs) {
long batchTimeout = configuration.getLong("batchTimeout", 1000);
public void setFailListener(FaultToleranceUtil recordBuffer, int sourceInstanceID,
List<RecordWriter<StreamRecord>> outputs) {
EventListener[] failListeners = new EventListener[outputs.size()];
for (int i = 0; i < outputs.size(); i++) {
failListeners[i] = new FailEventListener(sourceInstanceID, recordBuffer, i);
outputs.get(i).subscribeToEvent(failListeners[i], FailEvent.class);
}
}
public Collector<Tuple> setCollector(Configuration taskConfiguration, int id,
List<RecordWriter<StreamRecord>> outputs) {
long batchTimeout = taskConfiguration.getLong("batchTimeout", 1000);
collector = new StreamCollectorManager<Tuple>(batchSizesNotPartitioned,
batchSizesPartitioned, numOfOutputsPartitioned, keyPosition, batchTimeout,
instanceID, outSerializationDelegate, outputsPartitioned, outputsNotPartitioned);
batchSizesPartitioned, numOfOutputsPartitioned, keyPosition, batchTimeout, id,
outSerializationDelegate, outputsPartitioned, outputsNotPartitioned);
return collector;
}
protected void setSerializers() {
byte[] operatorBytes = configuration.getBytes("operator", null);
String operatorName = configuration.getString("operatorName", "");
public void setSerializers(Configuration taskConfiguration) {
byte[] operatorBytes = taskConfiguration.getBytes("operator", null);
String operatorName = taskConfiguration.getString("operatorName", "");
Object function = null;
try {
@@ -154,7 +185,7 @@ public abstract class AbstractStreamComponent extends AbstractInvokable {
outSerializationDelegate = new SerializationDelegate<Tuple>(outTupleSerializer);
}
protected void setSinkSerializer() {
public void setSinkSerializer() {
if (outSerializationDelegate != null) {
inTupleTypeInfo = outTupleTypeInfo;
@@ -163,40 +194,59 @@ public abstract class AbstractStreamComponent extends AbstractInvokable {
}
}
protected AbstractRecordReader getConfigInputs()
public AbstractRecordReader getConfigInputs(T taskBase, Configuration taskConfiguration)
throws StreamComponentException {
int numberOfInputs = configuration.getInteger("numberOfInputs", 0);
int numberOfInputs = taskConfiguration.getInteger("numberOfInputs", 0);
if (numberOfInputs < 2) {
return new StreamRecordReader(this, ArrayStreamRecord.class,
inDeserializationDelegate, inTupleSerializer);
if (taskBase instanceof StreamTask) {
return new StreamRecordReader((StreamTask) taskBase, ArrayStreamRecord.class,
inDeserializationDelegate, inTupleSerializer);
} else if (taskBase instanceof StreamSink) {
return new StreamRecordReader((StreamSink) taskBase, ArrayStreamRecord.class,
inDeserializationDelegate, inTupleSerializer);
} else {
throw new StreamComponentException("Unsupported object passed to getConfigInputs");
}
} else {
@SuppressWarnings("unchecked")
MutableRecordReader<StreamRecord>[] recordReaders = (MutableRecordReader<StreamRecord>[]) new MutableRecordReader<?>[numberOfInputs];
for (int i = 0; i < numberOfInputs; i++) {
recordReaders[i] = new MutableRecordReader<StreamRecord>(this);
if (taskBase instanceof StreamTask) {
recordReaders[i] = new MutableRecordReader<StreamRecord>((StreamTask) taskBase);
} else if (taskBase instanceof StreamSink) {
recordReaders[i] = new MutableRecordReader<StreamRecord>((StreamSink) taskBase);
} else {
throw new StreamComponentException(
"Unsupported object passed to getConfigInputs");
}
}
return new UnionStreamRecordReader(recordReaders, ArrayStreamRecord.class,
inDeserializationDelegate, inTupleSerializer);
}
}
protected void setConfigOutputs(List<RecordWriter<StreamRecord>> outputs,
public void setConfigOutputs(T taskBase, Configuration taskConfiguration,
List<RecordWriter<StreamRecord>> outputs,
List<ChannelSelector<StreamRecord>> partitioners) throws StreamComponentException {
int numberOfOutputs = configuration.getInteger("numberOfOutputs", 0);
int numberOfOutputs = taskConfiguration.getInteger("numberOfOutputs", 0);
for (int i = 0; i < numberOfOutputs; i++) {
setPartitioner(i, partitioners);
setPartitioner(taskConfiguration, i, partitioners);
ChannelSelector<StreamRecord> outputPartitioner = partitioners.get(i);
outputs.add(new RecordWriter<StreamRecord>(this, outputPartitioner));
if (taskBase instanceof StreamTask) {
outputs.add(new RecordWriter<StreamRecord>((StreamTask) taskBase,
StreamRecord.class, outputPartitioner));
} else if (taskBase instanceof StreamSource) {
outputs.add(new RecordWriter<StreamRecord>((StreamSource) taskBase,
StreamRecord.class, outputPartitioner));
} else {
throw new StreamComponentException("Unsupported object passed to setConfigOutputs");
}
if (outputsPartitioned.size() < batchSizesPartitioned.size()) {
outputsPartitioned.add(outputs.get(i));
} else {
@@ -205,22 +255,92 @@ public abstract class AbstractStreamComponent extends AbstractInvokable {
}
}
private void setPartitioner(int numberOfOutputs,
List<ChannelSelector<StreamRecord>> partitioners) {
/**
* Reads and creates a StreamComponent from the config.
*
* @param userFunctionClass
* Class of the invokable function
* @param config
* Configuration object
* @return The StreamComponent object
*/
private StreamComponent getInvokable(Class<? extends StreamComponent> userFunctionClass,
Configuration config) {
StreamComponent userFunction = null;
byte[] userFunctionSerialized = config.getBytes("serializedudf", null);
try {
ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(
userFunctionSerialized));
userFunction = (StreamComponent) ois.readObject();
} catch (Exception e) {
if (log.isErrorEnabled()) {
log.error("Cannot instantiate user function: " + userFunctionClass.getSimpleName());
}
}
return userFunction;
}
@SuppressWarnings("rawtypes")
public UserSinkInvokable getSinkInvokable(Configuration config) {
Class<? extends UserSinkInvokable> userFunctionClass = config.getClass("userfunction",
DefaultSinkInvokable.class, UserSinkInvokable.class);
return (UserSinkInvokable) getInvokable(userFunctionClass, config);
}
// TODO consider logging stack trace!
@SuppressWarnings("rawtypes")
public UserTaskInvokable getTaskInvokable(Configuration config) {
Class<? extends ChannelSelector<StreamRecord>> partitioner = configuration.getClass(
// Default value is a TaskInvokable even if it was called from a source
Class<? extends UserTaskInvokable> userFunctionClass = config.getClass("userfunction",
DefaultTaskInvokable.class, UserTaskInvokable.class);
return (UserTaskInvokable) getInvokable(userFunctionClass, config);
}
@SuppressWarnings("rawtypes")
public UserSourceInvokable getSourceInvokable(Configuration config) {
// Default value is a DefaultSourceInvokable when none is configured
Class<? extends UserSourceInvokable> userFunctionClass = config.getClass("userfunction",
DefaultSourceInvokable.class, UserSourceInvokable.class);
return (UserSourceInvokable) getInvokable(userFunctionClass, config);
}
// TODO find a better solution for this
public void threadSafePublish(AbstractTaskEvent event, AbstractRecordReader inputs)
throws InterruptedException, IOException {
boolean concurrentModificationOccurred = false;
while (!concurrentModificationOccurred) {
try {
inputs.publishEvent(event);
concurrentModificationOccurred = true;
} catch (ConcurrentModificationException exception) {
if (log.isTraceEnabled()) {
log.trace("Waiting to publish " + event.getClass());
}
}
}
}
private void setPartitioner(Configuration config, int numberOfOutputs,
List<ChannelSelector<StreamRecord>> partitioners) {
Class<? extends ChannelSelector<StreamRecord>> partitioner = config.getClass(
"partitionerClass_" + numberOfOutputs, DefaultPartitioner.class,
ChannelSelector.class);
Integer batchSize = configuration.getInteger("batchSize_" + numberOfOutputs, 1);
Integer batchSize = config.getInteger("batchSize_" + numberOfOutputs, 1);
try {
if (partitioner.equals(FieldsPartitioner.class)) {
batchSizesPartitioned.add(batchSize);
numOfOutputsPartitioned.add(configuration
numOfOutputsPartitioned.add(config
.getInteger("numOfOutputs_" + numberOfOutputs, -1));
// TODO: force one partitioning field
keyPosition = configuration.getInteger("partitionerIntParam_" + numberOfOutputs, 1);
keyPosition = config.getInteger("partitionerIntParam_" + numberOfOutputs, 1);
partitioners.add(partitioner.getConstructor(int.class).newInstance(keyPosition));
@@ -240,8 +360,8 @@ public abstract class AbstractStreamComponent extends AbstractInvokable {
}
}
protected void invokeRecords(StreamRecordInvokable<Tuple, Tuple> userFunction,
AbstractRecordReader inputs) throws Exception {
public void invokeRecords(StreamRecordInvokable userFunction, AbstractRecordReader inputs)
throws Exception {
if (inputs instanceof UnionStreamRecordReader) {
UnionStreamRecordReader recordReader = (UnionStreamRecordReader) inputs;
while (recordReader.hasNext()) {
@@ -258,75 +378,5 @@ public abstract class AbstractStreamComponent extends AbstractInvokable {
}
}
}
/**
* Reads and creates a StreamComponent from the config.
*
* @param userFunctionClass
* Class of the invokable function
* @return The StreamComponent object
*/
protected StreamComponentInvokable getInvokable(Class<? extends StreamComponentInvokable> userFunctionClass) {
StreamComponentInvokable userFunction = null;
byte[] userFunctionSerialized = configuration.getBytes("serializedudf", null);
try {
ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(
userFunctionSerialized));
userFunction = (StreamComponentInvokable) ois.readObject();
} catch (Exception e) {
if (log.isErrorEnabled()) {
log.error("Cannot instantiate user function: " + userFunctionClass.getSimpleName());
}
}
return userFunction;
}
protected abstract void setInvokable();
// protected void threadSafePublish(AbstractTaskEvent event,
// AbstractRecordReader inputs)
// throws InterruptedException, IOException {
//
// boolean concurrentModificationOccured = false;
// while (!concurrentModificationOccured) {
// try {
// inputs.publishEvent(event);
// concurrentModificationOccured = true;
// } catch (ConcurrentModificationException exeption) {
// if (log.isTraceEnabled()) {
// log.trace("Waiting to publish " + event.getClass());
// }
// }
// }
// }
//
// protected void setAckListener(FaultToleranceUtil recordBuffer, int
// sourceInstanceID,
// List<RecordWriter<StreamRecord>> outputs) {
//
// EventListener[] ackListeners = new EventListener[outputs.size()];
//
// for (int i = 0; i < outputs.size(); i++) {
// ackListeners[i] = new AckEventListener(sourceInstanceID, recordBuffer,
// i);
// outputs.get(i).subscribeToEvent(ackListeners[i], AckEvent.class);
// }
//
// }
//
// protected void setFailListener(FaultToleranceUtil recordBuffer, int
// sourceInstanceID,
// List<RecordWriter<StreamRecord>> outputs) {
//
// EventListener[] failListeners = new EventListener[outputs.size()];
//
// for (int i = 0; i < outputs.size(); i++) {
// failListeners[i] = new FailEventListener(sourceInstanceID, recordBuffer,
// i);
// outputs.get(i).subscribeToEvent(failListeners[i], FailEvent.class);
// }
// }
}
}
\ No newline at end of file
/***********************************************************************************************************************
*
* Copyright (C) 2010-2014 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.streaming.api.streamcomponent;
import java.io.Serializable;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.nephele.io.RecordWriter;
import eu.stratosphere.streaming.api.streamrecord.StreamRecord;
import eu.stratosphere.streaming.faulttolerance.FaultToleranceType;
import eu.stratosphere.streaming.faulttolerance.FaultToleranceUtil;
import eu.stratosphere.streaming.util.PerformanceCounter;
public abstract class StreamInvokableComponent<OUT extends Tuple> implements Serializable {
private static final long serialVersionUID = 1L;
private static final Log log = LogFactory.getLog(StreamInvokableComponent.class);
private List<RecordWriter<StreamRecord>> outputs;
protected int channelID;
protected String name;
private FaultToleranceUtil emittedRecords;
protected PerformanceCounter performanceCounter;
private boolean useFaultTolerance;
public final void declareOutputs(List<RecordWriter<StreamRecord>> outputs, int channelID,
String name, FaultToleranceUtil emittedRecords, FaultToleranceType faultToleranceType) {
this.outputs = outputs;
this.channelID = channelID;
this.emittedRecords = emittedRecords;
this.name = name;
this.performanceCounter = new PerformanceCounter("pc", 1000, 1000, 30000,
"/home/strato/stratosphere-distrib/log/counter/" + name + channelID);
this.useFaultTolerance = faultToleranceType != FaultToleranceType.NONE;
}
public final void setPerfCounterDir(String dir) {
performanceCounter.setFname(dir + "/" + name + channelID);
}
public final void emit(StreamRecord record) {
record.setId(channelID);
if (useFaultTolerance) {
emittedRecords.addRecord(record);
}
try {
for (RecordWriter<StreamRecord> output : outputs) {
output.emit(record);
output.flush();
if (log.isInfoEnabled()) {
log.info("EMITTED: " + record.getId() + " -- " + name);
}
}
} catch (Exception e) {
if (useFaultTolerance) {
emittedRecords.failRecord(record.getId());
}
if (log.isWarnEnabled()) {
log.warn("FAILED: " + record.getId() + " -- " + name + " -- due to "
+ e.getClass().getSimpleName());
}
}
}
// TODO: Should we fail record at exception catch?
public final void emit(StreamRecord record, int outputChannel) {
record.setId(channelID);
if (useFaultTolerance) {
emittedRecords.addRecord(record, outputChannel);
}
try {
outputs.get(outputChannel).emit(record);
} catch (Exception e) {
if (log.isWarnEnabled()) {
log.warn("EMIT ERROR: " + e.getClass().getSimpleName() + " -- " + name);
}
}
}
public String getResult() {
return "Override getResult() to pass your own results";
}
}
/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Copyright (C) 2010-2014 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
@@ -9,6 +10,7 @@
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.streaming.api.streamcomponent;
@@ -17,11 +19,13 @@ import java.io.IOException;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.api.java.typeutils.runtime.TupleSerializer;
import eu.stratosphere.nephele.template.AbstractInvokable;
import eu.stratosphere.nephele.io.AbstractSingleGateRecordReader;
import eu.stratosphere.nephele.io.InputChannelResult;
import eu.stratosphere.nephele.io.MutableRecordDeserializerFactory;
import eu.stratosphere.nephele.io.Reader;
import eu.stratosphere.nephele.template.AbstractOutputTask;
import eu.stratosphere.nephele.template.AbstractTask;
import eu.stratosphere.pact.runtime.plugable.DeserializationDelegate;
import eu.stratosphere.runtime.io.api.AbstractSingleGateRecordReader;
import eu.stratosphere.runtime.io.api.Reader;
import eu.stratosphere.runtime.io.gates.InputChannelResult;
import eu.stratosphere.streaming.api.streamrecord.StreamRecord;
/**
@@ -35,6 +39,7 @@ public class StreamRecordReader extends AbstractSingleGateRecordReader<StreamRec
private final Class<? extends StreamRecord> recordType;
private DeserializationDelegate<Tuple> deserializationDelegate;
private TupleSerializer<Tuple> tupleSerializer;
/**
* Stores the last read record.
*/
@@ -57,14 +62,41 @@ public class StreamRecordReader extends AbstractSingleGateRecordReader<StreamRec
* @param recordType
* The class of records that can be read from the record reader.
* @param deserializationDelegate
* deserializationDelegate
* Deserialization delegate
* @param tupleSerializer
* Tuple serializer
*/
public StreamRecordReader(AbstractTask taskBase, Class<? extends StreamRecord> recordType,
DeserializationDelegate<Tuple> deserializationDelegate,
TupleSerializer<Tuple> tupleSerializer) {
// super(taskBase, MutableRecordDeserializerFactory.<StreamRecord>
// get(), 0);
super(taskBase, MutableRecordDeserializerFactory.<StreamRecord> get(), 0);
this.recordType = recordType;
this.deserializationDelegate = deserializationDelegate;
this.tupleSerializer = tupleSerializer;
}
/**
* Constructs a new record reader and registers a new input gate with the
* application's environment.
*
* @param outputBase
* The application that instantiated the record reader.
* @param recordType
* The class of records that can be read from the record reader.
* @param deserializationDelegate
* Deserialization delegate
* @param tupleSerializer
* tupleSerializer
* Tuple serializer
*/
public StreamRecordReader(AbstractInvokable taskBase, Class<? extends StreamRecord> recordType,
public StreamRecordReader(AbstractOutputTask outputBase,
Class<? extends StreamRecord> recordType,
DeserializationDelegate<Tuple> deserializationDelegate,
TupleSerializer<Tuple> tupleSerializer) {
super(taskBase);
// super(outputBase, MutableRecordDeserializerFactory.<StreamRecord>
// get(), 0);
super(outputBase, MutableRecordDeserializerFactory.<StreamRecord> get(), 0);
this.recordType = recordType;
this.deserializationDelegate = deserializationDelegate;
this.tupleSerializer = tupleSerializer;
@@ -101,11 +133,10 @@ public class StreamRecordReader extends AbstractSingleGateRecordReader<StreamRec
return true;
case END_OF_SUPERSTEP:
if (incrementEndOfSuperstepEventAndCheck()) {
if (incrementEndOfSuperstepEventAndCheck())
return false;
} else {
else
break; // fall through and wait for next record/event
}
case TASK_EVENT:
handleEvent(this.inputGate.getCurrentEvent());
......
@@ -18,51 +18,46 @@ package eu.stratosphere.streaming.api.streamcomponent;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.runtime.io.api.AbstractRecordReader;
import eu.stratosphere.streaming.api.invokable.DefaultSinkInvokable;
import eu.stratosphere.streaming.api.invokable.StreamRecordInvokable;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.nephele.io.AbstractRecordReader;
import eu.stratosphere.nephele.template.AbstractOutputTask;
import eu.stratosphere.streaming.api.invokable.UserSinkInvokable;
import eu.stratosphere.streaming.faulttolerance.FaultToleranceType;
public class StreamSink extends AbstractStreamComponent {
public class StreamSink extends AbstractOutputTask {
private static final Log log = LogFactory.getLog(StreamSink.class);
private AbstractRecordReader inputs;
private StreamRecordInvokable<Tuple, Tuple> userFunction;
private UserSinkInvokable userFunction;
private StreamComponentHelper<StreamSink> streamSinkHelper;
private String name;
public StreamSink() {
// TODO: Make configuration file visible and call setClassInputs() here
userFunction = null;
streamSinkHelper = new StreamComponentHelper<StreamSink>();
}
@Override
public void registerInputOutput() {
initialize();
Configuration taskConfiguration = getTaskConfiguration();
name = taskConfiguration.getString("componentName", "MISSING_COMPONENT_NAME");
try {
setSerializers();
setSinkSerializer();
inputs = getConfigInputs();
streamSinkHelper.setSerializers(taskConfiguration);
streamSinkHelper.setSinkSerializer();
inputs = streamSinkHelper.getConfigInputs(this, taskConfiguration);
} catch (Exception e) {
if (log.isErrorEnabled()) {
log.error("Cannot register inputs", e);
}
}
// FaultToleranceType faultToleranceType =
// FaultToleranceType.from(taskConfiguration
// .getInteger("faultToleranceType", 0));
setInvokable();
}
FaultToleranceType faultToleranceType = FaultToleranceType.from(taskConfiguration
.getInteger("faultToleranceType", 0));
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
protected void setInvokable() {
Class<? extends UserSinkInvokable> userFunctionClass = configuration.getClass(
"userfunction", DefaultSinkInvokable.class, UserSinkInvokable.class);
userFunction = (UserSinkInvokable<Tuple>) getInvokable(userFunctionClass);
userFunction = streamSinkHelper.getSinkInvokable(taskConfiguration);
}
@Override
@@ -70,12 +65,9 @@ public class StreamSink extends AbstractStreamComponent {
if (log.isDebugEnabled()) {
log.debug("SINK " + name + " invoked");
}
invokeRecords(userFunction, inputs);
streamSinkHelper.invokeRecords(userFunction, inputs);
if (log.isDebugEnabled()) {
log.debug("SINK " + name + " invoke finished");
}
}
}
@@ -21,85 +21,82 @@ import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.runtime.io.api.ChannelSelector;
import eu.stratosphere.runtime.io.api.RecordWriter;
import eu.stratosphere.streaming.api.invokable.DefaultSourceInvokable;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.nephele.io.ChannelSelector;
import eu.stratosphere.nephele.io.RecordWriter;
import eu.stratosphere.nephele.template.AbstractInputTask;
import eu.stratosphere.streaming.api.invokable.UserSourceInvokable;
import eu.stratosphere.streaming.api.streamrecord.StreamRecord;
import eu.stratosphere.streaming.examples.DummyIS;
import eu.stratosphere.streaming.faulttolerance.FaultToleranceType;
import eu.stratosphere.streaming.faulttolerance.FaultToleranceUtil;
public class StreamSource extends AbstractStreamComponent {
public class StreamSource extends AbstractInputTask<DummyIS> {
private static final Log log = LogFactory.getLog(StreamSource.class);
private List<RecordWriter<StreamRecord>> outputs;
private List<ChannelSelector<StreamRecord>> partitioners;
private UserSourceInvokable<Tuple> userFunction;
private UserSourceInvokable userFunction;
private static int numSources;
private int[] numberOfOutputChannels;
// private FaultToleranceUtil recordBuffer;
// private FaultToleranceType faultToleranceType;
private int sourceInstanceID;
private String name;
private FaultToleranceUtil recordBuffer;
private FaultToleranceType faultToleranceType;
StreamComponentHelper<StreamSource> streamSourceHelper;
public StreamSource() {
// TODO: Make configuration file visible and call setClassInputs() here
outputs = new LinkedList<RecordWriter<StreamRecord>>();
partitioners = new LinkedList<ChannelSelector<StreamRecord>>();
userFunction = null;
numSources = newComponent();
instanceID = numSources;
streamSourceHelper = new StreamComponentHelper<StreamSource>();
numSources = StreamComponentHelper.newComponent();
sourceInstanceID = numSources;
}
@Override
public DummyIS[] computeInputSplits(int requestedMinNumber) throws Exception {
return null;
}
@Override
public Class<DummyIS> getInputSplitType() {
return null;
}
@Override
public void registerInputOutput() {
initialize();
Configuration taskConfiguration = getTaskConfiguration();
name = taskConfiguration.getString("componentName", "MISSING_COMPONENT_NAME");
try {
setSerializers();
setConfigOutputs(outputs, partitioners);
setCollector(outputs);
streamSourceHelper.setSerializers(taskConfiguration);
streamSourceHelper.setConfigOutputs(this, taskConfiguration, outputs, partitioners);
streamSourceHelper.setCollector(taskConfiguration, sourceInstanceID, outputs);
} catch (StreamComponentException e) {
if (log.isErrorEnabled()) {
log.error("Cannot register outputs", e);
}
}
numberOfOutputChannels = new int[outputs.size()];
int[] numberOfOutputChannels = new int[outputs.size()];
for (int i = 0; i < numberOfOutputChannels.length; i++) {
numberOfOutputChannels[i] = configuration.getInteger("channels_" + i, 0);
numberOfOutputChannels[i] = taskConfiguration.getInteger("channels_" + i, 0);
}
setInvokable();
// streamSourceHelper.setAckListener(recordBuffer, sourceInstanceID,
// outputs);
// streamSourceHelper.setFailListener(recordBuffer, sourceInstanceID,
// outputs);
}
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
protected void setInvokable() {
// Falls back to DefaultSourceInvokable when no user function class is configured
Class<? extends UserSourceInvokable> userFunctionClass = configuration.getClass(
"userfunction", DefaultSourceInvokable.class, UserSourceInvokable.class);
userFunction = (UserSourceInvokable<Tuple>) getInvokable(userFunctionClass);
userFunction = (UserSourceInvokable) streamSourceHelper
.getSourceInvokable(taskConfiguration);
streamSourceHelper.setAckListener(recordBuffer, sourceInstanceID, outputs);
streamSourceHelper.setFailListener(recordBuffer, sourceInstanceID, outputs);
}
@Override
public void invoke() throws Exception {
if (log.isDebugEnabled()) {
log.debug("SOURCE " + name + " invoked with instance id " + instanceID);
}
for (RecordWriter<StreamRecord> output : outputs) {
output.initializeSerializers();
}
userFunction.invoke(collector);
if (log.isDebugEnabled()) {
log.debug("SOURCE " + name + " invoke finished with instance id " + instanceID);
log.debug("SOURCE " + name + " invoked with instance id " + sourceInstanceID);
}
userFunction.invoke(streamSourceHelper.collector);
}
}
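Similarly, the call userFunction.invoke(streamSourceHelper.collector) above suggests that a source receives the collector and pushes tuples into it. A hedged sketch of such a user-facing source, with the SourceFunction signature assumed from the example sources further down in this diff:

import eu.stratosphere.api.java.tuple.Tuple1;
import eu.stratosphere.streaming.api.SourceFunction;
import eu.stratosphere.util.Collector;

// Hypothetical source mirroring the invoke(collector) call in StreamSource;
// the invoke(Collector<OUT>) signature is assumed from the examples below.
public class TenNumbersSource extends SourceFunction<Tuple1<Integer>> {
	private static final long serialVersionUID = 1L;

	@Override
	public void invoke(Collector<Tuple1<Integer>> collector) throws Exception {
		for (int i = 0; i < 10; i++) {
			// Emit ten tuples, then return to finish the stream.
			collector.collect(new Tuple1<Integer>(i));
		}
	}
}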
......@@ -21,88 +21,79 @@ import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.runtime.io.api.AbstractRecordReader;
import eu.stratosphere.runtime.io.api.ChannelSelector;
import eu.stratosphere.runtime.io.api.RecordWriter;
import eu.stratosphere.streaming.api.invokable.DefaultTaskInvokable;
import eu.stratosphere.streaming.api.invokable.StreamRecordInvokable;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.nephele.io.AbstractRecordReader;
import eu.stratosphere.nephele.io.ChannelSelector;
import eu.stratosphere.nephele.io.RecordWriter;
import eu.stratosphere.nephele.template.AbstractTask;
import eu.stratosphere.streaming.api.invokable.UserTaskInvokable;
import eu.stratosphere.streaming.api.streamrecord.StreamRecord;
import eu.stratosphere.streaming.faulttolerance.FaultToleranceType;
import eu.stratosphere.streaming.faulttolerance.FaultToleranceUtil;
public class StreamTask extends AbstractStreamComponent {
public class StreamTask extends AbstractTask {
private static final Log log = LogFactory.getLog(StreamTask.class);
private AbstractRecordReader inputs;
private List<RecordWriter<StreamRecord>> outputs;
private List<ChannelSelector<StreamRecord>> partitioners;
private StreamRecordInvokable<Tuple, Tuple> userFunction;
private int[] numberOfOutputChannels;
private UserTaskInvokable userFunction;
private static int numTasks;
private int taskInstanceID;
private String name;
private StreamComponentHelper<StreamTask> streamTaskHelper;
private FaultToleranceType faultToleranceType;
Configuration taskConfiguration;
private FaultToleranceUtil recordBuffer;
public StreamTask() {
// TODO: Make configuration file visible and call setClassInputs() here
outputs = new LinkedList<RecordWriter<StreamRecord>>();
partitioners = new LinkedList<ChannelSelector<StreamRecord>>();
userFunction = null;
numTasks = newComponent();
instanceID = numTasks;
numTasks = StreamComponentHelper.newComponent();
taskInstanceID = numTasks;
streamTaskHelper = new StreamComponentHelper<StreamTask>();
}
@Override
public void registerInputOutput() {
initialize();
taskConfiguration = getTaskConfiguration();
name = taskConfiguration.getString("componentName", "MISSING_COMPONENT_NAME");
try {
setSerializers();
inputs = getConfigInputs();
setConfigOutputs(outputs, partitioners);
setCollector(outputs);
streamTaskHelper.setSerializers(taskConfiguration);
inputs = streamTaskHelper.getConfigInputs(this, taskConfiguration);
streamTaskHelper.setConfigOutputs(this, taskConfiguration, outputs, partitioners);
streamTaskHelper.setCollector(taskConfiguration, taskInstanceID, outputs);
} catch (StreamComponentException e) {
if (log.isErrorEnabled()) {
log.error("Cannot register inputs/outputs for " + getClass().getSimpleName(), e);
}
}
numberOfOutputChannels = new int[outputs.size()];
int[] numberOfOutputChannels = new int[outputs.size()];
for (int i = 0; i < numberOfOutputChannels.length; i++) {
numberOfOutputChannels[i] = configuration.getInteger("channels_" + i, 0);
numberOfOutputChannels[i] = taskConfiguration.getInteger("channels_" + i, 0);
}
setInvokable();
// streamTaskHelper.setAckListener(recordBuffer, taskInstanceID,
// outputs);
// streamTaskHelper.setFailListener(recordBuffer, taskInstanceID,
// outputs);
}
userFunction = (UserTaskInvokable) streamTaskHelper.getTaskInvokable(taskConfiguration);
// TODO consider logging stack trace!
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
protected void setInvokable() {
// Falls back to DefaultTaskInvokable when no user function class is configured
Class<? extends UserTaskInvokable> userFunctionClass = configuration.getClass(
"userfunction", DefaultTaskInvokable.class, UserTaskInvokable.class);
userFunction = (UserTaskInvokable<Tuple, Tuple>) getInvokable(userFunctionClass);
streamTaskHelper.setAckListener(recordBuffer, taskInstanceID, outputs);
streamTaskHelper.setFailListener(recordBuffer, taskInstanceID, outputs);
}
@Override
public void invoke() throws Exception {
if (log.isDebugEnabled()) {
log.debug("TASK " + name + " invoked with instance id " + instanceID);
log.debug("TASK " + name + " invoked with instance id " + taskInstanceID);
}
for (RecordWriter<StreamRecord> output : outputs) {
output.initializeSerializers();
}
invokeRecords(userFunction, inputs);
streamTaskHelper.invokeRecords(userFunction, inputs);
if (log.isDebugEnabled()) {
log.debug("TASK " + name + " invoke finished with instance id " + instanceID);
log.debug("TASK " + name + " invoke finished with instance id " + taskInstanceID);
}
}
}
\ No newline at end of file
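Between sources and sinks, a StreamTask wraps a transformation behind its UserTaskInvokable. A minimal sketch of the kind of user function involved, modeled on the MapFunction-based BasicTopology example further down in this diff (the wrapping itself is assumed):

import eu.stratosphere.api.java.functions.MapFunction;
import eu.stratosphere.api.java.tuple.Tuple1;

// Hypothetical transformation of the kind a StreamTask executes between
// its inputs and outputs; styled after the BasicTopology example below.
public class UpperCaseMap extends MapFunction<Tuple1<String>, Tuple1<String>> {
	private static final long serialVersionUID = 1L;

	@Override
	public Tuple1<String> map(Tuple1<String> value) throws Exception {
		// Transform the single field in place and forward the tuple.
		value.f0 = value.f0.toUpperCase();
		return value;
	}
}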
......@@ -19,10 +19,10 @@ import java.io.IOException;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.api.java.typeutils.runtime.TupleSerializer;
import eu.stratosphere.nephele.io.AbstractUnionRecordReader;
import eu.stratosphere.nephele.io.MutableRecordReader;
import eu.stratosphere.nephele.io.Reader;
import eu.stratosphere.pact.runtime.plugable.DeserializationDelegate;
import eu.stratosphere.runtime.io.api.AbstractUnionRecordReader;
import eu.stratosphere.runtime.io.api.MutableRecordReader;
import eu.stratosphere.runtime.io.api.Reader;
import eu.stratosphere.streaming.api.streamrecord.StreamRecord;
public final class UnionStreamRecordReader extends AbstractUnionRecordReader<StreamRecord>
......
......@@ -102,7 +102,6 @@ public class ArrayStreamRecord extends StreamRecord {
* Value to set
* @throws NoSuchTupleException
* , TupleSizeMismatchException
* @return Returns the StreamRecord object
*/
public StreamRecord setTuple(int tupleNumber, Tuple tuple) throws NoSuchTupleException {
try {
......
......@@ -111,7 +111,6 @@ public class ListStreamRecord extends StreamRecord {
* Value to set
* @throws NoSuchTupleException
* , TupleSizeMismatchException
* @return Returns the StreamRecord object
*/
public StreamRecord setTuple(int tupleNumber, Tuple tuple) throws NoSuchTupleException {
try {
......
......@@ -129,7 +129,6 @@ public abstract class StreamRecord implements IOReadableWritable, Serializable {
* Value to set
* @throws NoSuchTupleException
* , TupleSizeMismatchException
* @return Returns the StreamRecord object
*/
public abstract StreamRecord setTuple(int tupleNumber, Tuple tuple) throws NoSuchTupleException;
......@@ -150,7 +149,6 @@ public abstract class StreamRecord implements IOReadableWritable, Serializable {
* type of the tuple
* @return Copy of the tuple
*/
@SuppressWarnings("unchecked")
public static <T extends Tuple> T copyTuple(T tuple) {
// TODO: implement deep copy for arrays
int numofFields = tuple.getArity();
......
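The copyTuple body is elided above; a sketch of the shallow, field-by-field copy it plausibly performs. The reflective instantiation and the getField/setField accessors are assumptions, and, per the TODO, array-valued fields would not be deep-copied.

import eu.stratosphere.api.java.tuple.Tuple;

public class TupleCopySketch {
	// Shallow copy: a new tuple of the same class whose fields reference
	// the same objects as the original (assumed accessor names).
	public static <T extends Tuple> T copyTupleShallow(T tuple) throws Exception {
		@SuppressWarnings("unchecked")
		T copy = (T) tuple.getClass().newInstance();
		for (int i = 0; i < tuple.getArity(); i++) {
			copy.setField(tuple.getField(i), i);
		}
		return copy;
	}
}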
/***********************************************************************************************************************
*
* Copyright (C) 2010-2014 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.streaming.examples;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import eu.stratosphere.core.io.InputSplit;
public class DummyIS implements InputSplit {
@Override
public void write(DataOutput out) throws IOException {
}
@Override
public void read(DataInput in) throws IOException {
}
@Override
public int getSplitNumber() {
return 0;
}
}
\ No newline at end of file
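Note: DummyIS is a no-op InputSplit. StreamSource extends AbstractInputTask<DummyIS> only to satisfy Nephele's task model; since streaming sources generate their own data, computeInputSplits and getInputSplitType simply return null and the read/write methods have nothing to serialize.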
......@@ -17,8 +17,8 @@ package eu.stratosphere.streaming.examples.basictopology;
import eu.stratosphere.api.java.functions.MapFunction;
import eu.stratosphere.api.java.tuple.Tuple1;
import eu.stratosphere.streaming.api.DataStream;
import eu.stratosphere.streaming.api.SourceFunction;
import eu.stratosphere.streaming.api.StreamExecutionEnvironment;
import eu.stratosphere.streaming.api.function.SourceFunction;
import eu.stratosphere.util.Collector;
public class BasicTopology {
......@@ -52,7 +52,7 @@ public class BasicTopology {
private static final int SOURCE_PARALELISM = 1;
public static void main(String[] args) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
StreamExecutionEnvironment env = new StreamExecutionEnvironment();
DataStream<Tuple1<String>> stream = env.addSource(new BasicSource(), SOURCE_PARALELISM)
.map(new BasicMap(), PARALELISM);
......
......@@ -21,8 +21,8 @@ import eu.stratosphere.api.java.functions.FlatMapFunction;
import eu.stratosphere.api.java.tuple.Tuple1;
import eu.stratosphere.api.java.tuple.Tuple4;
import eu.stratosphere.streaming.api.DataStream;
import eu.stratosphere.streaming.api.SourceFunction;
import eu.stratosphere.streaming.api.StreamExecutionEnvironment;
import eu.stratosphere.streaming.api.function.SourceFunction;
import eu.stratosphere.util.Collector;
public class CellInfoLocal {
......@@ -111,7 +111,7 @@ public class CellInfoLocal {
// In this example two different sources are created, then the two streams are connected and a function is applied to the connected stream
// TODO add arguments
public static void main(String[] args) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
StreamExecutionEnvironment env = new StreamExecutionEnvironment();
DataStream<Tuple4<Boolean, Integer, Long, Integer>> querySource = env.addSource(
new QuerySource(), SOURCE_PARALELISM);
......
......@@ -16,7 +16,7 @@
package eu.stratosphere.streaming.examples.iterative.collaborativefilter;
import eu.stratosphere.api.java.tuple.Tuple4;
import eu.stratosphere.streaming.api.function.SinkFunction;
import eu.stratosphere.streaming.api.SinkFunction;
public class CollaborativeFilteringSink extends SinkFunction<Tuple4<Integer, Integer, Integer, Long>> {
private static final long serialVersionUID = 1L;
......
......@@ -19,7 +19,7 @@ import java.io.BufferedReader;
import java.io.FileReader;
import eu.stratosphere.api.java.tuple.Tuple4;
import eu.stratosphere.streaming.api.function.SourceFunction;
import eu.stratosphere.streaming.api.SourceFunction;
import eu.stratosphere.util.Collector;
public class CollaborativeFilteringSource extends SourceFunction<Tuple4<Integer, Integer, Integer, Long>> {
......
......@@ -29,7 +29,7 @@ import eu.stratosphere.streaming.util.LogUtils;
public class KMeansLocal {
public static void main(String[] args) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
StreamExecutionEnvironment env = new StreamExecutionEnvironment();
// @SuppressWarnings("unused")
// DataStream<Tuple2<String, Integer>> dataStream = env
......
......@@ -16,7 +16,7 @@
package eu.stratosphere.streaming.examples.iterative.kmeans;
import eu.stratosphere.api.java.tuple.Tuple3;
import eu.stratosphere.streaming.api.function.SinkFunction;
import eu.stratosphere.streaming.api.SinkFunction;
public class KMeansSink extends SinkFunction<Tuple3<Integer, Integer, Long>> {
private static final long serialVersionUID = 1L;
......
......@@ -18,7 +18,7 @@ package eu.stratosphere.streaming.examples.iterative.kmeans;
import java.util.Random;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.streaming.api.function.SourceFunction;
import eu.stratosphere.streaming.api.SourceFunction;
import eu.stratosphere.util.Collector;
public class KMeansSource extends SourceFunction<Tuple2<String, Long>> {
......
......@@ -29,7 +29,7 @@ import eu.stratosphere.streaming.util.LogUtils;
public class PageRankLocal {
public static void main(String[] args) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
StreamExecutionEnvironment env = new StreamExecutionEnvironment();
// @SuppressWarnings("unused")
// DataStream<Tuple2<String, Integer>> dataStream = env
......
......@@ -16,7 +16,7 @@
package eu.stratosphere.streaming.examples.iterative.pagerank;
import eu.stratosphere.api.java.tuple.Tuple3;
import eu.stratosphere.streaming.api.function.SinkFunction;
import eu.stratosphere.streaming.api.SinkFunction;
public class PageRankSink extends SinkFunction<Tuple3<Integer, Float, Long>> {
private static final long serialVersionUID = 1L;
......
......@@ -19,7 +19,7 @@ import java.io.BufferedReader;
import java.io.FileReader;
import eu.stratosphere.api.java.tuple.Tuple3;
import eu.stratosphere.streaming.api.function.SourceFunction;
import eu.stratosphere.streaming.api.SourceFunction;
import eu.stratosphere.util.Collector;
public class PageRankSource extends SourceFunction<Tuple3<Integer, Integer, Long>> {
......
......@@ -29,7 +29,7 @@ import eu.stratosphere.streaming.util.LogUtils;
public class SSSPLocal {
public static void main(String[] args) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
StreamExecutionEnvironment env = new StreamExecutionEnvironment();
// @SuppressWarnings("unused")
// DataStream<Tuple2<String, Integer>> dataStream = env
......
......@@ -16,7 +16,7 @@
package eu.stratosphere.streaming.examples.iterative.sssp;
import eu.stratosphere.api.java.tuple.Tuple3;
import eu.stratosphere.streaming.api.function.SinkFunction;
import eu.stratosphere.streaming.api.SinkFunction;
public class SSSPSink extends SinkFunction<Tuple3<Integer, Integer, Long>> {
private static final long serialVersionUID = 1L;
......
......@@ -19,7 +19,7 @@ import java.io.BufferedReader;
import java.io.FileReader;
import eu.stratosphere.api.java.tuple.Tuple3;
import eu.stratosphere.streaming.api.function.SourceFunction;
import eu.stratosphere.streaming.api.SourceFunction;
import eu.stratosphere.util.Collector;
public class SSSPSource extends SourceFunction<Tuple3<Integer, Integer, Long>> {
......
......@@ -34,7 +34,7 @@ public class JoinLocal {
LogUtils.initializeDefaultConsoleLogger(Level.DEBUG, Level.INFO);
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
StreamExecutionEnvironment env = new StreamExecutionEnvironment();
DataStream<Tuple3<String, String, Integer>> source1 = env.addSource(new JoinSourceOne(),
SOURCE_PARALELISM);
......
......@@ -16,7 +16,7 @@
package eu.stratosphere.streaming.examples.join;
import eu.stratosphere.api.java.tuple.Tuple3;
import eu.stratosphere.streaming.api.function.SinkFunction;
import eu.stratosphere.streaming.api.SinkFunction;
public class JoinSink extends SinkFunction<Tuple3<String, Integer, Integer>> {
private static final long serialVersionUID = 1L;
......
......@@ -18,7 +18,7 @@ package eu.stratosphere.streaming.examples.join;
import java.util.Random;
import eu.stratosphere.api.java.tuple.Tuple3;
import eu.stratosphere.streaming.api.function.SourceFunction;
import eu.stratosphere.streaming.api.SourceFunction;
import eu.stratosphere.util.Collector;
public class JoinSourceOne extends SourceFunction<Tuple3<String, String, Integer>> {
......
......@@ -18,7 +18,7 @@ package eu.stratosphere.streaming.examples.join;
import java.util.Random;
import eu.stratosphere.api.java.tuple.Tuple3;
import eu.stratosphere.streaming.api.function.SourceFunction;
import eu.stratosphere.streaming.api.SourceFunction;
import eu.stratosphere.util.Collector;
public class JoinSourceTwo extends SourceFunction<Tuple3<String, String, Integer>> {
......
......@@ -17,8 +17,8 @@ package eu.stratosphere.streaming.examples.ml;
import eu.stratosphere.api.java.functions.MapFunction;
import eu.stratosphere.api.java.tuple.Tuple1;
import eu.stratosphere.streaming.api.DataStream;
import eu.stratosphere.streaming.api.SourceFunction;
import eu.stratosphere.streaming.api.StreamExecutionEnvironment;
import eu.stratosphere.streaming.api.function.SourceFunction;
import eu.stratosphere.util.Collector;
public class IncrementalLearningSkeleton {
......@@ -124,7 +124,7 @@ public class IncrementalLearningSkeleton {
public static void main(String[] args) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
StreamExecutionEnvironment env = new StreamExecutionEnvironment();
DataStream<Tuple1<Integer>> model =
env.addSource(new TrainingDataSource(), SOURCE_PARALELISM)
......
......@@ -24,8 +24,8 @@ import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.api.java.tuple.Tuple1;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.streaming.api.DataStream;
import eu.stratosphere.streaming.api.SourceFunction;
import eu.stratosphere.streaming.api.StreamExecutionEnvironment;
import eu.stratosphere.streaming.api.function.SourceFunction;
import eu.stratosphere.util.Collector;
public class IncrementalOLS {
......@@ -154,7 +154,7 @@ public class IncrementalOLS {
public static void main(String[] args) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
StreamExecutionEnvironment env = new StreamExecutionEnvironment();
DataStream<Tuple2<Boolean, Double[]>> model =
env.addSource(new TrainingDataSource(), SOURCE_PARALELISM)
......
......@@ -36,7 +36,7 @@ public class WindowJoinLocal {
LogUtils.initializeDefaultConsoleLogger(Level.DEBUG, Level.INFO);
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
StreamExecutionEnvironment env = new StreamExecutionEnvironment();
DataStream<Tuple4<String, String, Integer, Long>> dataStream1 = env.addSource(
new WindowJoinSourceOne(), SOURCE_PARALELISM);
......
......@@ -18,7 +18,7 @@ package eu.stratosphere.streaming.examples.window.join;
import java.util.Random;
import eu.stratosphere.api.java.tuple.Tuple4;
import eu.stratosphere.streaming.api.function.SourceFunction;
import eu.stratosphere.streaming.api.SourceFunction;
import eu.stratosphere.util.Collector;
public class WindowJoinSourceOne extends SourceFunction<Tuple4<String, String, Integer, Long>> {
......
......@@ -18,7 +18,7 @@ package eu.stratosphere.streaming.examples.window.join;
import java.util.Random;
import eu.stratosphere.api.java.tuple.Tuple4;
import eu.stratosphere.streaming.api.function.SourceFunction;
import eu.stratosphere.streaming.api.SourceFunction;
import eu.stratosphere.util.Collector;
public class WindowJoinSourceTwo extends SourceFunction<Tuple4<String, String, Integer, Long>> {
......
......@@ -27,7 +27,7 @@ public class WindowSumLocal {
public static void main(String[] args) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
StreamExecutionEnvironment env = new StreamExecutionEnvironment();
DataStream<Tuple2<Integer, Long>> dataStream = env
.addSource(new WindowSumSource(), SOURCE_PARALELISM)
......
......@@ -16,7 +16,7 @@
package eu.stratosphere.streaming.examples.window.sum;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.streaming.api.function.SinkFunction;
import eu.stratosphere.streaming.api.SinkFunction;
public class WindowSumSink extends SinkFunction<Tuple2<Integer, Long>> {
private static final long serialVersionUID = 1L;
......
......@@ -16,7 +16,7 @@
package eu.stratosphere.streaming.examples.window.sum;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.streaming.api.function.SourceFunction;
import eu.stratosphere.streaming.api.SourceFunction;
import eu.stratosphere.util.Collector;
public class WindowSumSource extends SourceFunction<Tuple2<Integer, Long>> {
......
......@@ -27,7 +27,7 @@ public class WindowWordCountLocal {
// This example will count the occurrence of each word in the input file with a sliding window.
public static void main(String[] args) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
StreamExecutionEnvironment env = new StreamExecutionEnvironment();
@SuppressWarnings("unused")
DataStream<Tuple3<String, Integer, Long>> dataStream = env
......
......@@ -16,7 +16,7 @@
package eu.stratosphere.streaming.examples.window.wordcount;
import eu.stratosphere.api.java.tuple.Tuple3;
import eu.stratosphere.streaming.api.function.SinkFunction;
import eu.stratosphere.streaming.api.SinkFunction;
public class WindowWordCountSink extends SinkFunction<Tuple3<String, Integer, Long>> {
private static final long serialVersionUID = 1L;
......
......@@ -19,7 +19,7 @@ import java.io.BufferedReader;
import java.io.FileReader;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.streaming.api.function.SourceFunction;
import eu.stratosphere.streaming.api.SourceFunction;
import eu.stratosphere.util.Collector;
public class WindowWordCountSource extends SourceFunction<Tuple2<String, Long>> {
......
......@@ -23,16 +23,18 @@ import eu.stratosphere.streaming.util.TestDataUtil;
public class WordCountLocal {
// This example will count the occurrence of each word in the input file.
public static void main(String[] args) {
TestDataUtil.downloadIfNotExists("hamlet.txt");
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
StreamExecutionEnvironment env = new StreamExecutionEnvironment();
DataStream<Tuple2<String, Integer>> dataStream = env
.readTextFile("src/test/resources/testdata/hamlet.txt")
.flatMap(new WordCountSplitter()).partitionBy(0).map(new WordCountCounter());
.flatMap(new WordCountSplitter(), 1)
.partitionBy(0)
.map(new WordCountCounter(), 1);
dataStream.print();
env.execute();
......
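Assembled from the hunk above, the createLocalEnvironment variant of WordCountLocal reads as follows. WordCountSplitter and WordCountCounter are the example's own splitter and counter functions, referenced but not shown in this diff.

import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.streaming.api.DataStream;
import eu.stratosphere.streaming.api.StreamExecutionEnvironment;

public class WordCountLocalSketch {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();

		// Read the test file, split lines into words, group by word, count.
		DataStream<Tuple2<String, Integer>> dataStream = env
				.readTextFile("src/test/resources/testdata/hamlet.txt")
				.flatMap(new WordCountSplitter())
				.partitionBy(0)
				.map(new WordCountCounter());

		dataStream.print();
		env.execute();
	}
}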