Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
doujutun3207
flink
提交
ce56b146
F
flink
项目概览
doujutun3207
/
flink
与 Fork 源项目一致
从无法访问的项目Fork
通知
24
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
F
flink
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
ce56b146
编写于
7月 14, 2014
作者:
G
gyfora
提交者:
Stephan Ewen
8月 18, 2014
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[streaming] connectWith and partitioning added to newapi
上级
ba25a0b0
变更
4
显示空白变更内容
内联
并排
Showing
4 changed files
with
88 additions
and
21 deletions
+88
-21
flink-addons/flink-streaming/src/main/java/eu/stratosphere/api/datastream/DataStream.java
.../main/java/eu/stratosphere/api/datastream/DataStream.java
+28
-2
flink-addons/flink-streaming/src/main/java/eu/stratosphere/api/datastream/StreamExecutionEnvironment.java
...ratosphere/api/datastream/StreamExecutionEnvironment.java
+31
-4
flink-addons/flink-streaming/src/main/java/eu/stratosphere/streaming/api/JobGraphBuilder.java
...n/java/eu/stratosphere/streaming/api/JobGraphBuilder.java
+26
-13
flink-addons/flink-streaming/src/test/java/eu/stratosphere/streaming/api/FlatMapTest.java
.../test/java/eu/stratosphere/streaming/api/FlatMapTest.java
+3
-2
未找到文件。
flink-addons/flink-streaming/src/main/java/eu/stratosphere/api/datastream/DataStream.java
浏览文件 @
ce56b146
...
...
@@ -15,8 +15,11 @@
package
eu.stratosphere.api.datastream
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Random
;
import
eu.stratosphere.api.datastream.StreamExecutionEnvironment.ConnectionType
;
import
eu.stratosphere.api.java.functions.FlatMapFunction
;
import
eu.stratosphere.api.java.functions.MapFunction
;
import
eu.stratosphere.api.java.tuple.Tuple
;
...
...
@@ -28,11 +31,16 @@ public class DataStream<T extends Tuple> {
// Type information for the stream's tuple type T.
// NOTE(review): no use of this field is visible in this excerpt.
private
TypeInformation
<
T
>
type
;
// Random generator; the one-argument constructor concatenates two hex-encoded
// random longs from it to build the stream id.
private
final
Random
random
=
new
Random
();
// Identifier of this stream, returned by getId().
private
final
String
id
;
// IDs of the streams that feed the next operator: seeded with this stream's own
// id by the constructors and extended by connectWith().
List
<
String
>
connectIDs
;
// Connection type used towards the next operator. Defaults to SHUFFLE and is
// changed by partitionBy() (FIELD) or broadcast() (BROADCAST).
ConnectionType
ctype
=
ConnectionType
.
SHUFFLE
;
// Parameter accompanying ctype; partitionBy() stores the key position here.
int
cparam
=
0
;
/**
 * Creates a stream bound to a freshly created {@link StreamExecutionEnvironment}
 * and identified by the fixed id {@code "source"}. The stream's own id becomes
 * the first entry of {@code connectIDs}.
 */
protected DataStream() {
    // TODO implement
    context = new StreamExecutionEnvironment();
    id = "source";
    connectIDs = new ArrayList<String>();
    // Read the field instead of calling the overridable getId(): an override in a
    // subclass would otherwise run before that subclass has been initialised.
    connectIDs.add(id);
}
protected
DataStream
(
StreamExecutionEnvironment
context
)
{
...
...
@@ -40,15 +48,33 @@ public class DataStream<T extends Tuple> {
throw
new
NullPointerException
(
"context is null"
);
}
//TODO add name based on component number an preferable sequential id
//
TODO add name based on component number an preferable sequential id
this
.
id
=
Long
.
toHexString
(
random
.
nextLong
())
+
Long
.
toHexString
(
random
.
nextLong
());
this
.
context
=
context
;
connectIDs
=
new
ArrayList
<
String
>();
connectIDs
.
add
(
getId
());
}
/**
 * Returns the identifier of this stream.
 *
 * @return the value of the {@code id} field
 */
public String getId() {
    return this.id;
}
/**
 * Registers another stream as an additional input by appending its id to
 * {@code connectIDs}.
 *
 * @param stream
 *            the stream whose id is added
 * @return this stream, so calls can be chained
 */
public DataStream<T> connectWith(DataStream<T> stream) {
    final String otherId = stream.getId();
    this.connectIDs.add(otherId);
    return this;
}
/**
 * Selects field-based connection for this stream: sets {@code ctype} to
 * {@code ConnectionType.FIELD} and stores the key position in {@code cparam}.
 *
 * @param keyposition
 *            position of the key field, kept in {@code cparam}
 * @return this stream, so calls can be chained
 */
public DataStream<T> partitionBy(int keyposition) {
    this.cparam = keyposition;
    this.ctype = ConnectionType.FIELD;
    return this;
}
/**
 * Selects broadcast connection for this stream by setting {@code ctype} to
 * {@code ConnectionType.BROADCAST}.
 *
 * @return this stream, so calls can be chained
 */
public DataStream<T> broadcast() {
    this.ctype = ConnectionType.BROADCAST;
    return this;
}
/**
 * Applies a flat-map function to this stream by delegating to
 * {@code context.addFlatMapFunction}.
 *
 * @param flatMapper
 *            the user function handed to the environment
 * @return the stream returned by the environment
 */
public <R extends Tuple> DataStream<R> flatMap(FlatMapFunction<T, R> flatMapper) {
    final DataStream<R> result = context.addFlatMapFunction(this, flatMapper);
    return result;
}
...
...
flink-addons/flink-streaming/src/main/java/eu/stratosphere/api/datastream/StreamExecutionEnvironment.java
浏览文件 @
ce56b146
...
...
@@ -18,11 +18,13 @@ package eu.stratosphere.api.datastream;
import
java.io.ByteArrayOutputStream
;
import
java.io.IOException
;
import
java.io.ObjectOutputStream
;
import
java.util.List
;
import
eu.stratosphere.api.java.functions.FlatMapFunction
;
import
eu.stratosphere.api.java.functions.MapFunction
;
import
eu.stratosphere.api.java.tuple.Tuple
;
import
eu.stratosphere.api.java.tuple.Tuple1
;
import
eu.stratosphere.nephele.io.InputChannelResult
;
import
eu.stratosphere.streaming.api.JobGraphBuilder
;
import
eu.stratosphere.streaming.api.StreamCollector
;
import
eu.stratosphere.streaming.api.invokable.UserSinkInvokable
;
...
...
@@ -54,6 +56,28 @@ public class StreamExecutionEnvironment {
}
/**
 * Ways in which a stream can be connected to the following operator.
 * {@code connectGraph} maps each constant to the matching
 * {@code jobGraphBuilder} call (shuffleConnect, broadcastConnect, fieldsConnect).
 */
public enum ConnectionType {
    SHUFFLE,
    BROADCAST,
    FIELD
}
/**
 * Connects every input component to the given output component using the
 * requested connection type.
 *
 * @param inputIDs
 *            ids of the upstream components
 * @param outputID
 *            id of the downstream component
 * @param type
 *            connection type selecting the jobGraphBuilder call
 * @param param
 *            passed to fieldsConnect when the type is FIELD; unused otherwise
 */
private void connectGraph(List<String> inputIDs, String outputID, ConnectionType type,
        int param) {
    for (String upstreamId : inputIDs) {
        // The switch stays inside the loop so nothing is evaluated for an empty list.
        switch (type) {
        case FIELD:
            jobGraphBuilder.fieldsConnect(upstreamId, outputID, param);
            break;
        case BROADCAST:
            jobGraphBuilder.broadcastConnect(upstreamId, outputID);
            break;
        case SHUFFLE:
            jobGraphBuilder.shuffleConnect(upstreamId, outputID);
            break;
        default:
            // No other constants exist; nothing to connect.
            break;
        }
    }
}
public
<
T
extends
Tuple
,
R
extends
Tuple
>
DataStream
<
R
>
addFlatMapFunction
(
DataStream
<
T
>
inputStream
,
final
FlatMapFunction
<
T
,
R
>
flatMapper
)
{
DataStream
<
R
>
returnStream
=
new
DataStream
<
R
>(
this
);
...
...
@@ -71,7 +95,8 @@ public class StreamExecutionEnvironment {
jobGraphBuilder
.
setTask
(
returnStream
.
getId
(),
new
FlatMapInvokable
<
T
,
R
>(
flatMapper
),
"flatMap"
,
baos
.
toByteArray
());
jobGraphBuilder
.
shuffleConnect
(
inputStream
.
getId
(),
returnStream
.
getId
());
connectGraph
(
inputStream
.
connectIDs
,
returnStream
.
getId
(),
inputStream
.
ctype
,
inputStream
.
cparam
);
return
returnStream
;
}
...
...
@@ -93,7 +118,8 @@ public class StreamExecutionEnvironment {
jobGraphBuilder
.
setTask
(
returnStream
.
getId
(),
new
MapInvokable
<
T
,
R
>(
mapper
),
"map"
,
baos
.
toByteArray
());
jobGraphBuilder
.
shuffleConnect
(
inputStream
.
getId
(),
returnStream
.
getId
());
connectGraph
(
inputStream
.
connectIDs
,
returnStream
.
getId
(),
inputStream
.
ctype
,
inputStream
.
cparam
);
return
returnStream
;
}
...
...
@@ -124,7 +150,8 @@ public class StreamExecutionEnvironment {
jobGraphBuilder
.
setSink
(
"sink"
,
new
DummySink
(),
"sink"
,
baos
.
toByteArray
());
jobGraphBuilder
.
shuffleConnect
(
inputStream
.
getId
(),
"sink"
);
connectGraph
(
inputStream
.
connectIDs
,
"sink"
,
inputStream
.
ctype
,
inputStream
.
cparam
);
return
new
DataStream
<
R
>(
this
);
}
...
...
flink-addons/flink-streaming/src/main/java/eu/stratosphere/streaming/api/JobGraphBuilder.java
浏览文件 @
ce56b146
...
...
@@ -128,8 +128,9 @@ public class JobGraphBuilder {
* @param subtasksPerInstance
* Number of subtasks allocated to a machine
*/
public
Configuration
setSource
(
String
sourceName
,
UserSourceInvokable
<?
extends
Tuple
>
InvokableObject
,
int
parallelism
,
int
subtasksPerInstance
)
{
public
Configuration
setSource
(
String
sourceName
,
UserSourceInvokable
<?
extends
Tuple
>
InvokableObject
,
int
parallelism
,
int
subtasksPerInstance
)
{
final
JobInputVertex
source
=
new
JobInputVertex
(
sourceName
,
jobGraph
);
source
.
setInputClass
(
StreamSource
.
class
);
Configuration
config
=
setComponent
(
sourceName
,
InvokableObject
,
parallelism
,
...
...
@@ -174,7 +175,8 @@ public class JobGraphBuilder {
* Number of subtasks allocated to a machine
* @return
*/
public
Configuration
setTask
(
String
taskName
,
UserTaskInvokable
<?
extends
Tuple
,
?
extends
Tuple
>
TaskInvokableObject
,
public
Configuration
setTask
(
String
taskName
,
UserTaskInvokable
<?
extends
Tuple
,
?
extends
Tuple
>
TaskInvokableObject
,
int
parallelism
,
int
subtasksPerInstance
)
{
final
JobTaskVertex
task
=
new
JobTaskVertex
(
taskName
,
jobGraph
);
task
.
setTaskClass
(
StreamTask
.
class
);
...
...
@@ -186,8 +188,8 @@ public class JobGraphBuilder {
return
config
;
}
public
void
setSink
(
String
sinkName
,
UserSinkInvokable
<?
extends
Tuple
>
InvokableObject
,
String
operatorName
,
byte
[]
serializedFunction
)
{
public
void
setSink
(
String
sinkName
,
UserSinkInvokable
<?
extends
Tuple
>
InvokableObject
,
String
operatorName
,
byte
[]
serializedFunction
)
{
Configuration
config
=
setSink
(
sinkName
,
InvokableObject
,
1
,
1
);
config
.
setBytes
(
"operator"
,
serializedFunction
);
config
.
setString
(
"operatorName"
,
operatorName
);
...
...
@@ -206,8 +208,9 @@ public class JobGraphBuilder {
* @param subtasksPerInstance
* Number of subtasks allocated to a machine
*/
public
Configuration
setSink
(
String
sinkName
,
UserSinkInvokable
<?
extends
Tuple
>
InvokableObject
,
int
parallelism
,
int
subtasksPerInstance
)
{
public
Configuration
setSink
(
String
sinkName
,
UserSinkInvokable
<?
extends
Tuple
>
InvokableObject
,
int
parallelism
,
int
subtasksPerInstance
)
{
final
JobOutputVertex
sink
=
new
JobOutputVertex
(
sinkName
,
jobGraph
);
sink
.
setOutputClass
(
StreamSink
.
class
);
Configuration
config
=
setComponent
(
sinkName
,
InvokableObject
,
parallelism
,
...
...
@@ -257,8 +260,9 @@ public class JobGraphBuilder {
return
config
;
}
private
Configuration
setComponent
(
String
componentName
,
UserSourceInvokable
<?
extends
Tuple
>
InvokableObject
,
int
parallelism
,
int
subtasksPerInstance
,
AbstractJobVertex
component
)
{
private
Configuration
setComponent
(
String
componentName
,
UserSourceInvokable
<?
extends
Tuple
>
InvokableObject
,
int
parallelism
,
int
subtasksPerInstance
,
AbstractJobVertex
component
)
{
Configuration
config
=
setComponent
(
componentName
,
InvokableObject
.
getClass
(),
parallelism
,
subtasksPerInstance
,
component
);
...
...
@@ -266,8 +270,9 @@ public class JobGraphBuilder {
return
config
;
}
private
Configuration
setComponent
(
String
componentName
,
UserTaskInvokable
<?
extends
Tuple
,
?
extends
Tuple
>
InvokableObject
,
int
parallelism
,
int
subtasksPerInstance
,
AbstractJobVertex
component
)
{
private
Configuration
setComponent
(
String
componentName
,
UserTaskInvokable
<?
extends
Tuple
,
?
extends
Tuple
>
InvokableObject
,
int
parallelism
,
int
subtasksPerInstance
,
AbstractJobVertex
component
)
{
Configuration
config
=
setComponent
(
componentName
,
InvokableObject
.
getClass
(),
parallelism
,
subtasksPerInstance
,
component
);
...
...
@@ -275,8 +280,9 @@ public class JobGraphBuilder {
return
config
;
}
private
Configuration
setComponent
(
String
componentName
,
UserSinkInvokable
<?
extends
Tuple
>
InvokableObject
,
int
parallelism
,
int
subtasksPerInstance
,
AbstractJobVertex
component
)
{
private
Configuration
setComponent
(
String
componentName
,
UserSinkInvokable
<?
extends
Tuple
>
InvokableObject
,
int
parallelism
,
int
subtasksPerInstance
,
AbstractJobVertex
component
)
{
Configuration
config
=
setComponent
(
componentName
,
InvokableObject
.
getClass
(),
parallelism
,
subtasksPerInstance
,
component
);
...
...
@@ -396,6 +402,7 @@ public class JobGraphBuilder {
public void broadcastConnect(String upStreamComponentName, String downStreamComponentName) {
    // Wire the two components with the broadcast partitioner.
    connect(upStreamComponentName, downStreamComponentName, BroadcastPartitioner.class);

    // The channel count is the instance count recorded for the downstream component.
    addOutputChannels(upStreamComponentName,
            numberOfInstances.get(downStreamComponentName));

    final String message = "Broadcastconnected: " + upStreamComponentName + " to "
            + downStreamComponentName;
    log.info(message);
}
/**
...
...
@@ -444,6 +451,9 @@ public class JobGraphBuilder {
+
downStreamComponentName
,
e
);
}
}
log
.
info
(
"Fieldsconnected "
+
upStreamComponentName
+
" to "
+
downStreamComponentName
+
" on "
+
keyPosition
);
}
/**
...
...
@@ -461,6 +471,8 @@ public class JobGraphBuilder {
public void globalConnect(String upStreamComponentName, String downStreamComponentName) {
    // Wire the two components with the global partitioner.
    connect(upStreamComponentName, downStreamComponentName, GlobalPartitioner.class);

    // Registers an output channel count of 1 for the upstream component.
    addOutputChannels(upStreamComponentName, 1);

    final String message = "Globalconnected: " + upStreamComponentName + " to "
            + downStreamComponentName;
    log.info(message);
}
/**
...
...
@@ -478,6 +490,7 @@ public class JobGraphBuilder {
public void shuffleConnect(String upStreamComponentName, String downStreamComponentName) {
    // Wire the two components with the shuffle partitioner.
    connect(upStreamComponentName, downStreamComponentName, ShufflePartitioner.class);

    // Registers an output channel count of 1 for the upstream component.
    addOutputChannels(upStreamComponentName, 1);

    final String message = "Shuffleconnected: " + upStreamComponentName + " to "
            + downStreamComponentName;
    log.info(message);
}
private
void
addOutputChannels
(
String
upStreamComponentName
,
int
numOfInstances
)
{
...
...
flink-addons/flink-streaming/src/test/java/eu/stratosphere/streaming/api/FlatMapTest.java
浏览文件 @
ce56b146
...
...
@@ -39,9 +39,10 @@ public class FlatMapTest {
Tuple1
<
String
>
tup
=
new
Tuple1
<
String
>(
"asd"
);
StreamExecutionEnvironment
context
=
new
StreamExecutionEnvironment
();
DataStream
<
Tuple1
<
String
>>
dataStream0
=
context
.
setDummySource
();
DataStream
<
Tuple1
<
String
>>
dataStream
=
context
.
setDummySource
().
flatMap
(
new
MyFlatMap
()
)
.
addDummySink
();
DataStream
<
Tuple1
<
String
>>
dataStream
1
=
context
.
setDummySource
().
connectWith
(
dataStream0
)
.
partitionBy
(
0
).
flatMap
(
new
MyFlatMap
()).
broadcast
().
addDummySink
();
context
.
execute
();
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录