FlatMapTest.java 7.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/***********************************************************************************************************************
 *
 * Copyright (C) 2010-2014 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 *
 **********************************************************************************************************************/

G
ghermann 已提交
16 17
package eu.stratosphere.streaming.api;

G
gyfora 已提交
18
import static org.junit.Assert.*;

import java.io.ByteArrayInputStream;
import java.io.ObjectInputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;

import org.junit.Test;

import eu.stratosphere.api.java.functions.FlatMapFunction;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.api.java.tuple.Tuple1;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.api.java.typeutils.TupleTypeInfo;
import eu.stratosphere.api.java.typeutils.TypeExtractor;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.nephele.jobgraph.AbstractJobVertex;
import eu.stratosphere.nephele.jobgraph.JobInputVertex;
import eu.stratosphere.nephele.jobgraph.JobOutputVertex;
import eu.stratosphere.nephele.jobgraph.JobTaskVertex;
import eu.stratosphere.streaming.api.MapTest.MyMap;
import eu.stratosphere.streaming.api.MapTest.MySink;
import eu.stratosphere.streaming.api.PrintTest.MyFlatMap;
import eu.stratosphere.streaming.api.invokable.UserSinkInvokable;
import eu.stratosphere.streaming.api.invokable.UserSourceInvokable;
import eu.stratosphere.streaming.api.invokable.UserTaskInvokable;
import eu.stratosphere.util.Collector;

48
public class FlatMapTest {
G
ghermann 已提交
49

J
jfeher 已提交
50 51
	public static final class MyFlatMap extends FlatMapFunction<Tuple1<Integer>, Tuple1<Integer>> {

G
ghermann 已提交
52
		@Override
G
gyfora 已提交
53 54 55
		public void flatMap(Tuple1<Integer> value, Collector<Tuple1<Integer>> out) throws Exception {
			out.collect(new Tuple1<Integer>(value.f0 * value.f0));

J
jfeher 已提交
56
		}
G
gyfora 已提交
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79

	}

	public static final class ParallelFlatMap extends
			FlatMapFunction<Tuple1<Integer>, Tuple1<Integer>> {

		@Override
		public void flatMap(Tuple1<Integer> value, Collector<Tuple1<Integer>> out) throws Exception {
			numberOfElements++;

		}

	}

	public static final class GenerateSequenceFlatMap extends
			FlatMapFunction<Tuple1<Long>, Tuple1<Long>> {

		@Override
		public void flatMap(Tuple1<Long> value, Collector<Tuple1<Long>> out) throws Exception {
			out.collect(new Tuple1<Long>(value.f0 * value.f0));

		}

G
ghermann 已提交
80
	}
G
ghermann 已提交
81

J
jfeher 已提交
82
	public static final class MySink extends SinkFunction<Tuple1<Integer>> {
G
gyfora 已提交
83

G
gyfora 已提交
84
		@Override
J
jfeher 已提交
85 86
		public void invoke(Tuple1<Integer> tuple) {
			result.add(tuple.f0);
G
gyfora 已提交
87 88 89 90
		}

	}

G
gyfora 已提交
91
	public static final class FromElementsSink extends SinkFunction<Tuple1<Integer>> {
J
jfeher 已提交
92 93

		@Override
G
gyfora 已提交
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
		public void invoke(Tuple1<Integer> tuple) {
			fromElementsResult.add(tuple.f0);
		}

	}

	public static final class FromCollectionSink extends SinkFunction<Tuple1<Integer>> {

		@Override
		public void invoke(Tuple1<Integer> tuple) {
			fromCollectionResult.add(tuple.f0);
		}

	}

	public static final class GenerateSequenceSink extends SinkFunction<Tuple1<Long>> {

		@Override
		public void invoke(Tuple1<Long> tuple) {
			generateSequenceResult.add(tuple.f0);
		}

	}

	private static void fillExpectedList() {
		for (int i = 0; i < 10; i++) {
			expected.add(i * i);
		}
	}

	private static void fillFromElementsExpected() {
		fromElementsExpected.add(4);
		fromElementsExpected.add(25);
		fromElementsExpected.add(81);
	}

	private static void fillSequenceSet() {
		for (int i = 0; i < 10; i++) {
			sequenceExpected.add(i * i);
133 134
		}
	}
J
jfeher 已提交
135
	
G
gyfora 已提交
136 137 138 139 140 141 142 143 144 145 146
	private static void fillLongSequenceSet() {
		for (int i = 0; i < 10; i++) {
			sequenceLongExpected.add((long)(i * i));
		}
	}

	private static void fillFromCollectionSet() {
		if(fromCollectionSet.isEmpty()){
			for (int i = 0; i < 10; i++) {
				fromCollectionSet.add(i);
			}
J
jfeher 已提交
147 148
		}
	}
149

J
jfeher 已提交
150
	private static final int PARALELISM = 1;
G
gyfora 已提交
151 152 153 154 155 156 157 158 159 160
	private static int numberOfElements = 0;
	private static Set<Integer> expected = new HashSet<Integer>();
	private static Set<Integer> result = new HashSet<Integer>();
	private static Set<Integer> fromElementsExpected = new HashSet<Integer>();
	private static Set<Integer> fromElementsResult = new HashSet<Integer>();
	private static Set<Integer> fromCollectionSet = new HashSet<Integer>();
	private static Set<Integer> sequenceExpected = new HashSet<Integer>();
	private static Set<Long> sequenceLongExpected = new HashSet<Long>();
	private static Set<Integer> fromCollectionResult = new HashSet<Integer>();
	private static Set<Long> generateSequenceResult = new HashSet<Long>();
161

G
ghermann 已提交
162
	@Test
G
gyfora 已提交
163
	public void test() throws Exception {
G
gyfora 已提交
164
		StreamExecutionEnvironment env = new StreamExecutionEnvironment(2, 1000);
G
gyfora 已提交
165 166 167 168 169
		
		fillFromCollectionSet();
		
		DataStream<Tuple1<Integer>> dataStream = env.fromCollection(fromCollectionSet)
				.flatMap(new MyFlatMap(), PARALELISM).addSink(new MySink());
J
jfeher 已提交
170

G
gyfora 已提交
171
		env.execute();
G
gyfora 已提交
172

173
		fillExpectedList();
G
gyfora 已提交
174

G
gyfora 已提交
175 176 177 178 179 180 181
		assertTrue(expected.equals(result));

	}

	@Test
	public void parallelShuffleconnectTest() throws Exception {
		StreamExecutionEnvironment env = new StreamExecutionEnvironment();
G
gyfora 已提交
182 183
		
		fillFromCollectionSet();
J
jfeher 已提交
184
		
G
gyfora 已提交
185 186 187 188 189 190
		DataStream<Tuple1<Integer>> source = env.fromCollection(fromCollectionSet);
		DataStream<Tuple1<Integer>> map = source.flatMap(new ParallelFlatMap(), 1).addSink(
				new MySink());
		DataStream<Tuple1<Integer>> map2 = source.flatMap(new ParallelFlatMap(), 1).addSink(
				new MySink());

G
gyfora 已提交
191 192 193 194 195 196 197 198 199 200 201 202 203
		env.execute();

		assertEquals(20, numberOfElements);
		numberOfElements=0;
		

	}

	@Test
	public void fromElementsTest() throws Exception {
		StreamExecutionEnvironment env = new StreamExecutionEnvironment();
		DataStream<Tuple1<Integer>> map = env.fromElements(2, 5, 9).flatMap(new MyFlatMap(), 1);
		DataStream<Tuple1<Integer>> sink = map.addSink(new FromElementsSink());
G
gyfora 已提交
204 205 206

		fillFromElementsExpected();

G
gyfora 已提交
207 208 209 210 211 212 213 214 215
		env.execute();
		assertEquals(fromElementsExpected, fromElementsResult);

	}

	@Test
	public void fromCollectionTest() throws Exception {
		StreamExecutionEnvironment env = new StreamExecutionEnvironment();

G
gyfora 已提交
216 217
		fillFromCollectionSet();

G
gyfora 已提交
218
		DataStream<Tuple1<Integer>> map = env.fromCollection(fromCollectionSet).flatMap(
G
gyfora 已提交
219
				new MyFlatMap(), 1);
G
gyfora 已提交
220
		DataStream<Tuple1<Integer>> sink = map.addSink(new FromCollectionSink());
G
gyfora 已提交
221 222 223

		fillSequenceSet();

G
gyfora 已提交
224 225 226 227 228 229 230 231 232 233
		env.execute();
		assertEquals(sequenceExpected, fromCollectionResult);

	}

	@Test
	public void generateSequenceTest() throws Exception {
		StreamExecutionEnvironment env = new StreamExecutionEnvironment();

		DataStream<Tuple1<Long>> map = env.generateSequence(0, 9).flatMap(
G
gyfora 已提交
234
				new GenerateSequenceFlatMap(), 1);
G
gyfora 已提交
235
		DataStream<Tuple1<Long>> sink = map.addSink(new GenerateSequenceSink());
G
gyfora 已提交
236 237 238 239 240

		fillLongSequenceSet();

		env.execute();
		assertEquals(sequenceLongExpected, generateSequenceResult);
G
ghermann 已提交
241 242
	}
}