From b843f1851c334bd6019fe50a29dd6e460183f69d Mon Sep 17 00:00:00 2001 From: Fabian Hueske Date: Mon, 20 Dec 2010 18:01:14 +0100 Subject: [PATCH] - pact modules are built with tests - removed println statements from test - improved pact examples (K-Means and WordCount) --- .../example/datamining/KMeansIteration.java | 2 +- .../pact/example/wordcount/WordCount.java | 37 ++++++++++--------- pact/pact-runtime/pom.xml | 13 +++++++ .../runtime/hash/SerializingHashMapTest.java | 5 ++- .../pact/runtime/sort/TestMergeIterator.java | 2 +- pact/pact-tests/pom.xml | 22 +++++++++++ ...ContractsTests.java => ContractTests.java} | 2 +- .../pact/test/contracts/MapTest.java | 20 ---------- .../stratosphere/pact/test/util/TestBase.java | 8 ++-- pom.xml | 2 +- .../src/main/assemblies/stratosphere-bin.xml | 2 +- 11 files changed, 67 insertions(+), 48 deletions(-) rename pact/pact-tests/src/test/java/eu/stratosphere/pact/test/contracts/{ContractsTests.java => ContractTests.java} (97%) diff --git a/pact/pact-examples/src/main/java/eu/stratosphere/pact/example/datamining/KMeansIteration.java b/pact/pact-examples/src/main/java/eu/stratosphere/pact/example/datamining/KMeansIteration.java index 9d738397ffd..3592ebfbcb5 100644 --- a/pact/pact-examples/src/main/java/eu/stratosphere/pact/example/datamining/KMeansIteration.java +++ b/pact/pact-examples/src/main/java/eu/stratosphere/pact/example/datamining/KMeansIteration.java @@ -696,7 +696,7 @@ public class KMeansIteration implements PlanAssembler, PlanAssemblerDescription @Override public String getDescription() { - return "Parameters: dop, data-points, cluster-centers, output"; + return "Parameters: [dop] [data-points] [cluster-centers] [output]"; } } diff --git a/pact/pact-examples/src/main/java/eu/stratosphere/pact/example/wordcount/WordCount.java b/pact/pact-examples/src/main/java/eu/stratosphere/pact/example/wordcount/WordCount.java index 879016df793..e2f3679bb4d 100644 --- 
a/pact/pact-examples/src/main/java/eu/stratosphere/pact/example/wordcount/WordCount.java +++ b/pact/pact-examples/src/main/java/eu/stratosphere/pact/example/wordcount/WordCount.java @@ -13,9 +13,6 @@ * **********************************************************************************************************************/ -/** - * - */ package eu.stratosphere.pact.example.wordcount; import java.util.Iterator; @@ -48,7 +45,8 @@ import eu.stratosphere.pact.common.type.base.PactString; public class WordCount implements PlanAssembler, PlanAssemblerDescription { /** - * {@inheritDoc} + * Converts an input string (a line) into a KeyValuePair with the string + * being the key and the value being a zero Integer. */ public static class LineInFormat extends TextInputFormat { @@ -65,7 +63,8 @@ public class WordCount implements PlanAssembler, PlanAssemblerDescription { } /** - * {@inheritDoc} + * Writes a (String,Integer)-KeyValuePair to a string. The output format is: + * "<key> <value>\nl" */ public static class WordCountOutFormat extends TextOutputFormat { @@ -83,7 +82,10 @@ public class WordCount implements PlanAssembler, PlanAssemblerDescription { } /** - * {@inheritDoc} + * Converts a (String,Integer)-KeyValuePair into multiple KeyValuePairs. The + * key string is tokenized by spaces. For each token a new + * (String,Integer)-KeyValuePair is emitted where the Token is the key and + * an Integer(1) is the value. */ public static class TokenizeLine extends MapStub { @@ -103,8 +105,11 @@ public class WordCount implements PlanAssembler, PlanAssemblerDescription { } /** - * {@inheritDoc} + * Counts the number of values for a given key. Hence, the number of + * occurrences of a given token (word) is computed and emitted. The key is + * not modified, hence a SameKey OutputContract is attached to this class. 
*/ + @SameKey @Combinable public static class CountWords extends ReduceStub { @@ -138,6 +143,7 @@ public class WordCount implements PlanAssembler, PlanAssemblerDescription { */ @Override public Plan getPlan(String... args) { + if (args == null) { args = new String[0]; } @@ -146,21 +152,20 @@ public class WordCount implements PlanAssembler, PlanAssemblerDescription { String dataInput = (args.length > 1 && args[1] != null ? args[1] : "hdfs://localhost:9000/countwords/data"); String output = (args.length > 2 && args[2] != null ? args[2] : "hdfs://localhost:9000/countwords/result"); - DataSourceContract data = new DataSourceContract(LineInFormat.class, - dataInput, "Lines"); + DataSourceContract data = new DataSourceContract( + LineInFormat.class, dataInput, "Input Lines"); data.setDegreeOfParallelism(noSubTasks); MapContract mapper = new MapContract( - TokenizeLine.class, "Tokenize Lines"); + TokenizeLine.class, "Tokenize Lines"); mapper.setDegreeOfParallelism(noSubTasks); - mapper.setOutputContract(SameKey.class); ReduceContract reducer = new ReduceContract( - CountWords.class, "Count Words"); + CountWords.class, "Count Words"); reducer.setDegreeOfParallelism(noSubTasks); - DataSinkContract out = new DataSinkContract(WordCountOutFormat.class, - output, "Output"); + DataSinkContract out = new DataSinkContract( + WordCountOutFormat.class, output, "Output"); out.setDegreeOfParallelism(noSubTasks); out.setInput(reducer); @@ -175,9 +180,7 @@ public class WordCount implements PlanAssembler, PlanAssemblerDescription { */ @Override public String getDescription() { - return "WordCount: [noSubStasks] [input] [output]
" - + "\t noSubTasks: defines the degree of parallelism
" + "\t input: Location of the input file
" - + "\t output: Location of the output file
"; + return "Parameters: [noSubStasks] [input] [output]"; } } diff --git a/pact/pact-runtime/pom.xml b/pact/pact-runtime/pom.xml index a78bef9e908..c6c8a25d0f3 100644 --- a/pact/pact-runtime/pom.xml +++ b/pact/pact-runtime/pom.xml @@ -68,6 +68,19 @@ 1.6 + + + org.apache.maven.plugins + maven-surefire-plugin + 2.7 + + + **/TestData.java + + + + + diff --git a/pact/pact-runtime/src/test/java/eu/stratosphere/pact/runtime/hash/SerializingHashMapTest.java b/pact/pact-runtime/src/test/java/eu/stratosphere/pact/runtime/hash/SerializingHashMapTest.java index 7ce02bd8f24..f71740e6efc 100644 --- a/pact/pact-runtime/src/test/java/eu/stratosphere/pact/runtime/hash/SerializingHashMapTest.java +++ b/pact/pact-runtime/src/test/java/eu/stratosphere/pact/runtime/hash/SerializingHashMapTest.java @@ -22,6 +22,7 @@ import java.util.Set; import junit.framework.Assert; +import org.apache.log4j.Logger; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -109,8 +110,8 @@ public class SerializingHashMapTest { Assert.assertTrue("Number of values in map is below lower bound", pactHashMap.numberOfValues() >= SEGMENT_SIZE / (2 * VALUE_LENGTH + 16)); - System.out.println("Inserted " + pactHashMap.numberOfKeys() + " keys"); - System.out.println("Inserted " + pactHashMap.numberOfValues() + " values"); + Logger.getRootLogger().debug("Inserted " + pactHashMap.numberOfKeys() + " keys"); + Logger.getRootLogger().debug("Inserted " + pactHashMap.numberOfValues() + " values"); // test value iterators for (Key key : javaHashMap.keySet()) { diff --git a/pact/pact-runtime/src/test/java/eu/stratosphere/pact/runtime/sort/TestMergeIterator.java b/pact/pact-runtime/src/test/java/eu/stratosphere/pact/runtime/sort/TestMergeIterator.java index b5fbe98de90..d0b6254f788 100644 --- a/pact/pact-runtime/src/test/java/eu/stratosphere/pact/runtime/sort/TestMergeIterator.java +++ b/pact/pact-runtime/src/test/java/eu/stratosphere/pact/runtime/sort/TestMergeIterator.java @@ -99,7 +99,7 @@ public 
class TestMergeIterator { KeyValuePair pair1 = iterator.next(); while (iterator.hasNext()) { KeyValuePair pair2 = iterator.next(); - System.out.println("1 -> " + pair1.getKey() + " | 2 -> " + pair2.getKey()); + Logger.getRootLogger().debug("1 -> " + pair1.getKey() + " | 2 -> " + pair2.getKey()); Assert.assertTrue(comparator.compare(pair1.getKey(), pair2.getKey()) <= 0); pair1 = pair2; } diff --git a/pact/pact-tests/pom.xml b/pact/pact-tests/pom.xml index 2788b7d2465..4dc758d94b1 100644 --- a/pact/pact-tests/pom.xml +++ b/pact/pact-tests/pom.xml @@ -133,6 +133,28 @@ 1.6 + + + org.apache.maven.plugins + maven-surefire-plugin + 2.7 + + + + java.net.preferIPv4Stack + true + + + + + **/ContractTests.java + + + + + diff --git a/pact/pact-tests/src/test/java/eu/stratosphere/pact/test/contracts/ContractsTests.java b/pact/pact-tests/src/test/java/eu/stratosphere/pact/test/contracts/ContractTests.java similarity index 97% rename from pact/pact-tests/src/test/java/eu/stratosphere/pact/test/contracts/ContractsTests.java rename to pact/pact-tests/src/test/java/eu/stratosphere/pact/test/contracts/ContractTests.java index fc7df1e172c..e4927dc13ed 100644 --- a/pact/pact-tests/src/test/java/eu/stratosphere/pact/test/contracts/ContractsTests.java +++ b/pact/pact-tests/src/test/java/eu/stratosphere/pact/test/contracts/ContractTests.java @@ -21,6 +21,6 @@ import org.junit.runners.Suite.SuiteClasses; @RunWith(Suite.class) @SuiteClasses( { MapTest.class, ReduceTest.class, MatchTest.class, CrossTest.class, CoGroupTest.class }) -public class ContractsTests { +public class ContractTests { } diff --git a/pact/pact-tests/src/test/java/eu/stratosphere/pact/test/contracts/MapTest.java b/pact/pact-tests/src/test/java/eu/stratosphere/pact/test/contracts/MapTest.java index 40c4ec436da..2b4fa631d96 100644 --- a/pact/pact-tests/src/test/java/eu/stratosphere/pact/test/contracts/MapTest.java +++ b/pact/pact-tests/src/test/java/eu/stratosphere/pact/test/contracts/MapTest.java @@ -56,9 +56,6 @@ import 
eu.stratosphere.pact.test.util.TestBase; */ @RunWith(Parameterized.class) public class MapTest extends TestBase -/* - * TODO: - Allow multiple data sinks - */ { public MapTest(String clusterConfig, Configuration testConfig) { @@ -82,7 +79,6 @@ public class MapTest extends TestBase getHDFSProvider().writeFileToHDFS("mapTest_3.txt", MAP_IN_3); getHDFSProvider().writeFileToHDFS("mapTest_4.txt", MAP_IN_4); - // getHDFSProvider().createDir(getHDFSProvider().getHdfsHome()+"/result/"); } public static class MapTestInFormat extends TextInputFormat { @@ -100,28 +96,12 @@ public class MapTest extends TestBase return true; } - // @Override - // public byte[] writeLine(KeyValuePair pair) - // { - // return (pair.getKey().toString() + " " + pair.getValue().toString() + "\n").getBytes(); - // } } public static class MapTestOutFormat extends TextOutputFormat { private static final Log LOG = LogFactory.getLog(MapTestOutFormat.class); - // @Override - // public void readLine(KeyValuePair pair, byte[] line) - // { - // - // String[] tokens = line.toString().split(" "); - // - // pair.setKey(new N_String(tokens[0])); - // pair.setValue(new N_Integer(Integer.parseInt(tokens[1]))); - // - // } - @Override public byte[] writeLine(KeyValuePair pair) { LOG.info("Writing out: [" + pair.getKey() + "," + pair.getValue() + "]"); diff --git a/pact/pact-tests/src/test/java/eu/stratosphere/pact/test/util/TestBase.java b/pact/pact-tests/src/test/java/eu/stratosphere/pact/test/util/TestBase.java index 7229d4f2848..b588b5e2dae 100644 --- a/pact/pact-tests/src/test/java/eu/stratosphere/pact/test/util/TestBase.java +++ b/pact/pact-tests/src/test/java/eu/stratosphere/pact/test/util/TestBase.java @@ -225,9 +225,9 @@ public abstract class TestBase extends TestCase { expectedResult.add(st.nextToken()); } - // print expected and computed results - System.out.println("Expected: " + expectedResult); - System.out.println("Computed: " + computedResult); + // log expected and computed results + 
LOG.debug("Expected: " + expectedResult); + LOG.debug("Computed: " + computedResult); Assert.assertEquals("Computed and expected results have different size", expectedResult.size(), computedResult .size()); @@ -235,7 +235,7 @@ public abstract class TestBase extends TestCase { while (!expectedResult.isEmpty()) { String expectedLine = expectedResult.poll(); String computedLine = computedResult.poll(); - System.out.println("expLine: <" + expectedLine + ">\t\t: compLine: <" + computedLine + ">"); + LOG.debug("expLine: <" + expectedLine + ">\t\t: compLine: <" + computedLine + ">"); Assert.assertEquals("Computed and expected lines differ", expectedLine, computedLine); } } diff --git a/pom.xml b/pom.xml index e509657d55b..ee7062242bf 100644 --- a/pom.xml +++ b/pom.xml @@ -238,7 +238,7 @@ org.apache.maven.plugins maven-surefire-report-plugin - + 2.7 diff --git a/stratosphere-dist/src/main/assemblies/stratosphere-bin.xml b/stratosphere-dist/src/main/assemblies/stratosphere-bin.xml index 2bb62c63268..0346da647d0 100644 --- a/stratosphere-dist/src/main/assemblies/stratosphere-bin.xml +++ b/stratosphere-dist/src/main/assemblies/stratosphere-bin.xml @@ -4,8 +4,8 @@ stratosphere-bin dir - -- GitLab